const std = @import( "std" );
const Allocator = std.mem.Allocator;
const print = std.debug.print;
const assert = std.debug.assert;

// 2025-02-06
// grammar grammar v3, brute force

// fish
// while inotifywait grammar-grammar.v3.zig ; clear; zig test grammar-grammar.v3.zig ; end
// ls grammar-grammar.v3.zig pan-grammar.lr1-v3.txt | entr zig test grammar-grammar.v3.zig

// pub fn format(value: ?, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void
// ```
// pub fn format(
//     value : ? ,
//     comptime fmt : []const u8 ,
//     options : std.fmt.FormatOptions ,
//     writer : anytype ,
// ) !void
// ```

// youtube "attack of the killer features"
// XXX parameter reference optimization; scheduled for removal
// result location semantics

const List = std.ArrayList;

/// <grammar> =| <_> <assignment> * ;
const Grammar = struct {
    list :List( Assignment ),

    pub fn create( allocator :Allocator ) @This() {
        return .{ .list = List( Assignment ).init( allocator ) };
    }

    pub fn destroy( grammar :*@This() ) void {
        defer {
            grammar.list.deinit();
            grammar.* = undefined;
        }
        for( grammar.list.items ) |*item| { item.destroy(); }
    }

    /// pub fn format(value: ?, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void
    pub fn format( value: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype ) !void {
        for( value.list.items ) |item| {
            try writer.print( "{}", .{ item } );
        }
    }
};

/// <assignment> =| <identifier> '=' <_> <rule> + ';' <_> ;
const Assignment = struct {
    identifier :[]const u8,
    rule_list :List( Rule ),

    pub fn create( allocator :Allocator, id :[]const u8 ) @This() {
        return .{
            .identifier = id,
            .rule_list = List( Rule ).init( allocator ),
        };
    }

    pub fn destroy( a :*@This() ) void {
        defer {
            a.rule_list.deinit();
            a.* = undefined;
        }
        for( a.rule_list.items ) |*r| { r.destroy(); }
    }

    /// pub fn format(value: ?, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void
    pub fn format( value: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype ) !void {
        if( 1 < value.rule_list.items.len ) {
            try writer.print( "<{s}> =\n", .{ value.identifier } );
            for( value.rule_list.items ) |item| {
                try writer.print( "|{}\n", .{ item } );
            }
            try writer.print( ";\n", .{} );
        } else if( 1 == value.rule_list.items.len ) {
            try writer.print( "<{s}> =|{} ;\n", .{
                value.identifier,
                value.rule_list.items[0],
            } );
        } else {
            @panic( "assignment with no rules\n" );
            // return error.assignment_with_no_rules;
        }
    }
};

/// <rule> =| '|' <_> <term> * ;
const Rule = struct {
    term_list :List( Term ),

    pub fn create( allocator :Allocator ) @This() {
        return .{ .term_list = List( Term ).init( allocator ) };
    }

    pub fn destroy( rule :*@This() ) void {
        defer {
            rule.term_list.deinit();
            rule.* = undefined;
        }
        for( rule.term_list.items ) |*term| { term.destroy(); }
    }

    /// pub fn format(value: ?, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void
    pub fn format( value: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype ) !void {
        for( value.term_list.items ) |item| {
            try writer.print( " {}", .{ item } );
        }
    }
};

/// <term> =| <atom> <suffix> ? <_> ;
/// <atom> =
/// | <identifier>
/// | <literal>
/// ;
const Term = struct {
    // atom :Atom,
    const Atom = union(enum) {
        identifier :[]const u8,
        literal :List( u8 ), // 'abc' == 'a' or 'b' or 'c'
        // TODO XXX make literal a Set, not a List.
        sequence :List( u8 ), // "abc" == 'a' then 'b' then 'c'
    };
    atom :Atom,
    suffix :?u8,

    // XXX pub fn create() @This() {}

    pub fn destroy( t :*@This() ) void {
        switch( t.atom ) {
            .identifier => {},
            .literal => |l| { l.deinit(); },
            .sequence => |l| { l.deinit(); },
        }
    }

    /// pub fn format(value: ?, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void
    pub fn format( value: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype ) !void {
        switch( value.atom ) {
            .identifier => |id| {
                try writer.print( "<{s}>", .{ id } );
            },
            .literal => |li| {
                try writer.writeByte( '\'' );
                for( li.items ) |it| {
                    switch( it ) {
                        '\t' => { try writer.print( "\\t"  , .{} ); },
                        '\n' => { try writer.print( "\\n"  , .{} ); },
                        '\r' => { try writer.print( "\\r"  , .{} ); },
                        '\\' => { try writer.print( "\\\\" , .{} ); },
                        '\'' => { try writer.print( "\\'"  , .{} ); },
                        '"'  => { try writer.print( "\""   , .{} ); },
                        else => { try writer.print( "{c}"  , .{ it } ); },
                    }
                }
                try writer.writeByte( '\'' );
            },
            .sequence => |li| {
                try writer.writeByte( '"' );
                for( li.items ) |it| {
                    switch( it ) {
                        '\t' => { try writer.print( "\\t"  , .{} ); },
                        '\n' => { try writer.print( "\\n"  , .{} ); },
                        '\r' => { try writer.print( "\\r"  , .{} ); },
                        '\\' => { try writer.print( "\\\\" , .{} ); },
                        '\'' => { try writer.print( "'"    , .{} ); },
                        '\"' => { try writer.print( "\\\"" , .{} ); },
                        else => { try writer.print( "{c}"  , .{ it } ); },
                    }
                }
                try writer.writeByte( '"' );
            },
        }
        if( value.suffix ) |x| {
            try writer.print( "{c}", .{ x } );
        }
    }

    pub fn eql( term :Term, other :Term ) bool {
        return ( @intFromEnum( term.atom ) == @intFromEnum( other.atom ) )
        and switch( term.atom ) {
            .identifier => |id| std.mem.eql( u8, id, other.atom.identifier ),
            // XXX using a list as a set.. equal only if each contains the other's bytes
            .literal => |li| for( li.items ) |x| {
                _ = std.mem.indexOfScalar( u8, other.atom.literal.items, x ) orelse { break false; };
            } else for( other.atom.literal.items ) |x| {
                _ = std.mem.indexOfScalar( u8, li.items, x ) orelse { break false; };
            } else ( true ),
            .sequence => |se| std.mem.eql( u8, se.items, other.atom.sequence.items ),
        }
        and term.suffix == other.suffix
        ;
    }

    pub fn lt( term :Term, other :Term ) bool {
        if( @intFromEnum( term.atom ) != @intFromEnum( other.atom ) ) {
            return ( @intFromEnum( term.atom ) < @intFromEnum( other.atom ) );
        }
        // if( @intFromEnum( term.atom ) < @intFromEnum( other.atom ) ) { return true; }
        // if( @intFromEnum( other.atom ) < @intFromEnum( term.atom ) ) { return false; }
        switch( term.atom ) {
            .identifier => |id| {
                if( !std.mem.eql( u8, id, other.atom.identifier ) ) {
                    return std.mem.lessThan( u8, id, other.atom.identifier );
                }
                // if( std.mem.lessThan( u8, id, other.atom.identifier ) ) { return true; }
                // if( std.mem.lessThan( u8, other.atom.identifier, id ) ) { return false; }
            },
            .literal => |li| {
                // XXX comparing lists as sets of ordered elements..
                const li2 = other.atom.literal;
                for( 0..( std.math.maxInt(u8) + 1 ) ) |x| {
                    const left  = ( std.mem.indexOfScalar( u8, li .items, @as( u8, @intCast(x) ) ) != null );
                    const right = ( std.mem.indexOfScalar( u8, li2.items, @as( u8, @intCast(x) ) ) != null );
                    if( left == right ) { continue; }
                    return left;
                    // XXX this is not great, but it is functional
                    // an empty list is 'infinite', less than none
                }
            },
            .sequence => |se| {
                if( !std.mem.eql( u8, se.items, other.atom.sequence.items ) ) {
                    return std.mem.lessThan( u8, se.items, other.atom.sequence.items );
                }
                // if( std.mem.lessThan( u8, se.items, other.atom.sequence.items ) ) { return true; }
                // if( std.mem.lessThan( u8, other.atom.sequence.items, se.items ) ) { return false; }
            },
        }
        // if( ( term.suffix orelse '1' ) < ( other.suffix orelse '1' ) ) { return true; }
        // if( ( other.suffix orelse '1' ) < ( term.suffix orelse '1' ) ) { return false; }
        return ( ( term.suffix orelse '1' ) < ( other.suffix orelse '1' ) );
        // return false;
    }
};

// const Atom = union(enum) {
//     identifier :[]const u8,
//     literal :List( u8 ),
//     pub fn destroy( a :*@This() ) void {
//         switch( a.* ) {
//             .identifier => {},
//             .literal => |l| { l.deinit(); },
//         }
//     }
// };

// <literal> =| '\'' <literal_char> + '\'' ;
// <literal_char> =
// | '\\' <char>
// | <char>
// ;
// # <char> - tab, (newline), backslash, and apostrophe
// <suffix> =| '?' | '+' | '*' ;

const Parser = struct {
    allocator :Allocator,
    src :[]const u8,
    i :usize = 0,

    fn more( p :*Parser ) bool { return p.i < p.src.len; }
    fn peek( p :*Parser ) ?u8 { return if( p.more() ) p.src[p.i] else null; }
    fn step( p :*Parser ) void {
        assert( p.more() );
        p.i += 1;
    }
    fn consume( p :*Parser, x :u8 ) !void {
        const y = p.peek() orelse return error.eof;
        if( x != y ) return error.bad_consume;
        p.step();
    }

    //fn pointAt( p :*Parser ) void {
    //    const begin = if( std.mem.lastIndexOfScalar( u8, p.src[0..p.i], '\n' ) ) |index| index + 1 else 0;
    //    const end = std.mem.indexOfScalar( u8, p.src[begin..], '\n' ) orelse ( p.src.len - begin );
    //    print( "{s}\n", .{ p.src[ begin.. ][ 0..end ] } );
    //    for( 0..end-|1 ) |_| { print( " ", .{} ); }
    //    print( "^\n", .{} );
    //}
    fn pointAt( p :*Parser ) void {
        const begin = if( std.mem.lastIndexOfScalar( u8, p.src[0..p.i], '\n' ) ) |b| (b+1) else ( 0 );
        const end = std.mem.indexOfScalar( u8, p.src[p.i..], '\n' ) orelse ( p.src.len - p.i );
        const line = p.src[ begin .. p.i+end ];
        print( "{s}\n", .{ line } );
        for( 0..( p.i - begin ) ) |_| { print( " ", .{} ); }
        print( "^\n", .{} );
    }

    pub fn parse( allocator :Allocator, src :[]const u8 ) !Grammar {
        var p = Parser {
            .src = src,
            .allocator = allocator,
        };
        errdefer |e| {
            print( "error {s}\n", .{ @errorName( e ) } );
            p.pointAt();
        }
        try p.parseWhitespace();
        var grammar = Grammar.create( allocator );
        errdefer { grammar.destroy(); }
        while( p.more() and p.peek().? == '<' ) {
            try grammar.list.append( try p.parseAssignment() );
        }
        if( p.peek() != null ) return error.early_eof;
        return grammar;
    }

    fn parseWhitespace( p :*Parser ) !void {
        while( p.more() ) {
            switch( p.peek().? ) {
                '#' => {
                    try p.consume( '#' ); // p.step();
                    while( p.more() and p.peek().?
!= '\n' ) { p.step(); } }, ' ', '\t', '\n', '\r' => { p.step(); }, else => { break; }, } } } fn parseAssignment( p :*Parser ) !Assignment { const identifier = try p.parseIdentifier(); try p.consume( '=' ); try p.parseWhitespace(); var assignment = Assignment.create( p.allocator, identifier ); errdefer { assignment.destroy(); } while( p.more() and p.peek() == '|' ) { var rule = try p.parseRule(); errdefer { rule.destroy(); } try assignment.rule_list.append( rule ); } if( assignment.rule_list.items.len == 0 ) return error.no_rules; try p.consume( ';' ); try p.parseWhitespace(); return assignment; } fn parseIdentifier( p :*Parser ) ![]const u8 { try p.consume( '<' ); const start = p.i; while( p.more() and p.peek() != '>' ) { if( null == std.mem.indexOfScalar( u8, "_" ++ "abcdefghijklmnopqrstuvwxyz" ++ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ++ "0123456789", p.peek().?, ) ) { return error.bad_identifier; } p.step(); } const end = p.i; try p.consume( '>' ); try p.parseWhitespace(); return p.src[ start..end ]; } fn parseRule( p :*Parser ) !Rule { // !List( Term ) try p.consume( '|' ); // p.step(); try p.parseWhitespace(); var rule = Rule.create( p.allocator ); errdefer { rule.destroy(); } while( p.more() and p.peek() != ';' and p.peek() != '|' ) { const atom = switch( p.peek() orelse return error.atom_eof ) { '<' => Term.Atom { .identifier = try p.parseIdentifier () }, '\'' => Term.Atom { .literal = try p.parseLiteral () }, '"' => Term.Atom { .sequence = try p.parseSequence () }, else => { return error.bad_atom; }, }; const suffix = switch( p.peek() orelse '_' ) { '?', '+', '*' => |x| x, else => null, }; if( suffix ) |x| { try p.consume( x ); } try p.parseWhitespace(); try rule.term_list.append( .{ .atom = atom, .suffix = suffix, } ); } return rule; } fn parseLiteral( p :*Parser ) !List( u8 ) { var literal_list = List( u8 ).init( p.allocator ); errdefer { literal_list.deinit(); literal_list = undefined; } try p.consume( '\'' ); while( p.more() and p.peek().? != '\'' ) { switch( p.peek().? ) { '\\' => { try p.consume( '\\' ); const escaped = p.peek() orelse { return error.literal_eof; }; try literal_list.append( switch( escaped ) { 't' => '\t', 'n' => '\n', 'r' => '\r', else => escaped, } ); }, else => |x| { try literal_list.append( x ); }, } p.step(); } try p.consume( '\'' ); return literal_list; } fn parseSequence( p :*Parser ) !List( u8 ) { var sequence_list = List( u8 ).init( p.allocator ); errdefer { sequence_list.deinit(); sequence_list = undefined; } try p.consume( '"' ); while( p.more() and p.peek() != '"' ) { switch( p.peek().? 
) { '\\' => { try p.consume( '\\' ); const escaped = p.peek() orelse { return error.sequence_eof; }; try sequence_list.append( switch( escaped ) { 't' => '\t', 'n' => '\n', 'r' => '\r', else => escaped, } ); }, else => |x| { try sequence_list.append( x ); }, } p.step(); } try p.consume( '"' ); return sequence_list; } }; const Epsilon = struct { // const StringSet = std.StringHashMap( void ); ids :std.StringHashMap( void ), fn destroy( epsilon :*Epsilon ) void { epsilon.ids.deinit(); } fn create( allocator :Allocator, grammar :Grammar ) !Epsilon { var epsilon = Epsilon { .ids = std.StringHashMap( void ).init( allocator ), }; errdefer { epsilon.destroy(); } var change = true; // do-while-loop while( change ) { change = false; for( grammar.list.items ) |a| { const id = a.identifier; if( epsilon.ids.contains( id ) or !epsilon.assignment( a ) ) { continue; } change = true; try epsilon.ids.put( id, void {} ); } } return epsilon; } /// const epsilon = try create( allocator, grammar ); /// defer { epsilon.destroy(); } pub fn identifier( epsilon :*const Epsilon, id :[]const u8 ) bool { return epsilon.ids.contains( id ); } fn assignment( epsilon :*const Epsilon, a :Assignment ) bool { return for( a.rule_list.items ) |r| { if( epsilon.rule( r ) ) { break true; } } else ( false ); } fn rule( epsilon :*const Epsilon, r :Rule ) bool { return for( r.term_list.items ) |t| { if( !epsilon.term( t ) ) { break false; } } else ( true ); } fn term( epsilon :*const Epsilon, t :Term ) bool { return switch( t.suffix orelse '1' ) { '?', '*' => true, else => switch( t.atom ) { .identifier => |x| epsilon.ids.contains( x ), .literal => false, .sequence => |x| ( x.items.len == 0 ), }, }; } /// https://ziglang.org/documentation/master/std/#src/std/fmt.zig /// ``` /// pub fn format( /// value : ? , /// comptime fmt : []const u8 , /// options : std.fmt.FormatOptions , /// writer : anytype , /// ) !void /// ``` pub fn format( value :Epsilon, comptime fmt :[]const u8, options :std.fmt.FormatOptions, writer :anytype ) !void { _ = fmt; _ = options; var it = value.ids.keyIterator(); try writer.print( "{{ <{s}>", .{ ( it.next() orelse { return try writer.print( "{{}}", .{} ); } ).* } ); while( it.next() ) |key| { try writer.print( ", <{s}>", .{ key.* } ); } try writer.print( " }}", .{} ); } }; // "token" ? 
vs lexeme // const Token = enum { // byte, // id, // /// context for `std.HashMap()` // fn hash( _ :@This(), k :Token ) u64 { // return switch( k ) { // // .byte => |b| std.hash.Wyhash.hash( 1, &[_]u8 { b } ), // // .id => |id| std.hash.Wyhash.hash( 2, &[_]u8 {} ), // .byte => |b| b, // .id => 256 + 0, // }; // } // /// context for `std.HashMap()` // fn eql( _ :@This(), a :Token, b :Token ) bool { // return switch( a ) { // .byte => |a_byte| switch( b ) { // .byte => |b_byte| a_byte == b_byte, // else => false, // }, // .id => switch( b ) { // .id => true, // .id => |b_id| std.mem.eql( u8, a_id, b_id ), // else => false, // }, // }; // } // }; const Token = enum(u7) { // https://dirtand.rocks/ascii.html // ascii, control characters, whitespace ( non-sequential ) @"\t" = '\t', // 9, 0x09 @"\n" = '\n', // 10, 0x0a // printable ascii, sequential @" " = ' ', @"!", @"\"", @"#", @"$", @"%", @"&", @"'", @"(", @")", @"*", @"+", @",", @"-", @".", @"/", @"0", @"1", @"2", @"3", @"4", @"5", @"6", @"7", @"8", @"9", @":", @";", @"<", @"=", @">", @"?", @"@", A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z, @"[", @"\\", @"]", @"^", @"_", @"`", a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z, @"{", @"|", @"}", @"~", // TODO consider whitespace can be emitted by the parser as one token.. // grammar terminals id = 1, // identifier, // placeholders // nil = 0, // test2 = 2, test3, test4, test5, test6, test7, test8, // test11 = 11, // test12, test13, test14, test15, test16, // test17, test18, test19, test20, test21, // test22, test23, test24, test25, test26, // test27, test28, test29, test30, test31, // XXXtest32, eof = 127, // fin = 127, // XXXoverflow, pub fn valid( byte :u8 ) bool { return switch( byte ) { '\t', '\n', ' '...'~' => true, else => false, }; } pub fn getNonBytes() *const [2]Token { return &[_]Token { .id, .eof, // TODO does this belong here ? }; } /// only handles printable characters /// `\t`, `\n`, ` `*space* thru `~` /// `0x09`, `0x0a`, `0x20` thru `0x7e` pub fn get( byte :u7 ) Token { assert( Token.valid( byte ) ); return switch( byte ) { '\t' => Token.@"\t", '\n' => Token.@"\n", ' '...'~' => |b| @enumFromInt( b ), else => @panic( "bad token byte" ), // unreachable }; } /// does not handle identifier token `.id` pub fn int( token :Token ) u7 { assert( token != .id ); return switch( token ) { .@"\t" => '\t', .@"\n" => '\n', else => |t| @intFromEnum( t ), }; } }; // const Terminal = union(Token) { // Lexeme // // TODO consider a `u9` and `[257]bool` .. see `TokenSet` // byte :u8, // 0..256, printable chars only // id :[]const u8, // 256+0 // // digits :[]const u8, // 256+1 // }; const TokenSet = struct { x :std.EnumSet( Token ), // ☯ λ • // wikipedia - list of unicode characters - control pictures // U+240x ␀ ␁ ␂ ␃ ␄ ␅ ␆ ␇ ␈ ␉ ␊ ␋ ␌ ␍ ␎ ␏ // U+241x ␐ ␑ ␒ ␓ ␔ ␕ ␖ ␗ ␘ ␙ ␚ ␛ ␜ ␝ ␞ ␟ // U+242x ␠ ␡ ␢ ␣ ␤ ␥ ␦ ␧ ␨ ␩ // XXX requiring a wrapper type, // solely to have a unique print function // is *not* a feature. 
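    // usage sketch ( illustrative only, mirrors the token tests at the bottom of the file ) :
    //   var s = TokenSet.initOne( Token.get( 'x' ) );
    //   s.insert( Token.eof );
    //   assert( s.contains( Token.get( 'x' ) ) );
    //   print( "{}\n", .{ s } ); // "{ x|␀ }", plus color escapes when enabled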
fn initEmpty() TokenSet { return .{ .x = std.EnumSet( Token ).initEmpty(), }; } fn initOne( token :Token ) TokenSet { return .{ .x = std.EnumSet( Token ).initOne( token ), }; } fn eql( self :TokenSet, tokenSet :TokenSet ) bool { return self.x.eql( tokenSet.x ); } fn contains( self :TokenSet, token :Token ) bool { return self.x.contains( token ); } fn insert( self :*TokenSet, token :Token ) void { return self.x.insert( token ); } fn supersetOf( self :TokenSet, tokenSet :TokenSet ) bool { return self.x.supersetOf( tokenSet.x ); } fn setUnion( self :*TokenSet, tokenSet :TokenSet ) void { self.x.setUnion( tokenSet.x ); } /// https://ziglang.org/documentation/master/std/#src/std/fmt.zig /// ``` /// pub fn format( /// value : ? , /// comptime fmt : []const u8 , /// options : std.fmt.FormatOptions , /// writer : anytype , /// ) !void /// ``` pub fn format_( value :TokenSet, comptime fmt :[]const u8, options :std.fmt.FormatOptions, writer :anytype ) !void { _ = fmt; _ = options; try writer.print( "{{ {s}", .{ color() } ); // for( 0..128 ) |i| { // if( !Token.valid( @intCast(i) ) ) { continue; } // // const contains = tokenSet.contains( Token.get( @intCast(i) ) ); // try writer.print( "{c}", .{ @as( u8, if( value.contains( Token.get( @intCast(i) ) ) ) '1' else '0' ) } ); // } for( 0..128 ) |i| { if( !Token.valid( @intCast(i) ) ) { continue; } try writer.print( "{s}", .{ if( value.contains( Token.get( @intCast(i) ) ) ) ( switch( i ) { '\t' => ( "␉" ), '\n' => ( "¬" ), // ( "␤" ), '\r' => ( "␍" ), // XXX ' ' => ( "·" ), // ( "•" ), // ( "␠" ), else => ( &@as( u8, @intCast(i) ) )[0..1], } ) else ( " " ) } ); } try writer.print( " {s}", .{ if( value.contains( Token.id ) ) ( "id" ) else ( " " ) } ); // try writer.print( " {s}", .{ if( value.contains( Token.eof ) ) "eof" else " " } ); try writer.print( " {s}", .{ if( value.contains( Token.eof ) ) ( "␀" ) else ( " " ) } ); // for( Token.getNonBytes() ) |t| {} try writer.print( "{s} }}", .{ color_clear() } ); } pub fn format( value :TokenSet, comptime fmt :[]const u8, options :std.fmt.FormatOptions, writer :anytype ) !void { _ = fmt; _ = options; try writer.print( "{{ {s}", .{ color() } ); for( 0..128 ) |i| { if( !Token.valid( @intCast(i) ) ) { continue; } if( value.contains( Token.get( @intCast(i) ) ) ) { try writer.print( "{s}", .{ switch( i ) { '\t' => ( "␉" ), '\n' => ( "¬" ), // ( "␤" ), '\r' => ( "␍" ), // XXX ' ' => ( "·" ), // ( "•" ), // ( "␠" ), else => ( &@as( u8, @intCast(i) ) )[0..1], } } ); } } if( value.contains( Token.id ) ) { try writer.print( "{s}|{s}id", .{ color_clear(), color() } ); } if( value.contains( Token.eof ) ) { try writer.print( "{s}|{s}␀", .{ color_clear(), color() } ); } try writer.print( "{s} }}", .{ color_clear() } ); } }; // const Key = []const u8; // const StringToFirstMap = std.hash_map.HashMap( Key, First, struct { // fn hash( self :*@This(), key :Key ) u64 { // _ = self; // return std.hash_map.hashString( key ); // } // fn eql( self :*@This(), a :Key, b :Key ) bool { // _ = self; // return std.hash_map.eqlString( a, b ); // } // }, 75 ); // // just use std.StringHashMap( Value ) const First = struct { terminals :std.StringHashMap( TokenSet ), fn destroy( first :*First ) void { first.terminals.deinit(); first.* = undefined; } fn create( allocator :Allocator, grammar :Grammar, epsilon :Epsilon ) !First { var first = First { .terminals = std.StringHashMap( TokenSet ).init( allocator ), }; errdefer { first.destroy(); } for( grammar.list.items ) |assignment| { try first.terminals.put( assignment.identifier, 
            TokenSet.initEmpty() );
        }
        var change = true;
        while( change ) {
            change = false;
            for( grammar.list.items ) |assignment| {
                const entry = try first.terminals.getOrPutValue( assignment.identifier, TokenSet.initEmpty() );
                const tokenSet = first.first_assignment( epsilon, assignment );
                if( entry.value_ptr.eql( tokenSet ) ) { continue; }
                change = true;
                entry.value_ptr.setUnion( tokenSet );
            }
        }
        return first;
    }

    fn first_assignment( first :*@This(), epsilon :Epsilon, assignment :Assignment ) TokenSet {
        var tokenSet = TokenSet.initEmpty();
        for( assignment.rule_list.items ) |rule| {
            tokenSet.setUnion( first.first_rule( epsilon, rule ) );
        }
        return tokenSet;
    }

    fn first_rule( first :*@This(), epsilon :Epsilon, rule :Rule ) TokenSet {
        var tokenSet = TokenSet.initEmpty();
        for( rule.term_list.items ) |term| {
            tokenSet.setUnion( first.first_term( &epsilon, &term ) );
            if( !epsilon.term( term ) ) { break; }
        }
        return tokenSet;
    }

    fn first_term( first :*const @This(), _ :*const Epsilon, term :*const Term ) TokenSet {
        switch( term.atom ) {
            .identifier => |id| {
                return first.terminals.get( id ) orelse TokenSet.initEmpty();
            },
            .literal => |list| {
                var tokenSet = TokenSet.initEmpty();
                for( list.items ) |x| {
                    if( Token.valid( @intCast(x) ) ) {
                        tokenSet.insert( Token.get( @intCast( x ) ) );
                    } else {
                        // TODO XXX handle this properly..
                        print( "invalid char '{c}'\n", .{ x } );
                        if( x == '\r' ) { print( "carriage return \\r\n", .{} ); }
                        @panic( "TODO" ); // probably return an error ?
                    }
                }
                return tokenSet;
            },
            .sequence => |list| {
                return if( list.items.len == 0 )
                    ( TokenSet.initEmpty() )
                else
                    ( TokenSet.initOne( Token.get( @intCast( list.items[0] ) ) ) );
            },
        }
    }

    pub fn format( value :@This(), comptime _ :[]const u8, _ :std.fmt.FormatOptions, writer :anytype ) !void {
        var it = value.terminals.iterator();
        while( it.next() ) |kv| {
            try writer.print( "{s: <24} : {}\n", .{ kv.key_ptr.*, kv.value_ptr.* } );
        }
    }
};

// TODO test grammars with rules and identifiers that are not identifiers or rules
// or are directly self referential..
// and with rules that don't end, like "A =| 'x' A | 'y' A | 'z' A ;"
// 'A' is infinite, not finite.
// ..
// terminals / literals are finite
// assignments / identifiers / non-terminals that are epsilon are finite
// assignments that have at least one finite rule are finite
// rules whose terms are all finite are finite
// term identifiers ( check epsilon, then ) recurse to assignments
// term literals are finite ..
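// a minimal sketch of the epsilon/first pipeline on a tiny hypothetical grammar
// ( <alpha>, <beta> are illustrative names, not part of the reference data ) :
// <beta> is nullable, so first(<alpha>) picks up 'y' from <beta> and the 'z' behind it.
test "first & epsilon, nullable tail ( sketch )" {
    const src =
        \\<alpha> =| 'x'
        \\ | <beta> 'z' ;
        \\<beta> =| 'y'
        \\ | ;
    ;
    var grammar = try Parser.parse( tst.allocator, src );
    defer { grammar.destroy(); }
    var epsilon = try Epsilon.create( tst.allocator, grammar );
    defer { epsilon.destroy(); }
    var first = try First.create( tst.allocator, grammar, epsilon );
    defer { first.destroy(); }
    try expect( !epsilon.identifier( "alpha" ) );
    try expect( epsilon.identifier( "beta" ) );
    const alpha_first = first.terminals.get( "alpha" ).?;
    try expect( alpha_first.contains( Token.get( 'x' ) ) );
    try expect( alpha_first.contains( Token.get( 'y' ) ) );
    try expect( alpha_first.contains( Token.get( 'z' ) ) );
}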
const Follow = struct { // std.HashMap( id, followSet ); // followSet = tokenSet + eof; // added eof to Token enum terminals :std.StringHashMap( TokenSet ), pub fn destroy( follow :*@This() ) void { follow.terminals.deinit(); } pub fn create( allocator :Allocator, epsilon :Epsilon, first :First, grammar :Grammar, alpha :[]const u8, ) !Follow { var follow = Follow { .terminals = std.StringHashMap( TokenSet ).init( allocator ), }; errdefer { follow.destroy(); } for( grammar.list.items ) |assignment| { try follow.terminals.put( assignment.identifier, TokenSet.initEmpty() ); } try follow.terminals.put( alpha, TokenSet.initOne( Token.eof ) ); var change = true; // do-while-loop while( change ) { change = false; for( grammar.list.items ) |assignment| { change = try follow.follow_assignment( &first, &epsilon, &assignment ) or change; // // const entry = try follow.terminals.getOrPutValue( assignment.identifier, TokenSet.initEmpty() ); // const tokenSet = follow.follow_assignment( first, epsilon, assignment ); // if( entry.value_ptr.eql( tokenSet ) ) { continue; } // change = true; // entry.value_ptr.setUnion( tokenSet ); } } return follow; } /// updates follow in-place, returns true if any changes were made fn follow_assignment( follow :*Follow, first :*const First, epsilon :*const Epsilon, assignment :*const Assignment ) !bool { const tokenSet = follow.terminals.get( assignment.identifier ) orelse TokenSet.initEmpty(); var change = false; for( assignment.rule_list.items ) |rule| { change = try follow.follow_rule( first, epsilon, &rule, tokenSet ) or change; } return change; } /// updates follow.*.terminals TokenSet values for each terminal /// returns true if any change occurred; implies looping required fn follow_rule( follow :*Follow, first :*const First, epsilon :*const Epsilon, rule :*const Rule, assignment_followSet :TokenSet ) !bool { var change = false; for( rule.term_list.items, 0.. ) | term, i | { var tokenSet = TokenSet.initEmpty(); for( rule.term_list.items[i+1..] 
) |next_term| { tokenSet.setUnion( first.first_term( epsilon, &next_term ) ); if( !epsilon.term( next_term ) ) { break; } } else { tokenSet.setUnion( assignment_followSet ); } change = try follow.follow_term( first, epsilon, &term, tokenSet ) or change; } return change; } fn follow_term( follow :*Follow, first :*const First, epsilon :*const Epsilon, term :*const Term, tokenSet :TokenSet ) !bool { switch( term.atom ) { .identifier => |id| { const entry = try follow.terminals.getOrPutValue( id, TokenSet.initEmpty() ); var change = !entry.value_ptr.supersetOf( tokenSet ); entry.value_ptr.setUnion( tokenSet ); switch( term.suffix orelse '1' ) { '+', '*' => { const loopSet = first.first_term( epsilon, term ); change = !entry.value_ptr.supersetOf( loopSet ) or change; entry.value_ptr.setUnion( loopSet ); }, else => {}, } return change; }, .literal => { return false; }, .sequence => { return false; }, } } pub fn format( value :@This(), comptime _ :[]const u8, _ :std.fmt.FormatOptions, writer :anytype ) !void { var it = value.terminals.iterator(); while( it.next() ) |kv| { try writer.print( "{s: <24} : {}\n", .{ kv.key_ptr.*, kv.value_ptr.* } ); } } }; const State = struct { const Piece = struct { id :[]const u8, terms :List( Term ), // Rule, position :usize, // of dot next :TokenSet, // lookahead // TODO XXX the next /lookahead set should be computed as a function pub fn destroy( self :@This() ) void { self.terms.deinit(); } //pub fn create( allocator :Allocator, id :[]const u8, terms :[]Term, position :u8, next :TokenSet ) !Piece { // var term_list = List( Term ).init( allocator ); // try term_list.appendSlice( terms ); // return .{ // .id = id, // .terms = term_list, // .position = position, // .next = next, // }; //} pub fn format( value :@This(), comptime _ :[]const u8, _ :std.fmt.FormatOptions, writer :anytype ) !void { try writer.print( "<{s}> =", .{ value.id } ); for( value.terms.items, 0.. 
        ) |term, i| {
            try writer.print( "{s}", .{ if( value.position == i ) dot() else " " } );
            try writer.print( "{}", .{ term } );
        }
        try writer.print( "{s};", .{ if( value.position == value.terms.items.len ) dot() else " " } );
        // try writer.print( "\n", .{} );
        try writer.print( "{s}", .{ value.next } );
        try writer.print( "\n", .{} );
    }

    pub fn eql( piece :Piece, other :Piece ) bool {
        return std.mem.eql( u8, piece.id, other.id )
        and piece.terms.items.len == other.terms.items.len
        and ( for( piece.terms.items, other.terms.items ) |a,b| {
            if( !a.eql( b ) ) { break false; }
        } else true )
        and piece.position == other.position
        // and piece.lookahead.eql( other.lookahead )
        ;
    }

    pub fn lt( piece :Piece, other :Piece ) bool {
        if( !std.mem.eql( u8, piece.id, other.id ) ) {
            return std.mem.lessThan( u8, piece.id, other.id );
        }
        // var i :usize = 0;
        // while( i < piece.terms.items.len ) {
        //     defer { i += 1; }
        //     if( !( i < other.terms.items.len ) ) { return true; }
        //     if( )
        // }
        switch( compareListLexicographic( Term, Term.lt, null, piece.terms, other.terms ) ) {
            .lt => { return true; },
            .eq => {}, // do nothing, continue /fallthru
            .gt => { return false; },
        }
        if( piece.terms.items.len != other.terms.items.len ) {
            return ( piece.terms.items.len < other.terms.items.len );
        }
        if( piece.position != other.position ) {
            return ( piece.position < other.position );
        }
        // TODO compare next TokenSet
        return false;
    }
    };

    //allocator :Allocator,
    items :List( Piece ),

    pub fn destroy( self :*@This() ) void {
        for( self.items.items ) |piece| { piece.destroy(); }
        self.items.deinit();
        self.* = undefined;
    }
    pub fn create( allocator :Allocator ) @This() {
        return .{ .items = List( Piece ).init( allocator ) };
    }
    pub fn format( value :@This(), comptime _ :[]const u8, _ :std.fmt.FormatOptions, writer :anytype ) !void {
        for( value.items.items ) |item| {
            try writer.print( "{}", .{ item } );
        }
        try writer.print( "----\n", .{} );
    }
    pub fn append( state :*State, piece :Piece ) !void {
        for( state.items.items ) |p2| {
            if( piece.eql( p2 ) ) { break; }
        } else {
            return try state.items.append( piece );
        }
        return piece.destroy();
    }
};

fn compareListLexicographic( T :type, comptime lt :fn(T,T)bool, comptime eql :?fn(T,T)bool, list :List(T), other :List(T) ) enum { lt, eq, gt } {
    const eql_ = eql orelse struct {
        fn eql_( a :T, b :T ) bool {
            const lt_ = ( lt( a, b ) );
            const gt_ = ( lt( b, a ) );
            return ( !lt_ and !gt_ );
        }
    }.eql_;
    const len = @min( list.items.len, other.items.len );
    for( list.items[0..len], other.items[0..len] ) |a,b| {
        if( eql_( a, b ) ) { continue; }
        return ( if( lt( a, b ) ) ( .lt ) else ( .gt ) );
    }
    if( list.items.len == other.items.len ) { return .eq; }
    return ( if( list.items.len < other.items.len ) ( .lt ) else ( .gt ) );
}

test "state" {
    var grammar = try Parser.parse( tst.allocator, grammar_grammar_v3 );
    defer { grammar.destroy(); }
    var epsilon = try Epsilon.create( tst.allocator, grammar );
    defer { epsilon.destroy(); }
    var first = try First.create( std.testing.allocator, grammar, epsilon );
    defer { first.destroy(); }
    var follow = try Follow.create( std.testing.allocator, epsilon, first, grammar, "grammar" );
    defer { follow.destroy(); }

    const alpha = "grammar";
    const assignment = for( grammar.list.items ) |assignment| {
        if( std.mem.eql( u8, alpha, assignment.identifier ) ) { break assignment; }
    } else unreachable;
    const id = assignment.identifier;

    var state = State.create( tst.allocator );
    defer { state.destroy(); }
    for( assignment.rule_list.items ) |rule| {
        var terms = List( Term ).init( tst.allocator );
        errdefer { terms.deinit(); }
        try terms.appendSlice( rule.term_list.items );
        const piece = State.Piece {
            .id = id,
            .terms = terms,
            .position = 0,
            .next = TokenSet.initOne( Token.eof ),
        };
        try state.append( piece );
    }
    try expand( &state, tst.allocator, follow, first, epsilon, grammar );
    print( "{}\n", .{ state } );
}

test "state-cycle" {
    const src =
        \\<alpha> =| <beta> ')' ;
        \\<beta> =| <alpha> '}' ;
    ;
    const alpha = "alpha";
    var grammar = try Parser.parse( tst.allocator, src );
    defer { grammar.destroy(); }
    var epsilon = try Epsilon.create( tst.allocator, grammar );
    defer { epsilon.destroy(); }
    var first = try First.create( std.testing.allocator, grammar, epsilon );
    defer { first.destroy(); }
    var follow = try Follow.create( std.testing.allocator, epsilon, first, grammar, alpha );
    defer { follow.destroy(); }

    const assignment = for( grammar.list.items ) |assignment| {
        if( std.mem.eql( u8, alpha, assignment.identifier ) ) { break assignment; }
    } else {
        return error.alpha_not_in_grammar;
    };
    const id = assignment.identifier;

    var state = State.create( tst.allocator );
    defer { state.destroy(); }
    for( assignment.rule_list.items ) |rule| {
        var terms = List( Term ).init( tst.allocator );
        errdefer { terms.deinit(); }
        try terms.appendSlice( rule.term_list.items );
        const piece = State.Piece {
            .id = id,
            .terms = terms,
            .position = 0,
            .next = TokenSet.initOne( Token.eof ),
        };
        try state.append( piece );
    }
    try expand( &state, tst.allocator, follow, first, epsilon, grammar );
    print( "{}\n", .{ state } );
}

/// state is the thing
/// allocator for heap
/// follow is the follow_sets of identifiers - XXX not sure if this is necessary since lr1 uses lookahead...
/// first is the first_sets of identifiers
/// epsilon is the set of possibly empty identifiers
/// grammar is the full grammar
/// ----
/// this function takes the existing rules/pieces in the state,
/// and adds the rules that can be reached without consuming terminals.
/// so if the 'dot' is before an identifier, `piece`s for each rule of that identifier are added to `state`.
/// each `piece` has the rule-`identifier`, the rule-`terms`, the current `position` within those terms, and
/// the `lookahead` set to expect after consuming all the terms.
fn expand( state :*State, allocator :Allocator, follow :Follow, first :First, epsilon :Epsilon, grammar :Grammar ) !void {
    _ = follow;
    ////step forward thru each epsilon term
    ////add pieces for each position, that is not already added
    ////the lookahead set will be either
    ////  a) the first set of the next non-epsilon term, or
    ////  b) the follow set of the current piece.
    ////then continue with the unclosed pieces
    var i :usize = 0;
    while( i < state.items.items.len ) {
        defer { i += 1; }
        const piece = state.items.items[i];
        for( piece.terms.items[piece.position..], piece.position.. ) |term, pos| {
            const piece_la = lookahead( &first, &epsilon, piece.next, piece.terms.items[pos+1..]
); switch( term.atom ) { .identifier => |id| { // grammar.getIdentifier( id ); const rules = for( grammar.list.items ) |assignment| { if( std.mem.eql( u8, assignment.identifier, id ) ) { break assignment.rule_list.items; } } else ( &[0]Rule {} ); for( rules ) |rule| { var terms = List( Term ).init( allocator ); errdefer { terms.deinit(); } try terms.appendSlice( rule.term_list.items ); // const la = lookahead( &first, &epsilon, piece_la, terms.items ); var la = piece_la; if( term.suffix == '+' or term.suffix == '*' ) { la.setUnion( first.terminals.get( id ) orelse TokenSet.initEmpty() ); } try state.append( .{ .id = id, .terms = terms, .position = 0, .next = la, } ); } }, .literal, .sequence => {}, } if( !epsilon.term( term ) ) { break; } var terms = List( Term ).init( allocator ); errdefer { terms.deinit(); } try terms.appendSlice( piece.terms.items ); // const la = lookahead( &first, &epsilon, piece.next, piece.terms.items[pos+1..] ); try state.append( .{ .id = piece.id, .terms = terms, .position = pos+1, .next = piece.next, // piece_la, } ); } } } /// return the lookahead set, given a remaining slice of terms and a parent lookahead set. fn lookahead( first :*const First, epsilon :*const Epsilon, outer_lookahead :TokenSet, terms :[]Term, ) TokenSet { var la = TokenSet.initEmpty(); for( terms ) |term| { const first_of_term = first.first_term( epsilon, &term ); la.setUnion( first_of_term ); if( !epsilon.term( term ) ) { break; } } else { la.setUnion( outer_lookahead ); } return la; } // testing const tst = std.testing; const expect = tst.expect; const expectEqual = std.testing.expectEqual; test "follow" { var grammar = try Parser.parse( tst.allocator, grammar_grammar_v3 ); defer { grammar.destroy(); } var epsilon = try Epsilon.create( tst.allocator, grammar ); defer { epsilon.destroy(); } var first = try First.create( std.testing.allocator, grammar, epsilon ); defer { first.destroy(); } var follow = try Follow.create( std.testing.allocator, epsilon, first, grammar, "grammar" ); defer { follow.destroy(); } // print( "follow sets\n{}\n", .{ follow } ); } test "first" { var grammar = try Parser.parse( tst.allocator, grammar_grammar_v3 ); defer { grammar.destroy(); } var epsilon = try Epsilon.create( tst.allocator, grammar ); defer { epsilon.destroy(); } var first = try First.create( std.testing.allocator, grammar, epsilon ); defer { first.destroy(); } // print( "first sets\n{}\n", .{ first } ); } test "epsilon" { var grammar = try Parser.parse( tst.allocator, grammar_grammar_v3 ); defer { grammar.destroy(); } var epsilon = try Epsilon.create( tst.allocator, grammar ); defer { epsilon.destroy(); } // try expect( try Epsilon.identifier( tst.allocator, grammar, "grammar" ) ); try expect( epsilon.identifier( "grammar" ) ); // print( "epsilon: {}\n", .{ epsilon } ); } test "grammar; parse, print, free" { var grammar = try Parser.parse( tst.allocator, grammar_grammar_v3 ); defer { grammar.destroy(); } // print( "var grammar_grammar_v3: {}\n", .{ grammar } ); } test "grammar-grammar.v3.txt" { const txt = @embedFile( "grammar-grammar.v3.txt" ); var grammar = try Parser.parse( tst.allocator, txt ); defer { grammar.destroy(); } // print( "grammar-grammar.v3.txt: {}\n", .{ grammar } ); } test "pan-grammar" { const txt = @embedFile( "./pan-grammar.lr1-v3.txt" ); // print( "grammar-grammar.v3.txt:\n", .{} ); try test_go( txt, false, "program" ); } fn test_go( src :[]const u8, comptime go :bool, start :[]const u8 ) !void { const writer = if( go ) ( std.io.getStdOut().writer() ) else ( std.io.null_writer ); 
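    // the passes run in dependency order :
    // epsilon needs only the grammar, first builds on epsilon, follow builds on epsilon and first.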
    var grammar = try Parser.parse( tst.allocator, src );
    defer { grammar.destroy(); }
    try writer.print( "grammar: {}\n", .{ grammar } );

    var epsilon = try Epsilon.create( tst.allocator, grammar );
    defer { epsilon.destroy(); }
    try writer.print( "epsilon: {}\n", .{ epsilon } );

    var first = try First.create( tst.allocator, grammar, epsilon );
    defer { first.destroy(); }
    try writer.print( "first:\n{}\n", .{ first } );

    var follow = try Follow.create( tst.allocator, epsilon, first, grammar, start );
    defer { follow.destroy(); }
    try writer.print( "follow:\n{}\n", .{ follow } );
}

// TODO add comparisons against first and follow sets
// in-epsilon == nullable
// eof-in-follow == endable

test "https://smlweb.cpsc.ucalgary.ca/vital-stats.php?grammarfile=example-grammars/ll1-lr1-1.cfg" {
    const src =
        \\<S> =| "("
        \\ | "sq)"
        \\ | ")" ;
        \\<X> =| ")"
        \\ | "sq)" ;
        \\<E> =| ;
        \\<A> =| ;
        \\<F> =| ;
    ;
    try test_go( src, false, "S" );
}

test "https://smlweb.cpsc.ucalgary.ca/vital-stats.php?grammarfile=example-grammars/oth-oth-5.cfg" {
    const src =
        \\<A> =| "b" ;
        \\<B> =| "x"
        \\ | ;
        \\<C> =| "x" ;
        \\<D> =| "x"
        \\ | ;
    ;
    try test_go( src, false, "A" );
}

test "https://smlweb.cpsc.ucalgary.ca/vital-stats.php?grammarfile=example-grammars/oth-oth-1.cfg" {
    const src =
        \\<A> =| "a"
        \\ | "a" ;
        \\<B> =| "a"
        \\ | ;
        \\<C> =| "a"
        \\ | "b" ;
    ;
    try test_go( src, false, "A" );
}

test "https://smlweb.cpsc.ucalgary.ca/vital-stats.php?grammarfile=example-grammars/oth-oth-0.cfg" {
    const src =
        \\<S> =| "d" ;
        \\<A> =| "a"
        \\ | "b" "c" ;
        \\<B> =| "d"
        \\ | ;
    ;
    try test_go( src, false, "S" );
}

test "https://smlweb.cpsc.ucalgary.ca/vital-stats.php?grammarfile=example-grammars/ll2-lr2-4.cfg" {
    const src =
        \\<A> =| ;
        \\<B> =| "a" ;
        \\<C> =| "b"
        \\ | ;
        \\<D> =| "c"
        \\ | ;
        \\<E> =| "a"
        \\ | ;
    ;
    try test_go( src, false, "A" );
}

////test "https://smlweb.cpsc.ucalgary.ca/vital-stats.php?grammarfile=example-grammars/oth-lr2-0.cfg" {
////    const src =
////        \\<S> =| "("
////        \\ | "(" ")"
////        \\ | "sq)"
////        \\ | "(" ;
////        \\<X> =| ")"
////        \\ | "sq)" ;
////        \\<E> =| ;
////        \\<A> =| ;
////        \\<F> =| ;
////    ;
////    try test_go( src, false, "S" );
////
////    set_color( .off );
////    defer { set_color( .on ); }
////
////    // snapshot testing
////    var list = List( u8 ).init( tst.allocator );
////    defer { list.deinit(); }
////    var writer = list.writer();
////
////    var grammar = try Parser.parse( tst.allocator, src );
////    defer { grammar.destroy(); }
////    try writer.print( "{}", .{ grammar } );
////    try tst.expectEqualStrings( list.items,
////        \\<S> =
////        \\| "("
////        \\| "(" ")"
////        \\| "sq)"
////        \\| "("
////        \\;
////        \\<X> =
////        \\| ")"
////        \\| "sq)"
////        \\;
////        \\<E> =| ;
////        \\<A> =| ;
////        \\<F> =| ;
////        \\
////    );
////    list.clearRetainingCapacity();
////
////    var epsilon = try Epsilon.create( tst.allocator, grammar );
////    defer { epsilon.destroy(); }
////    try writer.print( "{}", .{ epsilon } );
////    try tst.expectEqualStrings( list.items,
////        \\{ <E>, <A>, <F> }
////    );
////    list.clearRetainingCapacity();
////
////    var first = try First.create( tst.allocator, grammar, epsilon );
////    defer { first.destroy(); }
////    try writer.print( "{}", .{ first } );
////    try tst.expectEqualStrings( list.items,
////        \\S : { ( s }
////        \\X : { ) s }
////        \\E : { }
////        \\A : { }
////        \\F : { }
////        \\
////    );
////    list.clearRetainingCapacity();
////
////    var follow = try Follow.create( tst.allocator, epsilon, first, grammar, "S" );
////    defer { follow.destroy(); }
////    try writer.print( "{}", .{ follow } );
////    try tst.expectEqualStrings( list.items,
////        \\S : { ␀ }
////        \\X : { ␀ }
////        \\E : { ) s }
////        \\A : { () s }
////        \\F : { ( s }
////        \\
////    );
////    list.clearRetainingCapacity();
////}

test "parser abc" {
    var parser = Parser {
        .allocator = tst.allocator,
        .src = "abc xyz",
    };
    try expect( parser.more() );
    try expectEqual( parser.peek(), 'a' );
    parser.step();
    try expectEqual( parser.peek(), 'b' );
    try parser.consume( 'b' );
    try expectEqual( parser.peek(), 'c' );
    try parser.consume( 'c' );
    try expect( parser.more() );
    var i :u4 = 0;
    while( parser.more() ) {
        i += switch( parser.peek().? ) {
            'x','y','z' => 1,
            else => 0,
        };
        parser.step();
    }
    try expectEqual( i, 3 );
    try expect( !parser.more() );
}

test "token bytes" {
    // min, max
    try expectEqual( Token.@" ", Token.get( ' ' ) );
    try expectEqual( Token.int( Token.@" " ), ' ' );
    try expectEqual( Token.@"~", Token.get( '~' ) );
    try expectEqual( Token.int( Token.@"~" ), '~' );
    // zig identifier escapes
    try expectEqual( Token.@"\\", Token.get( '\\' ) );
    try expectEqual( Token.int( Token.@"\\" ), '\\' );
    try expectEqual( Token.@"\"", Token.get( '\"' ) );
    try expectEqual( Token.int( Token.@"\"" ), '\"' );
    // 0,9, A,Z, a,z,
    try expectEqual( Token.@"0", Token.get( '0' ) );
    try expectEqual( Token.int( Token.@"0" ), '0' );
    try expectEqual( Token.@"9", Token.get( '9' ) );
    try expectEqual( Token.int( Token.@"9" ), '9' );
    try expectEqual( Token.A, Token.get( 'A' ) );
    try expectEqual( Token.int( Token.A ), 'A' );
    try expectEqual( Token.@"Z", Token.get( 'Z' ) );
    try expectEqual( Token.int( Token.@"Z" ), 'Z' );
    try expectEqual( Token.a, Token.get( 'a' ) );
    try expectEqual( Token.int( Token.a ), 'a' );
    try expectEqual( Token.@"z", Token.get( 'z' ) );
    try expectEqual( Token.int( Token.@"z" ), 'z' );
}

test "token size" {
    try expect( @sizeOf( Token ) <= @sizeOf( u7 ) );
    try expectEqual( ' ', @as( u7, @intFromEnum( Token.@" " ) ) );
    try expectEqual( Token.@" ", @as( Token, @enumFromInt( ' ' ) ) );
    try expectEqual( '\t' , Token.@"\t" .int() );
    try expectEqual( '\n' , Token.@"\n" .int() );
    try expectEqual( ' '  , Token.@" "  .int() );
    try expectEqual( '~'  , Token.@"~"  .int() );
}

// data

const grammar_grammar_v3 =
    \\
    \\# grammar grammar
    \\
    \\<grammar> =| <_> <assignment> * ;
    \\
    \\<_> =| <ws> * ;
    \\
    // \\<ws> =| ' ' | '\t' | '\n' | '\r' | <comment> ;
    \\<ws> =| ' ' | '\t' | '\n' | <comment> ;
    \\
    \\<comment> =| '#' <comment_char> * '\n' ;
    \\
    \\# any single printing ascii character other than newline
    \\<comment_char> =
    \\# alphanum
    \\| 'abcdefghijklmnopqrstuvwxyz'
    \\| 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    \\| '0123456789'
    \\# whitespace
    // \\| ' \t' # '\n\r'
    \\| ' \t' # '\n'
    \\# symbols
    \\| '`~!@#$%^&*()-_=+'
    \\| '[{]}\\|'
    \\| ';:' | '\'' | '"'
    \\| ',<.>/?'
    \\;
    \\
    \\<assignment> =| <identifier> '=' <_> <rule> + ';' <_> ;
    \\
    \\<identifier> =| '<' <id_start> <id_char> * '>' <_> ;
    \\
    \\<id_start> =
    \\| '_'
    \\| 'abcdefghijklmnopqrstuvwxyz'
    \\| 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    \\;
    \\
    \\<id_char> =
    \\| '_'
    \\| 'abcdefghijklmnopqrstuvwxyz'
    \\| 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    \\| '0123456789'
    \\;
    \\
    \\<rule> =| '|' <_> <term> * ;
    \\
    \\<term> =| <atom> <suffix> ? <_> ;
    \\
    \\<atom> =
    \\| <identifier>
    \\| <literal>
    \\;
    \\
    \\<literal> =| '\'' <literal_char> + '\'' ;
    \\
    \\<literal_char> =
    \\| '\\' <char>
    \\| <char>
    \\;
    \\
    \\# <char> - tab, (newline), backslash, and apostrophe
    \\<char> =
    \\# alphanum
    \\| 'abcdefghijklmnopqrstuvwxyz'
    \\| 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    \\| '0123456789'
    \\# whitespace
    \\| ' ' # '\t\n\r'
    \\# symbols
    \\| '`~!@#$%^&*()-_=+'
    \\| '[{]}|' # '\\'
    \\| ';:' | '"' # '\''
    \\| ',<.>/?'
    \\;
    \\
    \\<suffix> =| '?' | '+' | '*' ;
    \\
    \\# <_> =| '_' ;
    \\
;

// ☯ λ •

// const ASDF = "\x1b[48;2;0;0;0;38;2;128;255;255m"; // cyan-on-black
const ASDF = "\x1b[38;2;64;255;255m"; // foreground-cyan
const FDSA = "\x1b[0m"; // clear colors, reset

const DOT = "•" ;
const COLORED_DOT = ASDF ++ "•" ++ FDSA ;

// XXX this is a HACK, but works for now.. how to improve ?
var _use_color = true;
fn set_color( on :enum { on, off } ) void {
    _use_color = ( on == .on );
}
fn use_color() bool {
    if( !_use_color ) { return false; }
    var buffer :[256]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init( &buffer );
    const str = std.process.getEnvVarOwned( fba.allocator(), "NO_COLOR" ) catch "";
    return ( str.len == 0 );
}

/// cyan "•", https://no-color.org
fn dot() []const u8 { return ( if( use_color() ) COLORED_DOT else DOT ); }
fn color () []const u8 { return ( if( use_color() ) ASDF else "" ); }
fn color_clear () []const u8 { return ( if( use_color() ) FDSA else "" ); }

////test "item progress, colored dot" {
////    const writer = std.io.getStdOut().writer();
////    try writer.print( "<grammar> =| <_> <assignment>{s}* ;\n", .{ dot() } );
////    try writer.print( "<grammar> =| <_> <assignment>{s}{s}{s}* ;\n", .{
////        color(), DOT, color_clear(),
////    } );
////}
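// a minimal sketch of follow-set propagation on a tiny hypothetical grammar
// ( <outer>, <inner> are illustrative names, not part of the reference data ) :
// the start symbol carries eof in its follow set, and <inner> inherits the ')'
// that follows it at its one call site.
test "follow, eof and closing paren ( sketch )" {
    const src =
        \\<outer> =| '(' <inner> ')' ;
        \\<inner> =| 'i' ;
    ;
    var grammar = try Parser.parse( tst.allocator, src );
    defer { grammar.destroy(); }
    var epsilon = try Epsilon.create( tst.allocator, grammar );
    defer { epsilon.destroy(); }
    var first = try First.create( tst.allocator, grammar, epsilon );
    defer { first.destroy(); }
    var follow = try Follow.create( tst.allocator, epsilon, first, grammar, "outer" );
    defer { follow.destroy(); }
    try expect( follow.terminals.get( "outer" ).?.contains( Token.eof ) );
    try expect( follow.terminals.get( "inner" ).?.contains( Token.get( ')' ) ) );
}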