// fish // while inotifywait grammar-grammar.v2.zig ; // clear ; // date --iso-8601=seconds ; // echo ; // zig test grammar-grammar.v2.zig ; // echo ; // end const std = @import( "std" ); test "parse" { const rules = try parse( std.testing.allocator, "S =| 'a' ;", null ); defer { parse_deinit( rules, std.testing.allocator ); } try std.testing.expect( rules.len == 1 ); try std.testing.expectEqualStrings( rules[0].name, "S" ); try std.testing.expect( rules[0].options.len == 1 ); try std.testing.expect( rules[0].options[0].len == 1 ); try std.testing.expect( rules[0].options[0][0].term.inverted == false ); try std.testing.expectEqualStrings( rules[0].options[0][0].term.escaped, "a" ); // try std.testing.expectEqualDeep( rules, &.{ .{ // .name = &"S", // .options = &.{ // &.{ // .{ .term = .{ // .inverted = false, // .escaped = "a" // } } // } // } // } } ); // std.debug.print( "success\n", .{} ); // print_rules( rules ); } test "grammar" { const all = std.testing.allocator; var err_buf :[128]u8 = undefined; var err_slc :[]u8 = err_buf[0..]; // const rules = try parse( std.testing.allocator, @embedFile( "grammar-grammar.v2.txt" ), null ); const rules = parse( std.testing.allocator, @embedFile( "grammar-grammar.v2.txt" ), // grammar_grammar, &err_slc, ) catch |e| { std.debug.print( "grammar error: {s}\n", .{ @errorName( e ) } ); std.debug.print( "{s}", .{ err_slc } ); return e; }; defer { parse_deinit( rules, all ); } // print_rules_2( rules ); var buf :[ grammar_grammar.len + 10 ]u8 = undefined; // const slc :[]u8 = buf[0..]; var fixed_buffer_stream = std.io.fixedBufferStream( buf[0..] ); const writer = fixed_buffer_stream.writer(); const ByteStream = std.io.FixedBufferStream( []u8 ); try write_rules( *ByteStream, ByteStream.WriteError, ByteStream.write, writer, rules, ); const written = fixed_buffer_stream.getWritten(); // @compileLog( written ); // @as( []u8, [runtime value] ) // thought it'd be `usize`, but a slice `[]u8` is better try std.testing.expectEqualStrings( grammar_grammar, written ); const simple_first = try first_set( all, rules, "simple" ); // simple_first.filter.print(); // 'a' try std.testing.expect( simple_first.filter.has( 'a' ) ); try std.testing.expect( !simple_first.epsilon ); const omega_first = try first_set( all, rules, "omega" ); // omega_first.filter.print(); // 0x7f try std.testing.expect( omega_first.filter.has( 0x7f ) ); try std.testing.expect( !omega_first.epsilon ); const hex_first = try first_set( all, rules, "hex" ); // hex_first.filter.print(); // '0' .. '9' 'A' .. 'F' 'a' .. 'f' try std.testing.expect( hex_first.filter.has( '0' ) ); try std.testing.expect( hex_first.filter.has( '9' ) ); try std.testing.expect( hex_first.filter.has( 'A' ) ); try std.testing.expect( hex_first.filter.has( 'F' ) ); try std.testing.expect( hex_first.filter.has( 'a' ) ); try std.testing.expect( hex_first.filter.has( 'f' ) ); try std.testing.expect( !hex_first.epsilon ); const escaped_first = try first_set( all, rules, "escaped" ); // escaped_first.filter.print(); // .. everything except ''' try std.testing.expect( !escaped_first.filter.has( '\'' ) ); try std.testing.expect( !escaped_first.epsilon ); var escaped_inverted = escaped_first; escaped_inverted.filter.invert(); // escaped_inverted.filter.print(); // ''' try std.testing.expect( escaped_inverted.filter.has( '\'' ) ); const hash_first = try first_set( all, rules, "HASH" ); try std.testing.expect( hash_first.filter.has( '#' ) ); try std.testing.expect( !hash_first.epsilon ); const start_first = try first_set( all, rules, "start" ); // start_first.filter.print(); try std.testing.expect( start_first.filter.has( ' ' ) ); try std.testing.expect( start_first.filter.has( '\t' ) ); try std.testing.expect( start_first.filter.has( '\r' ) ); try std.testing.expect( start_first.filter.has( '\n' ) ); try std.testing.expect( start_first.filter.has( '#' ) ); try std.testing.expect( start_first.filter.has( 'A' ) ); try std.testing.expect( start_first.filter.has( 'Z' ) ); try std.testing.expect( start_first.filter.has( '_' ) ); try std.testing.expect( start_first.filter.has( 'a' ) ); try std.testing.expect( start_first.filter.has( 'z' ) ); try std.testing.expect( start_first.epsilon ); // TODO consider.. } test "single recursive rule" { // const all = std.testing.allocator; const source = \\ s = \\ | '(' s. ')' s. \\ | # epsilon \\ ; ; const rules = try parse( std.testing.allocator, source, null ); defer { parse_deinit( rules, std.testing.allocator ); } const s_first = try first_set( std.testing.allocator, rules, "s" ); try std.testing.expect( s_first.filter.has( '(' ) ); try std.testing.expect( s_first.epsilon ); } test "first_set of recusive rules" { const source = \\ s = \\ | '(' o. ')' s. \\ | # epsilon \\ ; \\ o = \\ | '[' s. ']' o. \\ | # epsilon \\ ; ; const rules = try parse( std.testing.allocator, source, null ); defer { parse_deinit( rules, std.testing.allocator ); } const s_first = try first_set( std.testing.allocator, rules, "s" ); try std.testing.expect( s_first.filter.has( '(' ) ); } test "first_set of recursive rules v2" { const s2 = \\ a =| b? 'a' ; \\ b =| a? 'b' ; ; const r2 = try parse( std.testing.allocator, s2, null ); defer { parse_deinit( r2, std.testing.allocator ); } // if( true ) { return error.SkipZigTest; } // // if( true ) { return error.infinite_loop; } // XXX crash is an infinite loop here // const s2_a_first = try first_set( std.testing.allocator, r2, "a" ); // XXX // // https://ziglang.org/documentation/master/std/#std.hash_map.HashMap var epsilon_rules = try Epsilon.get_epsilon_rules( std.testing.allocator, r2 ); defer { epsilon_rules.deinit(); } var r2_first_sets = try FirstSet.get_first_sets( std.testing.allocator, r2, epsilon_rules ); defer { r2_first_sets.deinit(); } const s2_a_first = r2_first_sets.get( "a" ) orelse { return error.bad_parse; }; try std.testing.expect( s2_a_first.has( 'a' ) ); try std.testing.expect( s2_a_first.has( 'b' ) ); } const one_usize :usize = 1; // `std.hash_map.StringHashMap( void )` const StringToFirstMap = std.hash_map.HashMap( []const u8, First, struct { const K = []const u8; fn hash( self :*@This(), key :K ) u64 { _ = self; return std.hash_map.hashString( key ); } fn eql( self :*@This(), a :K, b :K ) bool { _ = self; return std.hash_map.eqlString( a, b ); } }, 75 ); // fn epsilon( all :std.mem.Allocator, rules :[]Rule, rule_name :[]const u8 ) !First { // var map = StringToFirstMap.init( all ); // defer { map.deinit(); } // return first_set_helper( rules, rule_name, &map ); // } // fn epsilon_helper( rules :[]Rule, rule_name :[]const u8 ) !bool { // const rule :Rule = for( rules ) |rule| { // if( std.mem.eql( u8, rule.name, rule_name ) ) { // break rule; // } // } else return error.no_rule_with_name; // options: for( rule.options ) |option| { // exprs: for( option ) |expr| { // switch( expr ) { // .item => |item| { // if( item.suffix == .question or item.suffix == .star ) { // continue :exprs; // } else { // continue :options; // } // }, // .term => { continue :options; }, // .byte => { continue :options; }, // } // } // return true; // } // return false; // } fn first_set_( all :std.mem.Allocator, rules :[]Rule, rule_name :[]const u8 ) !First { var epsilon_rules = try Epsilon.get_epsilon_rules( all, rules ); defer { epsilon_rules.deinit(); } var first_sets = try FirstSet.get_first_sets( all, rules, epsilon_rules ); defer { first_sets.deinit(); } const epsilon = epsilon_rules.contains( rule_name ); const filter = first_sets.get( rule_name ) orelse { return error.no_rule; }; return .{ .epsilon = epsilon, .filter = filter, }; } fn first_set( allocator :std.mem.Allocator, rules :[]Rule, rule_name :[]const u8, // seen :?*StringToFirstMap ) !First { // var y = std.heap.ArenaAllocator.init( std.heap.page_allocator ); // XXX // var y = std.heap.ArenaAllocator.init( std.testing.allocator ); var y = std.heap.ArenaAllocator.init( allocator ); defer { y.deinit(); } const all :std.mem.Allocator = y.allocator(); return try first_set_( all, rules, rule_name ); } fn _first_set( rules :[]Rule, rule_name :[]const u8, // seen :?*StringToFirstMap ) !First { // _ = seen; var first = First {}; const rule :Rule = for( rules ) |rule| { if( std.mem.eql( u8, rule.name, rule_name ) ) { break rule; } } else { return error.no_rule_with_name; }; // if( seen.*.contains( rule.name ) ) { // return error.previously_seen_rule; // } else { // seen.*.put( rule.name, rule ); // } for( rule.options ) |option| { if( option.len == 0 ) { first.epsilon = true; } var epsilon = true; for( option ) |expr| { switch( expr ) { .term => |t| { var term_first = First {}; for( t.escaped ) |esc| { // XXX // std.debug.print( "esc 0x{x} '{c}'\n", .{ esc, esc } ); term_first.filter.add( esc ); } if( (t.inverted and !term_first.epsilon) or (!t.inverted and term_first.epsilon) ) { term_first.filter.invert(); } first.filter.add_filter( term_first.filter ); epsilon = false; break; }, .item => |i| { const item_first = try _first_set( rules, i.name ); // const item_first = seen.get( i.name ) orelse b: { // const item_first = try _first_set( rules, i.name ); // try seen.put( i.name, item_first ); // break :b item_first; // }; for( first.filter.flags[0..], item_first.filter.flags[0..] ) |*f, ff| { if( f.* & ff != 0 ) { return error.ambiguous; } f.* |= ff; } switch( i.suffix ) { .question, .star, => continue, .dot, .plus, => { if( !item_first.epsilon ) { epsilon = false; break; } }, } }, .byte => |b| { epsilon = false; first.filter.add( b ); break; }, } } if( epsilon ) { first.epsilon = true; } } return first; } const Epsilon = struct { pub const RuleSet = std.hash_map.StringHashMap( void ); // const RuleSet = std.hash_map.HashMap( []const u8, void, struct { // const K = []const u8; // pub fn hash ( _ :@This(), key :K ) u64 { return std.hash_map.hashString( key ); } // pub fn eql ( _ :@This(), a :K, b :K ) bool { return std.hash_map.eqlString( a, b ); } // }, 80 ); /// an expression is epsilon if its an `item` with /// - suffix as question or star /// or /// - a rule that is epsilon (recursion) fn expr_is_epsilon( expr :*const Expr, epsilon_rules :RuleSet ) bool { switch( expr.* ) { .term => { return false; }, .item => |item| { if( item.suffix == .question or item.suffix == .star ) { return true; } return epsilon_rules.contains( item.name ); }, .byte => { return false; }, } } /// an option is epsilon if *every* expression is epsilon fn option_is_epsilon( exprs :*const []const Expr, epsilon_rules :RuleSet ) bool { // if( exprs.*.len == 0 ) { return true; } for( exprs.* ) |expr| { if( !expr_is_epsilon( &expr, epsilon_rules ) ) { return false; } } return true; } /// a rule is epsilon if *any* option is epsilon fn rule_is_epsilon( rule :*const Rule, epsilon_rules :RuleSet ) bool { for( rule.options ) |option| { if( option_is_epsilon( &option, epsilon_rules ) ) { return true; } } return false; } pub fn get_epsilon_rules( all :std.mem.Allocator, rules :[]const Rule ) !RuleSet { var set = RuleSet.init( all ); errdefer { set.deinit(); } var update = true; while( update ) { update = false; for( rules ) |rule| { if( set.contains( rule.name ) ) { continue; } if( rule_is_epsilon( &rule, set ) ) { // try set.put( rule ); // needs the void slot try set.put( rule.name, void {} ); update = true; } } } return set; } test "ABc" { const all = std.testing.allocator; const source = \\A =| B. ; \\B =| A. | 'c' ; ; const rules = try parse( all, source, null ); defer { parse_deinit( rules, all ); } var epsilon_rules = try Epsilon.get_epsilon_rules( all, rules ); defer { epsilon_rules.deinit(); } try std.testing.expect( epsilon_rules.count() == 0 ); } test "IJKx" { const all = std.testing.allocator; const source = \\I = # epsilon-3 \\| J. # epsilon-2 \\| K. \\; \\J = # epsilon-1 \\| # epsilon-0 \\| I. # epsilon-4 .. \\; \\K =| 'x' ; ; const rules = try parse( all, source, null ); defer { parse_deinit( rules, all ); } var epsilon_rules = try Epsilon.get_epsilon_rules( all, rules ); defer { epsilon_rules.deinit(); } try std.testing.expect( epsilon_rules.count() == 2 ); try std.testing.expect( epsilon_rules.contains( "I" ) ); try std.testing.expect( epsilon_rules.contains( "J" ) ); try std.testing.expect(!epsilon_rules.contains( "K" ) ); } test "ab" { const all = std.testing.allocator; const s2 = \\ a =| b? 'a' ; \\ b =| a* 'b' ; ; const rules = try parse( all, s2, null ); defer { parse_deinit( rules, all ); } var epsilon_rules = try Epsilon.get_epsilon_rules( all, rules ); defer { epsilon_rules.deinit(); } try std.testing.expect( epsilon_rules.count() == 0 ); // std.debug.print( "\nfin ab\n\n", .{} ); } }; test Epsilon { _ = Epsilon {}; } const FirstSet = struct { // FirstSet, second attempt pub const FirstSetMap = std.hash_map.StringHashMap( Filter ); fn expr_filter_set( expr :*const Expr, epsilon_rules :Epsilon.RuleSet, rule_filters :FirstSetMap ) !Filter { _ = epsilon_rules; switch( expr.* ) { .term => |term| { var term_filter = Filter {}; for( term.escaped ) |esc| { term_filter.add( esc ); } if( term.inverted ) { term_filter.invert(); } return term_filter; }, .item => |item| { return rule_filters.get( item.name ) orelse { return error.unknown_rule; }; }, .byte => |byte| { var byte_filter = Filter {}; byte_filter.add( byte ); return byte_filter; }, } } fn option_filter_set( exprs :*const []const Expr, epsilon_rules :Epsilon.RuleSet, rule_filters :FirstSetMap ) !Filter { var option_filter = Filter {}; for( exprs.* ) |expr| { const expr_filter = try expr_filter_set( &expr, epsilon_rules, rule_filters ); option_filter.add_filter( expr_filter ); if( !Epsilon.expr_is_epsilon( &expr, epsilon_rules ) ) { break; } } return option_filter; } fn rule_filter_set( rule :*const Rule, epsilon_rules :Epsilon.RuleSet, rule_filters :FirstSetMap ) !Filter { var rule_filter = Filter {}; for( rule.options ) |exprs| { const option_filter = try option_filter_set( &exprs, epsilon_rules, rule_filters ); rule_filter.add_filter( option_filter ); } return rule_filter; } pub fn get_first_sets( all :std.mem.Allocator, rules :[]const Rule, epsilon_rules :Epsilon.RuleSet ) !FirstSetMap { // TODO // calculate epsilon rules // calculate immediate first sets // calculate recursive first sets // return complete first sets var filter_sets = FirstSetMap.init( all ); errdefer { filter_sets.deinit(); } var update = true; for( rules ) |rule| { try filter_sets.put( rule.name, Filter {} ); } while( update ) { update = false; for( rules ) |rule| { const rule_filter = try rule_filter_set( &rule, epsilon_rules, filter_sets ); if( !std.meta.eql( rule_filter, filter_sets.get( rule.name ).? ) ) { // try filter_sets.put( rule.name, rule_filter ); // filter_sets.putAssumeCapacity( rule.name, rule_filter ); filter_sets.getPtr( rule.name ).?.* = rule_filter; update = true; } } } return filter_sets; } fn test_helper( all :std.mem.Allocator, source :[]const u8 ) !void { const rules = try parse( all, source, null ); defer { parse_deinit( rules, all ); } var epsilon_rules = try Epsilon.get_epsilon_rules( all, rules ); defer { epsilon_rules.deinit(); } var fsm :FirstSetMap = try get_first_sets( all, rules, epsilon_rules ); defer { fsm.deinit(); } // var iterator = fsm.iterator(); // while( iterator.next() ) |entry| { // std.debug.print( "rule: {s}\n", .{ entry.key_ptr.* } ); // std.debug.print( "filter: ", .{} ); // entry.value_ptr.*.print(); // } // XXX unordered for( rules ) |rule| { std.debug.print( "rule: {s}\n", .{ rule.name } ); std.debug.print( "filt: ", .{} ); fsm.get( rule.name ).?.print(); } std.debug.print( "\n", .{} ); } test "alpha" { const source = "S =| 'a' ;"; try test_helper( std.testing.allocator, source ); } test "grammargrammar" { // if( true ) { return error.SkipZigTest; } // const source = @embedFile( "grammar-grammar.v2.zig" ); // TODO why not ? const source = grammar_grammar; try test_helper( std.testing.allocator, source ); } }; test FirstSet { _ = FirstSet {}; } test "sequential identifiers" { // TODO "S =| id_and0xff ;" // TODO "abc" as (a)(b)(c) or (abc) // TODO "a++b" as (a)(++)(b) or (a)(+)(+)(b) { const rules = try parse( std.testing.allocator, "S =| id_and0xff. ;", null ); defer parse_deinit( rules, std.testing.allocator ); std.debug.assert( rules[0].options[0].len == 1 ); } { const rules = try parse( std.testing.allocator, "S =| id_and.0xff ;", null ); defer parse_deinit( rules, std.testing.allocator ); std.debug.assert( rules[0].options[0].len == 2 ); } } test "no option" { const rules = try parse( std.testing.allocator, "S =| ;", null ); defer parse_deinit( rules, std.testing.allocator ); try std.testing.expect( rules.len == 1 ); try std.testing.expect( rules[0].options.len == 1 ); try std.testing.expect( rules[0].options[0].len == 0 ); var buffer :[12]u8 = undefined; var slice :[]u8 = buffer[0..]; // ^^^^^^ // @compileLog( slice ); // without specifying the type of slice, // it becomes `*[12]u8` instead of `[]u8` // for both `&buffer` and `buffer[0..]` const err = parse( std.testing.allocator, "S = ;", &slice ); try std.testing.expectError( error.no_options, err ); // std.debug.print( "message:\n----\n{s}----\n", .{ slice } ); try std.testing.expectEqualStrings( slice, \\S = ; \\ ^ \\ ); } pub fn main() !void { return; } const First = struct { epsilon :bool = false, // can this token /option consume nothing filter :Filter = .{}, }; const Follow = struct { eos :bool = false, filter :Filter = .{}, }; const math = std.math; const assert = std.debug.assert; const Filter = struct { // TODO parameterize const Domain = u8; const domain_bit_count = @typeInfo( Domain ).Int.bits; const atom = u8; const PageIndex = std.meta.Int( .unsigned, math.log2_int( usize, std.mem.page_size ) ); // @Type( .{ .Int = .{ // .signedness = .unsigned, // .bits = math.log2_int_ceil( std.mem.page_size ), // } } ); // comptime { // assert( std.meta.fieldInfo( std.builtin.Type.Int, .bits ).type == u16 ); // assert( @TypeOf( @typeInfo( Domain ).Int.bits ) == u16 ); // assert( 16 - @clz( @typeInfo( Domain ).Int.bits ) == math.log2_int( u16, @typeInfo( u16 ).Int.bits ) ); // } const Block = usize; // u8; const block_bit_count :PageIndex = @typeInfo( Block ).Int.bits; // const block_index_bit_count = math.log2_int( u12, block_bit_count ); // log2_int_ceil ? const BlockIndex = math.Log2Int( Block ); // math.Log2IntCeil const block_index_bit_count = @typeInfo( BlockIndex ).Int.bits; const block_index_mask :BlockIndex = ( 1 << block_index_bit_count ) - 1; comptime { std.debug.assert( @typeInfo( Domain ).Int.signedness == .unsigned ); } comptime { std.debug.assert( block_index_bit_count == math.log2_int_ceil( u12, block_bit_count ) ); } // const range = math.maxInt( Domain ) + 1; // const range = 1 << @typeInfo( Domain ).Int.bits; const block_count :usize = 1 << ( domain_bit_count - block_index_bit_count ); // range / block_bit_count; flags :[ block_count ]Block = .{ 0 } ** block_count, fn has( self :*const Filter, flag :Domain ) bool { const x = self.*.flags[ flag >> block_index_bit_count ]; // const y :BlockIndex = @intCast( flag & block_index_mask ); const y :BlockIndex = @truncate( flag ); return ( 1 == ( ( x >> y ) & 1 ) ); } fn add( self :*Filter, flag :Domain ) void { // std.debug.assert( // self.*.flags[ flag >> block_index_bit_count ] & // @as( Filter.Block, 1 ) << @intCast( flag & block_index_mask ) // == 0 ); // const flag_index :BlockIndex = @truncate( flag ); self.*.flags[ flag >> block_index_bit_count ] |= @as( Filter.Block, 1 ) << @truncate( flag ) // @as( Filter.Block, 1 ) << flag_index ; } fn add_filter( self :*Filter, filter :Filter ) void { for( self.*.flags[0..], filter.flags[0..] ) |*sf, ff| { // std.debug.assert( sf.* & ff == 0 ); sf.* |= ff; } } fn union_( a :*const Filter, b :*const Filter ) Filter { var filter = Filter {}; for( filter.flags[0..], a.*.flags, b.*.flags ) |*f,af,bf| { f.* = af | bf; } return filter; } fn intersect( a :*const Filter, b :*const Filter ) Filter { var filter = Filter {}; for( filter.flags[0..], a.*.flags, b.*.flags ) |*f,af,bf| { f.* = af & bf; } return filter; } fn invert( self :*Filter ) void { for( self.*.flags[0..] ) |*f| { f.* = ~f.*; } } fn print( self :*const Filter ) void { const p = std.debug.print; for( 0..' ' ) |x| { const x_ :Filter.Domain = @intCast(x); if( self.*.has( x_ ) ) { p( "0x{x:0>2} ", .{ x_ } ); } } for( ' '..127 ) |y| { const y_ :Filter.Domain = @intCast(y); if( self.*.has( y_ ) ) { p( "'{c}' ", .{ y_ } ); } } for( 127..256 ) |z| { const z_ :Filter.Domain = @intCast(z); if( self.*.has( z_ ) ) { p( "0x{x:0>2} ", .{ z_ } ); } } p( "\n", .{} ); } test "filter_bits" { const this_domain = u8; // const domain_bits = @typeInfo( this_domain ).Int.bits; // 8 // const domain_index_bits = math.log2_int( domain_bits ); // 3 const flag_to_be_set :usize = 23; // const atom = u8; const this_block = usize; const block_bits = @typeInfo( this_block ).Int.bits; // 64 const block_index_bits = comptime math.log2_int( u16, block_bits ); // 6 // @compileLog( block_index_bits ); // runtime u4 const mask_index :usize = flag_to_be_set >> block_index_bits; // 0 const flag_index :this_domain = ( ( one_usize << block_index_bits ) - 1 ) & flag_to_be_set; // 23 const mask :this_block = 1 << flag_index; // 8M == 8 * 1024 * 1024 try std.testing.expect( mask_index == 0 ); try std.testing.expect( flag_index == 23 ); try std.testing.expect( mask == 0b0000_0000_1000_0000_0000_0000_0000_0000 ); } test "filter alpha" { // std.debug.print( "hello from filter alpha\n", .{} ); var x = @This() {}; x.add( 1 ); try std.testing.expect( x.flags[0] == 2 ); x.add( 128 ); try std.testing.expect( x.flags[ x.flags.len/2 ] == 1 ); // x.print(); // 0x01 0x80 } test "filter 'a'" { if( Block != usize ) { return error.SkipZigTest; } // const assert = std.debug.assert; var filter = Filter {}; filter.add( 'a' ); assert( filter.flags[1] == 0x00_00_00_02_00_00_00_00 ); // u64 assert( filter.flags[1] == ( 1 << 33 ) ); assert( filter.flags[0] == 0 ); filter.add( 0 ); assert( filter.flags[0] == 1 ); assert( filter.flags[3] == 0 ); filter.add( 0xff ); assert( filter.flags[3] == 0x80_00_00_00_00_00_00_00 ); // std.debug.print( "flags\n", .{} ); // filter.print(); // 0x00 'a' 0xff filter.invert(); assert( filter.flags[0] == 0xff_ff_ff_ff_ff_ff_ff_fe ); assert( filter.flags[1] == 0xff_ff_ff_fd_ff_ff_ff_ff ); assert( filter.flags[2] == 0xff_ff_ff_ff_ff_ff_ff_ff ); assert( filter.flags[3] == 0x7f_ff_ff_ff_ff_ff_ff_ff ); // std.debug.print( "inverted\n", .{} ); // filter.print(); // 0x01 .. 0x1f ' ' .. '`' 'b' .. '~' 0x7f .. 0xfe } test "union" { var a = Filter {}; var b = Filter {}; a.add( 'a' ); b.add( 'b' ); a.add( 'c' ); b.add( 'c' ); const union__ = Filter.union_( &a, &b ); try std.testing.expect( union__.has( 'a' ) ); try std.testing.expect( union__.has( 'b' ) ); try std.testing.expect( union__.has( 'c' ) ); try std.testing.expect(!union__.has( 'd' ) ); } test "intersect" { var a = Filter {}; var b = Filter {}; a.add( 'a' ); b.add( 'b' ); a.add( 'c' ); b.add( 'c' ); const intersect_ = Filter.intersect( &a, &b ); try std.testing.expect(!intersect_.has( 'a' ) ); try std.testing.expect(!intersect_.has( 'b' ) ); try std.testing.expect( intersect_.has( 'c' ) ); try std.testing.expect(!intersect_.has( 'd' ) ); } }; const Location = struct { usize, ?usize }; fn print_rules( rules :[]Rule ) void { const print = std.debug.print; for( rules ) |rule| { print( "#{{ rule: <{s}> }}#\n", .{ rule.name } ); for( rule.options ) |option| { print( "#{{ option }}#", .{} ); for( option ) |expr| { print( "#{{ expr }}#", .{} ); switch( expr ) { .item => |item| { print( "#{{ item: <{s}> <{any}> }}#", .{ item.name, item.suffix } ); }, .term => |term| { print( "#{{ term: <{any}> <{s}> }}#", .{ term.inverted, term.escaped } ); }, .byte => |byte| { print( "#{{ byte: <{d}> }}#", .{ byte } ); }, } } print( "\n", .{} ); } } } fn print_rules_2( rules :[]Rule ) void { const File = std.fs.File; write_rules( File, File.WriteError, File.write, std.io.getStdOut().writer(), rules ) catch {}; } fn GetWriterContext( Context :type, Error :type, // :@TypeOf( error {} ), writeFn :( fn( Context, []const u8 ) Error!usize ), ) struct { Context :type, Error :type, // :@TypeOf( error {} ), writeFn :( fn( Context, []const u8 ) Error!usize ), } { return .{ .Context = Context , .Error = Error , .writeFn = writeFn , }; } // fn write_rules( writer :std.io.Writer( std.fs.File, anyerror, std.fs.File.write ), rules :[]Rule ) !void { // fn write_rules( writer :@TypeOf( std.io.getStdOut().writer() ), rules :[]Rule ) !void { // fn write_rules( writer :anytype, rules :[]Rule ) !void { fn write_rules( // writer :std.io.Writer( std.fs.File, std.fs.File.WriteError, std.fs.File.write ), // writer :std.fs.File.Writer, comptime Context :type, comptime Error :type, comptime writeFn :fn( Context, []const u8 ) Error!usize, writer :std.io.Writer( Context, Error, writeFn ), // meta :struct { // Context :type, // Error :@TypeOf( error {} ), // // writeFn :fn( meta.Context, []const u8 ) meta.Error!usize, // // writeFn :type, // fn( Context, []const u8 ) Error!usize, // }, // comptime meta :anytype, // meta :struct { type, type, *opaque {} }, // writer :std.io.Writer( meta.@"0", meta.@"1", meta.@"2" ), // comptime T :type, // writer :std.io.Writer( T, T.WriteError, T.write ), rules :[]Rule ) !void { // const print = std.debug.print; for( rules ) |rule| { try writer.print( "\n{s} =", .{ rule.name } ); if( 1 < rule.options.len ) { try writer.print( "\n", .{} ); } for( rule.options ) |option| { try writer.print( "| ", .{} ); for( option ) |expr| { switch( expr ) { .item => |item| { // try writer.print( "{s}{s}", .{ item.name, switch( item.suffix ) { // .none => "", .plus => "+", .question => "?", .star => "*", // }, } ); try writer.print( "{s}{c}", .{ item.name, @as( u8, switch( item.suffix ) { .dot => '.', .plus => '+', .question => '?', .star => '*', } ) } ); }, .term => |term| { // try writer.print( "{s}'", .{ if( term.inverted ) "!" else "" } ); if( term.inverted ) { try writer.print( "!", .{} ); } try writer.print( "'", .{} ); for( term.escaped ) |c| { // try writer.print( "{c}", .{ switch( c ) { // // XXX hack; works, but is bad // '\t' => b: { try writer.print( "\\", .{} ); break :b 't'; }, // '\n' => b: { try writer.print( "\\", .{} ); break :b 'n'; }, // '\r' => b: { try writer.print( "\\", .{} ); break :b 'r'; }, // '\\' => b: { try writer.print( "\\", .{} ); break :b '\\'; }, // '\'' => b: { try writer.print( "\\", .{} ); break :b '\''; }, // else => |cc| cc, // } } ); switch( c ) { '\t' => { try writer.print( "\\t" , .{ } ); }, '\n' => { try writer.print( "\\n" , .{ } ); }, '\r' => { try writer.print( "\\r" , .{ } ); }, '\\' => { try writer.print( "\\\\" , .{ } ); }, '\'' => { try writer.print( "\\\'" , .{ } ); }, // ? else => |cc| { try writer.print( "{c}" , .{ cc } ); }, } } try writer.print( "'", .{} ); }, // XXX TODO escaped stuff .. .byte => |byte| { try writer.print( "0x{x}", .{ byte } ); }, } try writer.print( " ", .{} ); } if( 1 < rule.options.len ) { try writer.print( "\n", .{} ); } } try writer.print( ";\n", .{} ); } } // '•', 'ε' /// name & options const Rule = struct { name :[]u8, options :[][]Expr, }; // const Id = []u8; /// item, term, byte const Expr = union(enum) { item :Item, term :Term, // []u8, byte :u8, }; /// name & suffix const Item = struct { name :[]u8, suffix :Suffix, // pub const Suffix = enum { none, question, plus, star }; pub const Suffix = enum { dot, question, plus, star }; }; /// inverted & escaped const Term = struct { inverted :bool, escaped :[]u8, }; const State = union(enum) { start :*[]Rule , whitespace :void, it :u8, rule :*Rule , id :*[]u8 , options :*[][]Expr , exprs :*[]Expr , expr :*Expr , expr_item_suffix :*Item.Suffix , escaped :*[]u8 , }; fn parse_deinit( rules :[]Rule, all :std.mem.Allocator ) void { defer { all.free( rules ); } for( rules ) |rule| { defer { all.free( rule.name ); } defer { all.free( rule.options ); } for( rule.options ) |option| { defer { all.free( option ); } for( option ) |expr| { switch( expr ) { .item => |item| { defer { all.free( item.name ); } }, .term => |term| { defer { all.free( term.escaped ); } }, .byte => {}, } } } } } fn parse( all :std.mem.Allocator, source :[]const u8, errmsg :?*[]u8 ) ![]Rule { var stack = std.ArrayList( State ).init( all ); defer { stack.deinit(); } var rules = try all.alloc( Rule, 0 ); errdefer { parse_deinit( rules, std.testing.allocator ); } // try stack.append( State { .start = .{ // .rules = &rules, // } } ); try stack.append( .{ .start = &rules } ); var index :usize = 0; // source offset errdefer { // const print = std.debug.print; // const bufPrint = std.fmt.bufPrint; const line_begin = if( std.mem.lastIndexOf( u8, source[0..index ], "\n" ) ) |lio| lio + 1 else 0; const line_end = if( std.mem. indexOf( u8, source[ index..], "\n" ) ) | io| index + io else source.len; const line = source[ line_begin..line_end ]; var fallback = [0]u8 {}; var slice :[]u8 = fallback[0..]; const buf = errmsg orelse &slice; // https://ziglang.org/documentation/master/std/#src/std/io/fixed_buffer_stream.zig var fixed_buffer_stream = std.io.fixedBufferStream( buf.* ); const writer = fixed_buffer_stream.writer(); // std.debug.print( "{s}\n", .{ source } ); writer.print( "{s}\n", .{ line } ) catch {}; // print( "index : {d}\n", .{ index } ); // print( "line_begin: {d}\n", .{ line_begin } ); // print( "line_end : {d}\n", .{ line_end } ); // print( "line.len : {d}\n", .{ line.len } ); for( line_begin..index ) |_| { writer.print( " ", .{} ) catch {}; } writer.print( "^\n", .{} ) catch {}; } // while( index < source.len and 0 < stack.items.len ) { while( 0 < stack.items.len ) { // std.debug.print( "index: {d}\n", .{ index } ); const char = if( index < source.len ) source[ index ] else null; switch( stack.getLast() ) { // .invalid => { return error.invalid; }, .start => |ptr| switch( char orelse '$' ) { 'a'...'z', 'A'...'Z', '_', => { try stack.append( .{ .rule = try addOne( Rule, all, ptr, undefined ) } ); }, ' ','\t','\r','\n', '#', => { try stack.append( .whitespace ); }, else => { return if( index == source.len ) rules else error.invalid ; }, }, .whitespace => switch( char orelse '$' ) { ' ','\t','\r','\n', => { index += 1; }, '#', => { while( index < source.len ) { if( source[ index ] == '\n' ) { break; } index += 1; } }, else => { _ = stack.pop(); } }, .it => |it| { // shinies for tiny it const that = char orelse return error.it_eof; if( it != that ) { std.debug.print( "expected: '{c}',\nfound: '{c}'\n", .{ it, that } ); return error.bad_it; } index += 1; _ = stack.pop(); try stack.append( .whitespace ); }, .rule => |rule_ptr| switch( char orelse '$' ) { 'a'...'z', 'A'...'Z', '_', => { rule_ptr.*.name = try all.alloc( u8, 0 ); rule_ptr.*.options = try all.alloc( []Expr, 0 ); _ = stack.pop(); try stack.append( .{ .it = ';' } ); try stack.append( .{ .options = &rule_ptr.*.options } ); try stack.append( .{ .it = '=' } ); try stack.append( .{ .id = &rule_ptr.*.name } ); }, else => { return error.invalid; }, }, .options => |ptr| switch( char orelse '$' ) { '|' => { try stack.append( .{ .exprs = try addOne( []Expr, all, ptr, try all.alloc( Expr, 0 ) ) } ); try stack.append( .{ .it = '|' } ); }, ';' => { if( ptr.*.len == 0 ) { return error.no_options; } _ = stack.pop(); }, else => { return error.bad_options; }, }, // .option => unreachable, .exprs => |ptr| switch( char orelse '$' ) { 'a'...'z', 'A'...'Z', '_', '!', '\'', '0', => { try stack.append( .{ .expr = try addOne( Expr, all, ptr, switch( char orelse '$' ) { 'a'...'z', 'A'...'Z', '_', => .{ .item = undefined }, '!', '\'', => .{ .term = undefined }, '0', => .{ .byte = undefined }, else => unreachable, }, ) } ); }, '|', ';', => { _ = stack.pop(); }, else => { std.debug.print( "exprs char: '{c}'\n", .{ char orelse '$' } ); return error.TODO; }, }, .expr => |ptr| switch( char orelse '$' ) { 'a'...'z', 'A'...'Z', '_', => { // std.debug.print( "alpha: {c}\n", .{ char orelse '%' } ); ptr.* = ( .{ .item = .{ .name = try all.alloc( u8, 0 ), .suffix = undefined, // .none, } } ); _ = stack.pop(); try stack.append( .whitespace ); try stack.append( .{ .expr_item_suffix = &ptr.*.item.suffix } ); try stack.append( .{ .id = &ptr.*.item.name } ); }, '!', '\'', => |x| { // std.debug.print( "beta: {c}\n", .{ x } ); ptr.* = .{ .term = .{ .inverted = ( x == '!' ), .escaped = try all.alloc( u8, 0 ), } }; if( x == '!' ) { index += 1; } if( !( index < source.len ) ) { return error.term_eof; } if( source[ index ] != '\'' ) { return error.bad_term; } index += 1; _ = stack.pop(); try stack.append( .{ .it = '\'' } ); try stack.append( .{ .escaped = &ptr.*.term.escaped } ); }, '0', => { // std.debug.print( "byte: {c}\n", .{ char orelse '@' } ); std.debug.assert( source[ index ] == '0' ); // TODO proper stream api cleans this considerably index += 1; if( !( index < source.len ) ) { return error.hex_eof_1; } if( source[ index ] != 'x' ) { return error.hex_not_x; } index += 1; if( !( index < source.len ) ) { return error.hex_eof_2; } const hi_hex = source[ index ]; index += 1; if( !( index < source.len ) ) { return error.hex_eof_3; } const lo_hex = source[ index ]; index += 1; // XXX this is more than parsing; // XXX this is interpreting; // XXX leave interpreting for later... // for now, only identify the characters of the stream that matter the the byte token var byte :u8 = 0; inline for( [_]u8 { hi_hex, lo_hex } ) |hex| { byte <<= 4; byte |= switch( hex ) { '0'...'9' => |u| ( u - '0' ), 'a'...'f' => |u| ( 10 + u - 'a' ), 'A'...'F' => |u| ( 10 + u - 'A' ), else => { return error.bad_hex; }, }; } // std.debug.print( "(0x{x}x0)\n", .{ byte } ); ptr.* = .{ .byte = byte }; _ = stack.pop(); try stack.append( .whitespace ); }, '|', ';', => { _ = stack.pop(); }, else => { return error.bad_expr; }, }, .expr_item_suffix => |ptr| switch( char orelse '$' ) { // else => { ptr.* = .none; _ = stack.pop(); }, '.' => { ptr.* = .dot ; index += 1; _ = stack.pop(); }, '?' => { ptr.* = .question ; index += 1; _ = stack.pop(); }, '+' => { ptr.* = .plus ; index += 1; _ = stack.pop(); }, '*' => { ptr.* = .star ; index += 1; _ = stack.pop(); }, else => { return error.invalid; }, }, .escaped => |ptr| switch( char orelse '$' ) { '\'', => { _ = stack.pop(); }, '\\', => { index += 1; if( !( index < source.len ) ) { return error.escaped_backslash_eof; } _ = try addOne( u8, all, ptr, switch( source[ index ] ) { 't' => '\t', 'n' => '\n', 'r' => '\r', // '"' => '\"', // '\'' => '\'', // '\\' => '\\', else => |c| c, } ); index += 1; }, else => { const c = char orelse { return error.escaped_eof; }; _ = try addOne( u8, all, ptr, c ); index += 1; } }, .id => |ptr| switch( char orelse '$' ) { 'a'...'z', 'A'...'Z', '_', '0'...'9', => |c| { // ptr.* = try all.realloc( ptr.*, ptr.*.len+1 ); // ptr.*[ ptr.*.len-1 ] = c; _ = try addOne( u8, all, ptr, c ); index += 1; }, else => { _ = stack.pop(); try stack.append( .whitespace ); }, }, } } return error.eos; } fn trap() noreturn { unreachable; } fn addOne( T :type, all :std.mem.Allocator, ptr :*[]T, val :T ) !*T { ptr.* = try all.realloc( ptr.*, ptr.*.len+1 ); // const tmp_slice = try all.realloc( ptr.*, ptr.*.len+1 ); // const tmp_slice = try all.alloc( T, ptr.*.len+1 ); // @memcpy( tmp_slice[0..ptr.*.len], ptr.* ); // @memset( tmp_slice[ ptr.*.len..], undefined ); // all.free( ptr.* ); // ptr.* = tmp_slice; ptr.*[ ptr.*.len-1 ] = val; return &ptr.*[ ptr.*.len-1 ]; } const grammar_grammar = \\ \\ANY =| !'' ; \\ \\SPACE =| ' ' ; \\ \\TAB =| '\t' ; \\ \\RETURN =| '\r' ; \\ \\NEWLINE =| '\n' ; \\ \\SEMICOLON =| ';' ; \\ \\HASH =| '#' ; \\ \\EQUAL =| '=' ; \\ \\PIPE =| '|' ; \\ \\EXCLAMATION =| '!' ; \\ \\QUESTION =| '?' ; \\ \\PLUS =| '+' ; \\ \\STAR =| '*' ; \\ \\APOSTROPHE =| '\'' ; \\ \\BACKSLASH =| '\\' ; \\ \\DOT =| '.' ; \\ \\start =| _. rule* ; \\ \\_ =| whitespace* ; \\ \\whitespace = \\| ' \t\r\n' \\| comment. \\; \\ \\comment =| HASH. commented* NEWLINE. ; \\ \\commented =| !'\n' ; \\ \\rule =| id. EQUAL. option* SEMICOLON. ; \\ \\id =| alpha. alphanum* ; \\ \\alpha = \\| '_' \\| 'abcdefghijklmnopqrstuvwxyz' \\| 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' \\; \\ \\alphanum = \\| alpha. \\| numeric. \\; \\ \\numeric =| '0123456789' ; \\ \\option =| PIPE. expr* ; \\ \\expr = \\| id. DOT. \\| id. QUESTION. \\| id. PLUS. \\| id. STAR. \\| term. \\| byte. \\; \\ \\term =| EXCLAMATION? APOSTROPHE. escaped* APOSTROPHE. ; \\ \\escaped = \\| !'\\\'' \\| BACKSLASH. !'' \\; \\ \\byte =| '0' 'x' hex. hex. ; \\ \\hex = \\| numeric. \\| 'abcdef' \\| 'ABCDEF' \\; \\ \\del =| 0x7f ; \\ \\omega =| 0x7f 0x8f ; \\ \\simple =| 'a' 'b' ; \\ ;