const std = @import("std");
const Stream = @import("stream-2.zig").Stream;
const debug = std.debug.print;

// // XXX still don't really understand this thing...
// const GPA = std.heap.GeneralPurposeAllocator( .{} );
// // var aaa :GPA = .{};
// // aaa.allocator().create( u8 );
// // aaa.deinit();
// gpa :GPA, // this is required to be deinit()ed...

// https://ziglang.org/documentation/master/std/#std.heap.GeneralPurposeAllocator
// https://ziglang.org/documentation/master/std/#std.mem.Allocator
// https://ziglang.org/documentation/master/#Tagged-union

/// A parsed PON value: either a list of values or a named record.
///
/// Every slice reachable from a `Pon` returned by `parse` is released through
/// `allocator` when `deinit` is called; the caller owns the value until then.
pub const Pon = struct {
    /// Allocator used by `deinit` to release the slices held in `data`.
    allocator: std.mem.Allocator,
    data: Data,

    pub const Data = union(enum) {
        list: []const Pon,
        record: Record,
    };

    pub const Record = struct {
        name: []const u8,
        body: Body,
    };

    pub const Body = union(enum) {
        fields: []const Field,
        literal: []const u8,
        string: []const u8,
    };

    pub const Field = struct {
        name: []const u8,
        pon: Pon,
    };

    /// Recursively frees everything this value holds: list elements, the
    /// record name, and the record body.
    pub fn deinit(pon: *const Pon) void {
        switch (pon.data) {
            .list => |list| {
                for (list) |item| {
                    item.deinit();
                }
                pon.allocator.free(list);
            },
            .record => |record| {
                pon.allocator.free(record.name);
                deinit_body(pon.allocator, record.body);
            },
        }
    }

    /// Frees a record body. Field names and the body's own slice are released
    /// through `allocator`; nested values are released via their own `deinit`.
    /// Shared by `deinit` and by the parser's error paths.
    fn deinit_body(allocator: std.mem.Allocator, body: Body) void {
        switch (body) {
            .fields => |fields| {
                for (fields) |field| {
                    allocator.free(field.name);
                    field.pon.deinit();
                }
                // Freeing a zero-length slice is a no-op, so the empty body
                // produced at end of input is safe to pass here.
                allocator.free(fields);
            },
            .literal => |literal| allocator.free(literal),
            .string => |string| allocator.free(string),
        }
    }

    /// Dumps the value as an indented debug tree via `std.debug.print`.
    pub fn print(pon: *const Pon) void {
        pon._print(0);
    }

    /// Recursive worker for `print`; `level` is the current indent depth.
    fn _print(pon: *const Pon, level: usize) void {
        __print(level + 0, "pon:\n", .{});
        switch (pon.data) {
            .list => |list| {
                __print(level + 1, "list:\n", .{});
                for (list) |item| {
                    item._print(level + 2);
                } // + 4
            },
            .record => |record| {
                __print(level + 1, "record:\n", .{});
                __print(level + 2, "type: \"{s}\"\n", .{record.name});
                switch (record.body) {
                    .fields => |fields| {
                        __print(level + 2, "fields:\n", .{});
                        for (fields) |field| {
                            __print(level + 3, "name: \"{s}\"\n", .{field.name});
                            field.pon._print(level + 4);
                        }
                    },
                    .literal => |literal| {
                        __print(level + 2, "literal: \"{s}\"\n", .{literal});
                    },
                    .string => |string| {
                        __print(level + 2, "string: \"{s}\"\n", .{string});
                    },
                }
            },
        }
    }

    /// Prints the indent string `level` times, then the formatted message.
    fn __print(level: usize, comptime fmt: []const u8, args: anytype) void {
        for (0..level) |_| {
            debug(" ", .{});
        }
        debug(fmt, args);
    }

    /// Prints the value back out in PON-like syntax via `std.debug.print`.
    pub fn print_pon(pon: *const Pon) void {
        pon._print_pon(0);
    }

    /// Recursive worker for `print_pon`; `level` is the current indent depth.
    fn _print_pon(pon: *const Pon, level: usize) void {
        switch (pon.data) {
            .list => |list| {
                __print(0, "[", .{});
                // The closing bracket is emitted last, after the elements.
                defer {
                    __print(0, "]\n", .{});
                }
                if (0 < list.len) {
                    __print(0, "\n", .{});
                }
                // if( 1 < list.len ) { __print( 0, "\n", .{} ); }
                for (list) |item| {
                    __print(level + 1, "", .{});
                    item._print_pon(level + 1);
                }
                // if( 1 < list.len ) { __print( level + 0, "", .{} ); }
                if (0 < list.len) {
                    __print(level + 0, "", .{});
                }
            },
            .record => |record| {
                __print(0, "( {s}", .{record.name});
                // The closing paren is emitted last, after the body.
                defer {
                    __print(0, ")\n", .{});
                }
                switch (record.body) {
                    .fields => |fields| {
                        __print(0, "\n", .{});
                        for (fields) |field| {
                            __print(level + 1, ".{s} ", .{field.name});
                            // __print( level + 1, "", .{} );
                            field.pon._print_pon(level + 1);
                        }
                        __print(level + 0, "", .{});
                    },
                    .literal => |literal| {
                        __print(0, " ", .{});
                        if (literal.len != 0) {
                            __print(0, "{s} ", .{literal});
                        }
                    },
                    .string => |string| {
                        __print(0, " \"{s}\" ", .{string});
                    },
                }
            },
        }
    }
};

/// Recursive-descent parser producing `Pon` values from a `Stream`.
/// NOTE(review): `Stream` lives in stream-2.zig; this code relies on `peek`
/// yielding an optional byte and on `step` / `consume` being fallible.
const Parser = struct {
    allocator: std.mem.Allocator,
    stream: Stream,

    fn init(allocator: std.mem.Allocator, source: []const u8) Parser {
        return .{
            .allocator = allocator,
            .stream = Stream.init(source),
        };
    }

    /// Skips spaces, tabs, CR, LF and `;` comments.
    fn parse_whitespace(p: *Parser) void {
        while (p.stream.peek()) |c| {
            switch (c) {
                // '\v', '\f',
                ' ', '\t', '\r', '\n' => {
                    // peek() just yielded a byte, so step() is not expected to fail.
                    _ = p.stream.step() catch unreachable;
                },
                ';' => {
                    p.parse_comment();
                },
                else => {
                    break;
                },
            }
        }
    }

    /// Skips a `;` comment up to, but not including, the next newline.
    /// Must be called with the stream positioned on `;`.
    fn parse_comment(p: *Parser) void {
        std.debug.assert(p.stream.peek() == ';');
        p.stream.consume(";") catch unreachable;
        while (p.stream.peek()) |c| {
            if (c == '\n') {
                break;
            }
            _ = p.stream.step() catch unreachable;
        }
    }

    const Pon_Error = error{
        end_of_stream_during_identifier,
        end_of_stream_during_pon,
        end_of_stream_during_list,
        end_of_stream_during_string,
        bad_pon,
        end_of_stream,
        not_equal,
        OutOfMemory,
    };

    /// Parses one value: a `( name body )` record or a `[ ... ]` list.
    /// On error, everything allocated for the partial value is released.
    pub fn parse_pon(p: *Parser) Pon_Error!Pon {
        const c = p.stream.peek() orelse return error.end_of_stream_during_pon;
        switch (c) {
            '(' => {
                // TODO parse_record
                try p.stream.consume("(");
                p.parse_whitespace();
                const identifier = try p.parse_identifier();
                // Without these, a malformed body or a missing ')' leaks the
                // name and the body that were already parsed.
                errdefer p.allocator.free(identifier);
                const body = try p.parse_body();
                errdefer Pon.deinit_body(p.allocator, body);
                try p.stream.consume(")");
                p.parse_whitespace();
                return .{
                    .allocator = p.allocator,
                    .data = .{
                        .record = .{
                            .name = identifier,
                            .body = body,
                        },
                    },
                };
            },
            '[' => {
                // TODO parse_list
                return .{
                    .allocator = p.allocator,
                    .data = .{ .list = try p.parse_list() },
                };
            },
            else => return error.bad_pon,
        }
    }

    /// Parses a record body: `.field` entries, a quoted string, or a bare
    /// literal. At end of input an empty field list is returned.
    fn parse_body(p: *Parser) !Pon.Body {
        if (p.stream.peek()) |c| {
            return switch (c) {
                '.' => .{ .fields = try p.parse_fields() },
                '"' => .{ .string = try p.parse_string() },
                else => .{ .literal = try p.parse_identifier() },
            };
        } else {
            // Zero-length slice that is not heap-backed; freeing it is a no-op.
            return .{ .fields = &.{} };
        }
    }

    /// Parses consecutive `.name value` entries. Caller owns the returned
    /// slice together with each field's name and nested value.
    fn parse_fields(p: *Parser) ![]const Pon.Field {
        var list = std.ArrayList(Pon.Field).init(p.allocator);
        defer {
            list.deinit();
        }
        // On failure, release the fields already collected. This runs before
        // the `defer` above frees the list's backing storage.
        errdefer {
            for (list.items) |field| {
                p.allocator.free(field.name);
                field.pon.deinit();
            }
        }

        while (p.stream.peek()) |c| {
            if (c != '.') {
                break;
            }
            p.stream.consume(".") catch unreachable;
            // p.parse_whitespace();
            const identifier = try p.parse_identifier();
            errdefer {
                p.allocator.free(identifier);
            }
            const pon = try p.parse_pon();
            // Covers the window before the list takes ownership in `append`.
            errdefer {
                pon.deinit();
            }
            p.parse_whitespace();
            try list.append(.{
                .name = identifier,
                .pon = pon,
            });
        }
        return list.toOwnedSlice();
    }

    /// Collects bytes up to the next significant or whitespace byte, then
    /// skips trailing whitespace. Caller owns the returned slice, which may
    /// be empty. Fails if the input ends before a terminator is seen.
    fn parse_identifier(p: *Parser) ![]const u8 {
        var list = std.ArrayList(u8).init(p.allocator);
        defer {
            list.deinit();
        }

        while (p.stream.peek()) |c| {
            switch (c) {
                // significant
                '(', ')', '[', ']', '.', '"', '\\',
                // whitespace
                ' ', '\t', '\r', '\n', ';',
                => {
                    // XXX if list is empty...
                    p.parse_whitespace();
                    return list.toOwnedSlice();
                },
                else => {
                    try list.append(try p.stream.step());
                },
            }
        }
        return error.end_of_stream_during_identifier;
    }

    /// Parses a double-quoted string. A backslash makes the byte after it
    /// literal. Caller owns the returned slice, which excludes the quotes.
    fn parse_string(p: *Parser) ![]const u8 {
        var list = std.ArrayList(u8).init(p.allocator);
        defer {
            list.deinit();
        }

        try p.stream.consume("\"");
        while (p.stream.peek()) |c| {
            switch (c) {
                '"' => {
                    try p.stream.consume("\"");
                    p.parse_whitespace();
                    return list.toOwnedSlice();
                    // break;
                },
                '\\' => {
                    try p.stream.consume("\\");
                    try list.append(try p.stream.step());
                },
                else => try list.append(try p.stream.step()),
            }
        }
        return error.end_of_stream_during_string;
    }

    /// Parses `[ value* ]`. Caller owns the returned slice and its elements.
    fn parse_list(p: *Parser) ![]const Pon {
        try p.stream.consume("[");
        p.parse_whitespace();

        var list = std.ArrayList(Pon).init(p.allocator);
        defer {
            list.deinit();
        }
        // On failure (bad element, missing ']', out of memory), release the
        // elements already collected.
        errdefer {
            for (list.items) |item| {
                item.deinit();
            }
        }

        while (p.stream.peek()) |c| {
            if (c == ']') {
                p.stream.consume("]") catch unreachable;
                p.parse_whitespace();
                return list.toOwnedSlice();
            }
            const pon = try p.parse_pon();
            // Covers the window before the list takes ownership in `append`.
            errdefer {
                pon.deinit();
            }
            try list.append(pon);
        }
        return error.end_of_stream_during_list;
    }
};

/// Parses a single PON value from `source`, skipping leading whitespace and
/// comments. The caller owns the result and must release it with `deinit`.
pub fn parse(allocator: std.mem.Allocator, source: []const u8) !Pon {
    var parser = Parser.init(allocator, source);
    parser.parse_whitespace();
    return parser.parse_pon();
}

// tests
const tst = std.testing;

// zig test --test-no-exec -femit-bin=test pon-parser.zig
// gdb
//   info functions
//   info functions partial-name
//   - zig-function-names begin with the filename, without `.zig` suffix
//   - tab completion works
//   info breakpoints
//   s (step into)
//   n (step over)
//   b (breakpoint)
//   del ( delete breakpoint )
//   finish (step out)
//   p (print)
//   list ( show location )
//   ..
// https://www.man7.org/linux/man-pages/man1/gdb.1.html
// https://sourceware.org/gdb/current/onlinedocs/gdb

/// Parses `source` with the testing allocator, prints the result in PON
/// syntax followed by a separator line, then releases it. Parse errors
/// propagate to the calling test.
fn test_helper(source: []const u8) !void {
    const tree = try parse(tst.allocator, source);
    defer tree.deinit();

    // debug( "{s}\n{any}\n----\n", .{ source, pon } );
    // pon.print();
    tree.print_pon();
    debug("----\n", .{});
}

// also, looping on peeking without actually consuming... 2x now

test "integer" {
    try test_helper(pon_integer);
}

test "null" {
    // XXX this parses as empty-literal vs empty-fields
    try test_helper(pon_null);
}

test "comment" {
    try test_helper(pon_comment);
}

test "float" {
    try test_helper(pon_float);
}

test "string" {
    try test_helper(pon_string);
}

test "record" {
    try test_helper(pon_record);
}

test "list" {
    try test_helper(pon_list);
}

const pon_integer =
    \\( integer 234 )
;

const pon_null =
    \\( null )
;

const pon_comment =
    \\( null ) ; comment
;

// TODO should floats be allowed ? how then to encode ?
const pon_float =
    \\( float 3_14159 )
;

const pon_string =
    \\( string "hello" )
;

const pon_record =
    \\( record
    \\ .field-a ( int 1 )
    \\ .field-b ( int 2 ) )
;

const pon_list =
    \\[ ( int 1 )
    \\ ( null ) ]
;

test "empty field name" {
    try test_helper(
        \\( beta .( null ) )
    );
    try test_helper(
        \\( zeta . ( null ) )
    );
}

test "fields" {
    try test_helper(
        \\( a
        \\ .f1( null )
        \\ .f2( int 1 )
        \\ .f3( string "s" )
        \\ .f4( rec .f1( null ) )
        \\ .f4[]
        \\)
    );
    // fun with debugging tests, when the test is incorrect..
    // note-to-self: comment-style strings do *not* need quotes escaped
    //   \\this string's "quoted sections" don't need to be escaped.
    //   "this string's \"quoted sections\" don't need to be escaped."
}

test "fields-isolate" {
    try test_helper("( a\n .f3( string \"s\" )\n)"); // this worked
    // try test_helper(
    //     \\( a
    //     \\ .f3( string \"s\" )
    //     \\)
    // );
}

test "nests" {
    try test_helper("( a .f1( null ) )");
}

test "nests 2" {
    try test_helper("( a .f1( b .f1( c .f1( null ) ) ) )");
}

test "nests 3" {
    try test_helper(
        \\( a
        \\ .alpha ( null )
        \\ .beta [
        \\ []
        \\ [ [] ]
        \\ ( null )
        \\ ( type .field ( list .items[ [ ( str "xyz" ) ] ] ) )
        \\ ]
        \\)
    );
}

test "json-vs-pon" {
    // const json =
    //     \\{ "key": "val",
    //     \\ "list": [ 1, 2, 3 ],
    //     \\ "flag": false,
    //     \\ "data": null }
    // ;
    const source =
        \\( object
        \\ .key ( string val )
        \\ .list [ ( number 1 ) ( number 2 ) ( number 3 ) ]
        \\ .flag ( boolean false )
        \\ .data ( null ) )
    ;
    try test_helper(source);
}

// debugging summary
// - double check boolean conditions
// - ensure loop increments occur
// - ensure the test is correct

// 2024-09-09, pon v 0.1