// pon v0.2 2024-09-10

// i should be using git... haha..

const std = @import( "std" );
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const assert = std.debug.assert;
const debug = std.debug.print;

const tst = std.testing;

// zig test --test-no-exec -femit-bin=test pon-parser.zig

// https://ziglang.org/documentation/master/std/#std.heap.GeneralPurposeAllocator
// https://ziglang.org/documentation/master/std/#std.mem.Allocator
// https://ziglang.org/documentation/master/#Tagged-union

/// api around a slice of bytes  
/// buffering is a non-issue because the slice is all in-memory  
/// not intended for actual streaming; TODO find better name  
pub const Stream = struct {
	src :[]const u8,
	off :usize = 0,
	
	/// initialize a Stream object  
	/// do not inc past end_of_stream  
	pub fn init( src :[]const u8 ) Stream {
		return .{ .src = src };
	}
	// /// is no more content available ?  
	// pub fn done( stream :*const Stream ) bool {
	// 	return stream.src.len <= stream.off;
	// }
	/// is more content available ?  
	pub fn more( stream :*const Stream ) bool {
		return stream.off < stream.src.len;
	}
	/// get next char  
	/// null if eos  
	/// does not increment  
	pub fn peek( stream :*const Stream ) ?u8 {
		return if( !stream.more() ) null
			else stream.src[ stream.off ]
		;
	}
	/// increments stream  
	pub fn inc( stream :*Stream ) void {
		if( stream.more() ) { stream.off += 1; }
	}
	/// peek & increment  
	pub fn step( stream :*Stream ) ?u8 {
		defer { stream.inc(); }
		return stream.peek();
	}
	/// peek & compare  
	/// does not increment  
	/// XXX dead method  
	pub fn next( stream :*const Stream, n :u8 ) bool {
		const p = stream.peek() orelse return false;
		return p == n;
	}
	/// step & compare  
	/// error if char does not match stream  
	/// increments stream  
	pub fn consume( stream :*Stream, char :u8 ) Pon_Parse_Error!void {
		const c = stream.step() orelse {
			return error.end_of_stream;
		};
		if( c != char ) { return error.not_equal; }
	}
};

test Stream {
	var abc = Stream.init( "abc" );
	try tst.expect( abc.more() );
	try tst.expectEqual( abc.peek().?, 'a' );
	try tst.expectEqual( abc.step().?, 'a' );
	try tst.expect( abc.more() );
	try abc.consume( 'b' );
	try tst.expect( abc.more() );
	// try tst.expect( abc.next( 'c' ) );
	while( abc.more() ) { abc.inc(); }
	// try tst.expect( abc.done() );
	try tst.expect( abc.peek() == null );
}

const Pon = struct {
	allocator :Allocator,
	kind :[]const u8,
	data :Data,
	
	pub fn deinit( pon :*const Pon ) void {
		pon.*.allocator.free( pon.*.kind );
		switch( pon.*.data ) {
			.primitive => |p| { pon.*.allocator.free( p ); },
			.records => |rs| {
				for( rs ) |r| { r.deinit(); }
				pon.allocator.free( rs );
			},
			.list => |l| {
				for( l ) |x| { x.deinit(); }
				pon.allocator.free( l );
			},
		}
	}
};
/// ( _ )
/// ( _ _ )
/// ( _ _ (_) )
/// ( _ (_) )
const Data = union(enum) {
	primitive :[]const u8,
	records :[]const Pair,
	list :[]const Pon,
};

/// ( _ _ (_) )
///     ^^^^^^
const Pair = struct {
	allocator :Allocator,
	key :[]const u8,
	val :Pon,
	
	pub fn deinit( pair :*const Pair ) void {
		pair.*.allocator.free( pair.*.key );
		pair.*.val.deinit();
	}
};

const Pon_Parse_Error = error {
	bad_escape,
	end_of_stream,
	end_of_stream_during_data,
	end_of_stream_during_quoted,
	end_of_stream_during_record_or_string,
	end_of_stream_during_string,
	missing_string,
	not_equal,
	// TODO,
	unexpected_character,
	unexpected_whitespace,
	unexpected_open_paren,
	OutOfMemory,
};

const Parser = struct {
	allocator :Allocator,
	stream :Stream,
	err_off :?*?usize,
	
	fn init( allocator :Allocator, source :[]const u8, err_off :?*?usize ) Parser {
		return .{
			.allocator = allocator,
			.stream = Stream.init( source ),
			.err_off = err_off,
		};
	}
	fn whitespace( p :*Parser ) void {
		while( p.stream.peek() ) |c| {
			switch( c ) {
				' ', '\t', '\r', '\n' => { p.stream.inc(); },
				';' => {
					p.stream.inc(); // try p.stream.consume( c );
					while( p.stream.peek() ) |cc| {
						if( cc == '\n' ) { break; }
						else { p.stream.inc(); }
					}
				},
				else => { break; },
			}
		}
	}
	
	// X fn primitive( p :*Parser ) Pon_Parse_Error!XYZ {}
	// X fn record( p :*Parser ) Pon_Parse_Error!XYZ {}
	// X fn list( p :*Parser ) Pon_Parse_Error!XYZ {}
	
	/// ```
	/// ( _ )
	/// ( _ _ )
	/// ( _ _ (_) )
	/// ( _ (_) )
	/// ```
	fn pon( p :*Parser ) Pon_Parse_Error!Pon {
		// debug( "{{ pon ", .{} ); defer { debug( " pon }}\n", .{} ); }
		try p.stream.consume( '(' );
		p.whitespace();
		const kind = try p.string();
		errdefer { p.allocator.free( kind ); }
		// p.whitespace();
		const body = try p.data();
		// _ = body;
		// return error.TODO;
		return .{
			.allocator = p.allocator,
			.kind = kind,
			.data = body,
		};
	}
	/// ```
	/// string =
	/// | literal
	/// | quoted
	/// ;
	/// ```
	fn string( p :*Parser ) Pon_Parse_Error![]const u8 {
		// debug( "{{ string ", .{} ); defer { debug( " string }}\n", .{} ); }
		const c = p.stream.peek() orelse {
			if( p.err_off ) |err| { err.* = p.stream.off; }
			return error.end_of_stream_during_string;
		};
		// switch( c ) {
		// 	'"' => { return p.quoted(); },
		// 	'(', ')', // pon boundaries
		// 	' ', '\t', '\r', '\n', // whitespace
		// 	';', // comment
		// 	=> { return error.missing_string; },
		// 	else => { return p.literal(); },
		// }
		const ret = switch( c ) {
			'"' => p.quoted(),
			'(', ')', // pon boundaries
			' ', '\t', '\r', '\n', // whitespace
			';', // comment
			=> error.missing_string,
			else => p.literal(),
		};
		if( ret ) |val| {
			_ = val;
			// debug( "<{s}>\n", .{ val } );
			// p.whitespace(); // see quoted & literal whitespace()
			// // also, it seems defer-for-the-happy-path has use as well
			// // tho that would nerf tail-call-optimization
		}
		// XXX careful removing this `else`, zig complains
		else |_| {
			// debug( "error: {s}\n", .{ @errorName( e ) } );
			if( p.err_off ) |err| { err.* = p.stream.off; }
		}
		return ret;
	}
	/// ```
	/// quoted =| QUOTE char* QUOTE _ ;
	/// 
	/// char =
	/// | !QUOTE !BACKSLASH ANY
	/// | BACKSLASH QUOTE
	/// | BACKSLASH BACKSLASH
	/// ;
	/// ```
	fn quoted( p :*Parser ) Pon_Parse_Error![]const u8 {
		// debug( "{{ quoted ", .{} ); defer { debug( " quoted }}\n", .{} ); }
		try p.stream.consume( '"' );
		var list = ArrayList( u8 ).init( p.allocator );
		defer { list.deinit(); }
		while( p.stream.step() ) |c| {
			switch( c ) {
				'"' => {
					p.whitespace();
					return list.toOwnedSlice();
				},
				'\\' => {
					const c2 = p.stream.step() orelse {
						if( p.err_off ) |err| { err.* = p.stream.off; }
						return error.end_of_stream_during_quoted;
					};
					switch( c2 ) {
						'"', '\\' => {
							try list.append( c2 );
							p.stream.inc();
						},
						else => {
							if( p.err_off ) |err| { err.* = p.stream.off; }
							return error.bad_escape;
						},
					}
				},
				else => { try list.append( c ); },
			}
		}
		if( p.err_off ) |err| { err.* = p.stream.off; }
		return error.end_of_stream_during_quoted;
	}
	/// ```
	/// literal =| (
	///  !PAREN_OPEN !PAREN_CLOSE
	///  !SPACE !TAB !RETURN !NEWLINE !SEMICOLON
	///  ANY
	/// )+ _ ;
	/// ```
	fn literal( p :*Parser ) Pon_Parse_Error![]const u8 {
		// debug( "{{ literal ", .{} ); defer { debug( " literal }}\n", .{} ); }
		var list = ArrayList( u8 ).init( p.allocator );
		defer { list.deinit(); }
		while( p.stream.peek() ) |c| {
			switch( c ) { // TODO consider adding '"' restrictions
				'(', ')', // pon boundaries
				' ', '\t', '\r', '\n', // whitespace
				';', // comment
				=> { break; },
				else => {
					try list.append( c );
					// debug( "<{c}>", .{ c } );
					p.stream.inc();
				},
			}
		}
		p.whitespace();
		return try list.toOwnedSlice();
	}
	/// ```
	/// ( _ )
	///     ^
	/// ( _ _ )
	///     ^^^
	/// ( _ _ (_) )
	///     ^^^^^^^
	/// ( _ (_) )
	///     ^^^^^
	/// ```
	fn data( p :*Parser ) Pon_Parse_Error!Data {
		// debug( "{{ data ", .{} ); defer { debug( " data }}\n", .{} ); }
		const c = p.stream.peek() orelse {
			if( p.err_off ) |err| { err.* = p.stream.off; }
			return error.end_of_stream_during_data;
		};
		switch( c ) {
			')' => {
				p.stream.inc();
				p.whitespace();
				// return .{ .list = .{
				// 	.ptr = null, // undefined,
				// 	.len = 0,
				// } };
				return .{ .list = ( [_]Pon {} )[0..] };
			},
			'(' => {
				return try p.data_list();
			},
			' ', '\t', '\r', '\n', // whitespace
			';', // comment
			=> {
				if( p.err_off ) |err| { err.* = p.stream.off; }
				return error.unexpected_whitespace;
			},
			else => {
				return try p.data_record_or_string();
			},
		}
	}
	/// ```
	/// ( _ (_) )
	///     ^^^^^
	/// ( _ (_) (_) )
	///     ^^^^^^^^^
	///         ++++
	/// ```
	fn data_list( p :*Parser ) Pon_Parse_Error!Data {
		// debug( "{{ data_list ", .{} ); defer { debug( " data_list }}\n", .{} ); }
		var list = ArrayList( Pon ).init( p.allocator );
		defer {
			for( list.items ) |l| { l.deinit(); }
			list.deinit();
		}
		while( p.stream.peek() ) |c| {
			switch( c ) {
				'(' => { try list.append( try p.pon() ); },
				')' => {
					p.stream.inc();
					p.whitespace();
					break;
				},
				else => {
					if( p.err_off ) |err| { err.* = p.stream.off; }
					return error.unexpected_character;
				},
			}
		}
		return .{ .list = try list.toOwnedSlice() };
	}
	/// ```
	/// ( _ _ )
	///     ^^^
	/// ( _ _ (_) )
	///     ^^^^^^^
	/// ```
	fn data_record_or_string( p :*Parser ) Pon_Parse_Error!Data {
		// debug( "{{ dros ", .{} ); defer { debug( " dros }}\n", .{} ); }
		const s = try p.string(); // resource
		// errdefer { p.allocator.free( s ); }
		const c = p.stream.peek() orelse {
			p.allocator.free( s );
			if( p.err_off ) |err| { err.* = p.stream.off; }
			return error.end_of_stream_during_record_or_string;
		};
		switch( c ) {
			')' => {
				p.stream.inc();
				p.whitespace();
				return .{ .primitive = s };
			},
			'(' => {
				// `data_record()` takes ownership-of and
				// responsibility-for-freeing `s`
				return p.data_record( s );
			},
			else => {
				p.allocator.free( s );
				if( p.err_off ) |err| { err.* = p.stream.off; }
				return error.unexpected_character;
			},
		}
	}
	/// ```
	/// ( _ _ (_) )
	///     v ^^^^^
	///    key
	/// ( _ _ (_) _ (_) )
	///       ^^^^^^^^^^^
	///           ++++++
	/// ```
	fn data_record( p :*Parser, key :[]const u8 ) Pon_Parse_Error!Data {
		// debug( "{{ data_record ", .{} ); defer { debug( " data_record }}\n", .{} ); }
		// errdefer { p.allocator.free( key ); }
		const val = p.pon() catch |e| {
			p.allocator.free( key );
			return e;
		};
		// errdefer { val.deinit(); }
		var list = ArrayList( Pair ).init( p.allocator );
		errdefer {
			for( list.items ) |i| { i.deinit(); }
			list.deinit();
		}
		list.append( .{
			.allocator = p.allocator,
			.key = key,
			.val = val,
		} ) catch |e| {
			p.allocator.free( key );
			val.deinit();
			return e;
		};
		while( p.stream.peek() ) |c| {
			switch( c ) {
				')' => {
					p.stream.inc();
					p.whitespace();
					break;
				},
				'(' => {
					if( p.err_off ) |err| { err.* = p.stream.off; }
					return error.unexpected_open_paren;
				},
				' ', '\t', '\r', '\n', // whitespace
				';', // comment
				=> {
					if( p.err_off ) |err| { err.* = p.stream.off; }
					return error.unexpected_whitespace;
				},
				else => {
					const field = try p.string();
					// errdefer { p.allocator.free( field ); }
					const value = p.pon() catch |e| {
						p.allocator.free( field );
						return e;
					};
					list.append( .{
						.allocator = p.allocator,
						.key = field,
						.val = value,
					} ) catch |e| {
						p.allocator.free( field );
						value.deinit();
						return e;
					};
				},
			}
		}
		return .{ .records = try list.toOwnedSlice() };
	}
};

pub fn parse( allocator :Allocator, source :[]const u8, err_off :?*?usize ) Pon_Parse_Error!Pon {
	var parser = Parser.init( allocator, source, err_off );
	parser.whitespace();
	return parser.pon();
}

// zig test --test-no-exec -femit-bin=test pon-parser.zig

test "atom" {
	const atom = try parse( tst.allocator, "( null )", null );
	defer { atom.deinit(); }
	// debug( "atom: {any}\n", .{ atom } );
}
test "primitive" {
	const primitive = try parse( tst.allocator, "( integer 1 )", null );
	defer { primitive.deinit(); }
}
test "record" {
	const record = try parse( tst.allocator, "( rec key( str val ) )", null );
	defer { record.deinit(); }
}
test "record2" {
	const record2 = try parse( tst.allocator, "( rec f1( str value ) f2( int 2 ) )", null );
	defer { record2.deinit(); }
}
test "list" {
	const list = try parse( tst.allocator, "( list ( x ) )", null );
	defer { list.deinit(); }
}
test "list2" {
	const list2 = try parse( tst.allocator, "( list2 ( y ) ( z ) )", null );
	defer { list2.deinit(); }
}

const Xoshiro256 = std.Random.Xoshiro256;
/// TODO
/// fuzzer that generates weighted characters
/// objective is not to generate *valid* input,
/// but to hammer the error handling
fn fuzz_fill_buffer( buffer :[]u8 ) void {
	// var buffer = [1] u8 { undefined } ** 65536;
	
	// https://ziglang.org/documentation/master/std/#src/std/Random/Xoshiro256.zig
	const seed :u64 = 0;
	var r = Xoshiro256.init( seed );
	// const x :u64 = r.next();
	
	for( buffer ) |*b| {
		
		// PAREN_OPEN     =| "(" ;
		// PAREN_CLOSE    =| ")" ;
		// QUOTE          =| "\"" ;
		// BACKSLASH      =| "\\" ;
		// SPACE          =| " " ;
		// TAB            =| "\t" ;
		// RETURN         =| "\r" ;
		// NEWLINE        =| "\n" ;
		// SEMICOLON      =| ";" ;
		// ANY            =| . ;
		
		b.* = switch( r.next() % 6 ) {
			0 => '(',
			1 => ')',
			3 => '"',
			2 => '\\',
			4 => switch( r.next() % 5 ) {
				0 => ' ',
				1 => '\t',
				2 => '\r',
				3 => '\n',
				4 => ';',
				else => 'y',
			},
			5 => '\n',
			else => 'x',
		};
		
	}
}

test "fuzzing Xoshiro256 0" {
	// if( true ) { return error.SkipZigTest; }
	var buffer :[65536*128]u8 = undefined;
	fuzz_fill_buffer( buffer[0..] );
	fuzz_fill_buffer( &buffer );
	for( 0..buffer.len ) |i| {
	// for( 411..412 ) |i| {
		// debug( "fuzz iteration {d} {{ ", .{ i } );
		// defer { debug( " }}\n", .{} ); }
		var err :?usize = null;
		var pon = parse( std.testing.allocator, buffer[i..i], &err ) catch {
			// debug( "hello?\n", .{} );
			// if( err ) |off| {
			// 	debug( "here\n", .{} );
			// 	debug(
			// 		"{{ <{d}>, <{s}>, <{d}>, <{c}> }}\n",
			// 		.{ i, buffer[i..i+64], off, buffer[i..i+64][ off ] }
			// 	);
			// 	debug( "there\n", .{} );
			// } else {
			// 	debug( "{{ succeeded?, <{d}>, <{s}> }}\n", .{ i, buffer[i..i+64] } );
			// }
			continue;
		};
		defer pon.deinit();
		// debug( "\n", .{} );
	}
}

test "fuzz-411" {
	debug( "fuzz-411\n", .{} );
	
	// when freeing lists,
	// remember to free the *content of the list* as well
	
	const input =
	\\(\(\"\\
	\\(\"")"
	;
	
	// const input =
	// \\( l1 ( l2 )(
	// ;
	var err :?usize = null;
	// const pon = parse(
	// 	std.testing.allocator,
	// 	input,
	// 	&err,
	// ) catch |e| {
	// 	debug( "catch\n", .{} );
	// 	if( err ) |off| {
	// 		debug( "error: {s} {d} '{?c}'\n", .{
	// 			@errorName( e ),
	// 			off,
	// 			( if( off < input.len ) input[off] else null )
	// 		} );
	// 	} else {
	// 		debug( "umm..\n", .{} );
	// 	}
	// 	return;
	// };
	const pon = parse( std.testing.allocator, input, &err ) catch {
		return;
	};
	defer { pon.deinit(); }
}