#! /usr/bin/pan

/"~/.config/sublime-text-3/Packages/User/Pan.sublime-syntax

// https://www.sublimetext.com/docs/syntax.html
// https://raw.githubusercontent.com/kkos/oniguruma/v6.9.1/doc/RE
// https://github.com/kkos/oniguruma/

// https://www.sublimetext.com/docs/scope_naming.html
// https://www.sublimetext.com/docs/color_schemes.html
// https://www.sublimetext.com/docs/packages.html#locations

// nil -vs- null
// pub -vs- export

import
export // ? vs pub
pub
var
mut
if
else
switch
case
break
for
while
continue
fn
return
defer
errdefer
infer
orelse
error
try
catch
comptime // :#[_]u8 = ...; // comptime array of u8 ?
test
assert // XXX
dis // discard

var x :u8     =    255; // max
var _ :s8     =   -128; // min
var _ :u16    =  65535; // max
var _ :s16    = -32768; // min
var _ :u32    =  0xffff_ffff; // max
var _ :s32    = -0x8000_0000; // min
var _ :u64    = 0x_ffff_ffff_ffff_ffff; // max
var _ :s64    = 0x_7fff_ffff_ffff_ffff; // max
var _ :usize  = @ptr( usize, null );
var _ :usize  = @ptr( ssize, null );
// what did i mean with "usize = @ptr( usize, null )" ?
// ..must have been a cast *to* a pointer *from* a usize ??
// maybe `var my_ptr :*u8 = @ptr( 0x_dead_beef );` ?
// but then why not `var my_ptr = @as( *u8, 0x_dead_beef );` ..?
// side-note: this is a way to bypass the type-system,
// & give oneself null-pointer-exceptions, even in a null-safe lang..
var _ :f16    = 0.5;
var _ :f32    = undefined;
var _ :f64    = undefined;
var _ :bool   = true;
var _ :void   = undefined; // don't know what this would look like..

var _ :#type  = u8; // # comptime
var _ :*u8    = &x; // pointer, never null
var mut y :u8 = 128;
mut z :u8 = 255; // ... should `var` be required when `mut` is already ?
var i :mut u8 = 129; // XXX not sure how i like this...
i += 1; // increment, loop control
var _ :*mut u8 = &y; // constant pointer to mutable u8
var _ :mut *u8 = &x; // mutable pointer to constant u8
var _ :?*usize = null; // optional, nullable pointer
var _ :*usize = @as( *u8, 0xdead_beef ); // XXX for embedded
var _ :[2]u8 = .( 0, 255 );
var _ :[3]u8 = [3]u8( 1, 2, 3 );
var _ :[4]u8 = [4]u8( 'g','o','o','d' );
var _ :[4]u8 = "good"; // same, shorter
// var _ : []u8 = &"good"; // different, native slice // .( .ptr :[*]u8, .len :usize = 4 )
// var _ : []u8 = "good"; // different, native slice // .( .ptr :[*]u8, .len :usize = 4 )
// are all intermediate values considered const /non-mut unless explicitly made mut?
// so `var str :[]u8 = &"again";` is fine, because the slice contents is const,
// but `var mut array = "another"; var slc :[]mut u8 = &array;`
// needs the array explicitly declared mutable?..
// seems consistent, at least.
var _ :[5]u8 = "hello";
var _ :[11]u8 = "lorem ipsum";
var _ = "lorem ipsum"; // [11]u8 // [_]u8( 'l','o','r', ... );
var _ = "lorem ipsum"; // []u8 // 
var _ :*[2]u8 = &[_]u8( 0, 1 );
var _ :  []u8 = &[_]u8( 0, 1 ); // .( .ptr :[*]u8 = ..., .len :usize = 2 ); // slice, native
var _ :  []u8 = &.( 0, 1 );
var _ :range = 0..10; // .{ .lo :ssize = 0; .hi :ssize = 10 }; // .lo <= _ < .hi
// should .lo < .hi ? should .lo be negative?
// NO, because zero-sized ranges are valid.
// should .lo <= .hi ? ..?
// definitely integer.. thinking negatives allowed, but assert( .lo <= .hi );
// except ranges are going to be primarily used for indexing into arrays/slices,
// so usize is to be preferred
// perhaps two ranges ?.. .( .lo :ssize, .hi :ssize, ); and .( .lo :usize, .hi :usize, );
// .. the most common case is: positive, { .lo <= .hi }, counting up.

hello world

// TODO decide between ( null & nil )
null // keyword
nil // keyword
undefined // keyword
unreachable(); // stdlib
assert( true ); // stdlib

true
false

// pan / pace / piece / peace
// .. pan, paan, bread

"goodbye"

"hello \" world"

"\t\t\tmultiple escapes.. highlights all, requires push scope"

"invalid unfinished quote /string

/"roses are red
/"violets are blue
// sequential line strings are joined with newlines
// blank lines & comments between line strings are weird, but still correct, i think.

/"line string with a newline at the end
/"

"normal strings can end with newlines as well\n"

var mut my_str = "string with a quote \" \n";
// this string has an unmatched quote
var unfinished_string = "it's nice to meet y
// line strings are not ideal for a single line
// since the expression / statement has to continue on the next line then..
var line_string = /"this can contain quotes """ see ?
;

{ // `if` expressions
	// an `if` is used here as a value: the block's last expression is the result.
	var x :u8 = 17;
	// no `else` branch: the declared type is optional (`?u8`),
	// presumably null when the condition is false — TODO confirm.
	var y :?u8 = if( math.rand( u8 ) < 128 ) { x };
	// both branches present: the declared type is a plain (non-optional) slice.
	var z :[]u8 =
		if( math.rand( s8 ) < 0 ) { "negative" }
		else { "positive" }
	;
}

{ // while stream & expression
	// `stream` yields a random byte or null; a null ends the `while` below.
	// return type written with the `:` prefix, like `fn() :[]u8` elsewhere in this file.
	var stream = fn() :?u8 {
		return if( math.rand( bool ) ) {
			math.rand( u8 )
		} else {
			null
		};
	}; // TODO iterator example
	// `while` as an expression: `break byte` supplies the value,
	// `orelse` supplies it when the loop ends without a break.
	var one_or_large = while( stream() ) |byte| {
		stdout.print( "{d}\n", .( byte ) );
		if( 128 < byte ) {
			break byte;
		}
	} orelse { 1 };
}
{ // linked list iterator
	// type-function: takes an item type `T`, returns the node type of a singly linked list.
	var Linked_list = fn( T :type ) :type {
		// ??? does Node need to be mutable to add a fn after ?
		var mut Node = struct ( next :?*@self(); item :T; );
		
		// walks the list: holds the node to visit next, null once exhausted.
		var Iterator :type = struct(
			.node :?*Node = null;
			
			// returns the current node's item and advances, or null at the end of the list.
			::next = fn( self :*mut @self() ) :?T {
				return if( self.*.node ) |node| {
					var item = node.*.item;
					self.*.node = node.*.next;
					item
					// defer { self.*.node = node.*.next; }
					// node.*.item
				} else {
					null
				};
			};
		);
		
		// TODO syntax for type-functions...
		// attached to Node after its declaration; starts an iterator at `root`.
		Node::iterator = fn( root :*@self() ) :Iterator {
			return .( .node = root );
		};
		// Node::iterator :fn( :*@self() ):Iterator =
		// 	.|self| { .( .node = self ) }
		// ;
		
		return Node;
		// Node // ??? should functions allow block expression returns ?
	};
	// var node :Linked_list( u8 ) = .{ .next = null; .item = 1; };
	// var root :Linked_list( u8 ) = .{ .next = &node; .item = 2; };
	// five nodes, items 1..5, built inside-out from nested literals.
	var root :Linked_list( u8 ) = .( .item = 1;
		.next = &.( .item = 2;
			.next = &.( .item = 3;
				.next = &.( .item = 4;
					.next = &.( .item = 5;
						.next = null;
					);
				);
			);
		);
	);
	var mut iterator = root::iterator();
	var mut sum :usize = 0;
	// the loop breaks on item 4, so only 1 + 2 + 3 are summed.
	var four :?u8 = while( iterator::next() ) |val| {
		if( val == 4 ) { break val; }
		sum += val;
	};
	assert { sum == 6 };
	var still_four :u8 = four orelse 0;
}

{ // function lambdas ??
	// each lambda `.| params | { body }` takes its parameter and return types
	// from the declared `fn( :u8, :u8 ):u8` type on the left.
	var addition        :fn( :u8, :u8 ):u8 = .| x, y | { x + y };
	var subtraction     :fn( :u8, :u8 ):u8 = .| x, y | { x - y };
	var multiplication  :fn( :u8, :u8 ):u8 = .| x, y | { x * y };
	var division        :fn( :u8, :u8 ):u8 = .| x, y | { x / y }; // div-by-zero ?
	var modulus         :fn( :u8, :u8 ):u8 = .| x, y | { x % y };
	// all functions are referred to via pointers
	// **function expressions evaluate to fn pointers**
	
	// var binary_functions = &[_]fn( :u8, :u8 ):u8 { addition, subtraction, multiplication, division, modulus };
	var binary_functions = .{ addition, subtraction, multiplication, division, modulus };
	var names = .{ "addition", "subtraction", "multiplication", "division", "modulus" };
	// the two tuples are walked in lock-step: one function with its display name.
	for( binary_functions, names ) |fx, name| {
		var x = 23;
		var y = 7;
		// call with the same operands that are printed, so the output cannot drift
		var z = fx( x, y );
		print( "{s}: fx( {d}, {d} ) = {d};\n", .( name, x, y, z ) );
		// addition: fx( 23, 7 ) = 30;
		// subtraction: fx( 23, 7 ) = 16;
		// multiplication: fx( 23, 7 ) = 161;
		// division: fx( 23, 7 ) = 3;
		// modulus: fx( 23, 7 ) = 2;
	}
}

// block expression: runs both loops, then takes the value of its last expression, "fin".
var fin = {
	// counting with an explicit mutable counter ...
	var mut i = 0;
	while( i < 10 ) {
		log( i );
		i += 1;
	};
	// ... and the same count written as a `for` over a range.
	for( 0..10 ) |j| {
		log( j );
	};
	// .. how to label loops ?
	// `loop asdf while/for(...)` ?
	// okay to be verbose in uncommon cases
	// are loops the only location for labels ?
	// .. `label my_label while/for(...)` ?
	"fin"
};

struct
tuple
union
enum
slice // []u8 // .( .ptr :[*]u8; .len :usize; );
range // lo..hi // .( .lo :usize; .hi :usize; );
// within `.( ... );` comma `,` should probably be preferred over semicolon `;` ...

// switch compilation - https://en.wikipedia.org/wiki/Switch_statement#Compilation
// duff's device - https://en.wikipedia.org/wiki/Duff%27s_device
// branch table - https://en.wikipedia.org/wiki/Branch_table
// otherwise, binary search...
// TODO consider range lo/hi fields & values... positive/negative & lo<hi

// should `static` be a thing ??? basically global..

// // XXX indentation for control flow seems important
// const Directions = enum { .up, .down, .left, .right };
// // should enums be allowed type-methods ??
// var minus_one = { // and the item moved
// 	var mut iter = getIterator();
// 	while( iter.next() ) |item| orelse -1; // while expression value, when not break
// 		var direction = [_]enum { .up, .down, .left, .right };
// 		for( 0..10 ) |i|;
// 			for( direction ) |d|;
// 				item.move( i, d );
// };
// var x_less_than_half = {
// 	var x = math.rand();
// 	console.log( "{}", .( x ) );
// 	if( x < 0.5 );
// 		console.log( "less than half" );
// 		true; // XXX this is not a block...
// 	else;
// 		false;
// 	// ( x < 0.5 );
// };

var ptr :*[]mut u8 = &my_str;

$hmm

::

asdf

var fn_with_str_param = fn( str :[]u8 ) :void {
	std.debug.print( "debug: {}", .{ str } );
};

fn_with_str_param( "string" );
fn_with_str_param(
	/"full line string
);

var multi =
	/"multiline
	/"string
;

//var my_fn = fn() :[]u8 { return "mine"; };
// ..is it actually ok to return a static string / [4]u8 ;
// can .. its a `fn`.. return type is required
// anyway. slice of non-mut XXX, static u8 is fine.
// ..is that string guaranteed to be static ??
// otherwise, a local ptr would become undefined.
var my_str = "mine";
var my_fn = fn() :[]u8 { return my_str; };
var my_f2 :fn():[]u8 = .|| { my_str };
my_fn( number, multi ); // XXX incorrect number of params

var sum = fn( x :u8, y :u8 ) :u8 { return x + y; };
var add :fn(:u8,:u8):u8 = .| x, y | { x + y };
// how is parsing of keywords vs identifiers handled ??
// k-lookahead to deterministically avoid keywords ?
// ..doable, but verbose & unwieldy..
// how..

//assert { mem.eql  ( u8, "mine", my_fn() ) };
//assert { mem..eql ( u8, "mine", my_fn() ) };
//assert { mem.:eql ( u8, "mine", my_fn() ) };
//assert { mem:eql  ( u8, "mine", my_fn() ) };
//assert { mem::eql ( u8, "mine", my_fn() ) };
//assert { mem:.eql ( u8, "mine", my_fn() ) }; // XXX
// XXX probably demote `assert` to a normal method
// same as for allocators /malloc-and-free.
assert( mem.eql  ( u8, "mine", my_fn() ) );
assert( mem..eql ( u8, "mine", my_fn() ) );
assert( mem.:eql ( u8, "mine", my_fn() ) );
assert( mem:eql  ( u8, "mine", my_fn() ) );
assert( mem::eql ( u8, "mine", my_fn() ) );
assert( mem:.eql ( u8, "mine", my_fn() ) ); // XXX

// .. does changing the operator for type functions fix an issue ? .. no
// but it might open the way for traits.
// or traits could just build on the same solution that merges fields & type-functions..
// hmm //

// comment
//! license
//* markdown ??
/// document

// TODO
// reify interfaces & errors-with-data

// pan fmt
// pan build-wasm / compile
// pan build-exe
// pan run
// pan test
// pan lsp
// pan package ??

 	var no_spaces_before_tabs = // XXX space before tab is bad
 	  /"i would prefer this highlight as an error
	  /" tabs are for indentation /scoping
	  /" spaces are for alignment
	  /" but should generally be unnecessary
	  /" within literals is okay	acceptable
	;

//" this is a comment, not a string

'x'  // 'x  // this is a character literal
'ab' //     // this is an error
'\n' // '\n // this is a single char & is correct

var b = "\\"; // [_]u8
var d = '/'; // u8 // var d = '/;
( "a" ++ b ++ "c" ++ [_]u8{ d } ++ "e" ); // "a\c/e"

// decimal
10, 20, 30
1234567890
1_234_567_890

// hexadecimal
0x0123456789abcdefABCDEF
0xDeadBeef
0xdead_beef
0x_dead_beef_

// binary
0b00110000
0b0000_1000
0b_0000_0001_

adflkkjiweur

alpha

++ ** + - * /
< > <= >= == != !

0..10

1.2.3

2e10 // float

// the `else` branch yields null, so the type has to be optional (`?u8`, not `u8`)
var optional :?u8 = if( Math::rand( bool ) ) { 42 } else { null };
/// shows four ways of reading an optional `?u8`, from safest to least safe.
/// declared `:!void` because two of the forms can leave the function with an error.
var optional_usage = fn( optional :?u8 ) :!void {
	// clearest; correct, safe
	// the capture `|forty_two|` only exists in the non-null branch.
	if( optional ) |forty_two| {
		std.debug.print( "{d}\n", .( forty_two ) );
	} else {
		std.debug.print( "null value\n", .() );
	};
	// simplest; safe, may return error_null
	std.debug.print( "{d}\n", .( optional.? ) );
	// the same unwrap spelled out with `orelse` and an explicit error return.
	std.debug.print( "{d}\n", .(
		optional orelse { return error._null; }
		// seems zig is almost equivalent:
		// optional orelse unreachable;
	) );
	// unsafe, bad, not recommended
	// sometimes preferred in pointer handling programming.. not sure why
	std.debug.print( "{d}\n", .( @as( u8, optional ) ) );
	// XXX do not do ; particularly with pointers,
	// leads to null-pointer-exceptions, SIGSEGV / segmentation faults, memory corruption
};


// TODO stubs: signatures only, no bodies yet.
// NOTE(review): the return type of `parse_identifier` was left unfinished in the draft;
// `![]u8` is assumed here because `Identifier::parse` returns that — confirm.
var parse_identifier = fn( allocator :Allocator, grammar :[]u8 ) :![]u8 {};
var parse_grammar = fn( allocator :Allocator, grammar :[]u8 ) :[]Rules {
	
};
var create_lr1_states = fn( allocator :Allocator, rules :[]Rules ) :*LR1_Table {};
// NOTE(review): no return type was given; `void` assumed — confirm.
var parse = fn( allocator :Allocator, table :*LR1_Table ) :void {};

// first draft of the grammar data model.
// NOTE(review): `Rule` and `Expression` are both declared a second time further down
// in this file (with `::parse` attached); presumably those later versions supersede
// these — confirm which one is meant to stay.

// a choice is a list of alternatives, each alternative a sequence of expressions.
var Choice_of_Sequence = std.Array_List( std.Array_List( Expression ) );
// one grammar rule: `name = alternative | alternative | ... ;`
var Rule = struct (
	.name :[]u8,
	.cho  :Choice_of_Sequence,
);
// one node of a rule's right-hand side; each case carries its own payload.
var Expression :type = variant (
	.choice       ( .cho  :Choice_of_Sequence           ),
	.optional     ( .opt  :?*Expression                 ),
	.one_or_more  ( .list :std.Array_List( Expression ) ),
	.zero_or_more ( .list :std.Array_List( Expression ) ),
	.item         ( .item :Item                         ),
);
// leaf of an expression: a rule reference, a quoted literal or a `[...]` character set.
var Item = variant (
	.identifier ( .name :[]u8     ),
	.literal    ( .str  :[]u8     ),
	.char_set   ( .set  :Char_Set ),
);

// feeds the grammar's own description (written in itself) to `Grammar::parse`.
// the test only checks that parsing does not return an error.
test "parse Grammar" {

	// every rule must end in ` ;` — the `space` rule used to end in a stray `"`.
	// `\x` escapes take hex digits only, so `set` uses [0-9a-fA-F] like `literal` does.
	var grammar_str = /"
	/"// after every literal, whitespace is optional
	/"// except in expression-sequences
	/"
	/"start = _ rule* ;
	/"_ = space* ;
	/"space = " " | "\t" | "\n" | comment ;
	/"comment = "//" [^\n]* "\n" ;
	/"rule = identifier "=" _ choice ";" _ ;
	/"identifier = [_a-zA-Z0-9]+ _ ;
	/"choice = sequence ( "|" _ sequence )* ;
	/"choice_ =           "|" _ sequence    ;
	/"sequence = expression ( space expression )* ;
	/"sequence_ =             space expression    ;
	/"expression = "(" _ choice ")"    _
	/"           | expression "?"      _
	/"           | expression "+"      _
	/"           | expression "*"      _
	/"           | item                
	/"           ;
	/"item = identifier
	/"     | literal
	/"     | set
	/"     ;
	/"literal = "\"" ( [^\"\n\\] | "\\" [^x\n] | "\\x" [0-9a-fA-F] [0-9a-fA-F] )+ "\"" _ ;
	/"literal_ =       [^\"\n\\] | "\\" [^x\n] | "\\x" [0-9a-fA-F] [0-9a-fA-F]           ;
	/"set = "[" ( [^\]\\] | "\\" [^x] | "\\x" [0-9a-fA-F] [0-9a-fA-F] )+ "]" _ ;
	/"set_ =      [^\]\\] | "\\" [^x] | "\\x" [0-9a-fA-F] [0-9a-fA-F]          ;
	/"
	;
	
	// the parser advances its source through a pointer, so it needs a mutable slice.
	var mut grammar_slice = grammar_str[..];
	try Grammar::parse( testing.allocator, &grammar_slice );
	
};

var Grammar = struct ( // start
	.rules :[]Rule,
	
	/// start = _ rule* ;
	/// parses rules until `src` is exhausted, advancing `src` in place.
	/// - errors with `bad_char` when a rule does not start with an identifier character.
	/// - propagates every error from `Rule::parse` and from growing the list.
	::parse = fn( allocator :Allocator, src :*mut[]u8 ) :!Grammar {
		parse_whitespace( src );
		// `mut`: the list is appended to below
		var mut list = std.Array_List( Rule )::init( allocator );
		defer { list.free(); };
		while( 0 < src.len ) {
			switch( src[0] ) {
				.( '_','a'..'z'+1,'A'..'Z'+1,'0'..'9'+1 ) { // identifier lead
					var rule = Rule::parse( allocator, src ).!;
					// append can fail; propagate like every other append in this file
					list.append( rule ).!;
				},
				.() { return error.bad_char; },
			};
		};
		return .(
			.rules = list::to_owned_slice().!,
		);
	},
);

var Rule = struct (
	.identifier :[]u8,
	.choice :[][]Expression,
	
	/// rule = identifier "=" _ choice ";" _ ;
	/// consumes one complete rule from `src`, including the whitespace after `=` and `;`.
	/// every error from the sub-parsers and from `consume` is passed on to the caller.
	::parse = fn( allocator :Allocator, src :*mut[]u8 ) :!Rule {
		// left-hand side
		var name :[]u8 = Identifier::parse( allocator, src ).!;
		
		// "=" _ choice
		consume( src, '=' ).!;
		parse_whitespace( src );
		var alternatives :[][]Expression = Choice::parse( allocator, src ).!;
		
		// ";" _
		consume( src, ';' ).!;
		parse_whitespace( src );
		
		return .( .identifier = name, .choice = alternatives, );
	},
);

/// skips whitespace and `//` line comments at the front of `src`, advancing it in place.
/// stops at the first byte that starts neither; a single `/` is left untouched.
var parse_whitespace = fn( src :*mut[]u8 ) :void {
	while( 0 < src.len ) {
		if( is_whitespace( src[0] ) ) {
			// drop the whole run of whitespace at once
			var mut run = 0;
			while( run < src.len and is_whitespace( src[run] ) ) { run += 1; };
			src.* = src[run..];
		} elseif( src[0] == '/' and mem::starts_with( src, "//" ) ) {
			// drop the comment up to, but not including, its newline;
			// the newline is whitespace and goes on the next pass
			var mut end = "//".len;
			while( end < src.len and src[end] != '\n' ) { end += 1; };
			src.* = src[end..];
		} else {
			break;
		};
	};
};
/// true for space, tab, carriage return, line feed, vertical tab and form feed.
var is_whitespace = fn( char :u8 ) :bool {
	// (syntax idea kept from the draft: open char literals, `' ,'\t,'\r,'\n,'\v,'\f`)
	var blanks = [_]u8( ' ','\t','\r','\n','\v','\f' );
	return for( blanks ) |blank| {
		if( char == blank ) { break true; }
	} orelse { false };
};

var mem = struct ( // non-struct namespace ??
	/// true when `slice` begins with every byte of `prefix`, in order.
	/// an empty `prefix` matches; a `prefix` longer than `slice` never does.
	::starts_with = fn( slice :[]u8, prefix :[]u8 ) :bool {
		if( prefix.len <= slice.len ) {
			// count how many leading bytes agree
			var mut same = 0;
			while( same < prefix.len and slice[same] == prefix[same] ) { same += 1; };
			return same == prefix.len;
		};
		return false;
	},
);
var assert = std.assert;

var Choice = struct (
	/// choice = sequence ( "|" _ sequence )* ;
	/// parses one or more sequences separated by `|`, advancing `src` in place.
	/// returns the alternatives as an owned slice; propagates every sub-parser error.
	::parse = fn( allocator :Allocator, src :*mut[]u8 ) :![][]Expression {
		var mut sequence_list = std.Array_List( []Expression )::init( allocator );
		defer { sequence_list.free(); };
		sequence_list.append( Sequence::parse( allocator, src ).! ).!;
		while( 0 < src.len and src[0] == '|' ) {
		// while( 0 < src.len and src[0] == '| ) {
			src.* = src[1..];
			parse_whitespace( src );
			sequence_list.append( Sequence::parse( allocator, src ).! ).!;
		};
		return sequence_list.to_owned_slice().!;
	},
);

var Sequence = struct (
	/// sequence = expression ( space expression )* ;
	/// parses one expression, then further expressions for as long as the next byte can
	/// start one and the byte just before it is whitespace. advances `src` in place.
	/// `allocator` is now a parameter: the body always needed it for the list and for
	/// `Expression::parse`, but the draft signature did not receive one.
	::parse = fn( allocator :Allocator, src :*mut[]u8 ) :![]Expression {
		var ptr_0 = src.ptr;
		var mut expression_list = std.Array_List( Expression )::init( allocator );
		// release the list on every exit, the same way `Choice::parse` does
		defer { expression_list.free(); };
		// TODO
		expression_list.append( Expression::parse( allocator, src ).! ).!;
		// the first expression must have consumed something
		assert( ptr_0 < src.ptr ).!;
		while( 0 < src.len and switch( src[0] ) {
			.( '(', '_','a'..'z','z','A'..'Z','Z','0'..'9','9', '[', '"' ) { true },
			// .( '(, '_,'a..'z,'z,'A..'Z,'Z,'0..'9,'9, '[, '" ) { true },
			.() { false },
		} ) {
			// parse_whitespace( src );
			// `Expression::parse` ends by skipping whitespace, so the byte just
			// before `src` tells whether the two expressions were separated
			if( is_whitespace( src.*.ptr[-1] ) ) {
				var expression = Expression::parse( allocator, src ).!;
				expression_list.append( expression ).!;
			} else {
				break;
			};
			// XXX this will accept expressions without whitespace between...
			// `"hello""world"` -vs- `"hello" "world"`
		};
		return expression_list.to_owned_slice().!;
	},
);

var Expression = variant (
	// LEAD is '(', [_a-zA-Z0-9], '"', '['
	
	.parenthesis  ( .choice     :[][]Expression , ),
	.optional     ( .expression :*Expression , ),
	.one_or_more  ( .expression :*Expression , ),
	.zero_or_more ( .expression :*Expression , ),
	.identifier   ( .identifier :*Identifier , ),
	.character_set( .char_set   :*Char_Set   , ),
	.literal      ( .literal    :*Literal    , ),
	
	/// parses one expression and any `?` / `+` / `*` suffixes, advancing `src` in place.
	/// the leading byte picks the kind: `(` choice, `[` character set, `"` literal,
	/// `[_a-zA-Z0-9]` identifier. trailing whitespace is consumed.
	/// - errors: `empty_source`, `unmatched_paren`, `invalid_expression`,
	///   plus anything the sub-parsers or the allocator return.
	/// NOTE(review): the declared return type is `Expression`, but `wrap` is the pointer
	/// returned by `allocator.create`; and the payload types (`*Identifier`, `*Char_Set`,
	/// `*Literal`) differ from what the sub-parsers return — confirm the intended types.
	::parse = fn( allocator :Allocator, src :*mut[]u8 ) :!Expression {
		// choice (|*)
		// optionals ?
		// one_or_more +
		// zero_or_more *
		// item
		// - identifier [_a-zA-Z0-9]
		// - literal "
		// - char_set [
		if( src.len == 0 ) { return error.empty_source; }
		
		var mut item = allocator.create( Expression ).!;
		errdefer { allocator.free( item ); };
		
		item.* = switch( src[0] ) {
			.( '(' ) { // parse `choice`
				src.* = src[1..]; // consume '('
				parse_whitespace( src ); // consume whitespace()
				var expr = .parenthesis(
					// `Choice::parse` takes the allocator first, as in `Rule::parse`
					.choice = Choice::parse( allocator, src ).!,
				);
				if( 0 < src.len and src[0] == ')' ) {
					src.* = src[1..];
				} else {
					return error.unmatched_paren;
				};
				parse_whitespace( src ); // consume whitespace()
				expr
			},
			// .( '_, 'a..'z,'z, 'A..'Z,'Z, '0..'9,'9 ) {
			// .( '_', 'a'..'z','z', 'A'..'Z','Z', '0'..'9','9' ) {
			.( '_', 'a'..'z'+1, 'A'..'Z'+1, '0'..'9'+1 ) {
				.identifier( .identifier = Identifier::parse( allocator, src ).! )
			},
			.( '[' ) { // .( '[ ) {
				.character_set( .char_set = Char_Set::parse( src ).! )
			},
			.( '"' ) {
				.literal( .literal = Literal::parse( allocator, src ).! )
			},
			.() {
				return error.invalid_expression;
			},
		};
		// each suffix wraps what has been parsed so far in a new node
		var mut wrap = item;
		while( 0 < src.len ) {
			switch( src[0] ) {
				.( '?', '+', '*' ) {
					var symbol = src[0];
					src.* = src[1..];
					var expr = allocator.create( Expression ).!;
					expr.* = switch( symbol ) {
						.( '?' ) { .optional     ( .expression = wrap ) },
						.( '+' ) { .one_or_more  ( .expression = wrap ) },
						.( '*' ) { .zero_or_more ( .expression = wrap ) },
						.() { unreachable().! },
					};
					wrap = expr;
				},
				.() { break; },
			};
		};
		parse_whitespace( src );
		return wrap;
	},
);

// •
// state transition table ( "shift-reduce" logic )
// | state_0
// | • start = • _ rule* ;
// |   • _ | ' ','\t','\n' | shift, state_0
// |   • rule | [_a-zA-Z0-9]* | shift, state_1
// |   • $ | $ | reduce, state_2
// | state_1
// | • start = _ • rule* ;
// |   • rule = • identifier _ "=" _ expression ( "|" expression )* ";" _ ;
// |     • identifier = • [_a-zA-Z0-9]+ _ ; | [_a-zA-Z0-9] | shift, state_0
// | start • = _ rule* • ;
// | state_x
// | • 

/" rule = r1 | r2 | r3 | r4 | r5 | r6 ;
/" r1 = "literal" ;
/" r2 = r3 r4 ; // sequence
/" r3 = r5 | r6 ; // choice
/" r4 = r1? ; // optional
/" r5 = r2+ ; // one-or-more
/" r6 = r3* ; // zero-or-more

// unfinished sketch of a tokenizer entry point: looks at the first byte of `source`
// and dispatches on it. none of the non-whitespace cases has a body yet.
// NOTE(review): open points before this can work —
// - the return type `!*Token` has no leading `:`, unlike the other functions here
// - `continue` is used, but there is no enclosing loop yet (see the note below)
// - `source` is assigned to although the parameter is not declared `mut`
// - the switch has no `.()` default case and nothing is returned on success
// - `allocator` is not used yet
var parse_token = fn( allocator :*Allocator, source :[]u8 ) !*Token {
	if( source.len == 0 ) { return error.empty_source; }
	// probably a `while` up here somewhere
	var char = source[0];
	// source = source[1..];
	switch( char ) {
		// whitespace: skip one byte and look again
		.( ' ', '\t', '\r', '\n' ) {
			source = source[1..];
			continue;
		},
		.( '"' ) {  }, // parse literal
		.( '(' ) {  }, // expression
		.( '=' ) {  }, // equal, parse rule
		.( ';' ) {  }, // semicolon, parse rule
	}
};

// var Expression :type = enum (
// 	.parenthesis  ( .choice     :[][]Expression , ),
// 	.optional     ( .expression :*Expression , ),
// 	.one_or_more  ( .expression :*Expression , ),
// 	.zero_or_more ( .expression :*Expression , ),
// 	.identifier   ( .identifier :*Identifier , ),
// 	.character_set( .char_set   :*Char_Set   , ),
// 	.literal      ( .literal    :*Literal    , ),
// );
// sketches for a token-based front end; several declarations are still placeholders.
var expression = null;
// kinds of token the grammar tokenizer is meant to produce.
// the indented, commented-out names are the characters that can occur
// inside the kind listed just above them.
var token_kind :type = enum (
	.whitespace, // multiple characters
	.comment, // multiple characters
	.identifier, // multiple characters
	.equal,
	.semicolon,
	.pipe,
	.parenthesis_open,
	.parenthesis_close,
	.question,
	.plus,
	.star,
	.dot,
	.literal,
		// .quote,
		// .backslash,
	.set,
		// .bracket_open,
		// .bracket_close,
		// .hyphen,
		// .caret,
);
// a token as found in the source: the matched text and its kind.
var token_literal :type = struct (
	.src :[]u8, // undefined
	.kind :token_kind,
);
// NOTE(review): placeholder — no members have been written yet.
var token_class :type = enum (
	.
);
// NOTE(review): placeholder — `Array_List()` has no element type yet.
var token :type = struct (
	.components :Array_List(),
);

// NOTE(review): stub — the return type is the placeholder `TODO`, the body is empty.
var tokenize_grammar = fn( input :[]u8 ) :TODO {
	
};

var Identifier = struct (
	// LEAD is [_a-zA-Z0-9]
	// ( '_','a'..'z'+1,'A'..'Z'+1,'0'..'9'+1 )
	
	/// identifier = [_a-zA-Z0-9]+ _ ;
	/// collects identifier characters from the front of `src`, then skips whitespace.
	/// - `end_of_input` when `src` runs out before a non-identifier byte is seen,
	///   even if some characters were already collected.
	/// - `empty_identifier` when the first byte is not an identifier character.
	/// - otherwise the collected bytes as an owned slice.
	::parse = fn( allocator :Allocator, src :*mut[]u8 ) :![]u8 {
		var mut list = std.Array_List( u8 )::init( allocator );
		defer { list.free(); };
		// XXX var char_set = Char_Set::parse( &"_a-zA-Z0-9]" );
		// `Char_Set::parse` advances its source, so the pattern needs a mutable slice
		var char_set = {
			var mut pattern :[]u8 = "[_a-zA-Z0-9]";
			Char_Set::parse( &pattern ).!
		};
		
		// take identifier characters for as long as they last
		while( 0 < src.len and char_set::matches( src[0] ) ) {
			list.append( src[0] ).!;
			src.* = src[1..];
		};
		
		// TODO `end_of_input` -vs- `early_eof`
		if( src.len == 0 ) { return error.end_of_input; };
		if( list::len() == 0 ) { return error.empty_identifier; };
		parse_whitespace( src );
		return list::to_owned_slice().!;
	},
);

var Literal = struct (
	// only a namespace, Literal is a `[]u8`
	// LEAD is '"'
	
	/// parses a double-quoted literal from the front of `src` and returns its
	/// unescaped bytes as an owned slice. consumes both quotes and trailing whitespace.
	/// - errors from `consume` when a quote is missing, plus anything `parse_atom`
	///   or the list return.
	::parse = fn( allocator :Allocator, src :*mut []u8 ) :![]u8 {
		// built with `::init`, like every other `Array_List` in this file
		var mut list = std.Array_List( u8 )::init( allocator );
		errdefer { list.free(); };
		consume( src, '"' ).!;
		while( 0 < src.len ) {
			// a null atom means the closing quote was reached
			var atom = parse_atom( src, '"' ).! orelse { break; };
			list.append( atom ).!;
		};
		consume( src, '"' ).!;
		parse_whitespace( src );
		return list::to_owned_slice().!;
	},
);

var Char_Set = {
	// one bit per byte value: `char >> 6` picks the u64 chunk, the low six bits the bit.
	var mut Char_Set = struct ( .bits :[4]u64 = .( 0, 0, 0, 0 ), ); // 256 bits
	
	// LEAD is '['
	
	/// true when the bit for `char` is set.
	Char_Set::matches = fn( self :*Char_Set, char :u8 ) :bool {
		var chunk = self.*.bits[ char >> 6 ];
		var chunk_mask :u64 = 1 << { 0b_0011_1111 & char };
		var flag = chunk & chunk_mask;
		return flag != 0;
	};
	/// for parsing ie. "[^a-z234\n\xFF]".
	/// on success, consumes the closing bracket ']'.
	/// a leading `^` inverts the set: every bit starts set, listed chars are cleared.
	/// listed chars are assigned, not toggled, so repeats and overlaps are harmless:
	/// `[a-cd-f]` == `[abcdef]` == `[a-dc-f]`
	/// a `-` directly before `]` is a literal `-`.
	/// - errors with `end_of_input` when `src` ends before the closing bracket.
	Char_Set::parse = fn( src :*mut []u8 ) :!Char_Set {
		consume( src, '[' ).!;
		// TODO consider reseting src on error
		// var original_src = src;
		// errdefer { src = original_src; };
		// TODO consider writing the loop as a `while ... orelse` expression
		var mut char_set :Char_Set = .();
		var is_inverted = { 0 < src.len and src[0] == '^' };
		var fill :u64 = if( is_inverted ) { 0x_ffff_ffff_ffff_ffff } else { 0 };
		for( char_set.bits ) |*chunk| { chunk.* = fill; };
		var begin :usize = if( is_inverted ) { 1 } else { 0 };
		src.* = src[begin..];
		// listed chars are members of a plain set and non-members of an inverted one
		var member = !is_inverted;
		while( 0 < src.len ) {
			var atom = parse_atom( src, ']' ).! orelse {
				src.* = src[1..]; // consume( src, ']' ).!;
				return char_set;
			};
			if( 1 < src.len and src[0] == '-' ) {
				src.* = src[1..]; // consume( src, '-' ).!;
				var atom_range_end = parse_atom( src, ']' ).! orelse {
					// `x-]` : the `-` was a literal, not a range
					src.* = src[1..]; // consume( src, ']' ).!;
					char_set::assign_char( atom, member );
					char_set::assign_char( '-', member );
					return char_set;
				};
				char_set::assign_range( atom, atom_range_end, member );
			} else {
				char_set::assign_char( atom, member );
			};
		};
		return error.end_of_input;
	};
	/// sets ( `member` true ) or clears ( `member` false ) the bit for `char`.
	Char_Set::assign_char = fn( self :*mut Char_Set, char :u8, member :bool ) :void {
		var chunk_mask :u64 = 1 << { 0b_0011_1111 & char };
		var chunk = self.*.bits[ char >> 6 ];
		self.*.bits[ char >> 6 ] = if( member ) {
			chunk | chunk_mask
		} else {
			chunk ^ ( chunk & chunk_mask ) // flips the bit only when it is set
		};
	};
	/// `assign_char` for every char from `begin` to `end`, both included.
	Char_Set::assign_range = fn( self :*mut Char_Set, begin :u8, end :u8, member :bool ) :void {
		// XXX what if { end < begin } ..?
		for( begin..end ) |i| {
			self.*::assign_char( i, member );
		};
		self.*::assign_char( end, member ); // inclusive
	};
	/// flips the bit for `char`.
	Char_Set::toggle_char = fn( self :*mut Char_Set, char :u8 ) :void {
		var chunk_mask :u64 = 1 << { 0b_0011_1111 & char };
		self.*.bits[ char >> 6 ] ^= chunk_mask;
	};
	/// flips the bit of every char from `begin` to `end`, both included.
	Char_Set::toggle_range = fn( self :*mut Char_Set, begin :u8, end :u8 ) :void {
		// XXX terribly inefficient, but functional
		// also, what if { end < begin } ..?
		for( begin..end ) |i| {
			self.*::toggle_char( i );
		};
		self.*::toggle_char( end ); // inclusive
	};
	Char_Set
};

/// for parsing char_set atoms like `a`, `\n`, `\xff`.
/// modifies `src`, consumes atom.
/// - errors on end-of-input or invalid-hex.
/// - null on terminal ( end of char_set ']' or literal '"' ).
///   the terminal itself stays in `src`: both callers consume it themselves.
///   * '"' can be returned as u8 via "\"" or "\x22".
///   * ']' can be returned as u8 via "\]" or "\x5d".
///   * '\' can be returned as u8 via "\\" or "\x5c".
/// - u8 on atom, normal.
var parse_atom = fn( src :*mut []u8, terminal :u8 ) :!?u8 {
	// parse atoms: `a`, `\n`, `\xff`
	// `atom_len` is raised as more of the atom is recognized; the defer is written
	// to advance `src` by its final value on every return path, errors included.
	var mut atom_len = 0;
	defer { src.* = src[atom_len..]; };
	
	if( src.len < 1 ) { return error.partial_atom_0; };
	var a0 = src[0];
	// checked while atom_len is still 0, so the terminal is not consumed here
	if( a0 == terminal ) { return null; };
	atom_len = 1;
	if( a0 != '\\' ) { return a0; }; // `a`
	
	// a0 is `\`
	if( src.len < 2 ) { return error.partial_atom_1; };
	atom_len = 2;
	var a1 = src[1];
	if( a1 != 'x' ) {
		// https://ziglang.org/documentation/master/#Escape-Sequences
		// https://en.wikipedia.org/wiki/ASCII#Control_code_chart
		return switch( a1 ) { // `\q`
			.( '0' ) { 0x00 }, // null
			.( 't' ) { 0x09 }, // tab
			.( 'n' ) { 0x0A }, // line feed
			.( 'v' ) { 0x0B }, // vertical tab
			.( 'f' ) { 0x0C }, // form feed
			.( 'r' ) { 0x0D }, // carriage return
			.() { a1 }, // any other escaped byte stands for itself: `\]`, `\"`, `\\`
		};
	};
	
	// a0 is `\`, a1 is `x`, src[0..2] is `\x`
	if( src.len < 4 ) { return error.partial_atom_4; };
	atom_len = 4;
	var a2 = src[2];
	var a3 = src[3];
	return hex_to_byte( a2, a3 ).!; // `\xff`
};
/// value ( 0..15 ) of a single hexadecimal digit, upper or lower case.
/// - errors with `invalid_hex` for any other byte.
var hex_to_nibble = fn( hex :u8 ) :!u8 {
	// letters start at ten: 'a' / 'A' is 10, 'f' / 'F' is 15
	return  if( '0' <= hex and hex <= '9' ) { hex - '0' }
	else    if( 'a' <= hex and hex <= 'f' ) { hex - 'a' + 10 }
	else    if( 'A' <= hex and hex <= 'F' ) { hex - 'A' + 10 }
	else { error.invalid_hex };
};
/// combines two hexadecimal digits into one byte, `hi` being the upper four bits.
/// - passes on `hex_to_nibble`'s error when either digit is invalid ( `hi` is checked first ).
var hex_to_byte = fn( hi :u8, lo :u8 ) :!u8 {
	var upper = hex_to_nibble( hi ).!;
	var lower = hex_to_nibble( lo ).!;
	return ( upper << 4 ) | lower;
};

/// drops the first byte of `src` when it equals `char`.
/// - errors with `early_eof` when `src` is empty.
/// - errors with `bad_char` when the first byte is a different one; `src` is left as it was.
var consume = fn( src :*mut[]u8, char :u8 ) :!void {
	if( src.len == 0 ) { return error.early_eof; };
	if( src[0] != char ) { return error.bad_char; };
	src.* = src[1..];
};