const std = @import( "std" );

// https://alloc.dev/2025/05/25/syntax_highlighting
// Dark Mode:
// Comments:    #808080 (RGB: 128, 128, 128)
// Builtins:    #ff7065 (RGB: 255, 112, 101)
// Keywords:    #ffbb65 (RGB: 255, 187, 101)
// Strings:     #deff65 (RGB: 222, 255, 101)
// Numbers:     #65ffc3 (RGB: 101, 255, 195)
// Types:       #65dfff (RGB: 101, 223, 255)
// functions(): #659cff (RGB: 101, 156, 255)
// var_names:   #b565ff (RGB: 181, 101, 255)
// Primitives:  #ff65d3 (RGB: 255, 101, 211)
// Default:     #ffffff (RGB: 255, 255, 255)

// const escape = @import( "../terminal-escape-codes/ansi-lib.zig" );
const escape = @import( "ansi-lib.zig" );
const rgb = escape.fmt_g_rgb;

/// Prints every token's source text to stderr, preceded by a color escape
/// chosen from the token's kind and followed by `escape._fmt_clear_color`.
/// Whitespace tokens are printed without a color prefix.
fn print_tokens_color( tokens :[]Token ) void {
    for( tokens ) |token| {
        std.debug.print( "{s}", .{
            // NOTE(review): `rgb` is `escape.fmt_g_rgb` from ansi-lib.zig; it
            // presumably yields a 24-bit foreground color escape -- confirm.
            switch( token.val ) {
                .symbol     => rgb( .foreground, 0xff, 0x70, 0x65 ),
                .operator   => "\x1b[38;2;28;39;50m", // .{ r, g, b }
                .identifier => rgb( .foreground, 0xb5, 0x65, 0xff ),
                .keyword    => "\x1b[38;2;128;139;150m", // .{ r, g, b }
                .number     => rgb( .foreground, 0x65, 0xff, 0xc3 ),
                .string     => rgb( .foreground, 0xde, 0xff, 0x65 ),
                .whitespace => "",
            }
        } );
        std.debug.print( "{s}" ++ escape._fmt_clear_color, .{ token.str } );
    }
}

/// Debug dump: prints one "kind: `text`" line per token to stderr.
fn print_tokens( tokens :[]Token ) void {
    for( tokens ) |token| {
        // if( token.tag == .whitespace ) { continue; }
        std.debug.print( "{s}: `{s}`\n", .{
            @tagName( token.val ),
            token.str,
        } );
    }
}

// TODO color print
// TODO NO_COLOR, no-color.org

/// Tokenizes the two embedded sample programs and prints them colorized.
/// Prints "err" before "fin" if tokenizing fails.
pub fn main() !void {
    std.debug.print( "hello world\n", .{} );
    defer    { std.debug.print( "fin\n", .{} ); }
    errdefer { std.debug.print( "err\n", .{} ); }

    var GPA = std.heap.GeneralPurposeAllocator(.{}){};
    // Tear the allocator down on exit so its leak check actually runs.
    defer { _ = GPA.deinit(); }
    // Never mutated, so it must be `const` (unmutated `var` is a compile error).
    const allocator = GPA.allocator();

    if( true ) {
        const tokens = try tokenize( allocator, pan_hello );
        defer { allocator.free( tokens ); }

        std.debug.print( "hello:\n", .{} );
        // print_tokens    ( tokens );
        print_tokens_color ( tokens );
    }

    // ISSUES
    // `..` ;; minor
    // `+` vs `++` ;; minor
    // `12.34e+56` vs `tuple.0` ;; fallback to `tuple.@0`
    // self-note: 2025-05-30

    if( true ) {
        const tokens = try tokenize( allocator, pan_fibonacci );
        defer { allocator.free( tokens ); }

        std.debug.print( "fibonacci:\n", .{} );
        // print_tokens   ( tokens );
        print_tokens_color( tokens );
    }

    std.debug.print( "success\n", .{} );
}

/// Sample input: a "hello world" program in the language being tokenized.
const pan_hello =
    \\
    \\var std = @import( "std" );
    \\
    \\pub var main = fn :!void {
    \\    std.debug.print( "hello world\n", .() );
    \\};
    \\
;

/// Sample input: recursive and iterative fibonacci in the language being tokenized.
const pan_fibonacci =
    \\
    \\var std = @import( "std" );
    \\
    \\// a recursive implementation
    \\var fib = fn :u64 | n :u64 | {
    \\    return if( n < 2 ) {
    \\        n
    \\    } else {
    \\        fib( n - 1 ) + fib( n - 2 )
    \\    };
    \\};
    \\
    \\// an iterative implementation
    \\var fib2 = fn :u64 | n :u64 | {
    \\    var mut a :u64 = 0;
    \\    var mut b :u64 = 1;
    \\    for( 0..n ) {
    \\        var x = a + b;
    \\        a = b;
    \\        b = x;
    \\    }
    \\    return a;
    \\};
    \\
    \\pub var main = fn :void {
    \\    var recursive = fib( 16 );
    \\    var iterative = fib2( 16 );
    \\    std.debug.assert( recursive == iterative );
    \\};
    \\
;

/// The categories a token can belong to; also the tag type of `Token.val`.
const TokenKind = enum {
    symbol,
    operator,
    identifier,
    keyword,
    number,
    string,
    whitespace,
};

/// One lexed token: `str` is the slice of the source the token covers (it
/// borrows from the source buffer), `val` is its kind plus optional payload.
const Token = struct {
    str :[]const u8,
    val :union( TokenKind ) {
        symbol     , // :void,
        operator   :enum {}, // TODO
        identifier , // :void,
        keyword    :enum {}, // TODO
        number     :usize, // TODO dynamic bit width
        string     , // TODO use a getter, avoid allocating for escapes
        whitespace , // :void,
    },
};

/// Radix of a numeric literal.
const Base = enum {
    dec,
    hex,
    bin,
    oct,

    /// Determines the radix of the literal at the START of `src`.
    /// A radix prefix is a literal `0` followed by `x`/`b`/`o` (either case);
    /// anything else, including input shorter than two bytes, is decimal.
    fn get_base( src :[]const u8 ) @This() {
        // Without the leading-zero check, `1b` would be mistaken for binary.
        if( src.len < 2 or src[0] != '0' ) { return .dec; }
        return switch( src[1] ) {
            'x', 'X' => .hex,
            'b', 'B' => .bin,
            'o', 'O' => .oct,
            // '0'...'9', '_' => .dec,
            else => .dec, // end-of-number
        };
        // XXX the chars consumed vary..
    }

    /// Returns whether `char` is a digit of this radix or the `_` separator.
    fn is_valid_char( base :@This(), char :u8 ) bool {
        return switch( base ) {
            .dec => ( char == '_' or ( '0' <= char and char <= '9' ) ),
            .hex => ( char == '_' or ( '0' <= char and char <= '9' )
                or ( 'a' <= char and char <= 'f' )
                or ( 'A' <= char and char <= 'F' ) ),
            .bin => ( char == '_' or ( '0' <= char and char <= '1' ) ),
            .oct => ( char == '_' or ( '0' <= char and char <= '7' ) ),
        };
    }

    /// Returns the length of the leading run of `src` made of valid chars for
    /// this radix. When every byte is valid the result is `src.len`, so the
    /// caller can always add it to a cursor without overflowing.
    fn valid_chars( mode :@This(), src :[]const u8 ) usize {
        return for( src, 0.. ) |c, i| {
            if( !mode.is_valid_char( c ) ) { break i; }
        } else ( src.len );
    }
};

/// Consumes a run of spaces, tabs and newlines starting at `i.*`, advances
/// `i.*` past it and appends one `.whitespace` token.
fn take_whitespace( src :[]const u8, i :*usize, list :*std.ArrayList(Token) ) !void {
    const begin = i.*;
    while( i.* < src.len and switch( src[i.*] ) {
        ' ', '\t', '\n' => true,
        else => false,
    } ) {
        i.* += 1;
    }
    try list.append( .{ .str = src[ begin..i.* ], .val = .whitespace } );
}

/// Consumes a numeric literal starting at `i.*` (optional radix prefix,
/// integer digits, optional `.` fraction), advances `i.*` past it and appends
/// one `.number` token. The numeric payload is a placeholder `0` for now.
/// Returns `error.TODO_exponent` when a `p` exponent follows, because
/// exponent parsing is not implemented yet.
fn take_number( src :[]const u8, i :*usize, list :*std.ArrayList(Token) ) !void {
    const begin = i.*;

    // determine the base /radix ; look at the literal itself, not at the
    // beginning of the whole source
    const mode = Base.get_base( src[ begin.. ] );
    i.* += if( mode != .dec ) ( "0x".len ) else ( 0 );

    // parse the integer portion
    i.* += mode.valid_chars( src[i.*..] );

    // signs ; probably handle signs later in the parser

    // handle decimal ; only when a digit follows the dot, so `0..n` stays a range
    const j = i.* + 1;
    if( j < src.len and src[i.*] == '.' and mode.is_valid_char( src[j] ) ) {
        i.* = j;
        i.* += mode.valid_chars( src[i.*..] );
    }

    // handle exponent / power ; the bounds check matters when the number is
    // the last thing in the source
    if( i.* < src.len and src[i.*] == 'p' ) { // 'e'
        // (dec) 2.3p4 = 2.3 * 10^4
        // (hex) 0xa.8cp-f7 == 0xa.8c * 0x10^-0xf7
        // (bin) 0b1010.1000_1100_p-1111_0111
        // TODO repeat (optionally signed) integer parsing
        // This is reachable from user input, so report it instead of `unreachable`.
        return error.TODO_exponent;
    }

    try list.append( .{ .str = src[ begin..i.* ], .val = .{ .number = 0 } } );
}

/// Consumes a double-quoted string starting at the opening quote at `i.*`,
/// advances `i.*` past the closing quote (or to the end of the source when the
/// string is unterminated) and appends one `.string` token that includes the
/// quotes.
fn take_string( src :[]const u8, i :*usize, list :*std.ArrayList(Token) ) !void {
    const begin = i.*;
    i.* += 1; // opening quote
    while( i.* < src.len and src[i.*] != '"' ) {
        // a backslash escapes the following byte, whatever it is
        i.* += if( src[i.*] == '\\' ) ( 2 ) else ( 1 ); // TODO refine escapes
    }
    // A trailing backslash would step one past the end; clamp so the slice
    // below stays in bounds.
    i.* = @min( i.*, src.len );
    if( i.* < src.len and src[i.*] == '"' ) {
        i.* += 1; // closing quote
    }
    try list.append( .{ .str = src[ begin..i.* ], .val = .string } );
}

/// _or_keyword
/// Consumes a run of letters, digits, `_` and `@` starting at `i.*`, advances
/// `i.*` past it and appends one `.identifier` token. Keywords are not
/// distinguished yet.
fn take_identifier( src :[]const u8, i :*usize, list :*std.ArrayList(Token) ) !void {
    const begin = i.*;
    // ( 'a' <= c and c <= 'z' ) or ( 'A' <= c and c <= 'Z' ) or ( c == '_' ) or ( '0' <= c and c <= '9' );
    // while( {
    //     const c = src[i];
    //     'a' <= c <= 'z' or 'A' <= c <= 'Z' or '0' <= c <= '9' or
    //     c == '_' or c == '@'
    // } ) {
    //     i += 1;
    // }
    while( i.* < src.len and switch( src[i.*] ) {
        'a'...'z', 'A'...'Z', '0'...'9', '_', '@' => true,
        else => false,
    } ) {
        i.* += 1;
    }
    try list.append( .{ .str = src[ begin..i.* ], .val = .identifier } );
}

/// _or_operator
/// Consumes one symbol starting at `i.*`: a known two-character operator, a
/// `//` comment running up to (not including) the next newline, or a single
/// character. Advances `i.*` past it and appends one `.symbol` token.
fn take_symbol( src :[]const u8, i :*usize, list :*std.ArrayList(Token) ) !void {
    // TODO handle multi character operators
    const begin = i.*;
    const startsWith = std.mem.startsWith;
    if     ( startsWith( u8, src[i.*..], ".." ) ) { i.* += "..".len; } // range
    else if( startsWith( u8, src[i.*..], "+=" ) ) { i.* += "+=".len; } // XXX
    else if( startsWith( u8, src[i.*..], "++" ) ) { i.* += "++".len; } // array concat
    else if( startsWith( u8, src[i.*..], "**" ) ) { i.* += "**".len; } // array repetition
    else if( startsWith( u8, src[i.*..], "==" ) ) { i.* += "==".len; } // equality
    else if( startsWith( u8, src[i.*..], "<>" ) ) { i.* += "<>".len; } // inequality
    else if( startsWith( u8, src[i.*..], "<=" ) ) { i.* += "<=".len; } // less than or equal
    else if( startsWith( u8, src[i.*..], ">=" ) ) { i.* += ">=".len; } // greater than or equal
    else if( startsWith( u8, src[i.*..], "//" ) ) {
        // comment ; runs to the end of the line, or of the source
        i.* += std.mem.indexOf( u8, src[i.*..], "\n" ) orelse src[i.*..].len;
    }
    else { i.* += 1; }
    try list.append( .{ .str = src[ begin..i.* ], .val = .symbol } );
}

/// Splits `src` into tokens. Every byte of `src` ends up in exactly one
/// token's `str`, and those slices borrow from `src`.
/// The caller owns the returned slice and frees it with `allocator.free`.
/// Errors: `error.TODO_character` for a `'` literal, `error.invalid_character`
/// for `$ ^ \ `` ` ``, `error.unrecognized_character` for any other unexpected
/// byte, plus whatever the `take_*` helpers and the allocator return.
fn tokenize( allocator :std.mem.Allocator, src :[]const u8 ) ![]Token {
    var list = std.ArrayList( Token ).init( allocator );
    errdefer { list.deinit(); }
    // errdefer {
    //     std.debug.print( "tokens:\n", .{} );
    //     print_tokens( tokens );
    // }

    // TODO newer switch loop syntax
    var i :usize = 0;
    while( i < src.len ) {
        // dispatch on the first byte ; each helper advances `i` itself
        switch( src[i] ) {
            // whitespace
            ' ', '\t', '\n' => { try take_whitespace( src, &i, &list ); },
            // number
            '0'...'9' => { try take_number( src, &i, &list ); },
            // string
            '"' => { try take_string( src, &i, &list ); }, // TODO `/"`
            // character
            '\'' => { return error.TODO_character; },
            // identifier
            'a'...'z', 'A'...'Z', '_', '@' => { try take_identifier( src, &i, &list ); },
            // symbol, ( and multiline string )
            '.', ',', ':', ';',
            '{', '}', '(', ')', '[', ']',
            '!', '?', '#', '=', '~',
            '-', '+', '*', '/', '%',
            '|', '&', '<', '>' => { try take_symbol( src, &i, &list ); },
            // undefined, error
            '$', '^', '\\', '`' => { return error.invalid_character; },
            else => { return error.unrecognized_character; },
        }
    }

    return list.toOwnedSlice();
}