package main

import "core:fmt"
import "core:log"
import "core:os"
import "core:strconv"
import "core:text/regex"

// Token variants produced by the tokenizer.
String :: struct {
	value: string,
}

Name :: struct {
	value: string,
}

Number :: struct {
	value: u64,
}

Left_Paren :: struct {}
Right_Paren :: struct {}
Comma :: struct {}

Token :: union {
	String,
	Name,
	Number,
	Left_Paren,
	Right_Paren,
	Comma,
}

// Turns the matched text into a concrete token value.
Populator_Proc :: proc(value: string) -> Token

// A compiled pattern paired with the proc that builds its token.
Token_Desc :: struct {
	regex_comp:     regex.Regular_Expression,
	populator_proc: Populator_Proc,
}

Tokenizer :: struct {
	token_descs: [dynamic]Token_Desc,
}

// Compiles the pattern and registers it together with its populator.
tokenizer_add_token :: proc(
	tokenizer: ^Tokenizer,
	regex_string: string,
	populator_proc: Populator_Proc,
) -> regex.Error {
	append(
		&tokenizer.token_descs,
		Token_Desc{
			regex_comp = regex.create(regex_string) or_return,
			populator_proc = populator_proc,
		},
	)
	return nil
}

// Registers the token patterns. Order matters: numbers must come before
// names, otherwise `^\w+` would also consume digits.
tokenizer_create :: proc() -> (tokenizer: Tokenizer, err: regex.Error) {
	tokenizer_add_token(&tokenizer, "^\\d+", proc(value: string) -> Token {
		parsed, _ := strconv.parse_u64(value)
		return Number{value = parsed}
	}) or_return
	tokenizer_add_token(&tokenizer, "^\".*?\"", proc(value: string) -> Token {
		// Strip the surrounding quotes.
		return String{value = value[1:len(value) - 1]}
	}) or_return
	tokenizer_add_token(&tokenizer, "^\\w+", proc(value: string) -> Token {
		return Name{value = value}
	}) or_return
	tokenizer_add_token(&tokenizer, "^\\(", proc(_: string) -> Token {return Left_Paren{}}) or_return
	tokenizer_add_token(&tokenizer, "^\\)", proc(_: string) -> Token {return Right_Paren{}}) or_return
	tokenizer_add_token(&tokenizer, "^,", proc(_: string) -> Token {return Comma{}}) or_return
	return
}

// Walks the input, trying each pattern at the current offset. On a match the
// populator builds a token and the offset advances past the match; otherwise
// the offset advances by one byte, skipping characters no pattern recognizes.
tokenize :: proc(tokenizer: Tokenizer, input: string) -> []Token {
	tokens: [dynamic]Token
	i := 0
	loop: for {
		for reg in tokenizer.token_descs {
			if i >= len(input) {
				break loop
			}
			capture, matched := regex.match(reg.regex_comp, input[i:])
			if matched {
				token := reg.populator_proc(input[i:i + capture.pos[0][1]])
				append(&tokens, token)
				i += capture.pos[0][1]
				continue loop
			}
		}
		i += 1
	}
	return tokens[:]
}

main :: proc() {
	context.logger = log.create_console_logger()
	bytes, ok := os.read_entire_file(os.args[1])
	if !ok {
		log.fatal("Failed to read file")
		return
	}
	input := string(bytes)
	tokenizer, err := tokenizer_create()
	if err != nil {
		log.fatal(err)
		return
	}
	tokens := tokenize(tokenizer, input)
	for token in tokens {
		fmt.println(token)
	}
}
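/*
	Usage sketch, assuming a hypothetical input file `example.txt` containing
	a line such as

		greet("world", 3)

	Running `odin run . -- example.txt` (arguments after `--` are forwarded to
	the program, so `os.args[1]` is the file path) should print the tokens in
	order: a Name for `greet`, a Left_Paren, a String holding `world`, a Comma,
	a Number holding 3, and a Right_Paren. The space after the comma matches no
	pattern, so the `i += 1` fallback in `tokenize` simply skips it.
*/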