commit 882f1b637b677223e4e9cf1b4e9639ac5f97b615
Author: sam
Date:   Mon Feb 3 18:42:24 2025 +1300

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b883f1f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.exe
diff --git a/compiler b/compiler
new file mode 100644
index 0000000..0c36b4a
Binary files /dev/null and b/compiler differ
diff --git a/main.odin b/main.odin
new file mode 100644
index 0000000..ede1711
--- /dev/null
+++ b/main.odin
@@ -0,0 +1,124 @@
+package main
+
+import "core:fmt"
+import "core:log"
+import "core:os"
+import "core:strconv"
+import "core:text/regex"
+
+String :: struct {
+    value: string,
+}
+
+Name :: struct {
+    value: string,
+}
+
+Number :: struct {
+    value: u64,
+}
+
+Left_Paren :: struct {}
+Right_Paren :: struct {}
+Comma :: struct {}
+
+Token :: union {
+    String,
+    Name,
+    Number,
+    Left_Paren,
+    Right_Paren,
+    Comma,
+}
+
+Populator_Proc :: proc(value: string) -> Token
+
+Token_Desc :: struct {
+    regex_comp:     regex.Regular_Expression,
+    populator_proc: Populator_Proc,
+}
+
+Tokenizer :: struct {
+    token_descs: [dynamic]Token_Desc,
+}
+
+tokenizer_add_token :: proc(
+    tokenizer: ^Tokenizer,
+    regex_string: string,
+    populator_proc: Populator_Proc,
+) -> regex.Error {
+    append(
+        &tokenizer.token_descs,
+        Token_Desc{regex_comp = regex.create(regex_string) or_return, populator_proc = populator_proc},
+    )
+
+    return nil
+}
+
+tokenizer_create :: proc() -> (tokenizer: Tokenizer, err: regex.Error) {
+    tokenizer_add_token(&tokenizer, "^\\d+", proc(value: string) -> Token {
+        parsed, _ := strconv.parse_u64(value)
+        return Number{value = parsed}
+    }) or_return
+
+    tokenizer_add_token(&tokenizer, "^\".*?\"", proc(value: string) -> Token {
+        return String{value = value[1:len(value) - 1]}
+    }) or_return
+
+    tokenizer_add_token(&tokenizer, "^\\w+", proc(value: string) -> Token {
+        return Name{value = value}
+    }) or_return
+
+    tokenizer_add_token(&tokenizer, "^\\(", proc(_: string) -> Token {return Left_Paren{}}) or_return
+    tokenizer_add_token(&tokenizer, "^\\)", proc(_: string) -> Token {return Right_Paren{}}) or_return
+    tokenizer_add_token(&tokenizer, "^,", proc(_: string) -> Token {return Comma{}}) or_return
+
+    return
+}
+
+tokenize :: proc(tokenizer: Tokenizer, input: string) -> []Token {
+    tokens: [dynamic]Token
+
+    i := 0
+    loop: for true {
+        for reg in tokenizer.token_descs {
+            if i >= len(input) {
+                break loop
+            }
+
+            capture, matched := regex.match(reg.regex_comp, input[i:])
+            if matched {
+                token := reg.populator_proc(input[i:i + capture.pos[0][1]])
+                append(&tokens, token)
+                i += capture.pos[0][1]
+                continue loop
+            }
+        }
+
+        i += 1
+    }
+
+    return tokens[:]
+}
+
+main :: proc() {
+    context.logger = log.create_console_logger()
+
+    bytes, ok := os.read_entire_file(os.args[1])
+    if !ok {
+        log.fatal("Failed to read file")
+        return
+    }
+    input := string(bytes)
+
+    tokenizer, err := tokenizer_create()
+    if err != nil {
+        log.fatal(err)
+        return
+    }
+
+    tokens := tokenize(tokenizer, input)
+    for token in tokens {
+        fmt.println(token)
+    }
+}
diff --git a/odinfmt.json b/odinfmt.json
new file mode 100644
index 0000000..4137bac
--- /dev/null
+++ b/odinfmt.json
@@ -0,0 +1,5 @@
+{
+    "character_width": 110,
+    "tabs": false,
+    "tabs_width": 4
+}
diff --git a/test.txt b/test.txt
new file mode 100644
index 0000000..1d0cc8d
--- /dev/null
+++ b/test.txt
@@ -0,0 +1 @@
+say_hi("bob", 26)