package main

import "core:fmt"
import "core:log"
import "core:os"
import "core:strconv"
import "core:text/regex"
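
// Token variants. String, Name, and Number carry the matched (or parsed)
// value; the punctuation tokens are empty marker structs.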
String :: struct {
    value: string,
}

Name :: struct {
    value: string,
}

Number :: struct {
    value: u64,
}

Left_Paren :: struct {}
Right_Paren :: struct {}
Comma :: struct {}

Token :: union {
    String,
    Name,
    Number,
    Left_Paren,
    Right_Paren,
    Comma,
}
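
// A Populator_Proc converts the matched slice of input into a concrete Token.
// Token_Desc pairs a compiled regular expression with the populator that
// builds its token; a Tokenizer is an ordered list of these descriptors.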
Populator_Proc :: proc(value: string) -> Token

Token_Desc :: struct {
    regex_comp:     regex.Regular_Expression,
    populator_proc: Populator_Proc,
}

Tokenizer :: struct {
    token_descs: [dynamic]Token_Desc,
}
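
// Compiles regex_string and registers it together with its populator.
// regex.create can fail, so its error is propagated with or_return.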
tokenizer_add_token :: proc(
    tokenizer: ^Tokenizer,
    regex_string: string,
    populator_proc: Populator_Proc,
) -> regex.Error {
    append(
        &tokenizer.token_descs,
        Token_Desc{regex_comp = regex.create(regex_string) or_return, populator_proc = populator_proc},
    )

    return nil
}
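
// Builds the default tokenizer. Registration order matters: "^\\d+" is added
// before "^\\w+" so that digit runs become Number tokens rather than Name
// tokens, and the string pattern uses a lazy ".*?" so it stops at the first
// closing quote.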
tokenizer_create :: proc() -> (tokenizer: Tokenizer, err: regex.Error) {
    // Integer literals.
    tokenizer_add_token(&tokenizer, "^\\d+", proc(value: string) -> Token {
        parsed, _ := strconv.parse_u64(value)
        return Number{value = parsed}
    }) or_return

    // Double-quoted strings; the surrounding quotes are stripped.
    tokenizer_add_token(&tokenizer, "^\".*?\"", proc(value: string) -> Token {
        return String{value = value[1:len(value) - 1]}
    }) or_return

    // Identifiers.
    tokenizer_add_token(&tokenizer, "^\\w+", proc(value: string) -> Token {
        return Name{value = value}
    }) or_return

    // Punctuation.
    tokenizer_add_token(&tokenizer, "^\\(", proc(_: string) -> Token {return Left_Paren{}}) or_return
    tokenizer_add_token(&tokenizer, "^\\)", proc(_: string) -> Token {return Right_Paren{}}) or_return
    tokenizer_add_token(&tokenizer, "^,", proc(_: string) -> Token {return Comma{}}) or_return

    return
}
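
// Scans the input left to right, trying each registered pattern at the
// current position and emitting a token for the first match. For example,
// a hypothetical input of
//     print("hello", 42)
// would yield Name, Left_Paren, String, Comma, Number, Right_Paren.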
tokenize :: proc(tokenizer: Tokenizer, input: string) -> []Token {
    tokens: [dynamic]Token

    i := 0
    loop: for {
        for reg in tokenizer.token_descs {
            if i >= len(input) {
                break loop
            }

            capture, matched := regex.match(reg.regex_comp, input[i:])
            if matched {
                // pos[0] is the span of the whole match within input[i:].
                token := reg.populator_proc(input[i:i + capture.pos[0][1]])
                append(&tokens, token)
                i += capture.pos[0][1]
                continue loop
            }
        }

        // No pattern matched at this position; skip the character (e.g. whitespace).
        i += 1
    }

    return tokens[:]
}
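
// Entry point: reads the file named by the first command-line argument,
// tokenizes its contents, and prints one token per line.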
main :: proc() {
    context.logger = log.create_console_logger()

    // The input file path is expected in os.args[1].
    bytes, ok := os.read_entire_file(os.args[1])
    if !ok {
        log.fatal("Failed to read file")
        return
    }
    input := string(bytes)

    tokenizer, err := tokenizer_create()
    if err != nil {
        log.fatal(err)
        return
    }

    tokens := tokenize(tokenizer, input)
    for token in tokens {
        fmt.println(token)
    }
}