From bee9e6fc9d52fbe1fc1723af5c29d16774d2dfdf Mon Sep 17 00:00:00 2001
From: sam
Date: Mon, 22 Jul 2024 15:46:25 +1200
Subject: [PATCH] optimize tokenizer by moving data ptr

---
 .gitignore          |  3 ++-
 Makefile            |  4 ++--
 example.lisp        |  3 ++-
 include/ast.h       |  5 ++---
 include/slibs       |  2 +-
 include/tokenizer.h |  9 +++++----
 src/ast.c           | 35 ++++++++++++++++-------------------
 src/binary.c        |  8 ++------
 src/main.c          |  3 ++-
 src/tokenizer.c     | 37 ++++++++++++-------------------------
 10 files changed, 46 insertions(+), 63 deletions(-)

diff --git a/.gitignore b/.gitignore
index c468a60..f5374ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.o
 compiler
-example
\ No newline at end of file
+example
+.vscode
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 362b6c9..57e91e5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 BINARY=compiler
 CC=gcc

-CFLAGS=-O3 -Iinclude -Itcc
-LDFLAGS=-Ltcc -ltcc
+CFLAGS=-Iinclude -Itcc -Ofast -flto
+LDFLAGS=-Ltcc -ltcc -Ofast

 CFILES=$(shell find -L src -type f -name '*.c')
 OBJ=$(CFILES:.c=.o)
diff --git a/example.lisp b/example.lisp
index cd3676d..c1af896 100755
--- a/example.lisp
+++ b/example.lisp
@@ -4,4 +4,5 @@

 (printf "5 + (100 / 5) = %d\n"
   (add 5
-    (divide 100 5)))
\ No newline at end of file
+    (divide 100 5)))
+
diff --git a/include/ast.h b/include/ast.h
index fe75630..e4c348a 100644
--- a/include/ast.h
+++ b/include/ast.h
@@ -24,10 +24,9 @@ typedef struct ASTNode {
     ASTVec params;
 } ASTNode;

-ASTNode* ast_parse(Token** token);
-ASTNode* ast_walk(Token** token);
+ASTNode* ast_parse(TokenVec* token);
+ASTNode* ast_walk(TokenVec* token);
 void ast_print(ASTNode* node, int indent);
-void ast_step(Token** token);
 ASTNode* ast_create_empty(ASTType type);
 ASTNode* ast_create_program(ASTVec body);
 ASTNode* ast_create_call_expression(const char* name, ASTVec params);
diff --git a/include/slibs b/include/slibs
index 1de5b35..b0b09f6 160000
--- a/include/slibs
+++ b/include/slibs
@@ -1 +1 @@
-Subproject commit 1de5b35258cffda13d4bcf505e83c976e448e750
+Subproject commit b0b09f6fd9efd5367dbac19629caf0d027e657e2
diff --git a/include/tokenizer.h b/include/tokenizer.h
index e961f5f..0217b8a 100644
--- a/include/tokenizer.h
+++ b/include/tokenizer.h
@@ -14,13 +14,14 @@ typedef enum TokenType {
 typedef struct Token {
     char* value;
     TokenType type;
-    struct Token* next;
 } Token;

-Token* tokenize(char* input);
+typedef sl_vec(Token*) TokenVec;

-Token* token_create(char* value, TokenType type, Token* root);
+void tokenize(char* input, TokenVec* tokens);
+
+Token* token_create(char* value, TokenType type);
 Token* token_append(Token* root, Token* new_token);
-void tokens_print(Token* root);
+void tokens_print(TokenVec tokens);

 #endif
\ No newline at end of file
diff --git a/src/ast.c b/src/ast.c
index 619ecc1..d8c6692 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -8,41 +8,42 @@ const char* ASTTypeText[] = {
     "StringLiteral"
 };

-ASTNode* ast_parse(Token** token) {
+ASTNode* ast_parse(TokenVec* token) {
     ASTVec body = { 0 };

-    while((*token) != NULL) {
+    Token** end = sl_vec_end(*token);
+    while(token->data != end) {
         sl_vec_push(body, ast_walk(token));
     }

     return ast_create_program(body);
 }

-ASTNode* ast_walk(Token** token) {
-    if((*token)->type == TOKEN_NUMBER) {
-        ASTNode* number = ast_create_number_literal((*token)->value);
-        ast_step(token);
+ASTNode* ast_walk(TokenVec* token) {
+    if(token->data[0]->type == TOKEN_NUMBER) {
+        ASTNode* number = ast_create_number_literal(token->data[0]->value);
+        sl_vec_forward(*token);
         return number;
     }

-    if((*token)->type == TOKEN_STRING) {
-        ASTNode* string = ast_create_string_literal((*token)->value);
-        ast_step(token);
+    if(token->data[0]->type == TOKEN_STRING) {
+        ASTNode* string = ast_create_string_literal(token->data[0]->value);
+        sl_vec_forward(*token);
         return string;
     }

-    if((*token)->type == TOKEN_LPAREN) { // Call expression
-        ast_step(token);
-        const char* name = (*token)->value;
+    if(token->data[0]->type == TOKEN_LPAREN) { // Call expression
+        sl_vec_forward(*token);
+        const char* name = token->data[0]->value;
         ASTVec params = { 0 };

-        ast_step(token);
+        sl_vec_forward(*token);

-        while((*token)->type != TOKEN_RPAREN) {
+        while(token->data[0]->type != TOKEN_RPAREN) {
             sl_vec_push(params, ast_walk(token));
         }

-        ast_step(token);
+        sl_vec_forward(*token);

         return ast_create_call_expression(name, params);
     }
@@ -50,10 +51,6 @@ ASTNode* ast_walk(Token** token) {
     return NULL;
 }

-void ast_step(Token** token) {
-    (*token) = (*token)->next;
-}
-
 void ast_print(ASTNode* node, int indent) {
     switch(node->type) {
         case AST_PROGRAM:
diff --git a/src/binary.c b/src/binary.c
index a34c322..5969fc7 100644
--- a/src/binary.c
+++ b/src/binary.c
@@ -17,13 +17,9 @@ int binary_produce(const char* code, Args args) {
     assert(tcc_add_file(state, "std/std.c") == 0);
     assert(tcc_compile_string(state, code) == 0);

-    int ret = -1;
     if(args.build) {
-        ret = tcc_output_file(state, args.output);
-        printf("Binary produced: %s\n", args.output);
+        return tcc_output_file(state, args.output);
     } else {
-        ret = tcc_run(state, 0, NULL);
+        return tcc_run(state, 0, NULL);
     }
-
-    return ret;
 }
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
index 3cb9dda..d2efc98 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,7 +14,8 @@ int main(int argc, char* argv[]) {
     sl_read_file(args.input, &buffer);

     printf("Tokens:\n");
-    Token* tokens = tokenize(sl_c_str(buffer));
+    TokenVec tokens = { 0 };
+    tokenize(sl_c_str(buffer), &tokens);
     tokens_print(tokens);
     printf("\n");

diff --git a/src/tokenizer.c b/src/tokenizer.c
index 239f19b..4fdaebf 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -14,34 +14,33 @@ const char* TokenTypeText[] = {
     "string"
 };

-Token* tokenize(char* input) {
+void tokenize(char* input, TokenVec* tokens) {
     regex_t name = regex_create("[a-z_]", REG_ICASE);
     regex_t number = regex_create("[0-9]", 0);
     regex_t string = regex_create("\"", 0);
     regex_t whitespace = regex_create("[ \n]", 0);

-    Token* root = NULL;
     sl_string collected = {0};
     char c = *input;
     while (c != '\0') {
         if (match_char(name, c)) {
             collected = collect_until_no_match(name, &input);
-            root = token_create(sl_c_str(collected), TOKEN_NAME, root);
+            sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NAME));
         } else if (match_char(number, c)) {
             collected = collect_until_no_match(number, &input);
-            root = token_create(sl_c_str(collected), TOKEN_NUMBER, root);
+            sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NUMBER));
         } else if (c == '(') {
-            root = token_create("(", TOKEN_LPAREN, root);
+            sl_vec_push(*tokens, token_create("(", TOKEN_LPAREN));
             input++;
         } else if (c == ')') {
-            root = token_create(")", TOKEN_RPAREN, root);
+            sl_vec_push(*tokens, token_create(")", TOKEN_RPAREN));
             input++;
         } else if (match_char(whitespace, c)) {
             input++;
         } else if (match_char(string, c)) {
             regex_step(&input, &c);
             collected = collect_until_match_escapable(string, &input);
-            root = token_create(sl_c_str(collected), TOKEN_STRING, root);
+            sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_STRING));
             input++;
         } else {
             printf("%c: no match\n", c);
@@ -55,30 +54,18 @@ Token* tokenize(char* input) {
     regfree(&number);
     regfree(&string);
     regfree(&whitespace);

-    return root;
 }

-Token* token_create(char* value, TokenType type, Token* root) {
-    Token* new_token = calloc(1, sizeof(Token));
+Token* token_create(char* value, TokenType type) {
+    Token* new_token = malloc(sizeof(Token));
     new_token->value = value;
     new_token->type = type;
-    return token_append(root, new_token);;
+    return new_token;
 }

-Token* token_append(Token* root, Token* new_token) {
-    if (!root) return new_token;
-    Token* current = root;
-    while (current->next) {
-        current = current->next;
-    }
-    current->next = new_token;
-    return root;
-}
-
-void tokens_print(Token* root) {
-    while(root != NULL) {
-        printf("%s: %s\n", TokenTypeText[root->type], root->value);
-        root = root->next;
+void tokens_print(TokenVec tokens) {
+    for(sl_vec_it(token, tokens)) {
+        printf("%s: %s\n", TokenTypeText[(*token)->type], (*token)->value);
     }
 }
\ No newline at end of file
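
Note (not part of the commit): a minimal sketch of how the new TokenVec API in this patch is meant to be driven end to end. It only uses calls visible in the diff (tokenize, tokens_print, ast_parse, ast_print); the wrapper name parse_and_dump is a hypothetical helper, and the exact behaviour of the sl_vec_* macros is assumed from the slibs submodule rather than shown here.

    #include "tokenizer.h"
    #include "ast.h"

    /* Tokens now live in one contiguous TokenVec. ast_parse() consumes
     * them by moving the vector's data pointer (sl_vec_forward) instead
     * of chasing Token->next links, which is what the subject line's
     * "moving data ptr" refers to. */
    static void parse_and_dump(char* source) {
        TokenVec tokens = { 0 };        /* zero-initialised vector */
        tokenize(source, &tokens);      /* fills it via sl_vec_push */
        tokens_print(tokens);

        ASTNode* program = ast_parse(&tokens);  /* advances tokens.data */
        ast_print(program, 0);
    }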