From bee9e6fc9d52fbe1fc1723af5c29d16774d2dfdf Mon Sep 17 00:00:00 2001
From: sam
Date: Mon, 22 Jul 2024 15:46:25 +1200
Subject: [PATCH] optimize tokenizer by moving data ptr

---
 .gitignore          |  3 ++-
 Makefile            |  4 ++--
 example.lisp        |  3 ++-
 include/ast.h       |  5 ++---
 include/slibs       |  2 +-
 include/tokenizer.h |  9 +++++----
 src/ast.c           | 35 ++++++++++++++++-------------------
 src/binary.c        |  8 ++------
 src/main.c          |  3 ++-
 src/tokenizer.c     | 37 ++++++++++++-------------------------
 10 files changed, 46 insertions(+), 63 deletions(-)

diff --git a/.gitignore b/.gitignore
index c468a60..f5374ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.o
 compiler
-example
\ No newline at end of file
+example
+.vscode
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 362b6c9..57e91e5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 BINARY=compiler
 CC=gcc

-CFLAGS=-O3 -Iinclude -Itcc
-LDFLAGS=-Ltcc -ltcc
+CFLAGS=-Iinclude -Itcc -Ofast -flto
+LDFLAGS=-Ltcc -ltcc -Ofast

 CFILES=$(shell find -L src -type f -name '*.c')
 OBJ=$(CFILES:.c=.o)
diff --git a/example.lisp b/example.lisp
index cd3676d..c1af896 100755
--- a/example.lisp
+++ b/example.lisp
@@ -4,4 +4,5 @@

 (printf "5 + (100 / 5) = %d\n"
   (add 5
-    (divide 100 5)))
\ No newline at end of file
+    (divide 100 5)))
+
diff --git a/include/ast.h b/include/ast.h
index fe75630..e4c348a 100644
--- a/include/ast.h
+++ b/include/ast.h
@@ -24,10 +24,9 @@ typedef struct ASTNode {
     ASTVec params;
 } ASTNode;

-ASTNode* ast_parse(Token** token);
-ASTNode* ast_walk(Token** token);
+ASTNode* ast_parse(TokenVec* token);
+ASTNode* ast_walk(TokenVec* token);
 void ast_print(ASTNode* node, int indent);
-void ast_step(Token** token);
 ASTNode* ast_create_empty(ASTType type);
 ASTNode* ast_create_program(ASTVec body);
 ASTNode* ast_create_call_expression(const char* name, ASTVec params);
diff --git a/include/slibs b/include/slibs
index 1de5b35..b0b09f6 160000
--- a/include/slibs
+++ b/include/slibs
@@ -1 +1 @@
-Subproject commit 1de5b35258cffda13d4bcf505e83c976e448e750
+Subproject commit b0b09f6fd9efd5367dbac19629caf0d027e657e2
diff --git a/include/tokenizer.h b/include/tokenizer.h
index e961f5f..0217b8a 100644
--- a/include/tokenizer.h
+++ b/include/tokenizer.h
@@ -14,13 +14,14 @@ typedef enum TokenType {
 typedef struct Token {
     char* value;
     TokenType type;
-    struct Token* next;
 } Token;

-Token* tokenize(char* input);
+typedef sl_vec(Token*) TokenVec;

-Token* token_create(char* value, TokenType type, Token* root);
+void tokenize(char* input, TokenVec* tokens);
+
+Token* token_create(char* value, TokenType type);
 Token* token_append(Token* root, Token* new_token);
-void tokens_print(Token* root);
+void tokens_print(TokenVec tokens);

 #endif
\ No newline at end of file
diff --git a/src/ast.c b/src/ast.c
index 619ecc1..d8c6692 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -8,41 +8,42 @@ const char* ASTTypeText[] = {
     "StringLiteral"
 };

-ASTNode* ast_parse(Token** token) {
+ASTNode* ast_parse(TokenVec* token) {
     ASTVec body = { 0 };

-    while((*token) != NULL) {
+    Token** end = sl_vec_end(*token);
+    while(token->data != end) {
         sl_vec_push(body, ast_walk(token));
     }

     return ast_create_program(body);
 }

-ASTNode* ast_walk(Token** token) {
-    if((*token)->type == TOKEN_NUMBER) {
-        ASTNode* number = ast_create_number_literal((*token)->value);
-        ast_step(token);
+ASTNode* ast_walk(TokenVec* token) {
+    if(token->data[0]->type == TOKEN_NUMBER) {
+        ASTNode* number = ast_create_number_literal(token->data[0]->value);
+        sl_vec_forward(*token);
         return number;
     }

-    if((*token)->type == TOKEN_STRING) {
-        ASTNode* string = ast_create_string_literal((*token)->value);
-        ast_step(token);
+    if(token->data[0]->type == TOKEN_STRING) {
+        ASTNode* string = ast_create_string_literal(token->data[0]->value);
+        sl_vec_forward(*token);
         return string;
     }

-    if((*token)->type == TOKEN_LPAREN) { // Call expression
-        ast_step(token);
-        const char* name = (*token)->value;
+    if(token->data[0]->type == TOKEN_LPAREN) { // Call expression
+        sl_vec_forward(*token);
+        const char* name = token->data[0]->value;
         ASTVec params = { 0 };

-        ast_step(token);
+        sl_vec_forward(*token);

-        while((*token)->type != TOKEN_RPAREN) {
+        while(token->data[0]->type != TOKEN_RPAREN) {
             sl_vec_push(params, ast_walk(token));
         }

-        ast_step(token);
+        sl_vec_forward(*token);

         return ast_create_call_expression(name, params);
     }
@@ -50,10 +51,6 @@ ASTNode* ast_walk(Token** token) {
     return NULL;
 }

-void ast_step(Token** token) {
-    (*token) = (*token)->next;
-}
-
 void ast_print(ASTNode* node, int indent) {
     switch(node->type) {
         case AST_PROGRAM:
diff --git a/src/binary.c b/src/binary.c
index a34c322..5969fc7 100644
--- a/src/binary.c
+++ b/src/binary.c
@@ -17,13 +17,9 @@ int binary_produce(const char* code, Args args) {
     assert(tcc_add_file(state, "std/std.c") == 0);
     assert(tcc_compile_string(state, code) == 0);

-    int ret = -1;
     if(args.build) {
-        ret = tcc_output_file(state, args.output);
-        printf("Binary produced: %s\n", args.output);
+        return tcc_output_file(state, args.output);
     } else {
-        ret = tcc_run(state, 0, NULL);
+        return tcc_run(state, 0, NULL);
     }
-
-    return ret;
 }
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
index 3cb9dda..d2efc98 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,7 +14,8 @@ int main(int argc, char* argv[]) {
     sl_read_file(args.input, &buffer);

     printf("Tokens:\n");
-    Token* tokens = tokenize(sl_c_str(buffer));
+    TokenVec tokens = { 0 };
+    tokenize(sl_c_str(buffer), &tokens);
     tokens_print(tokens);
     printf("\n");

diff --git a/src/tokenizer.c b/src/tokenizer.c
index 239f19b..4fdaebf 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -14,34 +14,33 @@ const char* TokenTypeText[] = {
     "string"
 };

-Token* tokenize(char* input) {
+void tokenize(char* input, TokenVec* tokens) {
     regex_t name = regex_create("[a-z_]", REG_ICASE);
     regex_t number = regex_create("[0-9]", 0);
     regex_t string = regex_create("\"", 0);
     regex_t whitespace = regex_create("[ \n]", 0);

-    Token* root = NULL;
     sl_string collected = {0};
     char c = *input;
     while (c != '\0') {
         if (match_char(name, c)) {
             collected = collect_until_no_match(name, &input);
-            root = token_create(sl_c_str(collected), TOKEN_NAME, root);
+            sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NAME));
         } else if (match_char(number, c)) {
             collected = collect_until_no_match(number, &input);
-            root = token_create(sl_c_str(collected), TOKEN_NUMBER, root);
+            sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NUMBER));
         } else if (c == '(') {
-            root = token_create("(", TOKEN_LPAREN, root);
+            sl_vec_push(*tokens, token_create("(", TOKEN_LPAREN));
             input++;
         } else if (c == ')') {
-            root = token_create(")", TOKEN_RPAREN, root);
+            sl_vec_push(*tokens, token_create(")", TOKEN_RPAREN));
             input++;
         } else if (match_char(whitespace, c)) {
             input++;
         } else if (match_char(string, c)) {
             regex_step(&input, &c);
             collected = collect_until_match_escapable(string, &input);
-            root = token_create(sl_c_str(collected), TOKEN_STRING, root);
+            sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_STRING));
             input++;
         } else {
             printf("%c: no match\n", c);
@@ -55,30 +54,18 @@ Token* tokenize(char* input) {
     regfree(&number);
     regfree(&string);
     regfree(&whitespace);

-    return root;
 }

-Token* token_create(char* value, TokenType type, Token* root) {
-    Token* new_token = calloc(1, sizeof(Token));
+Token* token_create(char* value, TokenType type) {
+    Token* new_token = malloc(sizeof(Token));
     new_token->value = value;
     new_token->type = type;
-    return token_append(root, new_token);;
+    return new_token;
 }

-Token* token_append(Token* root, Token* new_token) {
-    if (!root) return new_token;
-    Token* current = root;
-    while (current->next) {
-        current = current->next;
-    }
-    current->next = new_token;
-    return root;
-}
-
-void tokens_print(Token* root) {
-    while(root != NULL) {
-        printf("%s: %s\n", TokenTypeText[root->type], root->value);
-        root = root->next;
+void tokens_print(TokenVec tokens) {
+    for(sl_vec_it(token, tokens)) {
+        printf("%s: %s\n", TokenTypeText[(*token)->type], (*token)->value);
     }
 }
\ No newline at end of file
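
Note (not part of the commit): a minimal sketch of how the new TokenVec API in this patch is meant to be driven end to end. It only uses calls visible in the diff (tokenize, tokens_print, ast_parse, ast_print); the wrapper name parse_and_dump is a hypothetical helper, and the exact behaviour of the sl_vec_* macros is assumed from the slibs submodule rather than shown here.

    #include "tokenizer.h"
    #include "ast.h"

    /* Tokens now live in one contiguous TokenVec. ast_parse() consumes
     * them by moving the vector's data pointer (sl_vec_forward) instead
     * of chasing Token->next links, which is what the subject line's
     * "moving data ptr" refers to. */
    static void parse_and_dump(char* source) {
        TokenVec tokens = { 0 };        /* zero-initialised vector */
        tokenize(source, &tokens);      /* fills it via sl_vec_push */
        tokens_print(tokens);

        ASTNode* program = ast_parse(&tokens);  /* advances tokens.data */
        ast_print(program, 0);
    }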