optimize tokenizer by moving data ptr

This commit is contained in:
sam 2024-07-22 15:46:25 +12:00
parent ce375024f3
commit bee9e6fc9d
10 changed files with 46 additions and 63 deletions

3
.gitignore vendored
View file

@ -1,3 +1,4 @@
*.o
compiler
example
example
.vscode

View file

@ -1,8 +1,8 @@
BINARY=compiler
CC=gcc
CFLAGS=-O3 -Iinclude -Itcc
LDFLAGS=-Ltcc -ltcc
CFLAGS=-Iinclude -Itcc -Ofast -flto
LDFLAGS=-Ltcc -ltcc -Ofast
CFILES=$(shell find -L src -type f -name '*.c')
OBJ=$(CFILES:.c=.o)

View file

@ -4,4 +4,5 @@
(printf "5 + (100 / 5) = %d\n"
(add 5
(divide 100 5)))
(divide 100 5)))

View file

@ -24,10 +24,9 @@ typedef struct ASTNode {
ASTVec params;
} ASTNode;
ASTNode* ast_parse(Token** token);
ASTNode* ast_walk(Token** token);
ASTNode* ast_parse(TokenVec* token);
ASTNode* ast_walk(TokenVec* token);
void ast_print(ASTNode* node, int indent);
void ast_step(Token** token);
ASTNode* ast_create_empty(ASTType type);
ASTNode* ast_create_program(ASTVec body);
ASTNode* ast_create_call_expression(const char* name, ASTVec params);

@ -1 +1 @@
Subproject commit 1de5b35258cffda13d4bcf505e83c976e448e750
Subproject commit b0b09f6fd9efd5367dbac19629caf0d027e657e2

View file

@ -14,13 +14,14 @@ typedef enum TokenType {
typedef struct Token {
char* value;
TokenType type;
struct Token* next;
} Token;
Token* tokenize(char* input);
typedef sl_vec(Token*) TokenVec;
Token* token_create(char* value, TokenType type, Token* root);
void tokenize(char* input, TokenVec* tokens);
Token* token_create(char* value, TokenType type);
Token* token_append(Token* root, Token* new_token);
void tokens_print(Token* root);
void tokens_print(TokenVec tokens);
#endif

View file

@ -8,41 +8,42 @@ const char* ASTTypeText[] = {
"StringLiteral"
};
ASTNode* ast_parse(Token** token) {
ASTNode* ast_parse(TokenVec* token) {
ASTVec body = { 0 };
while((*token) != NULL) {
Token** end = sl_vec_end(*token);
while(token->data != end) {
sl_vec_push(body, ast_walk(token));
}
return ast_create_program(body);
}
ASTNode* ast_walk(Token** token) {
if((*token)->type == TOKEN_NUMBER) {
ASTNode* number = ast_create_number_literal((*token)->value);
ast_step(token);
ASTNode* ast_walk(TokenVec* token) {
if(token->data[0]->type == TOKEN_NUMBER) {
ASTNode* number = ast_create_number_literal(token->data[0]->value);
sl_vec_forward(*token);
return number;
}
if((*token)->type == TOKEN_STRING) {
ASTNode* string = ast_create_string_literal((*token)->value);
ast_step(token);
if(token->data[0]->type == TOKEN_STRING) {
ASTNode* string = ast_create_string_literal(token->data[0]->value);
sl_vec_forward(*token);
return string;
}
if((*token)->type == TOKEN_LPAREN) { // Call expression
ast_step(token);
const char* name = (*token)->value;
if(token->data[0]->type == TOKEN_LPAREN) { // Call expression
sl_vec_forward(*token);
const char* name = token->data[0]->value;
ASTVec params = { 0 };
ast_step(token);
sl_vec_forward(*token);
while((*token)->type != TOKEN_RPAREN) {
while(token->data[0]->type != TOKEN_RPAREN) {
sl_vec_push(params, ast_walk(token));
}
ast_step(token);
sl_vec_forward(*token);
return ast_create_call_expression(name, params);
}
@ -50,10 +51,6 @@ ASTNode* ast_walk(Token** token) {
return NULL;
}
void ast_step(Token** token) {
(*token) = (*token)->next;
}
void ast_print(ASTNode* node, int indent) {
switch(node->type) {
case AST_PROGRAM:

View file

@ -17,13 +17,9 @@ int binary_produce(const char* code, Args args) {
assert(tcc_add_file(state, "std/std.c") == 0);
assert(tcc_compile_string(state, code) == 0);
int ret = -1;
if(args.build) {
ret = tcc_output_file(state, args.output);
printf("Binary produced: %s\n", args.output);
return tcc_output_file(state, args.output);
} else {
ret = tcc_run(state, 0, NULL);
return tcc_run(state, 0, NULL);
}
return ret;
}

View file

@ -14,7 +14,8 @@ int main(int argc, char* argv[]) {
sl_read_file(args.input, &buffer);
printf("Tokens:\n");
Token* tokens = tokenize(sl_c_str(buffer));
TokenVec tokens = { 0 };
tokenize(sl_c_str(buffer), &tokens);
tokens_print(tokens);
printf("\n");

View file

@ -14,34 +14,33 @@ const char* TokenTypeText[] = {
"string"
};
Token* tokenize(char* input) {
void tokenize(char* input, TokenVec* tokens) {
regex_t name = regex_create("[a-z_]", REG_ICASE);
regex_t number = regex_create("[0-9]", 0);
regex_t string = regex_create("\"", 0);
regex_t whitespace = regex_create("[ \n]", 0);
Token* root = NULL;
sl_string collected = {0};
char c = *input;
while (c != '\0') {
if (match_char(name, c)) {
collected = collect_until_no_match(name, &input);
root = token_create(sl_c_str(collected), TOKEN_NAME, root);
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NAME));
} else if (match_char(number, c)) {
collected = collect_until_no_match(number, &input);
root = token_create(sl_c_str(collected), TOKEN_NUMBER, root);
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NUMBER));
} else if (c == '(') {
root = token_create("(", TOKEN_LPAREN, root);
sl_vec_push(*tokens, token_create("(", TOKEN_LPAREN));
input++;
} else if (c == ')') {
root = token_create(")", TOKEN_RPAREN, root);
sl_vec_push(*tokens, token_create(")", TOKEN_RPAREN));
input++;
} else if (match_char(whitespace, c)) {
input++;
} else if (match_char(string, c)) {
regex_step(&input, &c);
collected = collect_until_match_escapable(string, &input);
root = token_create(sl_c_str(collected), TOKEN_STRING, root);
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_STRING));
input++;
} else {
printf("%c: no match\n", c);
@ -55,30 +54,18 @@ Token* tokenize(char* input) {
regfree(&number);
regfree(&string);
regfree(&whitespace);
return root;
}
Token* token_create(char* value, TokenType type, Token* root) {
Token* new_token = calloc(1, sizeof(Token));
Token* token_create(char* value, TokenType type) {
Token* new_token = malloc(sizeof(Token));
new_token->value = value;
new_token->type = type;
return token_append(root, new_token);;
return new_token;
}
Token* token_append(Token* root, Token* new_token) {
if (!root) return new_token;
Token* current = root;
while (current->next) {
current = current->next;
}
current->next = new_token;
return root;
}
void tokens_print(Token* root) {
while(root != NULL) {
printf("%s: %s\n", TokenTypeText[root->type], root->value);
root = root->next;
void tokens_print(TokenVec tokens) {
for(sl_vec_it(token, tokens)) {
printf("%s: %s\n", TokenTypeText[(*token)->type], (*token)->value);
}
}