Optimize tokenizer: store tokens in a vector and advance its data pointer while parsing
This commit is contained in:
parent
ce375024f3
commit
bee9e6fc9d
10 changed files with 46 additions and 63 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
*.o
|
||||
compiler
|
||||
example
|
||||
example
|
||||
.vscode
|
4
Makefile
4
Makefile
|
@ -1,8 +1,8 @@
|
|||
BINARY=compiler
|
||||
|
||||
CC=gcc
|
||||
CFLAGS=-O3 -Iinclude -Itcc
|
||||
LDFLAGS=-Ltcc -ltcc
|
||||
CFLAGS=-Iinclude -Itcc -Ofast -flto
|
||||
LDFLAGS=-Ltcc -ltcc -Ofast
|
||||
|
||||
CFILES=$(shell find -L src -type f -name '*.c')
|
||||
OBJ=$(CFILES:.c=.o)
|
||||
|
|
|
@ -4,4 +4,5 @@
|
|||
|
||||
(printf "5 + (100 / 5) = %d\n"
|
||||
(add 5
|
||||
(divide 100 5)))
|
||||
(divide 100 5)))
|
||||
|
||||
|
|
|
@ -24,10 +24,9 @@ typedef struct ASTNode {
|
|||
ASTVec params;
|
||||
} ASTNode;
|
||||
|
||||
ASTNode* ast_parse(Token** token);
|
||||
ASTNode* ast_walk(Token** token);
|
||||
ASTNode* ast_parse(TokenVec* token);
|
||||
ASTNode* ast_walk(TokenVec* token);
|
||||
void ast_print(ASTNode* node, int indent);
|
||||
void ast_step(Token** token);
|
||||
ASTNode* ast_create_empty(ASTType type);
|
||||
ASTNode* ast_create_program(ASTVec body);
|
||||
ASTNode* ast_create_call_expression(const char* name, ASTVec params);
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 1de5b35258cffda13d4bcf505e83c976e448e750
|
||||
Subproject commit b0b09f6fd9efd5367dbac19629caf0d027e657e2
|
|
@ -14,13 +14,14 @@ typedef enum TokenType {
|
|||
typedef struct Token {
|
||||
char* value;
|
||||
TokenType type;
|
||||
struct Token* next;
|
||||
} Token;
|
||||
|
||||
Token* tokenize(char* input);
|
||||
typedef sl_vec(Token*) TokenVec;
|
||||
|
||||
Token* token_create(char* value, TokenType type, Token* root);
|
||||
void tokenize(char* input, TokenVec* tokens);
|
||||
|
||||
Token* token_create(char* value, TokenType type);
|
||||
Token* token_append(Token* root, Token* new_token);
|
||||
void tokens_print(Token* root);
|
||||
void tokens_print(TokenVec tokens);
|
||||
|
||||
#endif
|
35
src/ast.c
35
src/ast.c
|
@ -8,41 +8,42 @@ const char* ASTTypeText[] = {
|
|||
"StringLiteral"
|
||||
};
|
||||
|
||||
ASTNode* ast_parse(Token** token) {
|
||||
ASTNode* ast_parse(TokenVec* token) {
|
||||
ASTVec body = { 0 };
|
||||
|
||||
while((*token) != NULL) {
|
||||
Token** end = sl_vec_end(*token);
|
||||
while(token->data != end) {
|
||||
sl_vec_push(body, ast_walk(token));
|
||||
}
|
||||
|
||||
return ast_create_program(body);
|
||||
}
|
||||
|
||||
ASTNode* ast_walk(Token** token) {
|
||||
if((*token)->type == TOKEN_NUMBER) {
|
||||
ASTNode* number = ast_create_number_literal((*token)->value);
|
||||
ast_step(token);
|
||||
ASTNode* ast_walk(TokenVec* token) {
|
||||
if(token->data[0]->type == TOKEN_NUMBER) {
|
||||
ASTNode* number = ast_create_number_literal(token->data[0]->value);
|
||||
sl_vec_forward(*token);
|
||||
return number;
|
||||
}
|
||||
|
||||
if((*token)->type == TOKEN_STRING) {
|
||||
ASTNode* string = ast_create_string_literal((*token)->value);
|
||||
ast_step(token);
|
||||
if(token->data[0]->type == TOKEN_STRING) {
|
||||
ASTNode* string = ast_create_string_literal(token->data[0]->value);
|
||||
sl_vec_forward(*token);
|
||||
return string;
|
||||
}
|
||||
|
||||
if((*token)->type == TOKEN_LPAREN) { // Call expression
|
||||
ast_step(token);
|
||||
const char* name = (*token)->value;
|
||||
if(token->data[0]->type == TOKEN_LPAREN) { // Call expression
|
||||
sl_vec_forward(*token);
|
||||
const char* name = token->data[0]->value;
|
||||
ASTVec params = { 0 };
|
||||
|
||||
ast_step(token);
|
||||
sl_vec_forward(*token);
|
||||
|
||||
while((*token)->type != TOKEN_RPAREN) {
|
||||
while(token->data[0]->type != TOKEN_RPAREN) {
|
||||
sl_vec_push(params, ast_walk(token));
|
||||
}
|
||||
|
||||
ast_step(token);
|
||||
sl_vec_forward(*token);
|
||||
|
||||
return ast_create_call_expression(name, params);
|
||||
}
|
||||
|
@ -50,10 +51,6 @@ ASTNode* ast_walk(Token** token) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void ast_step(Token** token) {
|
||||
(*token) = (*token)->next;
|
||||
}
|
||||
|
||||
void ast_print(ASTNode* node, int indent) {
|
||||
switch(node->type) {
|
||||
case AST_PROGRAM:
|
||||
|
|
|
@ -17,13 +17,9 @@ int binary_produce(const char* code, Args args) {
|
|||
assert(tcc_add_file(state, "std/std.c") == 0);
|
||||
assert(tcc_compile_string(state, code) == 0);
|
||||
|
||||
int ret = -1;
|
||||
if(args.build) {
|
||||
ret = tcc_output_file(state, args.output);
|
||||
printf("Binary produced: %s\n", args.output);
|
||||
return tcc_output_file(state, args.output);
|
||||
} else {
|
||||
ret = tcc_run(state, 0, NULL);
|
||||
return tcc_run(state, 0, NULL);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -14,7 +14,8 @@ int main(int argc, char* argv[]) {
|
|||
sl_read_file(args.input, &buffer);
|
||||
|
||||
printf("Tokens:\n");
|
||||
Token* tokens = tokenize(sl_c_str(buffer));
|
||||
TokenVec tokens = { 0 };
|
||||
tokenize(sl_c_str(buffer), &tokens);
|
||||
tokens_print(tokens);
|
||||
printf("\n");
|
||||
|
||||
|
|
|
@ -14,34 +14,33 @@ const char* TokenTypeText[] = {
|
|||
"string"
|
||||
};
|
||||
|
||||
Token* tokenize(char* input) {
|
||||
void tokenize(char* input, TokenVec* tokens) {
|
||||
regex_t name = regex_create("[a-z_]", REG_ICASE);
|
||||
regex_t number = regex_create("[0-9]", 0);
|
||||
regex_t string = regex_create("\"", 0);
|
||||
regex_t whitespace = regex_create("[ \n]", 0);
|
||||
Token* root = NULL;
|
||||
sl_string collected = {0};
|
||||
|
||||
char c = *input;
|
||||
while (c != '\0') {
|
||||
if (match_char(name, c)) {
|
||||
collected = collect_until_no_match(name, &input);
|
||||
root = token_create(sl_c_str(collected), TOKEN_NAME, root);
|
||||
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NAME));
|
||||
} else if (match_char(number, c)) {
|
||||
collected = collect_until_no_match(number, &input);
|
||||
root = token_create(sl_c_str(collected), TOKEN_NUMBER, root);
|
||||
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NUMBER));
|
||||
} else if (c == '(') {
|
||||
root = token_create("(", TOKEN_LPAREN, root);
|
||||
sl_vec_push(*tokens, token_create("(", TOKEN_LPAREN));
|
||||
input++;
|
||||
} else if (c == ')') {
|
||||
root = token_create(")", TOKEN_RPAREN, root);
|
||||
sl_vec_push(*tokens, token_create(")", TOKEN_RPAREN));
|
||||
input++;
|
||||
} else if (match_char(whitespace, c)) {
|
||||
input++;
|
||||
} else if (match_char(string, c)) {
|
||||
regex_step(&input, &c);
|
||||
collected = collect_until_match_escapable(string, &input);
|
||||
root = token_create(sl_c_str(collected), TOKEN_STRING, root);
|
||||
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_STRING));
|
||||
input++;
|
||||
} else {
|
||||
printf("%c: no match\n", c);
|
||||
|
@ -55,30 +54,18 @@ Token* tokenize(char* input) {
|
|||
regfree(&number);
|
||||
regfree(&string);
|
||||
regfree(&whitespace);
|
||||
return root;
|
||||
}
|
||||
|
||||
Token* token_create(char* value, TokenType type, Token* root) {
|
||||
Token* new_token = calloc(1, sizeof(Token));
|
||||
Token* token_create(char* value, TokenType type) {
|
||||
Token* new_token = malloc(sizeof(Token));
|
||||
new_token->value = value;
|
||||
new_token->type = type;
|
||||
|
||||
return token_append(root, new_token);;
|
||||
return new_token;
|
||||
}
|
||||
|
||||
Token* token_append(Token* root, Token* new_token) {
|
||||
if (!root) return new_token;
|
||||
Token* current = root;
|
||||
while (current->next) {
|
||||
current = current->next;
|
||||
}
|
||||
current->next = new_token;
|
||||
return root;
|
||||
}
|
||||
|
||||
void tokens_print(Token* root) {
|
||||
while(root != NULL) {
|
||||
printf("%s: %s\n", TokenTypeText[root->type], root->value);
|
||||
root = root->next;
|
||||
void tokens_print(TokenVec tokens) {
|
||||
for(sl_vec_it(token, tokens)) {
|
||||
printf("%s: %s\n", TokenTypeText[(*token)->type], (*token)->value);
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue