Optimize tokenizer: replace the token linked list with a vector and advance its data pointer while parsing
This commit is contained in:
parent
ce375024f3
commit
bee9e6fc9d
10 changed files with 46 additions and 63 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
||||||
*.o
|
*.o
|
||||||
compiler
|
compiler
|
||||||
example
|
example
|
||||||
|
.vscode
|
4
Makefile
4
Makefile
|
@ -1,8 +1,8 @@
|
||||||
BINARY=compiler
|
BINARY=compiler
|
||||||
|
|
||||||
CC=gcc
|
CC=gcc
|
||||||
CFLAGS=-O3 -Iinclude -Itcc
|
CFLAGS=-Iinclude -Itcc -Ofast -flto
|
||||||
LDFLAGS=-Ltcc -ltcc
|
LDFLAGS=-Ltcc -ltcc -Ofast
|
||||||
|
|
||||||
CFILES=$(shell find -L src -type f -name '*.c')
|
CFILES=$(shell find -L src -type f -name '*.c')
|
||||||
OBJ=$(CFILES:.c=.o)
|
OBJ=$(CFILES:.c=.o)
|
||||||
|
|
|
@ -4,4 +4,5 @@
|
||||||
|
|
||||||
(printf "5 + (100 / 5) = %d\n"
|
(printf "5 + (100 / 5) = %d\n"
|
||||||
(add 5
|
(add 5
|
||||||
(divide 100 5)))
|
(divide 100 5)))
|
||||||
|
|
||||||
|
|
|
@ -24,10 +24,9 @@ typedef struct ASTNode {
|
||||||
ASTVec params;
|
ASTVec params;
|
||||||
} ASTNode;
|
} ASTNode;
|
||||||
|
|
||||||
ASTNode* ast_parse(Token** token);
|
ASTNode* ast_parse(TokenVec* token);
|
||||||
ASTNode* ast_walk(Token** token);
|
ASTNode* ast_walk(TokenVec* token);
|
||||||
void ast_print(ASTNode* node, int indent);
|
void ast_print(ASTNode* node, int indent);
|
||||||
void ast_step(Token** token);
|
|
||||||
ASTNode* ast_create_empty(ASTType type);
|
ASTNode* ast_create_empty(ASTType type);
|
||||||
ASTNode* ast_create_program(ASTVec body);
|
ASTNode* ast_create_program(ASTVec body);
|
||||||
ASTNode* ast_create_call_expression(const char* name, ASTVec params);
|
ASTNode* ast_create_call_expression(const char* name, ASTVec params);
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 1de5b35258cffda13d4bcf505e83c976e448e750
|
Subproject commit b0b09f6fd9efd5367dbac19629caf0d027e657e2
|
|
@ -14,13 +14,14 @@ typedef enum TokenType {
|
||||||
typedef struct Token {
|
typedef struct Token {
|
||||||
char* value;
|
char* value;
|
||||||
TokenType type;
|
TokenType type;
|
||||||
struct Token* next;
|
|
||||||
} Token;
|
} Token;
|
||||||
|
|
||||||
Token* tokenize(char* input);
|
typedef sl_vec(Token*) TokenVec;
|
||||||
|
|
||||||
Token* token_create(char* value, TokenType type, Token* root);
|
void tokenize(char* input, TokenVec* tokens);
|
||||||
|
|
||||||
|
Token* token_create(char* value, TokenType type);
|
||||||
Token* token_append(Token* root, Token* new_token);
|
Token* token_append(Token* root, Token* new_token);
|
||||||
void tokens_print(Token* root);
|
void tokens_print(TokenVec tokens);
|
||||||
|
|
||||||
#endif
|
#endif
|
35
src/ast.c
35
src/ast.c
|
@ -8,41 +8,42 @@ const char* ASTTypeText[] = {
|
||||||
"StringLiteral"
|
"StringLiteral"
|
||||||
};
|
};
|
||||||
|
|
||||||
ASTNode* ast_parse(Token** token) {
|
ASTNode* ast_parse(TokenVec* token) {
|
||||||
ASTVec body = { 0 };
|
ASTVec body = { 0 };
|
||||||
|
|
||||||
while((*token) != NULL) {
|
Token** end = sl_vec_end(*token);
|
||||||
|
while(token->data != end) {
|
||||||
sl_vec_push(body, ast_walk(token));
|
sl_vec_push(body, ast_walk(token));
|
||||||
}
|
}
|
||||||
|
|
||||||
return ast_create_program(body);
|
return ast_create_program(body);
|
||||||
}
|
}
|
||||||
|
|
||||||
ASTNode* ast_walk(Token** token) {
|
ASTNode* ast_walk(TokenVec* token) {
|
||||||
if((*token)->type == TOKEN_NUMBER) {
|
if(token->data[0]->type == TOKEN_NUMBER) {
|
||||||
ASTNode* number = ast_create_number_literal((*token)->value);
|
ASTNode* number = ast_create_number_literal(token->data[0]->value);
|
||||||
ast_step(token);
|
sl_vec_forward(*token);
|
||||||
return number;
|
return number;
|
||||||
}
|
}
|
||||||
|
|
||||||
if((*token)->type == TOKEN_STRING) {
|
if(token->data[0]->type == TOKEN_STRING) {
|
||||||
ASTNode* string = ast_create_string_literal((*token)->value);
|
ASTNode* string = ast_create_string_literal(token->data[0]->value);
|
||||||
ast_step(token);
|
sl_vec_forward(*token);
|
||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
|
|
||||||
if((*token)->type == TOKEN_LPAREN) { // Call expression
|
if(token->data[0]->type == TOKEN_LPAREN) { // Call expression
|
||||||
ast_step(token);
|
sl_vec_forward(*token);
|
||||||
const char* name = (*token)->value;
|
const char* name = token->data[0]->value;
|
||||||
ASTVec params = { 0 };
|
ASTVec params = { 0 };
|
||||||
|
|
||||||
ast_step(token);
|
sl_vec_forward(*token);
|
||||||
|
|
||||||
while((*token)->type != TOKEN_RPAREN) {
|
while(token->data[0]->type != TOKEN_RPAREN) {
|
||||||
sl_vec_push(params, ast_walk(token));
|
sl_vec_push(params, ast_walk(token));
|
||||||
}
|
}
|
||||||
|
|
||||||
ast_step(token);
|
sl_vec_forward(*token);
|
||||||
|
|
||||||
return ast_create_call_expression(name, params);
|
return ast_create_call_expression(name, params);
|
||||||
}
|
}
|
||||||
|
@ -50,10 +51,6 @@ ASTNode* ast_walk(Token** token) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_step(Token** token) {
|
|
||||||
(*token) = (*token)->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ast_print(ASTNode* node, int indent) {
|
void ast_print(ASTNode* node, int indent) {
|
||||||
switch(node->type) {
|
switch(node->type) {
|
||||||
case AST_PROGRAM:
|
case AST_PROGRAM:
|
||||||
|
|
|
@ -17,13 +17,9 @@ int binary_produce(const char* code, Args args) {
|
||||||
assert(tcc_add_file(state, "std/std.c") == 0);
|
assert(tcc_add_file(state, "std/std.c") == 0);
|
||||||
assert(tcc_compile_string(state, code) == 0);
|
assert(tcc_compile_string(state, code) == 0);
|
||||||
|
|
||||||
int ret = -1;
|
|
||||||
if(args.build) {
|
if(args.build) {
|
||||||
ret = tcc_output_file(state, args.output);
|
return tcc_output_file(state, args.output);
|
||||||
printf("Binary produced: %s\n", args.output);
|
|
||||||
} else {
|
} else {
|
||||||
ret = tcc_run(state, 0, NULL);
|
return tcc_run(state, 0, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
|
@ -14,7 +14,8 @@ int main(int argc, char* argv[]) {
|
||||||
sl_read_file(args.input, &buffer);
|
sl_read_file(args.input, &buffer);
|
||||||
|
|
||||||
printf("Tokens:\n");
|
printf("Tokens:\n");
|
||||||
Token* tokens = tokenize(sl_c_str(buffer));
|
TokenVec tokens = { 0 };
|
||||||
|
tokenize(sl_c_str(buffer), &tokens);
|
||||||
tokens_print(tokens);
|
tokens_print(tokens);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
|
|
|
@ -14,34 +14,33 @@ const char* TokenTypeText[] = {
|
||||||
"string"
|
"string"
|
||||||
};
|
};
|
||||||
|
|
||||||
Token* tokenize(char* input) {
|
void tokenize(char* input, TokenVec* tokens) {
|
||||||
regex_t name = regex_create("[a-z_]", REG_ICASE);
|
regex_t name = regex_create("[a-z_]", REG_ICASE);
|
||||||
regex_t number = regex_create("[0-9]", 0);
|
regex_t number = regex_create("[0-9]", 0);
|
||||||
regex_t string = regex_create("\"", 0);
|
regex_t string = regex_create("\"", 0);
|
||||||
regex_t whitespace = regex_create("[ \n]", 0);
|
regex_t whitespace = regex_create("[ \n]", 0);
|
||||||
Token* root = NULL;
|
|
||||||
sl_string collected = {0};
|
sl_string collected = {0};
|
||||||
|
|
||||||
char c = *input;
|
char c = *input;
|
||||||
while (c != '\0') {
|
while (c != '\0') {
|
||||||
if (match_char(name, c)) {
|
if (match_char(name, c)) {
|
||||||
collected = collect_until_no_match(name, &input);
|
collected = collect_until_no_match(name, &input);
|
||||||
root = token_create(sl_c_str(collected), TOKEN_NAME, root);
|
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NAME));
|
||||||
} else if (match_char(number, c)) {
|
} else if (match_char(number, c)) {
|
||||||
collected = collect_until_no_match(number, &input);
|
collected = collect_until_no_match(number, &input);
|
||||||
root = token_create(sl_c_str(collected), TOKEN_NUMBER, root);
|
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_NUMBER));
|
||||||
} else if (c == '(') {
|
} else if (c == '(') {
|
||||||
root = token_create("(", TOKEN_LPAREN, root);
|
sl_vec_push(*tokens, token_create("(", TOKEN_LPAREN));
|
||||||
input++;
|
input++;
|
||||||
} else if (c == ')') {
|
} else if (c == ')') {
|
||||||
root = token_create(")", TOKEN_RPAREN, root);
|
sl_vec_push(*tokens, token_create(")", TOKEN_RPAREN));
|
||||||
input++;
|
input++;
|
||||||
} else if (match_char(whitespace, c)) {
|
} else if (match_char(whitespace, c)) {
|
||||||
input++;
|
input++;
|
||||||
} else if (match_char(string, c)) {
|
} else if (match_char(string, c)) {
|
||||||
regex_step(&input, &c);
|
regex_step(&input, &c);
|
||||||
collected = collect_until_match_escapable(string, &input);
|
collected = collect_until_match_escapable(string, &input);
|
||||||
root = token_create(sl_c_str(collected), TOKEN_STRING, root);
|
sl_vec_push(*tokens, token_create(sl_c_str(collected), TOKEN_STRING));
|
||||||
input++;
|
input++;
|
||||||
} else {
|
} else {
|
||||||
printf("%c: no match\n", c);
|
printf("%c: no match\n", c);
|
||||||
|
@ -55,30 +54,18 @@ Token* tokenize(char* input) {
|
||||||
regfree(&number);
|
regfree(&number);
|
||||||
regfree(&string);
|
regfree(&string);
|
||||||
regfree(&whitespace);
|
regfree(&whitespace);
|
||||||
return root;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Token* token_create(char* value, TokenType type, Token* root) {
|
Token* token_create(char* value, TokenType type) {
|
||||||
Token* new_token = calloc(1, sizeof(Token));
|
Token* new_token = malloc(sizeof(Token));
|
||||||
new_token->value = value;
|
new_token->value = value;
|
||||||
new_token->type = type;
|
new_token->type = type;
|
||||||
|
|
||||||
return token_append(root, new_token);;
|
return new_token;
|
||||||
}
|
}
|
||||||
|
|
||||||
Token* token_append(Token* root, Token* new_token) {
|
void tokens_print(TokenVec tokens) {
|
||||||
if (!root) return new_token;
|
for(sl_vec_it(token, tokens)) {
|
||||||
Token* current = root;
|
printf("%s: %s\n", TokenTypeText[(*token)->type], (*token)->value);
|
||||||
while (current->next) {
|
|
||||||
current = current->next;
|
|
||||||
}
|
|
||||||
current->next = new_token;
|
|
||||||
return root;
|
|
||||||
}
|
|
||||||
|
|
||||||
void tokens_print(Token* root) {
|
|
||||||
while(root != NULL) {
|
|
||||||
printf("%s: %s\n", TokenTypeText[root->type], root->value);
|
|
||||||
root = root->next;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Add table
Reference in a new issue