From 5e02ecd80de1e940b9189ef81fbc6a64b3b3492a Mon Sep 17 00:00:00 2001 From: sam Date: Sat, 16 Nov 2024 21:02:09 +1300 Subject: [PATCH] clean up some stuff --- a.out | Bin 0 -> 70296 bytes src/codegen.c | 104 +++++++++++++++++++++++++++++++------------------ src/codegen.h | 14 ++++--- src/compiler.h | 2 + src/main.c | 4 +- src/node.h | 24 +++++++++++- src/parser.c | 85 ++++++++++++++++++++++++++++++++-------- test.s | 41 +++++++++++++++++++ test.ssa | 14 +++++++ test.txt | 9 ++++- 10 files changed, 231 insertions(+), 66 deletions(-) create mode 100755 a.out create mode 100644 test.s create mode 100644 test.ssa diff --git a/a.out b/a.out new file mode 100755 index 0000000000000000000000000000000000000000..08f1f099d3e9aeffb99403a2dd9858e5f38d1e8f GIT binary patch literal 70296 zcmeI0U2GiH701u+ntb4p*hvU!17Q=QilSz55(@&fu(snkuAD%PsVG7nWIuJY7M*~wUrPEgbGDTR1j@>FrrkLVz%eZoU@*t zU8ks0U#kB|v*+IbJ?GqefA`MU-rYa6KOT#b9SQmkNo{&Xprp0oMEjCYn)cFaO4G%( zg~Y9mPxFh~k2K8eD)yBl!KcLSjZ3ZBZV6=WAvmmevFEAQ%KSw;FiZYyf?pZy%wiBV zvlwJFJFVlmU#-PSRpMqfZdT*W&g;O;+Wcg0a*k-esNKhHa+nogIc|E}%Sm~MCW_i_ z-;2ns#eI$Abjj4?+@?dDhqb@uxJ>By&DuI#SuA9(Ear9=3#G}aovvHSj$PHAs+Lo` zf_ZP`{IbRahi;*++xEZk#wFRW{Njzrzk2J>Ul@M;x}W`8#w}xzeBvXA_^2#UpW&<+zFlh6a24oZYEi zSJ#Vsy2%+FIq2l_mHcR->g6jV2m6ZUQhvnE6!V;Lbi7>BE}X!%rK8R_&a0HDpMe-- z&RtxzefH4tAM^H1(6y>P<=5lXuQ}{GOz4K5(T#9IuQ$`vs?XPgoniXC>Z^2pO}F~% z>L26$cFjoFr$c&p%e))X!`IbEA>FQZ$#YWmI475VY{#!cdN}_xAw8UbE~MMFAh})) z>9S|-xHOq~b@ABmPAw+rIJ13l@z`@G=Mv6q);`Z|^F;HVFApqq+DTy*IGna?E@p_5?8ppHswt8`1B#e{%OLJ!|gy?zT^Q9(a!Z&vI<< zCj$!^KQ>;El|C1b{nlsyK|3vM;9Pg~rmsoI=xTqA^8*Xh9OI9vExmAx$Ijz@C`r@9 zcl6RVAGD_z7LPseua}MgO`eaH#X38d`gn33EqsumD`@rf@MFRuvEd4-Coh;DepKr} zWjo9HpIq0RdnsSAU(%fCUG}#ryQ}P!_+plHMV8VrM|YAwKW{$KfiibZWX^T@p*tX=|c|D(tPGv z`!ZWOl6;QvIm4RtPd#gskF-6S=zSo5=&e7$vQ{LCo8t3X_6c9((uM;BKmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;{vQa;tPEz6RvXhv zb*WvWcJ92OaDnNog2LytKC>7^Zqd3n)91B*LfbEnw14<%tt`@sc+jBT(8)wlUlAC3 zM%&NV`j6DMtKF>jLbYZz4b9Yo@o5^GQiqLdb^3IX)(_}FOuxS^Xs~(Bso!d~*8f(m zo8P}`!Th*)U;0$*w`#uHQoA2Hul4J-eL?G5)$IR!b6V&BpX{uz>vAKSH-Hg`RTQkm zKwsar$;)raOqRUKeb1otu0QcuuSOTDeW z4?}4vLFd)?U!%UFzE2zVm1OJJsIMYh_eOm++5T$O&nMf5je0xT_1LJdAzQCT{Q{b{ z3n|o)ptZEdHfn>uj*>0)4%*RD@1$E>>g&nw3!(JPa|7A^pi$pQt@cxbF05a_jsBas z+PshnzKT0q>UNbbWs$o=8@*p!Qi4)&T{rI&xgW-O2DX2a8Ym+nYUUi|>7-(WPoOt* zmiuX|@mT(+BI}rB|2X$+y4}wL|K@!X=*{cC^PK&*(@S<^WEjwKBE5i9OJ5ge2af$q5Tj%1&D$?&HL~!-@jRZgmoFG zJum&_{1fD9-sh+MJk5IG-$~OEZh_vsZ(rm*=i)gV;j-Th|-&g!`c2ZPlxn;j+ zeE;`r(d+bY62Mno^!~7@{x9kC*LUNeVE;7VuQu=ktv9ET?P5I{s$21gzAo7s_rnnD z9ieqM|JzvKZk3R|SN%6f@_4?!sp)=ljP>>L=IiL5W%7Jq^F+t_Fzd%u2G-v>0mJuhTG$#G^G|G%mpz2APQ zdF=Ub?|mP$F7t}|ui)iN`aL!NBw25@ANGhIZ@$0L&Dn~#D^;c)IdU8~Q*b{U-BwP=y1gy&pWxv@$n-Z5~?}u z={2}!v%O=vp{mEjbDaIdJqP=p{zLm5hn#)iIMj1+u#e?|L$^5n2ekFTzF~3(hHmWb z8FFsizyId`5oe^Qcc`BuoMIu9b*i3Q@tkqDP)ZLB4fggqyHk6*$r&6u=q$~8=O1vQ^C{-$Z`P67>GL@Mu6mvTZIhA^P2Y1Rg z^V`PU>KLVRM@pP8uwEr-xhr3(7RsfDion`iQVx+mU}I8t{?}aqZDu8ao(4W*V}(vlkYyhwCi_IdmlGz z--VW6e8b0OT#-i^U)KI%@9k!HsBXsYV^+V43*mTsKQo&&N43%TeXReJzqPFY6Iw89 z_b+=5iabW?5XZ~6sKq~{1+(+oue|=qH>t(j8|l|ow|M*hHEZ9yn#N~sWwGDnQ@$lF z-o8J~Zd7L*zxkQHliTIn)av$~V%Gi-knzjf+4war9p!>BwRroUF*~P@*1yHu{O{BF zUiGu@9qpc0W3{r_@9-Ipx9=#k6JcjAcSPzBM&j-H zY4&@O4WEw0KOBj-?=rKteijzpzq1-|&&s3%G8^@`y4jN&Z|i68ab~-%W3?7%^3#!c z`yMp=J#$oR>u2&D7uNDw|G!56SD29(NIs(eRyX?x_F2cL#oPDgiO(4SI>*|z$;JA2 z=&Z)i>-yRLvVLtJoZ%R8vHtDhEzuH0$5q zuR8ouHS^QySUA1UCDFR-N>H%)FGO^U4_n(iVb{Id|K&Ph`9Dy;`E2}lUCQ@~jXfNH d$Eu)*Ya+?5U-Ru;CjKYq2c93BquOZvzXAPahIs%0 literal 0 HcmV?d00001 diff --git a/src/codegen.c b/src/codegen.c index 1ba2247..e720c70 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -2,86 +2,114 @@ #include "helpers.h" #include -void codegen(Node node) { +void codegen(Node node, bool emit_type) { switch(node.type) { case NODE_FUNCTION_CALL: - codegen_function_call(node); + codegen_function_call(node.function_call); break; case NODE_FUNCTION_DECL: - codegen_function_decl(node); + codegen_function_decl(node.function_decl); break; case NODE_FUNCTION_IMPL: - codegen_function_impl(node); + codegen_function_impl(node.function_impl); break; case NODE_ARG_DECL: - codegen_arg_decl(node); + codegen_arg_decl(node.arg_decl, emit_type); + break; + case NODE_REFERENCE: + codegen_reference(node.reference, emit_type); break; case NODE_NUMBER: - codegen_number(node); + codegen_number(node.number, emit_type); + break; + case NODE_RETURN: + codegen_return(node.ret); break; default: syntax_error("unexpected node %c", node.type); } } -void codegen_function_call(Node node) { - printf("FunctionCall(%s", node.function_call.name); - if(Nodes_size(&node.function_call.args) > 0) - printf(", "); - for(size_t i = 0; i < Nodes_size(&node.function_call.args); i++) { - const Node* arg = Nodes_at(&node.function_call.args, i); - codegen(*arg); +void codegen_function_call(FunctionCall node) { + printf("call $%s(", node.name); + // printf("FunctionCall(%s", node.function_call.name); + /*if(Nodes_size(&node.function_call.args) > 0) + printf(", ");*/ + for(size_t i = 0; i < Nodes_size(&node.args); i++) { + const Node* arg = Nodes_at(&node.args, i); + codegen(*arg, true); - if(arg != Nodes_back(&node.function_call.args)) { + if(arg != Nodes_back(&node.args)) { printf(", "); } } printf(")\n"); } -void codegen_function_decl(Node node) { - printf("FunctionDecl(%s, %s, ", node.function_decl.type, node.function_decl.name); - for(size_t i = 0; i < Nodes_size(&node.function_decl.args); i++) { - const Node* arg = Nodes_at(&node.function_decl.args, i); - codegen(*arg); +void codegen_function_decl(FunctionDecl node) { + printf("FunctionDecl(%s, %s, ", node.type, node.name); + for(size_t i = 0; i < Nodes_size(&node.args); i++) { + const Node* arg = Nodes_at(&node.args, i); + codegen(*arg, true); - if(arg != Nodes_back(&node.function_decl.args)) { + if(arg != Nodes_back(&node.args)) { printf(", "); } } printf(")\n"); } -void codegen_function_impl(Node node) { - printf("FunctionImpl(%s, %s", node.function_impl.type, node.function_impl.name); - if(Nodes_size(&node.function_impl.args) > 0) - printf(", "); - for(size_t i = 0; i < Nodes_size(&node.function_impl.args); i++) { - const Node* arg = Nodes_at(&node.function_impl.args, i); - codegen(*arg); +void codegen_function_impl(FunctionImpl node) { + printf("export function %s $%s(", node.type, node.name); + for(size_t i = 0; i < Nodes_size(&node.args); i++) { + const Node* arg = Nodes_at(&node.args, i); + codegen(*arg, true); - if(arg != Nodes_back(&node.function_impl.args)) { + if(arg != Nodes_back(&node.args)) { printf(", "); } } - printf(") {\n\t"); + printf(") {\n@start\n\t"); - for(size_t i = 0; i < Nodes_size(&node.function_impl.body); i++) { - const Node* n = Nodes_at(&node.function_impl.body, i); - codegen(*n); + for(size_t i = 0; i < Nodes_size(&node.body); i++) { + const Node* n = Nodes_at(&node.body, i); + codegen(*n, true); - if(n != Nodes_back(&node.function_impl.body)) { + if(n != Nodes_back(&node.body)) { printf("\t"); } } - printf("}\n"); + printf("\t\n}\n"); } -void codegen_arg_decl(Node node) { - printf("ArgDecl(%s, %s)", node.arg_decl.type, node.arg_decl.name); +void codegen_arg_decl(ArgDecl node, bool emit_type) { + // printf("ArgDecl(%s, %s)", node.arg_decl.type, node.arg_decl.name); + if(emit_type) { + printf("%s ", node.type); + } + printf("%%%s", node.name); } -void codegen_number(Node node) { - printf("Number(%llu)", node.number.value); +void codegen_reference(Reference node, bool emit_type) { + // printf("Reference(%s)", node.reference.name); + if(emit_type) { + printf("%s ", node.type); + } + printf("%%%s", node.name); +} + +void codegen_number(Number node, bool emit_type) { + // printf("Number(%llu)", node.number.value); + if(emit_type) { + printf("w "); + } + printf("%d", node.value); +} + +void codegen_return(Return node) { + printf("ret "); + if(node.value != NULL) { + codegen(*node.value, false); + } } diff --git a/src/codegen.h b/src/codegen.h index d133082..9b2f2c7 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -3,11 +3,13 @@ #include "node.h" -void codegen(Node node); -void codegen_function_call(Node node); -void codegen_function_decl(Node node); -void codegen_arg_decl(Node node); -void codegen_function_impl(Node node); -void codegen_number(Node node); +void codegen(Node node, bool emit_type); +void codegen_function_call(FunctionCall node); +void codegen_function_decl(FunctionDecl node); +void codegen_arg_decl(ArgDecl node, bool emit_type); +void codegen_function_impl(FunctionImpl node); +void codegen_reference(Reference node, bool emit_type); +void codegen_number(Number node, bool emit_type); +void codegen_return(Return node); #endif diff --git a/src/compiler.h b/src/compiler.h index d385f7b..2f8c2a1 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -2,6 +2,7 @@ #define COMPILER_H #include "lexer.h" +#include "node.h" typedef enum { TOKEN_IDENTIFIER = 'I', @@ -32,6 +33,7 @@ typedef enum { typedef struct { Lexer* lexer; cmap_str types; + NodeMap symbols; } Compiler; bool next(Compiler* compiler, Token* token); diff --git a/src/main.c b/src/main.c index a016488..3180763 100644 --- a/src/main.c +++ b/src/main.c @@ -7,7 +7,6 @@ #include "token.h" #include #include -#include int main(int argc, char** argv) { assert(argc > 1); @@ -38,13 +37,14 @@ int main(int argc, char** argv) { { "i64", "l" }, { "void", "" }, }), + .symbols = { 0 }, }; Token token; while(next(&compiler, &token)) { // printf("%c: %s\n", token.type, token.value); Node node = parse_token(&compiler, token); - codegen(node); + codegen(node, true); } return 0; diff --git a/src/node.h b/src/node.h index d040b27..2a030ba 100644 --- a/src/node.h +++ b/src/node.h @@ -1,17 +1,22 @@ #ifndef NODE_H #define NODE_H +#include #include +#include #include typedef struct Node Node; forward_cvec(Nodes, struct Node); +forward_cmap(NodeMap, cstr, struct Node); typedef enum { NODE_FUNCTION_CALL = 'C', NODE_FUNCTION_DECL = 'D', NODE_FUNCTION_IMPL = 'I', NODE_ARG_DECL = 'A', + NODE_RETURN = 'R', + NODE_REFERENCE = 'r', NODE_NUMBER = 'N', } NodeType; @@ -39,9 +44,18 @@ typedef struct { } ArgDecl; typedef struct { - long long int value; + const char* type; + const char* name; +} Reference; + +typedef struct { + int value; } Number; +typedef struct { + Node* value; +} Return; + struct Nodes; typedef struct Node { NodeType type; @@ -50,7 +64,9 @@ typedef struct Node { FunctionDecl function_decl; FunctionImpl function_impl; ArgDecl arg_decl; + Reference reference; Number number; + Return ret; }; } Node; @@ -60,4 +76,10 @@ typedef struct Node { #define i_opt c_no_cmp #include +#define i_type NodeMap +#define i_is_forward +#define i_key_str +#define i_val Node +#include + #endif diff --git a/src/parser.c b/src/parser.c index df0eee6..5cb1582 100644 --- a/src/parser.c +++ b/src/parser.c @@ -43,23 +43,53 @@ Node parse_type(Compiler* self, const char* type) { } } -Node parse_identifier(Compiler* self, const char* name) { - Token token; - next(self, &token); - - switch(token.type) { - case TOKEN_OPAREN: - return parse_function_call(self, name); - break; +const char* resolve_type(Compiler* self, Node node) { + switch(node.type) { + case NODE_ARG_DECL: + return node.arg_decl.type; default: - syntax_error("unexpected token \"%s\" after identifier \"%s\"", token.value, name); + syntax_error("unexpected node fed to resolve_type"); + } +} + +Node parse_identifier(Compiler* self, const char* name) { + if(strcmp(name, "return") == 0) { + Token token; + next(self, &token); + Node* node = NULL; + + if(token.type != TOKEN_SEMICOLON) { + node = malloc(sizeof(Node)); + Node parsed = parse_token(self, token); + memcpy(node, &parsed, sizeof(Node)); + parse_semicolon(self); + } + return (Node){ .type = NODE_RETURN, .ret = { .value = node } }; + } + + const NodeMap_value* symbol = NodeMap_get(&self->symbols, name); + if(symbol == NULL) { + syntax_error("undefined symbol \"%s\"", name); + } + + switch(symbol->second.type) { + case NODE_FUNCTION_DECL: + case NODE_FUNCTION_IMPL: + return parse_function_call(self, name); + case NODE_ARG_DECL: + return (Node){ + .type = NODE_REFERENCE, + .reference = { .type = resolve_type(self, symbol->second), .name = name }, + }; + default: + syntax_error("unexpected symbol type"); } } Node parse_number(Compiler* self, const char* value) { return (Node){ .type = NODE_NUMBER, - .number = atoll(value), + .number = atoi(value), }; } @@ -85,15 +115,17 @@ Node parse_function(Compiler* self, const char* type, const char* name) { if(arg_type.type == TOKEN_CPAREN) { break; } + const cmap_str_value* type = cmap_str_get(&self->types, arg_type.value); Token arg_name; next(self, &arg_name); - Nodes_push(&args, - (Node){ - .type = NODE_ARG_DECL, - .arg_decl = { .type = arg_type.value, .name = arg_name.value }, - }); + Node arg_decl = (Node){ + .type = NODE_ARG_DECL, + .arg_decl = { .type = cstr_str(&type->second), .name = arg_name.value }, + }; + Nodes_push(&args, arg_decl); + NodeMap_insert(&self->symbols, cstr_from(arg_name.value), arg_decl); Token token; next(self, &token); @@ -109,19 +141,29 @@ Node parse_function(Compiler* self, const char* type, const char* name) { break; // only reached if didnt continue or error } + const cmap_str_value* real_type = cmap_str_get(&self->types, type); + const char* real_type_cstr = cstr_str(&real_type->second); + Token token; + Node function_node; if(next(self, &token)) { switch(token.type) { case TOKEN_SEMICOLON: - return parse_function_decl(self, type, name, args); + function_node = parse_function_decl(self, real_type_cstr, name, args); + break; case TOKEN_OBRACE: - return parse_function_impl(self, type, name, args); + function_node = parse_function_impl(self, real_type_cstr, name, args); + break; default: syntax_error("expected semicolon or opening brace found, %s", token.value); } } else { syntax_error("expected token, found eof"); } + + printf("inserting %s into symbols\n", name); + NodeMap_insert(&self->symbols, cstr_from(name), function_node); + return function_node; } Node parse_function_decl(Compiler* self, const char* type, const char* name, Nodes args) { @@ -142,6 +184,15 @@ Node parse_function_impl(Compiler* self, const char* type, const char* name, Nod Node parse_function_call(Compiler* self, const char* name) { printf("parse function call %s\n", name); + Token token; + next(self, &token); + switch(token.type) { + case TOKEN_OPAREN: + break; + default: + syntax_error("unexpected token \"%s\" after function name", token.value); + } + Nodes args = parse_until(self, TOKEN_CPAREN); parse_semicolon(self); diff --git a/test.s b/test.s new file mode 100644 index 0000000..eb5f0c6 --- /dev/null +++ b/test.s @@ -0,0 +1,41 @@ +.text +.balign 16 +.globl sayhi +sayhi: + hint #34 + stp x29, x30, [sp, -32]! + mov x29, sp + str x19, [x29, 24] + mov w19, w0 + mov w0, #72 + bl putchar + mov w0, w19 + mov w19, w0 + mov w0, #105 + bl putchar + mov w0, w19 + bl putchar + mov w0, #5 + ldr x19, [x29, 24] + ldp x29, x30, [sp], 32 + ret +.type sayhi, @function +.size sayhi, .-sayhi +/* end function sayhi */ + +.text +.balign 16 +.globl main +main: + hint #34 + stp x29, x30, [sp, -16]! + mov x29, sp + mov w0, #74 + bl sayhi + ldp x29, x30, [sp], 16 + ret +.type main, @function +.size main, .-main +/* end function main */ + +.section .note.GNU-stack,"",@progbits diff --git a/test.ssa b/test.ssa new file mode 100644 index 0000000..b7373cc --- /dev/null +++ b/test.ssa @@ -0,0 +1,14 @@ +export function w $sayhi(w %char) { +@start + call $putchar(w 72) + call $putchar(w 105) + call $putchar(w %char) + ret 5 +} + +export function $main() { +@start + call $sayhi(w 74) + ret +} + diff --git a/test.txt b/test.txt index 0d5b73f..1d1b4bd 100644 --- a/test.txt +++ b/test.txt @@ -1,8 +1,13 @@ i32 putchar(i32 char); -void sayhi() { +i32 sayhi(i32 char) { putchar(72); putchar(105); + putchar(char); + return 5; } -sayhi(); +void main() { + sayhi(74); + return; +}