From 0fd66745c872749b992df0c9452af9543cb873da Mon Sep 17 00:00:00 2001 From: sam Date: Fri, 15 Nov 2024 19:37:01 +1300 Subject: [PATCH] first commit --- .../index/codegen.h.B28082C1C1D042BA.idx | Bin 0 -> 462 bytes .../index/compiler.h.D712E1B2AB94B380.idx | Bin 0 -> 1560 bytes .../index/helpers.h.F9575AB77341F585.idx | Bin 0 -> 742 bytes .../clangd/index/lexer.h.4F141419C0AC5007.idx | Bin 0 -> 1206 bytes .../clangd/index/main.c.2393E5B60B02EEC0.idx | Bin 0 -> 1368 bytes .../clangd/index/node.h.714EF12F6C2AA1A8.idx | Bin 0 -> 1694 bytes .../index/parser.h.AB8FD292E884B1D7.idx | Bin 0 -> 984 bytes .../regex_helpers.h.468BB76668FECF71.idx | Bin 0 -> 640 bytes .../clangd/index/token.h.06BA855CD31E4C24.idx | Bin 0 -> 364 bytes .clang-format | 25 +++ .gitignore | 8 + compile_commands.json | 6 + src/codegen.c | 87 ++++++++++ src/codegen.h | 13 ++ src/compiler.c | 21 +++ src/compiler.h | 39 +++++ src/helpers.c | 44 +++++ src/helpers.h | 20 +++ src/lexer.c | 51 ++++++ src/lexer.h | 24 +++ src/main.c | 51 ++++++ src/node.h | 63 +++++++ src/parser.c | 164 ++++++++++++++++++ src/parser.h | 21 +++ src/token.h | 9 + test.txt | 10 ++ xmake.lua | 79 +++++++++ 27 files changed, 735 insertions(+) create mode 100644 .cache/clangd/index/codegen.h.B28082C1C1D042BA.idx create mode 100644 .cache/clangd/index/compiler.h.D712E1B2AB94B380.idx create mode 100644 .cache/clangd/index/helpers.h.F9575AB77341F585.idx create mode 100644 .cache/clangd/index/lexer.h.4F141419C0AC5007.idx create mode 100644 .cache/clangd/index/main.c.2393E5B60B02EEC0.idx create mode 100644 .cache/clangd/index/node.h.714EF12F6C2AA1A8.idx create mode 100644 .cache/clangd/index/parser.h.AB8FD292E884B1D7.idx create mode 100644 .cache/clangd/index/regex_helpers.h.468BB76668FECF71.idx create mode 100644 .cache/clangd/index/token.h.06BA855CD31E4C24.idx create mode 100644 .clang-format create mode 100644 .gitignore create mode 100644 compile_commands.json create mode 100644 src/codegen.c create mode 100644 src/codegen.h create mode 100644 src/compiler.c create mode 100644 src/compiler.h create mode 100644 src/helpers.c create mode 100644 src/helpers.h create mode 100644 src/lexer.c create mode 100644 src/lexer.h create mode 100644 src/main.c create mode 100644 src/node.h create mode 100644 src/parser.c create mode 100644 src/parser.h create mode 100644 src/token.h create mode 100644 test.txt create mode 100644 xmake.lua diff --git a/.cache/clangd/index/codegen.h.B28082C1C1D042BA.idx b/.cache/clangd/index/codegen.h.B28082C1C1D042BA.idx new file mode 100644 index 0000000000000000000000000000000000000000..83e81db68d9c6a783904eaf323cbd11090937778 GIT binary patch literal 462 zcmWIYbaOk#$iU#7;#rZKT9U}Zz`!63#Kk2=nN2|Y5)fC+Nxl%U(rnA8)Bc{JCzyq{ z&f<|O=u>mv6e{U#G-2wzS?r4+U-tSu$=m4j=SP!;i;E0ggM%g)OI$K44qE%=>sRAX z#lc^@g^Rz&ewyrE9Q@g<>nn5Dr_Di=8+)Goxx=(zlhWOkzmFEB)UI%>l6;cEwo`1y zW*vLuRz?Q-Ag|cs%G{)6peq(s1Wt|*%;M%@Uws6b73N71p^YEzIV=)|Zisosol45Gu+5vks^lYzLzxR9Z_r7oA zw?{@Q#Q+f*+l#X^^V10cP@%_~pSLGj0C2zsptz|!ek^XtU%l9Ir&3~!)&$=AoA}`5 z`{!Ki174_GI!%suk4(CfGdI~_cS}ob^p&a7No!n=?b*JQ**R>Y&+k>8m$P1MSZs{U zzEK$dMaEFpeA%_1#+Y+E-z|)xgVt@Tuj$Y{>CL?oe1CJ>oyFk=Ye$uNzxvp=xnWD_ zNZ14`VUIpdHy_+u5cN3V-b>T+8gCn8(0wFXS-3kWys)ky{6Mp3KcP`vY<`kdb+>F= zdC9<*5@SQ{e|}*J-8UP;>pt|Ve0AYv+ifa1_3Rqrh*fNBlnqR0z5d$L8@0b}Oivmk zdaqI`8=K;j#+i@bIs4r7(d&C>v%YQhEDB}U_gucz-}z-=MZvCPDSM;bMg7XJ{;Fry zmPh#}^S<&6zHW`FKc1hzXMUVWul(s9eM#lhHPjYc?miIzPjJsfMS@uKsHL&9ZpO)P}2+7sH{a`K0-Z_l!IM;aVJ^#XACoIez^DXz(OgVa_$#s^ zR|cd=AR}Z-9-M;7SVEF!`ECOZ8nlKRdXvPY28s}Z(hXG@pSjkR$87SG0rFb!E%cW1 zz*hi30{_D+`_vWlAvCb|0-!gFj1*8V0&rC$!Kg5n4ZNt@!GHo)kP=cg56&}e7|xjW z4GB+CfR_mLtc+Fi0HnR1*O_M$EiQ-B6ktG~4RV7U5BPH>8TRkz<8$>4S5Qw9q>=|` z`QoXF-K}YvjSR?@GMlf=}8b7eTNX>bR={JeNna$~2bcpsKJPhkGt5&yO2j zpK=1N6_A3}a~;wgKNS!vJB?@^sax$C2Mp0%y6{fr=c}6-`l$)db%SvYzIB{7q~@ZA z)vY(CgRds`EF)S?s#kl)(L*$sJFJGh%(v3H7i-=wYZN!};J`jsdqrVQEi-F#cRYL` T@^sgF=)ePX&Nj literal 0 HcmV?d00001 diff --git a/.cache/clangd/index/helpers.h.F9575AB77341F585.idx b/.cache/clangd/index/helpers.h.F9575AB77341F585.idx new file mode 100644 index 0000000000000000000000000000000000000000..85f6de16e23b939872897ba6a5c8e2642dcf73c3 GIT binary patch literal 742 zcmWIYbaT7M#K7R3;#rZKT9U}Zz`!63#Kk2=nL>;V40b@diaDkJr}LT(1imc)abTgN zr|vE9qnbg#8K?eYJfXu~_LMzJIz;o-{_6pgrcIFBGwbf$nX;SP--Jwg5ZfU$MP=Uh zTMH(Oowwe0^Q)1Y%Jw+HvLB(Y^Ugom^Ze$^l~?pW38cy#YMrw!%q>8;n2q~${N(3X zUkZ5c&NJFA{w;0KlRbXd-&Oyctruu^@Y|n#S<>mpv)=PI8(G)MaISuu7s{X8J>^;D z$5~l(@16K}>ZZsZ5hsoOK$gs-v)d0Y>UP{S`M-+?!=AUoy?hExjR6@|OFpbv_egQV z1->&E7R)fd_oioEtKni>5q)Deoztu^}pPNsE^U5U` zv#Prr^21t><`q}wCba-VsAoaY+_K|>vOEkzKpqJ23-AbtGH|joGVpOT^0OwtH@(Gk z$TgaWK?EYtz|X_VBf!AP&dear3si9J%`$zJGli)<45Bave5`ze44fP+44gbb1#5pU ze4?z+^Oy%5C15LfIe2*)IN6vO#JGS8ic-^xUjxMhoK6=`*KlKDpy8X(m|d@?ZCv4_uOeY6s6`Qg4o4H$;HYb2EVU<=F;EQb3>Q}1Q`TCA`k!%1_lNIuT literal 0 HcmV?d00001 diff --git a/.cache/clangd/index/lexer.h.4F141419C0AC5007.idx b/.cache/clangd/index/lexer.h.4F141419C0AC5007.idx new file mode 100644 index 0000000000000000000000000000000000000000..9d36e6f9eb5faab7d15d23111371cb32041795a2 GIT binary patch literal 1206 zcmYLH3rJH@7(RFIad&rj?sa!JbxpIh(ClF_%u3T5`K~mJ!jfen0kYSj^3@@n-a07% z*~NBD7M?sd@aS{pA1AMLEYqZB*Wc)Uve}r{`^xw(QPVKdnC8l~PoBQpl2FiVYX2Qr z26c{#@b0x^i^dJV2F7Auc21Y+8b%u9{B$2{t{dLJ%-(7ZEpTMS1kW+sZ$!y`swSS? z+pv8J##xp=EB>+VueMI2F+T-`fTOsg6 zddNbp2w1-*x9h@(f!2Ev_@FXLliniWvSVzB=K8bxXz_6gJ3y^Wi^j_&V73~K@6cE2 z>sr=7R|$qdN${D;c-H!TZjjD2fdy0Ex$ThglK{!7);}>(GW2x507I)-9kpV8^?oaAsbBN z+n^z093DRT%T7mkY|~R@F)1ZgSd26BqTJbdx@pKz42MsTO!MRB;VTy1DQRHn;8#Le t0elcXw8gwXOHf-ayRA!wA65YOnz%17&8_b*9$VM? ziXwbKVJ#6Yd6=-)!=+^(kGO+`d02S3V1_;Oa(4>kE*As}i%N>B!c_Mx2%1}VTu>G$ z%qz~T026*>#Jp#fM3WX!SW->W0A|Rghc~tq@f+&_g=JV|cwnkkm>%YA^$W4&V26*GMhz`6DE9)$w$`gxNr3ZivhV-S7bB-guy}9H$&y_;#jDx6cK}6p-0Ml5#LZOw)H9a@R9! z1qy5GY8t_WVKgHbO29XAIR0FJ>DdgRYGD~+4VbE%R)R^_3)$B&vIxlu%8A2-4Hxe_ zyEQFhJ5aS2hZa0cZr;gXu_oX32vAr+OTY@Iy8K)~XRVUaF(wvpz`}%4jokX~K9BJE z#4|v>QY=z%&ux&IBr|J)?MZK;@cbO`!y?P{%`G(N z$~<_aUt2ZpAyBoZq9!~Yto^z0iLyS=V|Ji(xdh-*2#gCZK_s3Kmk<(9m`fOmC&DFy z#1rKbMdFEZiNSd(cl4Ha3Os%Sbb*+p7~BODrZ^`Q{lD}IC=3c-Sdz0my|Owk<=uCn zu!y*b8qCvH0*B^qomu`5{e>#9#~t1_3yeQRrR! z(G}lLG8=R8@e2qFi-?Lzh%<=76M OHWp@1CU#y9Ms5H(1K(l* literal 0 HcmV?d00001 diff --git a/.cache/clangd/index/node.h.714EF12F6C2AA1A8.idx b/.cache/clangd/index/node.h.714EF12F6C2AA1A8.idx new file mode 100644 index 0000000000000000000000000000000000000000..2af2f2822d02dd1c26e423f5eede22ea06e84eaa GIT binary patch literal 1694 zcmZvc4NO~A6vyvtySBXd`uZ;YfKuBr7*3}FF-Es3!qD*{0|JXujfokf;2f<;I|tJ> zOk*--=mrx7Cc(*s!I(He(h%lA7;^-~fZLo#qrgx^7lmbp4CZ~eJyaKO(tmExXU@65 zyzJDZq-rHXkL9J7IPC823WSi3{9NwhouLdu3K>ErN4v6yo&EQT3r!0)q1}-xe%ba! zLSo{WJT7^3uP*y$f~Rr%+HdGs^K94Y&iZeM4t5S(y$>!nZY|P|eplu>GE-=JU~%-9 z*V0NOqdv^5b9kGpuhr#tUvaklyt{Y#-Sr_`wwgB2h+WY?evm$vRp@S;%NR;)>KSO6 z&-Vb`YAjcBeSqUhPaskA@keTOIacTa^s`p($8s*q-Lf6e04bC?mirI23 z=YW7T#4WO-Kn2|Q?)#$tnSph+KDUuI3Rn_wNA)^-C)!)vIjJA!WjsqIFdp+)_QIm< zMhAyD67W9@iv8%!NLRkCn?r!e(uK!O z6Bm?-rAw9iXjH+0fI~~a?%^(_j|#{@M%0R0Jr$V2ITPJs&oNE-+y>F0$C7}1WaHhL z^)J7DgF|vsIYbsBPzjXJZyC%wYZ8{of>j8cHD*i&@{8^Hu8Dt<#ITeg7z+rJ$$aGa zM^Mmzd*5sC;Z@5GK#`2~(c=%|u|;js1EP1>PHel{b~u?dh)_oe{ul&+5wk=W^yRC@ zUkcCde3FW}X#^mWe zj30Y{I6zCHC1Rlnpi0aVr(gy^iCLl(%m5}aOH_i{_VCW5Q}PPk%Tp z@V02cmh4kv7Had@tT6NNo`CLcxd1H5m1u=r7Kh8i3BtYuTbA=!sa5JAhpfr0S`AOm z`at%l0LCOmRER+^K97wGV-SoNgiR_F`PBFhfK7&$s0K3tP0SQzzr8%`jhT%3uxVnB zQb(=4A@EJ+k>G|RJhrN>dVptTCoE+f#w)fLZkG~V#XDRv(j{0D-x1-0kT!6|o0p%y N-m0-0<-wvB{R8b$pHBb) literal 0 HcmV?d00001 diff --git a/.cache/clangd/index/parser.h.AB8FD292E884B1D7.idx b/.cache/clangd/index/parser.h.AB8FD292E884B1D7.idx new file mode 100644 index 0000000000000000000000000000000000000000..d837d0a2064249b4349984d1dc091a8708bce255 GIT binary patch literal 984 zcmWIYbaT7F%)sEB;#rZKT9U}Zz`!63#Kk2=nJ<7e15mDF&fEao?862ESN$(OP~`P= zoqaZV!CYs>o4pqEH%57;s~i>Z`CpxS+%PX|){*Xt?_cbeBzG^V^t^u}#B1?m@BUps z>XL%rbg7@su3DhK#b(R%{Z#U$?30J?|Fb{R)n8{^SS?%W ze97(^-^q}7eMKn~)NcRX)^4hu3wW^e8~Mn z&N;1>GOInGzG43`jq6TvWp2`Qpm)Dow{7556yf7x5Ci!h2n5(!*##Il*_jx)c^Cy) zCqKz=`RSb{%)=lGk!KL#W#tuN;ACTDkmLm_m_PYuZo}&&eI5oOm;x?VE@1{vE>?&M zr3+hot*5AZ^MC^yYy~$fw*&(x2Qx%L`Prp^!k^DQ%flcJGl7$plb?Z;lLexnZ)a$m zTF{R-JPZ;r1w5=gQV1)mUl#C)&f)vO!yp1vz`@EPim-yAC^fBkIwJ!^UEHn2BCZd- zj9dbY9E<``)$m|o9%Z;E1$aeyWnnhMbO2od4ppcNU|NB~;IM=W z8!p~=c57P1b~d2H7}!~m9LB)Gg5)p;P8KAGF>tZKtl<&hX5eOlna(4?!@$D=a~Mnq z&`xmlK<%vC-#&+*N98&r&`w2mHJF_+T|m{~xPq#NIRYpQjv=TpFn)3pLG0q9?h0IB}~s@mA#Tk zD0pGHs+?;+2d6_p?WBu*Au0jhcRWD?IZvHPmm*WN0w%uPxLhCuRr(_1`;T%&my z_<=kS;AUfG6JX$EWnvKGV&rDM_GX#B%9+Ac9tHu3JOeieD~BKhCmS;ZJ10=V+Mf%b zDC_e)=3(H2DPZSd=VjnzVPp{G04iW8N=+-?3zUmJyy)4}30rs>dAK&iR5Qzu^FTKNC*lmqu?KkN)o{lT#8jMCEL%G{)QpgpGPI}W+)nYHpUure|* zvkI_^f(Zr=pd1j??QfsM&!cjkn}LaeftihojU7xdaDrtSnOS8XJkqbNn)Z;7fd#0a zg@c6$Ou+Or6s4vW#{d + +void codegen(Node node) { + switch(node.type) { + case NODE_FUNCTION_CALL: + codegen_function_call(node); + break; + case NODE_FUNCTION_DECL: + codegen_function_decl(node); + break; + case NODE_FUNCTION_IMPL: + codegen_function_impl(node); + break; + case NODE_ARG_DECL: + codegen_arg_decl(node); + break; + case NODE_NUMBER: + codegen_number(node); + break; + default: + syntax_error("unexpected node %c", node.type); + } +} + +void codegen_function_call(Node node) { + printf("FunctionCall(%s", node.function_call.name); + if(Nodes_size(&node.function_call.args) > 0) + printf(", "); + for(size_t i = 0; i < Nodes_size(&node.function_call.args); i++) { + const Node* arg = Nodes_at(&node.function_call.args, i); + codegen(*arg); + + if(arg != Nodes_back(&node.function_call.args)) { + printf(", "); + } + } + printf(")\n"); +} + +void codegen_function_decl(Node node) { + printf("FunctionDecl(%s, %s, ", node.function_decl.type, node.function_decl.name); + for(size_t i = 0; i < Nodes_size(&node.function_decl.args); i++) { + const Node* arg = Nodes_at(&node.function_decl.args, i); + codegen(*arg); + + if(arg != Nodes_back(&node.function_decl.args)) { + printf(", "); + } + } + printf(")\n"); +} + +void codegen_function_impl(Node node) { + printf("FunctionImpl(%s, %s", node.function_impl.type, node.function_impl.name); + if(Nodes_size(&node.function_impl.args) > 0) + printf(", "); + for(size_t i = 0; i < Nodes_size(&node.function_impl.args); i++) { + const Node* arg = Nodes_at(&node.function_impl.args, i); + codegen(*arg); + + if(arg != Nodes_back(&node.function_impl.args)) { + printf(", "); + } + } + printf(") {\n\t"); + + for(size_t i = 0; i < Nodes_size(&node.function_impl.body); i++) { + const Node* n = Nodes_at(&node.function_impl.body, i); + codegen(*n); + + if(n != Nodes_back(&node.function_impl.body)) { + printf("\t"); + } + } + + printf("}\n"); +} + +void codegen_arg_decl(Node node) { + printf("ArgDecl(%s, %s)", node.arg_decl.type, node.arg_decl.name); +} + +void codegen_number(Node node) { + printf("Number(%llu)", node.number.value); +} diff --git a/src/codegen.h b/src/codegen.h new file mode 100644 index 0000000..d133082 --- /dev/null +++ b/src/codegen.h @@ -0,0 +1,13 @@ +#ifndef CODEGEN_H +#define CODEGEN_H + +#include "node.h" + +void codegen(Node node); +void codegen_function_call(Node node); +void codegen_function_decl(Node node); +void codegen_arg_decl(Node node); +void codegen_function_impl(Node node); +void codegen_number(Node node); + +#endif diff --git a/src/compiler.c b/src/compiler.c new file mode 100644 index 0000000..561a55c --- /dev/null +++ b/src/compiler.c @@ -0,0 +1,21 @@ +#include "compiler.h" + +bool next(Compiler* compiler, Token* token) { + if(!lexer_next(compiler->lexer, token)) { + return false; + } + + switch(token->type) { + case TOKEN_WHITESPACE: + return next(compiler, token); + case TOKEN_IDENTIFIER: + if(cmap_str_contains(&compiler->types, token->value)) { + token->type = TOKEN_TYPEIDENTIFIER; + } + break; + } + + printf("tok: %c, val: %s\n", token->type, token->value); + + return true; +} diff --git a/src/compiler.h b/src/compiler.h new file mode 100644 index 0000000..d385f7b --- /dev/null +++ b/src/compiler.h @@ -0,0 +1,39 @@ +#ifndef COMPILER_H +#define COMPILER_H + +#include "lexer.h" + +typedef enum { + TOKEN_IDENTIFIER = 'I', + TOKEN_STRING = 'S', + TOKEN_NUMBER = 'N', + TOKEN_TYPEIDENTIFIER = 'T', + TOKEN_WHITESPACE = 'W', + TOKEN_OPAREN = '(', + TOKEN_CPAREN = ')', + TOKEN_OBRACE = '{', + TOKEN_CBRACE = '}', + TOKEN_SEMICOLON = ';', + TOKEN_COMMA = ',' +} TokenType; + +#include +#include + +#define i_key_str +#define i_val_str +#include + +#define i_type Tokens +#define i_val Token +#define i_opt c_no_cmp +#include + +typedef struct { + Lexer* lexer; + cmap_str types; +} Compiler; + +bool next(Compiler* compiler, Token* token); + +#endif diff --git a/src/helpers.c b/src/helpers.c new file mode 100644 index 0000000..05f6f9c --- /dev/null +++ b/src/helpers.c @@ -0,0 +1,44 @@ +#include "helpers.h" +#include +#include + +void regex_error(const regex_t* regex, int status) { + if(status > REG_NOMATCH) { + char error_msg[100]; + regerror(status, regex, error_msg, sizeof(error_msg)); + fprintf(stderr, "Regex compilation failed: %s\n", error_msg); + exit(EXIT_FAILURE); + } +} + +regex_t regex(const char* pattern) { + regex_t regex; + regex_error(®ex, regcomp(®ex, pattern, REG_EXTENDED)); + + return regex; +} + +bool regex_search(regmatch_t* match, const regex_t* regex, const char* string) { + int status = regexec(regex, string, 1, match, 0); + regex_error(regex, status); + + return !status; +} + +const char* read_file(const char* filename) { + FILE* file = fopen(filename, "r"); + if(file == NULL) { + perror("Failed to open file"); + exit(EXIT_FAILURE); + } + + fseek(file, 0, SEEK_END); + size_t size = ftell(file); + rewind(file); + + char* buffer = malloc(size + 1); + fread(buffer, size, 1, file); + fclose(file); + + return buffer; +} diff --git a/src/helpers.h b/src/helpers.h new file mode 100644 index 0000000..08c0133 --- /dev/null +++ b/src/helpers.h @@ -0,0 +1,20 @@ +#ifndef HELPERS_H +#define HELPERS_H + +#include +#include + +#define syntax_error(...) \ + { \ + fprintf(stderr, "Syntax error: " __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(EXIT_FAILURE); \ + } + +void regex_error(const regex_t* regex, int status); +regex_t regex(const char* pattern); +bool regex_search(regmatch_t* match, const regex_t* regex, const char* string); + +const char* read_file(const char* filename); + +#endif diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..5a627df --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,51 @@ +#include "lexer.h" +#include "helpers.h" +#include +#include +#include +#include + +Lexer lexer_create(const TokenRule* rules, size_t num_rules) { + return (Lexer){ + .text = NULL, + .offset = 0, + .rules = rules, + .num_rules = num_rules, + }; +} + +void lexer_feed(Lexer* lexer, const char* text) { + assert(lexer != NULL); + lexer->text = text; +} + +bool lexer_next(Lexer* lexer, Token* token) { + assert(lexer != NULL); + assert(lexer->text != NULL); + + if(lexer->offset >= strlen(lexer->text)) { + return false; + } + + regmatch_t match; + for(int i = 0; i < lexer->num_rules; i++) { + TokenRule rule = lexer->rules[i]; + + if(regex_search(&match, &rule.regex, lexer->text + lexer->offset)) { + int length = match.rm_eo; + + char* slice = malloc(length); + strncpy(slice, lexer->text + lexer->offset, length); + slice[length] = '\0'; + + lexer->offset += length; + + token->type = rule.token_type; + token->value = slice; + return true; + } + } + + fprintf(stderr, "Unrecognized character: %c\n", *(lexer->text + lexer->offset)); + exit(EXIT_FAILURE); +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..fec0d8f --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,24 @@ +#ifndef LEXER_H +#define LEXER_H + +#include "token.h" +#include +#include + +typedef struct { + regex_t regex; + int token_type; +} TokenRule; + +typedef struct { + int offset; + const char* text; + const TokenRule* rules; + size_t num_rules; +} Lexer; + +Lexer lexer_create(const TokenRule* rules, size_t num_rules); +void lexer_feed(Lexer* lexer, const char* text); +bool lexer_next(Lexer* lexer, Token* token); + +#endif diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..a016488 --- /dev/null +++ b/src/main.c @@ -0,0 +1,51 @@ +#include "codegen.h" +#include "compiler.h" +#include "helpers.h" +#include "lexer.h" +#include "node.h" +#include "parser.h" +#include "token.h" +#include +#include +#include + +int main(int argc, char** argv) { + assert(argc > 1); + const char* text = read_file(argv[1]); + + TokenRule rules[] = { + { regex("^[A-Za-z][A-Za-z0-9]*"), TOKEN_IDENTIFIER }, + { regex("^[ \r\n]+"), TOKEN_WHITESPACE }, + { regex("^\".*?\""), TOKEN_STRING }, + { regex("^[0-9]+"), TOKEN_NUMBER }, + { regex("^;"), TOKEN_SEMICOLON }, + { regex("^\\("), TOKEN_OPAREN }, + { regex("^\\{"), TOKEN_OBRACE }, + { regex("^\\)"), TOKEN_CPAREN }, + { regex("^\\}"), TOKEN_CBRACE }, + }; + + Lexer lexer = lexer_create(rules, c_arraylen(rules)); + lexer_feed(&lexer, text); + + Compiler compiler = { + .lexer = &lexer, + .types = c_make(cmap_str, + { + { "i8", "b" }, + { "i16", "h" }, + { "i32", "w" }, + { "i64", "l" }, + { "void", "" }, + }), + }; + + Token token; + while(next(&compiler, &token)) { + // printf("%c: %s\n", token.type, token.value); + Node node = parse_token(&compiler, token); + codegen(node); + } + + return 0; +} diff --git a/src/node.h b/src/node.h new file mode 100644 index 0000000..d040b27 --- /dev/null +++ b/src/node.h @@ -0,0 +1,63 @@ +#ifndef NODE_H +#define NODE_H + +#include + +#include +typedef struct Node Node; +forward_cvec(Nodes, struct Node); + +typedef enum { + NODE_FUNCTION_CALL = 'C', + NODE_FUNCTION_DECL = 'D', + NODE_FUNCTION_IMPL = 'I', + NODE_ARG_DECL = 'A', + NODE_NUMBER = 'N', +} NodeType; + +typedef struct { + const char* name; + Nodes args; +} FunctionCall; + +typedef struct { + const char* type; + const char* name; + Nodes args; +} FunctionDecl; + +typedef struct { + const char* type; + const char* name; + Nodes args; + Nodes body; +} FunctionImpl; + +typedef struct { + const char* type; + const char* name; +} ArgDecl; + +typedef struct { + long long int value; +} Number; + +struct Nodes; +typedef struct Node { + NodeType type; + union { + FunctionCall function_call; + FunctionDecl function_decl; + FunctionImpl function_impl; + ArgDecl arg_decl; + Number number; + }; +} Node; + +#define i_type Nodes +#define i_is_forward +#define i_val Node +#define i_opt c_no_cmp +#include + +#endif diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..df0eee6 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,164 @@ +#include "parser.h" +#include "helpers.h" + +Node parse_token(Compiler* self, Token token) { + switch(token.type) { + case TOKEN_TYPEIDENTIFIER: + return parse_type(self, token.value); + case TOKEN_IDENTIFIER: + return parse_identifier(self, token.value); + case TOKEN_NUMBER: + return parse_number(self, token.value); + default: + syntax_error("unexpected token \"%s\"", token.value); + } +} + +Nodes parse_until(Compiler* self, TokenType end_token) { + Nodes collected = { 0 }; + + Token token; + while(next(self, &token)) { + if(token.type == end_token) { + break; + } + + Nodes_push(&collected, parse_token(self, token)); + } + + return collected; +} + +Node parse_type(Compiler* self, const char* type) { + printf("parse type %s\n", type); + Token token; + next(self, &token); + + switch(token.type) { + case TOKEN_IDENTIFIER: + return parse_type_ident_pair(self, type, token.value); + break; + default: + syntax_error("unexpected token \"%s\" after type \"%s\"", token.value, type); + } +} + +Node parse_identifier(Compiler* self, const char* name) { + Token token; + next(self, &token); + + switch(token.type) { + case TOKEN_OPAREN: + return parse_function_call(self, name); + break; + default: + syntax_error("unexpected token \"%s\" after identifier \"%s\"", token.value, name); + } +} + +Node parse_number(Compiler* self, const char* value) { + return (Node){ + .type = NODE_NUMBER, + .number = atoll(value), + }; +} + +Node parse_type_ident_pair(Compiler* self, const char* type, const char* name) { + printf("parse type ident pair %s %s\n", type, name); + Token token; + next(self, &token); + + switch(token.type) { + case TOKEN_OPAREN: + return parse_function(self, type, name); + default: + syntax_error("unexpected token \"%s\" after \"%s %s\"", token.value, type, name); + } +} + +Node parse_function(Compiler* self, const char* type, const char* name) { + printf("parse function %s %s\n", type, name); + Nodes args = { 0 }; + while(true) { + Token arg_type; + next(self, &arg_type); + if(arg_type.type == TOKEN_CPAREN) { + break; + } + + Token arg_name; + next(self, &arg_name); + + Nodes_push(&args, + (Node){ + .type = NODE_ARG_DECL, + .arg_decl = { .type = arg_type.value, .name = arg_name.value }, + }); + + Token token; + next(self, &token); + switch(token.type) { + case TOKEN_COMMA: + continue; + case TOKEN_CPAREN: + break; + default: + syntax_error("expected comma or closing parenthesis, found %s", token.value); + } + + break; // only reached if didnt continue or error + } + + Token token; + if(next(self, &token)) { + switch(token.type) { + case TOKEN_SEMICOLON: + return parse_function_decl(self, type, name, args); + case TOKEN_OBRACE: + return parse_function_impl(self, type, name, args); + default: + syntax_error("expected semicolon or opening brace found, %s", token.value); + } + } else { + syntax_error("expected token, found eof"); + } +} + +Node parse_function_decl(Compiler* self, const char* type, const char* name, Nodes args) { + return (Node){ + .type = NODE_FUNCTION_DECL, + .function_decl = { .type = type, .name = name, .args = args }, + }; +} + +Node parse_function_impl(Compiler* self, const char* type, const char* name, Nodes args) { + Nodes body = parse_until(self, TOKEN_CBRACE); + + return (Node){ + .type = NODE_FUNCTION_IMPL, + .function_impl = { .type = type, .name = name, .args = args, .body = body }, + }; +} + +Node parse_function_call(Compiler* self, const char* name) { + printf("parse function call %s\n", name); + Nodes args = parse_until(self, TOKEN_CPAREN); + + parse_semicolon(self); + return (Node){ + .type = NODE_FUNCTION_CALL, + .function_call = { .name = name, .args = args }, + }; +} + +void parse_semicolon(Compiler* self) { + Token token; + next(self, &token); + + switch(token.type) { + case TOKEN_SEMICOLON: + return; + default: + syntax_error("expected semicolon, found \"%s\"", token.value); + } +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..5828ea9 --- /dev/null +++ b/src/parser.h @@ -0,0 +1,21 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "compiler.h" +#include "node.h" +#include "token.h" + +Node parse_token(Compiler* self, Token token); +Nodes parse_until(Compiler* self, TokenType end_token); +Node parse_type(Compiler* self, const char* type); +Node parse_identifier(Compiler* self, const char* name); +Node parse_number(Compiler* self, const char* value); +Node parse_type_ident_pair(Compiler* self, const char* type, const char* name); +Node parse_arg_decl(Compiler* self, const char* type, const char* name); +Node parse_function(Compiler* self, const char* type, const char* name); +Node parse_function_decl(Compiler* self, const char* type, const char* name, Nodes args); +Node parse_function_impl(Compiler* self, const char* type, const char* name, Nodes args); +Node parse_function_call(Compiler* self, const char* name); +void parse_semicolon(Compiler* self); + +#endif diff --git a/src/token.h b/src/token.h new file mode 100644 index 0000000..bd5ee4e --- /dev/null +++ b/src/token.h @@ -0,0 +1,9 @@ +#ifndef TOKEN_H +#define TOKEN_H + +typedef struct { + int type; + const char* value; +} Token; + +#endif diff --git a/test.txt b/test.txt new file mode 100644 index 0000000..6afcd8a --- /dev/null +++ b/test.txt @@ -0,0 +1,10 @@ +hey "bob \"mr goonman\" gooner" + +i32 putchar(i32 char); + +void sayhi() { + putchar(72); + putchar(105); +} + +sayhi(); diff --git a/xmake.lua b/xmake.lua new file mode 100644 index 0000000..eae5da5 --- /dev/null +++ b/xmake.lua @@ -0,0 +1,79 @@ +add_rules("mode.debug", "mode.release") + +add_requires("stc") + +target("compiler-c") + set_kind("binary") + add_files("src/*.c") + add_packages("stc") + set_rundir(".") + +-- +-- If you want to known more usage about xmake, please see https://xmake.io +-- +-- ## FAQ +-- +-- You can enter the project directory firstly before building project. +-- +-- $ cd projectdir +-- +-- 1. How to build project? +-- +-- $ xmake +-- +-- 2. How to configure project? +-- +-- $ xmake f -p [macosx|linux|iphoneos ..] -a [x86_64|i386|arm64 ..] -m [debug|release] +-- +-- 3. Where is the build output directory? +-- +-- The default output directory is `./build` and you can configure the output directory. +-- +-- $ xmake f -o outputdir +-- $ xmake +-- +-- 4. How to run and debug target after building project? +-- +-- $ xmake run [targetname] +-- $ xmake run -d [targetname] +-- +-- 5. How to install target to the system directory or other output directory? +-- +-- $ xmake install +-- $ xmake install -o installdir +-- +-- 6. Add some frequently-used compilation flags in xmake.lua +-- +-- @code +-- -- add debug and release modes +-- add_rules("mode.debug", "mode.release") +-- +-- -- add macro definition +-- add_defines("NDEBUG", "_GNU_SOURCE=1") +-- +-- -- set warning all as error +-- set_warnings("all", "error") +-- +-- -- set language: c99, c++11 +-- set_languages("c99", "c++11") +-- +-- -- set optimization: none, faster, fastest, smallest +-- set_optimize("fastest") +-- +-- -- add include search directories +-- add_includedirs("/usr/include", "/usr/local/include") +-- +-- -- add link libraries and search directories +-- add_links("tbox") +-- add_linkdirs("/usr/local/lib", "/usr/lib") +-- +-- -- add system link libraries +-- add_syslinks("z", "pthread") +-- +-- -- add compilation and link flags +-- add_cxflags("-stdnolib", "-fno-strict-aliasing") +-- add_ldflags("-L/usr/local/lib", "-lpthread", {force = true}) +-- +-- @endcode +-- +