first commit

This commit is contained in:
sam 2024-11-15 19:37:01 +13:00
commit 0fd66745c8
27 changed files with 735 additions and 0 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

25
.clang-format Normal file
View file

@ -0,0 +1,25 @@
BasedOnStyle: WebKit
IndentWidth: 4
TabWidth: 4
UseTab: Never
AlignConsecutiveDeclarations: false
AlignConsecutiveAssignments: false
AlignTrailingComments: true
ColumnLimit: 105
BreakBeforeBraces: Attach
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AllowShortFunctionsOnASingleLine: false
AllowShortLambdasOnASingleLine: false
PointerAlignment: Left
SpaceBeforeParens: Never
SpacesInParentheses: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: false
SpaceAfterCStyleCast: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeSquareBrackets: false
SpacesBeforeTrailingComments: 2
PenaltyBreakAssignment: 1000
NamespaceIndentation: All

8
.gitignore vendored Normal file
View file

@ -0,0 +1,8 @@
# Xmake cache
.xmake/
build/
# MacOS Cache
.DS_Store

6
compile_commands.json Normal file
View file

@ -0,0 +1,6 @@
[
{
"directory": "/home/sam/Documents/Projects/compiler-c",
"arguments": ["/usr/bin/gcc", "-c", "-fvisibility=hidden", "-O3", "-I", "/home/sam/.xmake/packages/s/stc/v4.2/bfec6d3335d54b48969cc50946a9b5ac/include", "-DNDEBUG", "-o", "build/.objs/compiler-c/linux/arm64/release/src/main.c.o", "src/main.c"],
"file": "src/main.c"
}]

87
src/codegen.c Normal file
View file

@ -0,0 +1,87 @@
#include "codegen.h"
#include "helpers.h"
#include <stdio.h>
/*
 * Print the textual form of `node` to stdout by dispatching on its type tag
 * to the matching codegen_* routine.
 *
 * A node whose tag is not one of the known NodeType values is reported
 * through syntax_error(), which terminates the process.
 */
void codegen(Node node) {
    switch(node.type) {
    case NODE_NUMBER:
        codegen_number(node);
        return;
    case NODE_ARG_DECL:
        codegen_arg_decl(node);
        return;
    case NODE_FUNCTION_IMPL:
        codegen_function_impl(node);
        return;
    case NODE_FUNCTION_DECL:
        codegen_function_decl(node);
        return;
    case NODE_FUNCTION_CALL:
        codegen_function_call(node);
        return;
    default:
        break;
    }
    // Only reached for an unknown tag.
    syntax_error("unexpected node %c", node.type);
}
/*
 * Print a call node as `FunctionCall(name, arg1, arg2, ...)` followed by a
 * newline. Each argument is rendered by codegen() and preceded by ", ".
 */
void codegen_function_call(Node node) {
    const Nodes* call_args = &node.function_call.args;
    const size_t count = Nodes_size(call_args);

    printf("FunctionCall(%s", node.function_call.name);
    for(size_t idx = 0; idx < count; idx++) {
        // The separator goes in front of every argument, including the first.
        printf(", ");
        codegen(*Nodes_at(call_args, idx));
    }
    printf(")\n");
}
/*
 * Print a declaration node as `FunctionDecl(type, name, arg1, ...)` followed
 * by a newline.
 *
 * The ", " separator is emitted in front of each argument, so a declaration
 * without arguments prints `FunctionDecl(type, name)` rather than leaving a
 * dangling ", " before the closing parenthesis. This matches how the call
 * and impl printers format their argument lists.
 */
void codegen_function_decl(Node node) {
    const Nodes* params = &node.function_decl.args;
    const size_t count = Nodes_size(params);

    printf("FunctionDecl(%s, %s", node.function_decl.type, node.function_decl.name);
    for(size_t i = 0; i < count; i++) {
        printf(", ");
        codegen(*Nodes_at(params, i));
    }
    printf(")\n");
}
/*
 * Print a function implementation node.
 *
 * Output shape: `FunctionImpl(type, name, arg1, ...) {`, a newline and a tab,
 * then every body node rendered by codegen() with a tab between consecutive
 * nodes, and finally `}` plus a newline.
 */
void codegen_function_impl(Node node) {
    const Nodes* params = &node.function_impl.args;
    const Nodes* statements = &node.function_impl.body;
    const size_t param_count = Nodes_size(params);
    const size_t statement_count = Nodes_size(statements);

    printf("FunctionImpl(%s, %s", node.function_impl.type, node.function_impl.name);
    for(size_t p = 0; p < param_count; p++) {
        printf(", ");
        codegen(*Nodes_at(params, p));
    }

    printf(") {\n\t");
    for(size_t s = 0; s < statement_count; s++) {
        codegen(*Nodes_at(statements, s));
        // Indent the next statement; nothing follows the last one.
        if(s + 1 < statement_count) {
            printf("\t");
        }
    }
    printf("}\n");
}
/* Print an argument declaration as `ArgDecl(type, name)` with no trailing newline. */
void codegen_arg_decl(Node node) {
    const char* arg_type = node.arg_decl.type;
    const char* arg_name = node.arg_decl.name;
    printf("ArgDecl(%s, %s)", arg_type, arg_name);
}
/*
 * Print a numeric literal as `Number(value)` with no trailing newline.
 *
 * Number.value is a signed `long long int`, so it must be formatted with
 * %lld; %llu is a type mismatch and would misprint negative values.
 */
void codegen_number(Node node) {
    printf("Number(%lld)", node.number.value);
}

13
src/codegen.h Normal file
View file

@ -0,0 +1,13 @@
#ifndef CODEGEN_H
#define CODEGEN_H
#include "node.h"
/* Print `node` to stdout, dispatching on node.type to one of the routines below. */
void codegen(Node node);
/* Print a NODE_FUNCTION_CALL node: name plus its argument nodes. */
void codegen_function_call(Node node);
/* Print a NODE_FUNCTION_DECL node: return type, name and argument nodes. */
void codegen_function_decl(Node node);
/* Print a NODE_ARG_DECL node: argument type and name. */
void codegen_arg_decl(Node node);
/* Print a NODE_FUNCTION_IMPL node: signature followed by its body nodes. */
void codegen_function_impl(Node node);
/* Print a NODE_NUMBER node: the literal's integer value. */
void codegen_number(Node node);
#endif

21
src/compiler.c Normal file
View file

@ -0,0 +1,21 @@
#include "compiler.h"
/*
 * Fetch the next significant token from the compiler's lexer.
 *
 * Whitespace tokens are skipped. An identifier whose text is a key of
 * compiler->types is re-tagged as TOKEN_TYPEIDENTIFIER. Every token that is
 * returned is also traced to stdout.
 *
 * Returns false when the lexer has no more input, true otherwise (with
 * *token filled in).
 */
bool next(Compiler* compiler, Token* token) {
    // Pull tokens until one that is not whitespace shows up.
    do {
        if(!lexer_next(compiler->lexer, token)) {
            return false;
        }
    } while(token->type == TOKEN_WHITESPACE);

    const bool names_a_type
        = token->type == TOKEN_IDENTIFIER && cmap_str_contains(&compiler->types, token->value);
    if(names_a_type) {
        token->type = TOKEN_TYPEIDENTIFIER;
    }

    printf("tok: %c, val: %s\n", token->type, token->value);
    return true;
}

39
src/compiler.h Normal file
View file

@ -0,0 +1,39 @@
#ifndef COMPILER_H
#define COMPILER_H
#include "lexer.h"
/*
 * Kinds of token. Each enumerator is an ASCII character code, which lets the
 * debug trace print a token kind with %c; punctuation kinds use the
 * punctuation character itself.
 */
typedef enum {
TOKEN_IDENTIFIER = 'I',
TOKEN_STRING = 'S',
TOKEN_NUMBER = 'N',
/* Identifier that names a known type; assigned by next(), not by the lexer rules. */
TOKEN_TYPEIDENTIFIER = 'T',
/* Skipped by next(), so the parser never sees it. */
TOKEN_WHITESPACE = 'W',
TOKEN_OPAREN = '(',
TOKEN_CPAREN = ')',
TOKEN_OBRACE = '{',
TOKEN_CBRACE = '}',
TOKEN_SEMICOLON = ';',
TOKEN_COMMA = ','
} TokenType;
#include <stc/ccommon.h>
#include <stc/cstr.h>
/* STC template parameters: string keys and string values, giving the cmap_str type used by Compiler. */
#define i_key_str
#define i_val_str
#include <stc/cmap.h>
/* STC template parameters for a vector type named Tokens holding Token values. */
#define i_type Tokens
#define i_val Token
/* NOTE(review): c_no_cmp presumably disables element comparison for this container - confirm against STC docs. */
#define i_opt c_no_cmp
#include <stc/cvec.h>
/* State shared by the token-fetching and parsing routines. */
typedef struct {
/* Token source; next() pulls raw tokens from it. */
Lexer* lexer;
/*
 * Known type names. next() re-tags an identifier found among the keys as
 * TOKEN_TYPEIDENTIFIER. NOTE(review): the meaning of the mapped values is not
 * visible from this header - confirm with the code that populates the map.
 */
cmap_str types;
} Compiler;
/*
 * Read the next non-whitespace token into *token.
 * Returns false when the lexer has no more input.
 */
bool next(Compiler* compiler, Token* token);
#endif

44
src/helpers.c Normal file
View file

@ -0,0 +1,44 @@
#include "helpers.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Abort the program if `status` is a regex error code.
 *
 * `status` is the return value of regcomp() or regexec() for `regex`.
 * Success (0) and REG_NOMATCH are not errors and make this a no-op. Any
 * other value is turned into a message with regerror(), written to stderr,
 * and the process exits with EXIT_FAILURE.
 *
 * The check compares against the two non-error values explicitly instead of
 * relying on the numeric ordering of the REG_* constants, which POSIX does
 * not specify (some implementations define negative error codes).
 */
void regex_error(const regex_t* regex, int status) {
    if(status == 0 || status == REG_NOMATCH) {
        return;
    }

    char error_msg[256];
    regerror(status, regex, error_msg, sizeof(error_msg));
    // Called for both compilation and matching, so keep the wording generic.
    fprintf(stderr, "Regex error: %s\n", error_msg);
    exit(EXIT_FAILURE);
}
/*
 * Compile `pattern` as a POSIX extended regular expression and return the
 * compiled object by value. A compilation failure is handed to
 * regex_error(), which terminates the process.
 */
regex_t regex(const char* pattern) {
    regex_t compiled;
    const int status = regcomp(&compiled, pattern, REG_EXTENDED);
    regex_error(&compiled, status);
    return compiled;
}
/*
 * Run `regex` against `string`, storing the overall match offsets in *match.
 *
 * Returns true when regexec() reports a match and false when it does not.
 * Error statuses are passed to regex_error().
 */
bool regex_search(regmatch_t* match, const regex_t* regex, const char* string) {
    const int outcome = regexec(regex, string, 1, match, 0);
    regex_error(regex, outcome);
    return outcome == 0;
}
/*
 * Read the whole of `filename` into a freshly allocated, NUL-terminated
 * buffer.
 *
 * Returns the buffer; the caller owns it and releases it with free().
 * Any failure (open, seek, size query, allocation, read) prints a message
 * to stderr and exits with EXIT_FAILURE, so the function never returns NULL.
 */
const char* read_file(const char* filename) {
    FILE* file = fopen(filename, "r");
    if(file == NULL) {
        perror("Failed to open file");
        exit(EXIT_FAILURE);
    }

    if(fseek(file, 0, SEEK_END) != 0) {
        perror("Failed to seek in file");
        fclose(file);
        exit(EXIT_FAILURE);
    }
    // ftell() reports failure as -1, so keep the result signed until checked.
    const long size = ftell(file);
    if(size < 0) {
        perror("Failed to determine file size");
        fclose(file);
        exit(EXIT_FAILURE);
    }
    rewind(file);

    char* buffer = malloc((size_t)size + 1);
    if(buffer == NULL) {
        perror("Failed to allocate file buffer");
        fclose(file);
        exit(EXIT_FAILURE);
    }

    const size_t length = fread(buffer, 1, (size_t)size, file);
    if(ferror(file)) {
        perror("Failed to read file");
        free(buffer);
        fclose(file);
        exit(EXIT_FAILURE);
    }
    // Terminate after the bytes actually read so callers can use str* functions.
    buffer[length] = '\0';

    fclose(file);
    return buffer;
}

20
src/helpers.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef HELPERS_H
#define HELPERS_H
#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Report a syntax error and terminate the process.
 *
 * The arguments are a printf-style format string followed by its values. The
 * format must be a string literal because it is concatenated with the
 * "Syntax error: " prefix at compile time. The message goes to stderr,
 * followed by a newline, and the process exits with EXIT_FAILURE.
 *
 * Wrapped in do { } while(0) so that `syntax_error(...);` is a single
 * statement and stays valid inside an unbraced if/else.
 */
#define syntax_error(...) \
    do { \
        fprintf(stderr, "Syntax error: " __VA_ARGS__); \
        fprintf(stderr, "\n"); \
        exit(EXIT_FAILURE); \
    } while(0)
/* Print the regerror() message for `status` and exit if `status` is an error code. */
void regex_error(const regex_t* regex, int status);
/* Compile `pattern` with REG_EXTENDED and return the compiled regex; exits on failure. */
regex_t regex(const char* pattern);
/* Match `regex` against `string`, filling *match; returns true when a match was found. */
bool regex_search(regmatch_t* match, const regex_t* regex, const char* string);
/* Load the contents of `filename` into a malloc'd buffer; exits if the file cannot be opened. */
const char* read_file(const char* filename);
#endif

51
src/lexer.c Normal file
View file

@ -0,0 +1,51 @@
#include "lexer.h"
#include "helpers.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Lexer lexer_create(const TokenRule* rules, size_t num_rules) {
return (Lexer){
.text = NULL,
.offset = 0,
.rules = rules,
.num_rules = num_rules,
};
}
/*
 * Point the lexer at a new input string and restart scanning from its
 * beginning.
 *
 * The text is referenced, not copied, so it must outlive the lexer's use of
 * it. The read offset is reset so that feeding a second text does not start
 * at a stale position left over from the previous one.
 */
void lexer_feed(Lexer* lexer, const char* text) {
    assert(lexer != NULL);
    lexer->text = text;
    lexer->offset = 0;
}
/*
 * Produce the next token from the lexer's text.
 *
 * Rules are tried in array order and the first one that matches at the
 * current offset wins. On success *token receives the rule's token type and
 * a newly malloc'd, NUL-terminated copy of the matched text (owned by the
 * caller), the offset advances past the match, and true is returned.
 *
 * Returns false once the offset has reached the end of the text. If no rule
 * matches, or memory cannot be allocated, a message is written to stderr and
 * the process exits with EXIT_FAILURE.
 */
bool lexer_next(Lexer* lexer, Token* token) {
    assert(lexer != NULL);
    assert(lexer->text != NULL);
    assert(token != NULL);

    if((size_t)lexer->offset >= strlen(lexer->text)) {
        return false;
    }
    const char* cursor = lexer->text + lexer->offset;

    for(size_t i = 0; i < lexer->num_rules; i++) {
        // Use the rule in place; copying a compiled regex_t is unnecessary.
        const TokenRule* rule = &lexer->rules[i];
        regmatch_t match;
        if(!regex_search(&match, &rule->regex, cursor)) {
            continue;
        }
        // A token must start exactly at the cursor and consume at least one
        // character, otherwise input would be skipped or never advance.
        if(match.rm_so != 0 || match.rm_eo <= 0) {
            continue;
        }

        const size_t length = (size_t)match.rm_eo;
        char* slice = malloc(length + 1);  // +1 for the terminator
        if(slice == NULL) {
            fprintf(stderr, "Out of memory while lexing\n");
            exit(EXIT_FAILURE);
        }
        memcpy(slice, cursor, length);
        slice[length] = '\0';

        lexer->offset += (int)length;
        token->type = rule->token_type;
        token->value = slice;
        return true;
    }

    fprintf(stderr, "Unrecognized character: %c\n", *cursor);
    exit(EXIT_FAILURE);
}

24
src/lexer.h Normal file
View file

@ -0,0 +1,24 @@
#ifndef LEXER_H
#define LEXER_H
#include "token.h"
#include <regex.h>
#include <stdbool.h>
/* One lexer rule: a compiled pattern and the token type it produces. */
typedef struct {
/* Compiled regular expression tested against the unread input. */
regex_t regex;
/* Value stored in Token.type when this rule matches. */
int token_type;
} TokenRule;
/* Scanner state: the input text, the read position and the rule table. */
typedef struct {
/* Index into `text` of the next unread character. */
int offset;
/* Input being scanned; set by lexer_feed(), NULL until then. */
const char* text;
/* Rule table, tried in array order by lexer_next(). */
const TokenRule* rules;
/* Number of entries in `rules`. */
size_t num_rules;
} Lexer;
/* Create a lexer over `rules` with no input text and offset 0. */
Lexer lexer_create(const TokenRule* rules, size_t num_rules);
/* Set the text the lexer scans; `lexer` must not be NULL. */
void lexer_feed(Lexer* lexer, const char* text);
/*
 * Read the next token into *token. Returns false at end of input; exits the
 * process when no rule matches the upcoming character.
 */
bool lexer_next(Lexer* lexer, Token* token);
#endif

51
src/main.c Normal file
View file

@ -0,0 +1,51 @@
#include "codegen.h"
#include "compiler.h"
#include "helpers.h"
#include "lexer.h"
#include "node.h"
#include "parser.h"
#include "token.h"
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
int main(int argc, char** argv) {
assert(argc > 1);
const char* text = read_file(argv[1]);
TokenRule rules[] = {
{ regex("^[A-Za-z][A-Za-z0-9]*"), TOKEN_IDENTIFIER },
{ regex("^[ \r\n]+"), TOKEN_WHITESPACE },
{ regex("^\".*?\""), TOKEN_STRING },
{ regex("^[0-9]+"), TOKEN_NUMBER },
{ regex("^;"), TOKEN_SEMICOLON },
{ regex("^\\("), TOKEN_OPAREN },
{ regex("^\\{"), TOKEN_OBRACE },
{ regex("^\\)"), TOKEN_CPAREN },
{ regex("^\\}"), TOKEN_CBRACE },
};
Lexer lexer = lexer_create(rules, c_arraylen(rules));
lexer_feed(&lexer, text);
Compiler compiler = {
.lexer = &lexer,
.types = c_make(cmap_str,
{
{ "i8", "b" },
{ "i16", "h" },
{ "i32", "w" },
{ "i64", "l" },
{ "void", "" },
}),
};
Token token;
while(next(&compiler, &token)) {
// printf("%c: %s\n", token.type, token.value);
Node node = parse_token(&compiler, token);
codegen(node);
}
return 0;
}

63
src/node.h Normal file
View file

@ -0,0 +1,63 @@
#ifndef NODE_H
#define NODE_H
#include <stddef.h>
#include <stc/forward.h>
/*
 * Node is declared before it is defined because the payload structs below
 * contain Nodes vectors, and Nodes in turn holds Node values.
 */
typedef struct Node Node;
/* Forward-declare the STC vector type Nodes with struct Node elements. */
forward_cvec(Nodes, struct Node);
/*
 * Tag selecting the active member of the union inside Node. The values are
 * ASCII letters, so a tag can be printed with %c in diagnostics.
 */
typedef enum {
NODE_FUNCTION_CALL = 'C',
NODE_FUNCTION_DECL = 'D',
NODE_FUNCTION_IMPL = 'I',
NODE_ARG_DECL = 'A',
NODE_NUMBER = 'N',
} NodeType;
/* Payload of a NODE_FUNCTION_CALL node. */
typedef struct {
/* Name of the function being called. */
const char* name;
/* Argument nodes, in source order. */
Nodes args;
} FunctionCall;
/* Payload of a NODE_FUNCTION_DECL node (a signature terminated by ';'). */
typedef struct {
/* Return type name. */
const char* type;
/* Function name. */
const char* name;
/* Argument declaration nodes, in source order. */
Nodes args;
} FunctionDecl;
/* Payload of a NODE_FUNCTION_IMPL node (a signature followed by a braced body). */
typedef struct {
/* Return type name. */
const char* type;
/* Function name. */
const char* name;
/* Argument declaration nodes, in source order. */
Nodes args;
/* Nodes parsed from between the braces, in source order. */
Nodes body;
} FunctionImpl;
/* Payload of a NODE_ARG_DECL node: one `type name` pair in a parameter list. */
typedef struct {
/* Argument type name. */
const char* type;
/* Argument name. */
const char* name;
} ArgDecl;
/* Payload of a NODE_NUMBER node. */
typedef struct {
/* Value of the integer literal (signed). */
long long int value;
} Number;
struct Nodes;
/*
 * A syntax tree node: a type tag plus an anonymous union holding the payload
 * for that tag. Only the member matching `type` is meaningful.
 */
typedef struct Node {
/* Selects which union member below is in use. */
NodeType type;
union {
FunctionCall function_call; /* NODE_FUNCTION_CALL */
FunctionDecl function_decl; /* NODE_FUNCTION_DECL */
FunctionImpl function_impl; /* NODE_FUNCTION_IMPL */
ArgDecl arg_decl;           /* NODE_ARG_DECL */
Number number;              /* NODE_NUMBER */
};
} Node;
/*
 * Instantiate the STC vector Nodes now that Node is complete. i_is_forward
 * ties this to the forward_cvec() declaration made earlier in this header.
 */
#define i_type Nodes
#define i_is_forward
#define i_val Node
/* NOTE(review): c_no_cmp presumably disables element comparison for this container - confirm against STC docs. */
#define i_opt c_no_cmp
#include <stc/cvec.h>
#endif

164
src/parser.c Normal file
View file

@ -0,0 +1,164 @@
#include "parser.h"
#include "helpers.h"
/*
 * Parse the construct that begins with `token` and return its node.
 *
 * A type identifier starts a typed declaration, a plain identifier starts a
 * call, and a number is a literal. Any other token is a syntax error, which
 * terminates the process.
 */
Node parse_token(Compiler* self, Token token) {
    if(token.type == TOKEN_TYPEIDENTIFIER) {
        return parse_type(self, token.value);
    }
    if(token.type == TOKEN_IDENTIFIER) {
        return parse_identifier(self, token.value);
    }
    if(token.type == TOKEN_NUMBER) {
        return parse_number(self, token.value);
    }
    syntax_error("unexpected token \"%s\"", token.value);
}
/*
 * Parse nodes until a token of kind `end_token` is read, and return them in
 * source order. The terminating token is consumed and not included.
 *
 * Running out of input before `end_token` appears is a syntax error (the
 * enclosing parenthesis or brace was never closed) and terminates the
 * process instead of silently returning a truncated list.
 */
Nodes parse_until(Compiler* self, TokenType end_token) {
    Nodes collected = { 0 };
    Token token;
    while(next(self, &token)) {
        if(token.type == (int)end_token) {
            return collected;
        }
        Nodes_push(&collected, parse_token(self, token));
    }
    syntax_error("expected '%c', found eof", (int)end_token);
}
/*
 * Parse what follows a type name. The only accepted continuation is an
 * identifier, which is handed to parse_type_ident_pair().
 *
 * End of input or any other token is a syntax error (process exits). The
 * result of next() is checked so that an uninitialized token is never read
 * at end of input.
 */
Node parse_type(Compiler* self, const char* type) {
    printf("parse type %s\n", type);
    Token token;
    if(!next(self, &token)) {
        syntax_error("expected identifier after type \"%s\", found eof", type);
    }
    switch(token.type) {
    case TOKEN_IDENTIFIER:
        return parse_type_ident_pair(self, type, token.value);
    default:
        syntax_error("unexpected token \"%s\" after type \"%s\"", token.value, type);
    }
}
/*
 * Parse what follows a plain identifier. The only accepted continuation is
 * an opening parenthesis, which makes the identifier a function call.
 *
 * End of input or any other token is a syntax error (process exits). The
 * result of next() is checked so that an uninitialized token is never read
 * at end of input.
 */
Node parse_identifier(Compiler* self, const char* name) {
    Token token;
    if(!next(self, &token)) {
        syntax_error("expected token after identifier \"%s\", found eof", name);
    }
    switch(token.type) {
    case TOKEN_OPAREN:
        return parse_function_call(self, name);
    default:
        syntax_error("unexpected token \"%s\" after identifier \"%s\"", token.value, name);
    }
}
/*
 * Build a NODE_NUMBER node from the decimal digits in `value`.
 *
 * strtoll() is used instead of atoll() because its behavior is defined for
 * out-of-range input (the result is clamped), whereas atoll() is undefined
 * in that case. `self` is unused; it is kept for a uniform parser signature.
 */
Node parse_number(Compiler* self, const char* value) {
    (void)self;
    return (Node){
        .type = NODE_NUMBER,
        .number = { .value = strtoll(value, NULL, 10) },
    };
}
/*
 * Parse what follows a `type name` pair. The only accepted continuation is
 * an opening parenthesis, which starts a function declaration or
 * implementation.
 *
 * End of input or any other token is a syntax error (process exits). The
 * result of next() is checked so that an uninitialized token is never read
 * at end of input.
 */
Node parse_type_ident_pair(Compiler* self, const char* type, const char* name) {
    printf("parse type ident pair %s %s\n", type, name);
    Token token;
    if(!next(self, &token)) {
        syntax_error("expected token after \"%s %s\", found eof", type, name);
    }
    switch(token.type) {
    case TOKEN_OPAREN:
        return parse_function(self, type, name);
    default:
        syntax_error("unexpected token \"%s\" after \"%s %s\"", token.value, type, name);
    }
}
/*
 * Parse a function signature after its opening parenthesis, then decide
 * between a declaration and an implementation.
 *
 * The parameter list is a comma-separated sequence of `type name` pairs
 * ending with ')'. After it, ';' produces a declaration node and '{' an
 * implementation node (whose body is parsed up to the matching '}').
 *
 * Every token read is checked for end of input, and parameter types and
 * names are checked for the expected token kinds; any violation is a syntax
 * error and terminates the process.
 */
Node parse_function(Compiler* self, const char* type, const char* name) {
    printf("parse function %s %s\n", type, name);

    Nodes args = { 0 };
    bool closed = false;
    while(!closed) {
        Token arg_type;
        if(!next(self, &arg_type)) {
            syntax_error("expected argument type or closing parenthesis, found eof");
        }
        if(arg_type.type == TOKEN_CPAREN) {
            break;
        }
        if(arg_type.type != TOKEN_TYPEIDENTIFIER) {
            syntax_error("expected argument type, found \"%s\"", arg_type.value);
        }

        Token arg_name;
        if(!next(self, &arg_name)) {
            syntax_error("expected argument name after type \"%s\", found eof", arg_type.value);
        }
        if(arg_name.type != TOKEN_IDENTIFIER) {
            syntax_error("expected argument name after type \"%s\", found \"%s\"", arg_type.value,
                arg_name.value);
        }

        Nodes_push(&args,
            (Node){
                .type = NODE_ARG_DECL,
                .arg_decl = { .type = arg_type.value, .name = arg_name.value },
            });

        Token separator;
        if(!next(self, &separator)) {
            syntax_error("expected comma or closing parenthesis, found eof");
        }
        switch(separator.type) {
        case TOKEN_COMMA:
            break;  // another argument follows
        case TOKEN_CPAREN:
            closed = true;
            break;
        default:
            syntax_error("expected comma or closing parenthesis, found %s", separator.value);
        }
    }

    Token token;
    if(!next(self, &token)) {
        syntax_error("expected token, found eof");
    }
    switch(token.type) {
    case TOKEN_SEMICOLON:
        return parse_function_decl(self, type, name, args);
    case TOKEN_OBRACE:
        return parse_function_impl(self, type, name, args);
    default:
        syntax_error("expected semicolon or opening brace, found %s", token.value);
    }
}
/*
 * Wrap an already-parsed signature in a NODE_FUNCTION_DECL node.
 * No tokens are consumed; `self` is not used.
 */
Node parse_function_decl(Compiler* self, const char* type, const char* name, Nodes args) {
    Node decl = { .type = NODE_FUNCTION_DECL };
    decl.function_decl.type = type;
    decl.function_decl.name = name;
    decl.function_decl.args = args;
    return decl;
}
/*
 * Parse a function body (everything up to the closing brace) and combine it
 * with the already-parsed signature into a NODE_FUNCTION_IMPL node.
 */
Node parse_function_impl(Compiler* self, const char* type, const char* name, Nodes args) {
    Node impl = { .type = NODE_FUNCTION_IMPL };
    impl.function_impl.type = type;
    impl.function_impl.name = name;
    impl.function_impl.args = args;
    impl.function_impl.body = parse_until(self, TOKEN_CBRACE);
    return impl;
}
/*
 * Parse a call to `name` after its opening parenthesis: the argument nodes
 * up to ')' and then the mandatory terminating semicolon. Returns a
 * NODE_FUNCTION_CALL node.
 */
Node parse_function_call(Compiler* self, const char* name) {
    printf("parse function call %s\n", name);

    Node call = { .type = NODE_FUNCTION_CALL };
    call.function_call.name = name;
    call.function_call.args = parse_until(self, TOKEN_CPAREN);
    parse_semicolon(self);
    return call;
}
/*
 * Consume one token and require it to be a semicolon.
 *
 * End of input or any other token is a syntax error (process exits). The
 * result of next() is checked so that an uninitialized token is never read
 * at end of input.
 */
void parse_semicolon(Compiler* self) {
    Token token;
    if(!next(self, &token)) {
        syntax_error("expected semicolon, found eof");
    }
    if(token.type != TOKEN_SEMICOLON) {
        syntax_error("expected semicolon, found \"%s\"", token.value);
    }
}

21
src/parser.h Normal file
View file

@ -0,0 +1,21 @@
#ifndef PARSER_H
#define PARSER_H
#include "compiler.h"
#include "node.h"
#include "token.h"
/* Parse the construct that starts with `token`; dispatches on the token type. */
Node parse_token(Compiler* self, Token token);
/* Parse nodes until a token of kind `end_token` is read; returns them in order. */
Nodes parse_until(Compiler* self, TokenType end_token);
/* Parse what follows the type name `type` (an identifier is expected). */
Node parse_type(Compiler* self, const char* type);
/* Parse what follows the identifier `name` (an opening parenthesis is expected). */
Node parse_identifier(Compiler* self, const char* name);
/* Build a number node from the digit string `value`. */
Node parse_number(Compiler* self, const char* value);
/* Parse what follows a `type name` pair (an opening parenthesis is expected). */
Node parse_type_ident_pair(Compiler* self, const char* type, const char* name);
/* NOTE(review): no definition of parse_arg_decl is visible in parser.c - confirm it is still needed. */
Node parse_arg_decl(Compiler* self, const char* type, const char* name);
/* Parse a parameter list, then a ';' (declaration) or '{' (implementation). */
Node parse_function(Compiler* self, const char* type, const char* name);
/* Build a function declaration node from an already-parsed signature. */
Node parse_function_decl(Compiler* self, const char* type, const char* name, Nodes args);
/* Parse a function body up to '}' and build a function implementation node. */
Node parse_function_impl(Compiler* self, const char* type, const char* name, Nodes args);
/* Parse call arguments up to ')' and the following ';'; builds a call node. */
Node parse_function_call(Compiler* self, const char* name);
/* Consume one token and fail with a syntax error unless it is a semicolon. */
void parse_semicolon(Compiler* self);
#endif

9
src/token.h Normal file
View file

@ -0,0 +1,9 @@
#ifndef TOKEN_H
#define TOKEN_H
/* A lexical token: its kind and the source text it was produced from. */
typedef struct {
/* Token kind; holds the token_type of the lexer rule that matched. */
int type;
/* NUL-terminated copy of the matched text, allocated by the lexer. */
const char* value;
} Token;
#endif

10
test.txt Normal file
View file

@ -0,0 +1,10 @@
hey "bob \"mr goonman\" gooner"
i32 putchar(i32 char);
void sayhi() {
putchar(72);
putchar(105);
}
sayhi();

79
xmake.lua Normal file
View file

@ -0,0 +1,79 @@
-- Add the debug and release build modes.
add_rules("mode.debug", "mode.release")
-- Require the external "stc" package.
add_requires("stc")
-- The compiler executable, built from every C file under src/.
target("compiler-c")
set_kind("binary")
add_files("src/*.c")
add_packages("stc")
-- NOTE(review): presumably makes `xmake run` start in the project root so relative input paths resolve - confirm.
set_rundir(".")
--
-- If you want to know more about xmake usage, please see https://xmake.io
--
-- ## FAQ
--
-- Enter the project directory before building the project.
--
-- $ cd projectdir
--
-- 1. How to build project?
--
-- $ xmake
--
-- 2. How to configure project?
--
-- $ xmake f -p [macosx|linux|iphoneos ..] -a [x86_64|i386|arm64 ..] -m [debug|release]
--
-- 3. Where is the build output directory?
--
-- The default output directory is `./build` and you can configure the output directory.
--
-- $ xmake f -o outputdir
-- $ xmake
--
-- 4. How to run and debug target after building project?
--
-- $ xmake run [targetname]
-- $ xmake run -d [targetname]
--
-- 5. How to install target to the system directory or other output directory?
--
-- $ xmake install
-- $ xmake install -o installdir
--
-- 6. Add some frequently-used compilation flags in xmake.lua
--
-- @code
-- -- add debug and release modes
-- add_rules("mode.debug", "mode.release")
--
-- -- add macro definition
-- add_defines("NDEBUG", "_GNU_SOURCE=1")
--
-- -- set warning all as error
-- set_warnings("all", "error")
--
-- -- set language: c99, c++11
-- set_languages("c99", "c++11")
--
-- -- set optimization: none, faster, fastest, smallest
-- set_optimize("fastest")
--
-- -- add include search directories
-- add_includedirs("/usr/include", "/usr/local/include")
--
-- -- add link libraries and search directories
-- add_links("tbox")
-- add_linkdirs("/usr/local/lib", "/usr/lib")
--
-- -- add system link libraries
-- add_syslinks("z", "pthread")
--
-- -- add compilation and link flags
-- add_cxflags("-stdnolib", "-fno-strict-aliasing")
-- add_ldflags("-L/usr/local/lib", "-lpthread", {force = true})
--
-- @endcode
--