This commit is contained in:
r4 2021-12-21 01:18:22 +01:00
parent ab1887c28d
commit 21694f98ac
20 changed files with 1615 additions and 0 deletions

34
Makefile Normal file
View File

@ -0,0 +1,34 @@
# Build script for the interpreter.
# Exactly one CFLAGS line should be active: debug (-ggdb), profiling (-pg)
# or optimized (-O3).
CFLAGS = -ggdb -std=c11 -Wall -Wextra -pedantic -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition
#CFLAGS = -pg -std=c11 -Wall -Wextra -pedantic -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition
#CFLAGS = -O3 -std=c11 -Wall -Wextra -pedantic -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition
LDFLAGS = -lm
SOURCE = main.c util.c tok.c lex.c ir.c parse.c runtime.c vm.c map.c
HEADERS = util.h tok.h lex.h ir.h parse.h runtime.h vm.h map.h
EXE = main
OBJ = $(SOURCE:.c=.o)

# Default goal: link the interpreter from all object files.
$(EXE): $(OBJ)
	$(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)

%.o: %.c
	$(CC) -c -o $@ $< $(CFLAGS)

# Header dependencies of every object file, regenerated via $(CC) -MM.
deps.mk: $(SOURCE) $(HEADERS)
	@echo "# Automatically generated by $(CC) -MM." > $@
	$(CC) -MM $(SOURCE) >> $@

# map_test.c #includes map.c and util.c directly, so only the first
# prerequisite ($<) is compiled; the others are listed to trigger rebuilds.
map_test: map_test.c util.c map.c
	$(CC) -o $@ $< $(CFLAGS) $(LDFLAGS)

run_map_test: map_test
	valgrind ./map_test

# Neither target produces a file of its own name, so both must be phony
# (run_map_test was previously missing here).
.PHONY: clean run_map_test
clean:
	rm -f $(OBJ) $(EXE) deps.mk gmon.out map_test

# Do not (re)generate deps.mk just to delete it again.
ifneq ($(MAKECMDGOALS),clean)
include deps.mk
endif

16
example.script Normal file
View File

@ -0,0 +1,16 @@
a := 1
b := 1 - 2 * 2 + 5
c := a + b * 2 * b
d := a + 4 * b * a
/*x := 1
y := 1
i := 60
while i {
z := x + y
y = x
x = z
print(z)
i = i - 1
}*/

104
ir.c Normal file
View File

@ -0,0 +1,104 @@
#include "ir.h"
#include <stdio.h>
#include <stdlib.h>
/* Printable mnemonic for every IRInstr, indexed by the enum value.
 * print_ir() prints it as the first column of each dumped instruction and
 * eval_arith() uses it in its error message. */
const char *irinstr_str[IRInstrEnumSize] = {
[IRSet] = "set",
[IRNeg] = "neg",
[IRAdd] = "add",
[IRSub] = "sub",
[IRMul] = "mul",
[IRDiv] = "div",
[IRPrint] = "print",
[IRJnz] = "jnz",
};
#define IRTOKS_INIT_CAP 4096
/* Set up v as an empty IR token vector with room for IRTOKS_INIT_CAP tokens.
 * The storage must later be released with irtoks_term(). */
void irtoks_init(IRToks *v) {
    v->cap = IRTOKS_INIT_CAP;
    v->len = 0;
    v->toks = malloc(IRTOKS_INIT_CAP * sizeof(IRTok));
}
/* Release everything owned by v: the argument lists hanging off IRPrint
 * instructions and then the token array itself. */
void irtoks_term(IRToks *v) {
    for (size_t i = 0; i < v->len; i++) {
        IRTok *tok = &v->toks[i];
        if (tok->instr != IRPrint)
            continue;
        /* Walk the singly linked argument list, freeing node by node. */
        IRArgs *arg = tok->Print.args;
        while (arg != NULL) {
            IRArgs *following = arg->next;
            free(arg);
            arg = following;
        }
    }
    free(v->toks);
}
/* Append t to the end of v, doubling the capacity when the array is full.
 *
 * The realloc result is checked before it replaces v->toks, so a failed
 * allocation neither leaks the old array nor leads to a NULL dereference;
 * running out of memory here is treated as fatal. A capacity of 0 is grown
 * to 1 instead of staying at 0 forever. */
void irtoks_app(IRToks *v, IRTok t) {
    if (v->len >= v->cap) {
        size_t new_cap = v->cap ? v->cap * 2 : 1;
        IRTok *grown = realloc(v->toks, sizeof *grown * new_cap);
        if (grown == NULL) {
            fprintf(stderr, "irtoks_app: out of memory\n");
            abort();
        }
        v->toks = grown;
        v->cap = new_cap;
    }
    v->toks[v->len++] = t;
}
static void print_val(const Value *v);
static void print_irparam(const IRParam *p);
/* Write a literal value to stdout in a format chosen by its type kind. */
static void print_val(const Value *v) {
    if (v->type.kind == TypeFloat) {
        printf("%f", v->Float);
    } else if (v->type.kind == TypeInt) {
        printf("%zd", v->Int);
    } else {
        printf("(unknown type)");
    }
}
/* Print one IR operand: a literal is printed as its value, an address as
 * '%' followed by the address. Operands of kind IRParamNull print nothing. */
static void print_irparam(const IRParam *p) {
    if (p->kind == IRParamLiteral) {
        print_val(&p->Literal);
    } else if (p->kind == IRParamAddr) {
        /* Addr is a size_t, so the matching conversion is %zu (was %zd). */
        printf("%%%zu", p->Addr);
    }
}
/* Dump the IR to stdout, one instruction per line:
 * mnemonic, operands, then " ; line:column" of the originating source. */
void print_ir(IRToks *v) {
    for (size_t i = 0; i < v->len; i++) {
        const IRTok *tok = &v->toks[i];
        const IRInstr op = tok->instr;

        printf("%s", irinstr_str[op]);
        if (op == IRSet || op == IRNeg) {
            /* destination address followed by the single operand */
            printf(" %%%zu ", tok->Unary.addr);
            print_irparam(&tok->Unary.val);
        } else if (op == IRAdd || op == IRSub || op == IRDiv || op == IRMul) {
            /* destination address followed by both operands */
            printf(" %%%zu ", tok->Arith.addr);
            print_irparam(&tok->Arith.lhs);
            printf(" ");
            print_irparam(&tok->Arith.rhs);
        } else if (op == IRPrint) {
            const IRArgs *arg = tok->Print.args;
            while (arg != NULL) {
                printf(" ");
                print_irparam(&arg->param);
                arg = arg->next;
            }
        } else if (op == IRJnz) {
            /* condition operand followed by the jump target */
            printf(" ");
            print_irparam(&tok->CJmp.condition);
            printf(" %zu", tok->CJmp.iaddr);
        }
        printf(" ; %zu:%zu", tok->ln, tok->col);
        printf("\n");
    }
}

78
ir.h Normal file
View File

@ -0,0 +1,78 @@
#ifndef __IR_H__
#define __IR_H__
#include "tok.h"
/* Opcodes of the intermediate representation. The payload an instruction
 * carries (see IRTok) depends on its opcode. */
enum IRInstr {
/* IRSet and IRNeg carry a destination address and one operand. */
IRSet,
IRNeg,
/* The arithmetic opcodes carry a destination address and two operands. */
IRAdd,
IRSub,
IRMul,
IRDiv,
/* IRPrint carries a linked list of operands. */
IRPrint,
/* IRJnz carries a condition operand and an instruction address. */
IRJnz,
/* Number of opcodes; used to size lookup tables such as irinstr_str. */
IRInstrEnumSize,
};
typedef enum IRInstr IRInstr;
/* Mnemonic for each opcode, indexed by IRInstr. */
extern const char *irinstr_str[IRInstrEnumSize];
/* An instruction operand: either a literal value or a memory address.
 * `kind` selects which union member is meaningful. */
typedef struct IRParam {
enum {
/* The zero value; returned by the parser as (IRParam){0} on error. */
IRParamNull = 0,
IRParamLiteral,
IRParamAddr,
} kind;
union {
Value Literal; /* valid when kind == IRParamLiteral */
size_t Addr; /* valid when kind == IRParamAddr */
};
} IRParam;
/* Node of a singly linked list of operands (used by IRPrint). Each node is
 * freed individually by irtoks_term(). */
typedef struct IRArgs {
struct IRArgs *next; /* NULL terminates the list */
IRParam param;
} IRArgs;
/* One IR instruction together with the source position it came from.
 * Which union member is meaningful is determined by `instr`. */
typedef struct IRTok {
size_t ln, col; /* source line and column, printed by print_ir() */
IRInstr instr;
union {
/* IRSet, IRNeg: result address and the single operand. */
struct {
size_t addr;
IRParam val;
} Unary;
/* IRAdd, IRSub, IRMul, IRDiv: result address and both operands. */
struct {
size_t addr;
IRParam lhs, rhs;
} Arith;
/* IRPrint: linked list of operands.
 * NOTE(review): args_size is presumably the list length -- it is not
 * read or written anywhere in the visible code; confirm. */
struct {
IRArgs *args;
size_t args_size;
} Print;
/* IRJnz: condition operand and an instruction address (by the mnemonic
 * "jnz" presumably the jump target when the condition is non-zero --
 * confirm against the VM). */
struct {
size_t iaddr;
IRParam condition;
} CJmp;
};
} IRTok;
/* Growable array of IR instructions; managed by irtoks_init(),
 * irtoks_app() and irtoks_term(). */
typedef struct IRToks {
size_t len, cap; /* len: used elements; cap: allocated elements */
IRTok *toks;
} IRToks;
void irtoks_init(IRToks *v);
void irtoks_term(IRToks *v);
void irtoks_app(IRToks *v, IRTok t);
void print_ir(IRToks *v);
#endif /* IR_H */

232
lex.c Normal file
View File

@ -0,0 +1,232 @@
#include "lex.h"
#include "util.h"
/* Source position tracking for the lexer. The "current" position advances
 * with every consumed character; the "marked" position is a snapshot taken
 * by mark() and is what emit() stamps onto tokens. */
typedef struct Pos {
size_t ln, col; /* current position */
size_t m_ln, m_col; /* marked position */
} Pos;
static void consume(Pos *p, char c);
static void emit(TokList *toks, const Pos *p, Tok t);
static void mark(Pos *p);
static void mark_err(const Pos *p);
/* Advance the current position past character c: a newline moves to the
 * first column of the next line, anything else moves one column right. */
static void consume(Pos *p, char c) {
    if (c != '\n') {
        p->col++;
        return;
    }
    p->ln++;
    p->col = 1;
}
/* Stamp t with the marked position of p and append it to toks. */
static void emit(TokList *toks, const Pos *p, Tok t) {
    t.col = p->m_col;
    t.ln = p->m_ln;
    toklist_append(toks, t);
}
/* Remember the current position as the marked position. */
static void mark(Pos *p) {
    p->m_col = p->col;
    p->m_ln = p->ln;
}
/* Copy the marked position of p into the global error location. */
static void mark_err(const Pos *p) {
    err_col = p->m_col;
    err_ln = p->m_ln;
}
/* Split the NUL-terminated source text s into a token list.
 *
 * On success the list ends with an OpEOF operator token. On failure the
 * error state is set via set_err() (with err_ln/err_col pointing at the
 * offending position) and the partial list is returned; the caller owns
 * the list in both cases and must release it with toklist_term().
 *
 * Fixes over the previous version:
 *  - Block comments are scanned by looking at two characters at a time.
 *    The old scanner consumed one character unconditionally per iteration,
 *    so a terminator preceded by an extra '*' was not recognised as closing
 *    the comment, and an unclosed comment ending in '/' or '*' stepped over
 *    the terminating NUL and read out of bounds.
 *  - The error column for an invalid digit now accounts for a "0x"/"0b"
 *    prefix. */
TokList lex(const char *s) {
    TokList toks;
    toklist_init(&toks);
    Pos pos = { .ln = 1, .col = 1 };
    for (;;) {
        /* Every token (and every error) starts at the current position. */
        mark(&pos);
        mark_err(&pos);

        /* Keywords and identifiers. */
        if (IS_ALPHA(s[0])) {
            size_t i = 1;
            const char *start = s;
            consume(&pos, *(s++));
            while (IS_ALNUM(s[0])) {
                consume(&pos, *(s++));
                i++;
            }
            if (streq_0_n("if", start, i))
                emit(&toks, &pos, (Tok){ .kind = TokIf });
            else if (streq_0_n("while", start, i))
                emit(&toks, &pos, (Tok){ .kind = TokWhile });
            else {
                emit(&toks, &pos, (Tok){
                    .kind = TokIdent,
                    .Ident = {
                        .kind = IdentName,
                        .Name = sndup(start, i),
                    },
                });
            }
            continue;
        }

        /* Integer and float literals. */
        if (IS_NUM(s[0]) || s[0] == '.') {
            const char *tok_begin = s; /* first character of the literal */
            const char *start = s;     /* first digit (after any prefix) */
            size_t base = 10;
            bool num_end = false;
            bool is_float = false;
            if (s[0] == '0') {
                consume(&pos, *(s++));
                if (s[0] == 'x' || s[0] == 'X') {
                    base = 16;
                    consume(&pos, *(s++));
                    start = s;
                } else if (s[0] == 'b' || s[0] == 'B') {
                    base = 2;
                    consume(&pos, *(s++));
                    start = s;
                } else if (!IS_NUM(s[0]) && s[0] != '.')
                    num_end = true; /* a lone "0" */
            }
            if (!num_end) {
                for (;;) {
                    if (s[0] == '.') {
                        if (is_float) {
                            /* point the error at the second '.' */
                            mark(&pos);
                            mark_err(&pos);
                            set_err("Too many decimal points in number");
                            return toks;
                        }
                        if (base != 10) {
                            set_err("Only decimal floats are supported");
                            return toks;
                        }
                        is_float = true;
                    } else if (!IS_ALNUM(s[0]))
                        break;
                    consume(&pos, *(s++));
                }
            }
            if (is_float) {
                ssize_t endpos;
                double num = stod(start, s - start, &endpos);
                if (endpos != -1) {
                    err_col += endpos;
                    set_err("Invalid decimal float character: '%c'", start[endpos]);
                    return toks;
                }
                emit(&toks, &pos, (Tok){
                    .kind = TokVal,
                    .Val = {
                        .type = {
                            .kind = TypeFloat,
                        },
                        .Float = num,
                    },
                });
            } else {
                ssize_t endpos;
                intmax_t num = stoimax(start, s - start, base, &endpos);
                if (endpos != -1) {
                    /* endpos is relative to start, which lies behind any
                     * base prefix; add the prefix length as well. */
                    err_col += (size_t)(start - tok_begin) + (size_t)endpos;
                    set_err("Invalid base %zu numerical character: '%c'", base, start[endpos]);
                    return toks;
                }
                emit(&toks, &pos, (Tok){
                    .kind = TokVal,
                    .Val = {
                        .type = {
                            .kind = TypeInt,
                        },
                        .Int = num,
                    },
                });
            }
            continue;
        }

        /* Operators, whitespace and comments. Cases that `break` have one
         * more character consumed after the switch; cases that `continue`
         * have already consumed everything themselves. */
        switch (s[0]) {
            case 0:
                goto end_of_file;
            case ' ':
            case '\t':
                break;
            case '\n':
                emit(&toks, &pos, (Tok){
                    .kind = TokOp,
                    .Op = OpNewLn,
                });
                break;
            case ':':
                consume(&pos, *(s++));
                if (s[0] == '=') {
                    emit(&toks, &pos, (Tok){ .kind = TokDeclare });
                } else {
                    set_err("Expected ':='");
                    return toks;
                }
                break;
            case '=':
                emit(&toks, &pos, (Tok){ .kind = TokAssign });
                break;
            case '{':
            case '}':
            case '(':
            case ')':
            case ',':
            case '+':
            case '-':
            case '*':
                /* These operators use their own character as enum value. */
                emit(&toks, &pos, (Tok){
                    .kind = TokOp,
                    .Op = s[0],
                });
                break;
            case '/':
                consume(&pos, *(s++));
                if (s[0] == '/') {
                    /* Line comment: skip up to (not including) the newline. */
                    consume(&pos, *(s++));
                    while (s[0] != '\n') {
                        if (s[0] == 0)
                            goto end_of_file;
                        consume(&pos, *(s++));
                    }
                } else if (s[0] == '*') {
                    /* Block comment; nested comments are supported. */
                    consume(&pos, *(s++));
                    size_t depth = 1;
                    while (depth) {
                        if (s[0] == 0) {
                            set_err("Unclosed comment");
                            return toks;
                        }
                        if (s[0] == '/' && s[1] == '*') {
                            consume(&pos, *(s++));
                            consume(&pos, *(s++));
                            depth++;
                        } else if (s[0] == '*' && s[1] == '/') {
                            consume(&pos, *(s++));
                            consume(&pos, *(s++));
                            depth--;
                        } else
                            consume(&pos, *(s++));
                    }
                } else {
                    emit(&toks, &pos, (Tok){
                        .kind = TokOp,
                        .Op = '/',
                    });
                }
                continue;
            default:
                set_err("Unrecognized character: '%c'", s[0]);
                return toks;
        }
        consume(&pos, *(s++));
    }
end_of_file:
    emit(&toks, &pos, (Tok){
        .kind = TokOp,
        .Op = OpEOF,
    });
    return toks;
}

8
lex.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef __LEX_H__
#define __LEX_H__
#include "tok.h"
TokList lex(const char *s);
#endif /* LEX_H */

104
main.c Normal file
View File

@ -0,0 +1,104 @@
#include <errno.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ir.h"
#include "lex.h"
#include "parse.h"
#include "util.h"
static void usage(const char *prgname);
static void die(const char *fmt, ...);
/* Print the command line synopsis and the list of options to stderr. */
static void usage(const char *prgname) {
    fputs("Usage:\n", stderr);
    fprintf(stderr, " %s [OPTIONS] <FILENAME>\n", prgname);
    fputs("Options:\n"
          " -emit-tokens\n"
          " -emit-ir\n"
          " -dry -- don't execute the script (just process it)\n",
          stderr);
}
/* Print "Error: " followed by the printf-style message to stderr and
 * terminate the process with exit status 1. Does not return. */
static void die(const char *fmt, ...) {
    va_list args;

    fprintf(stderr, C_IRED "Error: " C_RESET);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    exit(1);
}
/* Entry point: parse the command line, read the script, lex it, parse it
 * into IR and optionally dump the intermediate results.
 *
 * Returns 0 on success (and for -h/-help/--help) and 1 on any error.
 *
 * Fixes over the previous version:
 *  - errno from a failed read is captured before fclose() can overwrite it;
 *  - argc == 0 no longer underflows the argument count;
 *  - opt_dry is explicitly marked as not yet used;
 *  - the success path returns 0 explicitly. */
int main(int argc, const char **argv) {
    /* parse arguments */
    size_t nargs = argc > 0 ? (size_t)argc - 1 : 0;
    const char *prgname = argc > 0 ? argv[0] : "main";
    const char **args = argv + 1;
    bool opt_emit_tokens = false;
    bool opt_emit_ir = false;
    bool opt_dry = false;
    const char *filename = NULL;
    for (size_t i = 0; i < nargs; i++) {
        if (args[i][0] == '-') {
            if (streq(args[i], "-h") || streq(args[i], "-help") || streq(args[i], "--help")) {
                usage(prgname);
                return 0;
            } else if (streq(args[i], "-emit-ir"))
                opt_emit_ir = true;
            else if (streq(args[i], "-emit-tokens"))
                opt_emit_tokens = true;
            else if (streq(args[i], "-dry"))
                opt_dry = true;
            else {
                die("Unknown option: %s\n", args[i]);
            }
        } else {
            if (filename) {
                die("Filename already set to '%s'\n", filename);
            }
            filename = args[i];
        }
    }
    if (!filename) {
        die("Please specify a filename\n");
    }
    /* Execution is not implemented yet, so -dry currently has no effect. */
    (void)opt_dry;

    /* read source file */
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        die("Failed to open '%s': %s\n", filename, strerror(errno));
    }
    char *file = mreadfile(fp);
    /* Save errno right away: fclose() below may change it. */
    int read_errno = errno;
    fclose(fp);
    if (!file) {
        die("Failed to read '%s': %s\n", filename, strerror(read_errno));
    }

    /* lex source file */
    TokList tokens = lex(file);
    if (err) {
        toklist_term(&tokens);
        free(file);
        fprintf(stderr, C_IRED "Lexer error" C_RESET " in " C_CYAN "%s" C_RESET ":%zu:%zu: %s\n", filename, err_ln, err_col, errbuf);
        return 1;
    }
    /* Identifier names were copied by the lexer; the text is done with. */
    free(file);
    if (opt_emit_tokens)
        print_toks(&tokens);

    /* parse tokens into IR code */
    IRToks ir = parse(&tokens);
    if (err) {
        irtoks_term(&ir);
        toklist_term(&tokens);
        fprintf(stderr, C_IRED "Parser error" C_RESET " in " C_CYAN "%s" C_RESET ":%zu:%zu: %s\n", filename, err_ln, err_col, errbuf);
        return 1;
    }
    toklist_term(&tokens);
    if (opt_emit_ir)
        print_ir(&ir);

    /* run the IR */
    /* TODO... */
    irtoks_term(&ir);
    return 0;
}

103
map.c Normal file
View File

@ -0,0 +1,103 @@
#include "map.h"
#include <stdlib.h>
#include "util.h"
#define MAP_INITIAL_CAP 32 /* must be a power of 2 in this implementation */
#define MAP_REHASH_THRESHOLD 0.7
static void init_with_cap(Map *m, size_t val_size, size_t cap);
static void rehash(Map *m);
/* Initialise m as an empty map with `cap` slots for values of val_size
 * bytes. Slots and values live in one allocation: the slot array comes
 * first and the value storage starts directly behind it. */
static void init_with_cap(Map *m, size_t val_size, size_t cap) {
    MapSlot *slots = malloc(sizeof(MapSlot) * cap + val_size * cap);

    m->val_size = val_size;
    m->cap = cap;
    m->len = 0;
    m->slots = slots;
    m->vals = slots + cap;
    for (size_t i = 0; i < cap; i++)
        slots[i] = (MapSlot){ .empty = true };
}
/* Double the capacity of m and re-insert every entry of the old table.
 * The old value storage is part of the old slot allocation, so freeing the
 * old slots releases both. */
static void rehash(Map *m) {
    MapSlot *prev_slots = m->slots;
    const uint8_t *prev_vals = m->vals;
    const size_t prev_cap = m->cap;

    init_with_cap(m, m->val_size, prev_cap * 2);
    for (size_t i = 0; i < prev_cap; i++) {
        MapSlot *slot = &prev_slots[i];
        if (slot->empty)
            continue;
        /* map_insert() copies the key, so the old copy can go afterwards. */
        map_insert(m, slot->key, prev_vals + m->val_size * i);
        if (slot->heap_alloc)
            free(slot->key);
        /* TODO: Don't reallocate big keys. */
    }
    free(prev_slots);
}
/* Initialise m as an empty map holding values of val_size bytes each, with
 * MAP_INITIAL_CAP slots. Release it with map_term(). */
void map_init(Map *m, size_t val_size) {
init_with_cap(m, val_size, MAP_INITIAL_CAP);
}
/* Free all memory owned by m: heap-allocated keys first, then the combined
 * slot/value allocation. */
void map_term(Map *m) {
    for (size_t i = 0; i < m->cap; i++) {
        MapSlot *slot = &m->slots[i];
        if (slot->empty || !slot->heap_alloc)
            continue;
        free(slot->key);
    }
    free(m->slots);
}
/* Insert or overwrite the value stored under key.
 *
 * m:   the map; it is grown first if its load factor exceeds
 *      MAP_REHASH_THRESHOLD.
 * key: NUL-terminated key; the map keeps its own copy.
 * val: pointer to m->val_size bytes that are copied into the map.
 *
 * Returns true if an existing entry was replaced, false if a new entry was
 * created.
 *
 * Fix: when the key already exists, its stored copy is now left untouched.
 * Previously the key was copied again on every call, which leaked the old
 * heap copy of long keys and ran strcpy() over identical memory when the
 * caller passed the stored key itself. */
bool map_insert(Map *m, const char *key, const void *val) {
    if ((double)m->len / (double)m->cap > MAP_REHASH_THRESHOLD)
        rehash(m);

    const size_t keylen = strlen(key);
    /* cap is a power of two, so masking is equivalent to a modulo. */
    size_t idx = fnv1a32(key, keylen) & (m->cap - 1);
    bool replaced = false;

    /* Linear probing: walk until the key or an empty slot is found. */
    while (!m->slots[idx].empty) {
        if (streq(m->slots[idx].key, key)) {
            replaced = true;
            break;
        }
        if (++idx == m->cap)
            idx = 0;
    }

    if (!replaced) {
        MapSlot *slot = &m->slots[idx];
        slot->empty = false;
        if (keylen <= MAP_SMALLKEY_SIZE-1) {
            /* Short keys are stored inline to avoid an allocation. */
            memcpy(slot->smallkey, key, keylen + 1);
            slot->key = slot->smallkey;
            slot->heap_alloc = false;
        } else {
            slot->key = sndup(key, keylen);
            slot->heap_alloc = true;
        }
        m->len++;
    }
    memcpy((uint8_t*)m->vals + m->val_size * idx, val, m->val_size);
    return replaced;
}
/* Look up key in m. When the key is present its value is copied to out_val
 * (unless out_val is NULL) and true is returned; otherwise false. */
bool map_get(Map *m, const char *key, void *out_val) {
    size_t idx = fnv1a32(key, strlen(key)) & (m->cap - 1);

    /* Probe linearly; the first empty slot ends the search. */
    while (!m->slots[idx].empty) {
        if (streq(m->slots[idx].key, key)) {
            if (out_val)
                memcpy(out_val, (uint8_t*)m->vals + m->val_size * idx, m->val_size);
            return true;
        }
        idx++;
        if (idx == m->cap)
            idx = 0;
    }
    return false;
}

31
map.h Normal file
View File

@ -0,0 +1,31 @@
#ifndef __MAP_H__
#define __MAP_H__
#include <stdbool.h>
#include <stddef.h>
/* Keys shorter than this many bytes (terminating NUL included) are stored
 * inline in the slot instead of on the heap. */
#define MAP_SMALLKEY_SIZE 32
/* One hash table slot. The value belonging to slot i is kept separately in
 * Map.vals at byte offset i * Map.val_size. */
typedef struct MapSlot {
bool empty : 1; /* true while the slot holds no entry */
bool heap_alloc : 1; /* true if key was allocated separately and must be freed */
char *key; /* points either at smallkey or at a heap copy */
char smallkey[MAP_SMALLKEY_SIZE]; /* reduce unneeded mallocs */
} MapSlot;
/* Open-addressing hash map from NUL-terminated strings to fixed-size
 * values. slots and vals share one allocation (vals starts right behind
 * the slot array), so only slots is ever passed to free(). */
typedef struct Map {
size_t len, cap; /* len: used slots; cap: available slots */
MapSlot *slots;
size_t val_size; /* size in bytes of one stored value */
void *vals;
} Map;
void map_init(Map *m, size_t val_size);
void map_term(Map *m);
/* returns true if the value was replaced */
bool map_insert(Map *m, const char *key, const void *val);
/* Returns true if the key was found, returns false if it wasn't found.
* out_val may be set to NULL. */
bool map_get(Map *m, const char *key, void *out_val);
#endif /* MAP_H */

38
map_test.c Normal file
View File

@ -0,0 +1,38 @@
#include <assert.h>
#include "map.c"
#include "util.c"
/* Self-contained test for the map: inserts a short key, a key too long for
 * inline storage and 999 generated keys (forcing the table to grow), then
 * checks lookups and the final entry count. Failures abort via assert(). */
int main(void) {
Map m;
map_init(&m, sizeof(size_t));
size_t a = 2;
map_insert(&m, "test", &a);
a = 55;
/* longer than MAP_SMALLKEY_SIZE-1, so this key takes the heap path */
map_insert(&m, "some super long string that is definitely over thirty-one characters long", &a);
/* Assert that `key` is present/absent as stated and, if present, maps to
 * expected_value. */
#define test_key(key, should_exist, expected_value) { \
size_t v; \
bool exists = map_get(&m, key, &v); \
assert(should_exist == exists); \
if (should_exist) \
assert(v == expected_value); \
}
test_key("test", true, 2);
test_key("test1", false, 0);
test_key("some super long string that is definitely over thirty-one characters long", true, 55);
/* Far more entries than the initial capacity: exercises rehashing. */
for (size_t i = 0; i < 999; i++) {
char buf[32];
sprintf(buf, "number: %zu", i);
map_insert(&m, buf, &i);
}
for (size_t i = 0; i < 999; i++) {
char buf[32];
sprintf(buf, "number: %zu", i);
test_key(buf, true, i);
}
/* Entries inserted before the growth must still be found. */
test_key("test", true, 2);
test_key("test1", false, 0);
/* 2 hand-written keys + 999 generated keys */
assert(m.len == 1001);
map_term(&m);
printf("Passed map test!\n");
}

345
parse.c Normal file
View File

@ -0,0 +1,345 @@
#include "parse.h"
#include <stdbool.h>
#include "map.h"
#include "runtime.h"
/* Parser state shared by all parsing functions: the token list being
 * consumed and the IR being produced. */
typedef struct State {
TokList *toks;
IRToks *ir;
} State;
/* A lexical scope. Identifier lookup walks from a scope through its
 * parents (see tok_to_irparam). */
typedef struct Scope {
struct Scope *parent; /* enclosing scope, NULL for the global scope */
size_t mem_addr; /* next free memory address; incremented on allocation */
bool has_idents; /* whether ident_addrs is initialised and may be used */
Map ident_addrs; /* identifier name -> memory address (size_t) */
} Scope;
static void mark_err(const Tok *t);
static IRParam tok_to_irparam(Scope *sc, Tok *t);
static Scope make_scope(Scope *parent, size_t mem_addr, bool with_idents);
static void term_scope(Scope *sc);
static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool use_storage_addr, size_t storage_addr);
static void stmt(State *s, Scope *sc, TokListItem *t);
/* Point the global error location at the source position of token t. */
static void mark_err(const Tok *t) {
    err_col = t->col;
    err_ln = t->ln;
}
/* Convert an operand token into an IR operand.
 *
 * A value token becomes a literal operand. An identifier token becomes an
 * address operand: named identifiers are resolved by searching sc and then
 * its parents, address identifiers already carry their address.
 *
 * If a name cannot be resolved, the error state is set and a zeroed
 * IRParam (kind IRParamNull) is returned. Any other token kind is
 * considered a programming error (ASSERT_UNREACHED). */
static IRParam tok_to_irparam(Scope *sc, Tok *t) {
if (t->kind == TokIdent) {
size_t addr;
if (t->Ident.kind == IdentName) {
bool exists = false;
/* innermost scope first, so inner declarations shadow outer ones */
for (Scope *i = sc; i != NULL; i = i->parent) {
/* scopes without identifiers have no initialised map to search */
if (!i->has_idents)
continue;
exists = map_get(&i->ident_addrs, t->Ident.Name, &addr);
if (exists)
break;
}
if (!exists) {
mark_err(t);
set_err("Identifier '%s' not recognized in this scope", t->Ident.Name);
return (IRParam){0};
}
} else if (t->Ident.kind == IdentAddr)
addr = t->Ident.Addr;
else
ASSERT_UNREACHED();
return (IRParam){
.kind = IRParamAddr,
.Addr = addr,
};
} else if (t->kind == TokVal) {
return (IRParam){
.kind = IRParamLiteral,
.Literal = t->Val,
};
} else
ASSERT_UNREACHED();
}
/* Build a scope nested in `parent` whose allocations start at mem_addr.
 * The identifier map is only set up when with_idents is true; without it
 * the scope owns nothing and term_scope() need not be called. */
static Scope make_scope(Scope *parent, size_t mem_addr, bool with_idents) {
    Scope scope = {
        .has_idents = with_idents,
        .mem_addr = mem_addr,
        .parent = parent,
    };
    if (!with_idents)
        return scope;
    map_init(&scope.ident_addrs, sizeof(size_t));
    return scope;
}
/* Release the identifier map of sc, if it has one. */
static void term_scope(Scope *sc) {
    if (!sc->has_idents)
        return;
    map_term(&sc->ident_addrs);
}
/* Parse the expression starting at token t and emit IR that evaluates it.
 *
 * The expression is collapsed in place inside the token list: each evaluated
 * sub-expression is replaced by a single token holding either the folded
 * constant or the address of the temporary that receives the result.
 *
 * s:                parser state (token list and IR output).
 * parent_sc:        scope used to resolve identifiers and allocate temporaries.
 * t:                first token of the expression.
 * If toplevel is set, newlines are seen as delimiters ending the expression.
 * If use_storage_addr is set, the result is guaranteed to be put into storage_addr.
 *
 * Errors are reported through the global error state (set_err). */
static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool use_storage_addr, size_t storage_addr) {
/* A simplified example of how the operator precedence parsing works.
 * In every step t points at the operand between l_op and r_op; the start of
 * the expression acts as a delimiter in front of the first operand.
 * ________________________________
 *   5 + 2 * 2 \n      t = 5, l_op = (front delimiter), r_op = '+'
 * precedence of '+' is higher than that of the front delimiter => move forward
 * ________________________________
 *   5 + 2 * 2 \n      t = first 2, l_op = '+', r_op = '*'
 * precedence of '*' is higher than that of '+' => move forward
 * ________________________________
 *   5 + 2 * 2 \n      t = second 2, l_op = '*', r_op = '\n'
 * precedence of '\n' (a delimiter) is lower than that of '*' => evaluate and move l_op 2 back
 * ________________________________
 *   5 + 4 \n          t = 4, l_op = '+', r_op = '\n'
 * precedence of '\n' (a delimiter) is lower than that of '+' => evaluate and move l_op 2 back
 * ________________________________
 *   9 \n              t = 9, l_op = (front delimiter), r_op = '\n'
 * both l_op and r_op are delimiters (their precedence is PREC_DELIM) => done
 */
TokListItem *start = t;
Scope *sc = parent_sc;
Scope expr_scope_obj;
/* A toplevel expression gets its own identifier-less scope that starts at
 * the parent's current address, so temporaries allocated through sc do not
 * advance the parent's mem_addr. */
if (toplevel) {
expr_scope_obj = make_scope(parent_sc, parent_sc->mem_addr, false);
sc = &expr_scope_obj;
}
for (;;) {
/* Prepare to collapse negative factor. */
bool negate = false;
if (t->tok.kind == TokOp && t->tok.Op == OpSub) {
t = t->next;
negate = true;
}
/* Ignore newlines if the expression is not toplevel. */
if (!toplevel && t->next->tok.kind == TokOp && t->next->tok.Op == OpNewLn)
toklist_del(s->toks, t->next, t->next);
/* Collapse negative factor. */
if (negate) {
bool is_last_operation = t->prev == start && t->next->tok.kind == TokOp && op_prec[t->next->tok.Op] == PREC_DELIM;
/* v stays valid after being unlinked below: toklist_del() only changes
 * links, the item itself remains in the list's memory pool. */
Tok *v = &t->tok;
t = t->prev;
toklist_del(s->toks, t->next, t->next);
/* From here on t is the former '-' token; it is overwritten with the result. */
if (v->kind == TokVal) {
/* immediately negate value */
t->tok.kind = TokVal;
t->tok.Val.type.kind = v->Val.type.kind;
switch (v->Val.type.kind) {
case TypeInt: t->tok.Val.Int = -v->Val.Int; break;
case TypeFloat: t->tok.Val.Float = -v->Val.Float; break;
default: ASSERT_UNREACHED();
}
} else {
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (use_storage_addr && is_last_operation)
res_addr = storage_addr;
else
res_addr = sc->mem_addr++;
/* add IR instruction to negate the value */
IRParam v_irparam;
TRY(v_irparam = tok_to_irparam(sc, v));
irtoks_app(s->ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRNeg,
.Unary = {
.addr = res_addr,
.val = v_irparam,
},
});
/* leave new memory address as result */
t->tok.kind = TokIdent;
t->tok.Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
if (use_storage_addr && is_last_operation)
/* Since the final result was written to the storage address,
 * we're done. */
return;
}
}
/* Find out operator precedence of l_op and r_op. */
int8_t l_op_prec;
Tok *l_op;
if (t == start) {
l_op_prec = PREC_DELIM;
l_op = NULL;
} else {
l_op = &t->prev->tok;
if (l_op->kind != TokOp) {
mark_err(l_op);
set_err("Expected operator");
return;
}
l_op_prec = op_prec[l_op->Op];
}
int8_t r_op_prec;
Tok *r_op = &t->next->tok;
if (r_op->kind != TokOp) {
mark_err(r_op);
set_err("Expected operator");
return;
}
r_op_prec = op_prec[r_op->Op];
/* If l_op and r_op are both delimiters, the expression is fully evaluated.
 * NOTE: Sometimes, we don't reach this point because the function already
 * exits directly after the last operation. */
if (l_op_prec == PREC_DELIM && r_op_prec == PREC_DELIM) {
IRParam res;
TRY(res = tok_to_irparam(sc, &t->tok));
irtoks_app(s->ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRSet,
.Unary = {
.addr = use_storage_addr ? storage_addr : sc->mem_addr++,
.val = res,
},
});
toklist_del(s->toks, t, t);
return;
}
bool is_last_operation = t->prev && t->prev->prev == start && r_op_prec == PREC_DELIM;
/* This is the actual operator precedence parser as described above. */
if (r_op_prec > l_op_prec)
t = t->next->next;
else {
/* some basic checks */
Tok *rhs = &t->tok;
if (rhs->kind != TokVal && rhs->kind != TokIdent) {
mark_err(rhs);
set_err("Expected literal or identifier");
return;
}
t = t->prev->prev;
Tok *lhs = &t->tok;
if (lhs->kind != TokVal && lhs->kind != TokIdent) {
mark_err(lhs);
set_err("Expected literal or identifier");
return;
}
/* delete the tokens that fall away from collapsing the expression
 * (NOTE: only their references are deleted here, that's important
 * because we're still using their values later on) */
toklist_del(s->toks, t->next, t->next->next);
IRInstr instr;
switch (l_op->Op) {
case OpAdd: instr = IRAdd; break;
case OpSub: instr = IRSub; break;
case OpMul: instr = IRMul; break;
case OpDiv: instr = IRDiv; break;
default:
mark_err(l_op);
set_err("Unknown operation: '%s'", op_str[l_op->Op]);
return;
}
if (lhs->kind == TokVal && rhs->kind == TokVal) {
/* evaluate the constant expression immediately */
lhs->kind = TokVal;
TRY(lhs->Val = eval_arith(instr, &lhs->Val, &rhs->Val));
} else {
IRParam lhs_irparam, rhs_irparam;
TRY(lhs_irparam = tok_to_irparam(sc, lhs));
TRY(rhs_irparam = tok_to_irparam(sc, rhs));
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (use_storage_addr && is_last_operation)
res_addr = storage_addr;
else
res_addr = sc->mem_addr++;
/* emit IR code to evaluate the non-constant expression */
irtoks_app(s->ir, (IRTok){
.ln = l_op->ln,
.col = l_op->col,
.instr = instr,
.Arith = {
.addr = res_addr,
.lhs = lhs_irparam,
.rhs = rhs_irparam,
},
});
/* leave new memory address as result */
/* NOTE(review): if lhs was a named identifier, its Name pointer is
 * overwritten here; toklist_term() only frees names of tokens that are
 * still IdentName, so that string appears to be leaked -- confirm. */
lhs->kind = TokIdent;
lhs->Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
if (use_storage_addr && is_last_operation)
/* Since the final result was written to the storage address,
 * we're done. */
return;
}
}
}
}
/* Parse one statement starting at token t and remove its tokens from the
 * list.
 *
 * The only statement handled so far is a declaration `name := expression`:
 * it reserves the next address of sc for the name, records it in the
 * scope's identifier map and has expr() store the expression's value at
 * that address. Redeclaring a name within the same scope is an error.
 * Anything else is currently dropped without generating code. */
static void stmt(State *s, Scope *sc, TokListItem *t) {
TokListItem *start = t;
if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName) {
char *name = t->tok.Ident.Name;
t = t->next;
if (t->tok.kind == TokDeclare) {
size_t addr = sc->mem_addr++;
/* the map keeps its own copy of the name */
bool replaced = map_insert(&sc->ident_addrs, name, &addr);
if (replaced) {
mark_err(&start->tok);
set_err("'%s' already declared in this scope", name);
return;
}
t = t->next;
TRY(expr(s, sc, t, true, true, addr));
}
}
/* Unlink everything from the first token of the statement up to t. */
toklist_del(s->toks, start, t);
}
/* Translate the token list into IR.
 *
 * Statements are parsed one after another from the front of the list (each
 * one removes its own tokens) until the end-of-file token is reached or an
 * error is raised. The returned IR is owned by the caller, who must release
 * it with irtoks_term() -- also when the global error flag is set, in which
 * case the IR is incomplete.
 *
 * Fix: the global scope is now released on the error path as well;
 * previously an early return leaked its identifier map. */
IRToks parse(TokList *toks) {
    IRToks ir;
    irtoks_init(&ir);
    State s = { .toks = toks, .ir = &ir };
    Scope global_scope = make_scope(NULL, 0, true);
    while (!(toks->begin->tok.kind == TokOp && toks->begin->tok.Op == OpEOF)) {
        stmt(&s, &global_scope, toks->begin);
        if (err)
            break;
    }
    term_scope(&global_scope);
    return ir;
}

10
parse.h Normal file
View File

@ -0,0 +1,10 @@
#ifndef __PARSE_H__
#define __PARSE_H__
#include "ir.h"
#include "tok.h"
#include "util.h"
IRToks parse(TokList *toks);
#endif /* PARSE_H */

57
runtime.c Normal file
View File

@ -0,0 +1,57 @@
#include "runtime.h"
#include "util.h"
/* Apply the arithmetic instruction instr to two values of the same type.
 *
 * instr:    one of IRAdd, IRSub, IRMul, IRDiv.
 * lhs, rhs: operands; both must be TypeInt or both TypeFloat.
 *
 * Returns the result with the operands' type. If the operand types are not
 * supported, or for an integer division by zero, the error state is set
 * via set_err() and a zeroed Value is returned.
 *
 * Fixes over the previous version:
 *  - float results are computed in double (Value.Float is a double; the
 *    old `float` temporary silently dropped precision);
 *  - integer division by zero is reported as an error instead of invoking
 *    undefined behaviour;
 *  - the result variables are initialised on every path.
 * NOTE: signed overflow of the integer operations is still unchecked. */
Value eval_arith(IRInstr instr, const Value *lhs, const Value *rhs) {
    switch (instr) {
        case IRAdd:
        case IRSub:
        case IRMul:
        case IRDiv: {
            if (lhs->type.kind == TypeInt && rhs->type.kind == TypeInt) {
                ssize_t res = 0;
                switch (instr) {
                    case IRAdd: res = lhs->Int + rhs->Int; break;
                    case IRSub: res = lhs->Int - rhs->Int; break;
                    case IRMul: res = lhs->Int * rhs->Int; break;
                    case IRDiv:
                        if (rhs->Int == 0) {
                            set_err("Division by zero");
                            return (Value){0};
                        }
                        res = lhs->Int / rhs->Int;
                        break;
                    default: break;
                }
                return (Value){
                    .type.kind = TypeInt,
                    .Int = res,
                };
            } else if (lhs->type.kind == TypeFloat && rhs->type.kind == TypeFloat) {
                double res = 0.0;
                switch (instr) {
                    case IRAdd: res = lhs->Float + rhs->Float; break;
                    case IRSub: res = lhs->Float - rhs->Float; break;
                    case IRMul: res = lhs->Float * rhs->Float; break;
                    case IRDiv: res = lhs->Float / rhs->Float; break;
                    default: break;
                }
                return (Value){
                    .type.kind = TypeFloat,
                    .Float = res,
                };
            } else {
                set_err("Unsupported types for operation '%s'", irinstr_str[instr]);
                return (Value){0};
            }
        }
        default:
            ASSERT_UNREACHED();
    }
    return (Value){0};
}
/* Return the zero value of type ty (integer 0 or float 0.0). Types other
 * than TypeInt and TypeFloat are treated as a programming error. */
Value zero_val(Type ty) {
    Value zero;
    zero.type = ty;
    if (ty.kind == TypeInt) {
        zero.Int = 0;
    } else if (ty.kind == TypeFloat) {
        zero.Float = 0.0;
    } else {
        ASSERT_UNREACHED();
    }
    return zero;
}

9
runtime.h Normal file
View File

@ -0,0 +1,9 @@
#ifndef __RUNTIME_H__
#define __RUNTIME_H__
#include "ir.h"
Value eval_arith(IRInstr instr, const Value *lhs, const Value *rhs);
Value zero_val(Type ty);
#endif /* RUNTIME_H */

142
tok.c Normal file
View File

@ -0,0 +1,142 @@
#include "tok.h"
#include <stdio.h>
#include <stdlib.h>
#include "util.h"
/* Precedence of each operator, indexed by Operator. Higher values bind
 * tighter; PREC_DELIM marks operators that end an expression. Operators not
 * listed here get the array's default value of 0. */
int8_t op_prec[OperatorEnumSize] = {
[OpEOF] = PREC_DELIM,
[OpNewLn] = PREC_DELIM,
[OpLCurl] = PREC_DELIM,
[OpRParen] = PREC_DELIM,
[OpComma] = PREC_DELIM,
[OpAdd] = 0,
[OpSub] = 0,
[OpMul] = 1,
[OpDiv] = 1,
};
/* Printable form of each operator, indexed by Operator; used in token dumps
 * and error messages. Entries not listed here are NULL. */
const char *op_str[OperatorEnumSize] = {
[OpLCurl] = "{",
[OpRCurl] = "}",
[OpLParen] = "(",
[OpRParen] = ")",
[OpComma] = ",",
[OpAdd] = "+",
[OpSub] = "-",
[OpMul] = "*",
[OpDiv] = "/",
[OpNewLn] = "\\n",
[OpEOF] = "EOF",
};
/* Printable form of the token kinds that carry no payload, indexed by the
 * token kind. Kinds not listed here are NULL; print_toks() checks for that. */
const char *tok_str[TokKindEnumSize] = {
[TokAssign] = "=",
[TokDeclare] = ":=",
[TokIf] = "if",
[TokWhile] = "while",
};
#define TOKLIST_MEMPOOL_INIT_CAP 4096
/* Hand out storage for one list item from the current memory pool. When the
 * pool is full, a new pool of twice the capacity is started; existing items
 * never move. The returned item has both links set to NULL. */
static inline TokListItem *toklist_alloc_item(TokList *l) {
    size_t used = l->mempool_sizes[l->curr_mempool];
    if (used + 1 > l->curr_mempool_cap) {
        /* the pool table has a fixed number of entries */
        if (l->curr_mempool+1 >= 32)
            ASSERT_UNREACHED();
        l->curr_mempool++;
        l->curr_mempool_cap *= 2;
        l->mempool_sizes[l->curr_mempool] = 0;
        l->mempools[l->curr_mempool] = malloc(sizeof(TokListItem) * l->curr_mempool_cap);
    }
    size_t slot = l->mempool_sizes[l->curr_mempool]++;
    TokListItem *itm = &l->mempools[l->curr_mempool][slot];
    itm->next = NULL;
    itm->prev = NULL;
    return itm;
}
/* Initialise l as an empty list with one memory pool of
 * TOKLIST_MEMPOOL_INIT_CAP items. Release it with toklist_term(). */
void toklist_init(TokList *l) {
    l->begin = NULL;
    l->end = NULL;
    l->curr_mempool = 0;
    l->curr_mempool_cap = TOKLIST_MEMPOOL_INIT_CAP;
    l->mempool_sizes[0] = 0;
    l->mempools[0] = malloc(sizeof(TokListItem) * TOKLIST_MEMPOOL_INIT_CAP);
}
/* Free all memory owned by l. Every item ever allocated is visited, whether
 * or not it is still linked into the list, so that the name strings of
 * identifier tokens are released together with the pools. */
void toklist_term(TokList *l) {
    for (size_t pool = 0; pool <= l->curr_mempool; pool++) {
        TokListItem *items = l->mempools[pool];
        size_t used = l->mempool_sizes[pool];
        for (size_t j = 0; j < used; j++) {
            Tok *tok = &items[j].tok;
            if (tok->kind != TokIdent || tok->Ident.kind != IdentName)
                continue;
            free(tok->Ident.Name);
        }
        free(items);
    }
}
/* Append a copy of t to the end of l. */
void toklist_append(TokList *l, Tok t) {
    TokListItem *itm = toklist_alloc_item(l);
    itm->tok = t;
    if (l->begin != NULL) {
        /* hook the new item behind the current tail */
        itm->prev = l->end;
        l->end->next = itm;
        l->end = itm;
    } else {
        /* first item: it is both head and tail */
        l->begin = itm;
        l->end = itm;
    }
}
/* Unlink the items from `from` to `to` (both inclusive) from l.
 * `from` must come before `to` in the list, or be the same item.
 * Only links are changed: the removed items stay in their memory pool (and
 * keep their own prev/next pointers) until toklist_term(), so pointers to
 * them remain usable. */
void toklist_del(TokList *l, TokListItem *from, TokListItem *to) {
/* Fix up the forward link that used to lead into the range. */
if (from == l->begin) {
l->begin = to->next;
if (to->next)
to->next->prev = NULL;
} else
from->prev->next = to->next;
/* Fix up the backward link that used to lead into the range. */
if (to == l->end) {
l->end = from->prev;
if (from->prev)
from->prev->next = NULL;
} else
to->next->prev = from->prev;
}
/* Dump the token list to stdout, one token per line in the form
 * "( <kind>[: <payload>] | <line>:<column> )". */
void print_toks(TokList *l) {
    for (TokListItem *item = l->begin; item != NULL; item = item->next) {
        const Tok *tok = &item->tok;

        printf("( ");
        if (tok->kind == TokOp) {
            printf(C_IYELLOW "Op" C_RESET);
            printf(": " C_ICYAN "%s" C_RESET, op_str[tok->Op]);
        } else if (tok->kind == TokVal) {
            printf(C_IYELLOW "Val" C_RESET);
            if (tok->Val.type.kind == TypeFloat)
                printf(": " C_ICYAN "%f" C_RESET, tok->Val.Float);
            else if (tok->Val.type.kind == TypeInt)
                printf(": " C_ICYAN "%zd" C_RESET, tok->Val.Int);
            else
                printf(" " C_ICYAN "(unknown type)" C_RESET);
        } else if (tok->kind == TokIdent) {
            printf(C_IYELLOW "Ident" C_RESET);
            if (tok->Ident.kind == IdentName)
                printf(": " C_ICYAN "Name" C_RESET ": " C_IGREEN "'%s'" C_RESET, tok->Ident.Name);
            else if (tok->Ident.kind == IdentAddr)
                printf(": " C_ICYAN "Addr" C_RESET ": " C_IGREEN "%zu" C_RESET, tok->Ident.Addr);
        } else if (tok_str[tok->kind]) {
            /* payload-free kinds are printed by their fixed spelling */
            printf(C_IYELLOW "%s" C_RESET, tok_str[tok->kind]);
        }
        printf(" | %zu:%zu )\n", tok->ln, tok->col);
    }
}

102
tok.h Normal file
View File

@ -0,0 +1,102 @@
#ifndef __TOK_H__
#define __TOK_H__
#include <stdint.h>
#include <unistd.h>
/* Type of a value in the scripting language. Wrapped in a struct so that
 * more type information can be added later (see the commented-out union). */
typedef struct Type {
enum {
TypeVoid = 0, /* also the kind of a zero-initialised Value */
TypeFloat,
TypeInt,
} kind;
/*union {
};*/
} Type;
/* A typed value; type.kind selects which union member is meaningful. */
typedef struct Value {
Type type;
union {
double Float; /* valid when type.kind == TypeFloat */
ssize_t Int; /* valid when type.kind == TypeInt */
};
} Value;
/* Operators and delimiters. Single-character operators use their character
 * code as enum value, which lets the lexer store the character directly;
 * operators without a character are numbered from 257 upwards. */
enum Operator {
OpLCurl = '{',
OpRCurl = '}',
OpLParen = '(',
OpRParen = ')',
OpComma = ',',
OpAdd = '+',
OpSub = '-',
OpMul = '*',
OpDiv = '/',
/* Everything after this marker lies outside the range of a char. */
OpBeginNonchars = 256,
OpNewLn,
OpEOF,
/* One more than the largest operator value; sizes the lookup tables. */
OperatorEnumSize,
};
typedef enum Operator Operator;
/* Precedence value of operators that delimit an expression. */
#define PREC_DELIM -1
/* Precedence and printable form of each operator, indexed by Operator. */
extern int8_t op_prec[OperatorEnumSize];
extern const char *op_str[OperatorEnumSize];
/* An identifier token's payload: either a name as written in the source or
 * an already resolved memory address (used by the parser for intermediate
 * results). */
typedef struct Identifier {
enum {
IdentName,
IdentAddr,
} kind;
union {
char *Name; /* valid when kind == IdentName; heap string freed by toklist_term() */
size_t Addr; /* valid when kind == IdentAddr */
};
} Identifier;
/* A lexical token with its source position. `kind` selects which union
 * member, if any, is meaningful. */
typedef struct Tok {
size_t ln, col; /* line and column where the token starts */
enum {
TokOp, /* payload: Op */
TokVal, /* payload: Val */
TokIdent, /* payload: Ident */
/* the remaining kinds carry no payload */
TokAssign,
TokDeclare,
TokIf,
TokWhile,
TokKindEnumSize, /* number of token kinds; sizes tok_str */
} kind;
union {
Operator Op;
Value Val;
Identifier Ident;
};
} Tok;
/* Printable form of the payload-free token kinds, indexed by kind. */
extern const char *tok_str[TokKindEnumSize];
/* Node of the doubly linked token list. */
typedef struct TokListItem {
struct TokListItem *prev, *next; /* NULL at the respective end of the list */
Tok tok;
} TokListItem;
/* Doubly linked list of tokens whose nodes are allocated from a series of
 * memory pools. Each new pool has twice the capacity of the previous one,
 * and nodes are only released all together by toklist_term(). */
typedef struct TokList {
TokListItem *begin, *end; /* first and last linked item; NULL when empty */
TokListItem *mempools[32]; /* few mallocs, no copying => much speed */
size_t mempool_sizes[32]; /* number of items handed out from each pool */
size_t curr_mempool_cap; /* capacity (in items) of the current pool */
size_t curr_mempool; /* index of the pool currently allocated from */
} TokList;
void toklist_init(TokList *l);
void toklist_term(TokList *l);
void toklist_append(TokList *l, Tok t);
void toklist_del(TokList *l, TokListItem *from, TokListItem *to);
void print_toks(TokList *l);
#endif /* TOK_H */

138
util.c Normal file
View File

@ -0,0 +1,138 @@
#include "util.h"
#include <stdarg.h>
char errbuf[ERRSZ];
bool err;
size_t err_ln, err_col;
/* Digit value of every possible byte, indexed by the byte's value:
 * '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15, and every other
 * byte maps to -1 (not a digit). Used by stoimax(), which additionally
 * rejects digits that are too large for the requested base. */
static intmax_t stoimax_digits[256] = {
[0] = -1, [1] = -1, [2] = -1, [3] = -1, [4] = -1, [5] = -1, [6] = -1, [7] = -1,
[8] = -1, [9] = -1, [10] = -1, [11] = -1, [12] = -1, [13] = -1, [14] = -1, [15] = -1,
[16] = -1, [17] = -1, [18] = -1, [19] = -1, [20] = -1, [21] = -1, [22] = -1, [23] = -1,
[24] = -1, [25] = -1, [26] = -1, [27] = -1, [28] = -1, [29] = -1, [30] = -1, [31] = -1,
[32] = -1, [33] = -1, [34] = -1, [35] = -1, [36] = -1, [37] = -1, [38] = -1, [39] = -1,
[40] = -1, [41] = -1, [42] = -1, [43] = -1, [44] = -1, [45] = -1, [46] = -1, [47] = -1,
['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, ['5'] = 5, ['6'] = 6, ['7'] = 7,
['8'] = 8, ['9'] = 9, [58] = -1, [59] = -1, [60] = -1, [61] = -1, [62] = -1, [63] = -1,
[64] = -1, ['A'] = 10, ['B'] = 11, ['C'] = 12, ['D'] = 13, ['E'] = 14, ['F'] = 15, [71] = -1,
[72] = -1, [73] = -1, [74] = -1, [75] = -1, [76] = -1, [77] = -1, [78] = -1, [79] = -1,
[80] = -1, [81] = -1, [82] = -1, [83] = -1, [84] = -1, [85] = -1, [86] = -1, [87] = -1,
[88] = -1, [89] = -1, [90] = -1, [91] = -1, [92] = -1, [93] = -1, [94] = -1, [95] = -1,
[96] = -1, ['a'] = 10, ['b'] = 11, ['c'] = 12, ['d'] = 13, ['e'] = 14, ['f'] = 15, [103] = -1,
[104] = -1, [105] = -1, [106] = -1, [107] = -1, [108] = -1, [109] = -1, [110] = -1, [111] = -1,
[112] = -1, [113] = -1, [114] = -1, [115] = -1, [116] = -1, [117] = -1, [118] = -1, [119] = -1,
[120] = -1, [121] = -1, [122] = -1, [123] = -1, [124] = -1, [125] = -1, [126] = -1, [127] = -1,
[128] = -1, [129] = -1, [130] = -1, [131] = -1, [132] = -1, [133] = -1, [134] = -1, [135] = -1,
[136] = -1, [137] = -1, [138] = -1, [139] = -1, [140] = -1, [141] = -1, [142] = -1, [143] = -1,
[144] = -1, [145] = -1, [146] = -1, [147] = -1, [148] = -1, [149] = -1, [150] = -1, [151] = -1,
[152] = -1, [153] = -1, [154] = -1, [155] = -1, [156] = -1, [157] = -1, [158] = -1, [159] = -1,
[160] = -1, [161] = -1, [162] = -1, [163] = -1, [164] = -1, [165] = -1, [166] = -1, [167] = -1,
[168] = -1, [169] = -1, [170] = -1, [171] = -1, [172] = -1, [173] = -1, [174] = -1, [175] = -1,
[176] = -1, [177] = -1, [178] = -1, [179] = -1, [180] = -1, [181] = -1, [182] = -1, [183] = -1,
[184] = -1, [185] = -1, [186] = -1, [187] = -1, [188] = -1, [189] = -1, [190] = -1, [191] = -1,
[192] = -1, [193] = -1, [194] = -1, [195] = -1, [196] = -1, [197] = -1, [198] = -1, [199] = -1,
[200] = -1, [201] = -1, [202] = -1, [203] = -1, [204] = -1, [205] = -1, [206] = -1, [207] = -1,
[208] = -1, [209] = -1, [210] = -1, [211] = -1, [212] = -1, [213] = -1, [214] = -1, [215] = -1,
[216] = -1, [217] = -1, [218] = -1, [219] = -1, [220] = -1, [221] = -1, [222] = -1, [223] = -1,
[224] = -1, [225] = -1, [226] = -1, [227] = -1, [228] = -1, [229] = -1, [230] = -1, [231] = -1,
[232] = -1, [233] = -1, [234] = -1, [235] = -1, [236] = -1, [237] = -1, [238] = -1, [239] = -1,
[240] = -1, [241] = -1, [242] = -1, [243] = -1, [244] = -1, [245] = -1, [246] = -1, [247] = -1,
[248] = -1, [249] = -1, [250] = -1, [251] = -1, [252] = -1, [253] = -1, [254] = -1, [255] = -1,
};
/*
 * Record an error: format the printf-style message described by fmt and the
 * following arguments into the global errbuf (truncated to ERRSZ bytes by
 * vsnprintf) and raise the global err flag.
 */
void set_err(const char *fmt, ...) {
	va_list args;

	va_start(args, fmt);
	vsnprintf(errbuf, ERRSZ, fmt, args);
	va_end(args);

	err = true;
}
/*
 * Duplicate exactly n bytes of s into a newly malloc'd buffer and append a
 * terminating NUL. Unlike strndup, s is not scanned for an earlier NUL: all
 * n bytes are copied, so the caller must guarantee they are readable.
 *
 * Returns the new buffer (to be released with free), or NULL if malloc fails.
 */
char *sndup(const char *s, size_t n) {
	char *copy = malloc(n + 1);

	if (copy == NULL)
		return NULL;

	memcpy(copy, s, n);
	copy[n] = '\0';
	return copy;
}
/*
 * Convert a possibly non-NUL-terminated string to an intmax_t.
 *
 *   s       the characters to convert
 *   n       maximum number of characters to look at; conversion stops early
 *           at the first NUL byte within those n characters
 *   base    numeric base; a character is rejected when its digit value is
 *           not below base
 *   endpos  optional out-parameter: set to -1 on success, otherwise to the
 *           index of the offending character
 *
 * Digit values are taken from the stoimax_digits lookup table, where -1 marks
 * a character that is not a digit. The string is scanned from its last
 * character to its first, so when several characters are invalid the
 * right-most one is reported. On failure 0 is returned.
 *
 * No sign, prefix or whitespace is accepted, overflow is not detected, and an
 * empty string converts to 0 successfully.
 */
intmax_t stoimax(const char *s, size_t n, size_t base, ssize_t *endpos) {
	/* Clamp n to the first NUL byte, if there is one. */
	for (size_t i = 0; i < n; i++) {
		if (s[i] == 0) {
			n = i;
			break;
		}
	}

	intmax_t res = 0;
	intmax_t order = 1; /* weight of the digit currently being added */
	for (ssize_t i = (ssize_t)n - 1; i >= 0; i--) {
		/*
		 * Index through unsigned char: plain char may be signed, and a
		 * negative value converted straight to size_t becomes a huge index
		 * far outside the 256-entry table.
		 */
		intmax_t dig = stoimax_digits[(unsigned char)s[i]];
		if (dig < 0 || (size_t)dig >= base) {
			if (endpos)
				*endpos = i;
			return 0;
		}
		res += order * dig;
		order *= base;
	}

	if (endpos)
		*endpos = -1;
	return res;
}
/*
 * Convert a possibly non-NUL-terminated string of the form "digits",
 * "digits.digits", ".digits" or "digits." to a double.
 *
 *   s       the characters to convert
 *   n       maximum number of characters to look at; conversion stops early
 *           at the first NUL byte within those n characters
 *   endpos  optional out-parameter: set to -1 on success, otherwise to the
 *           index of the offending character
 *
 * Only the first '.' acts as the decimal point; every other character must be
 * a decimal digit, so signs, exponents, whitespace and a second '.' are all
 * rejected. The integer part is checked first (from the point leftwards),
 * then the fraction (from the point rightwards). On failure 0.0 is returned.
 *
 * The value is built by summing digit * weight, with the fractional weight
 * obtained by repeated multiplication by 0.1, so the result can differ from
 * the correctly rounded value in its last bits.
 */
double stod(const char *s, size_t n, ssize_t *endpos) {
	/* Clamp n to the first NUL byte, if there is one. */
	size_t len = 0;
	while (len < n && s[len] != 0)
		len++;
	n = len;

	/* Index of the decimal point, or n when the string has none. */
	size_t dot = 0;
	while (dot < n && s[dot] != '.')
		dot++;

	double value = 0.0;

	/* Integer part: walk from the digit left of the point to the start. */
	double weight = 1.0;
	for (size_t k = dot; k > 0; k--) {
		const size_t idx = k - 1;
		if (!IS_NUM(s[idx])) {
			if (endpos)
				*endpos = idx;
			return 0.0;
		}
		double digit = s[idx] - '0';
		value += weight * digit;
		weight *= 10.0;
	}

	/* Fractional part: walk from the digit right of the point to the end. */
	weight = 0.1;
	for (size_t idx = dot + 1; idx < n; idx++) {
		if (!IS_NUM(s[idx])) {
			if (endpos)
				*endpos = idx;
			return 0.0;
		}
		double digit = s[idx] - '0';
		value += weight * digit;
		weight *= 0.1;
	}

	if (endpos)
		*endpos = -1;
	return value;
}
/*
 * Read the entire contents of fp into a newly malloc'd, NUL-terminated buffer.
 *
 * The file size is determined by seeking to the end, after which the stream
 * is rewound and read from the beginning; fp must therefore be seekable.
 * An empty file yields an empty string, not a failure.
 *
 * Returns the buffer (to be released with free), or NULL if seeking, ftell,
 * allocation or reading fails.
 */
char *mreadfile(FILE *fp) {
	if (fseek(fp, 0l, SEEK_END) == -1)
		return NULL;
	long size = ftell(fp);
	if (size < 0)
		return NULL;
	rewind(fp);

	char *buf = malloc((size_t)size + 1);
	if (!buf)
		return NULL;

	/*
	 * Read `size` one-byte items rather than one `size`-byte item: fread
	 * returns 0 whenever the item size is 0, which would make every empty
	 * file look like a read error.
	 */
	size_t nread = fread(buf, 1, (size_t)size, fp);
	if (nread != (size_t)size) {
		free(buf);
		return NULL;
	}

	buf[size] = 0;
	return buf;
}
/*
 * 32-bit FNV-1a hash of the n bytes starting at data.
 *
 * Starts from the offset basis 2166136261 and, for every byte in order,
 * XORs the byte into the hash and then multiplies by the prime 16777619
 * (modulo 2^32). With n == 0 the offset basis is returned unchanged.
 */
uint32_t fnv1a32(const void *data, size_t n) {
	const uint8_t *cur = data;
	uint32_t hash = 2166136261u;

	while (n-- > 0) {
		hash ^= *cur++;
		hash *= 16777619u;
	}
	return hash;
}

59
util.h Normal file
View File

@ -0,0 +1,59 @@
/*
 * util.h - shared helpers: global error state, character-class macros and
 * small string / number / file utilities.
 */
#ifndef UTIL_H
#define UTIL_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* some ANSI color codes */
#define C_RED "\x1b[31m"
#define C_GREEN "\x1b[32m"
#define C_YELLOW "\x1b[33m"
#define C_BLUE "\x1b[34m"
#define C_MAGENTA "\x1b[35m"
#define C_CYAN "\x1b[36m"
#define C_WHITE "\x1b[37m"
#define C_IRED "\x1b[31;1m"
#define C_IGREEN "\x1b[32;1m"
#define C_IYELLOW "\x1b[33;1m"
#define C_IBLUE "\x1b[34;1m"
#define C_IMAGENTA "\x1b[35;1m"
#define C_IWHITE "\x1b[37;1m"
#define C_ICYAN "\x1b[36;1m"
#define C_RESET "\x1b[m"

/*
 * Global error state. set_err() raises `err` and writes the message into
 * `errbuf`. err_ln / err_col are presumably the source line and column the
 * error refers to; they are not written by set_err() (TODO confirm who sets
 * them).
 */
#define ERRSZ 4096
extern char errbuf[ERRSZ];
extern bool err;
extern size_t err_ln, err_col;

/*
 * Run `expr`, then return from the enclosing function if the error flag is
 * set. TRY is for functions returning void, TRY_RET for functions returning
 * a value. Both expand to a plain { ... } block.
 */
#define TRY(expr) {expr; if (err) return;}
#define TRY_RET(expr, ret) {expr; if (err) return (ret);}

/* set the error flag and store a printf-style formatted message in errbuf */
void set_err(const char *fmt, ...);

/* report an impossible code path on stderr and terminate with exit status 1 */
#define ASSERT_UNREACHED() { fprintf(stderr, "Illegal code position reached in %s:%d\n", __FILE__, __LINE__); exit(1); }

/*
 * ASCII character classes. IS_ALPHA also accepts '_'.
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
#define IS_ALPHA(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || (c) == '_')
#define IS_ALNUM(c) (IS_ALPHA(c) || IS_NUM(c))

/* true if the two null-terminated strings are equal */
#define streq(a, b) (strcmp((a), (b)) == 0)

/* check if a null-terminated string and a non-null-terminated string are equal */
static inline bool streq_0_n(const char *a, const char *b, size_t bn) { return bn == strlen(a) ? strncmp(a, b, bn) == 0 : false; }

/* a more trusting version of strndup; also for systems that don't have strndup */
char *sndup(const char *s, size_t n);

/* convert a non-null-terminated string to an intmax_t */
intmax_t stoimax(const char *s, size_t n, size_t base, ssize_t *endpos /* -1 on success */);

/* convert a non-null-terminated string to a double */
double stod(const char *s, size_t n, ssize_t *endpos /* -1 on success */);

/*
 * read the whole file into a malloc'd, null-terminated buffer;
 * returns NULL and sets errno on failure
 */
char *mreadfile(FILE *fp);

/* 32-bit FNV-1a hash of n bytes of data */
uint32_t fnv1a32(const void *data, size_t n);

#endif /* UTIL_H */

1
vm.c Normal file
View File

@ -0,0 +1 @@
#include "vm.h"

4
vm.h Normal file
View File

@ -0,0 +1,4 @@
#ifndef __VM_H__
#define __VM_H__
#endif /* VM_H */