Compare commits

...

5 Commits

Author SHA1 Message Date
r4 9f339ed44d make while loops more efficient and improve expression parsing 2021-12-21 17:09:03 +01:00
r4 10d436107c very basic while loop 2021-12-21 15:02:53 +01:00
r4 61d5661b96 fix memory leak on error 2021-12-21 14:04:50 +01:00
r4 005309d1eb IR jmp and scopes 2021-12-21 13:59:08 +01:00
r4 6080901842 catch missing expression 2021-12-21 11:55:53 +01:00
6 changed files with 168 additions and 79 deletions

View File

@ -1,22 +1,11 @@
a := 1
a = a + 1
b := a - 2 * 3
b = 2 + b * a
c := -b
//a := 1
//b := 1 - 2 * 2 + 5
//c := a + b * 2 * b
//d := a + 4 * b * a
/*x := 1
x := 1
y := 1
i := 60
while i {
while i + 1 {
z := x + y
y = x
x = z
print(z)
//print(z)
i = i - 1
}*/
}

7
ir.c
View File

@ -11,6 +11,7 @@ const char *irinstr_str[IRInstrEnumSize] = {
[IRMul] = "mul",
[IRDiv] = "div",
[IRPrint] = "print",
[IRJmp] = "jmp",
[IRJnz] = "jnz",
};
@ -68,6 +69,7 @@ static void print_irparam(const IRParam *p) {
void print_ir(IRToks *v) {
for (size_t i = 0; i < v->len; i++) {
printf("%04zx ", i);
printf("%s", irinstr_str[v->toks[i].instr]);
switch (v->toks[i].instr) {
case IRSet:
@ -90,10 +92,13 @@ void print_ir(IRToks *v) {
print_irparam(&a->param);
}
break;
case IRJmp:
printf(" %zx", v->toks[i].Jmp.iaddr);
break;
case IRJnz:
printf(" ");
print_irparam(&v->toks[i].CJmp.condition);
printf(" %zu", v->toks[i].CJmp.iaddr);
printf(" %zx", v->toks[i].CJmp.iaddr);
break;
default:
break;

5
ir.h
View File

@ -11,6 +11,7 @@ enum IRInstr {
IRMul,
IRDiv,
IRPrint,
IRJmp,
IRJnz,
IRInstrEnumSize,
};
@ -56,6 +57,10 @@ typedef struct IRTok {
IRArgs *args;
size_t args_size;
} Print;
struct {
size_t iaddr;
} Jmp;
struct {
size_t iaddr;

210
parse.c
View File

@ -17,12 +17,25 @@ typedef struct Scope {
Map ident_addrs;
} Scope;
/* Options passed to expr() that control newline handling and where the
 * result of the parsed expression ends up. */
typedef struct ExprMode {
/* When set, expr() removes a newline operator token that follows the
 * current position instead of leaving it in the token list. */
bool ignore_newln;
enum {
/* Only collapse the expression: the token list is left holding a single
 * literal or an identifier/address token that the caller then reads. */
ExprModeJustCollapse, /* should leave either a literal or an own address as result */
/* Write the final result to StorageAddr; the expression's tokens are
 * removed from the list before expr() returns. */
ExprModeStorageAddr, /* should use the supplied storage address in any case; should leave no token behind */
} kind;
union {
/* Destination address for the result; only read when
 * kind == ExprModeStorageAddr. */
size_t StorageAddr;
};
} ExprMode;
static void mark_err(const Tok *t);
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos);
static IRParam tok_to_irparam(Scope *sc, Tok *t);
static Scope make_scope(Scope *parent, size_t mem_addr, bool with_idents);
static Scope make_scope(Scope *parent, bool with_idents);
static void term_scope(Scope *sc);
static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool use_storage_addr, size_t storage_addr);
static void expr(State *s, Scope *parent_sc, TokListItem *t, ExprMode mode);
static void stmt(State *s, Scope *sc, TokListItem *t);
static void mark_err(const Tok *t) {
@ -71,8 +84,8 @@ static IRParam tok_to_irparam(Scope *sc, Tok *t) {
}
/* term_scope doesn't have to be called if with_idents is set to false. */
static Scope make_scope(Scope *parent, size_t mem_addr, bool with_idents) {
Scope s = { .parent = parent, .mem_addr = mem_addr, .has_idents = with_idents };
static Scope make_scope(Scope *parent, bool with_idents) {
Scope s = { .parent = parent, .mem_addr = parent ? parent->mem_addr : 0, .has_idents = with_idents };
if (with_idents)
map_init(&s.ident_addrs, sizeof(size_t));
return s;
@ -83,9 +96,7 @@ static void term_scope(Scope *sc) {
map_term(&sc->ident_addrs);
}
/* If toplevel is set, newlines are seen as delimiters ending the expression.
* If use_storage_addr is set, the result is guaranteed to be put into storage_addr. */
static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool use_storage_addr, size_t storage_addr) {
static void expr(State *s, Scope *parent_sc, TokListItem *t, ExprMode mode) {
/* A simplified example of how the operator precedence parsing works:
* ________________________________
* Where t points to (between l_op and r_op in each step)
@ -123,12 +134,9 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool
*/
TokListItem *start = t;
Scope *sc = parent_sc;
Scope expr_scope_obj;
if (toplevel) {
expr_scope_obj = make_scope(parent_sc, parent_sc->mem_addr, false);
sc = &expr_scope_obj;
}
/* Each expression and subexpression has its own scope. */
Scope sc = make_scope(parent_sc, false);
for (;;) {
/* Prepare to collapse negative factor. */
@ -138,8 +146,8 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool
negate = true;
}
/* Ignore newlines if the expression is not toplevel. */
if (!toplevel && t->next->tok.kind == TokOp && t->next->tok.Op == OpNewLn)
/* Ignore newlines if told to do so. */
if (mode.ignore_newln && t->next->tok.kind == TokOp && t->next->tok.Op == OpNewLn)
toklist_del(s->toks, t->next, t->next);
/* Collapse negative factor. */
@ -161,14 +169,14 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool
} else {
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (use_storage_addr && is_last_operation)
res_addr = storage_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc->mem_addr++;
res_addr = sc.mem_addr++;
/* add IR instruction to negate the value */
IRParam v_irparam;
TRY(v_irparam = tok_to_irparam(sc, v));
TRY(v_irparam = tok_to_irparam(&sc, v));
irtoks_app(s->ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
@ -179,17 +187,18 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool
},
});
/* leave new memory address as result */
t->tok.kind = TokIdent;
t->tok.Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
if (use_storage_addr && is_last_operation)
/* Since the final result was written to the storage address,
* we're done. */
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
/* done */
toklist_del(s->toks, t, t);
return;
} else {
/* leave new memory address as result */
t->tok.kind = TokIdent;
t->tok.Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
}
}
}
@ -221,18 +230,25 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool
* NOTE: Sometimes, we don't reach this point because the function already
* exits directly after the last operation. */
if (l_op_prec == PREC_DELIM && r_op_prec == PREC_DELIM) {
IRParam res;
TRY(res = tok_to_irparam(sc, &t->tok));
irtoks_app(s->ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRSet,
.Unary = {
.addr = use_storage_addr ? storage_addr : sc->mem_addr++,
.val = res,
},
});
toklist_del(s->toks, t, t);
if (t->tok.kind != TokVal && t->tok.kind != TokIdent) {
mark_err(&t->tok);
set_err("Expected literal or identifier");
return;
}
if (mode.kind == ExprModeStorageAddr) {
IRParam res;
TRY(res = tok_to_irparam(&sc, &t->tok));
irtoks_app(s->ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRSet,
.Unary = {
.addr = mode.StorageAddr,
.val = res,
},
});
toklist_del(s->toks, t, t);
}
return;
}
@ -279,15 +295,15 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool
TRY(lhs->Val = eval_arith(instr, &lhs->Val, &rhs->Val));
} else {
IRParam lhs_irparam, rhs_irparam;
TRY(lhs_irparam = tok_to_irparam(sc, lhs));
TRY(rhs_irparam = tok_to_irparam(sc, rhs));
TRY(lhs_irparam = tok_to_irparam(&sc, lhs));
TRY(rhs_irparam = tok_to_irparam(&sc, rhs));
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (use_storage_addr && is_last_operation)
res_addr = storage_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc->mem_addr++;
res_addr = sc.mem_addr++;
/* emit IR code to evaluate the non-constant expression */
irtoks_app(s->ir, (IRTok){
@ -301,17 +317,18 @@ static void expr(State *s, Scope *parent_sc, TokListItem *t, bool toplevel, bool
},
});
/* leave new memory address as result */
lhs->kind = TokIdent;
lhs->Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
if (use_storage_addr && is_last_operation)
/* Since the final result was written to the storage address,
* we're done. */
return;
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
/* done */
toklist_del(s->toks, t, t);
break;
} else {
/* leave new memory address as result */
lhs->kind = TokIdent;
lhs->Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
}
}
}
}
@ -331,13 +348,84 @@ static void stmt(State *s, Scope *sc, TokListItem *t) {
set_err("'%s' already declared in this scope", name);
return;
}
TRY(expr(s, sc, t, true, true, addr));
TRY(expr(s, sc, t, (ExprMode){ .kind = ExprModeStorageAddr, .ignore_newln = false, .StorageAddr = addr }));
} else if (t->tok.kind == TokAssign) {
t = t->next;
size_t addr;
TRY(addr = get_ident_addr(sc, name, &start->tok));
TRY(expr(s, sc, t, true, true, addr));
TRY(expr(s, sc, t, (ExprMode){ .kind = ExprModeStorageAddr, .ignore_newln = false, .StorageAddr = addr }));
}
} else if (t->tok.kind == TokOp && t->tok.Op == OpLCurl) {
Scope inner_sc = make_scope(sc, true);
for (;;) {
if (t->next->tok.kind == TokOp) {
if (t->next->tok.Op == OpEOF) {
term_scope(&inner_sc);
mark_err(&start->tok);
set_err("Unclosed '{'");
return;
}
if (t->next->tok.Op == OpRCurl)
break;
}
TRY_ELSE(stmt(s, &inner_sc, t->next), term_scope(&inner_sc));
}
term_scope(&inner_sc);
t = t->next;
} else if (t->tok.kind == TokWhile) {
/* How while is generally implemented in IR:
* 0: jmp to 3
* 1: some_code
* 2: some_code
* 3: some stuff evaluating condition xyz
* 4: jmp to 1 if condition xyz is met
* */
size_t jmp_instr_iaddr = s->ir->len;
irtoks_app(s->ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJmp,
.Jmp = {
.iaddr = 0, /* unknown for now */
},
});
t = t->next;
/* find beginning of while loop body */
TokListItem *lcurl;
for (TokListItem *i = t;; i++) {
if (i == NULL) {
mark_err(&start->tok);
set_err("Expected '{' after 'while' loop condition");
return;
}
if (i->tok.kind == TokOp && i->tok.Op == OpLCurl) {
lcurl = i;
break;
}
}
/* write loop body to IR stream */
TRY(stmt(s, sc, lcurl));
/* finally we know where the jmp from the beginning has to jump to */
s->ir->toks[jmp_instr_iaddr].Jmp.iaddr = s->ir->len;
TRY(expr(s, sc, t, (ExprMode){ .kind = ExprModeJustCollapse, .ignore_newln = false }));
IRParam condition;
TRY(condition = tok_to_irparam(sc, &t->tok));
irtoks_app(s->ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJnz,
.CJmp = {
.iaddr = jmp_instr_iaddr + 1,
.condition = condition,
},
});
}
toklist_del(s->toks, start, t);
}
@ -346,11 +434,11 @@ IRToks parse(TokList *toks) {
IRToks ir;
irtoks_init(&ir);
State s = { .toks = toks, .ir = &ir };
Scope global_scope = make_scope(NULL, 0, true);
Scope global_scope = make_scope(NULL, true);
for (;;) {
if (toks->begin->tok.kind == TokOp && toks->begin->tok.Op == OpEOF)
break;
TRY_RET(stmt(&s, &global_scope, toks->begin), ir);
TRY_RET_ELSE(stmt(&s, &global_scope, toks->begin), ir, term_scope(&global_scope));
}
term_scope(&global_scope);
return ir;

View File

@ -15,7 +15,7 @@ Value eval_arith(IRInstr instr, const Value *lhs, const Value *rhs) {
case IRSub: res = lhs->Int - rhs->Int; break;
case IRMul: res = lhs->Int * rhs->Int; break;
case IRDiv: res = lhs->Int / rhs->Int; break;
default: break;
default: ASSERT_UNREACHED();
}
return (Value){
.type.kind = TypeInt,
@ -28,7 +28,7 @@ Value eval_arith(IRInstr instr, const Value *lhs, const Value *rhs) {
case IRSub: res = lhs->Float - rhs->Float; break;
case IRMul: res = lhs->Float * rhs->Float; break;
case IRDiv: res = lhs->Float / rhs->Float; break;
default: break;
default: ASSERT_UNREACHED();
}
return (Value){
.type.kind = TypeFloat,

2
util.h
View File

@ -32,7 +32,9 @@ extern char errbuf[ERRSZ];
extern bool err;
extern size_t err_ln, err_col;
/* Error-propagation helpers built on the global `err` flag.
 * Each macro evaluates `expr` and then, if `err` is set, leaves the
 * enclosing function:
 *   TRY(expr)                      - `return;`
 *   TRY_ELSE(expr, onerr)          - runs `onerr`, then `return;`
 *   TRY_RET(expr, ret)             - `return (ret);`
 *   TRY_RET_ELSE(expr, ret, onerr) - runs `onerr`, then `return (ret);`
 * The bodies are wrapped in do { ... } while (0) so that an invocation plus
 * its trailing semicolon forms exactly one statement; this keeps the macros
 * correct inside an unbraced if/else. Always terminate the invocation with
 * a semicolon. */
#define TRY(expr) do { expr; if (err) return; } while (0)
#define TRY_ELSE(expr, onerr) do { expr; if (err) { onerr; return; } } while (0)
#define TRY_RET(expr, ret) do { expr; if (err) return (ret); } while (0)
#define TRY_RET_ELSE(expr, ret, onerr) do { expr; if (err) { onerr; return (ret); } } while (0)
void set_err(const char *fmt, ...);
/* Marks a code position that must never execute: prints the source file and
 * line to stderr and terminates the process with exit status 1.
 * Wrapped in do { ... } while (0) so that `ASSERT_UNREACHED();` is a single
 * statement and can be used safely in an unbraced if/else or switch case. */
#define ASSERT_UNREACHED() do { fprintf(stderr, "Illegal code position reached in %s:%d\n", __FILE__, __LINE__); exit(1); } while (0)