lang/parse.c
r4 e7f4773cba make while loop parsing more elegant
It is now possible to parse parts of the program and save the IR for
later addition. This is currently used for while loops but will also make other
control flow elements easier to implement in the future.
2021-12-22 12:52:16 +01:00

436 lines
12 KiB
C

#include "parse.h"
#include <stdbool.h>
#include "map.h"
#include "runtime.h"
typedef struct Scope {
struct Scope *parent;
size_t mem_addr;
bool has_idents;
Map ident_addrs;
} Scope;
typedef struct ExprMode {
bool ignore_newln;
enum {
ExprModeJustCollapse, /* should leave either a literal or an own address as result */
ExprModeStorageAddr, /* should use the supplied storage address in any case; should leave no token behind */
} kind;
union {
size_t StorageAddr;
};
} ExprMode;
static void mark_err(const Tok *t);
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos);
static IRParam tok_to_irparam(Scope *sc, Tok *t);
static Scope make_scope(Scope *parent, bool with_idents);
static void term_scope(Scope *sc);
static void expr(IRToks *out_ir, TokList *toks, Scope *parent_sc, TokListItem *t, ExprMode mode);
static void stmt(IRToks *out_ir, TokList *toks, Scope *sc, TokListItem *t);
static void mark_err(const Tok *t) {
err_ln = t->ln;
err_col = t->col;
}
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos) {
size_t addr;
bool exists = false;
for (const Scope *i = sc; i != NULL; i = i->parent) {
if (!i->has_idents)
continue;
exists = map_get(&i->ident_addrs, name, &addr);
if (exists)
break;
}
if (!exists) {
mark_err(errpos);
set_err("Identifier '%s' not recognized in this scope", name);
return 0;
}
return addr;
}
static IRParam tok_to_irparam(Scope *sc, Tok *t) {
if (t->kind == TokIdent) {
size_t addr;
if (t->Ident.kind == IdentName) {
TRY_RET(addr = get_ident_addr(sc, t->Ident.Name, t), (IRParam){0});
} else if (t->Ident.kind == IdentAddr)
addr = t->Ident.Addr;
else
ASSERT_UNREACHED();
return (IRParam){
.kind = IRParamAddr,
.Addr = addr,
};
} else if (t->kind == TokVal) {
return (IRParam){
.kind = IRParamLiteral,
.Literal = t->Val,
};
} else
ASSERT_UNREACHED();
}
/* term_scope doesn't have to be called if with_idents is set to false. */
static Scope make_scope(Scope *parent, bool with_idents) {
Scope s = { .parent = parent, .mem_addr = parent ? parent->mem_addr : 0, .has_idents = with_idents };
if (with_idents)
map_init(&s.ident_addrs, sizeof(size_t));
return s;
}
static void term_scope(Scope *sc) {
if (sc->has_idents)
map_term(&sc->ident_addrs);
}
static void expr(IRToks *out_ir, TokList *toks, Scope *parent_sc, TokListItem *t, ExprMode mode) {
/* A simplified example of how the operator precedence parsing works:
* ________________________________
* Where t points to (between l_op and r_op in each step)
* |
* v
* 5 + 2 * 2 \n
* ^ ^
* | |
* l_op r_op
* precedence of '+' is higher than that of the front delimiter => move forward
* ________________________________
* 5 + 2 * 2 \n
* ^ ^
* | |
* l_op r_op
* precedence of '*' is higher than that of '+' => move forward
* ________________________________
* 5 + 2 * 2 \n
* ^ ^
* | |
* l_op r_op
* precedence of '\n' (a delimiter) is lower than that of '*' => evaluate and move l_op 2 back
* ________________________________
* 5 + 4 \n
* ^ ^
* | |
* l_op r_op
* precedence of '\n' (a delimiter) is lower than that of '+' => evaluate and move l_op 2 back
* ________________________________
* 9 \n
* ^ ^
* | |
* l_op r_op
* both l_op and r_op are delimiters (their precedence is PREC_DELIM) => done
*/
TokListItem *start = t;
/* Each expression and subexpression has its own scope. */
Scope sc = make_scope(parent_sc, false);
for (;;) {
/* Prepare to collapse negative factor. */
bool negate = false;
if (t->tok.kind == TokOp && t->tok.Op == OpSub) {
t = t->next;
negate = true;
}
/* Ignore newlines if told to do so. */
if (mode.ignore_newln && t->next->tok.kind == TokOp && t->next->tok.Op == OpNewLn)
toklist_del(toks, t->next, t->next);
/* Collapse negative factor. */
if (negate) {
bool is_last_operation = t->prev == start && t->next->tok.kind == TokOp && op_prec[t->next->tok.Op] == PREC_DELIM;
Tok *v = &t->tok;
t = t->prev;
toklist_del(toks, t->next, t->next);
if (v->kind == TokVal) {
/* immediately negate value */
t->tok.kind = TokVal;
t->tok.Val.type.kind = v->Val.type.kind;
switch (v->Val.type.kind) {
case TypeInt: t->tok.Val.Int = -v->Val.Int; break;
case TypeFloat: t->tok.Val.Float = -v->Val.Float; break;
default: ASSERT_UNREACHED();
}
} else {
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc.mem_addr++;
/* add IR instruction to negate the value */
IRParam v_irparam;
TRY(v_irparam = tok_to_irparam(&sc, v));
irtoks_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRNeg,
.Unary = {
.addr = res_addr,
.val = v_irparam,
},
});
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
/* done */
toklist_del(toks, t, t);
return;
} else {
/* leave new memory address as result */
t->tok.kind = TokIdent;
t->tok.Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
}
}
}
/* Find out operator precedence of l_op and r_op. */
int8_t l_op_prec;
Tok *l_op;
if (t == start) {
l_op_prec = PREC_DELIM;
l_op = NULL;
} else {
l_op = &t->prev->tok;
if (l_op->kind != TokOp) {
mark_err(l_op);
set_err("Expected operator");
return;
}
l_op_prec = op_prec[l_op->Op];
}
int8_t r_op_prec;
Tok *r_op = &t->next->tok;
if (r_op->kind != TokOp) {
mark_err(r_op);
set_err("Expected operator");
return;
}
r_op_prec = op_prec[r_op->Op];
/* If l_op and r_op are both delimiters, the expression is fully evaluated.
* NOTE: Sometimes, we don't reach this point because the function already
* exits directly after the last operation. */
if (l_op_prec == PREC_DELIM && r_op_prec == PREC_DELIM) {
if (t->tok.kind != TokVal && t->tok.kind != TokIdent) {
mark_err(&t->tok);
set_err("Expected literal or identifier");
return;
}
if (mode.kind == ExprModeStorageAddr) {
IRParam res;
TRY(res = tok_to_irparam(&sc, &t->tok));
irtoks_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRSet,
.Unary = {
.addr = mode.StorageAddr,
.val = res,
},
});
toklist_del(toks, t, t);
}
return;
}
bool is_last_operation = t->prev && t->prev->prev == start && r_op_prec == PREC_DELIM;
/* This is the actual operator precedence parser as described above. */
if (r_op_prec > l_op_prec)
t = t->next->next;
else {
/* some basic checks */
Tok *rhs = &t->tok;
if (rhs->kind != TokVal && rhs->kind != TokIdent) {
mark_err(rhs);
set_err("Expected literal or identifier");
return;
}
t = t->prev->prev;
Tok *lhs = &t->tok;
if (lhs->kind != TokVal && lhs->kind != TokIdent) {
mark_err(lhs);
set_err("Expected literal or identifier");
return;
}
/* delete the tokens that fall away from collapsing the expression
* (NOTE: only their references are deleted here, that's important
* because we're still using their values later on) */
toklist_del(toks, t->next, t->next->next);
IRInstr instr;
switch (l_op->Op) {
case OpAdd: instr = IRAdd; break;
case OpSub: instr = IRSub; break;
case OpMul: instr = IRMul; break;
case OpDiv: instr = IRDiv; break;
default:
mark_err(l_op);
set_err("Unknown operation: '%s'", op_str[l_op->Op]);
return;
}
if (lhs->kind == TokVal && rhs->kind == TokVal) {
/* evaluate the constant expression immediately */
lhs->kind = TokVal;
TRY(lhs->Val = eval_arith(instr, &lhs->Val, &rhs->Val));
} else {
IRParam lhs_irparam, rhs_irparam;
TRY(lhs_irparam = tok_to_irparam(&sc, lhs));
TRY(rhs_irparam = tok_to_irparam(&sc, rhs));
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc.mem_addr++;
/* emit IR code to evaluate the non-constant expression */
irtoks_app(out_ir, (IRTok){
.ln = l_op->ln,
.col = l_op->col,
.instr = instr,
.Arith = {
.addr = res_addr,
.lhs = lhs_irparam,
.rhs = rhs_irparam,
},
});
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
/* done */
toklist_del(toks, t, t);
break;
} else {
/* leave new memory address as result */
lhs->kind = TokIdent;
lhs->Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
}
}
}
}
}
static void stmt(IRToks *out_ir, TokList *toks, Scope *sc, TokListItem *t) {
TokListItem *start = t;
if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName) {
char *name = t->tok.Ident.Name;
t = t->next;
if (t->tok.kind == TokDeclare) {
t = t->next;
size_t addr = sc->mem_addr++;
bool replaced = map_insert(&sc->ident_addrs, name, &addr);
if (replaced) {
mark_err(&start->tok);
set_err("'%s' already declared in this scope", name);
return;
}
TRY(expr(out_ir, toks, sc, t, (ExprMode){ .kind = ExprModeStorageAddr, .ignore_newln = false, .StorageAddr = addr }));
} else if (t->tok.kind == TokAssign) {
t = t->next;
size_t addr;
TRY(addr = get_ident_addr(sc, name, &start->tok));
TRY(expr(out_ir, toks, sc, t, (ExprMode){ .kind = ExprModeStorageAddr, .ignore_newln = false, .StorageAddr = addr }));
}
} else if (t->tok.kind == TokOp && t->tok.Op == OpLCurl) {
Scope inner_sc = make_scope(sc, true);
for (;;) {
if (t->next->tok.kind == TokOp) {
if (t->next->tok.Op == OpEOF) {
term_scope(&inner_sc);
mark_err(&start->tok);
set_err("Unclosed '{'");
return;
}
if (t->next->tok.Op == OpRCurl)
break;
}
TRY_ELSE(stmt(out_ir, toks, &inner_sc, t->next), term_scope(&inner_sc));
}
term_scope(&inner_sc);
t = t->next;
} else if (t->tok.kind == TokWhile) {
/* How while is generally implemented in IR:
* 0: jmp to 3
* 1: some_code
* 2: some_code
* 3: some stuff evaluating condition xyz
* 4: jmp to 1 if condition xyz is met
* */
/* add initial jmp instruction */
size_t jmp_instr_iaddr = out_ir->len;
irtoks_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJmp,
.Jmp = {
.iaddr = 0, /* unknown for now */
},
});
t = t->next;
/* parse condition */
IRToks cond_ir;
irtoks_init_short(&cond_ir);
TRY_ELSE(expr(&cond_ir, toks, sc, t, (ExprMode){ .kind = ExprModeJustCollapse, .ignore_newln = false }), irtoks_term(&cond_ir));
IRParam cond_irparam;
TRY_ELSE(cond_irparam = tok_to_irparam(sc, &t->tok), irtoks_term(&cond_ir));
/* add conditional jump */
irtoks_app(&cond_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJnz,
.CJmp = {
.iaddr = jmp_instr_iaddr + 1,
.condition = cond_irparam,
},
});
t = t->next;
/* parse loop body */
TRY_ELSE(stmt(out_ir, toks, sc, t), irtoks_term(&cond_ir));
/* finally we know where the jmp from the beginning has to jump to */
out_ir->toks[jmp_instr_iaddr].Jmp.iaddr = out_ir->len;
/* append condition IR to program IR, then terminate condition IR stream */
irtoks_app_irtoks(out_ir, &cond_ir);
irtoks_term(&cond_ir);
}
toklist_del(toks, start, t);
}
IRToks parse(TokList *toks) {
IRToks ir;
irtoks_init_long(&ir);
Scope global_scope = make_scope(NULL, true);
for (;;) {
if (toks->begin->tok.kind == TokOp && toks->begin->tok.Op == OpEOF)
break;
TRY_RET_ELSE(stmt(&ir, toks, &global_scope, toks->begin), ir, term_scope(&global_scope));
}
term_scope(&global_scope);
return ir;
}