lang/parse.c

556 lines
15 KiB
C

#include "parse.h"
#include <stdbool.h>
#include "map.h"
#include "runtime.h"
typedef struct Scope {
struct Scope *parent;
size_t mem_addr;
bool has_idents;
Map ident_addrs;
} Scope;
typedef struct ExprMode {
bool ignore_newln;
enum {
ExprModeJustCollapse, /* should leave either a literal or an own address as result */
ExprModeStorageAddr, /* should use the supplied storage address in any case; should leave no token behind */
} kind;
union {
size_t StorageAddr;
};
} ExprMode;
static void mark_err(const Tok *t);
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos);
static IRParam tok_to_irparam(Scope *sc, Tok *t);
static Scope make_scope(Scope *parent, bool with_idents);
static void term_scope(Scope *sc);
static void expr(IRToks *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t, ExprMode mode);
static void stmt(IRToks *out_ir, TokList *toks, Map *funcs, Scope *sc, TokListItem *t);
static void mark_err(const Tok *t) {
err_ln = t->ln;
err_col = t->col;
}
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos) {
size_t addr;
bool exists = false;
for (const Scope *i = sc; i != NULL; i = i->parent) {
if (!i->has_idents)
continue;
exists = map_get(&i->ident_addrs, name, &addr);
if (exists)
break;
}
if (!exists) {
mark_err(errpos);
set_err("Identifier '%s' not recognized in this scope", name);
return 0;
}
return addr;
}
static IRParam tok_to_irparam(Scope *sc, Tok *t) {
if (t->kind == TokIdent) {
size_t addr;
if (t->Ident.kind == IdentName) {
TRY_RET(addr = get_ident_addr(sc, t->Ident.Name, t), (IRParam){0});
} else if (t->Ident.kind == IdentAddr)
addr = t->Ident.Addr;
else
ASSERT_UNREACHED();
return (IRParam){
.kind = IRParamAddr,
.Addr = addr,
};
} else if (t->kind == TokVal) {
return (IRParam){
.kind = IRParamLiteral,
.Literal = t->Val,
};
} else
ASSERT_UNREACHED();
}
/* term_scope doesn't have to be called if with_idents is set to false. */
static Scope make_scope(Scope *parent, bool with_idents) {
Scope s = { .parent = parent, .mem_addr = parent ? parent->mem_addr : 0, .has_idents = with_idents };
if (with_idents)
map_init(&s.ident_addrs, sizeof(size_t));
return s;
}
static void term_scope(Scope *sc) {
if (sc->has_idents)
map_term(&sc->ident_addrs);
}
static void expr(IRToks *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t, ExprMode mode) {
/* A simplified example of how the operator precedence parsing works:
* ________________________________
* Where t points to (between l_op and r_op in each step)
* |
* v
* 5 + 2 * 2 \n
* ^ ^
* | |
* l_op r_op
* precedence of '+' is higher than that of the front delimiter => move forward
* ________________________________
* 5 + 2 * 2 \n
* ^ ^
* | |
* l_op r_op
* precedence of '*' is higher than that of '+' => move forward
* ________________________________
* 5 + 2 * 2 \n
* ^ ^
* | |
* l_op r_op
* precedence of '\n' (a delimiter) is lower than that of '*' => evaluate and move l_op 2 back
* ________________________________
* 5 + 4 \n
* ^ ^
* | |
* l_op r_op
* precedence of '\n' (a delimiter) is lower than that of '+' => evaluate and move l_op 2 back
* ________________________________
* 9 \n
* ^ ^
* | |
* l_op r_op
* both l_op and r_op are delimiters (their precedence is PREC_DELIM) => done
*/
TokListItem *start = t;
/* Each expression and subexpression has its own scope. */
Scope sc = make_scope(parent_sc, false);
for (;;) {
/* Prepare to collapse negative factor. */
bool negate = false;
if (t->tok.kind == TokOp && t->tok.Op == OpSub) {
t = t->next;
negate = true;
}
/* Ignore newlines if told to do so. */
if (mode.ignore_newln && t->next->tok.kind == TokOp && t->next->tok.Op == OpNewLn)
toklist_del(toks, t->next, t->next);
/* Collapse function call. */
if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName && t->next->tok.kind == TokOp && t->next->tok.Op == OpLParen) {
BuiltinFunc func;
bool exists = map_get(funcs, t->tok.Ident.Name, &func);
if (!exists) {
mark_err(&t->tok);
set_err("Unrecognized function: %s()", t->tok.Ident.Name);
return;
}
TokListItem *func_ident = t;
t = t->next->next;
toklist_del(toks, t->prev, t->prev);
size_t n_args = 0;
TokListItem *first_arg = t;
TokListItem *last_arg = NULL;
bool eval_func_in_place = !func.side_effects;
for (;;) {
n_args++;
TRY(expr(out_ir, toks, funcs, &sc, t, (ExprMode){ .kind = ExprModeJustCollapse, .ignore_newln = true }));
if (t->tok.kind == TokIdent)
eval_func_in_place = false;
if (t->next->tok.kind == TokOp) {
if (t->next->tok.Op == OpComma) {
toklist_del(toks, t->next, t->next);
t = t->next;
continue;
} else if (t->next->tok.Op == OpRParen) {
toklist_del(toks, t->next, t->next);
last_arg = t;
break;
}
}
mark_err(&t->next->tok);
set_err("Expected ',' or ')' after function argument");
return;
}
if (func.n_args != n_args) {
mark_err(&func_ident->tok);
const char *plural = func.n_args == 1 ? "" : "s";
set_err("Function %s() takes %zu argument%s but got %zu", func.name, func.n_args, plural, n_args);
return;
}
if (eval_func_in_place) {
Value *args = xmalloc(sizeof(Value) * n_args);
TokListItem *itm = first_arg;
for (size_t i = 0;; i++) {
args[i] = itm->tok.Val;
if (itm == last_arg)
break;
itm = itm->next;
}
func_ident->tok = (Tok) {
.kind = TokVal,
.Val = func.func(args),
};
free(args);
} else {
bool is_last_operation = func_ident == start && last_arg->next->tok.kind == TokOp && op_prec[last_arg->next->tok.Op] == PREC_DELIM;
IRParam *args = xmalloc(sizeof(IRParam) * n_args);
TokListItem *itm = first_arg;
for (size_t i = 0;; i++) {
TRY_ELSE(args[i] = tok_to_irparam(&sc, &itm->tok), free(args));
if (itm == last_arg)
break;
itm = itm->next;
}
size_t res_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc.mem_addr++;
irtoks_app(out_ir, (IRTok){
.ln = func_ident->tok.ln,
.col = func_ident->tok.col,
.instr = IRCallInternal,
.CallI = {
.ret_addr = res_addr,
.fid = func.fid,
.args = args,
},
});
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
toklist_del(toks, func_ident, func_ident);
break;
} else {
func_ident->tok = (Tok) {
.kind = TokIdent,
.Ident = {
.kind = IdentAddr,
.Addr = res_addr,
},
};
}
}
t = func_ident;
toklist_del(toks, first_arg, last_arg);
}
/* Collapse negative factor. */
if (negate) {
bool is_last_operation = t->prev == start && t->next->tok.kind == TokOp && op_prec[t->next->tok.Op] == PREC_DELIM;
Tok *v = &t->tok;
t = t->prev;
toklist_del(toks, t->next, t->next);
if (v->kind == TokVal) {
/* immediately negate value */
t->tok.kind = TokVal;
t->tok.Val = eval_unary(IRNeg, &v->Val);
} else {
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc.mem_addr++;
/* add IR instruction to negate the value */
IRParam v_irparam;
TRY(v_irparam = tok_to_irparam(&sc, v));
irtoks_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRNeg,
.Unary = {
.addr = res_addr,
.val = v_irparam,
},
});
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
/* done */
toklist_del(toks, t, t);
return;
} else {
/* leave new memory address as result */
t->tok.kind = TokIdent;
t->tok.Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
}
}
}
/* Find out operator precedence of l_op and r_op. */
int8_t l_op_prec;
Tok *l_op;
if (t == start) {
l_op_prec = PREC_DELIM;
l_op = NULL;
} else {
l_op = &t->prev->tok;
if (l_op->kind != TokOp) {
mark_err(l_op);
set_err("Expected operator");
return;
}
l_op_prec = op_prec[l_op->Op];
}
int8_t r_op_prec;
Tok *r_op = &t->next->tok;
if (r_op->kind != TokOp) {
mark_err(r_op);
set_err("Expected operator");
return;
}
r_op_prec = op_prec[r_op->Op];
/* If l_op and r_op are both delimiters, the expression is fully evaluated.
* NOTE: Sometimes, we don't reach this point because the function already
* exits directly after the last operation. */
if (l_op_prec == PREC_DELIM && r_op_prec == PREC_DELIM) {
if (t->tok.kind != TokVal && t->tok.kind != TokIdent) {
mark_err(&t->tok);
set_err("Expected literal or identifier");
return;
}
if (mode.kind == ExprModeStorageAddr) {
IRParam res;
TRY(res = tok_to_irparam(&sc, &t->tok));
irtoks_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRSet,
.Unary = {
.addr = mode.StorageAddr,
.val = res,
},
});
toklist_del(toks, t, t);
}
return;
}
bool is_last_operation = t->prev && t->prev->prev == start && r_op_prec == PREC_DELIM;
/* This is the actual operator precedence parser as described above. */
if (r_op_prec > l_op_prec)
t = t->next->next;
else {
/* some basic checks */
Tok *rhs = &t->tok;
if (rhs->kind != TokVal && rhs->kind != TokIdent) {
mark_err(rhs);
set_err("Expected literal or identifier");
return;
}
t = t->prev->prev;
Tok *lhs = &t->tok;
if (lhs->kind != TokVal && lhs->kind != TokIdent) {
mark_err(lhs);
set_err("Expected literal or identifier");
return;
}
/* delete the tokens that fall away from collapsing the expression
* (NOTE: only their references are deleted here, that's important
* because we're still using their values later on) */
toklist_del(toks, t->next, t->next->next);
IRInstr instr;
switch (l_op->Op) {
case OpAdd: instr = IRAdd; break;
case OpSub: instr = IRSub; break;
case OpMul: instr = IRMul; break;
case OpDiv: instr = IRDiv; break;
default:
mark_err(l_op);
set_err("Unknown operation: '%s'", op_str[l_op->Op]);
return;
}
if (lhs->kind == TokVal && rhs->kind == TokVal) {
/* evaluate the constant expression immediately */
lhs->kind = TokVal;
TRY(lhs->Val = eval_arith(instr, &lhs->Val, &rhs->Val));
} else {
IRParam lhs_irparam, rhs_irparam;
TRY(lhs_irparam = tok_to_irparam(&sc, lhs));
TRY(rhs_irparam = tok_to_irparam(&sc, rhs));
/* use the predefined storage address if it was requested and we're on the last operation */
size_t res_addr;
if (mode.kind == ExprModeStorageAddr && is_last_operation)
res_addr = mode.StorageAddr;
else
res_addr = sc.mem_addr++;
/* emit IR code to evaluate the non-constant expression */
irtoks_app(out_ir, (IRTok){
.ln = l_op->ln,
.col = l_op->col,
.instr = instr,
.Arith = {
.addr = res_addr,
.lhs = lhs_irparam,
.rhs = rhs_irparam,
},
});
if (mode.kind == ExprModeStorageAddr && is_last_operation) {
/* done */
toklist_del(toks, t, t);
break;
} else {
/* leave new memory address as result */
lhs->kind = TokIdent;
lhs->Ident = (Identifier){
.kind = IdentAddr,
.Addr = res_addr,
};
}
}
}
}
}
static void stmt(IRToks *out_ir, TokList *toks, Map *funcs, Scope *sc, TokListItem *t) {
TokListItem *start = t;
if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName && (t->next->tok.kind == TokDeclare || t->next->tok.kind == TokAssign)) {
char *name = t->tok.Ident.Name;
t = t->next;
if (t->tok.kind == TokDeclare) {
t = t->next;
size_t addr = sc->mem_addr++;
bool replaced = map_insert(&sc->ident_addrs, name, &addr);
if (replaced) {
mark_err(&start->tok);
set_err("'%s' already declared in this scope", name);
return;
}
TRY(expr(out_ir, toks, funcs, sc, t, (ExprMode){ .kind = ExprModeStorageAddr, .ignore_newln = false, .StorageAddr = addr }));
} else if (t->tok.kind == TokAssign) {
t = t->next;
size_t addr;
TRY(addr = get_ident_addr(sc, name, &start->tok));
TRY(expr(out_ir, toks, funcs, sc, t, (ExprMode){ .kind = ExprModeStorageAddr, .ignore_newln = false, .StorageAddr = addr }));
} else
ASSERT_UNREACHED();
} else if (t->tok.kind == TokOp && t->tok.Op == OpLCurl) {
Scope inner_sc = make_scope(sc, true);
for (;;) {
if (t->next->tok.kind == TokOp) {
if (t->next->tok.Op == OpEOF) {
term_scope(&inner_sc);
mark_err(&start->tok);
set_err("Unclosed '{'");
return;
}
if (t->next->tok.Op == OpRCurl)
break;
}
TRY_ELSE(stmt(out_ir, toks, funcs, &inner_sc, t->next), term_scope(&inner_sc));
}
term_scope(&inner_sc);
t = t->next;
} else if (t->tok.kind == TokWhile) {
/* How while is generally implemented in IR:
* 0: jmp to 3
* 1: some_code
* 2: some_code
* 3: some stuff evaluating condition xyz
* 4: jmp to 1 if condition xyz is met
* */
/* add initial jmp instruction */
size_t jmp_instr_iaddr = out_ir->len;
irtoks_app(out_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJmp,
.Jmp = {
.iaddr = 0, /* unknown for now */
},
});
t = t->next;
/* parse condition */
IRToks cond_ir;
irtoks_init_short(&cond_ir);
TRY_ELSE(expr(&cond_ir, toks, funcs, sc, t, (ExprMode){ .kind = ExprModeJustCollapse, .ignore_newln = true }), irtoks_term(&cond_ir));
IRParam cond_irparam;
TRY_ELSE(cond_irparam = tok_to_irparam(sc, &t->tok), irtoks_term(&cond_ir));
/* add conditional jump */
irtoks_app(&cond_ir, (IRTok){
.ln = t->tok.ln,
.col = t->tok.col,
.instr = IRJnz,
.CJmp = {
.iaddr = jmp_instr_iaddr + 1,
.condition = cond_irparam,
},
});
/* parse loop body */
TRY_ELSE(stmt(out_ir, toks, funcs, sc, t->next), irtoks_term(&cond_ir));
/* finally we know where the jmp from the beginning has to jump to */
out_ir->toks[jmp_instr_iaddr].Jmp.iaddr = out_ir->len;
/* append condition IR to program IR, then terminate condition IR stream */
irtoks_app_irtoks(out_ir, &cond_ir);
irtoks_term(&cond_ir);
t = t->next;
} else if (t->tok.kind == TokOp && t->tok.Op == OpNewLn) {
} else {
/* assume expression */
TRY(expr(out_ir, toks, funcs, sc, t, (ExprMode){ .kind = ExprModeJustCollapse, .ignore_newln = false }));
}
toklist_del(toks, start, t);
}
IRToks parse(TokList *toks, BuiltinFunc *builtin_funcs, size_t n_builtin_funcs) {
Map funcs;
map_init(&funcs, sizeof(BuiltinFunc));
for (size_t i = 0; i < n_builtin_funcs; i++) {
builtin_funcs[i].fid = i;
bool replaced = map_insert(&funcs, builtin_funcs[i].name, &builtin_funcs[i]);
if (replaced) {
err_ln = 0; err_col = 0;
set_err("Builtin function %s() declared more than once", builtin_funcs[i].name);
map_term(&funcs);
return (IRToks){0};
}
}
IRToks ir;
irtoks_init_long(&ir);
Scope global_scope = make_scope(NULL, true);
for (;;) {
if (toks->begin->tok.kind == TokOp && toks->begin->tok.Op == OpEOF)
break;
TRY_RET_ELSE(stmt(&ir, toks, &funcs, &global_scope, toks->begin), ir,
{ term_scope(&global_scope); map_term(&funcs); });
}
term_scope(&global_scope);
map_term(&funcs);
return ir;
}