2021-12-21 01:18:22 +01:00
|
|
|
#include "parse.h"
|
|
|
|
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
|
|
|
#include "map.h"
|
|
|
|
#include "runtime.h"
|
|
|
|
|
2021-12-23 19:58:00 +01:00
|
|
|
/* Builtin function table most recently passed to parse(); parse() stores the
 * pointer here before doing anything else. */
static BuiltinFunc *bf;
|
|
|
|
|
2021-12-21 01:18:22 +01:00
|
|
|
typedef struct Scope {
|
|
|
|
struct Scope *parent;
|
|
|
|
size_t mem_addr;
|
|
|
|
bool has_idents;
|
|
|
|
Map ident_addrs;
|
|
|
|
} Scope;
|
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
typedef struct ExprRet {
|
2021-12-21 17:09:03 +01:00
|
|
|
enum {
|
2021-12-23 15:56:12 +01:00
|
|
|
ExprRetVal,
|
|
|
|
ExprRetIdent,
|
|
|
|
ExprRetLastInstr,
|
2021-12-21 17:09:03 +01:00
|
|
|
} kind;
|
|
|
|
|
|
|
|
union {
|
2021-12-23 15:56:12 +01:00
|
|
|
IRTok LastInstr;
|
2021-12-21 17:09:03 +01:00
|
|
|
};
|
2021-12-23 15:56:12 +01:00
|
|
|
} ExprRet;
|
2021-12-21 17:09:03 +01:00
|
|
|
|
2021-12-21 01:18:22 +01:00
|
|
|
static void mark_err(const Tok *t);
|
2021-12-23 15:56:12 +01:00
|
|
|
static void set_irtok_dest_addr(IRTok *t, size_t addr);
|
2021-12-21 11:40:49 +01:00
|
|
|
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos);
|
2021-12-21 01:18:22 +01:00
|
|
|
static IRParam tok_to_irparam(Scope *sc, Tok *t);
|
2021-12-21 17:09:03 +01:00
|
|
|
static Scope make_scope(Scope *parent, bool with_idents);
|
2021-12-21 01:18:22 +01:00
|
|
|
static void term_scope(Scope *sc);
|
2021-12-26 15:06:33 +01:00
|
|
|
static bool expr_flush_ir_and_maybe_return(IRToks *out_ir, TokList *toks, IRTok instr, TokListItem *expr_start, Scope *expr_scope, TokListItem *t, ExprRet *out_ret);
|
2021-12-23 15:56:12 +01:00
|
|
|
static ExprRet expr(IRToks *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t);
|
|
|
|
static void expr_into_addr(IRToks *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t, size_t addr);
|
|
|
|
static IRParam expr_into_irparam(IRToks *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t);
|
2021-12-26 19:18:52 +01:00
|
|
|
static void skip_newlns(TokList *toks, TokListItem *from);
|
2021-12-22 16:09:52 +01:00
|
|
|
static void stmt(IRToks *out_ir, TokList *toks, Map *funcs, Scope *sc, TokListItem *t);
|
2021-12-21 01:18:22 +01:00
|
|
|
|
|
|
|
static void mark_err(const Tok *t) {
|
|
|
|
err_ln = t->ln;
|
|
|
|
err_col = t->col;
|
|
|
|
}
|
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
/* Store addr as the destination address of IR instruction *t.
 * Which field is written depends on the instruction's operand layout:
 * unary instructions use Unary.addr, binary ones Binary.addr and internal
 * calls CallI.ret_addr. Any other instruction kind is a programming error. */
static void set_irtok_dest_addr(IRTok *t, size_t addr) {
	switch (t->instr) {
		/* unary layout */
		case IRSet: case IRNeg: case IRNot:
			t->Unary.addr = addr;
			return;

		/* binary layout */
		case IRAdd: case IRSub: case IRMul: case IRDiv:
		case IREq:  case IRNeq: case IRLt:  case IRLe:
		case IRAnd: case IROr:
			t->Binary.addr = addr;
			return;

		/* internal function call */
		case IRCallInternal:
			t->CallI.ret_addr = addr;
			return;

		default:
			ASSERT_UNREACHED();
	}
}
|
|
|
|
|
2021-12-21 11:40:49 +01:00
|
|
|
static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos) {
|
|
|
|
size_t addr;
|
|
|
|
bool exists = false;
|
|
|
|
for (const Scope *i = sc; i != NULL; i = i->parent) {
|
|
|
|
if (!i->has_idents)
|
|
|
|
continue;
|
|
|
|
exists = map_get(&i->ident_addrs, name, &addr);
|
|
|
|
if (exists)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!exists) {
|
|
|
|
mark_err(errpos);
|
|
|
|
set_err("Identifier '%s' not recognized in this scope", name);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return addr;
|
|
|
|
}
|
|
|
|
|
2021-12-21 01:18:22 +01:00
|
|
|
/* Convert a value or identifier token into an IR parameter.
 * - TokVal becomes an IRParamLiteral carrying the token's value.
 * - TokIdent becomes an IRParamAddr; a named identifier is resolved through
 *   get_ident_addr() (starting in sc), an address identifier is used as is.
 * Any other token kind is a programming error.
 * Returns (IRParam){0} if resolving a named identifier fails. */
static IRParam tok_to_irparam(Scope *sc, Tok *t) {
	if (t->kind == TokVal) {
		IRParam literal = {
			.kind = IRParamLiteral,
			.Literal = t->Val,
		};
		return literal;
	} else if (t->kind == TokIdent) {
		size_t addr;
		if (t->Ident.kind == IdentAddr)
			addr = t->Ident.Addr;
		else if (t->Ident.kind == IdentName) {
			TRY_RET(addr = get_ident_addr(sc, t->Ident.Name, t), (IRParam){0});
		} else
			ASSERT_UNREACHED();
		IRParam by_addr = {
			.kind = IRParamAddr,
			.Addr = addr,
		};
		return by_addr;
	} else
		ASSERT_UNREACHED();
}
|
|
|
|
|
|
|
|
/* term_scope doesn't have to be called if with_idents is set to false. */
|
2021-12-21 17:09:03 +01:00
|
|
|
static Scope make_scope(Scope *parent, bool with_idents) {
|
|
|
|
Scope s = { .parent = parent, .mem_addr = parent ? parent->mem_addr : 0, .has_idents = with_idents };
|
2021-12-21 01:18:22 +01:00
|
|
|
if (with_idents)
|
|
|
|
map_init(&s.ident_addrs, sizeof(size_t));
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Release the resources held by *sc. Only scopes created with identifiers
 * own anything (their identifier map); for all others this does nothing. */
static void term_scope(Scope *sc) {
	if (!sc->has_idents)
		return;
	map_term(&sc->ident_addrs);
}
|
|
|
|
|
2021-12-26 15:06:33 +01:00
|
|
|
/* If ir_tok is the underlying expr() call's last evaluation, this function
|
|
|
|
* deletes t from toks, sets *out_ret and tells the caller it can return
|
|
|
|
* *out_ret by returning true.
|
|
|
|
*
|
|
|
|
* If ir_tok is not the expression's last instruction, ir_tok is written to
|
|
|
|
* out_ir and t is replaced by a pointer to the result's memory address.
|
|
|
|
* */
|
|
|
|
static bool expr_flush_ir_and_maybe_return(IRToks *out_ir, TokList *toks, IRTok ir_tok, TokListItem *expr_start, Scope *expr_scope, TokListItem *t, ExprRet *out_ret) {
|
|
|
|
if (t == expr_start && t->next->tok.kind == TokOp && op_prec[t->next->tok.Op] == PREC_DELIM) {
|
|
|
|
/* ir_tok was the expression's last IR instruction. */
|
|
|
|
|
|
|
|
toklist_del(toks, t, t);
|
|
|
|
|
|
|
|
*out_ret = (ExprRet){
|
|
|
|
.kind = ExprRetLastInstr,
|
|
|
|
.LastInstr = ir_tok,
|
|
|
|
};
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
/* ir_tok was not the expression's last IR instruction. */
|
|
|
|
|
|
|
|
size_t dest_addr = expr_scope->mem_addr++;
|
|
|
|
|
|
|
|
set_irtok_dest_addr(&ir_tok, dest_addr);
|
|
|
|
irtoks_app(out_ir, ir_tok);
|
|
|
|
|
|
|
|
t->tok = (Tok){
|
|
|
|
.kind = TokIdent,
|
|
|
|
.Ident = {
|
|
|
|
.kind = IdentAddr,
|
|
|
|
.Addr = dest_addr,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
/* The job of this function is to reduce the expression to the most simple form
|
|
|
|
* writing the least IR instructions possible (without overanalyzing).
|
|
|
|
* This means that the only IR instructions it will be writing are those for
|
|
|
|
* calculating intermediate values.
|
|
|
|
* In the case of ExprRetVal and ExprRetIdent, the value isn't 'returned' in
|
|
|
|
* the traditional sense, but rather the result is left in the token stream.
|
|
|
|
* The 'return' value can be of 3 different types:
|
|
|
|
* - ExprRetVal: The expression yields a constant value as a result.
|
|
|
|
* Examples: '5', '5 + 2 * 3' or '5 + (2 + 1) * 3'
|
|
|
|
* - ExprRetIdent: The expression yields an identifier as a result.
|
|
|
|
* Examples: 'a' or '(((a)))'
|
|
|
|
* - ExprRetLastInstr: The expression is a more complex sequence of
|
|
|
|
* instructions. Here the last instruction is returned so the caller can
|
|
|
|
* manually set the destination address.
|
|
|
|
* Examples: 'a + 1', '2 + a * b' or '2 + 4 * (b * b) / 5'
|
|
|
|
*
|
|
|
|
* Here is also a simplified example of how the operator precedence parsing works:
|
|
|
|
* ________________________________
|
|
|
|
* Where t points to (between l_op and r_op in each step)
|
|
|
|
* |
|
|
|
|
* v
|
|
|
|
* 5 + 2 * 2 \n
|
|
|
|
* ^ ^
|
|
|
|
* | |
|
|
|
|
* l_op r_op
|
|
|
|
* precedence of '+' is higher than that of the front delimiter => move forward
|
|
|
|
* ________________________________
|
|
|
|
* 5 + 2 * 2 \n
|
|
|
|
* ^ ^
|
|
|
|
* | |
|
|
|
|
* l_op r_op
|
|
|
|
* precedence of '*' is higher than that of '+' => move forward
|
|
|
|
* ________________________________
|
|
|
|
* 5 + 2 * 2 \n
|
|
|
|
* ^ ^
|
|
|
|
* | |
|
|
|
|
* l_op r_op
|
|
|
|
* precedence of '\n' (a delimiter) is lower than that of '*' => evaluate and move l_op 2 back
|
|
|
|
* ________________________________
|
|
|
|
* 5 + 4 \n
|
|
|
|
* ^ ^
|
|
|
|
* | |
|
|
|
|
* l_op r_op
|
|
|
|
* precedence of '\n' (a delimiter) is lower than that of '+' => evaluate and move l_op 2 back
|
|
|
|
* ________________________________
|
|
|
|
* 9 \n
|
|
|
|
* ^ ^
|
|
|
|
* | |
|
|
|
|
* l_op r_op
|
|
|
|
* both l_op and r_op are delimiters (their precedence is PREC_DELIM) => done
|
|
|
|
*/
|
|
|
|
static ExprRet expr(IRToks *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t) {
|
2021-12-21 01:18:22 +01:00
|
|
|
TokListItem *start = t;
|
2021-12-21 17:09:03 +01:00
|
|
|
|
|
|
|
Scope sc = make_scope(parent_sc, false);
|
2021-12-21 01:18:22 +01:00
|
|
|
|
|
|
|
for (;;) {
|
2021-12-23 21:06:49 +01:00
|
|
|
/* Prepare to collapse unary operation. */
|
|
|
|
bool perform_unary = false;
|
|
|
|
IRInstr unary_op;
|
|
|
|
if (t->tok.kind == TokOp) {
|
|
|
|
if (t->tok.Op == OpSub) {
|
|
|
|
t = t->next;
|
|
|
|
perform_unary = true;
|
|
|
|
unary_op = IRNeg;
|
|
|
|
} else if (t->tok.Op == OpNot) {
|
|
|
|
t = t->next;
|
|
|
|
perform_unary = true;
|
|
|
|
unary_op = IRNot;
|
|
|
|
}
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
|
|
|
|
2021-12-23 16:51:10 +01:00
|
|
|
/* Delete newline if we're definitely expecting an operand. */
|
|
|
|
if (t->tok.kind == TokOp && t->tok.Op == OpNewLn) {
|
|
|
|
if (t == start)
|
|
|
|
start = t->next;
|
|
|
|
t = t->next;
|
|
|
|
toklist_del(toks, t->prev, t->prev);
|
|
|
|
}
|
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
/* Collapse parentheses. */
|
|
|
|
if (t->tok.kind == TokOp && t->tok.Op == OpLParen) {
|
|
|
|
ExprRet r;
|
|
|
|
TRY_RET(r = expr(out_ir, toks, funcs, &sc, t->next), (ExprRet){0});
|
|
|
|
if (r.kind == ExprRetLastInstr) {
|
|
|
|
size_t res_addr = sc.mem_addr++;
|
|
|
|
set_irtok_dest_addr(&r.LastInstr, res_addr);
|
|
|
|
irtoks_app(out_ir, r.LastInstr);
|
|
|
|
t->tok = (Tok){
|
|
|
|
.ln = t->tok.ln,
|
|
|
|
.col = t->tok.col,
|
|
|
|
.kind = TokIdent,
|
|
|
|
.Ident = {
|
|
|
|
.kind = IdentAddr,
|
|
|
|
.Addr = res_addr,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
} else if (r.kind == ExprRetVal || r.kind == ExprRetIdent) {
|
|
|
|
t->tok = t->next->tok;
|
|
|
|
toklist_del(toks, t->next, t->next);
|
|
|
|
} else
|
|
|
|
ASSERT_UNREACHED();
|
2021-12-22 12:52:16 +01:00
|
|
|
toklist_del(toks, t->next, t->next);
|
2021-12-23 15:56:12 +01:00
|
|
|
}
|
2021-12-26 15:06:33 +01:00
|
|
|
|
2021-12-22 16:09:52 +01:00
|
|
|
/* Collapse function call. */
|
2021-12-23 15:56:12 +01:00
|
|
|
else if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName && t->next->tok.kind == TokOp && t->next->tok.Op == OpLParen) {
|
|
|
|
/* get function */
|
2021-12-22 16:09:52 +01:00
|
|
|
BuiltinFunc func;
|
|
|
|
bool exists = map_get(funcs, t->tok.Ident.Name, &func);
|
|
|
|
if (!exists) {
|
|
|
|
mark_err(&t->tok);
|
|
|
|
set_err("Unrecognized function: %s()", t->tok.Ident.Name);
|
2021-12-23 15:56:12 +01:00
|
|
|
return (ExprRet){0};
|
2021-12-22 16:09:52 +01:00
|
|
|
}
|
|
|
|
TokListItem *func_ident = t;
|
2021-12-23 15:56:12 +01:00
|
|
|
t = func_ident->next;
|
2021-12-22 16:09:52 +01:00
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
/* we want to try to eliminate function calls at runtime if possible */
|
2021-12-22 16:09:52 +01:00
|
|
|
bool eval_func_in_place = !func.side_effects;
|
2021-12-23 15:56:12 +01:00
|
|
|
|
|
|
|
size_t args_len = 0;
|
2021-12-23 22:25:41 +01:00
|
|
|
IRParam *args = NULL;
|
|
|
|
|
|
|
|
if (t->next->tok.kind == TokOp && t->next->tok.Op == OpRParen) {
|
|
|
|
/* no args */
|
|
|
|
toklist_del(toks, t->next, t->next); /* delete right parenthesis */
|
|
|
|
} else {
|
|
|
|
/* go through the arguments, evaluate them and put them into the args array */
|
|
|
|
size_t args_cap = 16;
|
|
|
|
args = xmalloc(sizeof(IRParam) * args_cap);
|
|
|
|
for (;;) {
|
|
|
|
if (args_len+1 > args_cap)
|
|
|
|
args = xrealloc(args, (args_cap *= 2));
|
|
|
|
IRParam a;
|
|
|
|
TRY_RET_ELSE(a = expr_into_irparam(out_ir, toks, funcs, &sc, t->next), (ExprRet){0}, free(args));
|
|
|
|
args[args_len++] = a;
|
|
|
|
if (a.kind != IRParamLiteral)
|
|
|
|
eval_func_in_place = false;
|
|
|
|
if (t->next->tok.kind == TokOp) {
|
|
|
|
if (t->next->tok.Op == OpComma) {
|
|
|
|
toklist_del(toks, t->next, t->next); /* delete right parenthesis */
|
|
|
|
continue;
|
|
|
|
} else if (t->next->tok.Op == OpRParen) {
|
|
|
|
toklist_del(toks, t->next, t->next); /* delete right parenthesis */
|
|
|
|
break;
|
|
|
|
}
|
2021-12-22 16:09:52 +01:00
|
|
|
}
|
2021-12-23 22:25:41 +01:00
|
|
|
mark_err(&t->next->tok);
|
|
|
|
set_err("Expected ',' or ')' after function argument");
|
|
|
|
free(args);
|
|
|
|
return (ExprRet){0};
|
2021-12-22 16:09:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
t = func_ident;
|
|
|
|
toklist_del(toks, t->next, t->next); /* delete left parenthesis */
|
|
|
|
|
|
|
|
if (func.n_args != args_len) {
|
2021-12-22 16:09:52 +01:00
|
|
|
mark_err(&func_ident->tok);
|
|
|
|
const char *plural = func.n_args == 1 ? "" : "s";
|
2021-12-23 15:56:12 +01:00
|
|
|
set_err("Function %s() takes %zu argument%s but got %zu", func.name, func.n_args, plural, args_len);
|
2021-12-23 22:25:41 +01:00
|
|
|
if (args)
|
|
|
|
free(args);
|
2021-12-23 15:56:12 +01:00
|
|
|
return (ExprRet){0};
|
2021-12-22 16:09:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (eval_func_in_place) {
|
2021-12-23 15:56:12 +01:00
|
|
|
/* evaluate the function in place */
|
2021-12-23 22:25:41 +01:00
|
|
|
Value *arg_vals = args_len ? xmalloc(sizeof(Value) * args_len) : NULL;
|
2021-12-23 15:56:12 +01:00
|
|
|
for (size_t i = 0; i < args_len; i++)
|
|
|
|
arg_vals[i] = args[i].Literal;
|
2021-12-25 12:32:52 +01:00
|
|
|
mark_err(&func_ident->tok);
|
2021-12-22 16:09:52 +01:00
|
|
|
func_ident->tok = (Tok) {
|
|
|
|
.kind = TokVal,
|
2021-12-23 15:56:12 +01:00
|
|
|
.Val = func.func(arg_vals),
|
2021-12-22 16:09:52 +01:00
|
|
|
};
|
2021-12-23 22:25:41 +01:00
|
|
|
if (arg_vals)
|
|
|
|
free(arg_vals);
|
|
|
|
if (args)
|
|
|
|
free(args);
|
2021-12-22 16:09:52 +01:00
|
|
|
} else {
|
2021-12-26 15:06:33 +01:00
|
|
|
/* function call IR instruction */
|
|
|
|
IRTok ir_tok = {
|
2021-12-22 16:09:52 +01:00
|
|
|
.ln = func_ident->tok.ln,
|
|
|
|
.col = func_ident->tok.col,
|
|
|
|
.instr = IRCallInternal,
|
|
|
|
.CallI = {
|
2021-12-26 15:06:33 +01:00
|
|
|
.ret_addr = 0,
|
2021-12-22 16:09:52 +01:00
|
|
|
.fid = func.fid,
|
|
|
|
.args = args,
|
|
|
|
},
|
2021-12-23 15:56:12 +01:00
|
|
|
};
|
|
|
|
|
2021-12-26 15:06:33 +01:00
|
|
|
/* return if we've just evaluated the last instruction */
|
|
|
|
ExprRet ret;
|
|
|
|
if (expr_flush_ir_and_maybe_return(out_ir, toks, ir_tok, start, &sc, func_ident, &ret))
|
|
|
|
return ret;
|
2021-12-22 16:09:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-23 21:06:49 +01:00
|
|
|
/* Collapse unary operation. */
|
|
|
|
if (perform_unary) {
|
|
|
|
Tok *v = &t->tok; /* what we want to perform the operation on */
|
2021-12-23 15:56:12 +01:00
|
|
|
t = t->prev; /* go back to the '-' sign */
|
|
|
|
toklist_del(toks, t->next, t->next); /* again, just removing the reference */
|
|
|
|
|
2021-12-21 01:18:22 +01:00
|
|
|
if (v->kind == TokVal) {
|
2021-12-23 21:06:49 +01:00
|
|
|
/* immediately perform operation */
|
2021-12-21 01:18:22 +01:00
|
|
|
t->tok.kind = TokVal;
|
2021-12-25 12:32:52 +01:00
|
|
|
mark_err(&t->tok);
|
2021-12-23 21:06:49 +01:00
|
|
|
TRY_RET(t->tok.Val = eval_unary(unary_op, &v->Val), (ExprRet){0});
|
2021-12-21 01:18:22 +01:00
|
|
|
} else {
|
2021-12-23 21:06:49 +01:00
|
|
|
/* unary IR instruction */
|
2021-12-21 01:18:22 +01:00
|
|
|
IRParam v_irparam;
|
2021-12-23 15:56:12 +01:00
|
|
|
TRY_RET(v_irparam = tok_to_irparam(&sc, v), (ExprRet){0});
|
2021-12-26 15:06:33 +01:00
|
|
|
IRTok ir_tok = {
|
2021-12-21 01:18:22 +01:00
|
|
|
.ln = t->tok.ln,
|
|
|
|
.col = t->tok.col,
|
2021-12-23 21:06:49 +01:00
|
|
|
.instr = unary_op,
|
2021-12-21 01:18:22 +01:00
|
|
|
.Unary = {
|
2021-12-26 15:06:33 +01:00
|
|
|
.addr = 0,
|
2021-12-21 01:18:22 +01:00
|
|
|
.val = v_irparam,
|
|
|
|
},
|
2021-12-23 15:56:12 +01:00
|
|
|
};
|
2021-12-21 01:18:22 +01:00
|
|
|
|
2021-12-26 15:06:33 +01:00
|
|
|
/* return if we've just evaluated the last instruction */
|
|
|
|
ExprRet ret;
|
|
|
|
if (expr_flush_ir_and_maybe_return(out_ir, toks, ir_tok, start, &sc, t, &ret))
|
|
|
|
return ret;
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Find out operator precedence of l_op and r_op. */
|
|
|
|
int8_t l_op_prec;
|
|
|
|
Tok *l_op;
|
|
|
|
if (t == start) {
|
|
|
|
l_op_prec = PREC_DELIM;
|
|
|
|
l_op = NULL;
|
|
|
|
} else {
|
|
|
|
l_op = &t->prev->tok;
|
|
|
|
if (l_op->kind != TokOp) {
|
|
|
|
mark_err(l_op);
|
|
|
|
set_err("Expected operator");
|
2021-12-23 15:56:12 +01:00
|
|
|
return (ExprRet){0};
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
|
|
|
l_op_prec = op_prec[l_op->Op];
|
|
|
|
}
|
|
|
|
int8_t r_op_prec;
|
|
|
|
Tok *r_op = &t->next->tok;
|
|
|
|
if (r_op->kind != TokOp) {
|
|
|
|
mark_err(r_op);
|
|
|
|
set_err("Expected operator");
|
2021-12-23 15:56:12 +01:00
|
|
|
return (ExprRet){0};
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
|
|
|
r_op_prec = op_prec[r_op->Op];
|
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
/* If l_op and r_op are both delimiters, we don't have to evaluate
|
|
|
|
* anything. */
|
2021-12-21 01:18:22 +01:00
|
|
|
if (l_op_prec == PREC_DELIM && r_op_prec == PREC_DELIM) {
|
2021-12-23 15:56:12 +01:00
|
|
|
if (t->tok.kind == TokIdent) {
|
|
|
|
return (ExprRet){ .kind = ExprRetIdent };
|
|
|
|
} else if (t->tok.kind == TokVal) {
|
|
|
|
return (ExprRet){ .kind = ExprRetVal };
|
|
|
|
} else {
|
2021-12-21 11:55:53 +01:00
|
|
|
mark_err(&t->tok);
|
|
|
|
set_err("Expected literal or identifier");
|
2021-12-23 15:56:12 +01:00
|
|
|
return (ExprRet){0};
|
2021-12-21 17:09:03 +01:00
|
|
|
}
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
/* This is the operator precedence parser described above. */
|
2021-12-21 01:18:22 +01:00
|
|
|
if (r_op_prec > l_op_prec)
|
|
|
|
t = t->next->next;
|
|
|
|
else {
|
|
|
|
Tok *rhs = &t->tok;
|
|
|
|
if (rhs->kind != TokVal && rhs->kind != TokIdent) {
|
|
|
|
mark_err(rhs);
|
|
|
|
set_err("Expected literal or identifier");
|
2021-12-23 15:56:12 +01:00
|
|
|
return (ExprRet){0};
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
2021-12-23 15:56:12 +01:00
|
|
|
|
2021-12-21 01:18:22 +01:00
|
|
|
t = t->prev->prev;
|
2021-12-23 15:56:12 +01:00
|
|
|
|
2021-12-21 01:18:22 +01:00
|
|
|
Tok *lhs = &t->tok;
|
|
|
|
if (lhs->kind != TokVal && lhs->kind != TokIdent) {
|
|
|
|
mark_err(lhs);
|
|
|
|
set_err("Expected literal or identifier");
|
2021-12-23 15:56:12 +01:00
|
|
|
return (ExprRet){0};
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* delete the tokens that fall away from collapsing the expression
|
|
|
|
* (NOTE: only their references are deleted here, that's important
|
|
|
|
* because we're still using their values later on) */
|
2021-12-22 12:52:16 +01:00
|
|
|
toklist_del(toks, t->next, t->next->next);
|
2021-12-21 01:18:22 +01:00
|
|
|
|
2021-12-23 21:06:49 +01:00
|
|
|
bool swap_operands = false;
|
|
|
|
|
2021-12-21 01:18:22 +01:00
|
|
|
IRInstr instr;
|
|
|
|
switch (l_op->Op) {
|
|
|
|
case OpAdd: instr = IRAdd; break;
|
|
|
|
case OpSub: instr = IRSub; break;
|
|
|
|
case OpMul: instr = IRMul; break;
|
|
|
|
case OpDiv: instr = IRDiv; break;
|
2021-12-23 21:06:49 +01:00
|
|
|
case OpEq: instr = IREq; break;
|
2021-12-26 12:19:54 +01:00
|
|
|
case OpNeq: instr = IRNeq; break;
|
2021-12-23 21:06:49 +01:00
|
|
|
case OpLt: instr = IRLt; break;
|
|
|
|
case OpLe: instr = IRLe; break;
|
|
|
|
case OpGt: instr = IRLt; swap_operands = true; break;
|
|
|
|
case OpGe: instr = IRLe; swap_operands = true; break;
|
2021-12-23 21:42:09 +01:00
|
|
|
case OpAnd: instr = IRAnd; break;
|
|
|
|
case OpOr: instr = IROr; break;
|
2021-12-21 01:18:22 +01:00
|
|
|
default:
|
|
|
|
mark_err(l_op);
|
|
|
|
set_err("Unknown operation: '%s'", op_str[l_op->Op]);
|
2021-12-23 15:56:12 +01:00
|
|
|
return (ExprRet){0};
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
2021-12-23 15:56:12 +01:00
|
|
|
|
2021-12-21 01:18:22 +01:00
|
|
|
if (lhs->kind == TokVal && rhs->kind == TokVal) {
|
|
|
|
/* evaluate the constant expression immediately */
|
2021-12-23 21:06:49 +01:00
|
|
|
Value *lhs_val = swap_operands ? &rhs->Val : &lhs->Val;
|
|
|
|
Value *rhs_val = swap_operands ? &lhs->Val : &rhs->Val;
|
2021-12-21 01:18:22 +01:00
|
|
|
lhs->kind = TokVal;
|
2021-12-25 12:32:52 +01:00
|
|
|
mark_err(l_op);
|
2021-12-23 21:06:49 +01:00
|
|
|
TRY_RET(lhs->Val = eval_binary(instr, lhs_val, rhs_val), (ExprRet){0});
|
2021-12-21 01:18:22 +01:00
|
|
|
} else {
|
|
|
|
IRParam lhs_irparam, rhs_irparam;
|
2021-12-23 15:56:12 +01:00
|
|
|
TRY_RET(lhs_irparam = tok_to_irparam(&sc, lhs), (ExprRet){0});
|
|
|
|
TRY_RET(rhs_irparam = tok_to_irparam(&sc, rhs), (ExprRet){0});
|
|
|
|
|
2021-12-26 15:06:33 +01:00
|
|
|
/* binary IR instruction */
|
|
|
|
IRTok ir_tok = {
|
2021-12-21 01:18:22 +01:00
|
|
|
.ln = l_op->ln,
|
|
|
|
.col = l_op->col,
|
|
|
|
.instr = instr,
|
2021-12-23 20:10:02 +01:00
|
|
|
.Binary = {
|
2021-12-26 15:06:33 +01:00
|
|
|
.addr = 0,
|
2021-12-23 21:06:49 +01:00
|
|
|
.lhs = swap_operands ? rhs_irparam : lhs_irparam,
|
|
|
|
.rhs = swap_operands ? lhs_irparam : rhs_irparam,
|
2021-12-21 01:18:22 +01:00
|
|
|
},
|
2021-12-23 15:56:12 +01:00
|
|
|
};
|
2021-12-26 15:06:33 +01:00
|
|
|
|
|
|
|
/* return if we've just evaluated the last instruction */
|
|
|
|
ExprRet ret;
|
|
|
|
if (expr_flush_ir_and_maybe_return(out_ir, toks, ir_tok, start, &sc, t, &ret))
|
|
|
|
return ret;
|
2021-12-21 01:18:22 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-23 15:56:12 +01:00
|
|
|
/* Evaluate the expression starting at t and make sure its result ends up at
 * memory address addr.
 * - If expr() leaves a value or identifier in the token stream, an IRSet
 *   instruction copying it to addr is appended to out_ir and the token is
 *   deleted from toks.
 * - If expr() hands back its last instruction, that instruction is given addr
 *   as destination and appended to out_ir; t's token is overwritten with an
 *   address identifier for addr.
 * Returns early (through TRY) if expr() or the parameter conversion fails. */
static void expr_into_addr(IRToks *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t, size_t addr) {
	ExprRet res;
	TRY(res = expr(out_ir, toks, funcs, parent_sc, t));

	if (res.kind == ExprRetVal || res.kind == ExprRetIdent) {
		IRParam src;
		TRY(src = tok_to_irparam(parent_sc, &t->tok));

		IRTok set_instr = {
			.ln = t->tok.ln,
			.col = t->tok.col,
			.instr = IRSet,
			.Unary = {
				.addr = addr,
				.val = src,
			},
		};
		irtoks_app(out_ir, set_instr);
		toklist_del(toks, t, t);
	} else if (res.kind == ExprRetLastInstr) {
		set_irtok_dest_addr(&res.LastInstr, addr);
		irtoks_app(out_ir, res.LastInstr);

		Tok addr_ident = {
			.ln = t->tok.ln,
			.col = t->tok.col,
			.kind = TokIdent,
			.Ident = {
				.kind = IdentAddr,
				.Addr = addr,
			},
		};
		t->tok = addr_ident;
	} else
		ASSERT_UNREACHED();
}
|
|
|
|
|
|
|
|
/* Evaluate the expression starting at t and return its result as an IR
 * parameter.
 * - If expr() leaves a value or identifier in the token stream, that token is
 *   converted with tok_to_irparam() and deleted from toks.
 * - If expr() hands back its last instruction, the instruction is appended to
 *   out_ir with a destination address taken from a temporary child scope of
 *   parent_sc (parent_sc itself is not advanced), and that address is returned
 *   as an IRParamAddr.
 * Returns (IRParam){0} if expr() or the conversion fails. */
static IRParam expr_into_irparam(IRToks *out_ir, TokList *toks, Map *funcs, Scope *parent_sc, TokListItem *t) {
	ExprRet res;
	TRY_RET(res = expr(out_ir, toks, funcs, parent_sc, t), (IRParam){0});

	if (res.kind == ExprRetVal || res.kind == ExprRetIdent) {
		IRParam param;
		TRY_RET(param = tok_to_irparam(parent_sc, &t->tok), (IRParam){0});
		toklist_del(toks, t, t);
		return param;
	} else if (res.kind == ExprRetLastInstr) {
		Scope tmp_sc = make_scope(parent_sc, false);
		size_t dest = tmp_sc.mem_addr++;

		set_irtok_dest_addr(&res.LastInstr, dest);
		irtoks_app(out_ir, res.LastInstr);

		IRParam param = {
			.kind = IRParamAddr,
			.Addr = dest,
		};
		return param;
	} else
		ASSERT_UNREACHED();
}
|
|
|
|
|
2021-12-26 19:18:52 +01:00
|
|
|
/* This WILL invalidate *from, so the caller should only call it on a
|
|
|
|
* TokListItem after any ones that are in use (e.g. skip_newlns(t->next)). */
|
|
|
|
static void skip_newlns(TokList *toks, TokListItem *from) {
|
|
|
|
TokListItem *curr = from;
|
|
|
|
while (curr->tok.kind == TokOp && curr->tok.Op == OpNewLn)
|
|
|
|
curr = curr->next;
|
|
|
|
if (curr != from)
|
|
|
|
toklist_del(toks, from, curr->prev);
|
|
|
|
}
|
|
|
|
|
2021-12-22 16:09:52 +01:00
|
|
|
/* Parse a single statement starting at t and append its IR to out_ir.
 * The statement's tokens are removed from toks.
 * Recognized statements:
 * - declaration / assignment: an identifier followed by a declare or assign token
 * - block:                    '{' statements... '}' (opens a new scope with identifiers)
 * - while loop
 * - if with an optional else
 * - anything else is treated as a bare expression
 * sc is the scope the statement lives in; funcs maps function names to
 * BuiltinFunc entries. On error the function returns early after the error has
 * been set. */
static void stmt(IRToks *out_ir, TokList *toks, Map *funcs, Scope *sc, TokListItem *t) {
	TokListItem *start = t;
	if (t->tok.kind == TokIdent && t->tok.Ident.kind == IdentName && (t->next->tok.kind == TokDeclare || t->next->tok.kind == TokAssign)) {
		char *name = t->tok.Ident.Name;
		t = t->next;
		if (t->tok.kind == TokDeclare) {
			/* reserve an address, evaluate the initializer into it, then
			 * register the name */
			size_t addr = sc->mem_addr++;
			TRY(expr_into_addr(out_ir, toks, funcs, sc, t->next, addr));
			bool replaced = map_insert(&sc->ident_addrs, name, &addr);
			if (replaced) {
				mark_err(&start->tok);
				set_err("'%s' already declared in this scope", name);
				return;
			}
		} else if (t->tok.kind == TokAssign) {
			size_t addr;
			TRY(addr = get_ident_addr(sc, name, &start->tok));
			TRY(expr_into_addr(out_ir, toks, funcs, sc, t->next, addr));
		} else
			ASSERT_UNREACHED();
	} else if (t->tok.kind == TokOp && t->tok.Op == OpLCurl) {
		Scope inner_sc = make_scope(sc, true);
		for (;;) {
			skip_newlns(toks, t->next);
			if (t->next->tok.kind == TokOp) {
				if (t->next->tok.Op == OpEOF) {
					term_scope(&inner_sc);
					mark_err(&start->tok);
					set_err("Unclosed '{'");
					return;
				}
				if (t->next->tok.Op == OpRCurl)
					break;
			}
			TRY_ELSE(stmt(out_ir, toks, funcs, &inner_sc, t->next), term_scope(&inner_sc));
		}
		term_scope(&inner_sc);
		t = t->next; /* t now is the '}', so it gets deleted below */
	} else if (t->tok.kind == TokWhile) {
		/* How while is generally implemented in IR:
		 * 0: jmp to 3
		 * 1: some_code
		 * 2: some_code
		 * 3: some stuff evaluating condition xyz
		 * 4: jmp to 1 if condition xyz is met
		 * */

		/* add initial jmp instruction */
		size_t jmp_instr_iaddr = out_ir->len;
		irtoks_app(out_ir, (IRTok){
			.ln = t->tok.ln,
			.col = t->tok.col,
			.instr = IRJmp,
			.Jmp = {
				.iaddr = 0, /* unknown for now */
			},
		});

		/* parse condition */
		IRToks cond_ir;
		irtoks_init_short(&cond_ir);
		IRParam cond;
		TRY_ELSE(cond = expr_into_irparam(&cond_ir, toks, funcs, sc, t->next), irtoks_term(&cond_ir));

		/* parse loop body */
		skip_newlns(toks, t->next);
		TRY_ELSE(stmt(out_ir, toks, funcs, sc, t->next), irtoks_term(&cond_ir));

		/* finally we know where the jmp from the beginning has to jump to */
		out_ir->toks[jmp_instr_iaddr].Jmp.iaddr = out_ir->len;

		/* append condition IR to program IR, then terminate condition IR stream */
		irtoks_eat_irtoks(out_ir, &cond_ir, out_ir->len-1);

		/* add conditional jump */
		irtoks_app(out_ir, (IRTok){
			.ln = t->next->tok.ln,
			.col = t->next->tok.col,
			.instr = IRJnz,
			.CJmp = {
				.iaddr = jmp_instr_iaddr + 1,
				.condition = cond,
			},
		});

		/* NOTE(review): this also deletes the token following the loop body
		 * (the if branch does not do this) - confirm that this is intended. */
		t = t->next;
	} else if (t->tok.kind == TokIf) {
		/* How if is generally implemented in IR:
		 * 0: some stuff evaluating condition xyz
		 * 1: jmp to 4 if condition xyz is met
		 * 2: some_code in else
		 * 3: jmp to 5
		 * 4: some_code in if
		 * */

		/* parse condition */
		IRParam cond;
		TRY(cond = expr_into_irparam(out_ir, toks, funcs, sc, t->next));

		/* add conditional jmp instruction */
		size_t if_cjmp_instr_iaddr = out_ir->len;
		irtoks_app(out_ir, (IRTok){
			.ln = t->tok.ln,
			.col = t->tok.col,
			.instr = IRJnz,
			.CJmp = {
				.iaddr = 0, /* unknown for now */
				.condition = cond,
			},
		});

		/* parse if body into its own stream; it is emitted after the else body */
		skip_newlns(toks, t->next);
		IRToks if_body;
		irtoks_init_short(&if_body);
		TRY_ELSE(stmt(&if_body, toks, funcs, sc, t->next), irtoks_term(&if_body));

		skip_newlns(toks, t->next);
		if (t->next->tok.kind == TokElse) {
			toklist_del(toks, t->next, t->next);

			/* parse and add else body */
			skip_newlns(toks, t->next);
			TRY_ELSE(stmt(out_ir, toks, funcs, sc, t->next), irtoks_term(&if_body));
		}

		/* add jmp instruction to jump back to common code */
		size_t else_jmp_instr_iaddr = out_ir->len;
		irtoks_app(out_ir, (IRTok){
			.ln = t->tok.ln,
			.col = t->tok.col,
			.instr = IRJmp,
			.Jmp = {
				.iaddr = 0, /* unknown for now */
			},
		});

		/* set if condition jmp target */
		out_ir->toks[if_cjmp_instr_iaddr].CJmp.iaddr = out_ir->len;

		/* add if body */
		irtoks_eat_irtoks(out_ir, &if_body, out_ir->len-1);

		/* set else jmp target; this is an IRJmp, so use the Jmp member */
		out_ir->toks[else_jmp_instr_iaddr].Jmp.iaddr = out_ir->len;
	} else {
		/* assume expression */
		TRY(expr_into_irparam(out_ir, toks, funcs, sc, t));
		return;
	}
	/* remove the statement's own remaining tokens (start up to and including t) */
	toklist_del(toks, start, t);
}
|
|
|
|
|
2021-12-22 16:09:52 +01:00
|
|
|
/* Turn the token list into an IR instruction stream.
 * builtin_funcs (n_builtin_funcs entries) are the functions callable from the
 * program; each entry's fid is set to its index in the array.
 * Statements are parsed from the front of toks until the EOF token is first.
 * Returns the IR stream. If a builtin name occurs twice, an error is set
 * (position 0:0) and (IRToks){0} is returned. If a statement fails to parse,
 * the IR produced so far is returned after the error has been set. */
IRToks parse(TokList *toks, BuiltinFunc *builtin_funcs, size_t n_builtin_funcs) {
	bf = builtin_funcs;

	/* register the builtin functions by name */
	Map funcs;
	map_init(&funcs, sizeof(BuiltinFunc));
	for (size_t i = 0; i < n_builtin_funcs; i++) {
		BuiltinFunc *f = &builtin_funcs[i];
		f->fid = i;
		if (!map_insert(&funcs, f->name, f))
			continue;

		/* the name was already present */
		err_ln = 0; err_col = 0;
		set_err("Builtin function %s() declared more than once", f->name);
		map_term(&funcs);
		return (IRToks){0};
	}

	IRToks ir;
	irtoks_init_long(&ir);
	Scope global_scope = make_scope(NULL, true);

	/* parse statement by statement; stmt() removes the tokens it has consumed */
	for (;;) {
		skip_newlns(toks, toks->begin);
		const Tok *first = &toks->begin->tok;
		if (first->kind == TokOp && first->Op == OpEOF)
			break;
		TRY_RET_ELSE(stmt(&ir, toks, &funcs, &global_scope, toks->begin), ir,
				{ term_scope(&global_scope); map_term(&funcs); });
	}

	term_scope(&global_scope);
	map_term(&funcs);
	return ir;
}
|