/* lang/lex.c */
#include "lex.h"
#include "util.h"
/*
 * Position bookkeeping for the lexer: a running line/column pair plus a
 * second pair holding the most recently marked position.
 */
typedef struct Pos {
	/* current position */
	size_t ln;
	size_t col;
	/* marked position */
	size_t m_ln;
	size_t m_col;
} Pos;
/*
 * Forward declarations of the file-local position helpers:
 *   consume  - advance the current line/column past one character
 *   emit     - append a token stamped with the marked position
 *   mark     - copy the current position into the marked position
 *   mark_err - copy the marked position into err_ln / err_col
 */
static void consume(Pos *p, char c);
static void emit(TokList *toks, const Pos *p, Tok t);
static void mark(Pos *p);
static void mark_err(const Pos *p);
/*
 * Advances the current position in p past the character c: a newline moves
 * to column 1 of the next line, any other character moves one column right.
 */
static void consume(Pos *p, char c) {
	if (c != '\n') {
		p->col += 1;
		return;
	}
	p->ln += 1;
	p->col = 1;
}
/*
 * Appends token t to toks after overwriting its ln/col fields with the
 * marked position stored in p.
 */
static void emit(TokList *toks, const Pos *p, Tok t) {
	Tok stamped = t;
	stamped.ln = p->m_ln;
	stamped.col = p->m_col;
	toklist_append(toks, stamped);
}
/* Records the current line and column of p as its marked position. */
static void mark(Pos *p) {
	const size_t line = p->ln;
	const size_t column = p->col;

	p->m_ln = line;
	p->m_col = column;
}
/*
 * Copies the marked position of p into err_ln / err_col (declared outside
 * this block; presumably the shared error-location variables from util.h).
 */
static void mark_err(const Pos *p) {
	err_col = p->m_col;
	err_ln = p->m_ln;
}
/*
 * Skips the remainder of a block comment, honouring nested comments.
 *
 * On entry *sp points just past an opening comment delimiter (slash, star).
 * Every character is inspected exactly once, so a run of stars directly in
 * front of the closing delimiter is handled and the terminating NUL is never
 * stepped over.
 *
 * Returns true with *sp just past the matching closing delimiter, or false
 * with *sp at the terminating NUL if the input ends first. p is advanced
 * past everything that was skipped.
 */
static bool skip_block_comment(Pos *p, const char **sp) {
	const char *s = *sp;
	size_t depth = 1;
	while (depth) {
		if (s[0] == 0) {
			*sp = s;
			return false;
		}
		/* s[0] is not NUL here, so reading s[1] stays inside the string. */
		if (s[0] == '/' && s[1] == '*') {
			consume(p, *(s++));
			consume(p, *(s++));
			depth++;
		} else if (s[0] == '*' && s[1] == '/') {
			consume(p, *(s++));
			consume(p, *(s++));
			depth--;
		} else {
			consume(p, *(s++));
		}
	}
	*sp = s;
	return true;
}

/*
 * Splits the NUL-terminated source text s into tokens.
 *
 * Recognised input: the keywords if / else / while, identifiers, integer
 * literals (decimal, 0x hexadecimal, 0b binary), decimal float literals,
 * the operators { } ( ) , + - * /, ':=' (declare), '=' (assign), newlines
 * (emitted as OpNewLn), line comments and nestable block comments.
 * Spaces and tabs are skipped.
 *
 * Every token carries the line/column at which it starts (1-based).
 *
 * Returns the token list. On success its last token is OpEOF. On a lexing
 * error set_err() is called, err_ln / err_col identify the offending
 * location, and the tokens collected so far are returned without an OpEOF
 * token.
 * NOTE(review): how the caller detects the error and who releases the list
 * is not visible here - confirm against util.h / lex.h.
 */
TokList lex(const char *s) {
	TokList toks;
	toklist_init(&toks);
	Pos pos = { .ln = 1, .col = 1 };
	for (;;) {
		/* Tokens and errors are reported at the position where they start. */
		mark(&pos);
		mark_err(&pos);

		/* Keywords and identifiers. */
		if (IS_ALPHA(s[0])) {
			size_t i = 1;
			const char *start = s;
			consume(&pos, *(s++));
			while (IS_ALNUM(s[0])) {
				consume(&pos, *(s++));
				i++;
			}
			if (streq_0_n("if", start, i)) {
				emit(&toks, &pos, (Tok){ .kind = TokIf });
			} else if (streq_0_n("else", start, i)) {
				emit(&toks, &pos, (Tok){ .kind = TokElse });
			} else if (streq_0_n("while", start, i)) {
				emit(&toks, &pos, (Tok){ .kind = TokWhile });
			} else {
				emit(&toks, &pos, (Tok){
					.kind = TokIdent,
					.Ident = {
						.kind = IdentName,
						/* presumably copies the name into storage tied to toks.p - verify */
						.Name = psndup(toks.p, start, i),
					},
				});
			}
			continue;
		}

		/* Numeric literals. */
		if (IS_NUM(s[0]) || s[0] == '.') {
			/* First character of the literal, including any 0x / 0b prefix. */
			const char *tok_start = s;
			/* First character handed to the conversion routine. */
			const char *start = s;
			size_t base = 10;
			bool num_end = false;
			bool is_float = false;
			if (s[0] == '0') {
				consume(&pos, *(s++));
				if (s[0] == 'x' || s[0] == 'X') {
					base = 16;
					consume(&pos, *(s++));
					start = s;
				} else if (s[0] == 'b' || s[0] == 'B') {
					base = 2;
					consume(&pos, *(s++));
					start = s;
				} else if (!IS_NUM(s[0]) && s[0] != '.') {
					/* A lone "0": the literal is already complete. */
					num_end = true;
				}
			}
			if (!num_end) {
				for (;;) {
					if (s[0] == '.') {
						if (is_float) {
							/* Report the error at the second '.', not at the literal's start. */
							mark(&pos);
							mark_err(&pos);
							set_err("Too many decimal points in number");
							return toks;
						}
						if (base != 10) {
							set_err("Only decimal floats are supported");
							return toks;
						}
						is_float = true;
					} else if (!IS_ALNUM(s[0])) {
						break;
					}
					consume(&pos, *(s++));
				}
			}
			if (is_float) {
				ssize_t endpos;
				double num = stod(start, s - start, &endpos);
				if (endpos != -1) {
					/* endpos is relative to start; add the prefix length so the
					 * column points at the offending character. */
					err_col += (start - tok_start) + endpos;
					set_err("Invalid decimal float character: '%c'", start[endpos]);
					return toks;
				}
				emit(&toks, &pos, (Tok){
					.kind = TokVal,
					.Val = {
						.type = {
							.kind = TypeFloat,
						},
						.Float = num,
					},
				});
			} else {
				ssize_t endpos;
				intmax_t num = stoimax(start, s - start, base, &endpos);
				if (endpos != -1) {
					/* endpos is relative to start, which sits after a 0x / 0b
					 * prefix; add the prefix length so the column is exact. */
					err_col += (start - tok_start) + endpos;
					set_err("Invalid base %zu numerical character: '%c'", base, start[endpos]);
					return toks;
				}
				emit(&toks, &pos, (Tok){
					.kind = TokVal,
					.Val = {
						.type = {
							.kind = TypeInt,
						},
						.Int = num,
					},
				});
			}
			continue;
		}

		/* Punctuation, whitespace and comments. Cases that `break` have the
		 * current character consumed after the switch; cases that consume
		 * their own input `continue` instead. */
		switch (s[0]) {
		case 0:
			goto end_of_file;
		case ' ':
		case '\t':
			break;
		case '\n':
			emit(&toks, &pos, (Tok){
				.kind = TokOp,
				.Op = OpNewLn,
			});
			break;
		case ':':
			consume(&pos, *(s++));
			if (s[0] == '=') {
				emit(&toks, &pos, (Tok){ .kind = TokDeclare });
			} else {
				set_err("Expected ':='");
				return toks;
			}
			break;
		case '=':
			emit(&toks, &pos, (Tok){ .kind = TokAssign });
			break;
		case '{':
		case '}':
		case '(':
		case ')':
		case ',':
		case '+':
		case '-':
		case '*':
			emit(&toks, &pos, (Tok){
				.kind = TokOp,
				.Op = s[0],
			});
			break;
		case '/':
			consume(&pos, *(s++));
			if (s[0] == '/') {
				/* Line comment: skip up to, but not including, the newline so
				 * that the newline is still emitted as OpNewLn. */
				consume(&pos, *(s++));
				while (s[0] != '\n') {
					if (s[0] == 0)
						goto end_of_file;
					consume(&pos, *(s++));
				}
			} else if (s[0] == '*') {
				/* Block comment: step over the '*' of the opener, then skip to
				 * the matching closer. */
				consume(&pos, *(s++));
				if (!skip_block_comment(&pos, &s)) {
					set_err("Unclosed comment");
					return toks;
				}
			} else {
				emit(&toks, &pos, (Tok){
					.kind = TokOp,
					.Op = '/',
				});
			}
			continue;
		default:
			set_err("Unrecognized character: '%c'", s[0]);
			return toks;
		}
		consume(&pos, *(s++));
	}
end_of_file:
	/* Re-mark so the EOF token is stamped with the real end position even
	 * when EOF was reached from inside a line comment. */
	mark(&pos);
	emit(&toks, &pos, (Tok){
		.kind = TokOp,
		.Op = OpEOF,
	});
	return toks;
}