233 lines
4.5 KiB
C
233 lines
4.5 KiB
C
#include "lex.h"
|
|
|
|
#include "util.h"
|
|
|
|
/*
 * Lexer cursor: the position currently being read, together with a
 * remembered ("marked") position that is copied from it by mark().
 */
typedef struct Pos {
	size_t ln;    /* current line */
	size_t col;   /* current column */
	size_t m_ln;  /* marked line */
	size_t m_col; /* marked column */
} Pos;
|
|
|
|
static void consume(Pos *p, char c);
|
|
static void emit(TokList *toks, const Pos *p, Tok t);
|
|
static void mark(Pos *p);
|
|
static void mark_err(const Pos *p);
|
|
|
|
/*
 * Advance the current position of *p past the character c.
 *
 * A newline moves to column 1 of the next line; any other character moves
 * one column to the right on the same line.
 */
static void consume(Pos *p, char c) {
	if (c != '\n') {
		p->col += 1;
		return;
	}
	p->ln += 1;
	p->col = 1;
}
|
|
|
|
/*
 * Append token t to toks, tagged with the marked position of *p
 * (i.e. where the token started, not where the cursor is now).
 */
static void emit(TokList *toks, const Pos *p, Tok t) {
	Tok stamped = t;

	stamped.ln = p->m_ln;
	stamped.col = p->m_col;
	toklist_append(toks, stamped);
}
|
|
|
|
/* Remember the current position of *p as its marked position. */
static void mark(Pos *p) {
	const size_t cur_ln = p->ln;
	const size_t cur_col = p->col;

	p->m_col = cur_col;
	p->m_ln = cur_ln;
}
|
|
|
|
static void mark_err(const Pos *p) {
|
|
err_ln = p->m_ln;
|
|
err_col = p->m_col;
|
|
}
|
|
|
|
TokList lex(const char *s) {
|
|
TokList toks;
|
|
toklist_init(&toks);
|
|
Pos pos = { .ln = 1, .col = 1 };
|
|
for (;;) {
|
|
mark(&pos);
|
|
mark_err(&pos);
|
|
|
|
if (IS_ALPHA(s[0])) {
|
|
size_t i = 1;
|
|
const char *start = s;
|
|
consume(&pos, *(s++));
|
|
while (IS_ALNUM(s[0])) {
|
|
consume(&pos, *(s++));
|
|
i++;
|
|
}
|
|
if (streq_0_n("if", start, i))
|
|
emit(&toks, &pos, (Tok){ .kind = TokIf });
|
|
else if (streq_0_n("while", start, i))
|
|
emit(&toks, &pos, (Tok){ .kind = TokWhile });
|
|
else {
|
|
emit(&toks, &pos, (Tok){
|
|
.kind = TokIdent,
|
|
.Ident = {
|
|
.kind = IdentName,
|
|
.Name = sndup(start, i),
|
|
},
|
|
});
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (IS_NUM(s[0]) || s[0] == '.') {
|
|
const char *start = s;
|
|
size_t base = 10;
|
|
bool num_end = false;
|
|
bool is_float = false;
|
|
if (s[0] == '0') {
|
|
consume(&pos, *(s++));
|
|
if (s[0] == 'x' || s[0] == 'X') {
|
|
base = 16;
|
|
consume(&pos, *(s++));
|
|
start = s;
|
|
} else if (s[0] == 'b' || s[0] == 'B') {
|
|
base = 2;
|
|
consume(&pos, *(s++));
|
|
start = s;
|
|
} else if (!IS_NUM(s[0]) && s[0] != '.')
|
|
num_end = true;
|
|
}
|
|
if (!num_end) {
|
|
for (;;) {
|
|
if (s[0] == '.') {
|
|
if (is_float) {
|
|
mark(&pos);
|
|
mark_err(&pos);
|
|
set_err("Too many decimal points in number");
|
|
return toks;
|
|
}
|
|
if (base != 10) {
|
|
set_err("Only decimal floats are supported");
|
|
return toks;
|
|
}
|
|
is_float = true;
|
|
} else if (!IS_ALNUM(s[0]))
|
|
break;
|
|
consume(&pos, *(s++));
|
|
}
|
|
}
|
|
|
|
if (is_float) {
|
|
ssize_t endpos;
|
|
double num = stod(start, s - start, &endpos);
|
|
if (endpos != -1) {
|
|
err_col += endpos;
|
|
set_err("Invalid decimal float character: '%c'", start[endpos]);
|
|
return toks;
|
|
}
|
|
|
|
emit(&toks, &pos, (Tok){
|
|
.kind = TokVal,
|
|
.Val = {
|
|
.type = {
|
|
.kind = TypeFloat,
|
|
},
|
|
.Float = num,
|
|
},
|
|
});
|
|
} else {
|
|
ssize_t endpos;
|
|
intmax_t num = stoimax(start, s - start, base, &endpos);
|
|
if (endpos != -1) {
|
|
err_col += endpos;
|
|
set_err("Invalid base %zu numerical character: '%c'", base, start[endpos]);
|
|
return toks;
|
|
}
|
|
|
|
emit(&toks, &pos, (Tok){
|
|
.kind = TokVal,
|
|
.Val = {
|
|
.type = {
|
|
.kind = TypeInt,
|
|
},
|
|
.Int = num,
|
|
},
|
|
});
|
|
}
|
|
continue;
|
|
}
|
|
|
|
switch (s[0]) {
|
|
case 0:
|
|
goto end_of_file;
|
|
case ' ':
|
|
case '\t':
|
|
break;
|
|
case '\n':
|
|
emit(&toks, &pos, (Tok){
|
|
.kind = TokOp,
|
|
.Op = OpNewLn,
|
|
});
|
|
break;
|
|
case ':':
|
|
consume(&pos, *(s++));
|
|
if (s[0] == '=') {
|
|
emit(&toks, &pos, (Tok){ .kind = TokDeclare });
|
|
} else {
|
|
set_err("Expected ':='");
|
|
return toks;
|
|
}
|
|
break;
|
|
case '=':
|
|
emit(&toks, &pos, (Tok){ .kind = TokAssign });
|
|
break;
|
|
case '{':
|
|
case '}':
|
|
case '(':
|
|
case ')':
|
|
case ',':
|
|
case '+':
|
|
case '-':
|
|
case '*':
|
|
emit(&toks, &pos, (Tok){
|
|
.kind = TokOp,
|
|
.Op = s[0],
|
|
});
|
|
break;
|
|
case '/':
|
|
consume(&pos, *(s++));
|
|
if (s[0] == '/') {
|
|
consume(&pos, *(s++));
|
|
while (s[0] != '\n') {
|
|
if (s[0] == 0)
|
|
goto end_of_file;
|
|
consume(&pos, *(s++));
|
|
}
|
|
} else if (s[0] == '*') {
|
|
size_t depth = 1;
|
|
while (depth) {
|
|
consume(&pos, *(s++));
|
|
if (s[0] == '/') {
|
|
consume(&pos, *(s++));
|
|
if (s[0] == '*')
|
|
depth++;
|
|
} else if (s[0] == '*') {
|
|
consume(&pos, *(s++));
|
|
if (s[0] == '/')
|
|
depth--;
|
|
} else if (s[0] == 0) {
|
|
set_err("Unclosed comment");
|
|
return toks;
|
|
}
|
|
}
|
|
consume(&pos, *(s++));
|
|
} else {
|
|
emit(&toks, &pos, (Tok){
|
|
.kind = TokOp,
|
|
.Op = '/',
|
|
});
|
|
}
|
|
continue;
|
|
default:
|
|
set_err("Unrecognized character: '%c'", s[0]);
|
|
return toks;
|
|
}
|
|
consume(&pos, *(s++));
|
|
}
|
|
end_of_file:
|
|
emit(&toks, &pos, (Tok){
|
|
.kind = TokOp,
|
|
.Op = OpEOF,
|
|
});
|
|
return toks;
|
|
}
|