diff --git a/ir.c b/ir.c index 0b7d233..5158c14 100644 --- a/ir.c +++ b/ir.c @@ -72,32 +72,11 @@ void irtoks_eat_irtoks(IRToks *v, IRToks *other, size_t jmp_offset) { free(other->toks); } -static void print_val(const Value *v); static void print_irparam(const IRParam *p); -static void print_val(const Value *v) { - switch (v->type.kind) { - case TypeFloat: - printf("%f", v->Float); - break; - case TypeInt: - printf("%zd", v->Int); - break; - case TypeBool: - printf("%s", v->Bool ? "true" : "false"); - break; - case TypeChar: - printf("'%c'", v->Char); - break; - default: - printf("(unknown type)"); - break; - } -} - static void print_irparam(const IRParam *p) { if (p->kind == IRParamLiteral) { - print_val(&p->Literal); + print_value(&p->Literal, false); } else if (p->kind == IRParamAddr) { printf("%%%zd", p->Addr); } diff --git a/lex.c b/lex.c index 2e57395..3f3d274 100644 --- a/lex.c +++ b/lex.c @@ -54,7 +54,7 @@ static char get_esc_char(char c) { } } -TokList lex(const char *s) { +TokList lex(const char *s, Pool *static_vars) { TokList toks; toklist_init(&toks); Pos pos = { .ln = 1, .col = 1 }; @@ -299,6 +299,52 @@ TokList lex(const char *s) { emit(&toks, &pos, (Tok){ .kind = TokVal, .Val = { .type = { .kind = TypeChar, }, .Char = c, }, }); break; } + case '"': { + consume(&pos, *(s++)); + const char *start = s; + Pos start_pos = pos; + size_t size = 0; + + /* count the string size before allocating */ + while (s[0] != '"') { + if (!s[0]) { + set_err("Unexpected EOF in string literal"); + return toks; + } else if (s[0] == '\\') + consume(&pos, *(s++)); + consume(&pos, *(s++)); + size++; + } + + /* go through the actual string */ + s = start; + pos = start_pos; + char *str = pool_alloc(static_vars, type_size[TypeChar] * size); + for (size_t i = 0; i < size; i++) { + char c = s[0]; + if (c == '\\') { + consume(&pos, *(s++)); + c = get_esc_char(s[0]); + if (!c) { + set_err("Unrecognized escape sequence: '\\%c'", c); + return toks; + } + } + consume(&pos, *(s++)); + 
str[i] = c; + } + emit(&toks, &pos, (Tok){ .kind = TokVal, .Val = { + .type.kind = TypeArr, + .Arr = { + .is_string = true, + .type.kind = TypeChar, + .vals = str, + .len = size, + .cap = size, + }, + },}); + break; + } default: set_err("Unrecognized character: '%c'", s[0]); return toks; diff --git a/lex.h b/lex.h index b57ac35..bc60e54 100644 --- a/lex.h +++ b/lex.h @@ -3,6 +3,6 @@ #include "tok.h" -TokList lex(const char *s); +TokList lex(const char *s, Pool *static_vars); #endif /* LEX_H */ diff --git a/main.c b/main.c index e80bddd..8b196a2 100644 --- a/main.c +++ b/main.c @@ -34,15 +34,7 @@ static void die(const char *fmt, ...) { } static Value fn_put(Value *args) { - switch (args[0].type.kind) { - case TypeVoid: printf("(void)"); break; - case TypeFloat: printf("%f", args[0].Float); break; - case TypeInt: printf("%zd", args[0].Int); break; - case TypeBool: printf("%s", args[0].Bool ? "true" : "false"); break; - case TypeChar: printf("%c", args[0].Char); break; - default: - ASSERT_UNREACHED(); - } + print_value(&args[0], true); return (Value){0}; } @@ -136,9 +128,11 @@ int main(int argc, const char **argv) { } fclose(fp); /* lex source file */ - TokList tokens = lex(file); + Pool *static_vars = pool_new(4096); + TokList tokens = lex(file, static_vars); if (err) { toklist_term(&tokens); + pool_term(static_vars); free(file); fprintf(stderr, C_IRED "Lexer error" C_RESET " in " C_CYAN "%s" C_RESET ":%zu:%zu: %s\n", filename, err_ln, err_col, errbuf); return 1; @@ -158,6 +152,7 @@ int main(int argc, const char **argv) { if (err) { irtoks_term(&ir); toklist_term(&tokens); + pool_term(static_vars); fprintf(stderr, C_IRED "Parser error" C_RESET " in " C_CYAN "%s" C_RESET ":%zu:%zu: %s\n", filename, err_ln, err_col, errbuf); return 1; } @@ -169,9 +164,11 @@ int main(int argc, const char **argv) { run(&ir, funcs); if (err) { irtoks_term(&ir); + pool_term(static_vars); fprintf(stderr, C_IRED "Runtime error" C_RESET " in " C_CYAN "%s" C_RESET ":%zu:%zu: %s\n", filename, 
err_ln, err_col, errbuf); return 1; } } irtoks_term(&ir); + pool_term(static_vars); } diff --git a/tok.c b/tok.c index 33bee36..f76d69e 100644 --- a/tok.c +++ b/tok.c @@ -5,6 +5,75 @@ #include "util.h" +size_t type_size[TypeEnumSize] = { + [TypeVoid] = 0, + [TypeFloat] = sizeof(((Value*)NULL)->Float), + [TypeInt] = sizeof(((Value*)NULL)->Int), + [TypeBool] = sizeof(((Value*)NULL)->Bool), + [TypeChar] = sizeof(((Value*)NULL)->Char), + [TypeArr] = sizeof(((Value*)NULL)->Arr), +}; + +void print_value(const Value *v, bool raw) { + switch (v->type.kind) { + case TypeVoid: + printf("(void)"); + break; + case TypeFloat: + printf("%f", v->Float); + break; + case TypeInt: + printf("%zd", v->Int); + break; + case TypeBool: + printf("%s", v->Bool ? "true" : "false"); + break; + case TypeChar: + if (raw) + printf("%c", v->Char); + else { + const char *esc = unescape_char(v->Char); + if (esc) printf("'%s'", esc); + else printf("'%c'", v->Char); + } + break; + case TypeArr: + if (v->Arr.is_string) { + if (v->Arr.type.kind != TypeChar) + ASSERT_UNREACHED(); + char *str = v->Arr.vals; + if (!raw) + printf("\""); + for (size_t i = 0; i < v->Arr.len; i++) { + char c = str[i]; + if (raw) + printf("%c", c); + else { + const char *esc = unescape_char(c); + if (esc) printf("%s", esc); + else printf("%c", c); + } + } + if (!raw) + printf("\""); + } else { + printf("["); + for (size_t i = 0;; i++) { + size_t ty_sz = type_size[v->Arr.type.kind]; + Value ty_val = { .type = v->Arr.type }; + memcpy(&ty_val.Void, (uint8_t*)v->Arr.vals + ty_sz * i, ty_sz); + print_value(&ty_val, false); + if (i == v->Arr.len-1) break; + printf(", "); + } + printf("]"); + } + break; + default: + ASSERT_UNREACHED(); + } +} + int8_t op_prec[OperatorEnumSize] = { [OpEOF] = PREC_DELIM, [OpNewLn] = PREC_DELIM, @@ -108,24 +177,9 @@ void print_toks(TokList *l) { printf(": " C_ICYAN "%s" C_RESET, op_str[i->tok.Op]); break; case TokVal: - printf(C_IYELLOW "Val" C_RESET); - switch (i->tok.Val.type.kind) { - case 
/*
 * Map a character to the text of its escape sequence (backslash included).
 * Returns a pointer to a string literal, or NULL if the character has no
 * escape sequence in the table below.
 */
const char *unescape_char(char c) {
	static const struct {
		char raw;
		const char *seq;
	} escapes[] = {
		{ '\a',   "\\a"  },
		{ '\b',   "\\b"  },
		{ '\033', "\\e"  },
		{ '\f',   "\\f"  },
		{ '\n',   "\\n"  },
		{ '\r',   "\\r"  },
		{ '\t',   "\\t"  },
		{ '\v',   "\\v"  },
		{ '\\',   "\\\\" },
		{ '\'',   "\\'"  },
		{ '"',    "\\\"" },
	};

	for (size_t k = 0; k < sizeof escapes / sizeof escapes[0]; k++) {
		if (escapes[k].raw == c)
			return escapes[k].seq;
	}
	return NULL;
}
+69,8 @@ char *psndup(Pool *p, const char *s, size_t n); intmax_t stoimax(const char *s, size_t n, size_t base, ssize_t *endpos /* -1 on success */); /* convert a non-null-terminated string to a double */ double stod(const char *s, size_t n, ssize_t *endpos /* -1 on success */); +/* return the escape sequence for a given character; return NULL if there is none */ +const char *unescape_char(char c); /* sets errno on failure */ char *mreadfile(FILE *fp);