From 63af3e907b715329b968582cb3eb74e1a42e29d9 Mon Sep 17 00:00:00 2001 From: r4 Date: Tue, 21 Dec 2021 11:40:49 +0100 Subject: [PATCH] add assignment operator and unify memory pools The unification of memory pools also fixed some memory leaks and hopefully reduced the mallocs of identifier strings significantly by giving them the same pool as the token stream. --- Makefile | 8 +++++++- example.script | 12 +++++++++--- lex.c | 2 +- map.c | 2 +- map.h | 2 +- parse.c | 38 +++++++++++++++++++++++++------------- pool_test.c | 18 ++++++++++++++++++ tok.c | 27 ++++----------------------- tok.h | 7 +++---- util.c | 39 +++++++++++++++++++++++++++++++++++++++ util.h | 14 +++++++++++++- 11 files changed, 121 insertions(+), 48 deletions(-) create mode 100644 pool_test.c diff --git a/Makefile b/Makefile index c51a5dc..00ac8fc 100644 --- a/Makefile +++ b/Makefile @@ -24,10 +24,16 @@ map_test: map_test.c util.c map.c run_map_test: map_test valgrind ./map_test +pool_test: pool_test.c util.c + $(CC) -o $@ $< $(CFLAGS) $(LDFLAGS) + +run_pool_test: pool_test + valgrind ./pool_test + .PHONY: clean clean: - rm -f $(OBJ) $(EXE) deps.mk gmon.out map_test + rm -f $(OBJ) $(EXE) deps.mk gmon.out map_test pool_test ifneq ($(MAKECMDGOALS),clean) include deps.mk diff --git a/example.script b/example.script index b2f7311..888c6da 100644 --- a/example.script +++ b/example.script @@ -1,7 +1,13 @@ a := 1 -b := 1 - 2 * 2 + 5 -c := a + b * 2 * b -d := a + 4 * b * a +a = a + 1 +b := a - 2 * 3 +b = 2 + b * a +c := -b + +//a := 1 +//b := 1 - 2 * 2 + 5 +//c := a + b * 2 * b +//d := a + 4 * b * a /*x := 1 y := 1 diff --git a/lex.c b/lex.c index bf38598..e233396 100644 --- a/lex.c +++ b/lex.c @@ -61,7 +61,7 @@ TokList lex(const char *s) { .kind = TokIdent, .Ident = { .kind = IdentName, - .Name = sndup(start, i), + .Name = psndup(toks.p, start, i), }, }); } diff --git a/map.c b/map.c index 2d1f020..99573a0 100644 --- a/map.c +++ b/map.c @@ -85,7 +85,7 @@ bool map_insert(Map *m, const char *key, const void *val) { return replaced; } -bool map_get(Map *m, const char *key, void *out_val) { +bool map_get(const Map *m, const char *key, void *out_val) { size_t idx = fnv1a32(key, strlen(key)) & (m->cap - 1); for (;;) { if (m->slots[idx].empty) diff --git a/map.h b/map.h index f760389..eba852a 100644 --- a/map.h +++ b/map.h @@ -26,6 +26,6 @@ void map_term(Map *m); bool map_insert(Map *m, const char *key, const void *val); /* Returns true if the key was found, returns false if it wasn't found. * out_val may be set to NULL. */ -bool map_get(Map *m, const char *key, void *out_val); +bool map_get(const Map *m, const char *key, void *out_val); #endif /* MAP_H */ diff --git a/parse.c b/parse.c index 7534f9a..97f4054 100644 --- a/parse.c +++ b/parse.c @@ -18,6 +18,7 @@ typedef struct Scope { } Scope; static void mark_err(const Tok *t); +static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos); static IRParam tok_to_irparam(Scope *sc, Tok *t); static Scope make_scope(Scope *parent, size_t mem_addr, bool with_idents); static void term_scope(Scope *sc); @@ -29,23 +30,29 @@ static void mark_err(const Tok *t) { err_col = t->col; } +static size_t get_ident_addr(const Scope *sc, const char *name, const Tok *errpos) { + size_t addr; + bool exists = false; + for (const Scope *i = sc; i != NULL; i = i->parent) { + if (!i->has_idents) + continue; + exists = map_get(&i->ident_addrs, name, &addr); + if (exists) + break; + } + if (!exists) { + mark_err(errpos); + set_err("Identifier '%s' not recognized in this scope", name); + return 0; + } + return addr; +} + static IRParam tok_to_irparam(Scope *sc, Tok *t) { if (t->kind == TokIdent) { size_t addr; if (t->Ident.kind == IdentName) { - bool exists = false; - for (Scope *i = sc; i != NULL; i = i->parent) { - if (!i->has_idents) - continue; - exists = map_get(&i->ident_addrs, t->Ident.Name, &addr); - if (exists) - break; - } - if (!exists) { - mark_err(t); - set_err("Identifier '%s' not recognized in this scope", t->Ident.Name); - return (IRParam){0}; - } + TRY_RET(addr = get_ident_addr(sc, t->Ident.Name, t), (IRParam){0}); } else if (t->Ident.kind == IdentAddr) addr = t->Ident.Addr; else @@ -316,6 +323,7 @@ static void stmt(State *s, Scope *sc, TokListItem *t) { char *name = t->tok.Ident.Name; t = t->next; if (t->tok.kind == TokDeclare) { + t = t->next; size_t addr = sc->mem_addr++; bool replaced = map_insert(&sc->ident_addrs, name, &addr); if (replaced) { @@ -323,7 +331,11 @@ static void stmt(State *s, Scope *sc, TokListItem *t) { set_err("'%s' already declared in this scope", name); return; } + TRY(expr(s, sc, t, true, true, addr)); + } else if (t->tok.kind == TokAssign) { t = t->next; + size_t addr; + TRY(addr = get_ident_addr(sc, name, &start->tok)); TRY(expr(s, sc, t, true, true, addr)); } } diff --git a/pool_test.c b/pool_test.c new file mode 100644 index 0000000..5f80d3d --- /dev/null +++ b/pool_test.c @@ -0,0 +1,18 @@ +#include "util.c" + +typedef struct Test { + char a_string[64]; +} Test; + +int main(void) { + Pool *p = pool_new(1); + Test *t = pool_alloc(p, sizeof(Test)); + strcpy(t->a_string, "a test string"); + Test *tarr = pool_alloc(p, sizeof(Test) * 32); + strcpy(tarr[31].a_string, "another test string"); + char *c = pool_alloc(p, 1); + *c = 'a'; + Test *largearr = pool_alloc(p, sizeof(Test) * 1024); + strcpy(largearr[1023].a_string, "yet another test string"); + pool_term(p); +} diff --git a/tok.c b/tok.c index 4806f86..91249d2 100644 --- a/tok.c +++ b/tok.c @@ -38,40 +38,21 @@ const char *tok_str[TokKindEnumSize] = { [TokWhile] = "while", }; -#define TOKLIST_MEMPOOL_INIT_CAP 4096 +#define TOKLIST_MEMPOOL_INIT_CAP 32768 static inline TokListItem *toklist_alloc_item(TokList *l) { - if (l->curr_mempool_cap < l->mempool_sizes[l->curr_mempool]+1) { - if (l->curr_mempool+1 >= 32) - ASSERT_UNREACHED(); - l->curr_mempool++; - l->curr_mempool_cap *= 2; - l->mempool_sizes[l->curr_mempool] = 0; - l->mempools[l->curr_mempool] = malloc(sizeof(TokListItem) * l->curr_mempool_cap); - } - TokListItem *itm = l->mempools[l->curr_mempool] + l->mempool_sizes[l->curr_mempool]++; + TokListItem *itm = pool_alloc(l->p, sizeof(TokListItem)); itm->prev = itm->next = NULL; return itm; } void toklist_init(TokList *l) { l->begin = l->end = NULL; - l->curr_mempool = 0; - l->mempools[l->curr_mempool] = malloc(sizeof(TokListItem) * TOKLIST_MEMPOOL_INIT_CAP); - l->curr_mempool_cap = TOKLIST_MEMPOOL_INIT_CAP; - l->mempool_sizes[0] = 0; + l->p = pool_new(TOKLIST_MEMPOOL_INIT_CAP); } void toklist_term(TokList *l) { - for (size_t i = 0; i <= l->curr_mempool; i++) { - for (size_t j = 0; j < l->mempool_sizes[i]; j++) { - TokListItem *itm = &l->mempools[i][j]; - if (itm->tok.kind == TokIdent && itm->tok.Ident.kind == IdentName) { - free(itm->tok.Ident.Name); - } - } - free(l->mempools[i]); - } + pool_term(l->p); } void toklist_append(TokList *l, Tok t) { diff --git a/tok.h b/tok.h index 179e8a7..03d5bf2 100644 --- a/tok.h +++ b/tok.h @@ -4,6 +4,8 @@ #include #include +#include "util.h" + typedef struct Type { enum { TypeVoid = 0, @@ -87,10 +89,7 @@ typedef struct TokListItem { typedef struct TokList { TokListItem *begin, *end; - TokListItem *mempools[32]; /* few mallocs, no copying => much speed */ - size_t mempool_sizes[32]; - size_t curr_mempool_cap; - size_t curr_mempool; + Pool *p; } TokList; void toklist_init(TokList *l); diff --git a/util.c b/util.c index a9dd61b..aec093b 100644 --- a/util.c +++ b/util.c @@ -49,6 +49,38 @@ void set_err(const char *fmt, ...) { va_end(va); } +Pool *pool_new(size_t init_cap) { + Pool *p = malloc(sizeof(Pool) + init_cap); + p->len = 0; + p->cap = init_cap; + p->data = p + 1; + p->next = NULL; + return p; +} + +void pool_term(Pool *p) { + for (Pool *i = p; i != NULL;) { + Pool *next = i->next; + free(i); + i = next; + } +} + +void *pool_alloc(Pool *p, size_t bytes) { + for (Pool *i = p;; i = i->next) { + if (i->len + bytes < i->cap) { + void *ret = (uint8_t*)i->data + i->len; + i->len += bytes; + return ret; + } + if (!i->next) { + i->next = pool_new(bytes + i->cap * 2); + i->next->len = bytes; + return i->next->data; + } + } +} + char *sndup(const char *s, size_t n) { char *ret = malloc(n+1); if (ret) { @@ -58,6 +90,13 @@ char *sndup(const char *s, size_t n) { return ret; } +char *psndup(Pool *p, const char *s, size_t n) { + char *ret = pool_alloc(p, n+1); + memcpy(ret, s, n); + ret[n] = 0; + return ret; +} + intmax_t stoimax(const char *s, size_t n, size_t base, ssize_t *endpos) { for (size_t i = 0; i < n; i++) { if (s[i] == 0) { n = i; break; } } intmax_t res = 0; diff --git a/util.h b/util.h index 41a23b4..89b17c2 100644 --- a/util.h +++ b/util.h @@ -41,11 +41,23 @@ void set_err(const char *fmt, ...); #define IS_ALPHA(c) ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_') #define IS_ALNUM(c) (IS_ALPHA(c) || IS_NUM(c)) +/* Useful for efficiently allocating lots of data that can all be freed at once afterwards. */ +typedef struct Pool { + struct Pool *next; + void *data; + size_t len, cap; +} Pool; +Pool *pool_new(size_t init_cap); /* You usually want init_cap to be pretty high. */ +void pool_term(Pool *p); +void *pool_alloc(Pool *p, size_t bytes); + #define streq(a, b) (strcmp(a, b) == 0) /* check if a null-terminated string and a non-null-terminated string are equal */ static inline bool streq_0_n(const char *a, const char *b, size_t bn) { return bn == strlen(a) ? strncmp(a, b, bn) == 0 : false; } -/* a more trusting version of strndup; also for systems that don't have strndup */ +/* a more trusting version of strndup */ char *sndup(const char *s, size_t n); +/* sndup with memory pools */ +char *psndup(Pool *p, const char *s, size_t n); /* convert a non-null-terminated string to an intmax_t */ intmax_t stoimax(const char *s, size_t n, size_t base, ssize_t *endpos /* -1 on success */); /* convert a non-null-terminated string to a double */