calculator/lex.c

187 lines
6.9 KiB
C

/* vim: set filetype=c: */
#include "lex.h"
#include <assert.h>
void Lex_init(Lex* obj) {
TokenList_init(&obj->tokens);
PtrStack_init(&obj->ptr_stack_);
obj->out_read_total = 0;
obj->out_written_total = 0;
obj->context_size_ = 0;
}
Result Lex_lex_char(Lex* obj, char in) {
/* If the current char is part of a Num token */
if((in >= '0' && in <= '9') || in == '.') {
/* Don't let contexts become as large as the predefined size, as
* we still need space for the null string terminator char \0 */
if(obj->context_size_ < LEX_MAX_CONTEXT_SIZE - 1) {
obj->context_[obj->context_size_] = in;
obj->context_size_++;
}
/* If the current char is not part of a number, but the
* previous one was, write the number as a token */
} else {
if(obj->context_size_) {
/* Add the context string terminator */
obj->context_[obj->context_size_] = '\0';
obj->context_size_++;
/* Write the token */
TokenListItem* itm = malloc(sizeof(TokenListItem));
itm->val.type = TokenTypeNum;
itm->val.data.num = strtold(obj->context_, NULL);
TokenList_push_back(&obj->tokens, itm);
obj->context_size_ = 0;
}
/* If the char is not a whitespace, newline, string terminator, etc.. */
if(!(in == ' ' || in == '\t' || in == '\n' || in == '\0')) {
switch(in) {
case '(':
case ')': {
TokenListItem* itm = malloc(sizeof(TokenListItem));
itm->val.type = TokenTypeSep;
itm->val.data.sep.sym = in;
if(in == '(')
PtrStack_push(&obj->ptr_stack_, (void*)itm);
else /* if in == ')' */ {
/* Throw an error, if the pointer stack has no top element */
if(obj->ptr_stack_.size == 0)
return Result_err("Found ')' without matching '('");
/* Matching LParen */
TokenListItem* match = obj->ptr_stack_.top->ptr;
/* Define the RParen's matching LParen */
itm->val.data.sep.matching = match;
/* Define the LParen's matching RParen */
match->val.data.sep.matching = itm;
/* Pop the top element */
PtrStack_pop(&obj->ptr_stack_);
}
TokenList_push_back(&obj->tokens, itm);
return Result_noerr();
break;
}
case '+':
case '-':
case '*':
case '/':
case '^': {
TokenListItem* itm = malloc(sizeof(TokenListItem));
itm->val.type = TokenTypeOp;
itm->val.data.op = in;
TokenList_push_back(&obj->tokens, itm);
return Result_noerr();
break;
}
default:
return Result_err("Unrecognized character");
break;
}
}
}
return Result_noerr();
}
Result Lex_finish_lex(Lex* obj) {
Result res = Lex_lex_char(obj, '\n');
if(res.has_err)
return res;
if(obj->ptr_stack_.size) {
return Result_err("Found '(' without matching ')'");
}
return Result_noerr();
}
static bool contains_char_(char c, const char* charset) {
size_t i;
for(i = 0; charset[i] != '\0'; i++) {
if(charset[i] == c)
return true;
}
return false;
}
Result Lex_apply_op_precedence(Lex* obj, const char* opsyms, bool right_to_left) {
/* Iterate through tokens, where curr is the current token */
TokenListItem* const begin = right_to_left ? obj->tokens.back : obj->tokens.front;
TokenListItem* curr;
for(curr = begin; curr != NULL; curr = right_to_left ? curr->prev : curr->next) {
if(curr->val.type != TokenTypeOp)
continue;
if(curr->prev == NULL || curr->next == NULL) {
return Result_err("Found an operator with at least one missing operand");
continue;
}
if(!contains_char_(curr->val.data.op, opsyms))
continue;
/* Items right and left, takes into consideration potential parentheses:
* For example, if we have (4+3)*5, lofop will be the position of the left paren,
* and rofop will be the position of the 5. This is possible due to each paren
* having a pointer to its matching paren (for details on how it works, refer to
* Lex_lex_char()). */
TokenListItem* lofop = NULL; /* Item left of operator */
TokenListItem* rofop = NULL; /* Item right of operator */
/* lofop */
if(curr->prev->val.type == TokenTypeNum)
lofop = curr->prev;
else if(curr->prev->val.type == TokenTypeSep)
/* Find the matching paren and position lofop there */
lofop = curr->prev->val.data.sep.matching;
else {
return Result_err("Invalid operand type left of operator");
}
/* rofop: essentially the same as lofop */
if(curr->next->val.type == TokenTypeNum)
rofop = curr->next;
else if(curr->next->val.type == TokenTypeSep)
rofop = curr->next->val.data.sep.matching;
else {
return Result_err("Invalid operand type right of operator");
}
assert(lofop != NULL);
assert(rofop != NULL);
/* Don't add any parens, if they would be redundant.
* We know that they are redundant, if lofop has an
* LParen left of it and rofop has an RParen right of it*/
if(
/* Check for LParen left of lofop */
lofop->prev != NULL &&
lofop->prev->val.type == TokenTypeSep &&
lofop->prev->val.data.sep.sym == '(' &&
/* Check for RParen right of rofop */
rofop->next != NULL &&
rofop->next->val.type == TokenTypeSep &&
rofop->next->val.data.sep.sym == ')'
) continue;
/* Insert parens to make the AST builder handle precedence */
TokenListItem* lp = malloc(sizeof(TokenListItem));
TokenListItem* rp = malloc(sizeof(TokenListItem));
lp->val.type = TokenTypeSep;
lp->val.data.sep.sym = '(';
lp->val.data.sep.matching = rp;
TokenList_insert_before(&obj->tokens, lofop, lp);
rp->val.type = TokenTypeSep;
rp->val.data.sep.sym = ')';
rp->val.data.sep.matching = lp;
TokenList_insert_after(&obj->tokens, rofop, rp);
}
return Result_noerr();
}
void Lex_uninit(Lex* obj) {
TokenList_uninit(&obj->tokens);
PtrStack_uninit(&obj->ptr_stack_);
}