/* vim: set filetype=c: */ #include "lex.h" #include void Lex_init(Lex* obj) { TokenList_init(&obj->tokens); PtrStack_init(&obj->ptr_stack_); obj->out_read_total = 0; obj->out_written_total = 0; obj->context_size_ = 0; } Result Lex_lex_char(Lex* obj, char in) { /* If the current char is part of a Num token */ if((in >= '0' && in <= '9') || in == '.') { /* Don't let contexts become as large as the predefined size, as * we still need space for the null string terminator char \0 */ if(obj->context_size_ < LEX_MAX_CONTEXT_SIZE - 1) { obj->context_[obj->context_size_] = in; obj->context_size_++; } /* If the current char is not part of a number, but the * previous one was, write the number as a token */ } else { if(obj->context_size_) { /* Add the context string terminator */ obj->context_[obj->context_size_] = '\0'; obj->context_size_++; /* Write the token */ TokenListItem* itm = malloc(sizeof(TokenListItem)); itm->val.type = TokenTypeNum; itm->val.data.num = strtold(obj->context_, NULL); TokenList_push_back(&obj->tokens, itm); obj->context_size_ = 0; } /* If the char is not a whitespace, newline, string terminator, etc.. */ if(!(in == ' ' || in == '\t' || in == '\n' || in == '\0')) { switch(in) { case '(': case ')': { TokenListItem* itm = malloc(sizeof(TokenListItem)); itm->val.type = TokenTypeSep; itm->val.data.sep.sym = in; if(in == '(') PtrStack_push(&obj->ptr_stack_, (void*)itm); else /* if in == ')' */ { /* Throw an error, if the pointer stack has no top element */ if(obj->ptr_stack_.size == 0) return Result_err("Found ')' without matching '('"); /* Matching LParen */ TokenListItem* match = obj->ptr_stack_.top->ptr; /* Define the RParen's matching LParen */ itm->val.data.sep.matching = match; /* Define the LParen's matching RParen */ match->val.data.sep.matching = itm; /* Pop the top element */ PtrStack_pop(&obj->ptr_stack_); } TokenList_push_back(&obj->tokens, itm); return Result_noerr(); break; } case '+': case '-': case '*': case '/': case '^': { TokenListItem* itm = malloc(sizeof(TokenListItem)); itm->val.type = TokenTypeOp; itm->val.data.op = in; TokenList_push_back(&obj->tokens, itm); return Result_noerr(); break; } default: return Result_err("Unrecognized character"); break; } } } return Result_noerr(); } Result Lex_finish_lex(Lex* obj) { Result res = Lex_lex_char(obj, '\n'); if(res.has_err) return res; if(obj->ptr_stack_.size) { return Result_err("Found '(' without matching ')'"); } return Result_noerr(); } static bool contains_char_(char c, const char* charset) { size_t i; for(i = 0; charset[i] != '\0'; i++) { if(charset[i] == c) return true; } return false; } Result Lex_apply_op_precedence(Lex* obj, const char* opsyms, bool right_to_left) { /* Iterate through tokens, where curr is the current token */ TokenListItem* const begin = right_to_left ? obj->tokens.back : obj->tokens.front; TokenListItem* curr; for(curr = begin; curr != NULL; curr = right_to_left ? curr->prev : curr->next) { if(curr->val.type != TokenTypeOp) continue; if(curr->prev == NULL || curr->next == NULL) { return Result_err("Found an operator with at least one missing operand"); continue; } if(!contains_char_(curr->val.data.op, opsyms)) continue; /* Items right and left, takes into consideration potential parentheses: * For example, if we have (4+3)*5, lofop will be the position of the left paren, * and rofop will be the position of the 5. This is possible due to each paren * having a pointer to its matching paren (for details on how it works, refer to * Lex_lex_char()). */ TokenListItem* lofop = NULL; /* Item left of operator */ TokenListItem* rofop = NULL; /* Item right of operator */ /* lofop */ if(curr->prev->val.type == TokenTypeNum) lofop = curr->prev; else if(curr->prev->val.type == TokenTypeSep) /* Find the matching paren and position lofop there */ lofop = curr->prev->val.data.sep.matching; else { return Result_err("Invalid operand type left of operator"); } /* rofop: essentially the same as lofop */ if(curr->next->val.type == TokenTypeNum) rofop = curr->next; else if(curr->next->val.type == TokenTypeSep) rofop = curr->next->val.data.sep.matching; else { return Result_err("Invalid operand type right of operator"); } assert(lofop != NULL); assert(rofop != NULL); /* Don't add any parens, if they would be redundant. * We know that they are redundant, if lofop has an * LParen left of it and rofop has an RParen right of it*/ if( /* Check for LParen left of lofop */ lofop->prev != NULL && lofop->prev->val.type == TokenTypeSep && lofop->prev->val.data.sep.sym == '(' && /* Check for RParen right of rofop */ rofop->next != NULL && rofop->next->val.type == TokenTypeSep && rofop->next->val.data.sep.sym == ')' ) continue; /* Insert parens to make the AST builder handle precedence */ TokenListItem* lp = malloc(sizeof(TokenListItem)); TokenListItem* rp = malloc(sizeof(TokenListItem)); lp->val.type = TokenTypeSep; lp->val.data.sep.sym = '('; lp->val.data.sep.matching = rp; TokenList_insert_before(&obj->tokens, lofop, lp); rp->val.type = TokenTypeSep; rp->val.data.sep.sym = ')'; rp->val.data.sep.matching = lp; TokenList_insert_after(&obj->tokens, rofop, rp); } return Result_noerr(); } void Lex_uninit(Lex* obj) { TokenList_uninit(&obj->tokens); PtrStack_uninit(&obj->ptr_stack_); }