summary | refs | log | tree | commit | diff
path: root/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
diff options
context:
space:
mode:
Diffstat (limited to 'comp/lucas-standen-NEA/code/tokenizer/tokenizer.c')
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/tokenizer.c 186
1 files changed, 80 insertions, 106 deletions
diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
index 5cc596f..f94b640 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
+++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
@@ -1,154 +1,128 @@
-#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#include "parser.h"
-#include "util.h"
#include "types.h"
+#include "util.h"
-int functionIdCounter = 0;
-
-ast_node *GenAst(char *exp); // generates the ast of 1 expression
-int getArgCount(char *exp); // counts how many args are pressent in exp
-char **GetStringArgs(char *exp); // gets the string args of an expression
-char *GetFunction(char *exp); // gets the function used in 1 expression
-builtInFuncs IsBuiltIn(char *func); // returns the id of a function thats built in, or -1 if its not
-
-
-builtInFuncs IsBuiltIn(char *func){
+void getBuiltIn(char *func, ast_node *node){
if (strcmp(func, "defun") == 0){
- return DEFUN;
+ node->func->builtInFunc = DEFUN;
}else if (strcmp(func, "let") == 0){
- return LET;
+ node->func->builtInFunc = LET;
}else if (strcmp(func, "set") == 0){
- return SET;
+ node->func->builtInFunc = SET;
}else if (strcmp(func, "if") == 0){
- return IF;
+ node->func->builtInFunc = IF;
}else if (strcmp(func, "elif") == 0){
- return ELIF;
+ node->func->builtInFunc = ELIF;
}else if (strcmp(func, "else") == 0){
- return ELSE;
+ node->func->builtInFunc = ELSE;
}else if (strcmp(func, "for") == 0){
- return FOR;
+ node->func->builtInFunc = FOR;
}else if (strcmp(func, "while") == 0){
- return WHILE;
+ node->func->builtInFunc = WHILE;
}else if (strcmp(func, "symbol") == 0){
- return SYMBOL;
- }else if (strcmp(func, "struct") == 0){
- return STRUCT;
+ node->func->builtInFunc = SYMBOL;
}else if (strcmp(func, "+") == 0){
- return ADD;
+ node->func->builtInFunc = ADD;
}else if (strcmp(func, "-") == 0){
- return SUB;
+ node->func->builtInFunc = SUB;
}else if (strcmp(func, "*") == 0){
- return MUL;
+ node->func->builtInFunc = MUL;
}else if (strcmp(func, "/") == 0){
- return DIV;
+ node->func->builtInFunc = DIV;
}else if (strcmp(func, "=") == 0){
- return EQ;
+ node->func->builtInFunc = EQ;
}else if (strcmp(func, "!=") == 0){
- return NEQ;
+ node->func->builtInFunc = NEQ;
}else if (strcmp(func, ">") == 0){
- return GT;
+ node->func->builtInFunc = GT;
}else if (strcmp(func, "<") == 0){
- return LT;
+ node->func->builtInFunc = LT;
}else if (strcmp(func, ">=") == 0){
- return GTEQ;
+ node->func->builtInFunc = GTEQ;
}else if (strcmp(func, "<=") == 0){
- return LTEQ;
+ node->func->builtInFunc = LTEQ;
}else if (strcmp(func, "cast") == 0){
- return CAST;
+ node->func->builtInFunc = CAST;
}else if (strcmp(func, "typeof") == 0){
- return TYPEOF;
- }else if (strcmp(func, "terminate") == 0){
- return TERMINATE;
+ node->func->builtInFunc = TYPEOF;
+ }else if (strcmp(func, "exit") == 0){
+ node->func->builtInFunc = EXIT;
}else if (strcmp(func, "return") == 0){
- return RETURN;
+ node->func->builtInFunc = RETURN;
}
else {
- return -1;
+ node->func->builtInFunc = -1;
}
}
-char *GetFunction(char *exp){ // takes exp with brackets
- char *out = CheckedMalloc(strlen(exp));
- int i = 1;
- char c = exp[i];
- while (c != ' '){
- out[i-1] = c;
- i++;
- c = exp[i];
+ll_t *getUserDefinedFunction(char *function);
+
+void expressFunction(char *function, ast_node *node){
+ if ((node->func->builtInFunc = getBuiltIn(function)) == -1){
+ node->func->func = getUserDefinedFunction(function);
+ } else {
+ node->func->func = NULL;
}
- i++;
- out[i] = '\0';
- out = CheckedRealloc(out, i);
- return out;
}
-// TODO make it count any arg inside () as one arg
-char **GetStringArgs(char *exp){ // takes exp without brackets
- int spaceCount = 0;
- int i = 0;
- char c = exp[i];
- while (c != '\0'){
- spaceCount++;
- i++;
- c = exp[i];
-
+ast_node *tokenize(char *input){
+ ast_node *node;
+
+ char *exp, *function, **args;
+ size_t i, j;
+ int depth;
+
+ for (int i = 0; i < strlen(input); i++){
+ if (input[i] == '('){
+ depth = 1;
+ j = i;
+ exp = CheckedMalloc(strlen(input));
+ while (depth != 0){
+ if (input[j] == '('){
+ depth++;
+ } else if (input[j] == ')'){
+ depth--;
+ }
+ exp[j - i] = input[j+1];
+ j++;
+ if (input[j] == '\0'){
+ fprintf(stderr, "error brace not closed");
+ exit(1);
+ }
+ }
+ j -= 2;
+ exp[j] = '\0';
+ printf("%s\n", exp);
+ }else if (input[i] == '"'){
+ i++;
+ while (input[i] != '"') i++;
+ }
}
- char **out = CheckedMalloc(spaceCount);
- for (int i = 0; i < spaceCount; i++){
- out[i] = CheckedMalloc(strlen(exp));
- }
+ node = CheckedMalloc(sizeof(ast_node));
- int tokCounter = 0;
i = 0;
- int charCounter = 0;
- while (exp[i] != '\0'){
- if (exp[i] != ' '){
- if (tokCounter != 0){
- out[tokCounter-1][charCounter] = exp[i];
- charCounter++;
- }
- } else{
- out[tokCounter][i] = '\0';
- charCounter = 0;
- tokCounter++;
- }
- i++;
+ function = CheckedMalloc(strlen(exp));
+ while (exp[i] != ' '){
+ function[i] = exp[i];
+ i++;
}
- return out;
-}
+ function[i] = '\0';
+ function = CheckedRealloc(function, i);
+ printf("%s\n", function);
-ast_node *GenAst(char *exp){ // takes exp with brackets
- ast_node *head = CheckedMalloc(sizeof(ast_node));
- char *function = GetFunction(exp);
- head->builtInFunc = IsBuiltIn(function);
- free(function);
+ expressFunction(function, node);
- if (head->builtInFunc == -1){
- head->func = CheckedMalloc(sizeof(functionToken));
- head->func->id = functionIdCounter;
- functionIdCounter++;
- }else {
- head->func = NULL;
- }
+ free(function);
+ free(exp);
- return head;
+ return NULL;
}
int main(){
- ast_node *node = GenAst("(+ 1 2)");
- printf("%d\n", node->builtInFunc);
-
- char **args = GetStringArgs("+ 1 2");
- for (int i = 0; i < 2; i++){
- printf("%s\n", args[i]);
- }
-
- free(args);
-
- free(node);
+ char sample[] = "(+ \"hello(\" 1)";
+ tokenize(sample);
}