diff options
Diffstat (limited to 'comp/lucas-standen-NEA/code/tokenizer')
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/Makefile | 9 | ||||
-rwxr-xr-x | comp/lucas-standen-NEA/code/tokenizer/tokenizer | bin | 24392 -> 42264 bytes | |||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/tokenizer.c | 203 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/tokenizer.h | 0 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/types.h | 26 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/util.c | 38 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/util.h | 7 |
7 files changed, 162 insertions, 121 deletions
diff --git a/comp/lucas-standen-NEA/code/tokenizer/Makefile b/comp/lucas-standen-NEA/code/tokenizer/Makefile index 479b838..b09f177 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/Makefile +++ b/comp/lucas-standen-NEA/code/tokenizer/Makefile @@ -1,10 +1,9 @@ tokenizer: parser util tokenizer.c - cc -O3 tokenizer.c parser.o util.o -o tokenizer + cc -O3 tokenizer.c parser.o util.o -o tokenizer -ggdb parser: parser.c - cc -O3 parser.c -c -o parser.o -util: util.c - cc -O3 util.c -c -o util.o - + cc -O3 parser.c -c -o parser.o -ggdb +util: util.c + cc -O3 util.c -c -o util.o -ggdb clean: rm -rf *.o rm -rf tokenizer diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer b/comp/lucas-standen-NEA/code/tokenizer/tokenizer Binary files differindex 726ee21..2d2f1c0 100755 --- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer +++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c index 5cc596f..080951b 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c +++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c @@ -1,154 +1,151 @@ -#include <stdlib.h> #include <stdio.h> #include <string.h> -#include "parser.h" -#include "util.h" #include "types.h" +#include "util.h" -int functionIdCounter = 0; - -ast_node *GenAst(char *exp); // generates the ast of 1 expression -int getArgCount(char *exp); // counts how many args are pressent in exp -char **GetStringArgs(char *exp); // gets the string args of an expression -char *GetFunction(char *exp); // gets the function used in 1 expression -builtInFuncs IsBuiltIn(char *func); // returns the id of a function thats built in, or -1 if its not - +#define MAXARGS 8 -builtInFuncs IsBuiltIn(char *func){ +int getBuiltIn(char *func, ast_node *node){ if (strcmp(func, "defun") == 0){ - return DEFUN; + node->func->builtInFunc= DEFUN; }else if (strcmp(func, "let") == 0){ - return LET; + node->func->builtInFunc = LET; }else if (strcmp(func, "set") == 0){ - return SET; + node->func->builtInFunc = SET; }else if (strcmp(func, "if") == 0){ - return IF; + node->func->builtInFunc = IF; }else if (strcmp(func, "elif") == 0){ - return ELIF; + node->func->builtInFunc = ELIF; }else if (strcmp(func, "else") == 0){ - return ELSE; + node->func->builtInFunc = ELSE; }else if (strcmp(func, "for") == 0){ - return FOR; + node->func->builtInFunc = FOR; }else if (strcmp(func, "while") == 0){ - return WHILE; + node->func->builtInFunc = WHILE; }else if (strcmp(func, "symbol") == 0){ - return SYMBOL; - }else if (strcmp(func, "struct") == 0){ - return STRUCT; + node->func->builtInFunc = SYMBOL; }else if (strcmp(func, "+") == 0){ - return ADD; + node->func->builtInFunc = ADD; }else if (strcmp(func, "-") == 0){ - return SUB; + node->func->builtInFunc = SUB; }else if (strcmp(func, "*") == 0){ - return MUL; + node->func->builtInFunc = MUL; }else if (strcmp(func, "/") == 0){ - return DIV; + node->func->builtInFunc = DIV; }else if (strcmp(func, "=") == 0){ - return EQ; + node->func->builtInFunc = EQ; }else if (strcmp(func, "!=") == 0){ - return NEQ; + node->func->builtInFunc = NEQ; }else if (strcmp(func, ">") == 0){ - return GT; + node->func->builtInFunc = GT; }else if (strcmp(func, "<") == 0){ - return LT; + node->func->builtInFunc = LT; }else if (strcmp(func, ">=") == 0){ - return GTEQ; + node->func->builtInFunc = GTEQ; }else if (strcmp(func, "<=") == 0){ - return LTEQ; + node->func->builtInFunc = LTEQ; }else if (strcmp(func, "cast") == 0){ - return CAST; + node->func->builtInFunc = CAST; }else if (strcmp(func, "typeof") == 0){ - return TYPEOF; - }else if (strcmp(func, "terminate") == 0){ - return TERMINATE; + node->func->builtInFunc = TYPEOF; + }else if (strcmp(func, "exit") == 0){ + node->func->builtInFunc = EXIT; }else if (strcmp(func, "return") == 0){ - return RETURN; - } - else { + node->func->builtInFunc = RETURN; + }else { + node->func->builtInFunc = NIL; return -1; } + return 0; } -char *GetFunction(char *exp){ // takes exp with brackets - char *out = CheckedMalloc(strlen(exp)); - int i = 1; - char c = exp[i]; - while (c != ' '){ - out[i-1] = c; - i++; - c = exp[i]; +ll_t *getUserDefinedFunction(char *function); + +void expressFunction(char *function, ast_node *node){ + node->func = CheckedMalloc(sizeof(functionToken)); + if ((getBuiltIn(function, node)) == -1){ + //node->func->func = getUserDefinedFunction(function); + } else { + node->func->func = NULL; } - i++; - out[i] = '\0'; - out = CheckedRealloc(out, i); - return out; } -// TODO make it count any arg inside () as one arg -char **GetStringArgs(char *exp){ // takes exp without brackets - int spaceCount = 0; - int i = 0; - char c = exp[i]; - while (c != '\0'){ - spaceCount++; - i++; - c = exp[i]; - +void expressArgs(char **args, ast_node *node){ + for (int i = 0; i < MAXARGS; i++){ + if (node->args[i] == NULL){ + memcpy(node->literalArgs[i], args[i], strlen(args[i]) + 1); + } } + +} - char **out = CheckedMalloc(spaceCount); - for (int i = 0; i < spaceCount; i++){ - out[i] = CheckedMalloc(strlen(exp)); - } +ast_node *tokenize(char *input){ + ast_node *node, *child; - int tokCounter = 0; - i = 0; - int charCounter = 0; - while (exp[i] != '\0'){ - if (exp[i] != ' '){ - if (tokCounter != 0){ - out[tokCounter-1][charCounter] = exp[i]; - charCounter++; + char *exp, *function, **args; + size_t i = 0, argCount = -1; + int depth = 0; + + node = CheckedMalloc(sizeof(ast_node)); + node->args = CheckedMalloc(sizeof(ast_node) * MAXARGS); + node->literalArgs = CheckedMalloc(sizeof(void *) * MAXARGS); + + if (input[i] == '('){ + depth = 1; + i++; + exp = CheckedMalloc(strlen(input)); + while (depth != 0){ + if (input[i] == ' ') argCount++; + if (input[i] == '('){ + child = tokenize(&input[i]); + node->args[argCount] = child; + depth++; + } else if (input[i] == ')'){ + depth--; + } + exp[i - 1] = input[i]; + if (input[i] == '\0'){ + fprintf(stderr, "error brace not closed\n"); + exit(1); } - } else{ - out[tokCounter][i] = '\0'; - charCounter = 0; - tokCounter++; + i++; } + exp[i-2] = '\0'; + exp = CheckedRealloc(exp, strlen(exp) + 1); + printf("%s\n", exp); + }else if (input[i] == '"'){ i++; + while (input[i] != '"') i++; } - return out; -} - -ast_node *GenAst(char *exp){ // takes exp with brackets - ast_node *head = CheckedMalloc(sizeof(ast_node)); - char *function = GetFunction(exp); - head->builtInFunc = IsBuiltIn(function); - free(function); - if (head->builtInFunc == -1){ - head->func = CheckedMalloc(sizeof(functionToken)); - head->func->id = functionIdCounter; - functionIdCounter++; - }else { - head->func = NULL; + i = 0; + function = CheckedMalloc(strlen(exp)); + while (exp[i] != ' '){ + function[i] = exp[i]; + i++; } - return head; -} + function[i] = '\0'; + function = CheckedRealloc(function, i); + printf("%s\n", function); -int main(){ - ast_node *node = GenAst("(+ 1 2)"); - printf("%d\n", node->builtInFunc); + expressFunction(function, node); - char **args = GetStringArgs("+ 1 2"); - for (int i = 0; i < 2; i++){ - printf("%s\n", args[i]); - } + i++; + args = Split(&input[i], ' '); + // need a length + expressArgs(args, node /* length */ ); - free(args); + free(exp); - free(node); + return node; +} + +int main(){ + char sample[] = "(+ (- 2 2) 1)"; + ast_node *root = tokenize(sample); + printf("%d", root->args[0]->func->builtInFunc); + free(root); } diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h deleted file mode 100644 index e69de29..0000000 --- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h +++ /dev/null diff --git a/comp/lucas-standen-NEA/code/tokenizer/types.h b/comp/lucas-standen-NEA/code/tokenizer/types.h index 034dc04..8c79bd9 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/types.h +++ b/comp/lucas-standen-NEA/code/tokenizer/types.h @@ -10,8 +10,6 @@ typedef enum types { FLOAT_t = 4, CHAR_T = 5, FUNCTION_T = 6, - STRUCT_T = 7, - OBJ_T = 8, } types; // int types @@ -24,16 +22,9 @@ typedef uint64_t u64; // char and float types are still called char and float so no typedef needed -// function type -typedef struct functionToken { - int id; // a function id to avoid strings - types returnType; // what the function returns - types *args; // the types of args a function takes - ll_t astHead; // the code for the function -} functionToken; - // built in functions typedef enum builtInFuncs { + // general DEFUN = 0, LET = 1, SET = 2, @@ -43,7 +34,6 @@ typedef enum builtInFuncs { FOR = 6, WHILE = 7, SYMBOL = 8, - STRUCT = 9, // arithmetic ADD = 10, @@ -59,16 +49,26 @@ typedef enum builtInFuncs { GTEQ = 18, LTEQ = 19, + // misc CAST = 20, TYPEOF = 21, - TERMINATE = 22, + EXIT = 22, RETURN = 23, + NIL = -1, } builtInFuncs; +// function type +typedef struct functionToken { + int id; // a function id to avoid strings + types returnType; // what the function returns + types *args; // the types of args a function takes + ll_t *func; // the code for the function + builtInFuncs builtInFunc; // a built in functions +} functionToken; + typedef struct ast_node ast_node; typedef struct ast_node { - builtInFuncs builtInFunc; // if it's a builtin function call use this, else -1 functionToken *func; // if it's not builtin then use this void **literalArgs; // the args of the node, this will be an array of litteral values ast_node **args; // the non litteral tokens diff --git a/comp/lucas-standen-NEA/code/tokenizer/util.c b/comp/lucas-standen-NEA/code/tokenizer/util.c index de5b6b2..46deba8 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/util.c +++ b/comp/lucas-standen-NEA/code/tokenizer/util.c @@ -1,4 +1,5 @@ #include <stdio.h> +#include <string.h> #include <stdlib.h> #include <errno.h> #include <error.h> @@ -6,6 +7,7 @@ void Die(); // brings down the program void *CheckedMalloc(long size); // malloc checked void *CheckedRealloc(void *out, long size); // realloc checked +char **Split(char *s, char c); // splits a string into an array of strings around c void Die(){ perror("zpy parser"); @@ -25,3 +27,39 @@ void *CheckedRealloc(void *orig, long size){ Die(); return out; } + +static size_t countSegment(char const *s, char c){ + size_t counter = 0; + int i = 0; + while (s[i]){ + if (s[i] == c){ + i++; + continue; + } + counter++; + while (s[i] && s[i] != c) i++; + } + return counter; +} + +char **Split(char *s, char c){ + char **strs; + size_t tab_counter; + size_t i; + size_t j; + + if (s == NULL) return NULL; + tab_counter = countSegment(s, c); + if ((strs = (char**)CheckedMalloc(sizeof(char*) * (tab_counter + 1))) == NULL) return NULL; + tab_counter = 0; + j = -1; + while (s[++j]) { + if (s[j] == c) continue; + i = 0; + while (s[j + i] && s[j + i] != c) i++; + if ((strs[tab_counter++] = strndup(&s[j], i)) == NULL) return NULL; + j += i - 1; + } + strs[tab_counter] = NULL; + return strs; +} diff --git a/comp/lucas-standen-NEA/code/tokenizer/util.h b/comp/lucas-standen-NEA/code/tokenizer/util.h index cbcbdfa..c25ebec 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/util.h +++ b/comp/lucas-standen-NEA/code/tokenizer/util.h @@ -1,3 +1,10 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <error.h> + void Die(); // brings down the program void *CheckedMalloc(long size); // malloc checked void *CheckedRealloc(void *out, long size); // realloc checked +char **Split(char *s, char c); // splits a string into an array of strings around c |