diff options
author | standenboy <standenboy@seacrossedlovers.xyz> | 2024-06-06 08:16:19 +0100 |
---|---|---|
committer | standenboy <standenboy@seacrossedlovers.xyz> | 2024-06-06 08:16:19 +0100 |
commit | f6d4ab1521fe2427b2a43f92c4ecf163bbd889dc (patch) | |
tree | c03c3c59fabe43eb11019e46f4c4f6f960e59364 /comp/lucas-standen-NEA/code | |
parent | 9438453a5d391a42371b8b8d7931923678956995 (diff) |
added too much stuff
Diffstat (limited to 'comp/lucas-standen-NEA/code')
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/Makefile | 9 | ||||
-rwxr-xr-x | comp/lucas-standen-NEA/code/tokenizer/tokenizer | bin | 24392 -> 28928 bytes | |||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/tokenizer.c | 186 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/tokenizer.h | 0 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/types.h | 25 |
5 files changed, 96 insertions, 124 deletions
diff --git a/comp/lucas-standen-NEA/code/tokenizer/Makefile b/comp/lucas-standen-NEA/code/tokenizer/Makefile index 479b838..b09f177 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/Makefile +++ b/comp/lucas-standen-NEA/code/tokenizer/Makefile @@ -1,10 +1,9 @@ tokenizer: parser util tokenizer.c - cc -O3 tokenizer.c parser.o util.o -o tokenizer + cc -O3 tokenizer.c parser.o util.o -o tokenizer -ggdb parser: parser.c - cc -O3 parser.c -c -o parser.o -util: util.c - cc -O3 util.c -c -o util.o - + cc -O3 parser.c -c -o parser.o -ggdb +util: util.c + cc -O3 util.c -c -o util.o -ggdb clean: rm -rf *.o rm -rf tokenizer diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer b/comp/lucas-standen-NEA/code/tokenizer/tokenizer Binary files differindex 726ee21..ab76521 100755 --- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer +++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c index 5cc596f..f94b640 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c +++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c @@ -1,154 +1,128 @@ -#include <stdlib.h> #include <stdio.h> #include <string.h> -#include "parser.h" -#include "util.h" #include "types.h" +#include "util.h" -int functionIdCounter = 0; - -ast_node *GenAst(char *exp); // generates the ast of 1 expression -int getArgCount(char *exp); // counts how many args are pressent in exp -char **GetStringArgs(char *exp); // gets the string args of an expression -char *GetFunction(char *exp); // gets the function used in 1 expression -builtInFuncs IsBuiltIn(char *func); // returns the id of a function thats built in, or -1 if its not - - -builtInFuncs IsBuiltIn(char *func){ +void getBuiltIn(char *func, ast_node *node){ if (strcmp(func, "defun") == 0){ - return DEFUN; + node->func->builtInFunc = DEFUN; }else if (strcmp(func, "let") == 0){ - return LET; + node->func->builtInFunc = LET; }else if 
(strcmp(func, "set") == 0){ - return SET; + node->func->builtInFunc = SET; }else if (strcmp(func, "if") == 0){ - return IF; + node->func->builtInFunc = IF; }else if (strcmp(func, "elif") == 0){ - return ELIF; + node->func->builtInFunc = ELIF; }else if (strcmp(func, "else") == 0){ - return ELSE; + node->func->builtInFunc = ELSE; }else if (strcmp(func, "for") == 0){ - return FOR; + node->func->builtInFunc = FOR; }else if (strcmp(func, "while") == 0){ - return WHILE; + node->func->builtInFunc = WHILE; }else if (strcmp(func, "symbol") == 0){ - return SYMBOL; - }else if (strcmp(func, "struct") == 0){ - return STRUCT; + node->func->builtInFunc = SYMBOL; }else if (strcmp(func, "+") == 0){ - return ADD; + node->func->builtInFunc = ADD; }else if (strcmp(func, "-") == 0){ - return SUB; + node->func->builtInFunc = SUB; }else if (strcmp(func, "*") == 0){ - return MUL; + node->func->builtInFunc = MUL; }else if (strcmp(func, "/") == 0){ - return DIV; + node->func->builtInFunc = DIV; }else if (strcmp(func, "=") == 0){ - return EQ; + node->func->builtInFunc = EQ; }else if (strcmp(func, "!=") == 0){ - return NEQ; + node->func->builtInFunc = NEQ; }else if (strcmp(func, ">") == 0){ - return GT; + node->func->builtInFunc = GT; }else if (strcmp(func, "<") == 0){ - return LT; + node->func->builtInFunc = LT; }else if (strcmp(func, ">=") == 0){ - return GTEQ; + node->func->builtInFunc = GTEQ; }else if (strcmp(func, "<=") == 0){ - return LTEQ; + node->func->builtInFunc = LTEQ; }else if (strcmp(func, "cast") == 0){ - return CAST; + node->func->builtInFunc = CAST; }else if (strcmp(func, "typeof") == 0){ - return TYPEOF; - }else if (strcmp(func, "terminate") == 0){ - return TERMINATE; + node->func->builtInFunc = TYPEOF; + }else if (strcmp(func, "exit") == 0){ + node->func->builtInFunc = EXIT; }else if (strcmp(func, "return") == 0){ - return RETURN; + node->func->builtInFunc = RETURN; } else { - return -1; + node->func->builtInFunc = -1; } } -char *GetFunction(char *exp){ // takes exp with 
brackets - char *out = CheckedMalloc(strlen(exp)); - int i = 1; - char c = exp[i]; - while (c != ' '){ - out[i-1] = c; - i++; - c = exp[i]; +ll_t *getUserDefinedFunction(char *function); + +void expressFunction(char *function, ast_node *node){ + if ((node->func->builtInFunc = getBuiltIn(function)) == -1){ + node->func->func = getUserDefinedFunction(function); + } else { + node->func->func = NULL; } - i++; - out[i] = '\0'; - out = CheckedRealloc(out, i); - return out; } -// TODO make it count any arg inside () as one arg -char **GetStringArgs(char *exp){ // takes exp without brackets - int spaceCount = 0; - int i = 0; - char c = exp[i]; - while (c != '\0'){ - spaceCount++; - i++; - c = exp[i]; - +ast_node *tokenize(char *input){ + ast_node *node; + + char *exp, *function, **args; + size_t i, j; + int depth; + + for (int i = 0; i < strlen(input); i++){ + if (input[i] == '('){ + depth = 1; + j = i; + exp = CheckedMalloc(strlen(input)); + while (depth != 0){ + if (input[j] == '('){ + depth++; + } else if (input[j] == ')'){ + depth--; + } + exp[j - i] = input[j+1]; + j++; + if (input[j] == '\0'){ + fprintf(stderr, "error brace not closed"); + exit(1); + } + } + j -= 2; + exp[j] = '\0'; + printf("%s\n", exp); + }else if (input[i] == '"'){ + i++; + while (input[i] != '"') i++; + } } - char **out = CheckedMalloc(spaceCount); - for (int i = 0; i < spaceCount; i++){ - out[i] = CheckedMalloc(strlen(exp)); - } + node = CheckedMalloc(sizeof(ast_node)); - int tokCounter = 0; i = 0; - int charCounter = 0; - while (exp[i] != '\0'){ - if (exp[i] != ' '){ - if (tokCounter != 0){ - out[tokCounter-1][charCounter] = exp[i]; - charCounter++; - } - } else{ - out[tokCounter][i] = '\0'; - charCounter = 0; - tokCounter++; - } - i++; + function = CheckedMalloc(strlen(exp)); + while (exp[i] != ' '){ + function[i] = exp[i]; + i++; } - return out; -} + function[i] = '\0'; + function = CheckedRealloc(function, i); + printf("%s\n", function); -ast_node *GenAst(char *exp){ // takes exp with 
brackets - ast_node *head = CheckedMalloc(sizeof(ast_node)); - char *function = GetFunction(exp); - head->builtInFunc = IsBuiltIn(function); - free(function); + expressFunction(function, node); - if (head->builtInFunc == -1){ - head->func = CheckedMalloc(sizeof(functionToken)); - head->func->id = functionIdCounter; - functionIdCounter++; - }else { - head->func = NULL; - } + free(function); + free(exp); - return head; + return NULL; } int main(){ - ast_node *node = GenAst("(+ 1 2)"); - printf("%d\n", node->builtInFunc); - - char **args = GetStringArgs("+ 1 2"); - for (int i = 0; i < 2; i++){ - printf("%s\n", args[i]); - } - - free(args); - - free(node); + char sample[] = "(+ \"hello(\" 1)"; + tokenize(sample); } diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h deleted file mode 100644 index e69de29..0000000 --- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h +++ /dev/null diff --git a/comp/lucas-standen-NEA/code/tokenizer/types.h b/comp/lucas-standen-NEA/code/tokenizer/types.h index 034dc04..82eb3df 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/types.h +++ b/comp/lucas-standen-NEA/code/tokenizer/types.h @@ -10,8 +10,6 @@ typedef enum types { FLOAT_t = 4, CHAR_T = 5, FUNCTION_T = 6, - STRUCT_T = 7, - OBJ_T = 8, } types; // int types @@ -24,14 +22,6 @@ typedef uint64_t u64; // char and float types are still called char and float so no typedef needed -// function type -typedef struct functionToken { - int id; // a function id to avoid strings - types returnType; // what the function returns - types *args; // the types of args a function takes - ll_t astHead; // the code for the function -} functionToken; - // built in functions typedef enum builtInFuncs { DEFUN = 0, @@ -43,7 +33,6 @@ typedef enum builtInFuncs { FOR = 6, WHILE = 7, SYMBOL = 8, - STRUCT = 9, // arithmetic ADD = 10, @@ -61,14 +50,24 @@ typedef enum builtInFuncs { CAST = 20, TYPEOF = 21, - TERMINATE = 22, + EXIT = 22, RETURN = 23, } 
builtInFuncs; +// function type +typedef struct functionToken { + int id; // a function id to avoid strings + types returnType; // what the function returns + types *args; // the types of args a function takes + ll_t *func; // the code for the function + builtInFuncs builtInFunc; // a built in functions +} functionToken; + +// built in functions + typedef struct ast_node ast_node; typedef struct ast_node { - builtInFuncs builtInFunc; // if it's a builtin function call use this, else -1 functionToken *func; // if it's not builtin then use this void **literalArgs; // the args of the node, this will be an array of litteral values ast_node **args; // the non litteral tokens |