diff options
Diffstat (limited to 'comp/lucas-standen-NEA/code/tokenizer')
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/Makefile | 11 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/parser.c | 5 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/sample.zpy | 5 | ||||
-rwxr-xr-x | comp/lucas-standen-NEA/code/tokenizer/tokenizer | bin | 42264 -> 0 bytes | |||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/tokenizer.c | 98 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/tokenizer.h | 12 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/types.h | 76 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/util.c | 65 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code/tokenizer/util.h | 10 |
9 files changed, 86 insertions, 196 deletions
diff --git a/comp/lucas-standen-NEA/code/tokenizer/Makefile b/comp/lucas-standen-NEA/code/tokenizer/Makefile index b09f177..73e4033 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/Makefile +++ b/comp/lucas-standen-NEA/code/tokenizer/Makefile @@ -1,10 +1,9 @@ -tokenizer: parser util tokenizer.c - cc -O3 tokenizer.c parser.o util.o -o tokenizer -ggdb +all: tokenizer parser + $(info done tokenizer!) +tokenizer: tokenizer.c + cc tokenizer.c -c -o tokenizer.o parser: parser.c - cc -O3 parser.c -c -o parser.o -ggdb -util: util.c - cc -O3 util.c -c -o util.o -ggdb + cc parser.c -c -o parser.o clean: rm -rf *.o - rm -rf tokenizer diff --git a/comp/lucas-standen-NEA/code/tokenizer/parser.c b/comp/lucas-standen-NEA/code/tokenizer/parser.c index 9ac9fde..69ec458 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/parser.c +++ b/comp/lucas-standen-NEA/code/tokenizer/parser.c @@ -1,11 +1,10 @@ #include <stdio.h> #include <stdlib.h> -#include "util.h" - -char *Parse(char *fileName); // general parser function +#include "../global/util.h" char *ReadFile(char *fileName); // reads the file into a single var +char *Parse(char *fileName); // general parser function char *ReadFile(char *filename){ FILE *f = fopen(filename, "r"); diff --git a/comp/lucas-standen-NEA/code/tokenizer/sample.zpy b/comp/lucas-standen-NEA/code/tokenizer/sample.zpy deleted file mode 100644 index f0d9700..0000000 --- a/comp/lucas-standen-NEA/code/tokenizer/sample.zpy +++ /dev/null @@ -1,5 +0,0 @@ -(let x:char[] "he -llo") - -(let y:i32 20) - diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer b/comp/lucas-standen-NEA/code/tokenizer/tokenizer Binary files differdeleted file mode 100755 index 2d2f1c0..0000000 --- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer +++ /dev/null diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c index 080951b..a76760b 100644 --- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c +++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c @@ -1,12 +1,24 @@ #include <stdio.h> #include <string.h> -#include "types.h" -#include "util.h" +#include "../global/types.h" +#include "../global/util.h" #define MAXARGS 8 +#define MAXFUNCS 2048 +#define MAXVARS 8192 -int getBuiltIn(char *func, ast_node *node){ +char *userDefinedFunctions[MAXFUNCS]; +char *userDefinedVars[MAXVARS]; +size_t userFuncCount = 0; +size_t userVarCount = 0; + +int getBuiltIn(char *func, ast_node *node); // checks if a function is built in to zippy +void expressFunction(char *function, ast_node *node); // puts a string into the ast_node struct +ast_node *tokenize(char *input); // does the tokenization +void printAst(ast_node *root); // shows an ast and its sub nodes + +int getBuiltIn(char *func, ast_node *node){ // returns NIL when the function doesn't exist if (strcmp(func, "defun") == 0){ node->func->builtInFunc= DEFUN; }else if (strcmp(func, "let") == 0){ @@ -53,6 +65,8 @@ int getBuiltIn(char *func, ast_node *node){ node->func->builtInFunc = EXIT; }else if (strcmp(func, "return") == 0){ node->func->builtInFunc = RETURN; + }else if (strcmp(func, "write") == 0){ + node->func->builtInFunc = WRITE; }else { node->func->builtInFunc = NIL; return -1; @@ -60,24 +74,10 @@ int getBuiltIn(char *func, ast_node *node){ return 0; } -ll_t *getUserDefinedFunction(char *function); - void expressFunction(char *function, ast_node *node){ node->func = CheckedMalloc(sizeof(functionToken)); - if ((getBuiltIn(function, node)) == -1){ - //node->func->func = getUserDefinedFunction(function); - } else { - node->func->func = NULL; - } -} - -void expressArgs(char **args, ast_node *node){ - for (int i = 0; i < MAXARGS; i++){ - if (node->args[i] == NULL){ - memcpy(node->literalArgs[i], args[i], strlen(args[i]) + 1); - } - } - + if ((getBuiltIn(function, node)) == NIL) // non user defined function + node->func->name = function; } ast_node *tokenize(char *input){ @@ -113,7 +113,6 @@ ast_node *tokenize(char *input){ } exp[i-2] = '\0'; exp = CheckedRealloc(exp, strlen(exp) + 1); - printf("%s\n", exp); }else if (input[i] == '"'){ i++; while (input[i] != '"') i++; @@ -129,23 +128,60 @@ ast_node *tokenize(char *input){ function[i] = '\0'; function = CheckedRealloc(function, i); - printf("%s\n", function); expressFunction(function, node); - i++; - args = Split(&input[i], ' '); - // need a length - expressArgs(args, node /* length */ ); + char *tok; + tok = strtok(strstr(exp, " ") + 1, " "); + argCount = 0; + depth = 0; + do { + if (node->args[argCount] != NULL){ + argCount++; + } + if (tok[0] != '(' && tok[strlen(tok)-1] != ')' && depth == 0){ + if (node->args[argCount] == NULL){ + node->literalArgs[argCount] = malloc(strlen(tok)+1); + node->literalArgs[argCount] = tok; + } + argCount++; + } + + if (tok[0] == '(') depth++; + if (tok[strlen(tok)-1] == ')') depth--; + tok = strtok(NULL, " "); + } while (tok != NULL); + + if (strcmp(function, "set") == 0 || strcmp(function, "let") == 0){ + char *varName; + char *varType; + varName = strtok(node->literalArgs[0], ":"); + varType = strtok(NULL, ":"); + if (strcmp(varType, "function") == 0){ + userDefinedFunctions[userFuncCount] = CheckedMalloc(25); + userDefinedFunctions[userFuncCount] = varName; + userFuncCount++; + }else { + userDefinedVars[userVarCount] = CheckedMalloc(15); + userDefinedVars[userVarCount] = varName; + userVarCount++; + } + } - free(exp); + CheckedFree(exp); return node; } -int main(){ - char sample[] = "(+ (- 2 2) 1)"; - ast_node *root = tokenize(sample); - printf("%d", root->args[0]->func->builtInFunc); - free(root); +void printAst(ast_node *root){ + printf("-----------\n"); + if (root->func->builtInFunc == -1) printf("function: %s\n", root->func->name); + else printf("function (built in): %d\n", root->func->builtInFunc); + for (int i = 0; i < MAXARGS + 1; i++){ + if (root->args[i] != NULL) printAst(root->args[i]); + else { + if (root->literalArgs[i] != NULL) printf("%s\n", root->literalArgs[i]); + } + } + printf("-----------\n"); } diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h new file mode 100644 index 0000000..3cfaaf2 --- /dev/null +++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h @@ -0,0 +1,12 @@ +#include <stdio.h> +#include <string.h> + +#define MAXARGS 8 +#define MAXFUNCS 2048 +#define MAXVARS 8192 + +int getBuiltIn(char *func, ast_node *node); // checks if a function is built in to zippy +void expressFunction(char *function, ast_node *node); // puts a string into the ast_node struct +ast_node *tokenize(char *input); // does the tokenization +void printAst(ast_node *root); // shows an ast and its sub nodes + diff --git a/comp/lucas-standen-NEA/code/tokenizer/types.h b/comp/lucas-standen-NEA/code/tokenizer/types.h deleted file mode 100644 index 8c79bd9..0000000 --- a/comp/lucas-standen-NEA/code/tokenizer/types.h +++ /dev/null @@ -1,76 +0,0 @@ -#include <stdint.h> -#include "../ads/ll/ll.h" - -// all language types -typedef enum types { - I32_T = 0, - I64_T = 1, - U32_T = 2, - U64_T = 3, - FLOAT_t = 4, - CHAR_T = 5, - FUNCTION_T = 6, -} types; - -// int types -typedef int32_t i32; -typedef int64_t i64; - -// uint types -typedef uint32_t u32; -typedef uint64_t u64; - -// char and float types are still called char and float so no typedef needed - -// built in functions -typedef enum builtInFuncs { - // general - DEFUN = 0, - LET = 1, - SET = 2, - IF = 3, - ELIF = 4, - ELSE = 5, - FOR = 6, - WHILE = 7, - SYMBOL = 8, - - // arithmetic - ADD = 10, - SUB = 11, - MUL = 12, - DIV = 13, - - // comparison - EQ = 14, - NEQ = 15, - GT = 16, - LT = 17, - GTEQ = 18, - LTEQ = 19, - - // misc - CAST = 20, - TYPEOF = 21, - EXIT = 22, - RETURN = 23, - NIL = -1, -} builtInFuncs; - -// function type -typedef struct functionToken { - int id; // a function id to avoid strings - types returnType; // what the function returns - types *args; // the types of args a function takes - ll_t *func; // the code for the function - builtInFuncs builtInFunc; // a built in functions -} functionToken; - -typedef struct ast_node ast_node; - -typedef struct ast_node { - functionToken *func; // if it's not builtin then use this - void **literalArgs; // the args of the node, this will be an array of litteral values - ast_node **args; // the non litteral tokens - // if litteralArgs[x] is real then args[x] should be NULL, and vice versa -} ast_node; diff --git a/comp/lucas-standen-NEA/code/tokenizer/util.c b/comp/lucas-standen-NEA/code/tokenizer/util.c deleted file mode 100644 index 46deba8..0000000 --- a/comp/lucas-standen-NEA/code/tokenizer/util.c +++ /dev/null @@ -1,65 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <errno.h> -#include <error.h> - -void Die(); // brings down the program -void *CheckedMalloc(long size); // malloc checked -void *CheckedRealloc(void *out, long size); // realloc checked -char **Split(char *s, char c); // splits a string into an array of strings around c - -void Die(){ - perror("zpy parser"); - exit(errno); -} - -void *CheckedMalloc(long size){ - void *out = malloc(size); - if (out == NULL) - Die(); - return out; -} - -void *CheckedRealloc(void *orig, long size){ - void *out = realloc(orig, size); - if (out == NULL) - Die(); - return out; -} - -static size_t countSegment(char const *s, char c){ - size_t counter = 0; - int i = 0; - while (s[i]){ - if (s[i] == c){ - i++; - continue; - } - counter++; - while (s[i] && s[i] != c) i++; - } - return counter; -} - -char **Split(char *s, char c){ - char **strs; - size_t tab_counter; - size_t i; - size_t j; - - if (s == NULL) return NULL; - tab_counter = countSegment(s, c); - if ((strs = (char**)CheckedMalloc(sizeof(char*) * (tab_counter + 1))) == NULL) return NULL; - tab_counter = 0; - j = -1; - while (s[++j]) { - if (s[j] == c) continue; - i = 0; - while (s[j + i] && s[j + i] != c) i++; - if ((strs[tab_counter++] = strndup(&s[j], i)) == NULL) return NULL; - j += i - 1; - } - strs[tab_counter] = NULL; - return strs; -} diff --git a/comp/lucas-standen-NEA/code/tokenizer/util.h b/comp/lucas-standen-NEA/code/tokenizer/util.h deleted file mode 100644 index c25ebec..0000000 --- a/comp/lucas-standen-NEA/code/tokenizer/util.h +++ /dev/null @@ -1,10 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <errno.h> -#include <error.h> - -void Die(); // brings down the program -void *CheckedMalloc(long size); // malloc checked -void *CheckedRealloc(void *out, long size); // realloc checked -char **Split(char *s, char c); // splits a string into an array of strings around c |