summary | refs | log | tree | commit | diff
path: root/comp/lucas-standen-NEA/code/tokenizer
diff options
context:
space:
mode:
Diffstat (limited to 'comp/lucas-standen-NEA/code/tokenizer')
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/Makefile 11
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/parser.c 5
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/sample.zpy 5
-rwxr-xr-x comp/lucas-standen-NEA/code/tokenizer/tokenizer bin 42264 -> 0 bytes
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/tokenizer.c 98
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/tokenizer.h 12
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/types.h 76
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/util.c 65
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/util.h 10
9 files changed, 86 insertions, 196 deletions
diff --git a/comp/lucas-standen-NEA/code/tokenizer/Makefile b/comp/lucas-standen-NEA/code/tokenizer/Makefile
index b09f177..73e4033 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/Makefile
+++ b/comp/lucas-standen-NEA/code/tokenizer/Makefile
@@ -1,10 +1,9 @@
-tokenizer: parser util tokenizer.c
- cc -O3 tokenizer.c parser.o util.o -o tokenizer -ggdb
+all: tokenizer parser
+ $(info done tokenizer!)
+tokenizer: tokenizer.c
+ cc tokenizer.c -c -o tokenizer.o
parser: parser.c
- cc -O3 parser.c -c -o parser.o -ggdb
-util: util.c
- cc -O3 util.c -c -o util.o -ggdb
+ cc parser.c -c -o parser.o
clean:
rm -rf *.o
- rm -rf tokenizer
diff --git a/comp/lucas-standen-NEA/code/tokenizer/parser.c b/comp/lucas-standen-NEA/code/tokenizer/parser.c
index 9ac9fde..69ec458 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/parser.c
+++ b/comp/lucas-standen-NEA/code/tokenizer/parser.c
@@ -1,11 +1,10 @@
#include <stdio.h>
#include <stdlib.h>
-#include "util.h"
-
-char *Parse(char *fileName); // general parser function
+#include "../global/util.h"
char *ReadFile(char *fileName); // reads the file into a single var
+char *Parse(char *fileName); // general parser function
char *ReadFile(char *filename){
FILE *f = fopen(filename, "r");
diff --git a/comp/lucas-standen-NEA/code/tokenizer/sample.zpy b/comp/lucas-standen-NEA/code/tokenizer/sample.zpy
deleted file mode 100644
index f0d9700..0000000
--- a/comp/lucas-standen-NEA/code/tokenizer/sample.zpy
+++ /dev/null
@@ -1,5 +0,0 @@
-(let x:char[] "he
-llo")
-
-(let y:i32 20)
-
diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer b/comp/lucas-standen-NEA/code/tokenizer/tokenizer
deleted file mode 100755
index 2d2f1c0..0000000
--- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer
+++ /dev/null
Binary files differ
diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
index 080951b..a76760b 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
+++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
@@ -1,12 +1,24 @@
#include <stdio.h>
#include <string.h>
-#include "types.h"
-#include "util.h"
+#include "../global/types.h"
+#include "../global/util.h"
#define MAXARGS 8
+#define MAXFUNCS 2048
+#define MAXVARS 8192
-int getBuiltIn(char *func, ast_node *node){
+char *userDefinedFunctions[MAXFUNCS];
+char *userDefinedVars[MAXVARS];
+size_t userFuncCount = 0;
+size_t userVarCount = 0;
+
+int getBuiltIn(char *func, ast_node *node); // checks if a function is built in to zippy
+void expressFunction(char *function, ast_node *node); // puts a string into the ast_node struct
+ast_node *tokenize(char *input); // does the tokenization
+void printAst(ast_node *root); // shows an ast and its sub nodes
+
+int getBuiltIn(char *func, ast_node *node){ // returns NIL when the function doesn't exist
if (strcmp(func, "defun") == 0){
node->func->builtInFunc= DEFUN;
}else if (strcmp(func, "let") == 0){
@@ -53,6 +65,8 @@ int getBuiltIn(char *func, ast_node *node){
node->func->builtInFunc = EXIT;
}else if (strcmp(func, "return") == 0){
node->func->builtInFunc = RETURN;
+ }else if (strcmp(func, "write") == 0){
+ node->func->builtInFunc = WRITE;
}else {
node->func->builtInFunc = NIL;
return -1;
@@ -60,24 +74,10 @@ int getBuiltIn(char *func, ast_node *node){
return 0;
}
-ll_t *getUserDefinedFunction(char *function);
-
void expressFunction(char *function, ast_node *node){
node->func = CheckedMalloc(sizeof(functionToken));
- if ((getBuiltIn(function, node)) == -1){
- //node->func->func = getUserDefinedFunction(function);
- } else {
- node->func->func = NULL;
- }
-}
-
-void expressArgs(char **args, ast_node *node){
- for (int i = 0; i < MAXARGS; i++){
- if (node->args[i] == NULL){
- memcpy(node->literalArgs[i], args[i], strlen(args[i]) + 1);
- }
- }
-
+ if ((getBuiltIn(function, node)) == NIL) // non user defined function
+ node->func->name = function;
}
ast_node *tokenize(char *input){
@@ -113,7 +113,6 @@ ast_node *tokenize(char *input){
}
exp[i-2] = '\0';
exp = CheckedRealloc(exp, strlen(exp) + 1);
- printf("%s\n", exp);
}else if (input[i] == '"'){
i++;
while (input[i] != '"') i++;
@@ -129,23 +128,60 @@ ast_node *tokenize(char *input){
function[i] = '\0';
function = CheckedRealloc(function, i);
- printf("%s\n", function);
expressFunction(function, node);
- i++;
- args = Split(&input[i], ' ');
- // need a length
- expressArgs(args, node /* length */ );
+ char *tok;
+ tok = strtok(strstr(exp, " ") + 1, " ");
+ argCount = 0;
+ depth = 0;
+ do {
+ if (node->args[argCount] != NULL){
+ argCount++;
+ }
+ if (tok[0] != '(' && tok[strlen(tok)-1] != ')' && depth == 0){
+ if (node->args[argCount] == NULL){
+ node->literalArgs[argCount] = malloc(strlen(tok)+1);
+ node->literalArgs[argCount] = tok;
+ }
+ argCount++;
+ }
+
+ if (tok[0] == '(') depth++;
+ if (tok[strlen(tok)-1] == ')') depth--;
+ tok = strtok(NULL, " ");
+ } while (tok != NULL);
+
+ if (strcmp(function, "set") == 0 || strcmp(function, "let") == 0){
+ char *varName;
+ char *varType;
+ varName = strtok(node->literalArgs[0], ":");
+ varType = strtok(NULL, ":");
+ if (strcmp(varType, "function") == 0){
+ userDefinedFunctions[userFuncCount] = CheckedMalloc(25);
+ userDefinedFunctions[userFuncCount] = varName;
+ userFuncCount++;
+ }else {
+ userDefinedVars[userVarCount] = CheckedMalloc(15);
+ userDefinedVars[userVarCount] = varName;
+ userVarCount++;
+ }
+ }
- free(exp);
+ CheckedFree(exp);
return node;
}
-int main(){
- char sample[] = "(+ (- 2 2) 1)";
- ast_node *root = tokenize(sample);
- printf("%d", root->args[0]->func->builtInFunc);
- free(root);
+void printAst(ast_node *root){
+ printf("-----------\n");
+ if (root->func->builtInFunc == -1) printf("function: %s\n", root->func->name);
+ else printf("function (built in): %d\n", root->func->builtInFunc);
+ for (int i = 0; i < MAXARGS + 1; i++){
+ if (root->args[i] != NULL) printAst(root->args[i]);
+ else {
+ if (root->literalArgs[i] != NULL) printf("%s\n", root->literalArgs[i]);
+ }
+ }
+ printf("-----------\n");
}
diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h
new file mode 100644
index 0000000..3cfaaf2
--- /dev/null
+++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h
@@ -0,0 +1,12 @@
+#include <stdio.h>
+#include <string.h>
+
+#define MAXARGS 8
+#define MAXFUNCS 2048
+#define MAXVARS 8192
+
+int getBuiltIn(char *func, ast_node *node); // checks if a function is built in to zippy
+void expressFunction(char *function, ast_node *node); // puts a string into the ast_node struct
+ast_node *tokenize(char *input); // does the tokenization
+void printAst(ast_node *root); // shows an ast and its sub nodes
+
diff --git a/comp/lucas-standen-NEA/code/tokenizer/types.h b/comp/lucas-standen-NEA/code/tokenizer/types.h
deleted file mode 100644
index 8c79bd9..0000000
--- a/comp/lucas-standen-NEA/code/tokenizer/types.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <stdint.h>
-#include "../ads/ll/ll.h"
-
-// all language types
-typedef enum types {
- I32_T = 0,
- I64_T = 1,
- U32_T = 2,
- U64_T = 3,
- FLOAT_t = 4,
- CHAR_T = 5,
- FUNCTION_T = 6,
-} types;
-
-// int types
-typedef int32_t i32;
-typedef int64_t i64;
-
-// uint types
-typedef uint32_t u32;
-typedef uint64_t u64;
-
-// char and float types are still called char and float so no typedef needed
-
-// built in functions
-typedef enum builtInFuncs {
- // general
- DEFUN = 0,
- LET = 1,
- SET = 2,
- IF = 3,
- ELIF = 4,
- ELSE = 5,
- FOR = 6,
- WHILE = 7,
- SYMBOL = 8,
-
- // arithmetic
- ADD = 10,
- SUB = 11,
- MUL = 12,
- DIV = 13,
-
- // comparison
- EQ = 14,
- NEQ = 15,
- GT = 16,
- LT = 17,
- GTEQ = 18,
- LTEQ = 19,
-
- // misc
- CAST = 20,
- TYPEOF = 21,
- EXIT = 22,
- RETURN = 23,
- NIL = -1,
-} builtInFuncs;
-
-// function type
-typedef struct functionToken {
- int id; // a function id to avoid strings
- types returnType; // what the function returns
- types *args; // the types of args a function takes
- ll_t *func; // the code for the function
- builtInFuncs builtInFunc; // a built in functions
-} functionToken;
-
-typedef struct ast_node ast_node;
-
-typedef struct ast_node {
- functionToken *func; // if it's not builtin then use this
- void **literalArgs; // the args of the node, this will be an array of litteral values
- ast_node **args; // the non litteral tokens
- // if litteralArgs[x] is real then args[x] should be NULL, and vice versa
-} ast_node;
diff --git a/comp/lucas-standen-NEA/code/tokenizer/util.c b/comp/lucas-standen-NEA/code/tokenizer/util.c
deleted file mode 100644
index 46deba8..0000000
--- a/comp/lucas-standen-NEA/code/tokenizer/util.c
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <error.h>
-
-void Die(); // brings down the program
-void *CheckedMalloc(long size); // malloc checked
-void *CheckedRealloc(void *out, long size); // realloc checked
-char **Split(char *s, char c); // splits a string into an array of strings around c
-
-void Die(){
- perror("zpy parser");
- exit(errno);
-}
-
-void *CheckedMalloc(long size){
- void *out = malloc(size);
- if (out == NULL)
- Die();
- return out;
-}
-
-void *CheckedRealloc(void *orig, long size){
- void *out = realloc(orig, size);
- if (out == NULL)
- Die();
- return out;
-}
-
-static size_t countSegment(char const *s, char c){
- size_t counter = 0;
- int i = 0;
- while (s[i]){
- if (s[i] == c){
- i++;
- continue;
- }
- counter++;
- while (s[i] && s[i] != c) i++;
- }
- return counter;
-}
-
-char **Split(char *s, char c){
- char **strs;
- size_t tab_counter;
- size_t i;
- size_t j;
-
- if (s == NULL) return NULL;
- tab_counter = countSegment(s, c);
- if ((strs = (char**)CheckedMalloc(sizeof(char*) * (tab_counter + 1))) == NULL) return NULL;
- tab_counter = 0;
- j = -1;
- while (s[++j]) {
- if (s[j] == c) continue;
- i = 0;
- while (s[j + i] && s[j + i] != c) i++;
- if ((strs[tab_counter++] = strndup(&s[j], i)) == NULL) return NULL;
- j += i - 1;
- }
- strs[tab_counter] = NULL;
- return strs;
-}
diff --git a/comp/lucas-standen-NEA/code/tokenizer/util.h b/comp/lucas-standen-NEA/code/tokenizer/util.h
deleted file mode 100644
index c25ebec..0000000
--- a/comp/lucas-standen-NEA/code/tokenizer/util.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <error.h>
-
-void Die(); // brings down the program
-void *CheckedMalloc(long size); // malloc checked
-void *CheckedRealloc(void *out, long size); // realloc checked
-char **Split(char *s, char c); // splits a string into an array of strings around c