summary refs log tree commit diff
path: root/comp/lucas-standen-NEA/code/tokenizer
diff options
context:
space:
mode:
Diffstat (limited to 'comp/lucas-standen-NEA/code/tokenizer')
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/Makefile 9
-rwxr-xr-x comp/lucas-standen-NEA/code/tokenizer/tokenizer bin 24392 -> 42264 bytes
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/tokenizer.c 203
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/tokenizer.h 0
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/types.h 26
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/util.c 38
-rw-r--r-- comp/lucas-standen-NEA/code/tokenizer/util.h 7
7 files changed, 162 insertions, 121 deletions
diff --git a/comp/lucas-standen-NEA/code/tokenizer/Makefile b/comp/lucas-standen-NEA/code/tokenizer/Makefile
index 479b838..b09f177 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/Makefile
+++ b/comp/lucas-standen-NEA/code/tokenizer/Makefile
@@ -1,10 +1,9 @@
tokenizer: parser util tokenizer.c
- cc -O3 tokenizer.c parser.o util.o -o tokenizer
+ cc -O3 tokenizer.c parser.o util.o -o tokenizer -ggdb
parser: parser.c
- cc -O3 parser.c -c -o parser.o
-util: util.c
- cc -O3 util.c -c -o util.o
-
+ cc -O3 parser.c -c -o parser.o -ggdb
+util: util.c
+ cc -O3 util.c -c -o util.o -ggdb
clean:
rm -rf *.o
rm -rf tokenizer
diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer b/comp/lucas-standen-NEA/code/tokenizer/tokenizer
index 726ee21..2d2f1c0 100755
--- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer
+++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer
Binary files differ
diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
index 5cc596f..080951b 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
+++ b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.c
@@ -1,154 +1,151 @@
-#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#include "parser.h"
-#include "util.h"
#include "types.h"
+#include "util.h"
-int functionIdCounter = 0;
-
-ast_node *GenAst(char *exp); // generates the ast of 1 expression
-int getArgCount(char *exp); // counts how many args are pressent in exp
-char **GetStringArgs(char *exp); // gets the string args of an expression
-char *GetFunction(char *exp); // gets the function used in 1 expression
-builtInFuncs IsBuiltIn(char *func); // returns the id of a function thats built in, or -1 if its not
-
+#define MAXARGS 8
-builtInFuncs IsBuiltIn(char *func){
+int getBuiltIn(char *func, ast_node *node){ // stores the builtInFuncs id matching the name func in node->func->builtInFunc; node->func must already point at a functionToken; returns 0 on a match, -1 otherwise
if (strcmp(func, "defun") == 0){
- return DEFUN;
+ node->func->builtInFunc= DEFUN;
}else if (strcmp(func, "let") == 0){
- return LET;
+ node->func->builtInFunc = LET;
}else if (strcmp(func, "set") == 0){
- return SET;
+ node->func->builtInFunc = SET;
}else if (strcmp(func, "if") == 0){
- return IF;
+ node->func->builtInFunc = IF;
}else if (strcmp(func, "elif") == 0){
- return ELIF;
+ node->func->builtInFunc = ELIF;
}else if (strcmp(func, "else") == 0){
- return ELSE;
+ node->func->builtInFunc = ELSE;
}else if (strcmp(func, "for") == 0){
- return FOR;
+ node->func->builtInFunc = FOR;
}else if (strcmp(func, "while") == 0){
- return WHILE;
+ node->func->builtInFunc = WHILE;
}else if (strcmp(func, "symbol") == 0){
- return SYMBOL;
- }else if (strcmp(func, "struct") == 0){
- return STRUCT;
+ node->func->builtInFunc = SYMBOL;
}else if (strcmp(func, "+") == 0){
- return ADD;
+ node->func->builtInFunc = ADD;
}else if (strcmp(func, "-") == 0){
- return SUB;
+ node->func->builtInFunc = SUB;
}else if (strcmp(func, "*") == 0){
- return MUL;
+ node->func->builtInFunc = MUL;
}else if (strcmp(func, "/") == 0){
- return DIV;
+ node->func->builtInFunc = DIV;
}else if (strcmp(func, "=") == 0){
- return EQ;
+ node->func->builtInFunc = EQ;
}else if (strcmp(func, "!=") == 0){
- return NEQ;
+ node->func->builtInFunc = NEQ;
}else if (strcmp(func, ">") == 0){
- return GT;
+ node->func->builtInFunc = GT;
}else if (strcmp(func, "<") == 0){
- return LT;
+ node->func->builtInFunc = LT;
}else if (strcmp(func, ">=") == 0){
- return GTEQ;
+ node->func->builtInFunc = GTEQ;
}else if (strcmp(func, "<=") == 0){
- return LTEQ;
+ node->func->builtInFunc = LTEQ;
}else if (strcmp(func, "cast") == 0){
- return CAST;
+ node->func->builtInFunc = CAST;
}else if (strcmp(func, "typeof") == 0){
- return TYPEOF;
- }else if (strcmp(func, "terminate") == 0){
- return TERMINATE;
+ node->func->builtInFunc = TYPEOF;
+ }else if (strcmp(func, "exit") == 0){
+ node->func->builtInFunc = EXIT;
}else if (strcmp(func, "return") == 0){
- return RETURN;
- }
- else {
+ node->func->builtInFunc = RETURN;
+ }else {
+ node->func->builtInFunc = NIL; // no built-in name matched
return -1;
}
+ return 0; // one of the built-in names matched
}
-char *GetFunction(char *exp){ // takes exp with brackets
- char *out = CheckedMalloc(strlen(exp));
- int i = 1;
- char c = exp[i];
- while (c != ' '){
- out[i-1] = c;
- i++;
- c = exp[i];
+ll_t *getUserDefinedFunction(char *function);
+
+void expressFunction(char *function, ast_node *node){ // allocates node->func and classifies the name function as built in or not
+ node->func = CheckedMalloc(sizeof(functionToken));
+ if ((getBuiltIn(function, node)) == -1){ // not built in; node->func->func stays unassigned while the lookup below is commented out
+ //node->func->func = getUserDefinedFunction(function);
+ } else {
+ node->func->func = NULL; // matched a built-in; clear the user-defined code pointer
}
- i++;
- out[i] = '\0';
- out = CheckedRealloc(out, i);
- return out;
}
-// TODO make it count any arg inside () as one arg
-char **GetStringArgs(char *exp){ // takes exp without brackets
- int spaceCount = 0;
- int i = 0;
- char c = exp[i];
- while (c != '\0'){
- spaceCount++;
- i++;
- c = exp[i];
-
+void expressArgs(char **args, ast_node *node){ // copies each args[i] string (terminator included) into node->literalArgs[i] for every slot whose node->args[i] is NULL
+ for (int i = 0; i < MAXARGS; i++){ // NOTE(review): always walks MAXARGS slots; the length of args is not checked here
+ if (node->args[i] == NULL){
+ memcpy(node->literalArgs[i], args[i], strlen(args[i]) + 1); // NOTE(review): nothing in this function allocates literalArgs[i] before it is written through
+ }
}
+
+}
- char **out = CheckedMalloc(spaceCount);
- for (int i = 0; i < spaceCount; i++){
- out[i] = CheckedMalloc(strlen(exp));
- }
+ast_node *tokenize(char *input){ // builds an ast_node for the parenthesised expression at the start of input, recursing for nested expressions, and returns it
+ ast_node *node, *child;
- int tokCounter = 0;
- i = 0;
- int charCounter = 0;
- while (exp[i] != '\0'){
- if (exp[i] != ' '){
- if (tokCounter != 0){
- out[tokCounter-1][charCounter] = exp[i];
- charCounter++;
+ char *exp, *function, **args;
+ size_t i = 0, argCount = -1; // argCount is unsigned, so -1 wraps; the first space seen brings it to 0
+ int depth = 0;
+
+ node = CheckedMalloc(sizeof(ast_node));
+ node->args = CheckedMalloc(sizeof(ast_node) * MAXARGS); // NOTE(review): sized with sizeof(ast_node) although the elements are ast_node pointers; expressArgs compares these slots with NULL, so confirm CheckedMalloc zeroes memory
+ node->literalArgs = CheckedMalloc(sizeof(void *) * MAXARGS);
+
+ if (input[i] == '('){
+ depth = 1;
+ i++;
+ exp = CheckedMalloc(strlen(input));
+ while (depth != 0){ // copy characters until the opening paren is balanced
+ if (input[i] == ' ') argCount++; // each space advances the argument slot
+ if (input[i] == '('){
+ child = tokenize(&input[i]); // recurse on the nested expression starting here
+ node->args[argCount] = child; // NOTE(review): if no space preceded this paren, argCount is still the wrapped -1
+ depth++; // i is not moved past the child, so this loop walks the nested text too and tracks its parens
+ } else if (input[i] == ')'){
+ depth--;
+ }
+ exp[i - 1] = input[i]; // exp mirrors input shifted by one, dropping the leading paren
+ if (input[i] == '\0'){ // reached the end of input while parens were still open
+ fprintf(stderr, "error brace not closed\n");
+ exit(1);
}
- } else{
- out[tokCounter][i] = '\0';
- charCounter = 0;
- tokCounter++;
+ i++;
}
+ exp[i-2] = '\0'; // overwrites the copied final closing paren with the terminator
+ exp = CheckedRealloc(exp, strlen(exp) + 1);
+ printf("%s\n", exp);
+ }else if (input[i] == '"'){
i++;
+ while (input[i] != '"') i++;
}
- return out;
-}
-
-ast_node *GenAst(char *exp){ // takes exp with brackets
- ast_node *head = CheckedMalloc(sizeof(ast_node));
- char *function = GetFunction(exp);
- head->builtInFunc = IsBuiltIn(function);
- free(function);
- if (head->builtInFunc == -1){
- head->func = CheckedMalloc(sizeof(functionToken));
- head->func->id = functionIdCounter;
- functionIdCounter++;
- }else {
- head->func = NULL;
+ i = 0;
+ function = CheckedMalloc(strlen(exp)); // NOTE(review): exp is only assigned in the open-paren branch above
+ while (exp[i] != ' '){ // the function name is the text before the first space; NOTE(review): the terminator is not checked for
+ function[i] = exp[i];
+ i++;
}
- return head;
-}
+ function[i] = '\0';
+ function = CheckedRealloc(function, i); // NOTE(review): shrinks to i bytes, which excludes the terminator just written at index i
+ printf("%s\n", function);
-int main(){
- ast_node *node = GenAst("(+ 1 2)");
- printf("%d\n", node->builtInFunc);
+ expressFunction(function, node);
- char **args = GetStringArgs("+ 1 2");
- for (int i = 0; i < 2; i++){
- printf("%s\n", args[i]);
- }
+ i++;
+ args = Split(&input[i], ' '); // splits everything after the function name on spaces; text inside nested expressions is split as well
+ // need a length
+ expressArgs(args, node /* length */ );
- free(args);
+ free(exp); // NOTE(review): function and args are not released here
- free(node);
+ return node;
+}
+
+int main(){ // demo driver: tokenizes one hard-coded expression
+ char sample[] = "(+ (- 2 2) 1)";
+ ast_node *root = tokenize(sample);
+ printf("%d", root->args[0]->func->builtInFunc); // prints the builtInFunc id of the first nested expression
+ free(root); // releases only the root node itself
}
diff --git a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h b/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h
deleted file mode 100644
index e69de29..0000000
--- a/comp/lucas-standen-NEA/code/tokenizer/tokenizer.h
+++ /dev/null
diff --git a/comp/lucas-standen-NEA/code/tokenizer/types.h b/comp/lucas-standen-NEA/code/tokenizer/types.h
index 034dc04..8c79bd9 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/types.h
+++ b/comp/lucas-standen-NEA/code/tokenizer/types.h
@@ -10,8 +10,6 @@ typedef enum types {
FLOAT_t = 4,
CHAR_T = 5,
FUNCTION_T = 6,
- STRUCT_T = 7,
- OBJ_T = 8,
} types;
// int types
@@ -24,16 +22,9 @@ typedef uint64_t u64;
// char and float types are still called char and float so no typedef needed
-// function type
-typedef struct functionToken {
- int id; // a function id to avoid strings
- types returnType; // what the function returns
- types *args; // the types of args a function takes
- ll_t astHead; // the code for the function
-} functionToken;
-
// built in functions
typedef enum builtInFuncs {
+ // general
DEFUN = 0,
LET = 1,
SET = 2,
@@ -43,7 +34,6 @@ typedef enum builtInFuncs {
FOR = 6,
WHILE = 7,
SYMBOL = 8,
- STRUCT = 9,
// arithmetic
ADD = 10,
@@ -59,16 +49,26 @@ typedef enum builtInFuncs {
GTEQ = 18,
LTEQ = 19,
+ // misc
CAST = 20,
TYPEOF = 21,
- TERMINATE = 22,
+ EXIT = 22,
RETURN = 23,
+ NIL = -1,
} builtInFuncs;
+// function type
+typedef struct functionToken {
+ int id; // a function id to avoid strings
+ types returnType; // what the function returns
+ types *args; // the types of args a function takes
+ ll_t *func; // the code for the function; expressFunction sets this to NULL for built-ins
+ builtInFuncs builtInFunc; // which built-in function this is, or NIL when the name is not built in
+} functionToken;
+
typedef struct ast_node ast_node;
typedef struct ast_node {
- builtInFuncs builtInFunc; // if it's a builtin function call use this, else -1
functionToken *func; // if it's not builtin then use this
void **literalArgs; // the args of the node, this will be an array of litteral values
ast_node **args; // the non litteral tokens
diff --git a/comp/lucas-standen-NEA/code/tokenizer/util.c b/comp/lucas-standen-NEA/code/tokenizer/util.c
index de5b6b2..46deba8 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/util.c
+++ b/comp/lucas-standen-NEA/code/tokenizer/util.c
@@ -1,4 +1,5 @@
#include <stdio.h>
+#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <error.h>
@@ -6,6 +7,7 @@
void Die(); // brings down the program
void *CheckedMalloc(long size); // malloc checked
void *CheckedRealloc(void *out, long size); // realloc checked
+char **Split(char *s, char c); // splits a string into an array of strings around c
void Die(){
perror("zpy parser");
@@ -25,3 +27,39 @@ void *CheckedRealloc(void *orig, long size){
Die();
return out;
}
+
+static size_t countSegment(char const *s, char c){ // counts the non-empty runs of characters in s that are separated by c
+ size_t counter = 0;
+ int i = 0;
+ while (s[i]){
+ if (s[i] == c){ // skip separator characters
+ i++;
+ continue;
+ }
+ counter++; // first character of a new segment
+ while (s[i] && s[i] != c) i++; // consume the rest of that segment
+ }
+ return counter;
+}
+
+char **Split(char *s, char c){ // splits s around c into a NULL-terminated array of newly allocated strings; empty segments are skipped; returns NULL when s is NULL
+ char **strs;
+ size_t tab_counter;
+ size_t i;
+ size_t j;
+
+ if (s == NULL) return NULL;
+ tab_counter = countSegment(s, c); // number of segments decides the array size (plus one slot for the NULL end marker)
+ if ((strs = (char**)CheckedMalloc(sizeof(char*) * (tab_counter + 1))) == NULL) return NULL;
+ tab_counter = 0; // reused as the write index into strs
+ j = -1; // unsigned wrap; the pre-increment in the loop condition brings it to 0
+ while (s[++j]) {
+ if (s[j] == c) continue;
+ i = 0;
+ while (s[j + i] && s[j + i] != c) i++; // i becomes the length of the segment starting at j
+ if ((strs[tab_counter++] = strndup(&s[j], i)) == NULL) return NULL; // NOTE(review): strs and the earlier copies are not freed on this path
+ j += i - 1; // land on the last character of the segment; the next ++j steps past it
+ }
+ strs[tab_counter] = NULL;
+ return strs;
+}
diff --git a/comp/lucas-standen-NEA/code/tokenizer/util.h b/comp/lucas-standen-NEA/code/tokenizer/util.h
index cbcbdfa..c25ebec 100644
--- a/comp/lucas-standen-NEA/code/tokenizer/util.h
+++ b/comp/lucas-standen-NEA/code/tokenizer/util.h
@@ -1,3 +1,10 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <error.h>
+
void Die(); // brings down the program
void *CheckedMalloc(long size); // malloc checked
void *CheckedRealloc(void *out, long size); // realloc checked
+char **Split(char *s, char c); // splits a string into an array of strings around c