finished the new tokenizer

author: Thing1 <thing1@seacrossedlovers.xyz> 2024-08-29 15:01:34 +0100
committer: Thing1 <thing1@seacrossedlovers.xyz> 2024-08-29 15:01:34 +0100
commit: d28f618c0e4c3da57c856a31d9ce3003a086e7ed (patch)
tree: 9edb35867649ba55268f40591015f4ff226f5abf /comp
parent: cfdd3c90877b59dc674cc9f68c0b7b4bb7c14ba8 (diff)
14 files changed, 122 insertions, 44 deletions
diff --git a/comp/lucas-standen-NEA/code2/Makefile b/comp/lucas-standen-NEA/code2/Makefile
index 2b37de2..d9aa963 100644
--- a/comp/lucas-standen-NEA/code2/Makefile
+++ b/comp/lucas-standen-NEA/code2/Makefile
@@ -1,7 +1,7 @@
-CFLAGS= -O0 -ggdb
+CFLAGS= -O3 
 
-all: _zpy _parser _tokenizer _util
-	cc zpy.o parser.o tokenizer.o util.o -o zpy ${CFLAGS}
+all: _zpy _parser _tokenizer _util _debug
+	cc zpy.o parser.o tokenizer.o util.o debug.o -o zpy ${CFLAGS}
 
 _zpy: zpy.c
 	cc zpy.c -c -o zpy.o ${CFLAGS}
@@ -12,5 +12,8 @@ _tokenizer: tokenizer.c
 _util: util.c
 	cc util.c -c -o util.o ${CFLAGS}
 
+_debug:
+	cc debug.c -c -o debug.o ${CFLAGS}
+
 clean:
 	rm -rf zpy *.o
diff --git a/comp/lucas-standen-NEA/code2/TODO b/comp/lucas-standen-NEA/code2/TODO
deleted file mode 100644
index 34deccd..0000000
--- a/comp/lucas-standen-NEA/code2/TODO
+++ /dev/null
@@ -1,2 +0,0 @@
-make the tokenizer work, it needs to call recursively whenever it see's a '(' execept the first
-expression, perhaps cut them off before the call
diff --git a/comp/lucas-standen-NEA/code2/debug.c b/comp/lucas-standen-NEA/code2/debug.c
new file mode 100644
index 0000000..3d2d75b
--- /dev/null
+++ b/comp/lucas-standen-NEA/code2/debug.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include "tokenizer.h"
+
+#include "util.h"
+
+void printAST(astNode *head){ // debug helper: recursively prints head's func, then its args and child nodes, to stdout
+	printf("\n>>>\nfunc: %s\n", head->func);
+	printf("args: ");
+	for (int i = 0; i < 8; i++){ // 8 matches the size of the args/children arrays in astNode
+		if (head->children[i] == NULL && head->args[i] == NULL){ // slot i holds neither an arg nor a child: treat it as the end of this node
+			printf("<<<");
+			return; // note: returns without printing the trailing newline below
+		}
+				
+		if (head->args[i] != NULL){ // slot holds a plain string argument
+			printf("%s ", head->args[i]);
+		}
+		if (head->children[i] != NULL){ // slot holds a nested expression: print it recursively
+			printAST(head->children[i]);
+		}
+
+	} // NOTE(review): relies on unused slots being NULL - confirm nodes are zeroed when allocated
+	printf("\n"); // only reached when all 8 slots were in use
+}
diff --git a/comp/lucas-standen-NEA/code2/debug.h b/comp/lucas-standen-NEA/code2/debug.h
new file mode 100644
index 0000000..ae0f931
--- /dev/null
+++ b/comp/lucas-standen-NEA/code2/debug.h
@@ -0,0 +1,4 @@
+#include <stdio.h>
+#include "util.h"
+
+void printAST(astNode *head);
diff --git a/comp/lucas-standen-NEA/code2/parser.c b/comp/lucas-standen-NEA/code2/parser.c
index ebf8e47..d9cb7bd 100644
--- a/comp/lucas-standen-NEA/code2/parser.c
+++ b/comp/lucas-standen-NEA/code2/parser.c
@@ -5,7 +5,12 @@
 
 #include "util.h"
 
-int countChars(char *s, char c){
+typedef struct strings { // a list of strings together with its length; same layout as the definition in parser.h
+	char **strs; // the strings themselves (parse() stores its token array here)
+	int count; // number of entries in strs (parse() stores its token count here)
+} strings;
+
+int countChars(char *s, char c){ // counts the number of times c occurs in s
 	int count = 0;
 	for (int i = 0; i < strlen(s); i++){
 		if (s[i] == c) count++;	
@@ -13,7 +18,7 @@ int countChars(char *s, char c){
 	return count;
 }
 
-char **parse(FILE *f){
+strings *parse(FILE *f){
 	fseek(f, 0, SEEK_END);
 	int len = ftell(f);
 	rewind(f);
@@ -43,9 +48,12 @@ char **parse(FILE *f){
 			tokCount++;
 		}	
 	}
+	strings *strs = malloc(sizeof(strings));
+	strs->strs = tokens;
+	strs->count = tokCount;
 	
 	free(line);
 	free(contents);
 
-	return tokens;
+	return strs;
 }
diff --git a/comp/lucas-standen-NEA/code2/parser.h b/comp/lucas-standen-NEA/code2/parser.h
index 94b5859..80a5f08 100644
--- a/comp/lucas-standen-NEA/code2/parser.h
+++ b/comp/lucas-standen-NEA/code2/parser.h
@@ -1,3 +1,13 @@
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
 
-char **parse(FILE *f);
+#include "util.h"
+
+typedef struct strings { // a list of strings together with its length; returned by parse()
+	char **strs; // the strings themselves
+	int count; // number of entries in strs
+} strings;
+
+strings *parse(FILE *f);
diff --git a/comp/lucas-standen-NEA/code2/sample.zpy b/comp/lucas-standen-NEA/code2/sample.zpy
index 7ffc605..fe7e1ae 100644
--- a/comp/lucas-standen-NEA/code2/sample.zpy
+++ b/comp/lucas-standen-NEA/code2/sample.zpy
@@ -1 +1 @@
-(let a:i64 (+ 2 3))
+(let a:i64 (+ (- 1 4) 3))
diff --git a/comp/lucas-standen-NEA/code2/tokenizer.c b/comp/lucas-standen-NEA/code2/tokenizer.c
index 3b7e394..10c70a7 100644
--- a/comp/lucas-standen-NEA/code2/tokenizer.c
+++ b/comp/lucas-standen-NEA/code2/tokenizer.c
@@ -5,38 +5,67 @@
 #include "util.h"
 
 typedef struct astNode {
-	char *funcName;
+	char *func;
 	char *args[8];
 	struct astNode *children[8];
 } astNode;
 
-astNode *tokenize(char *line){
+int readuntil(char *src, char c, char *dst){ // copies src into dst up to (not including) c or the end of src;
+					     // when c is ')' nested "(...)" pairs are copied whole, so it stops at the
+					     // matching ')' rather than the first occurrence; returns the number of chars copied
+	int ptr = 0; // write index into dst (advances in step with i)
+	int depth = 0; // number of '(' seen that have not yet been closed
+	int i = 0; // read index into src
+
+	while ((src[i] != c || depth != 0) && src[i] != '\0'){ // stop at NUL, or at c when not inside a nested expression
+		if (c == ')'){ // nesting is only tracked when scanning for ')'
+			if (src[i] == '('){
+				depth++;
+			}else if (src[i] == ')'){ // only reachable with depth != 0, so this closes a nested '('
+				depth--;
+			}
+		}
+		dst[ptr] = src[i];
+		ptr++;
+		i++;
+	}
+	
+	dst[ptr] = '\0'; // no bounds check: dst must have room for the copied chars plus this terminator
+
+	return i; // count excludes the delimiter and the terminator
+}
+
+
+astNode *tokenize(char *line){ // assumes the first set of brackets has been stripped
 	astNode *head = malloc(sizeof(astNode));
 
 	int depth = 0;
-	int charCount = 0;
 	int argCount = 0;
+	int i = 0;
 
-
-	for (int i = 0; i < strlen(line); i++){
-		switch (line[i]){
-			case ' ':
+top:
+	for (;i < strlen(line); i++){
+		char *chunk = malloc(strlen(line));
+		if (line[i] == ')'){ 
+			i++; 
+			goto top;
+		}
+		if (line[i] == '('){
+			i++;
+			i += readuntil(&line[i], ')', chunk); // reads a nested function
+			i++;
+			head->children[argCount] = tokenize(chunk);
+			argCount++;
+		}else { 
+			i += readuntil(&line[i], ' ', chunk); // reads func name or arg
+			if (head->func == NULL){
+				head->func = chunk;
+			} else{
+				head->args[argCount] = chunk;
 				argCount++;
-				charCount = 0;
-				break;
-			case '(':
-				1
-			default:
-				if (argCount >= 1){
-					head->args[argCount][charCount] = line[i];
-					charCount++;
-				}
-				else {
-					head->funcName[charCount] = line[i];
-					charCount++;
-				}
+			}
 		}
 	}
 
-	return NULL;
+	return head;
 }
diff --git a/comp/lucas-standen-NEA/code2/tokenizer.h b/comp/lucas-standen-NEA/code2/tokenizer.h
index eebfbc5..382eedf 100644
--- a/comp/lucas-standen-NEA/code2/tokenizer.h
+++ b/comp/lucas-standen-NEA/code2/tokenizer.h
@@ -1,5 +1,5 @@
 typedef struct astNode {
-	char *funcName;
+	char *func;
 	char *args[8];
 	struct astNode *children[8];
 } astNode;
diff --git a/comp/lucas-standen-NEA/code2/zpy b/comp/lucas-standen-NEA/code2/zpy
new file mode 100755
index 0000000..640c7fc
--- /dev/null
+++ b/comp/lucas-standen-NEA/code2/zpy
diff --git a/comp/lucas-standen-NEA/code2/zpy.c b/comp/lucas-standen-NEA/code2/zpy.c
index 3ec448a..e88ee2c 100644
--- a/comp/lucas-standen-NEA/code2/zpy.c
+++ b/comp/lucas-standen-NEA/code2/zpy.c
@@ -5,6 +5,8 @@
 #include "parser.h"
 #include "tokenizer.h"
 
+#include "debug.h"
+
 int main(int argc, char **argv){
 	if (argc < 2)
 		die("no input files!");
@@ -13,11 +15,18 @@ int main(int argc, char **argv){
 	if (f == NULL)
 		die("no such file or directory");
 
-	char **stringTokens = parse(f);
+	strings *stringTokens = parse(f);
 
 	if (stringTokens == NULL)
 		die("couldn't parse file, is it formated properly?");
+
+
+
+	for (int i = 0; i < stringTokens->count; i++){
+		stringTokens->strs[i]++;
+		stringTokens->strs[i][strlen(stringTokens->strs[i])] = '\0';
+		astNode *line = tokenize(stringTokens->strs[i]);
+	}
 	
-	tokenize(stringTokens[0]);
 	
 }
diff --git a/comp/lucas-standen-NEA/writeup/coverpage.ms b/comp/lucas-standen-NEA/writeup/coverpage.ms
index e18f9c0..5099c18 100644
--- a/comp/lucas-standen-NEA/writeup/coverpage.ms
+++ b/comp/lucas-standen-NEA/writeup/coverpage.ms
@@ -1,11 +1,4 @@
-.TL
-The solution To bad code
-.AU
-Lucas Standen
-.AI
-7949
-.AB
-
+.2C
 .NH 1
 Reading this document
 .LP
diff --git a/comp/lucas-standen-NEA/writeup/coverpage.ps b/comp/lucas-standen-NEA/writeup/coverpage.ps
index 3ca9e13..5f0ee77 100644
--- a/comp/lucas-standen-NEA/writeup/coverpage.ps
+++ b/comp/lucas-standen-NEA/writeup/coverpage.ps
@@ -1,6 +1,6 @@
 %!PS-Adobe-3.0
 %%Creator: groff version 1.23.0
-%%CreationDate: Thu Jul  4 09:06:34 2024
+%%CreationDate: Tue Aug 27 22:07:55 2024
 %%DocumentNeededResources: font Times-Bold
 %%+ font Times-Italic
 %%+ font Times-Roman
diff --git a/comp/lucas-standen-NEA/writeup/questions-for-amy.ps b/comp/lucas-standen-NEA/writeup/questions-for-amy.ps
index 19e1717..d4475aa 100644
--- a/comp/lucas-standen-NEA/writeup/questions-for-amy.ps
+++ b/comp/lucas-standen-NEA/writeup/questions-for-amy.ps
@@ -1,6 +1,6 @@
 %!PS-Adobe-3.0
 %%Creator: groff version 1.23.0
-%%CreationDate: Thu Jul  4 09:06:36 2024
+%%CreationDate: Tue Aug 27 22:07:55 2024
 %%DocumentNeededResources: font Times-Bold
 %%+ font Times-Roman
 %%DocumentSuppliedResources: procset grops 1.23 0
author	Thing1 <thing1@seacrossedlovers.xyz>	2024-08-29 15:01:34 +0100
committer	Thing1 <thing1@seacrossedlovers.xyz>	2024-08-29 15:01:34 +0100
commit	d28f618c0e4c3da57c856a31d9ce3003a086e7ed (patch)
tree	9edb35867649ba55268f40591015f4ff226f5abf /comp
parent	cfdd3c90877b59dc674cc9f68c0b7b4bb7c14ba8 (diff)