diff options
author | Thing1 <thing1@seacrossedlovers.xyz> | 2024-08-29 15:01:34 +0100 |
---|---|---|
committer | Thing1 <thing1@seacrossedlovers.xyz> | 2024-08-29 15:01:34 +0100 |
commit | d28f618c0e4c3da57c856a31d9ce3003a086e7ed (patch) | |
tree | 9edb35867649ba55268f40591015f4ff226f5abf /comp | |
parent | cfdd3c90877b59dc674cc9f68c0b7b4bb7c14ba8 (diff) |
finished the new tokenizer
Diffstat (limited to 'comp')
-rw-r--r-- | comp/lucas-standen-NEA/code2/Makefile | 9 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code2/TODO | 2 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code2/debug.c | 24 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code2/debug.h | 4 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code2/parser.c | 14 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code2/parser.h | 12 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code2/sample.zpy | 2 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code2/tokenizer.c | 71 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/code2/tokenizer.h | 2 | ||||
-rwxr-xr-x | comp/lucas-standen-NEA/code2/zpy | bin | 0 -> 16352 bytes | |||
-rw-r--r-- | comp/lucas-standen-NEA/code2/zpy.c | 13 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/writeup/coverpage.ms | 9 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/writeup/coverpage.ps | 2 | ||||
-rw-r--r-- | comp/lucas-standen-NEA/writeup/questions-for-amy.ps | 2 |
14 files changed, 122 insertions, 44 deletions
diff --git a/comp/lucas-standen-NEA/code2/Makefile b/comp/lucas-standen-NEA/code2/Makefile index 2b37de2..d9aa963 100644 --- a/comp/lucas-standen-NEA/code2/Makefile +++ b/comp/lucas-standen-NEA/code2/Makefile @@ -1,7 +1,7 @@ -CFLAGS= -O0 -ggdb +CFLAGS= -O3 -all: _zpy _parser _tokenizer _util - cc zpy.o parser.o tokenizer.o util.o -o zpy ${CFLAGS} +all: _zpy _parser _tokenizer _util _debug + cc zpy.o parser.o tokenizer.o util.o debug.o -o zpy ${CFLAGS} _zpy: zpy.c cc zpy.c -c -o zpy.o ${CFLAGS} @@ -12,5 +12,8 @@ _tokenizer: tokenizer.c _util: util.c cc util.c -c -o util.o ${CFLAGS} +_debug: + cc debug.c -c -o debug.o ${CFLAGS} + clean: rm -rf zpy *.o diff --git a/comp/lucas-standen-NEA/code2/TODO b/comp/lucas-standen-NEA/code2/TODO deleted file mode 100644 index 34deccd..0000000 --- a/comp/lucas-standen-NEA/code2/TODO +++ /dev/null @@ -1,2 +0,0 @@ -make the tokenizer work, it needs to call recursively whenever it see's a '(' execept the first -expression, perhaps cut them off before the call diff --git a/comp/lucas-standen-NEA/code2/debug.c b/comp/lucas-standen-NEA/code2/debug.c new file mode 100644 index 0000000..3d2d75b --- /dev/null +++ b/comp/lucas-standen-NEA/code2/debug.c @@ -0,0 +1,24 @@ +#include <stdio.h> +#include "tokenizer.h" + +#include "util.h" + +void printAST(astNode *head){ + printf("\n>>>\nfunc: %s\n", head->func); + printf("args: "); + for (int i = 0; i < 8; i++){ + if (head->children[i] == NULL && head->args[i] == NULL){ + printf("<<<"); + return; + } + + if (head->args[i] != NULL){ + printf("%s ", head->args[i]); + } + if (head->children[i] != NULL){ + printAST(head->children[i]); + } + + } + printf("\n"); +} diff --git a/comp/lucas-standen-NEA/code2/debug.h b/comp/lucas-standen-NEA/code2/debug.h new file mode 100644 index 0000000..ae0f931 --- /dev/null +++ b/comp/lucas-standen-NEA/code2/debug.h @@ -0,0 +1,4 @@ +#include <stdio.h> +#include "util.h" + +void printAST(astNode *head); diff --git a/comp/lucas-standen-NEA/code2/parser.c 
b/comp/lucas-standen-NEA/code2/parser.c index ebf8e47..d9cb7bd 100644 --- a/comp/lucas-standen-NEA/code2/parser.c +++ b/comp/lucas-standen-NEA/code2/parser.c @@ -5,7 +5,12 @@ #include "util.h" -int countChars(char *s, char c){ +typedef struct strings { + char **strs; + int count; +} strings; + +int countChars(char *s, char c){ // counts the number of times c ocurrs in s int count = 0; for (int i = 0; i < strlen(s); i++){ if (s[i] == c) count++; @@ -13,7 +18,7 @@ int countChars(char *s, char c){ return count; } -char **parse(FILE *f){ +strings *parse(FILE *f){ fseek(f, 0, SEEK_END); int len = ftell(f); rewind(f); @@ -43,9 +48,12 @@ char **parse(FILE *f){ tokCount++; } } + strings *strs = malloc(sizeof(strings)); + strs->strs = tokens; + strs->count = tokCount; free(line); free(contents); - return tokens; + return strs; } diff --git a/comp/lucas-standen-NEA/code2/parser.h b/comp/lucas-standen-NEA/code2/parser.h index 94b5859..80a5f08 100644 --- a/comp/lucas-standen-NEA/code2/parser.h +++ b/comp/lucas-standen-NEA/code2/parser.h @@ -1,3 +1,13 @@ #include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> -char **parse(FILE *f); +#include "util.h" + +typedef struct strings { + char **strs; + int count; +} strings; + +strings *parse(FILE *f); diff --git a/comp/lucas-standen-NEA/code2/sample.zpy b/comp/lucas-standen-NEA/code2/sample.zpy index 7ffc605..fe7e1ae 100644 --- a/comp/lucas-standen-NEA/code2/sample.zpy +++ b/comp/lucas-standen-NEA/code2/sample.zpy @@ -1 +1 @@ -(let a:i64 (+ 2 3)) +(let a:i64 (+ (- 1 4) 3)) diff --git a/comp/lucas-standen-NEA/code2/tokenizer.c b/comp/lucas-standen-NEA/code2/tokenizer.c index 3b7e394..10c70a7 100644 --- a/comp/lucas-standen-NEA/code2/tokenizer.c +++ b/comp/lucas-standen-NEA/code2/tokenizer.c @@ -5,38 +5,67 @@ #include "util.h" typedef struct astNode { - char *funcName; + char *func; char *args[8]; struct astNode *children[8]; } astNode; -astNode *tokenize(char *line){ +int readuntil(char *src, char c, char 
*dst){ // returns how many chars read, will read until + // the end of an expression, not the first + // occurence + int ptr = 0; + int depth = 0; + int i = 0; + + while ((src[i] != c || depth != 0) && src[i] != '\0'){ + if (c == ')'){ + if (src[i] == '('){ + depth++; + }else if (src[i] == ')'){ + depth--; + } + } + dst[ptr] = src[i]; + ptr++; + i++; + } + + dst[ptr] = '\0'; + + return i; +} + + +astNode *tokenize(char *line){ // asume the first set of brackets have been stripped astNode *head = malloc(sizeof(astNode)); int depth = 0; - int charCount = 0; int argCount = 0; + int i = 0; - - for (int i = 0; i < strlen(line); i++){ - switch (line[i]){ - case ' ': +top: + for (;i < strlen(line); i++){ + char *chunk = malloc(strlen(line)); + if (line[i] == ')'){ + i++; + goto top; + } + if (line[i] == '('){ + i++; + i += readuntil(&line[i], ')', chunk); // reads a nested function + i++; + head->children[argCount] = tokenize(chunk); + argCount++; + }else { + i += readuntil(&line[i], ' ', chunk); // reads func name or arg + if (head->func == NULL){ + head->func = chunk; + } else{ + head->args[argCount] = chunk; argCount++; - charCount = 0; - break; - case '(': - 1 - default: - if (argCount >= 1){ - head->args[argCount][charCount] = line[i]; - charCount++; - } - else { - head->funcName[charCount] = line[i]; - charCount++; - } + } } } - return NULL; + return head; } diff --git a/comp/lucas-standen-NEA/code2/tokenizer.h b/comp/lucas-standen-NEA/code2/tokenizer.h index eebfbc5..382eedf 100644 --- a/comp/lucas-standen-NEA/code2/tokenizer.h +++ b/comp/lucas-standen-NEA/code2/tokenizer.h @@ -1,5 +1,5 @@ typedef struct astNode { - char *funcName; + char *func; char *args[8]; struct astNode *children[8]; } astNode; diff --git a/comp/lucas-standen-NEA/code2/zpy b/comp/lucas-standen-NEA/code2/zpy Binary files differ new file mode 100755 index 0000000..640c7fc --- /dev/null +++ b/comp/lucas-standen-NEA/code2/zpy diff --git a/comp/lucas-standen-NEA/code2/zpy.c 
b/comp/lucas-standen-NEA/code2/zpy.c index 3ec448a..e88ee2c 100644 --- a/comp/lucas-standen-NEA/code2/zpy.c +++ b/comp/lucas-standen-NEA/code2/zpy.c @@ -5,6 +5,8 @@ #include "parser.h" #include "tokenizer.h" +#include "debug.h" + int main(int argc, char **argv){ if (argc < 2) die("no input files!"); @@ -13,11 +15,18 @@ int main(int argc, char **argv){ if (f == NULL) die("no such file or directory"); - char **stringTokens = parse(f); + strings *stringTokens = parse(f); if (stringTokens == NULL) die("couldn't parse file, is it formated properly?"); + + + + for (int i = 0; i < stringTokens->count; i++){ + stringTokens->strs[i]++; + stringTokens->strs[i][strlen(stringTokens->strs[i])] = '\0'; + astNode *line = tokenize(stringTokens->strs[i]); + } - tokenize(stringTokens[0]); } diff --git a/comp/lucas-standen-NEA/writeup/coverpage.ms b/comp/lucas-standen-NEA/writeup/coverpage.ms index e18f9c0..5099c18 100644 --- a/comp/lucas-standen-NEA/writeup/coverpage.ms +++ b/comp/lucas-standen-NEA/writeup/coverpage.ms @@ -1,11 +1,4 @@ -.TL -The solution To bad code -.AU -Lucas Standen -.AI -7949 -.AB - +.2C .NH 1 Reading this document .LP diff --git a/comp/lucas-standen-NEA/writeup/coverpage.ps b/comp/lucas-standen-NEA/writeup/coverpage.ps index 3ca9e13..5f0ee77 100644 --- a/comp/lucas-standen-NEA/writeup/coverpage.ps +++ b/comp/lucas-standen-NEA/writeup/coverpage.ps @@ -1,6 +1,6 @@ %!PS-Adobe-3.0 %%Creator: groff version 1.23.0 -%%CreationDate: Thu Jul 4 09:06:34 2024 +%%CreationDate: Tue Aug 27 22:07:55 2024 %%DocumentNeededResources: font Times-Bold %%+ font Times-Italic %%+ font Times-Roman diff --git a/comp/lucas-standen-NEA/writeup/questions-for-amy.ps b/comp/lucas-standen-NEA/writeup/questions-for-amy.ps index 19e1717..d4475aa 100644 --- a/comp/lucas-standen-NEA/writeup/questions-for-amy.ps +++ b/comp/lucas-standen-NEA/writeup/questions-for-amy.ps @@ -1,6 +1,6 @@ %!PS-Adobe-3.0 %%Creator: groff version 1.23.0 -%%CreationDate: Thu Jul 4 09:06:36 2024 +%%CreationDate: Tue Aug 
27 22:07:55 2024 %%DocumentNeededResources: font Times-Bold %%+ font Times-Roman %%DocumentSuppliedResources: procset grops 1.23 0 |