diff options
author | thing1 <thing1@seacrossedlovers.xyz> | 2025-03-14 17:37:27 +0000 |
---|---|---|
committer | thing1 <thing1@seacrossedlovers.xyz> | 2025-03-14 17:37:27 +0000 |
commit | af780dc32cbc9d9a40bec1e2ea538e71001c36aa (patch) | |
tree | 6e1386faa63125ba5537e923b933868699df3156 |
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 13 | ||||
-rw-r--r-- | cc.c | 33 | ||||
-rw-r--r-- | lex.c | 102 | ||||
-rw-r--r-- | lex.h | 13 | ||||
-rw-r--r-- | parse.c | 39 | ||||
-rw-r--r-- | parse.h | 7 | ||||
-rw-r--r-- | test.c | 3 | ||||
-rw-r--r-- | types.h | 43 |
9 files changed, 255 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..47c460a --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.o +cc diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e1233e9 --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +CFLAGS=-ggdb + +SRC := lex.c parse.c cc.c +OBJ := ${SRC:.c=.o} + +all: cc + +.c.o: + cc -c ${CFLAGS} $< +cc: ${OBJ} + cc -o $@ ${OBJ} +clean: + rm -rf *.o cc @@ -0,0 +1,33 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lex.h" +#include "parse.h" +#include "types.h" + +int main() { + FILE *f = fopen("test.c", "r"); + + lexobj lexes[4096]; + int lexcount = 0; + + char *line = malloc(256); + while (fgets(line, 256, f) != NULL) { + stripwhitespace(line); + while (line[0] != 0) { + lexes[lexcount] = lex(line); + lexcount++; + line = saveptr; + } + char *line = malloc(256); + } + + ast exprs[4096]; + int exprcount; + + while (lexcount > 0) { + exprs[exprcount] = genast(lexes, &lexcount); + exprcount++; + } +} @@ -0,0 +1,102 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "types.h" + +#define size(arr) sizeof(arr) / sizeof(arr[0]) + +char *keywords[] = { + "int", + "char", + "return", + "for", + "while", + "if", + "else", + "elif" +}; + + +char *saveptr = NULL; + +int lexchar(lexobj *l, char *line, char c) { + if (line[0] == c) { + l->data = NULL; + l->t = c; + saveptr = line + 1; + return 1; + } + return 0; +} + +int lexchars(lexobj *l, char *line, char *cs) { + for (int i = 0; i < strlen(cs); i++) { + if (lexchar(l, line, cs[i])) return 1; + } + return 0; +} + +lexobj lex(char *line) { +removepadding: + if (line[0] == ' ') line++; + if (line[0] == ' ') goto removepadding; + + static lexobj l; + + if (isdigit(line[0])) { + static char num[256]; + int ptr = 0; + while (isdigit(line[0])) { + num[ptr] = line[0]; + ptr++; + line++; + } + num[ptr] = 0; + saveptr = line; + l.data = num; + l.t = INTLIT; + return l; + } + + if (lexchars(&l, line, "(){};+-*/")) return l; + + for (int i = 0; i < size(keywords); i++) { + if (strstr(line, keywords[i]) == line) { + line += strlen(keywords[i]); + saveptr = line; + if (line[0] != ' ') goto retry; + l.data = keywords[i]; + l.t = KEYWORD; + return l; + } +retry: + continue; + } + + char *data = strchr(line, '('); + *data = 0; + l.data = strdup(line); + line += data - line; + *data = '('; + saveptr = line; + + return l; +} + +void stripwhitespace(char *line) { + char *out = malloc(256); + int ptr = 0; + for (int i = 0; i < strlen(line); i++) { + if (line[i] == '\t' || line[i] == '\n' + || line[i] == '\v') continue; + else { + out[ptr] = line[i]; + ptr++; + } + } + out[ptr] = 0; + + memcpy(line, out, 256); +} @@ -0,0 +1,13 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "types.h" + +extern char *saveptr; + +int lexchar(lexobj *l, char *line, char c); +int lexchars(lexobj *l, char *line, char *cs); +lexobj lex(char *line); +void stripwhitespace(char *line); @@ -0,0 +1,39 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "types.h" + +parsetypes prevtype = NILPARSETYPE; + +ast *genast(lexobj lexes[4096], int *lexcount) { + static ast a; + switch (prevtype) { + case NILPARSETYPE: + case FUNCEND: + a.function = lexes[*lexcount + 1].data; // function name + a.litteralchildren[0] = lexes[*lexcount].data; // function ret type + *lexcount += 2; + a.childcount++; + while (lexes[*lexcount].t != CLOSEBRACE) { + a.litteralchildren[a.childcount] = lexes[*lexcount].data; + *lexcount += 1; + a.childcount++; + } + *lexcount += 1; // move the lexptr to the start of the functions code + prevtype = FUNCTIONDEF; + return &a; + case FUNCTIONDEF: + a.function = lexes[*lexcount].data; + *lexcount += 1; + if (lexes[*lexcount].t == INTLIT) { + a.litteralchildren[0] = lexes[*lexcount].data; + *lexcount += 1; + return &a; + } + else { + a.children[0] = genast(lexes, lexcount); + } + } + return &a; +} @@ -0,0 +1,7 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "types.h" + +ast genast(lexobj lexes[4096], int *lexcount); @@ -0,0 +1,3 @@ +int main() { + return 0; +} @@ -0,0 +1,43 @@ +#ifndef __TYPES_H_ +#define __TYPES_H_ + +typedef enum parsetypes { + NILPARSETYPE, + FUNCTIONDEF, + FUNCEND, + MATHEXPR, + MEMEXPR, + KEYWORDEXPR, +} parsetypes; + +typedef enum lextypes { + NILLEXTYPE, + INTLIT, + KEYWORD, + NAME, + OPENCURLY = '{', + CLOSECURLY = '}', + OPENBRACE = '(', + CLOSEBRACE = ')', + SEMICOLON = ';', + ADD = '+', + SUB = '-', + DIV = '/', + MUL = '*', +} lextypes; + +typedef struct lexobj { + lextypes t; + char *data; +} lexobj; + +typedef struct ast { + char *function; + int childcount; + union { + struct ast *children[8]; + char *litteralchildren[8]; + }; +} ast; + +#endif |