From af780dc32cbc9d9a40bec1e2ea538e71001c36aa Mon Sep 17 00:00:00 2001
From: thing1
Date: Fri, 14 Mar 2025 17:37:27 +0000
Subject: init commit

---
 .gitignore |   2 +
 Makefile   |  19 +++++++++
 cc.c       |  57 ++++++++++++++++++++++++++
 lex.c      | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lex.h      |  19 +++++++++
 parse.c    |  79 ++++++++++++++++++++++++++++++++++++
 parse.h    |  17 ++++++++
 test.c     |   3 ++
 types.h    |  48 ++++++++++++++++++++++
 9 files changed, 376 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 cc.c
 create mode 100644 lex.c
 create mode 100644 lex.h
 create mode 100644 parse.c
 create mode 100644 parse.h
 create mode 100644 test.c
 create mode 100644 types.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..47c460a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+cc
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e1233e9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,19 @@
+# Build the cc binary from lex.c, parse.c and cc.c.
+CFLAGS=-ggdb
+
+SRC := lex.c parse.c cc.c
+OBJ := ${SRC:.c=.o}
+HDR := lex.h parse.h types.h
+
+.PHONY: all clean
+
+all: cc
+
+.c.o:
+	${CC} -c ${CFLAGS} $<
+# the sources include these headers, so rebuild objects when one changes
+${OBJ}: ${HDR}
+cc: ${OBJ}
+	${CC} -o $@ ${OBJ}
+clean:
+	rm -rf *.o cc
diff --git a/cc.c b/cc.c
new file mode 100644
index 0000000..0a38f0e
--- /dev/null
+++ b/cc.c
@@ -0,0 +1,57 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lex.h"
+#include "parse.h"
+#include "types.h"
+
+enum { MAXLEXES = 4096, MAXLINE = 256 };
+
+/*
+ * Lex test.c into a token array, then parse the tokens into AST nodes.
+ * Returns EXIT_FAILURE if test.c cannot be opened or holds more than
+ * MAXLEXES tokens, 0 otherwise.
+ */
+int main() {
+    /* static storage is zero-filled, so the slot after the last token is
+     * the NILLEXTYPE terminator genast stops at */
+    static lexobj lexes[MAXLEXES + 1];
+    static ast *exprs[MAXLEXES];
+    int lexcount = 0;
+    int exprcount = 0;
+    char line[MAXLINE];
+
+    FILE *f = fopen("test.c", "r");
+    if (f == NULL) {
+        perror("test.c");
+        return EXIT_FAILURE;
+    }
+
+    while (fgets(line, sizeof line, f) != NULL) {
+        stripwhitespace(line);
+        /* walk the line with a cursor so the buffer itself is reused */
+        for (char *cur = line; cur[0] != 0; cur = saveptr) {
+            lexobj l = lex(cur);
+            if (l.t == NILLEXTYPE) continue; /* padding or unknown byte */
+            if (lexcount == MAXLEXES) {
+                fprintf(stderr, "cc: more than %d tokens\n", MAXLEXES);
+                fclose(f);
+                return EXIT_FAILURE;
+            }
+            lexes[lexcount++] = l;
+        }
+    }
+    fclose(f);
+
+    /* genast moves pos past what it consumed; NULL means nothing was built */
+    int pos = 0;
+    while (pos < lexcount && exprcount < MAXLEXES) {
+        ast *node = genast(lexes, &pos);
+        if (node == NULL) break;
+        exprs[exprcount++] = node;
+    }
+
+    /* tokens and nodes are released by process exit */
+    return 0;
+}
diff --git a/lex.c b/lex.c
new file mode 100644
index 0000000..db16db0
--- /dev/null
+++ b/lex.c
@@ -0,0 +1,132 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lex.h"
+#include "types.h"
+
+/* element count of a true array (not of a pointer) */
+#define size(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+char *keywords[] = {
+    "int",
+    "char",
+    "return",
+    "for",
+    "while",
+    "if",
+    "else",
+    "elif"
+};
+
+/* first character not consumed by the latest lex()/lexchar() match */
+char *saveptr = NULL;
+
+/* nonzero if c may appear in an identifier or keyword */
+static int isidentchar(char c) {
+    return c == '_' || isalnum((unsigned char)c);
+}
+
+/* heap copy of the characters in [start, end); exits if out of memory */
+static char *copyspan(const char *start, const char *end) {
+    size_t len = (size_t)(end - start);
+    char *copy = malloc(len + 1);
+    if (copy == NULL) {
+        perror("malloc");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(copy, start, len);
+    copy[len] = 0;
+    return copy;
+}
+
+/*
+ * If line starts with c: store a token of type c with NULL data in l, set
+ * saveptr just past the character and return 1. Otherwise return 0 and
+ * leave l and saveptr alone.
+ */
+int lexchar(lexobj *l, char *line, char c) {
+    /* matching the terminator would push saveptr outside the string */
+    if (c == 0 || line[0] != c) return 0;
+    l->data = NULL;
+    l->t = c;
+    saveptr = line + 1;
+    return 1;
+}
+
+/* lexchar() against each character of cs; 1 on the first match, else 0 */
+int lexchars(lexobj *l, char *line, char *cs) {
+    for (; *cs != 0; cs++) {
+        if (lexchar(l, line, *cs)) return 1;
+    }
+    return 0;
+}
+
+/*
+ * Read one token from the front of line and set saveptr to the first
+ * character after it.
+ *
+ *   INTLIT   data is a heap copy of the digits
+ *   KEYWORD  data points into keywords[], do not free it
+ *   NAME     data is a heap copy of the identifier
+ *   punctuation and operators: type is the character, data is NULL
+ *
+ * The token is NILLEXTYPE with NULL data when only whitespace was left or
+ * when one byte that starts no token had to be skipped.
+ */
+lexobj lex(char *line) {
+    lexobj l = { NILLEXTYPE, NULL };
+
+    while (isspace((unsigned char)line[0])) line++;
+    saveptr = line;
+    if (line[0] == 0) return l;
+
+    if (isdigit((unsigned char)line[0])) {
+        char *start = line;
+        while (isdigit((unsigned char)line[0])) line++;
+        saveptr = line;
+        l.data = copyspan(start, line);
+        l.t = INTLIT;
+        return l;
+    }
+
+    if (lexchars(&l, line, "(){};+-*/")) return l;
+
+    if (isidentchar(line[0])) {
+        char *start = line;
+        while (isidentchar(line[0])) line++;
+        size_t len = (size_t)(line - start);
+        saveptr = line;
+
+        /* a keyword must be the whole word: "integer" is a NAME, not "int" */
+        for (size_t i = 0; i < size(keywords); i++) {
+            if (strlen(keywords[i]) == len
+                && strncmp(start, keywords[i], len) == 0) {
+                l.data = keywords[i];
+                l.t = KEYWORD;
+                return l;
+            }
+        }
+        l.data = copyspan(start, line);
+        l.t = NAME;
+        return l;
+    }
+
+    /* unknown byte: step over it so callers always make progress */
+    saveptr = line + 1;
+    return l;
+}
+
+/*
+ * Clean up a line in place for lex(): newlines are removed, tabs and
+ * vertical tabs become spaces so the words around them stay separate.
+ */
+void stripwhitespace(char *line) {
+    char *out = line;
+    for (char *in = line; *in != 0; in++) {
+        if (*in == '\n') continue;
+        *out++ = (*in == '\t' || *in == '\v') ? ' ' : *in;
+    }
+    *out = 0;
+}
diff --git a/lex.h b/lex.h
new file mode 100644
index 0000000..26bac2c
--- /dev/null
+++ b/lex.h
@@ -0,0 +1,19 @@
+#ifndef LEX_H_
+#define LEX_H_
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+
+/* first character not consumed by the latest lex()/lexchar() match */
+extern char *saveptr;
+
+int lexchar(lexobj *l, char *line, char c);
+int lexchars(lexobj *l, char *line, char *cs);
+lexobj lex(char *line);
+void stripwhitespace(char *line);
+
+#endif
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..94623a5
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,79 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "parse.h"
+#include "types.h"
+
+/* kind of construct the previous genast() call produced */
+parsetypes prevtype = NILPARSETYPE;
+
+/* zero-filled heap node; exits if out of memory */
+static ast *newast(void) {
+    ast *node = calloc(1, sizeof *node);
+    if (node == NULL) {
+        perror("calloc");
+        exit(EXIT_FAILURE);
+    }
+    return node;
+}
+
+/* append a literal child; one that does not fit is reported and dropped */
+static void addlitteral(ast *node, char *data) {
+    if (node->childcount == AST_MAXCHILDREN) {
+        fprintf(stderr, "parse: more than %d children\n", AST_MAXCHILDREN);
+        return;
+    }
+    node->litteralchildren[node->childcount] = data;
+    node->childcount++;
+}
+
+/*
+ * Build one AST node from the tokens starting at lexes[*lexcount] and
+ * advance *lexcount past the tokens consumed. lexes must end with a
+ * NILLEXTYPE token, which is never stepped over.
+ *
+ * Returns a heap node, or NULL when the terminator has been reached or
+ * prevtype is a state with no rule yet.
+ */
+ast *genast(lexobj lexes[4096], int *lexcount) {
+    if (lexes[*lexcount].t == NILLEXTYPE) return NULL;
+
+    ast *node;
+    switch (prevtype) {
+    case NILPARSETYPE:
+    case FUNCEND:
+        node = newast();
+        addlitteral(node, lexes[*lexcount].data); // function ret type
+        *lexcount += 1;
+        if (lexes[*lexcount].t != NILLEXTYPE) {
+            node->function = lexes[*lexcount].data; // function name
+            *lexcount += 1;
+        }
+        while (lexes[*lexcount].t != NILLEXTYPE
+               && lexes[*lexcount].t != CLOSEBRACE) {
+            addlitteral(node, lexes[*lexcount].data);
+            *lexcount += 1;
+        }
+        // move the lexptr to the start of the functions code
+        if (lexes[*lexcount].t == CLOSEBRACE) *lexcount += 1;
+        prevtype = FUNCTIONDEF;
+        return node;
+    case FUNCTIONDEF:
+        node = newast();
+        node->function = lexes[*lexcount].data;
+        *lexcount += 1;
+        if (lexes[*lexcount].t == INTLIT) {
+            addlitteral(node, lexes[*lexcount].data);
+            *lexcount += 1;
+        } else {
+            /* children share storage with litteralchildren, so a node
+             * holds either one nested node or literals, never both */
+            node->children[0] = genast(lexes, lexcount);
+            node->childcount = node->children[0] != NULL;
+        }
+        return node;
+    default: /* MATHEXPR, MEMEXPR, KEYWORDEXPR have no rule yet */
+        return NULL;
+    }
+}
diff --git a/parse.h b/parse.h
new file mode 100644
index 0000000..044657b
--- /dev/null
+++ b/parse.h
@@ -0,0 +1,17 @@
+#ifndef PARSE_H_
+#define PARSE_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+
+/*
+ * Build one heap-allocated AST node from lexes[*lexcount] onwards and
+ * advance *lexcount past it. lexes must end with a NILLEXTYPE token.
+ * Returns NULL when there is nothing left to parse.
+ */
+ast *genast(lexobj lexes[4096], int *lexcount);
+
+#endif
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..cb3f748
--- /dev/null
+++ b/test.c
@@ -0,0 +1,3 @@
+int main() {
+    return 0;
+}
diff --git a/types.h b/types.h
new file mode 100644
index 0000000..75aa773
--- /dev/null
+++ b/types.h
@@ -0,0 +1,48 @@
+#ifndef TYPES_H_
+#define TYPES_H_
+
+/* parser states, see prevtype in parse.c */
+typedef enum parsetypes {
+    NILPARSETYPE,
+    FUNCTIONDEF,
+    FUNCEND,
+    MATHEXPR,
+    MEMEXPR,
+    KEYWORDEXPR,
+} parsetypes;
+
+/* token kinds; single-character tokens use the character as their value */
+typedef enum lextypes {
+    NILLEXTYPE,
+    INTLIT,
+    KEYWORD,
+    NAME,
+    OPENCURLY = '{',
+    CLOSECURLY = '}',
+    OPENBRACE = '(',
+    CLOSEBRACE = ')',
+    SEMICOLON = ';',
+    ADD = '+',
+    SUB = '-',
+    DIV = '/',
+    MUL = '*',
+} lextypes;
+
+typedef struct lexobj {
+    lextypes t;
+    char *data; /* token text, or NULL when the type alone says it all */
+} lexobj;
+
+enum { AST_MAXCHILDREN = 8 };
+
+typedef struct ast {
+    char *function;
+    int childcount;
+    /* one storage area: a node has nested nodes or literal strings */
+    union {
+        struct ast *children[AST_MAXCHILDREN];
+        char *litteralchildren[AST_MAXCHILDREN];
+    };
+} ast;
+
+#endif
-- 
cgit v1.2.3