summaryrefslogtreecommitdiff
path: root/comp/lucas-standen-NEA/code2/tokenizer.c
blob: 6275a7a0bc38784f30eff643d2a3d3973bca62d4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include <stdlib.h>
#include <stdio.h>
#include<string.h>

#include "util.h"

// One node of the expression tree produced by tokenize.
typedef struct astNode astNode;

struct astNode {
	// Function name: the first plain word tokenize finds in the expression.
	char *func;
	// Plain-word arguments. Slot k is filled when the k-th argument is a
	// word and stays NULL when that position holds a nested expression.
	char *args[8];
	// Nested expressions, sharing the same slot numbering as args.
	astNode *children[8];
};

// Copies characters from src into dst until the delimiter c or the end of src
// is reached, then NUL-terminates dst. The delimiter itself is not copied.
//
// When c is ')' brackets are balanced: every '(' met along the way must be
// closed before a ')' counts as the delimiter, so the copy runs to the end of
// the whole expression rather than to the first ')'.
//
// Returns the number of characters copied (not counting the NUL). No bound is
// applied to dst, so the caller must supply room for the copied text plus the
// terminator.
int readuntil(char *src, char c, char *dst){
	int open = 0; // '(' seen and not yet closed; only changes when c is ')'
	int n = 0;    // characters consumed from src == characters stored in dst

	for (;;){
		char cur = src[n];

		if (cur == '\0'){
			break;
		}
		if (cur == c && open == 0){
			break;
		}

		if (c == ')'){
			if (cur == '('){
				open++;
			} else if (cur == ')'){
				open--;
			}
		}

		dst[n] = cur;
		n++;
	}

	dst[n] = '\0';

	return n;
}


// Frees a node built by tokenize together with every string and child it owns.
static void freePartialNode(astNode *node){
	if (node == NULL){
		return;
	}
	free(node->func);
	for (size_t k = 0; k < sizeof node->args / sizeof node->args[0]; k++){
		free(node->args[k]);
		freePartialNode(node->children[k]);
	}
	free(node);
}

// Parses one expression into a freshly allocated astNode.
//
// line is the expression text with its outermost pair of brackets already
// stripped, e.g. "add 1 (mul 2 3)". The first plain word becomes func; each
// later word is stored in args[k] and each bracketed sub-expression is parsed
// recursively into children[k], where k is the argument's position. Runs of
// spaces and stray ')' characters between tokens are skipped.
//
// At most 8 arguments fit in a node; anything beyond that is reported on
// stderr and ignored. Returns NULL if line is NULL or memory runs out. The
// node, its strings and its children are heap allocated and owned by the
// caller.
astNode *tokenize(char *line){
	if (line == NULL){
		return NULL;
	}

	astNode *head = malloc(sizeof *head);
	if (head == NULL){
		return NULL;
	}

	const size_t slots = sizeof head->args / sizeof head->args[0];
	head->func = NULL;
	for (size_t k = 0; k < slots; k++){
		head->args[k] = NULL;
		head->children[k] = NULL;
	}

	const size_t len = strlen(line);
	size_t argCount = 0;
	size_t i = 0;

	while (i < len){
		// separators and closing brackets carry no token of their own
		if (line[i] == ' ' || line[i] == ')'){
			i++;
			continue;
		}

		int nested = (line[i] == '(');
		if (nested){
			i++; // step over the opening bracket
		}

		// every token except the function name needs a free slot
		if ((nested || head->func != NULL) && argCount >= slots){
			fprintf(stderr, "tokenize: more than %zu arguments, ignoring the rest\n", slots);
			break;
		}

		// a token can never be longer than what is left of the line;
		// the extra byte holds the terminator readuntil writes
		char *chunk = malloc(len - i + 1);
		if (chunk == NULL){
			freePartialNode(head);
			return NULL;
		}

		if (nested){
			// reads a nested function; i ends on its closing bracket,
			// which the skip at the top of the loop then consumes
			i += (size_t)readuntil(&line[i], ')', chunk);
			astNode *child = tokenize(chunk);
			free(chunk); // the child keeps its own copies of the text
			if (child == NULL){
				freePartialNode(head);
				return NULL;
			}
			head->children[argCount] = child;
			argCount++;
		} else {
			// reads func name or arg; ownership of chunk moves to the node
			i += (size_t)readuntil(&line[i], ' ', chunk);
			if (head->func == NULL){
				head->func = chunk;
			} else {
				head->args[argCount] = chunk;
				argCount++;
			}
		}
	}

	return head;
}