From b5912b95f786f97b03db1345039c49aefd7534f5 Mon Sep 17 00:00:00 2001
From: VegOwOtenks
Date: Tue, 1 Oct 2024 10:53:26 +0200
Subject: [PATCH] Tokenizing and printing

---
 src/interpreter.c | 50 +++++++++++++++++++++++++
 src/interpreter.h | 59 ++++++++++++++++++++++++++++++
 src/main.c        | 40 +++++++++++++++++++-
 src/tokenizer.c   | 93 +++++++++++++++++++++++++++++++++++++++++++++++
 src/tokenizer.h   | 18 +++++++++
 5 files changed, 259 insertions(+), 1 deletion(-)
 create mode 100644 src/interpreter.c
 create mode 100644 src/interpreter.h

diff --git a/src/interpreter.c b/src/interpreter.c
new file mode 100644
index 0000000..227bfc3
--- /dev/null
+++ b/src/interpreter.c
@@ -0,0 +1,50 @@
+#include "interpreter.h"
+
+/* Initialize a call frame that will execute alternative `f`.
+ * Returns ENOMEM when the value stack cannot be allocated. */
+static int CallFrame_Create(CallFrame* self, FlupFunctionAlternative* f)
+{
+    if (DynamicArray_Create(&self->stack, sizeof(Value), 8, NULL)) {
+        return ENOMEM;
+    }
+    self->function = f;
+    self->instruction_pointer = f->body_token_start;
+
+    return EXIT_SUCCESS;
+}
+
+/* Allocate an alternative covering the given token ranges.
+ * FIX: the malloc result was used unchecked; now returns NULL on OOM. */
+static FlupFunctionAlternative* FlupFunctionAlternative_Malloc(size_t condition_token_start, size_t condition_token_end, size_t body_token_start, size_t body_token_end)
+{
+    FlupFunctionAlternative* a = malloc(sizeof *a);
+    if (a == NULL) {
+        return NULL;
+    }
+    a->next = NULL;
+    a->condition_token_start = condition_token_start;
+    a->condition_token_end = condition_token_end;
+    a->body_token_start = body_token_start;
+    a->body_token_end = body_token_end;
+
+    return a;
+}
+
+/* `tokens` is borrowed: the interpreter never frees it. */
+int Interpreter_Create(Interpreter* self, DynamicArray* tokens)
+{
+    if (DynamicArray_Create(&self->call_frames, sizeof(CallFrame), 16, NULL)) {
+        return ENOMEM;
+    }
+    self->tokens = tokens;
+
+    return EXIT_SUCCESS;
+}
+
+int Interpreter_Interpret(Interpreter* self)
+{
+    CallFrame* first_frame;
+    /* FIX: the append result was ignored; on failure first_frame is invalid. */
+    if (DynamicArray_AppendEmpty(&self->call_frames, (void**) &first_frame)) {
+        return ENOMEM;
+    }
+    /* FIX: the original read `CallFrame_Create(first_frame, )` - a missing
+     * argument, which does not compile.  Until function parsing exists, run
+     * a synthetic alternative.  TODO: supply the real token ranges. */
+    FlupFunctionAlternative* entry = FlupFunctionAlternative_Malloc(0, 0, 0, 0);
+    if (entry == NULL || CallFrame_Create(first_frame, entry)) {
+        free(entry);  /* free(NULL) is a no-op */
+        return ENOMEM;
+    }
+
+    return EXIT_SUCCESS;
+}
+
+void Interpreter_Destroy(Interpreter* self)
+{
+    /* TODO(review): the per-frame stacks and the alternative allocated in
+     * Interpreter_Interpret are not freed here - decide ownership. */
+    DynamicArray_Destroy(&self->call_frames);
+}
diff --git a/src/interpreter.h b/src/interpreter.h
new file mode 100644
index 0000000..05cb840
--- /dev/null
+++ b/src/interpreter.h
@@ -0,0 +1,59 @@
+#ifndef FLUP_INTERPRETER_H
+#define FLUP_INTERPRETER_H
+
+#include "../include/utilitiec/dynamicarray/dynamicarray.h"
+
+#include "tokenizer.h"
+#include <stdint.h> /* NOTE(review): header name was lost in extraction - verify */
+
+/* One guarded body of a function: token range [condition_token_start,
+ * condition_token_end) guards token range [body_token_start, body_token_end). */
+typedef struct FlupFunctionAlternative_s {
+    size_t condition_token_start;
+    size_t condition_token_end;
+    size_t body_token_start;
+    size_t body_token_end;
+    struct FlupFunctionAlternative_s* next; /* singly linked list of alternatives */
+} FlupFunctionAlternative;
+
+typedef struct ParameterDefinition_s {
+    StringView name;
+    StringView type;
+} ParameterDefinition;
+
+typedef struct FlupFunction_s {
+    StringView name;
+    DynamicArray argument_defs; // ParameterDefinition
+    StringView return_type;
+    FlupFunctionAlternative* alternative;
+} FlupFunction;
+
+/* Tag for the Value tagged union below. */
+enum ValueType {
+    VALUETYPE_INT64,
+    VALUETYPE_DOUBLE
+};
+
+union ValueContent {
+    int64_t i64;
+    double f64;
+};
+
+typedef struct Value_s {
+    enum ValueType type;
+    union ValueContent get;
+} Value;
+
+typedef struct CallFrame_s {
+    size_t instruction_pointer; /* index into Interpreter.tokens */
+    FlupFunctionAlternative* function;
+    DynamicArray stack; // Value
+} CallFrame;
+
+typedef struct Interpreter_s {
+    DynamicArray* tokens;      /* borrowed; owned by the caller */
+    DynamicArray call_frames;  // stores CallFrame
+} Interpreter;
+
+int Interpreter_Create(Interpreter* self, DynamicArray* tokens);
+int Interpreter_Interpret(Interpreter* self);
+void Interpreter_Destroy(Interpreter* self);
+
+#endif //header guard
diff --git a/src/main.c b/src/main.c
index 5527518..2489e08 100644
--- a/src/main.c
+++ b/src/main.c
@@ -22,6 +22,20 @@
 #include <stdio.h>  /* NOTE(review): original bracket header names were lost in extraction */
 #include <stdlib.h>
 #include "../include/utilitiec/argumentc/argumentc.h"
+#include "interpreter.h"
+#include "tokenizer.h"
+
+/* Append every token of `source` to `a`.  Stops after an ERROR token but
+ * keeps it in the array so the caller can report it.
+ * Returns a DynamicArray error code on allocation failure, else EXIT_SUCCESS. */
+int tokenize_all(StringView source, DynamicArray* a)
+{
+    Token t;
+    while ((t = Tokenizer_NextToken(&source)).type != TOKENTYPE_NONE) {
+        int append_code = DynamicArray_Append(a, &t);
+        if (append_code) return append_code;
+        if (t.type == TOKENTYPE_ERROR) break;
+    }
+
+    return EXIT_SUCCESS;
+}
 
 char* load_file_string(StringView path)
 {
@@ -59,6 +73,8 @@ char* load_file_string(StringView path)
 
 	fclose(stream);
 
+	/* NOTE(review): requires the buffer to have been allocated with
+	 * length + 1 bytes; the allocation is outside this hunk - verify. */
+	buffer[length] = '\0';
+
 	return buffer;
 }
 
@@ -72,11 +88,33 @@ int main(int argc, const char* argv [])
 		fprintf(stderr, "Usage: [program] --file path/to/script_file\n");
 		return 1;
 	}
 
 	char* script_string = load_file_string(script_file.content);
+	/* FIX: the argument parser was destroyed BEFORE script_file.content was
+	 * consumed; the StringView likely references the parser's storage
+	 * (use-after-free).  Destroy it only after the file has been loaded. */
+	Argumentc_Destroy(&arguments);
 	if (script_string == NULL) return 1;
 
-	puts(script_string);
+	StringView source = StringView_FromString(script_string);
+	DynamicArray tokens;
+	if (DynamicArray_Create(&tokens, sizeof(Token), 128, NULL)) {
+		fprintf(stderr, "Fatal Error: Failed to create dynamicarray\n");
+		return 1;
+	}
+
+	if (tokenize_all(source, &tokens)) {
+		fprintf(stderr, "Fatal Error: Out of Memory in tokenizing\n");
+		return 1;
+	}
+
+	Interpreter interpreter;
+	/* FIX: Interpreter_Create reports ENOMEM; do not interpret on failure. */
+	if (Interpreter_Create(&interpreter, &tokens)) {
+		fprintf(stderr, "Fatal Error: Out of Memory creating interpreter\n");
+		return 1;
+	}
+
+	Interpreter_Interpret(&interpreter);
+
+	Interpreter_Destroy(&interpreter);
+	DynamicArray_Destroy(&tokens);
+
+	free(script_string);
 
 	return EXIT_SUCCESS;
 }
diff --git a/src/tokenizer.c b/src/tokenizer.c
index ce12bbb..1677205 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -94,6 +94,42 @@ static Token _Tokenizer_IdentifierToken(StringView* source)
 	};
 }
 
+/* Try to match one fixed-spelling token at the start of `source`; consumes
+ * it and returns its Token on success, TOKEN_NONE otherwise.
+ * FIX: a literal must appear before any literal that is its prefix
+ * ("<=" before "<", ">=" before ">", as "->" already was before "-"),
+ * otherwise the shorter prefix always matches first and the two-character
+ * operators are unreachable (maximal-munch ordering). */
+static Token _Tokenizer_SimpleToken(StringView* source)
+{
+    const char* literal_table[] = { "{", "}", "&", ":", "+", "->", "-", "*", "/", "|", "==", "!=", "<=", "<", ">=", ">", "," };
+    const enum TokenType type_table[] = {
+        TOKENTYPE_LEFT_BRACE,
+        TOKENTYPE_RIGHT_BRACE,
+        TOKENTYPE_AMPERSAND,
+        TOKENTYPE_COLON,
+        TOKENTYPE_PLUS,
+        TOKENTYPE_ARROW,
+        TOKENTYPE_MINUS,
+        TOKENTYPE_MULTIPLY,
+        TOKENTYPE_DIVIDE,
+        TOKENTYPE_PIPE,
+        TOKENTYPE_EQUALITY,
+        TOKENTYPE_INEQUALITY,
+        TOKENTYPE_LESSEQUAL,
+        TOKENTYPE_LESSTHAN,
+        TOKENTYPE_GREATEREQUAL,
+        TOKENTYPE_GREATERTHAN,
+        TOKENTYPE_COMMA,
+    };
+
+    for (size_t i = 0; i < sizeof(literal_table) / sizeof(literal_table[0]); i++) {
+        StringView literal_view = StringView_FromString(literal_table[i]);
+        if (StringView_StartsWith(*source, literal_view)) {
+            *source = StringView_Drop(*source, literal_view.length);
+            return (Token) {
+                .type = type_table[i],
+                .get = { .identifier = STRINGVIEW_NONE }
+            };
+        }
+    }
+    return TOKEN_NONE;
+}
+
 Token Tokenizer_NextToken(StringView* source)
 {
 	while (source->length != 0 && isspace(source->source[0])) {
@@ -104,6 +140,13 @@ Token Tokenizer_NextToken(StringView* source)
 		return TOKEN_NONE;
 	}
 
+	/* NOTE(review): simple tokens are matched before numbers, so "-5" now
+	 * lexes as MINUS, INTEGER(5) instead of a negative literal and the
+	 * StringView_StartsWith(*source, "-") test below is unreachable -
+	 * confirm this is the intended grammar. */
+	{
+		Token simple_token = _Tokenizer_SimpleToken(source);
+		if (simple_token.type != TOKENTYPE_NONE) {
+			return simple_token;
+		}
+	}
+
 	if (isdigit(source->source[0]) || StringView_StartsWith(*source, StringView_FromString("-"))) {
 		// parse int/double
 		return _Tokenizer_NumberToken(source);
@@ -114,3 +157,53 @@ Token Tokenizer_NextToken(StringView* source)
 		return (Token) {.type = TOKENTYPE_ERROR, .get = {.error = *source } };
 	}
 }
+
+/* Map a TokenType to its enumerator name for debug printing. */
+const char* TokenType_ToString(enum TokenType type)
+{
+    switch (type) {
+    case TOKENTYPE_NONE:
+        return "TOKENTYPE_NONE";
+    case TOKENTYPE_INTEGER:
+        return "TOKENTYPE_INTEGER";
+    case TOKENTYPE_DOUBLE:
+        return "TOKENTYPE_DOUBLE";
+    case TOKENTYPE_IDENTIFIER:
+        return "TOKENTYPE_IDENTIFIER";
+    case TOKENTYPE_LEFT_BRACE:
+        return "TOKENTYPE_LEFT_BRACE";
+    case TOKENTYPE_RIGHT_BRACE:
+        return "TOKENTYPE_RIGHT_BRACE";
+    case TOKENTYPE_AMPERSAND:
+        return "TOKENTYPE_AMPERSAND";
+    case TOKENTYPE_PLUS:
+        return "TOKENTYPE_PLUS";
+    case TOKENTYPE_MINUS:
+        return "TOKENTYPE_MINUS";
+    case TOKENTYPE_MULTIPLY:
+        return "TOKENTYPE_MULTIPLY";
+    case TOKENTYPE_DIVIDE:
+        return "TOKENTYPE_DIVIDE";
+    case TOKENTYPE_PIPE:
+        return "TOKENTYPE_PIPE";
+    case TOKENTYPE_ARROW:
+        return "TOKENTYPE_ARROW";
+    case TOKENTYPE_COLON:
+        return "TOKENTYPE_COLON";
+    case TOKENTYPE_ERROR:
+        return "TOKENTYPE_ERROR";
+    case TOKENTYPE_EQUALITY:
+        return "TOKENTYPE_EQUALITY";
+    case TOKENTYPE_INEQUALITY:
+        return "TOKENTYPE_INEQUALITY";
+    case TOKENTYPE_LESSTHAN:
+        return "TOKENTYPE_LESSTHAN";
+    case TOKENTYPE_LESSEQUAL:
+        return "TOKENTYPE_LESSEQUAL";
+    case TOKENTYPE_GREATERTHAN:
+        return "TOKENTYPE_GREATERTHAN";
+    case TOKENTYPE_GREATEREQUAL:
+        return "TOKENTYPE_GREATEREQUAL";
+    case TOKENTYPE_COMMA:
+        return "TOKENTYPE_COMMA";
+    }
+    /* FIX: control reached the end of a non-void function when `type` held
+     * a value outside the enum - undefined behavior (CERT MSC37-C). */
+    return "TOKENTYPE_UNKNOWN";
+}
diff --git a/src/tokenizer.h b/src/tokenizer.h
index acf1dc6..c794417 100644
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -9,6 +9,23 @@ enum TokenType {
     TOKENTYPE_INTEGER,
     TOKENTYPE_DOUBLE,
     TOKENTYPE_IDENTIFIER,
+    TOKENTYPE_LEFT_BRACE,
+    TOKENTYPE_RIGHT_BRACE,
+    TOKENTYPE_AMPERSAND,
+    TOKENTYPE_PLUS,
+    TOKENTYPE_MINUS,
+    TOKENTYPE_MULTIPLY,
+    TOKENTYPE_DIVIDE,
+    TOKENTYPE_PIPE,
+    TOKENTYPE_ARROW,
+    TOKENTYPE_COLON,
+    TOKENTYPE_EQUALITY,
+    TOKENTYPE_INEQUALITY,
+    TOKENTYPE_LESSTHAN,
+    TOKENTYPE_LESSEQUAL,
+    TOKENTYPE_GREATERTHAN,
+    TOKENTYPE_GREATEREQUAL,
+    TOKENTYPE_COMMA,
     TOKENTYPE_ERROR,
 };
 
@@ -28,5 +45,6 @@ typedef struct Token_s {
 #define TOKEN_NONE ((Token) {.type = TOKENTYPE_NONE, .get = {.error = STRINGVIEW_NONE } } )
 
 Token Tokenizer_NextToken(StringView* source);
+const char* TokenType_ToString(enum TokenType type);
 
 #endif //header guard