#include "tokenizer.h"

#include <ctype.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Consumes the leading run of decimal digits from *source and returns a view
 * over exactly those digits. On return *source has been advanced past them.
 * The returned view has length 0 when *source does not start with a digit.
 */
static StringView StringViewOfNumberTillNextNonDigit(StringView* source) {
    /* Start from a view produced by slicing (0, 0) and grow its length by one
     * for every digit consumed below. */
    StringView stringViewOfNumber = StringView_Slice(*source, 0, 0);

    /* <ctype.h> functions require a value representable as unsigned char (or
     * EOF); passing a negative plain char is undefined behaviour. */
    while (source->length != 0 && isdigit((unsigned char) source->source[0])) {
        *source = StringView_Drop(*source, 1);
        stringViewOfNumber.length++;
    }

    return stringViewOfNumber;
}

/*
 * Fold step that appends the decimal digit `c` to the integer accumulator *i.
 * Saturates at INT64_MAX instead of overflowing, because signed overflow is
 * undefined behaviour in C. Returns `i` so it can be threaded through the fold.
 */
static int64_t* _StringView_FoldInt64(char c, int64_t* i) {
    const int64_t digit = c - '0';

    if (*i > (INT64_MAX - digit) / 10) {
        /* *i * 10 + digit would not fit: clamp rather than overflow. */
        *i = INT64_MAX;
    } else {
        *i = *i * 10 + digit;
    }
    return i;
}

/*
 * Fold step that appends the decimal digit `c` to the floating-point
 * accumulator *d. Returns `d` so it can be threaded through the fold.
 */
static double* _StringView_FoldDouble(char c, double* d) {
    *d = *d * 10 + (c - '0');
    return d;
}

/*
 * Builds a TOKENTYPE_INTEGER token from a run of decimal digits.
 * `negative` flips the sign of the folded value.
 */
static Token _Tokenizer_ParseInt64(bool negative, StringView integerPart) {
    int64_t theInt64 = 0;
    StringView_FoldLeft(integerPart, &theInt64, (StringViewFoldFunction) _StringView_FoldInt64);

    return (Token) {
        .type = TOKENTYPE_INTEGER,
        .get = { .integer = theInt64 * (negative ? -1 : 1) }
    };
}

/*
 * Builds a TOKENTYPE_DOUBLE token from the digit runs before and after the
 * decimal point. The fractional digits are folded as a whole number and then
 * scaled down by 10^(number of fractional digits).
 */
static Token _Tokenizer_ParseDouble(bool negative, StringView integerPart, StringView decimalPart) {
    double theDouble = 0.0;
    StringView_FoldLeft(integerPart, &theDouble, (StringViewFoldFunction) _StringView_FoldDouble);

    double theDecimal = 0.0;
    StringView_FoldLeft(decimalPart, &theDecimal, (StringViewFoldFunction) _StringView_FoldDouble);

    /* With an empty decimalPart this divides 0.0 by pow(10, 0) == 1. */
    double result = (negative ? -1 : 1)
                  * (theDouble + theDecimal / pow(10.0, (double) decimalPart.length));

    return (Token) {
        .type = TOKENTYPE_DOUBLE,
        .get = { .decimal = result }
    };
}

/*
 * Consumes a numeric literal from *source: an optional leading '-', a run of
 * digits, and optionally a '.' followed by another run of digits.
 * Yields a TOKENTYPE_DOUBLE token when a '.' was present, otherwise a
 * TOKENTYPE_INTEGER token.
 */
static Token _Tokenizer_NumberToken(StringView* source) {
    bool negative = false;
    if (StringView_StartsWith(*source, StringView_FromString("-"))) {
        negative = true;
        *source = StringView_Drop(*source, 1);
    }

    StringView integerPart = StringViewOfNumberTillNextNonDigit(source);

    bool has_point = false;
    if (source->length != 0 && source->source[0] == '.') {
        *source = StringView_Drop(*source, 1);
        has_point = true;
    }

    /* When no '.' was consumed this is the empty run: the integer scan above
     * already stopped at a non-digit. */
    StringView decimalPart = StringViewOfNumberTillNextNonDigit(source);

    if (has_point) {
        return _Tokenizer_ParseDouble(negative, integerPart, decimalPart);
    }
    return _Tokenizer_ParseInt64(negative, integerPart);
}

/* Predicate for characters allowed inside an identifier: letters and digits. */
static bool _Tokenizer_IdentifierLetter(char c) {
    /* Cast for the same <ctype.h> domain reason as in the digit scanner. */
    return isalnum((unsigned char) c) != 0;
}

/*
 * Consumes the longest leading run of identifier characters from *source and
 * returns it as a TOKENTYPE_IDENTIFIER token.
 */
static Token _Tokenizer_IdentifierToken(StringView* source) {
    StringView identifier = StringView_TakeWhile(*source, _Tokenizer_IdentifierLetter);
    *source = StringView_Drop(*source, identifier.length);

    return (Token) {
        .type = TOKENTYPE_IDENTIFIER,
        .get = {
            .identifier = identifier,
        }
    };
}

/*
 * Tries to consume one punctuation/operator token from the start of *source.
 * Returns TOKEN_NONE, leaving *source untouched, when nothing matches.
 */
static Token _Tokenizer_SimpleToken(StringView* source) {
    /*
     * The first entry whose literal is a prefix of the input wins, so any
     * literal that is a prefix of another must come AFTER the longer one:
     * "->" before "-", "<=" before "<", ">=" before ">".
     */
    static const struct {
        const char* literal;
        enum TokenType type;
    } simple_tokens[] = {
        { "{",  TOKENTYPE_LEFT_BRACE },
        { "}",  TOKENTYPE_RIGHT_BRACE },
        { "&",  TOKENTYPE_AMPERSAND },
        { ":",  TOKENTYPE_COLON },
        { "+",  TOKENTYPE_PLUS },
        { "->", TOKENTYPE_ARROW },
        { "-",  TOKENTYPE_MINUS },
        { "*",  TOKENTYPE_MULTIPLY },
        { "/",  TOKENTYPE_DIVIDE },
        { "|",  TOKENTYPE_PIPE },
        { "==", TOKENTYPE_EQUALITY },
        { "!=", TOKENTYPE_INEQUALITY },
        { "<=", TOKENTYPE_LESSEQUAL },
        { "<",  TOKENTYPE_LESSTHAN },
        { ">=", TOKENTYPE_GREATEREQUAL },
        { ">",  TOKENTYPE_GREATERTHAN },
        { ",",  TOKENTYPE_COMMA },
        { ";",  TOKENTYPE_SEMICOLON },
    };

    for (size_t i = 0; i < sizeof(simple_tokens) / sizeof(simple_tokens[0]); i++) {
        StringView literal_view = StringView_FromString(simple_tokens[i].literal);
        if (StringView_StartsWith(*source, literal_view)) {
            *source = StringView_Drop(*source, literal_view.length);
            return (Token) {
                .type = simple_tokens[i].type,
                .get = { .identifier = STRINGVIEW_NONE }
            };
        }
    }

    return TOKEN_NONE;
}

/*
 * Reads the next token from *source and advances *source past it.
 *
 * Leading whitespace is skipped first. Returns TOKEN_NONE when only whitespace
 * (or nothing) remains. Otherwise tries, in order: punctuation/operators,
 * numeric literals, identifiers. If none apply, returns a TOKENTYPE_ERROR
 * token carrying the remaining input, and *source is not advanced past the
 * offending character.
 */
Token Tokenizer_NextToken(StringView* source) {
    while (source->length != 0 && isspace((unsigned char) source->source[0])) {
        *source = StringView_Slice(*source, 1, source->length);
    }

    if (source->length == 0) {
        return TOKEN_NONE;
    }

    {
        Token simple_token = _Tokenizer_SimpleToken(source);
        if (simple_token.type != TOKENTYPE_NONE) {
            return simple_token;
        }
    }

    /*
     * NOTE(review): the "-" test below looks unreachable, because a leading
     * '-' is already consumed above as TOKENTYPE_MINUS or TOKENTYPE_ARROW.
     * Kept to preserve the existing structure; confirm whether negative
     * literals are meant to be handled by the parser instead.
     */
    if (isdigit((unsigned char) source->source[0])
        || StringView_StartsWith(*source, StringView_FromString("-"))) {
        // parse int/double
        return _Tokenizer_NumberToken(source);
    } else if (isalpha((unsigned char) source->source[0])) {
        // parse name
        return _Tokenizer_IdentifierToken(source);
    } else {
        return (Token) { .type = TOKENTYPE_ERROR, .get = { .error = *source } };
    }
}

/*
 * Returns the enumerator name of `type` as a string literal, or "INVALID" for
 * a value that matches none of the cases below.
 */
const char* TokenType_ToString(enum TokenType type) {
    switch (type) {
        case TOKENTYPE_NONE:         return "TOKENTYPE_NONE";
        case TOKENTYPE_INTEGER:      return "TOKENTYPE_INTEGER";
        case TOKENTYPE_DOUBLE:       return "TOKENTYPE_DOUBLE";
        case TOKENTYPE_IDENTIFIER:   return "TOKENTYPE_IDENTIFIER";
        case TOKENTYPE_LEFT_BRACE:   return "TOKENTYPE_LEFT_BRACE";
        case TOKENTYPE_RIGHT_BRACE:  return "TOKENTYPE_RIGHT_BRACE";
        case TOKENTYPE_AMPERSAND:    return "TOKENTYPE_AMPERSAND";
        case TOKENTYPE_PLUS:         return "TOKENTYPE_PLUS";
        case TOKENTYPE_MINUS:        return "TOKENTYPE_MINUS";
        case TOKENTYPE_MULTIPLY:     return "TOKENTYPE_MULTIPLY";
        case TOKENTYPE_DIVIDE:       return "TOKENTYPE_DIVIDE";
        case TOKENTYPE_PIPE:         return "TOKENTYPE_PIPE";
        case TOKENTYPE_ARROW:        return "TOKENTYPE_ARROW";
        case TOKENTYPE_COLON:        return "TOKENTYPE_COLON";
        case TOKENTYPE_ERROR:        return "TOKENTYPE_ERROR";
        case TOKENTYPE_EQUALITY:     return "TOKENTYPE_EQUALITY";
        case TOKENTYPE_INEQUALITY:   return "TOKENTYPE_INEQUALITY";
        case TOKENTYPE_LESSTHAN:     return "TOKENTYPE_LESSTHAN";
        case TOKENTYPE_LESSEQUAL:    return "TOKENTYPE_LESSEQUAL";
        case TOKENTYPE_GREATERTHAN:  return "TOKENTYPE_GREATERTHAN";
        case TOKENTYPE_GREATEREQUAL: return "TOKENTYPE_GREATEREQUAL";
        case TOKENTYPE_COMMA:        return "TOKENTYPE_COMMA";
        case TOKENTYPE_SEMICOLON:    return "TOKENTYPE_SEMICOLON";
    }
    return "INVALID";
}