From e6064bd375f3567756218ee6cd2d6fa9f3287944 Mon Sep 17 00:00:00 2001 From: VegOwOtenks Date: Thu, 13 Jun 2024 16:50:37 +0200 Subject: [PATCH] Literal matching working again! --- src/regex/match.c | 245 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 185 insertions(+), 60 deletions(-) diff --git a/src/regex/match.c b/src/regex/match.c index 18622df..f6c3913 100644 --- a/src/regex/match.c +++ b/src/regex/match.c @@ -8,8 +8,10 @@ typedef struct RegexMatcher_s { RegexMatchThreadGroup top_group; + DynamicArray visitor_stack; + size_t depth; - RegexMatchThread finished_threads; + RegexMatchThread finished_thread; } RegexMatcher; static int RegexMatchThread_New(RegexMatchThread* thread, StringView string, size_t index, Regex* regex) @@ -275,7 +277,7 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors) visitors, visitors->reserved - 1 ); - RegexMatchThreadGroup* group = (void*) current_visitor->type; + RegexMatchThreadGroup* group = current_visitor->as.group; if (DynamicArray_GetLength(&group->threads) != 0) { // Append first sub thread to visitor stack @@ -312,6 +314,54 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors) return; } +static int _RegexMatcher_Create(RegexMatcher* matcher, Regex* regex, StringView string, size_t start) +{ + int group_code = RegexMatchThreadGroup_Create( + &matcher->top_group, + 0, + regex->machine_memory.allocator + ); + if (group_code) { + return group_code; + } + + int stack_code = DynamicArray_Create( + &matcher->visitor_stack, + sizeof(struct GroupVisitor), + 16, + regex->machine_memory.allocator + ); + if (stack_code) { + RegexMatchThreadGroup_Destroy2(&matcher->top_group); + return stack_code; + } + + RegexMatchThread* root_thread = _RegexMatchThreadGroup_NewThread(&matcher->top_group); + if (root_thread == NULL) { + RegexMatchThreadGroup_Destroy2(&matcher->top_group); + DynamicArray_Destroy(&matcher->visitor_stack); + return ENOMEM; + } + if (RegexMatchThread_New(root_thread, string, start, regex)) { + RegexMatchThreadGroup_Destroy2(&matcher->top_group); + DynamicArray_Destroy(&matcher->visitor_stack); + return ENOMEM; + } + + matcher->depth = 1; + + memset(&matcher->finished_thread, 0, sizeof(matcher->finished_thread)); + + return EXIT_SUCCESS; +} + +static void _RegexMatcher_Destroy(RegexMatcher* matcher) +{ + RegexMatchThreadGroup_Destroy2(&matcher->top_group); + DynamicArray_Destroy(&matcher->visitor_stack); + memset(matcher, 0, sizeof(*matcher)); +} + static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, size_t* depth, RegexMachineStateBase* new_head) { RegexMatchThread* child; @@ -384,6 +434,7 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz // remove from old group _RegexMatchThreadGroup_ForgetThread(parent_group, *parent); + parent_visitor->position--; } if (child == NULL) { @@ -396,18 +447,19 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz } child->machine_head = new_head; - child->number = parent->number + DynamicArray_GetLength(&parent_group.threads) - 1; + child->number = (*parent)->number + + DynamicArray_GetLength(&parent_group->threads) - 1; return EXIT_SUCCESS; } -static int _HandleOption(DynamicArray* visitors, size_t* depth) +static int _HandleOption(RegexMatcher* matcher) { struct GroupVisitor* visitor = DynamicArray_GetPointer( - visitors, - visitors->reserved - 1 + &matcher->visitor_stack, + DynamicArray_GetLength(&matcher->visitor_stack) ); - RegexMatchThread* thread = (void*) visitor->type; + RegexMatchThread* thread = visitor->as.thread; RegexMachineStateBase* first_head = thread->machine_head; { @@ -420,8 +472,8 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth) int child_code = _NewRegexChild( &thread, - visitors, - depth, + &matcher->visitor_stack, + &matcher->depth, option_head->next ); if (child_code) { @@ -435,13 +487,37 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth) return EXIT_SUCCESS; } -static int _AdvanceThread(DynamicArray* visitors, size_t* depth) +static void _TryFinishThread(RegexMatcher* matcher, RegexMatchThread* thread) +{ + struct GroupVisitor* parent_visitor = DynamicArray_GetPointer( + &matcher->visitor_stack, + DynamicArray_GetLength(&matcher->visitor_stack) - 2 + ); + RegexMatchThreadGroup* parent_group = parent_visitor->as.group; + size_t thread_index = DynamicArray_FindFunctionLinear( + &parent_group->threads, + (DynamicArrayLinearFindFunction) _FindThreadLinear, + thread + ); + + if (thread_index == 0 + && DynamicArray_GetLength(&matcher->visitor_stack) == 2) { + // Only the first thread in the top-most group is finishable + // All other threads have a lower priority, this is because + // of the order they have to be matched in. + matcher->finished_thread = *thread; + _RegexMatchThreadGroup_ForgetThread(parent_group, thread); + matcher->visitor_stack.reserved = 0; + } +} + +static int _AdvanceThread(RegexMatcher* matcher) { struct GroupVisitor* visitor = DynamicArray_GetPointer( - visitors, - visitors->reserved - 1 + &matcher->visitor_stack, + DynamicArray_GetLength(&matcher->visitor_stack) - 1 ); - RegexMatchThread* thread = (void*) visitor->type; + RegexMatchThread* thread = visitor->as.thread; int code; bool discard; @@ -453,7 +529,7 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth) code = _HandleGroup(thread); break; case REGEXMACHINESTATETYPE_OPTION: - code = _HandleOption(visitors, depth); + code = _HandleOption(matcher); break; case REGEXMACHINESTATETYPE_REPEAT: code = _HandleRepeat(thread); @@ -467,10 +543,12 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth) } if (thread->machine_head == NULL) { - // Match done - + _TryFinishThread(matcher, thread); } + // Remove from visitor stack + DynamicArray_Remove(&matcher->visitor_stack, matcher->visitor_stack.reserved - 1); + return EXIT_SUCCESS; } @@ -509,85 +587,132 @@ static void _DestroyThreadHierarchy(DynamicArray* visitors, RegexMatchThreadGrou return; } -static int _HandleAdvanceVisitor(DynamicArray* visitor_stack, size_t* depth) +static int _AdvanceGroup(RegexMatcher* matcher) { - struct GroupVisitor* current_visitor = DynamicArray_GetPointer( - visitor_stack, - visitor_stack->reserved - 1 + struct GroupVisitor* visitor = DynamicArray_GetPointer( + &matcher->visitor_stack, + DynamicArray_GetLength(&matcher->visitor_stack) - 1 ); + RegexMatchThreadGroup* group = visitor->as.group; + visitor->position += 1; - switch (current_visitor->type) { - case REGEXMATCHTHREADTYPE_THREAD: - _AdvanceThread( - visitor_stack, - depth + if (DynamicArray_GetLength(&group->threads) > visitor->position) { + struct GroupVisitor* child_visitor; + DynamicArray_AppendEmpty( + &matcher->visitor_stack, + (void**) &child_visitor + ); + child_visitor->type = REGEXMATCHTHREADTYPE_THREAD; + child_visitor->as.thread = DynamicArray_GetPointer(&group->threads, visitor->position); + } else { + // calculate position in subgroup array + unsigned int position = visitor->position + - DynamicArray_GetLength(&group->threads); + + if (DynamicArray_GetLength(&group->subgroups) == position) { + // Remove this group + DynamicArray_Remove( + &matcher->visitor_stack, + matcher->visitor_stack.reserved - 1 ); - break; - case REGEXMATCHTHREADTYPE_GROUP: - break; + } else { + // Add subgroup + struct GroupVisitor* sgroup_visitor; + DynamicArray_AppendEmpty( + &matcher->visitor_stack, + (void**) &sgroup_visitor + ); + sgroup_visitor->type = REGEXMATCHTHREADTYPE_GROUP; + sgroup_visitor->position = -1; + sgroup_visitor->as.group = DynamicArray_GetPointer( + &group->subgroups, + position + ); + } } return EXIT_SUCCESS; } -static int _WalkThreads(DynamicArray* visitor_stack, RegexMatchThreadGroup* group, size_t* depth) +static int _HandleAdvanceVisitor(RegexMatcher* matcher) +{ + struct GroupVisitor* current_visitor = DynamicArray_GetPointer( + &matcher->visitor_stack, + DynamicArray_GetLength(&matcher->visitor_stack) - 1 + ); + + int advance_code = EXIT_SUCCESS; + switch (current_visitor->type) { + case REGEXMATCHTHREADTYPE_THREAD: + advance_code = _AdvanceThread(matcher); + break; + case REGEXMATCHTHREADTYPE_GROUP: + advance_code = _AdvanceGroup(matcher); + break; + } + + return advance_code; +} + +static int _WalkThreads(RegexMatcher* matcher) { { struct GroupVisitor* zero_visitor; DynamicArray_AppendEmpty( - visitor_stack, + &matcher->visitor_stack, (void**) &zero_visitor ); zero_visitor->type = REGEXMATCHTHREADTYPE_GROUP; - zero_visitor->as.group = group; + zero_visitor->as.group = &matcher->top_group; + zero_visitor->position = -1; } - while (DynamicArray_GetLength(visitor_stack) != 0) { - int advance_code = _HandleAdvanceVisitor( - visitor_stack, - depth - ); + while (DynamicArray_GetLength(&matcher->visitor_stack) != 0) { + int advance_code = _HandleAdvanceVisitor(matcher); if (advance_code) return advance_code; } return EXIT_SUCCESS; } +static bool _HaveFinishedThread(RegexMatcher* matcher) +{ + return matcher->finished_thread.regex != NULL; +} + +static bool _HaveThreadsRunning(RegexMatcher* matcher) +{ + return matcher->top_group.subgroups.reserved != 0 + || matcher->top_group.threads.reserved != 0; +} + int Regex_MatchHere(Regex* regex, StringView string, size_t start, RegexMatch* match) { int return_code = EXIT_SUCCESS; - // initialize variables etc.... - DynamicArray visitor_stack; - if (DynamicArray_Create( - &visitor_stack, - sizeof(struct GroupVisitor), - 16, - regex->machine_memory.allocator) - ) { - return ENOMEM; + RegexMatcher matcher; + if (_RegexMatcher_Create(&matcher, regex, string, start)) { + return EXIT_FAILURE; } - RegexMatchThreadGroup top_level_group; - RegexMatchThreadGroup_Create(&top_level_group, 0, regex->machine_memory.allocator); - size_t depth = 1; - - { - RegexMatchThread* first_thread; - first_thread = RegexMatchThreadGroup_NewThread(&top_level_group); - first_head->number = 0; + while (! _HaveFinishedThread(&matcher) + && _HaveThreadsRunning(&matcher)) { + _WalkThreads(&matcher); } - bool haveFinishedThread = false; - while (! haveFinishedThread) { - _WalkThreads(&visitor_stack, &top_level_group, &depth); + if (_HaveFinishedThread(&matcher)) { + *match = matcher.finished_thread.match; + memset( + &matcher.finished_thread.match, + 0, + sizeof(matcher.finished_thread.match) + ); + RegexMatchThread_Del(&matcher.finished_thread); } -defer_tl_group: - _DestroyThreadHierarchy(&visitor_stack, &top_level_group); -defer_stack: - DynamicArray_Destroy(&visitor_stack); + _DestroyThreadHierarchy(&matcher.visitor_stack, &matcher.top_group); + _RegexMatcher_Destroy(&matcher); return return_code; }