Literal matching working again!

This commit is contained in:
VegOwOtenks 2024-06-13 16:50:37 +02:00
parent a2317836fa
commit e6064bd375

View file

@ -8,8 +8,10 @@
// Bundles the state of one match attempt. The right-hand (post-commit)
// column is the current layout:
//   top_group       - root thread group; _WalkThreads seeds its traversal
//                     with a visitor pointing at it.
//   visitor_stack   - created by _RegexMatcher_Create with elements of
//                     sizeof(struct GroupVisitor).
//   depth           - set to 1 by _RegexMatcher_Create and handed to
//                     _NewRegexChild by _HandleOption.
//   finished_thread - zeroed on creation; _TryFinishThread copies the
//                     finishing thread into it and _HaveFinishedThread
//                     tests its `regex` member against NULL.
typedef struct RegexMatcher_s { typedef struct RegexMatcher_s {
RegexMatchThreadGroup top_group; RegexMatchThreadGroup top_group;
DynamicArray visitor_stack;
size_t depth;
RegexMatchThread finished_threads; RegexMatchThread finished_thread;
} RegexMatcher; } RegexMatcher;
static int RegexMatchThread_New(RegexMatchThread* thread, StringView string, size_t index, Regex* regex) static int RegexMatchThread_New(RegexMatchThread* thread, StringView string, size_t index, Regex* regex)
@ -275,7 +277,7 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors)
visitors, visitors,
visitors->reserved - 1 visitors->reserved - 1
); );
RegexMatchThreadGroup* group = (void*) current_visitor->type; RegexMatchThreadGroup* group = current_visitor->as.group;
if (DynamicArray_GetLength(&group->threads) != 0) { if (DynamicArray_GetLength(&group->threads) != 0) {
// Append first sub thread to visitor stack // Append first sub thread to visitor stack
@ -312,6 +314,54 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors)
return; return;
} }
/*
 * Prepare `matcher` for a match attempt of `regex` on `string`, starting
 * at offset `start`.
 *
 * Creates the top-level thread group and the visitor stack (both using
 * regex->machine_memory.allocator), adds one root thread to the group and
 * initialises it via RegexMatchThread_New. On success `depth` is 1 and
 * `finished_thread` is zero-filled.
 *
 * Returns EXIT_SUCCESS on success. On failure, everything created so far
 * is destroyed again and a non-zero code is returned: the code reported by
 * the failing step, or ENOMEM when the root thread could not be obtained.
 */
static int _RegexMatcher_Create(RegexMatcher* matcher, Regex* regex, StringView string, size_t start)
{
    RegexMatchThread* root_thread = NULL;

    int code = RegexMatchThreadGroup_Create(
        &matcher->top_group,
        0,
        regex->machine_memory.allocator
    );
    if (code) {
        return code;
    }

    code = DynamicArray_Create(
        &matcher->visitor_stack,
        sizeof(struct GroupVisitor),
        16,
        regex->machine_memory.allocator
    );
    if (code) {
        goto defer_group;
    }

    root_thread = _RegexMatchThreadGroup_NewThread(&matcher->top_group);
    if (root_thread == NULL) {
        code = ENOMEM;
        goto defer_stack;
    }

    // Hand the callee's own error code upwards instead of masking it as
    // ENOMEM, consistent with the two creation steps above.
    code = RegexMatchThread_New(root_thread, string, start, regex);
    if (code) {
        goto defer_stack;
    }

    matcher->depth = 1;
    memset(&matcher->finished_thread, 0, sizeof(matcher->finished_thread));

    return EXIT_SUCCESS;

    // Cleanup runs in reverse order of creation; the visitor stack is
    // still empty at this point, so it holds no references into the group.
defer_stack:
    DynamicArray_Destroy(&matcher->visitor_stack);
defer_group:
    RegexMatchThreadGroup_Destroy2(&matcher->top_group);
    return code;
}
/*
 * Tear down a matcher set up by _RegexMatcher_Create: destroys the
 * top-level thread group, then the visitor stack, and finally zero-fills
 * the whole struct.
 */
static void _RegexMatcher_Destroy(RegexMatcher* matcher)
{
    RegexMatchThreadGroup* root_group = &matcher->top_group;
    DynamicArray* stack = &matcher->visitor_stack;

    RegexMatchThreadGroup_Destroy2(root_group);
    DynamicArray_Destroy(stack);

    // Leave no stale pointers behind in the caller's struct.
    memset(matcher, 0, sizeof *matcher);
}
static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, size_t* depth, RegexMachineStateBase* new_head) static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, size_t* depth, RegexMachineStateBase* new_head)
{ {
RegexMatchThread* child; RegexMatchThread* child;
@ -384,6 +434,7 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz
// remove from old group // remove from old group
_RegexMatchThreadGroup_ForgetThread(parent_group, *parent); _RegexMatchThreadGroup_ForgetThread(parent_group, *parent);
parent_visitor->position--;
} }
if (child == NULL) { if (child == NULL) {
@ -396,18 +447,19 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz
} }
child->machine_head = new_head; child->machine_head = new_head;
child->number = parent->number + DynamicArray_GetLength(&parent_group.threads) - 1; child->number = (*parent)->number
+ DynamicArray_GetLength(&parent_group->threads) - 1;
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
static int _HandleOption(DynamicArray* visitors, size_t* depth) static int _HandleOption(RegexMatcher* matcher)
{ {
struct GroupVisitor* visitor = DynamicArray_GetPointer( struct GroupVisitor* visitor = DynamicArray_GetPointer(
visitors, &matcher->visitor_stack,
visitors->reserved - 1 DynamicArray_GetLength(&matcher->visitor_stack)
); );
RegexMatchThread* thread = (void*) visitor->type; RegexMatchThread* thread = visitor->as.thread;
RegexMachineStateBase* first_head = thread->machine_head; RegexMachineStateBase* first_head = thread->machine_head;
{ {
@ -420,8 +472,8 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth)
int child_code = _NewRegexChild( int child_code = _NewRegexChild(
&thread, &thread,
visitors, &matcher->visitor_stack,
depth, &matcher->depth,
option_head->next option_head->next
); );
if (child_code) { if (child_code) {
@ -435,13 +487,37 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth)
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
// _TryFinishThread (right-hand column of the next line; the left-hand part
// is the pre-commit signature of _AdvanceThread).
//
// Records `thread` as the matcher's finished thread, but only if it sits at
// index 0 of its parent group's thread list and the visitor stack holds
// exactly two entries. In that case the thread is copied by value into
// matcher->finished_thread, dropped from the parent group and the visitor
// stack is emptied. Otherwise nothing is changed.
//
// The parent group is taken from the second-to-top visitor, so the stack
// must contain at least two entries when this is called.
static int _AdvanceThread(DynamicArray* visitors, size_t* depth) static void _TryFinishThread(RegexMatcher* matcher, RegexMatchThread* thread)
{
struct GroupVisitor* parent_visitor = DynamicArray_GetPointer(
&matcher->visitor_stack,
DynamicArray_GetLength(&matcher->visitor_stack) - 2
);
RegexMatchThreadGroup* parent_group = parent_visitor->as.group;
// Position of `thread` inside the parent group's thread array.
size_t thread_index = DynamicArray_FindFunctionLinear(
&parent_group->threads,
(DynamicArrayLinearFindFunction) _FindThreadLinear,
thread
);
if (thread_index == 0
&& DynamicArray_GetLength(&matcher->visitor_stack) == 2) {
// Only the first thread in the top-most group is finishable
// All other threads have a lower priority, this is because
// of the order they have to be matched in.
matcher->finished_thread = *thread;
// NOTE(review): presumably this only unlinks the thread without releasing
// what the by-value copy above still refers to — confirm.
_RegexMatchThreadGroup_ForgetThread(parent_group, thread);
// Empties the visitor stack, which ends the loop in _WalkThreads.
// NOTE(review): _AdvanceThread appears to call DynamicArray_Remove with
// index `reserved - 1` after this returns; with reserved == 0 that index
// wraps around — confirm this is handled.
matcher->visitor_stack.reserved = 0;
}
}
static int _AdvanceThread(RegexMatcher* matcher)
{ {
struct GroupVisitor* visitor = DynamicArray_GetPointer( struct GroupVisitor* visitor = DynamicArray_GetPointer(
visitors, &matcher->visitor_stack,
visitors->reserved - 1 DynamicArray_GetLength(&matcher->visitor_stack) - 1
); );
RegexMatchThread* thread = (void*) visitor->type; RegexMatchThread* thread = visitor->as.thread;
int code; int code;
bool discard; bool discard;
@ -453,7 +529,7 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
code = _HandleGroup(thread); code = _HandleGroup(thread);
break; break;
case REGEXMACHINESTATETYPE_OPTION: case REGEXMACHINESTATETYPE_OPTION:
code = _HandleOption(visitors, depth); code = _HandleOption(matcher);
break; break;
case REGEXMACHINESTATETYPE_REPEAT: case REGEXMACHINESTATETYPE_REPEAT:
code = _HandleRepeat(thread); code = _HandleRepeat(thread);
@ -467,10 +543,12 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
} }
if (thread->machine_head == NULL) { if (thread->machine_head == NULL) {
// Match done _TryFinishThread(matcher, thread);
} }
// Remove from visitor stack
DynamicArray_Remove(&matcher->visitor_stack, matcher->visitor_stack.reserved - 1);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
@ -509,85 +587,132 @@ static void _DestroyThreadHierarchy(DynamicArray* visitors, RegexMatchThreadGrou
return; return;
} }
// _AdvanceGroup (right-hand column; the left-hand column is the pre-commit
// body of _HandleAdvanceVisitor).
//
// Advances the group visitor on top of the visitor stack by one position:
//   - position still inside group->threads: push a THREAD visitor for the
//     thread at that position;
//   - position exactly one past the last subgroup: pop this group visitor;
//   - otherwise: push a GROUP visitor (position -1) for the subgroup at
//     `position - number of threads`.
// Always returns EXIT_SUCCESS; the result of DynamicArray_AppendEmpty is
// not checked.
static int _HandleAdvanceVisitor(DynamicArray* visitor_stack, size_t* depth) static int _AdvanceGroup(RegexMatcher* matcher)
{ {
struct GroupVisitor* current_visitor = DynamicArray_GetPointer( struct GroupVisitor* visitor = DynamicArray_GetPointer(
visitor_stack, &matcher->visitor_stack,
visitor_stack->reserved - 1 DynamicArray_GetLength(&matcher->visitor_stack) - 1
); );
RegexMatchThreadGroup* group = visitor->as.group;
// Visitors start at position -1, so the first increment lands on index 0.
visitor->position += 1;
switch (current_visitor->type) { if (DynamicArray_GetLength(&group->threads) > visitor->position) {
case REGEXMATCHTHREADTYPE_THREAD: struct GroupVisitor* child_visitor;
_AdvanceThread( DynamicArray_AppendEmpty(
visitor_stack, &matcher->visitor_stack,
depth (void**) &child_visitor
);
child_visitor->type = REGEXMATCHTHREADTYPE_THREAD;
// NOTE(review): `visitor` was fetched before the append above; if
// DynamicArray_AppendEmpty can relocate the storage, `visitor->position`
// is read through a stale pointer here — confirm.
child_visitor->as.thread = DynamicArray_GetPointer(&group->threads, visitor->position);
} else {
// calculate position in subgroup array
unsigned int position = visitor->position
- DynamicArray_GetLength(&group->threads);
if (DynamicArray_GetLength(&group->subgroups) == position) {
// Remove this group
DynamicArray_Remove(
&matcher->visitor_stack,
matcher->visitor_stack.reserved - 1
); );
break; } else {
case REGEXMATCHTHREADTYPE_GROUP: // Add subgroup
break; struct GroupVisitor* sgroup_visitor;
DynamicArray_AppendEmpty(
&matcher->visitor_stack,
(void**) &sgroup_visitor
);
sgroup_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
sgroup_visitor->position = -1;
sgroup_visitor->as.group = DynamicArray_GetPointer(
&group->subgroups,
position
);
}
} }
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
// _HandleAdvanceVisitor (right-hand column of the next line; the left-hand
// part is the pre-commit signature of _WalkThreads).
//
// Looks at the visitor on top of the matcher's visitor stack and dispatches
// on its type: THREAD visitors go to _AdvanceThread, GROUP visitors to
// _AdvanceGroup. Returns the callee's code; for any other type value the
// switch does nothing and EXIT_SUCCESS is returned.
// The stack must not be empty, since index `length - 1` is read.
static int _WalkThreads(DynamicArray* visitor_stack, RegexMatchThreadGroup* group, size_t* depth) static int _HandleAdvanceVisitor(RegexMatcher* matcher)
{
struct GroupVisitor* current_visitor = DynamicArray_GetPointer(
&matcher->visitor_stack,
DynamicArray_GetLength(&matcher->visitor_stack) - 1
);
int advance_code = EXIT_SUCCESS;
switch (current_visitor->type) {
case REGEXMATCHTHREADTYPE_THREAD:
advance_code = _AdvanceThread(matcher);
break;
case REGEXMATCHTHREADTYPE_GROUP:
advance_code = _AdvanceGroup(matcher);
break;
}
return advance_code;
}
// Performs one traversal over the matcher's thread hierarchy.
//
// Pushes a GROUP visitor for matcher->top_group (position -1) onto the
// visitor stack, then keeps calling _HandleAdvanceVisitor until the stack
// is empty. Returns the first non-zero code reported by
// _HandleAdvanceVisitor, or EXIT_SUCCESS once the stack has drained.
// (From the body onwards, the left-hand column is the pre-commit version
// that took the stack, group and depth as separate arguments.)
static int _WalkThreads(RegexMatcher* matcher)
{ {
{ {
struct GroupVisitor* zero_visitor; struct GroupVisitor* zero_visitor;
// NOTE(review): the result of DynamicArray_AppendEmpty is not checked, so
// `zero_visitor` would be used uninitialised if the append can fail — confirm.
DynamicArray_AppendEmpty( DynamicArray_AppendEmpty(
visitor_stack, &matcher->visitor_stack,
(void**) &zero_visitor (void**) &zero_visitor
); );
zero_visitor->type = REGEXMATCHTHREADTYPE_GROUP; zero_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
zero_visitor->as.group = group; zero_visitor->as.group = &matcher->top_group;
zero_visitor->position = -1;
} }
while (DynamicArray_GetLength(visitor_stack) != 0) { while (DynamicArray_GetLength(&matcher->visitor_stack) != 0) {
int advance_code = _HandleAdvanceVisitor( int advance_code = _HandleAdvanceVisitor(matcher);
visitor_stack,
depth
);
if (advance_code) return advance_code; if (advance_code) return advance_code;
} }
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
/*
 * Tells whether a finished thread has been recorded in the matcher, i.e.
 * whether the `regex` member of matcher->finished_thread is non-NULL.
 */
static bool _HaveFinishedThread(RegexMatcher* matcher)
{
    const RegexMatchThread* finished = &matcher->finished_thread;

    return finished->regex != NULL;
}
/*
 * Tells whether the top-level group still contains anything: true when its
 * subgroup array or its thread array has a non-zero `reserved` count.
 */
static bool _HaveThreadsRunning(RegexMatcher* matcher)
{
    const RegexMatchThreadGroup* root_group = &matcher->top_group;

    if (root_group->subgroups.reserved != 0) {
        return true;
    }
    return root_group->threads.reserved != 0;
}
// Attempts to match `regex` against `string` beginning at offset `start`.
//
// Post-commit (right-hand column) flow:
//   1. Set up a RegexMatcher via _RegexMatcher_Create; any failure there is
//      reported as EXIT_FAILURE.
//   2. Call _WalkThreads repeatedly while no thread has finished and the
//      top-level group still has threads or subgroups.
//   3. If a thread finished, move its `match` into *match, zero the moved-
//      from member and release the thread with RegexMatchThread_Del.
//   4. Destroy the remaining thread hierarchy and the matcher.
// The left-hand column is the pre-commit body, which managed the visitor
// stack and top-level group as locals.
//
// NOTE(review): on the right-hand side `return_code` is never reassigned,
// so EXIT_SUCCESS is returned even when no thread finished and *match was
// left untouched; the result of _WalkThreads is also ignored — confirm this
// is intended.
int Regex_MatchHere(Regex* regex, StringView string, size_t start, RegexMatch* match) int Regex_MatchHere(Regex* regex, StringView string, size_t start, RegexMatch* match)
{ {
int return_code = EXIT_SUCCESS; int return_code = EXIT_SUCCESS;
// initialize variables etc.... RegexMatcher matcher;
DynamicArray visitor_stack; if (_RegexMatcher_Create(&matcher, regex, string, start)) {
if (DynamicArray_Create( return EXIT_FAILURE;
&visitor_stack,
sizeof(struct GroupVisitor),
16,
regex->machine_memory.allocator)
) {
return ENOMEM;
}
RegexMatchThreadGroup top_level_group; while (! _HaveFinishedThread(&matcher)
RegexMatchThreadGroup_Create(&top_level_group, 0, regex->machine_memory.allocator); && _HaveThreadsRunning(&matcher)) {
size_t depth = 1; _WalkThreads(&matcher);
{
RegexMatchThread* first_thread;
first_thread = RegexMatchThreadGroup_NewThread(&top_level_group);
first_head->number = 0;
}
bool haveFinishedThread = false; if (_HaveFinishedThread(&matcher)) {
while (! haveFinishedThread) { *match = matcher.finished_thread.match;
_WalkThreads(&visitor_stack, &top_level_group, &depth); memset(
&matcher.finished_thread.match,
0,
sizeof(matcher.finished_thread.match)
);
RegexMatchThread_Del(&matcher.finished_thread);
} }
// NOTE(review): the right-hand side runs _DestroyThreadHierarchy on
// top_group and then _RegexMatcher_Destroy, which destroys top_group again
// via RegexMatchThreadGroup_Destroy2 — confirm this is not a double release.
defer_tl_group: _DestroyThreadHierarchy(&matcher.visitor_stack, &matcher.top_group);
_DestroyThreadHierarchy(&visitor_stack, &top_level_group); _RegexMatcher_Destroy(&matcher);
defer_stack:
DynamicArray_Destroy(&visitor_stack);
return return_code; return return_code;
}