Literal matching working again!

This commit is contained in:
VegOwOtenks 2024-06-13 16:50:37 +02:00
parent a2317836fa
commit e6064bd375

View file

@ -8,8 +8,10 @@
/* Bundles the state of one match attempt so the walking helpers can take
 * a single RegexMatcher* instead of separate stack/group/depth arguments. */
typedef struct RegexMatcher_s {
/* Root thread group; _WalkThreads starts every walk from this group. */
RegexMatchThreadGroup top_group;
/* Stack of struct GroupVisitor entries used while walking groups and threads. */
DynamicArray visitor_stack;
/* Set to 1 by _RegexMatcher_Create and handed to _NewRegexChild by pointer. */
size_t depth;
/* NOTE(review): not referenced by any code visible here; possibly a leftover
 * from the removed side of this diff -- confirm before relying on it. */
RegexMatchThread finished_threads;
/* Zeroed on creation; _TryFinishThread copies the winning thread here and
 * _HaveFinishedThread treats a non-NULL `regex` member as "a match exists". */
RegexMatchThread finished_thread;
} RegexMatcher;
static int RegexMatchThread_New(RegexMatchThread* thread, StringView string, size_t index, Regex* regex)
@ -275,7 +277,7 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors)
visitors,
visitors->reserved - 1
);
RegexMatchThreadGroup* group = (void*) current_visitor->type;
RegexMatchThreadGroup* group = current_visitor->as.group;
if (DynamicArray_GetLength(&group->threads) != 0) {
// Append first sub thread to visitor stack
@ -312,6 +314,54 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors)
return;
}
/**
 * Initialise `matcher` for one match attempt of `regex` against `string`,
 * starting at index `start`.
 *
 * Creates the top-level thread group and the visitor stack (both with
 * regex->machine_memory.allocator), adds one root thread to the top group
 * and initialises it through RegexMatchThread_New(). On success `depth` is 1
 * and `finished_thread` is all-zero, which _HaveFinishedThread() reads as
 * "no match yet".
 *
 * @return EXIT_SUCCESS on success. Otherwise the non-zero code of the step
 *         that failed (ENOMEM when the root thread slot could not be
 *         obtained); everything created up to that point has been destroyed
 *         again, so the caller must not call _RegexMatcher_Destroy().
 */
static int _RegexMatcher_Create(RegexMatcher* matcher, Regex* regex, StringView string, size_t start)
{
    RegexMatchThread* root_thread = NULL;
    int code;

    code = RegexMatchThreadGroup_Create(
        &matcher->top_group,
        0,
        regex->machine_memory.allocator
    );
    if (code) {
        return code;
    }

    code = DynamicArray_Create(
        &matcher->visitor_stack,
        sizeof(struct GroupVisitor),
        16,
        regex->machine_memory.allocator
    );
    if (code) {
        // Only the group exists at this point.
        RegexMatchThreadGroup_Destroy2(&matcher->top_group);
        return code;
    }

    root_thread = _RegexMatchThreadGroup_NewThread(&matcher->top_group);
    if (root_thread == NULL) {
        code = ENOMEM;
        goto fail;
    }

    // Propagate the real failure code instead of masking it as ENOMEM,
    // matching how the group/stack creation codes are reported above.
    code = RegexMatchThread_New(root_thread, string, start, regex);
    if (code) {
        goto fail;
    }

    matcher->depth = 1;
    memset(&matcher->finished_thread, 0, sizeof(matcher->finished_thread));
    return EXIT_SUCCESS;

fail:
    // Shared cleanup for failures after both containers were created.
    RegexMatchThreadGroup_Destroy2(&matcher->top_group);
    DynamicArray_Destroy(&matcher->visitor_stack);
    return code;
}
/**
 * Tear down a matcher set up by _RegexMatcher_Create(): destroys the
 * top-level thread group, then the visitor stack, and finally zeroes the
 * whole struct so no stale pointers remain in it.
 */
static void _RegexMatcher_Destroy(RegexMatcher* matcher)
{
    RegexMatchThreadGroup* root_group = &matcher->top_group;
    DynamicArray* stack = &matcher->visitor_stack;

    RegexMatchThreadGroup_Destroy2(root_group);
    DynamicArray_Destroy(stack);

    memset(matcher, 0, sizeof *matcher);
}
static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, size_t* depth, RegexMachineStateBase* new_head)
{
RegexMatchThread* child;
@ -384,6 +434,7 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz
// remove from old group
_RegexMatchThreadGroup_ForgetThread(parent_group, *parent);
parent_visitor->position--;
}
if (child == NULL) {
@ -396,18 +447,19 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz
}
child->machine_head = new_head;
child->number = parent->number + DynamicArray_GetLength(&parent_group.threads) - 1;
child->number = (*parent)->number
+ DynamicArray_GetLength(&parent_group->threads) - 1;
return EXIT_SUCCESS;
}
static int _HandleOption(DynamicArray* visitors, size_t* depth)
static int _HandleOption(RegexMatcher* matcher)
{
struct GroupVisitor* visitor = DynamicArray_GetPointer(
visitors,
visitors->reserved - 1
&matcher->visitor_stack,
DynamicArray_GetLength(&matcher->visitor_stack)
);
RegexMatchThread* thread = (void*) visitor->type;
RegexMatchThread* thread = visitor->as.thread;
RegexMachineStateBase* first_head = thread->machine_head;
{
@ -420,8 +472,8 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth)
int child_code = _NewRegexChild(
&thread,
visitors,
depth,
&matcher->visitor_stack,
&matcher->depth,
option_head->next
);
if (child_code) {
@ -435,13 +487,37 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth)
return EXIT_SUCCESS;
}
static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
/**
 * Record `thread` as the matcher's finished thread, but only if it is
 * allowed to win.
 *
 * Only the first thread in the top-most group is finishable; every other
 * thread has a lower priority because of the order in which alternatives
 * have to be matched. When the thread qualifies it is copied into
 * matcher->finished_thread, forgotten by its group, and the visitor stack's
 * `reserved` counter is reset to 0.
 */
static void _TryFinishThread(RegexMatcher* matcher, RegexMatchThread* thread)
{
    DynamicArray* stack = &matcher->visitor_stack;

    // The entry directly below the top of the stack is read as the group
    // visitor whose group holds `thread`.
    struct GroupVisitor* owner_visitor = DynamicArray_GetPointer(
        stack,
        DynamicArray_GetLength(stack) - 2
    );
    RegexMatchThreadGroup* owner = owner_visitor->as.group;

    size_t index_in_owner = DynamicArray_FindFunctionLinear(
        &owner->threads,
        (DynamicArrayLinearFindFunction) _FindThreadLinear,
        thread
    );

    if (index_in_owner != 0) {
        return; // not the highest-priority thread of its group
    }
    if (DynamicArray_GetLength(stack) != 2) {
        return; // its group is not the top-most one
    }

    matcher->finished_thread = *thread;
    _RegexMatchThreadGroup_ForgetThread(owner, thread);
    // `reserved` is used like an element count elsewhere in this file, so
    // this appears to empty the stack and stop the current walk.
    stack->reserved = 0;
}
static int _AdvanceThread(RegexMatcher* matcher)
{
struct GroupVisitor* visitor = DynamicArray_GetPointer(
visitors,
visitors->reserved - 1
&matcher->visitor_stack,
DynamicArray_GetLength(&matcher->visitor_stack) - 1
);
RegexMatchThread* thread = (void*) visitor->type;
RegexMatchThread* thread = visitor->as.thread;
int code;
bool discard;
@ -453,7 +529,7 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
code = _HandleGroup(thread);
break;
case REGEXMACHINESTATETYPE_OPTION:
code = _HandleOption(visitors, depth);
code = _HandleOption(matcher);
break;
case REGEXMACHINESTATETYPE_REPEAT:
code = _HandleRepeat(thread);
@ -467,10 +543,12 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
}
if (thread->machine_head == NULL) {
// Match done
_TryFinishThread(matcher, thread);
}
// Remove from visitor stack
DynamicArray_Remove(&matcher->visitor_stack, matcher->visitor_stack.reserved - 1);
return EXIT_SUCCESS;
}
@ -509,85 +587,132 @@ static void _DestroyThreadHierarchy(DynamicArray* visitors, RegexMatchThreadGrou
return;
}
static int _HandleAdvanceVisitor(DynamicArray* visitor_stack, size_t* depth)
static int _AdvanceGroup(RegexMatcher* matcher)
{
struct GroupVisitor* current_visitor = DynamicArray_GetPointer(
visitor_stack,
visitor_stack->reserved - 1
struct GroupVisitor* visitor = DynamicArray_GetPointer(
&matcher->visitor_stack,
DynamicArray_GetLength(&matcher->visitor_stack) - 1
);
RegexMatchThreadGroup* group = visitor->as.group;
visitor->position += 1;
switch (current_visitor->type) {
case REGEXMATCHTHREADTYPE_THREAD:
_AdvanceThread(
visitor_stack,
depth
if (DynamicArray_GetLength(&group->threads) > visitor->position) {
struct GroupVisitor* child_visitor;
DynamicArray_AppendEmpty(
&matcher->visitor_stack,
(void**) &child_visitor
);
child_visitor->type = REGEXMATCHTHREADTYPE_THREAD;
child_visitor->as.thread = DynamicArray_GetPointer(&group->threads, visitor->position);
} else {
// calculate position in subgroup array
unsigned int position = visitor->position
- DynamicArray_GetLength(&group->threads);
if (DynamicArray_GetLength(&group->subgroups) == position) {
// Remove this group
DynamicArray_Remove(
&matcher->visitor_stack,
matcher->visitor_stack.reserved - 1
);
break;
case REGEXMATCHTHREADTYPE_GROUP:
break;
} else {
// Add subgroup
struct GroupVisitor* sgroup_visitor;
DynamicArray_AppendEmpty(
&matcher->visitor_stack,
(void**) &sgroup_visitor
);
sgroup_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
sgroup_visitor->position = -1;
sgroup_visitor->as.group = DynamicArray_GetPointer(
&group->subgroups,
position
);
}
}
return EXIT_SUCCESS;
}
static int _WalkThreads(DynamicArray* visitor_stack, RegexMatchThreadGroup* group, size_t* depth)
/**
 * Advance whatever sits on top of the visitor stack by one step.
 *
 * Thread visitors are forwarded to _AdvanceThread(), group visitors to
 * _AdvanceGroup(); the callee's return code is passed through unchanged.
 * A visitor of any other type is left alone and EXIT_SUCCESS is returned.
 */
static int _HandleAdvanceVisitor(RegexMatcher* matcher)
{
    struct GroupVisitor* top = DynamicArray_GetPointer(
        &matcher->visitor_stack,
        DynamicArray_GetLength(&matcher->visitor_stack) - 1
    );

    if (top->type == REGEXMATCHTHREADTYPE_THREAD) {
        return _AdvanceThread(matcher);
    }
    if (top->type == REGEXMATCHTHREADTYPE_GROUP) {
        return _AdvanceGroup(matcher);
    }

    return EXIT_SUCCESS;
}
static int _WalkThreads(RegexMatcher* matcher)
{
{
struct GroupVisitor* zero_visitor;
DynamicArray_AppendEmpty(
visitor_stack,
&matcher->visitor_stack,
(void**) &zero_visitor
);
zero_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
zero_visitor->as.group = group;
zero_visitor->as.group = &matcher->top_group;
zero_visitor->position = -1;
}
while (DynamicArray_GetLength(visitor_stack) != 0) {
int advance_code = _HandleAdvanceVisitor(
visitor_stack,
depth
);
while (DynamicArray_GetLength(&matcher->visitor_stack) != 0) {
int advance_code = _HandleAdvanceVisitor(matcher);
if (advance_code) return advance_code;
}
return EXIT_SUCCESS;
}
/**
 * Report whether a thread has been stored as the match result.
 * finished_thread is zeroed at creation, so a non-NULL `regex` member
 * means a thread was copied into it.
 */
static bool _HaveFinishedThread(RegexMatcher* matcher)
{
    const RegexMatchThread* finished = &matcher->finished_thread;

    return finished->regex != NULL;
}
/**
 * Report whether the top-level group still holds any work: true when its
 * subgroup array or its thread array has a non-zero `reserved` counter.
 */
static bool _HaveThreadsRunning(RegexMatcher* matcher)
{
    if (matcher->top_group.subgroups.reserved != 0) {
        return true;
    }

    return matcher->top_group.threads.reserved != 0;
}
int Regex_MatchHere(Regex* regex, StringView string, size_t start, RegexMatch* match)
{
int return_code = EXIT_SUCCESS;
// initialize variables etc....
DynamicArray visitor_stack;
if (DynamicArray_Create(
&visitor_stack,
sizeof(struct GroupVisitor),
16,
regex->machine_memory.allocator)
) {
return ENOMEM;
RegexMatcher matcher;
if (_RegexMatcher_Create(&matcher, regex, string, start)) {
return EXIT_FAILURE;
}
RegexMatchThreadGroup top_level_group;
RegexMatchThreadGroup_Create(&top_level_group, 0, regex->machine_memory.allocator);
size_t depth = 1;
{
RegexMatchThread* first_thread;
first_thread = RegexMatchThreadGroup_NewThread(&top_level_group);
first_head->number = 0;
while (! _HaveFinishedThread(&matcher)
&& _HaveThreadsRunning(&matcher)) {
_WalkThreads(&matcher);
}
bool haveFinishedThread = false;
while (! haveFinishedThread) {
_WalkThreads(&visitor_stack, &top_level_group, &depth);
if (_HaveFinishedThread(&matcher)) {
*match = matcher.finished_thread.match;
memset(
&matcher.finished_thread.match,
0,
sizeof(matcher.finished_thread.match)
);
RegexMatchThread_Del(&matcher.finished_thread);
}
defer_tl_group:
_DestroyThreadHierarchy(&visitor_stack, &top_level_group);
defer_stack:
DynamicArray_Destroy(&visitor_stack);
_DestroyThreadHierarchy(&matcher.visitor_stack, &matcher.top_group);
_RegexMatcher_Destroy(&matcher);
return return_code;
}