Literal matching working again!
This commit is contained in:
parent
a2317836fa
commit
e6064bd375
1 changed files with 185 additions and 60 deletions
|
@ -8,8 +8,10 @@
|
|||
|
||||
typedef struct RegexMatcher_s {
|
||||
RegexMatchThreadGroup top_group;
|
||||
DynamicArray visitor_stack;
|
||||
size_t depth;
|
||||
|
||||
RegexMatchThread finished_threads;
|
||||
RegexMatchThread finished_thread;
|
||||
} RegexMatcher;
|
||||
|
||||
static int RegexMatchThread_New(RegexMatchThread* thread, StringView string, size_t index, Regex* regex)
|
||||
|
@ -275,7 +277,7 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors)
|
|||
visitors,
|
||||
visitors->reserved - 1
|
||||
);
|
||||
RegexMatchThreadGroup* group = (void*) current_visitor->type;
|
||||
RegexMatchThreadGroup* group = current_visitor->as.group;
|
||||
|
||||
if (DynamicArray_GetLength(&group->threads) != 0) {
|
||||
// Append first sub thread to visitor stack
|
||||
|
@ -312,6 +314,54 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors)
|
|||
return;
|
||||
}
|
||||
|
||||
// Initialises `matcher` for matching `regex` against `string` from `start`.
//
// Creates the top-level thread group and the visitor stack (both using the
// regex's allocator), then adds and initialises the root thread.
//
// Returns EXIT_SUCCESS on success. On failure everything created so far is
// destroyed again and the error code of the failing step is returned
// (ENOMEM when no root thread could be obtained).
static int _RegexMatcher_Create(RegexMatcher* matcher, Regex* regex, StringView string, size_t start)
{
    RegexMatchThread* root_thread = NULL;

    int code = RegexMatchThreadGroup_Create(
        &matcher->top_group,
        0,
        regex->machine_memory.allocator
    );
    if (code) {
        return code;
    }

    code = DynamicArray_Create(
        &matcher->visitor_stack,
        sizeof(struct GroupVisitor),
        16,
        regex->machine_memory.allocator
    );
    if (code) {
        goto fail_group;
    }

    root_thread = _RegexMatchThreadGroup_NewThread(&matcher->top_group);
    if (root_thread == NULL) {
        code = ENOMEM;
        goto fail_stack;
    }

    // Report the real reason for the failure instead of a blanket ENOMEM.
    code = RegexMatchThread_New(root_thread, string, start, regex);
    if (code) {
        goto fail_stack;
    }

    matcher->depth = 1;

    // An all-zero finished_thread means "no thread has finished yet".
    memset(&matcher->finished_thread, 0, sizeof(matcher->finished_thread));

    return EXIT_SUCCESS;

fail_stack:
    DynamicArray_Destroy(&matcher->visitor_stack);
fail_group:
    RegexMatchThreadGroup_Destroy2(&matcher->top_group);
    return code;
}
|
||||
|
||||
// Tears down a matcher set up by _RegexMatcher_Create: destroys the
// top-level thread group and the visitor stack, then zeroes the whole
// structure (which also resets finished_thread to its "none" state).
static void _RegexMatcher_Destroy(RegexMatcher* matcher)
{
    RegexMatchThreadGroup* root_group = &matcher->top_group;
    DynamicArray* stack = &matcher->visitor_stack;

    RegexMatchThreadGroup_Destroy2(root_group);
    DynamicArray_Destroy(stack);

    memset(matcher, 0, sizeof *matcher);
}
|
||||
|
||||
static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, size_t* depth, RegexMachineStateBase* new_head)
|
||||
{
|
||||
RegexMatchThread* child;
|
||||
|
@ -384,6 +434,7 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz
|
|||
|
||||
// remove from old group
|
||||
_RegexMatchThreadGroup_ForgetThread(parent_group, *parent);
|
||||
parent_visitor->position--;
|
||||
}
|
||||
|
||||
if (child == NULL) {
|
||||
|
@ -396,18 +447,19 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz
|
|||
}
|
||||
|
||||
child->machine_head = new_head;
|
||||
child->number = parent->number + DynamicArray_GetLength(&parent_group.threads) - 1;
|
||||
child->number = (*parent)->number
|
||||
+ DynamicArray_GetLength(&parent_group->threads) - 1;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int _HandleOption(DynamicArray* visitors, size_t* depth)
|
||||
static int _HandleOption(RegexMatcher* matcher)
|
||||
{
|
||||
struct GroupVisitor* visitor = DynamicArray_GetPointer(
|
||||
visitors,
|
||||
visitors->reserved - 1
|
||||
&matcher->visitor_stack,
|
||||
DynamicArray_GetLength(&matcher->visitor_stack)
|
||||
);
|
||||
RegexMatchThread* thread = (void*) visitor->type;
|
||||
RegexMatchThread* thread = visitor->as.thread;
|
||||
|
||||
RegexMachineStateBase* first_head = thread->machine_head;
|
||||
{
|
||||
|
@ -420,8 +472,8 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth)
|
|||
|
||||
int child_code = _NewRegexChild(
|
||||
&thread,
|
||||
visitors,
|
||||
depth,
|
||||
&matcher->visitor_stack,
|
||||
&matcher->depth,
|
||||
option_head->next
|
||||
);
|
||||
if (child_code) {
|
||||
|
@ -435,13 +487,37 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth)
|
|||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
|
||||
// Records `thread` as the finished thread of `matcher`, if it is allowed
// to finish.
//
// Only the first thread in the top-most group is finishable. All other
// threads have a lower priority, this is because of the order they have
// to be matched in. In every other case the function does nothing.
static void _TryFinishThread(RegexMatcher* matcher, RegexMatchThread* thread)
{
    // The thread sits directly below the top-most group only when the stack
    // is exactly [group visitor, thread visitor]. Checking this first avoids
    // computing `length - 2` on a shorter stack and skips the linear search
    // when the thread cannot finish anyway.
    if (DynamicArray_GetLength(&matcher->visitor_stack) != 2) {
        return;
    }

    struct GroupVisitor* parent_visitor = DynamicArray_GetPointer(
        &matcher->visitor_stack,
        0
    );
    RegexMatchThreadGroup* parent_group = parent_visitor->as.group;

    size_t thread_index = DynamicArray_FindFunctionLinear(
        &parent_group->threads,
        (DynamicArrayLinearFindFunction) _FindThreadLinear,
        thread
    );
    if (thread_index != 0) {
        return;
    }

    // Keep a copy before the group forgets the thread.
    matcher->finished_thread = *thread;
    _RegexMatchThreadGroup_ForgetThread(parent_group, thread);

    // Empty the visitor stack so the walk loop stops.
    // NOTE(review): _AdvanceThread removes index `reserved - 1` right after
    // calling this function; with `reserved` now 0 that index wraps if the
    // field is unsigned - confirm DynamicArray_Remove tolerates it.
    matcher->visitor_stack.reserved = 0;
}
|
||||
|
||||
static int _AdvanceThread(RegexMatcher* matcher)
|
||||
{
|
||||
struct GroupVisitor* visitor = DynamicArray_GetPointer(
|
||||
visitors,
|
||||
visitors->reserved - 1
|
||||
&matcher->visitor_stack,
|
||||
DynamicArray_GetLength(&matcher->visitor_stack) - 1
|
||||
);
|
||||
RegexMatchThread* thread = (void*) visitor->type;
|
||||
RegexMatchThread* thread = visitor->as.thread;
|
||||
int code;
|
||||
bool discard;
|
||||
|
||||
|
@ -453,7 +529,7 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
|
|||
code = _HandleGroup(thread);
|
||||
break;
|
||||
case REGEXMACHINESTATETYPE_OPTION:
|
||||
code = _HandleOption(visitors, depth);
|
||||
code = _HandleOption(matcher);
|
||||
break;
|
||||
case REGEXMACHINESTATETYPE_REPEAT:
|
||||
code = _HandleRepeat(thread);
|
||||
|
@ -467,10 +543,12 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
|
|||
}
|
||||
|
||||
if (thread->machine_head == NULL) {
|
||||
// Match done
|
||||
|
||||
_TryFinishThread(matcher, thread);
|
||||
}
|
||||
|
||||
// Remove from visitor stack
|
||||
DynamicArray_Remove(&matcher->visitor_stack, matcher->visitor_stack.reserved - 1);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -509,85 +587,132 @@ static void _DestroyThreadHierarchy(DynamicArray* visitors, RegexMatchThreadGrou
|
|||
return;
|
||||
}
|
||||
|
||||
static int _HandleAdvanceVisitor(DynamicArray* visitor_stack, size_t* depth)
|
||||
// Advances the group visitor on top of the visitor stack by one position.
//
// Positions below the number of threads in the group select a thread; the
// positions after that select a subgroup. Depending on the new position
// the function
//   - pushes a thread visitor for the next thread,
//   - pushes a group visitor (position -1) for the next subgroup, or
//   - removes the group's own visitor once threads and subgroups are
//     exhausted.
//
// Always returns EXIT_SUCCESS.
static int _AdvanceGroup(RegexMatcher* matcher)
{
    struct GroupVisitor* visitor = DynamicArray_GetPointer(
        &matcher->visitor_stack,
        DynamicArray_GetLength(&matcher->visitor_stack) - 1
    );
    RegexMatchThreadGroup* group = visitor->as.group;
    visitor->position += 1;

    // Snapshot the position now: `visitor` points into visitor_stack and
    // must not be dereferenced after an append, which may move the storage
    // if the array grows.
    size_t current = visitor->position;
    size_t thread_count = DynamicArray_GetLength(&group->threads);

    if (thread_count > current) {
        struct GroupVisitor* child_visitor;
        DynamicArray_AppendEmpty(
            &matcher->visitor_stack,
            (void**) &child_visitor
        );
        child_visitor->type = REGEXMATCHTHREADTYPE_THREAD;
        child_visitor->as.thread = DynamicArray_GetPointer(&group->threads, current);
        return EXIT_SUCCESS;
    }

    // calculate position in subgroup array
    size_t position = current - thread_count;

    if (DynamicArray_GetLength(&group->subgroups) == position) {
        // Remove this group
        DynamicArray_Remove(
            &matcher->visitor_stack,
            matcher->visitor_stack.reserved - 1
        );
    } else {
        // Add subgroup
        struct GroupVisitor* sgroup_visitor;
        DynamicArray_AppendEmpty(
            &matcher->visitor_stack,
            (void**) &sgroup_visitor
        );
        sgroup_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
        sgroup_visitor->position = -1;
        sgroup_visitor->as.group = DynamicArray_GetPointer(
            &group->subgroups,
            position
        );
    }

    return EXIT_SUCCESS;
}
|
||||
|
||||
static int _WalkThreads(DynamicArray* visitor_stack, RegexMatchThreadGroup* group, size_t* depth)
|
||||
// Advances whatever is on top of the visitor stack: a thread visitor is
// passed to _AdvanceThread, a group visitor to _AdvanceGroup. Returns the
// handler's result, or EXIT_SUCCESS for any other visitor type.
static int _HandleAdvanceVisitor(RegexMatcher* matcher)
{
    DynamicArray* stack = &matcher->visitor_stack;
    struct GroupVisitor* top = DynamicArray_GetPointer(
        stack,
        DynamicArray_GetLength(stack) - 1
    );

    if (top->type == REGEXMATCHTHREADTYPE_THREAD) {
        return _AdvanceThread(matcher);
    }
    if (top->type == REGEXMATCHTHREADTYPE_GROUP) {
        return _AdvanceGroup(matcher);
    }

    return EXIT_SUCCESS;
}
|
||||
|
||||
// Performs one full walk over the thread hierarchy of `matcher`.
//
// Pushes a group visitor for the top-level group (position -1) and keeps
// advancing the top of the visitor stack until the stack is empty.
// Returns the first non-zero code reported by _HandleAdvanceVisitor, or
// EXIT_SUCCESS once the stack has drained.
static int _WalkThreads(RegexMatcher* matcher)
{
    struct GroupVisitor* root_visitor;
    DynamicArray_AppendEmpty(
        &matcher->visitor_stack,
        (void**) &root_visitor
    );
    root_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
    root_visitor->as.group = &matcher->top_group;
    root_visitor->position = -1;

    int code = EXIT_SUCCESS;
    while (code == EXIT_SUCCESS
           && DynamicArray_GetLength(&matcher->visitor_stack) != 0) {
        code = _HandleAdvanceVisitor(matcher);
    }

    return code;
}
|
||||
|
||||
// Tells whether a thread has been recorded as finished: finished_thread is
// kept all-zero until then, so a non-NULL `regex` member marks it as set.
static bool _HaveFinishedThread(RegexMatcher* matcher)
{
    if (matcher->finished_thread.regex == NULL) {
        return false;
    }
    return true;
}
|
||||
|
||||
// Tells whether the top-level group still holds anything to run, i.e. its
// subgroup array or its thread array is non-empty.
static bool _HaveThreadsRunning(RegexMatcher* matcher)
{
    const RegexMatchThreadGroup* root = &matcher->top_group;

    if (root->subgroups.reserved != 0) {
        return true;
    }
    return root->threads.reserved != 0;
}
|
||||
|
||||
// Tries to match `regex` against `string`, starting at index `start`.
//
// On a successful match the result of the finished thread is moved into
// `*match`. When no thread finishes, `*match` is left untouched and the
// function still returns EXIT_SUCCESS.
//
// Returns EXIT_FAILURE when the matcher cannot be set up, or the error
// code of a failed walk over the threads.
int Regex_MatchHere(Regex* regex, StringView string, size_t start, RegexMatch* match)
{
    int return_code = EXIT_SUCCESS;

    // initialize variables etc....
    RegexMatcher matcher;
    if (_RegexMatcher_Create(&matcher, regex, string, start)) {
        return EXIT_FAILURE;
    }

    // Keep walking until a thread finishes or none are left. A failing walk
    // ends the loop instead of being retried or silently ignored.
    while (return_code == EXIT_SUCCESS
           && ! _HaveFinishedThread(&matcher)
           && _HaveThreadsRunning(&matcher)) {
        return_code = _WalkThreads(&matcher);
    }

    if (_HaveFinishedThread(&matcher)) {
        if (return_code == EXIT_SUCCESS) {
            // Hand the match over to the caller and blank the thread's copy
            // so that RegexMatchThread_Del does not see it any more
            // (presumably it would release it - confirm).
            *match = matcher.finished_thread.match;
            memset(
                &matcher.finished_thread.match,
                0,
                sizeof(matcher.finished_thread.match)
            );
        }
        RegexMatchThread_Del(&matcher.finished_thread);
    }

    _DestroyThreadHierarchy(&matcher.visitor_stack, &matcher.top_group);
    _RegexMatcher_Destroy(&matcher);

    return return_code;
}
|
||||
|
|
Loading…
Reference in a new issue