Literal matching working again!
This commit is contained in:
parent
a2317836fa
commit
e6064bd375
1 changed files with 185 additions and 60 deletions
|
@ -8,8 +8,10 @@
|
||||||
|
|
||||||
typedef struct RegexMatcher_s {
|
// State of one match attempt. In this side-by-side view each row shows the
// removed line first and the added line second.
typedef struct RegexMatcher_s {
|
||||||
RegexMatchThreadGroup top_group;
|
// Root thread group; _WalkThreads starts every walk from this group.
RegexMatchThreadGroup top_group;
|
||||||
|
// Stack of `struct GroupVisitor` entries used while walking the hierarchy.
DynamicArray visitor_stack;
|
||||||
|
// Set to 1 by _RegexMatcher_Create; _HandleOption hands its address to
// _NewRegexChild.
size_t depth;
|
||||||
|
|
||||||
RegexMatchThread finished_threads;
|
// Copy of the thread that finished. Zeroed on creation; a non-NULL `regex`
// member is what _HaveFinishedThread tests for.
RegexMatchThread finished_thread;
|
||||||
} RegexMatcher;
|
} RegexMatcher;
|
||||||
|
|
||||||
static int RegexMatchThread_New(RegexMatchThread* thread, StringView string, size_t index, Regex* regex)
|
static int RegexMatchThread_New(RegexMatchThread* thread, StringView string, size_t index, Regex* regex)
|
||||||
|
@ -275,7 +277,7 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors)
|
||||||
visitors,
|
visitors,
|
||||||
visitors->reserved - 1
|
visitors->reserved - 1
|
||||||
);
|
);
|
||||||
RegexMatchThreadGroup* group = (void*) current_visitor->type;
|
RegexMatchThreadGroup* group = current_visitor->as.group;
|
||||||
|
|
||||||
if (DynamicArray_GetLength(&group->threads) != 0) {
|
if (DynamicArray_GetLength(&group->threads) != 0) {
|
||||||
// Append first sub thread to visitor stack
|
// Append first sub thread to visitor stack
|
||||||
|
@ -312,6 +314,54 @@ static void RegexMatchThreadGroup_Destroy(DynamicArray* visitors)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prepares `matcher` for a match attempt on `string` beginning at `start`.
//
// Creates the top-level thread group and the visitor stack (both are given
// regex->machine_memory.allocator), then adds a root thread to the top-level
// group and initialises it with RegexMatchThread_New. Whatever was already
// built is torn down again when a later step fails.
//
// Returns EXIT_SUCCESS on success, the failing call's code when the group or
// the stack cannot be created, and ENOMEM when the root thread cannot be
// allocated or initialised.
static int _RegexMatcher_Create(RegexMatcher* matcher, Regex* regex, StringView string, size_t start)
{
    int status;

    status = RegexMatchThreadGroup_Create(&matcher->top_group, 0, regex->machine_memory.allocator);
    if (status != 0) {
        return status;
    }

    status = DynamicArray_Create(
        &matcher->visitor_stack,
        sizeof(struct GroupVisitor),
        16,
        regex->machine_memory.allocator
    );
    if (status != 0) {
        RegexMatchThreadGroup_Destroy2(&matcher->top_group);
        return status;
    }

    // Allocation and initialisation of the root thread share one failure
    // path; the short-circuit keeps RegexMatchThread_New away from NULL.
    RegexMatchThread* root = _RegexMatchThreadGroup_NewThread(&matcher->top_group);
    if (root == NULL || RegexMatchThread_New(root, string, start, regex) != 0) {
        RegexMatchThreadGroup_Destroy2(&matcher->top_group);
        DynamicArray_Destroy(&matcher->visitor_stack);
        return ENOMEM;
    }

    // No thread has finished yet: _HaveFinishedThread looks at the `regex`
    // member of this zeroed struct.
    memset(&matcher->finished_thread, 0, sizeof(matcher->finished_thread));
    matcher->depth = 1;

    return EXIT_SUCCESS;
}
|
||||||
|
|
||||||
|
// Releases what _RegexMatcher_Create set up: the top-level thread group
// first, then the visitor stack. Afterwards every field of `matcher`,
// including `finished_thread`, is zeroed.
static void _RegexMatcher_Destroy(RegexMatcher* matcher)
{
    RegexMatchThreadGroup_Destroy2(&matcher->top_group);
    DynamicArray_Destroy(&matcher->visitor_stack);

    memset(matcher, 0, sizeof(RegexMatcher));
}
|
||||||
|
|
||||||
static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, size_t* depth, RegexMachineStateBase* new_head)
|
static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, size_t* depth, RegexMachineStateBase* new_head)
|
||||||
{
|
{
|
||||||
RegexMatchThread* child;
|
RegexMatchThread* child;
|
||||||
|
@ -384,6 +434,7 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz
|
||||||
|
|
||||||
// remove from old group
|
// remove from old group
|
||||||
_RegexMatchThreadGroup_ForgetThread(parent_group, *parent);
|
_RegexMatchThreadGroup_ForgetThread(parent_group, *parent);
|
||||||
|
parent_visitor->position--;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (child == NULL) {
|
if (child == NULL) {
|
||||||
|
@ -396,18 +447,19 @@ static int _NewRegexChild(RegexMatchThread** parent, DynamicArray* visitors, siz
|
||||||
}
|
}
|
||||||
|
|
||||||
child->machine_head = new_head;
|
child->machine_head = new_head;
|
||||||
child->number = parent->number + DynamicArray_GetLength(&parent_group.threads) - 1;
|
child->number = (*parent)->number
|
||||||
|
+ DynamicArray_GetLength(&parent_group->threads) - 1;
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int _HandleOption(DynamicArray* visitors, size_t* depth)
|
static int _HandleOption(RegexMatcher* matcher)
|
||||||
{
|
{
|
||||||
struct GroupVisitor* visitor = DynamicArray_GetPointer(
|
struct GroupVisitor* visitor = DynamicArray_GetPointer(
|
||||||
visitors,
|
&matcher->visitor_stack,
|
||||||
visitors->reserved - 1
|
DynamicArray_GetLength(&matcher->visitor_stack)
|
||||||
);
|
);
|
||||||
RegexMatchThread* thread = (void*) visitor->type;
|
RegexMatchThread* thread = visitor->as.thread;
|
||||||
|
|
||||||
RegexMachineStateBase* first_head = thread->machine_head;
|
RegexMachineStateBase* first_head = thread->machine_head;
|
||||||
{
|
{
|
||||||
|
@ -420,8 +472,8 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth)
|
||||||
|
|
||||||
int child_code = _NewRegexChild(
|
int child_code = _NewRegexChild(
|
||||||
&thread,
|
&thread,
|
||||||
visitors,
|
&matcher->visitor_stack,
|
||||||
depth,
|
&matcher->depth,
|
||||||
option_head->next
|
option_head->next
|
||||||
);
|
);
|
||||||
if (child_code) {
|
if (child_code) {
|
||||||
|
@ -435,13 +487,37 @@ static int _HandleOption(DynamicArray* visitors, size_t* depth)
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
|
// Records `thread` as the matcher's finished thread when it is allowed to
// finish, and does nothing otherwise.
//
// Only the first thread in the top-most group is finishable. All other
// threads have a lower priority, because of the order they have to be
// matched in. "Top-most" is recognised by the visitor stack holding exactly
// two entries: the top group's visitor and this thread's visitor.
//
// On success the thread is copied into matcher->finished_thread, removed from
// its group, and the visitor stack is emptied by setting `reserved` to 0.
//
// NOTE(review): _AdvanceThread calls this and then removes index
// `matcher->visitor_stack.reserved - 1`; after the reset below that index
// wraps around. Confirm that DynamicArray_Remove tolerates it.
static void _TryFinishThread(RegexMatcher* matcher, RegexMatchThread* thread)
{
    // Check the depth before touching the stack: with fewer than two entries
    // the `- 2` below would wrap, and at any other depth the search for the
    // thread's index cannot change the outcome.
    if (DynamicArray_GetLength(&matcher->visitor_stack) != 2) {
        return;
    }

    struct GroupVisitor* parent_visitor = DynamicArray_GetPointer(
        &matcher->visitor_stack,
        DynamicArray_GetLength(&matcher->visitor_stack) - 2
    );
    RegexMatchThreadGroup* parent_group = parent_visitor->as.group;

    size_t thread_index = DynamicArray_FindFunctionLinear(
        &parent_group->threads,
        (DynamicArrayLinearFindFunction) _FindThreadLinear,
        thread
    );
    if (thread_index != 0) {
        return;
    }

    // Copy first: `thread` points into the group that is about to forget it.
    matcher->finished_thread = *thread;
    _RegexMatchThreadGroup_ForgetThread(parent_group, thread);
    matcher->visitor_stack.reserved = 0;
}
|
||||||
|
|
||||||
|
static int _AdvanceThread(RegexMatcher* matcher)
|
||||||
{
|
{
|
||||||
struct GroupVisitor* visitor = DynamicArray_GetPointer(
|
struct GroupVisitor* visitor = DynamicArray_GetPointer(
|
||||||
visitors,
|
&matcher->visitor_stack,
|
||||||
visitors->reserved - 1
|
DynamicArray_GetLength(&matcher->visitor_stack) - 1
|
||||||
);
|
);
|
||||||
RegexMatchThread* thread = (void*) visitor->type;
|
RegexMatchThread* thread = visitor->as.thread;
|
||||||
int code;
|
int code;
|
||||||
bool discard;
|
bool discard;
|
||||||
|
|
||||||
|
@ -453,7 +529,7 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
|
||||||
code = _HandleGroup(thread);
|
code = _HandleGroup(thread);
|
||||||
break;
|
break;
|
||||||
case REGEXMACHINESTATETYPE_OPTION:
|
case REGEXMACHINESTATETYPE_OPTION:
|
||||||
code = _HandleOption(visitors, depth);
|
code = _HandleOption(matcher);
|
||||||
break;
|
break;
|
||||||
case REGEXMACHINESTATETYPE_REPEAT:
|
case REGEXMACHINESTATETYPE_REPEAT:
|
||||||
code = _HandleRepeat(thread);
|
code = _HandleRepeat(thread);
|
||||||
|
@ -467,10 +543,12 @@ static int _AdvanceThread(DynamicArray* visitors, size_t* depth)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (thread->machine_head == NULL) {
|
if (thread->machine_head == NULL) {
|
||||||
// Match done
|
_TryFinishThread(matcher, thread);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove from visitor stack
|
||||||
|
DynamicArray_Remove(&matcher->visitor_stack, matcher->visitor_stack.reserved - 1);
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -509,85 +587,132 @@ static void _DestroyThreadHierarchy(DynamicArray* visitors, RegexMatchThreadGrou
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int _HandleAdvanceVisitor(DynamicArray* visitor_stack, size_t* depth)
|
// Added side: advances the group visitor on top of matcher->visitor_stack by
// one position.
//  - position below the group's thread count: push a THREAD visitor for
//    that thread;
//  - position past the threads: the remainder indexes group->subgroups and a
//    GROUP visitor (position -1) is pushed for that subgroup;
//  - position past the last subgroup: the group's own visitor is removed.
// Always returns EXIT_SUCCESS.
static int _AdvanceGroup(RegexMatcher* matcher)
|
||||||
{
|
{
|
||||||
struct GroupVisitor* current_visitor = DynamicArray_GetPointer(
|
struct GroupVisitor* visitor = DynamicArray_GetPointer(
|
||||||
visitor_stack,
|
&matcher->visitor_stack,
|
||||||
visitor_stack->reserved - 1
|
DynamicArray_GetLength(&matcher->visitor_stack) - 1
|
||||||
);
|
);
|
||||||
|
RegexMatchThreadGroup* group = visitor->as.group;
|
||||||
|
// Visitors start at position -1, so the first advance lands on index 0.
visitor->position += 1;
|
||||||
|
|
||||||
switch (current_visitor->type) {
|
if (DynamicArray_GetLength(&group->threads) > visitor->position) {
|
||||||
case REGEXMATCHTHREADTYPE_THREAD:
|
struct GroupVisitor* child_visitor;
|
||||||
_AdvanceThread(
|
// NOTE(review): the result of DynamicArray_AppendEmpty is not checked, and
// `visitor` (fetched before the append) is read again below. If appending can
// fail or move the array's storage, both need handling - confirm.
DynamicArray_AppendEmpty(
|
||||||
visitor_stack,
|
&matcher->visitor_stack,
|
||||||
depth
|
(void**) &child_visitor
|
||||||
);
|
);
|
||||||
break;
|
// NOTE(review): child_visitor->position is never assigned on this path.
child_visitor->type = REGEXMATCHTHREADTYPE_THREAD;
|
||||||
case REGEXMATCHTHREADTYPE_GROUP:
|
child_visitor->as.thread = DynamicArray_GetPointer(&group->threads, visitor->position);
|
||||||
break;
|
} else {
|
||||||
|
// calculate position in subgroup array
|
||||||
|
unsigned int position = visitor->position
|
||||||
|
- DynamicArray_GetLength(&group->threads);
|
||||||
|
|
||||||
|
if (DynamicArray_GetLength(&group->subgroups) == position) {
|
||||||
|
// Remove this group
|
||||||
|
DynamicArray_Remove(
|
||||||
|
&matcher->visitor_stack,
|
||||||
|
matcher->visitor_stack.reserved - 1
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// Add subgroup
|
||||||
|
struct GroupVisitor* sgroup_visitor;
|
||||||
|
// NOTE(review): as above, the append result is not checked.
DynamicArray_AppendEmpty(
|
||||||
|
&matcher->visitor_stack,
|
||||||
|
(void**) &sgroup_visitor
|
||||||
|
);
|
||||||
|
sgroup_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
|
||||||
|
sgroup_visitor->position = -1;
|
||||||
|
sgroup_visitor->as.group = DynamicArray_GetPointer(
|
||||||
|
&group->subgroups,
|
||||||
|
position
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int _WalkThreads(DynamicArray* visitor_stack, RegexMatchThreadGroup* group, size_t* depth)
|
// Advances whatever sits on top of the matcher's visitor stack: a thread
// visitor goes to _AdvanceThread, a group visitor to _AdvanceGroup.
//
// Returns the code produced by the handler that ran; a visitor of any other
// type is left untouched and EXIT_SUCCESS is returned.
static int _HandleAdvanceVisitor(RegexMatcher* matcher)
{
    struct GroupVisitor* top = DynamicArray_GetPointer(
        &matcher->visitor_stack,
        DynamicArray_GetLength(&matcher->visitor_stack) - 1
    );

    if (top->type == REGEXMATCHTHREADTYPE_THREAD) {
        return _AdvanceThread(matcher);
    }
    if (top->type == REGEXMATCHTHREADTYPE_GROUP) {
        return _AdvanceGroup(matcher);
    }

    return EXIT_SUCCESS;
}
|
||||||
|
|
||||||
|
// Added side: performs one walk over the matcher's thread hierarchy. A GROUP
// visitor for matcher->top_group (position -1) is pushed onto the visitor
// stack, then _HandleAdvanceVisitor is called until the stack is empty.
// Returns the first non-zero code from _HandleAdvanceVisitor, otherwise
// EXIT_SUCCESS.
static int _WalkThreads(RegexMatcher* matcher)
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
|
|
||||||
struct GroupVisitor* zero_visitor;
|
struct GroupVisitor* zero_visitor;
|
||||||
DynamicArray_AppendEmpty(
|
// NOTE(review): the result of DynamicArray_AppendEmpty is not checked before
// zero_visitor is written through - confirm whether the append can fail.
DynamicArray_AppendEmpty(
|
||||||
visitor_stack,
|
&matcher->visitor_stack,
|
||||||
(void**) &zero_visitor
|
(void**) &zero_visitor
|
||||||
);
|
);
|
||||||
zero_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
|
zero_visitor->type = REGEXMATCHTHREADTYPE_GROUP;
|
||||||
zero_visitor->as.group = group;
|
zero_visitor->as.group = &matcher->top_group;
|
||||||
|
// _AdvanceGroup increments the position before using it, so -1 makes the
// first advance land on index 0.
zero_visitor->position = -1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while (DynamicArray_GetLength(visitor_stack) != 0) {
|
while (DynamicArray_GetLength(&matcher->visitor_stack) != 0) {
|
||||||
int advance_code = _HandleAdvanceVisitor(
|
int advance_code = _HandleAdvanceVisitor(matcher);
|
||||||
visitor_stack,
|
|
||||||
depth
|
|
||||||
);
|
|
||||||
if (advance_code) return advance_code;
|
if (advance_code) return advance_code;
|
||||||
}
|
}
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// True once a thread has been stored in matcher->finished_thread. The struct
// is zeroed by _RegexMatcher_Create, so a non-NULL `regex` member is the
// marker.
static bool _HaveFinishedThread(RegexMatcher* matcher)
{
    const RegexMatchThread* finished = &matcher->finished_thread;

    return finished->regex != NULL;
}
|
||||||
|
|
||||||
|
// True while the top-level group still holds something to advance, i.e. its
// `subgroups` or its `threads` array has a non-zero `reserved` count.
static bool _HaveThreadsRunning(RegexMatcher* matcher)
{
    const RegexMatchThreadGroup* root = &matcher->top_group;

    if (root->subgroups.reserved != 0) {
        return true;
    }
    return root->threads.reserved != 0;
}
|
||||||
|
|
||||||
int Regex_MatchHere(Regex* regex, StringView string, size_t start, RegexMatch* match)
|
// Added side: tries to match `regex` against `string` starting at `start`.
// A RegexMatcher is created, _WalkThreads is repeated until a thread has
// finished or the top group holds no more threads or subgroups, and when a
// thread finished its match is copied into *match. The thread hierarchy and
// the matcher are destroyed before returning.
//
// Returns EXIT_FAILURE when the matcher cannot be created. Otherwise returns
// `return_code`, which is never changed from EXIT_SUCCESS on this side, so the
// return value does not tell the caller whether *match was written.
int Regex_MatchHere(Regex* regex, StringView string, size_t start, RegexMatch* match)
|
||||||
{
|
{
|
||||||
int return_code = EXIT_SUCCESS;
|
int return_code = EXIT_SUCCESS;
|
||||||
|
|
||||||
// initialize variables etc....
|
RegexMatcher matcher;
|
||||||
DynamicArray visitor_stack;
|
// NOTE(review): the specific code returned by _RegexMatcher_Create is
// replaced by EXIT_FAILURE on the next line.
if (_RegexMatcher_Create(&matcher, regex, string, start)) {
|
||||||
if (DynamicArray_Create(
|
return EXIT_FAILURE;
|
||||||
&visitor_stack,
|
|
||||||
sizeof(struct GroupVisitor),
|
|
||||||
16,
|
|
||||||
regex->machine_memory.allocator)
|
|
||||||
) {
|
|
||||||
return ENOMEM;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexMatchThreadGroup top_level_group;
|
while (! _HaveFinishedThread(&matcher)
|
||||||
RegexMatchThreadGroup_Create(&top_level_group, 0, regex->machine_memory.allocator);
|
&& _HaveThreadsRunning(&matcher)) {
|
||||||
size_t depth = 1;
|
// NOTE(review): the code returned by _WalkThreads is discarded, so a failed
// walk is not reported and the loop simply runs again.
_WalkThreads(&matcher);
|
||||||
|
|
||||||
{
|
|
||||||
RegexMatchThread* first_thread;
|
|
||||||
first_thread = RegexMatchThreadGroup_NewThread(&top_level_group);
|
|
||||||
first_head->number = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool haveFinishedThread = false;
|
if (_HaveFinishedThread(&matcher)) {
|
||||||
while (! haveFinishedThread) {
|
*match = matcher.finished_thread.match;
|
||||||
_WalkThreads(&visitor_stack, &top_level_group, &depth);
|
// The match was just copied out by value; the thread's own copy is zeroed
// before RegexMatchThread_Del - presumably so that Del does not release
// anything *match now refers to (confirm against RegexMatchThread_Del).
memset(
|
||||||
|
&matcher.finished_thread.match,
|
||||||
|
0,
|
||||||
|
sizeof(matcher.finished_thread.match)
|
||||||
|
);
|
||||||
|
RegexMatchThread_Del(&matcher.finished_thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
defer_tl_group:
|
_DestroyThreadHierarchy(&matcher.visitor_stack, &matcher.top_group);
|
||||||
_DestroyThreadHierarchy(&visitor_stack, &top_level_group);
|
_RegexMatcher_Destroy(&matcher);
|
||||||
defer_stack:
|
|
||||||
DynamicArray_Destroy(&visitor_stack);
|
|
||||||
|
|
||||||
return return_code;
|
return return_code;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue