diff --git a/Makefile b/Makefile index fba0ccd..c3d4559 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ all: rmatch run -rmatch: rmatch.c +rmatch: rmatch.c rmatch.h gcc rmatch.c -Wall -Wextra -Ofast -o rmatch run: diff --git a/README.md b/README.md new file mode 100644 index 0000000..96a8663 --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +![workflow](https://github.com/GerbenAaltink/mrex/actions/workflows/make-mrex-single-platform.yml/badge.svg) + +# Rmatch + +## Summary +Super minimal regex validator. + + +## Supported functions +Support for: + - extract matched data + - . Does work in combination with greedy matching + - ^ + - \+ greedy + - \* greedy + - [abc] (or). Does NOT work in combination with greedy matching + +## Minimal usage matching +`rmatch(char * expr, char ** str)` is the main function. It returns int[3]. +If you have a `char *` as str argument, don't forget to add the `&`. +The return value contains: +``` +result[0] // 1 if validated. Else 0. +result[1] // match start. +result[2] // match length. +``` + +## Minimal usage extracting +`rmatch_extract(char * expr, char ** str)` is the main function. It returns char * or NULL. +If you have a `char *` as str argument, don't forget to add the `&`. +You can use it in a loop since it uses the pointer of the input string: +``` +char * str = "testtest"; +char * expr = "t.*t"; +// Two times it equals to "test" +assert(!strcmp(rmatch_extract(expr, &str), "test")); +assert(!strcmp(rmatch_extract(expr, &str), "test")); +// Third time it equals to NULL +assert(!rmatch_extract(expr, &str)); +``` +Other example: +``` +char * str = "testtest"; +char * expr = "t.*t"; +char * result = NULL; +// Loops until NULL +while((result = rmatch_extract(expr, &str))){ + printf("Found: \"%s\"\n", result); +} +// Be sure last result is invalid +assert(!result); +``` + +## Tests / examples +For executing tests in this repository, just run `make`. +You can find the test source [here](rmatch.c#tests). 
\ No newline at end of file diff --git a/rmatch.h b/rmatch.h index dda19aa..160fb76 100644 --- a/rmatch.h +++ b/rmatch.h @@ -4,7 +4,7 @@ #include int rmatchhere(char *regexp, char *text); int rmatchgreedy(int c, char *regexp, char *text, int required); -int * rmatch(char *regexp, char **txt) +int *rmatch(char *regexp, char **txt) { char *text = *txt; static int result[3]; @@ -32,14 +32,16 @@ int * rmatch(char *regexp, char **txt) result[2]--; result[0] = result[2] > 0; result[1] = steps_total; - //printf("%d : %d %d\n", steps_total, steps_false, result[2]); + // printf("%d : %d %d\n", steps_total, steps_false, result[2]); *txt += result[1] + result[2]; return result; - }else{ + } + else + { steps_false++; } } while (*text++ != '\0'); - return result; + return result; } int rmatchhere(char *regexp, char *text) { @@ -47,27 +49,29 @@ int rmatchhere(char *regexp, char *text) if (regexp[0] == '\0') return 1; if (regexp[1] == '*') - if((res = rmatchgreedy(regexp[0], regexp + 2, text, 0))) + if ((res = rmatchgreedy(regexp[0], regexp + 2, text, 0))) return res - 1; if (regexp[1] == '+') - if((res = rmatchgreedy(regexp[0], regexp + 2, text, 1))) + if ((res = rmatchgreedy(regexp[0], regexp + 2, text, 1))) return res - 1; - if (regexp[0] == '['){ + if (regexp[0] == '[') + { regexp++; int block_true = 0; - while(*regexp != ']'){ - if(!block_true && *regexp == *text) + while (*regexp != ']') + { + if (!block_true && *regexp == *text) block_true = 1; regexp++; } - if(block_true) + if (block_true) return rmatchhere(regexp + 1, text + 1) + 1; return 0; } if (regexp[0] == '$' && regexp[1] == '\0') return *text == '\0'; if (regexp[0] == '\\') - return *text == '.' && rmatchhere(regexp + 2, text + 1) + 1; + return *text == '.' && rmatchhere(regexp + 2, text + 1) + 1; if (*text != '\0' && (regexp[0] == '.' 
|| regexp[0] == *text)) if ((res = rmatchhere(regexp + 1, text + 1))) return res + 1; @@ -81,104 +85,121 @@ int rmatchgreedy(int c, char *regexp, char *text, int required) { rmatch_success++; if ((res = rmatchhere(regexp, text))) - { + { res = rmatch_success + res; - if(!required){ + if (!required) + { return res + 1; - }else if(required && res > 1){ + } + else if (required && res > 1) + { return res + 1; } } } while (*text != '\0' && ((*text++ == c) || c == '.')); return 0; } -char * rmatch_extract(char *regexp, char **txt){ - int * result = rmatch(regexp, txt); - if(result[0]){ - char * extracted = *txt - result[2]; - char * str = (char *)malloc(result[2] + 1); +char *rmatch_extract(char *regexp, char **txt) +{ + int *result = rmatch(regexp, txt); + if (result[0]) + { + char *extracted = *txt - result[2]; + char *str = (char *)malloc(result[2] + 1); str[0] = 0; - strncpy(str,extracted,result[2]); + strncpy(str, extracted, result[2]); str[result[2]] = 0; - return str; + return str; } return NULL; } -void rmatch_test(char *expr, char *text, char **expected){ - char * res; - printf("test: <%s> \"%s\"\n",expr,text); +void rmatch_test(char *expr, char *text, char **expected) +{ + char *res; + printf("test: <%s> \"%s\"\n", expr, text); int expected_index = 0; - while(*text && expected[expected_index] != NULL){ + while (*text && expected[expected_index] != NULL) + { char *text_original = text; - res = rmatch_extract(expr,&text); - if((!res && !(res == expected[expected_index])) || strncmp(res,expected[expected_index],strlen(expected[expected_index]))){ + res = rmatch_extract(expr, &text); + if ((!res && !(res == expected[expected_index])) || strncmp(res, expected[expected_index], strlen(expected[expected_index]))) + { printf("\e[31merror:\n"); - printf(" expected: \"%s\"\n",expected[expected_index]); + printf(" expected: \"%s\"\n", expected[expected_index]); printf(" got: \"%s\"\e[0m\n", res ? 
res : "[no match]"); assert(0); - }else{ - printf("\e[32mcorrect:\e[0m <%s> \"%s\"\n",expr,text_original); + } + else + { + printf("\e[32mcorrect:\e[0m <%s> \"%s\"\n", expr, text_original); } expected_index++; } - if(*text && expected[expected_index] != NULL){ + if (*text && expected[expected_index] != NULL) + { printf("\e[31merror:\n"); - printf(" expected: %s\n","empty string"); + printf(" expected: %s\n", "empty string"); printf(" got: \"%s\"\e[0m\n", text); assert(!*text); } assert(!expected[expected_index]); } -void rmatch_tests(){ + +#example_one +void example_one() +{ + char *str = "testtest"; + char *expr = "t.*t"; + // Two times it equals to "test" + assert(!strcmp(rmatch_extract(expr, &str), "test")); + assert(!strcmp(rmatch_extract(expr, &str), "test")); + // Third time it equals to NULL + assert(!rmatch_extract(expr, &str)); +} + +#example_two +void example_two() +{ + printf("Testing example two.\n"); + char *str = "testtest"; + char *expr = "t.*t"; + char *result = NULL; + // Loops until NULL + while ((result = rmatch_extract(expr, &str))) + { + printf("Found: \"%s\"\n", result); + } + // Be sure last result is invalid + assert(!result); +} + +#tests +void rmatch_tests() +{ + // Examples + example_one(); + example_two(); // Asterisk - rmatch_test(".*H.*ry P.*rS.*la", "Harry PotterSim SalaHarry PotterSimSalaHarry PotterSimSalaHarry PotterSimSala", - (char *[]){ - "Harry PotterSim Sala", - "Harry PotterSimSala", - "Harry PotterSimSala", - "Harry PotterSimSala", - NULL} - ); + rmatch_test(".*H.*ry P.*rS.*la", "Harry PotterSim SalaHarry PotterSimSalaHarry PotterSimSalaHarry PotterSimSala", + (char *[]){ + "Harry PotterSim Sala", + "Harry PotterSimSala", + "Harry PotterSimSala", + "Harry PotterSimSala", + NULL}); char *text_fox = "The quick brown fox jumps over the lazy dog.The quick brown fox jumps over the lazy dog.The quick brown fox jumps over the lazy dog.The quick brown fox jumps over the lazy dog.The quick brown fox jumps over the lazy dog."; - 
rmatch_test("The q.*ick b.*n f.*x j.*s over the lazy dog.$",text_fox,(char *[]){ - "The quick brown fox jumps over the lazy dog.", - NULL - }); + rmatch_test("The q.*ick b.*n f.*x j.*s over the lazy dog.$", text_fox, (char *[]){"The quick brown fox jumps over the lazy dog.", NULL}); char *text_fox2 = "The quick brown fox jumps over the lazy dog.The quick brown fox jumps over the lazy dog.The quick brown fox jumps over the lazy dog.The quick brown fox jumps over the lazy dog.The quick brown fox jumps over the lazy dog."; - rmatch_test("T.*e q.*k b.*n f.*x j.*s o.*r t.*e l.*y d.*g.",text_fox2,(char *[]){ - "The quick brown fox jumps over the lazy dog.", - "The quick brown fox jumps over the lazy dog.", - "The quick brown fox jumps over the lazy dog.", - "The quick brown fox jumps over the lazy dog.", - "The quick brown fox jumps over the lazy dog.", - NULL - }); + rmatch_test("T.*e q.*k b.*n f.*x j.*s o.*r t.*e l.*y d.*g.", text_fox2, (char *[]){"The quick brown fox jumps over the lazy dog.", "The quick brown fox jumps over the lazy dog.", "The quick brown fox jumps over the lazy dog.", "The quick brown fox jumps over the lazy dog.", "The quick brown fox jumps over the lazy dog.", NULL}); // Block char *text_fox3 = "The quick brown fox jumps over the lazy dog."; - rmatch_test("T.*e q.*k b.*n f.*x j.*s o.*r t.*e l[oa][az].*y d[ao]g.",text_fox3,(char *[]){ - "The quick brown fox jumps over the lazy dog.", - NULL - }); + rmatch_test("T.*e q.*k b.*n f.*x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox3, (char *[]){"The quick brown fox jumps over the lazy dog.", NULL}); /// Plus char *text_fox4 = "The quick brown fooox jumps over the lazy dog."; - rmatch_test("T.*e q.*k b.*n fo+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.",text_fox4,(char *[]){ - "The quick brown fooox jumps over the lazy dog.", - NULL - }); - rmatch_test("T.*e q.*k b.*n f.+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.",text_fox4,(char *[]){ - "The quick brown fooox jumps over the lazy dog.", - NULL - }); - 
rmatch_test("T.*e q.*k b.*n f+.*x j.*s o.*r t.*e l[oa][az].*y d[ao]g.",text_fox4,(char *[]){ - "The quick brown fooox jumps over the lazy dog.", - NULL - }); - rmatch_test("T.*e q.*k b.*n fo+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.",text_fox4,(char *[]){ - "The quick brown fooox jumps over the lazy dog.", - NULL - }); - rmatch_test("T.*e q.*k b.*n f+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.",text_fox4,(char *[]){ - NULL - }); + rmatch_test("T.*e q.*k b.*n fo+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox4, (char *[]){"The quick brown fooox jumps over the lazy dog.", NULL}); + rmatch_test("T.*e q.*k b.*n f.+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox4, (char *[]){"The quick brown fooox jumps over the lazy dog.", NULL}); + rmatch_test("T.*e q.*k b.*n f+.*x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox4, (char *[]){"The quick brown fooox jumps over the lazy dog.", NULL}); + rmatch_test("T.*e q.*k b.*n fo+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox4, (char *[]){"The quick brown fooox jumps over the lazy dog.", NULL}); + rmatch_test("T.*e q.*k b.*n f+x j.*s o.*r t.*e l[oa][az].*y d[ao]g.", text_fox4, (char *[]){NULL}); } \ No newline at end of file