-
Notifications
You must be signed in to change notification settings - Fork 0
/
03-regex.cc
124 lines (108 loc) · 4.75 KB
/
03-regex.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
//
// Program
// Regular Expression:
// - Search each sample text with std::sregex_iterator using a simple regex.
// - Search each sample text with std::sregex_token_iterator using a regex that
//   has a capture group and a back-reference, selecting different sub-matches.
//
// Compile
// g++ -Wall -Wextra -pedantic -std=c++17 -o 03-regex 03-regex.cc
//
// Execution
// ./03-regex
//
#include <cstddef>
#include <iostream>
#include <regex>
#include <string>
#include <vector>
namespace {

//
// Prints the banner line that introduces one demo section.
//
void print_banner(const char* title) {
  std::cout << '\n'
            << "---- " << title << " ----"
            << '\n' << '\n';
}

//
// Prints "Found: <count>" followed by one numbered line per result.
//
// Works for both std::sregex_iterator (each element is a match_results) and
// std::sregex_token_iterator (each element is a sub_match), because both
// expose str() and both use a default-constructed iterator as end-of-sequence.
//
// The results are collected first so the regex search runs only once per
// sample; counting with std::distance and then looping again would repeat
// every search.
//
template <typename RegexIterator>
void print_found(RegexIterator first) {
  const RegexIterator last = RegexIterator();

  std::vector<std::string> found;
  for (; first != last; ++first) {
    found.push_back(first->str());
  }

  std::cout << " " << "Found: " << found.size() << '\n';
  std::size_t idx = 1;
  for (const auto& text : found) {
    std::cout << " " << "#" << idx << ": '" << text << "'\n";
    ++idx;
  }
}

//
// One std::sregex_token_iterator configuration: the heading printed for it
// and the sub-match indices it selects.
//   0  -> the whole match
//   N  -> the N-th capture group
//   -1 -> the text that was NOT matched (the pieces around/between matches)
//
struct TokenSelection {
  const char* label;
  std::vector<int> submatches;
};

//
// Demo 1: <START_TAG>text</END_TAG> where the two tags may hold any text.
// Every match is listed with std::sregex_iterator.
//
void demo_any_tags(const std::vector<std::string>& input_texts) {
  const std::regex re(R"(<.*>.*<\/.*>)");

  print_banner("Find start/end tag (iterator)");
  for (const auto& input : input_texts) {
    std::cout << '\n' << "Sample Text: " << input << '\n';
    print_found(std::sregex_iterator(input.cbegin(), input.cend(), re));
  }
}

//
// Demo 2: <START_TAG>text</END_TAG> where start and end tags must hold the
// same text; the back-reference \1 repeats whatever group 1 captured.
// Each sample is tokenized four times with std::sregex_token_iterator, once
// per sub-match selection.
//
void demo_same_tags(const std::vector<std::string>& input_texts) {
  const std::regex re(R"(<(.*)>.*<\/(\1)>)");
  const std::vector<TokenSelection> selections = {
      {"--- # 0 ---", {0}},
      {"--- # -1 ---", {-1}},
      {"--- # {0, -1} ---", {0, -1}},
      {"--- # {0, 1} ---", {0, 1}},
  };

  print_banner("Find start/end tag (token iterator)");
  for (const auto& input : input_texts) {
    std::cout << '\n' << "Sample Text: " << input << '\n';
    for (const auto& selection : selections) {
      std::cout << '\n' << " " << selection.label << '\n';
      print_found(std::sregex_token_iterator(input.cbegin(), input.cend(), re,
                                             selection.submatches));
    }
  }
}

}  // namespace

//
// Entry function
//
int main() {
  // Samples shared by both demos. "any" is the expectation for the regex that
  // accepts any start/end tag, "same" for the regex that requires the end tag
  // to repeat the start tag.
  const std::vector<std::string> input_texts = {
      R"(<start>C++ Regex</start>)",              // any: match;    same: match
      R"(<start>C++ Regex</end>)",                // any: match;    same: no match, tags differ
      R"(<title>C++ Regex</title>)",              // any: match;    same: match
      R"(<title>C++ Regex title>)",               // any: no match; same: no match, end tag is missing
      R"(<title C++ Regex</title>)",              // any: no match; same: no match, start tag is missing
      R"(sdummy<title>C++ Regex</title>)",        // any: match;    same: match
      R"(<title>C++ Regex</title>edummy)",        // any: match;    same: match
      R"(sdummy<title>C++ Regex</title>edummy)",  // any: match;    same: match
  };

  demo_any_tags(input_texts);
  demo_same_tags(input_texts);
  return 0;
}