-
Notifications
You must be signed in to change notification settings - Fork 3
/
jlb_algorithm.cpp
138 lines (104 loc) · 3.2 KB
/
jlb_algorithm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#include <algorithm>
#include <bits/stdc++.h>
#include <chrono>
#include <cmath>
#include <iostream>
#include <vector>
#include <unordered_map>
using namespace std;
int alphabet_list_length = 256;
int alphabet_list[256];
int get_alphabet_size() {
int size = 0;
for (int i = 0; i < alphabet_list_length; i++) {
if (alphabet_list[i] != -1) {
size += 1;
}
}
return size;
}
void set_alphabet(const string &alphabet) {
for (int i = 0; i < alphabet_list_length; i++) {
alphabet_list[i] = -1;
}
for (int i = 0; i < alphabet.size(); i++) {
alphabet_list[(int) alphabet[i]] = i;
}
}
int symbol_to_int(char symbol) {
return alphabet_list[(int) symbol];
}
bool comparison_method(long long x, long long y);
long long to_int(const string &sequence, int base) {
long long value = 0;
int m = sequence.size();
for (int index = 0; index < m; index++) {
int integer = symbol_to_int(sequence[index]);
if (integer == -1) {
return - (index + 1);
}
int exponent = (m - 1) - index;
value += integer * pow(base, exponent);
}
return value;
}
vector<int> search(const string &sequence, const string &pattern) {
vector<int> occurrences;
int n = sequence.size();
int m = pattern.size();
int base = get_alphabet_size();
// Preprocessing phase ...
long long pattern_value = to_int(pattern, base);
long long frame = -1;
int start_pos = 0;
while (frame <= -1 && start_pos < n) {
frame = to_int(sequence.substr(start_pos, m), base);
if (frame <= -1) {
start_pos -= frame;
}
}
long long most_significant_digit_position = pow(base, m - 1);
// Searching phase ...
if (comparison_method(frame, pattern_value)) {
occurrences.push_back(start_pos);
}
for (int i = start_pos + m; i < n; ++i) {
int element = symbol_to_int(sequence[i]);
int digit = (int)(frame / most_significant_digit_position);
if (element == -1) {
frame = -1;
i += 1;
while (frame <= -1 && i < n) {
frame = to_int(sequence.substr(i, m), base);
if (frame <= -1) {
i -= frame;
}
}
i += m - 1;
}
else {
frame = (frame - (digit * most_significant_digit_position)) * base;
frame = round(frame + element);
}
if (comparison_method(frame, pattern_value)) {
occurrences.push_back(i - m + 1);
}
}
return occurrences;
}
// You can change the comparision method as you wish,
// but the comparision must have complexity O(1).
bool comparison_method(long long x, long long y) {
return x <= y;
}
int main() {
string text = "qaaqwawababbgghhjkcabbwaknqtbababbdaaabdwawaawaaa";
string pattern = "abb";
string alphabet = "abcd"; // It must be ordered
cout << "Searching..." << endl;
set_alphabet(alphabet);
vector<int> occurrences = search(text, pattern);
for (int i = 0; i < occurrences.size(); i++) {
cout << "Found at position: " << occurrences[i] << endl;
}
}