-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.cpp
146 lines (124 loc) · 2.99 KB
/
parser.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// TODO: make the processing case-insensitive (convert all words to lower case)
#include "parser.h"
/**
 * @brief Parser::Parser
 * Creates a parser with an empty file list and an empty word-count map.
 * @param parent Forwarded to the QObject base-class constructor.
 */
Parser::Parser(QObject *parent) : QObject(parent)
{
    // Both containers are heap-allocated here and released in ~Parser().
    files = new QList<QString>;
    hash = new QHash<QString, int>;
    // Start from a defined value so getElapsedTime() is well-defined even
    // when setElapsedTime() has not been called yet.
    elapsedTime = 0;
    qDebug()<<"Parser created";
}
/**
 * @brief Parser::~Parser
 * Releases the word-count map and the file list owned by this parser.
 */
Parser::~Parser()
{
    delete hash;
    delete files;
    qDebug() << "Parser deleted";
}
/**
 * @brief Parser::parseFile
 * Reads the whole file at @p path into the parser's text field.
 *
 * If the file cannot be opened, a message box showing the file's error string
 * is displayed and neither the text field nor the file list is modified.
 * On success the path is appended to the file list and the text field is
 * replaced with the file's contents.
 *
 * @param path Path of the file to read.
 */
void Parser::parseFile(QString path)
{
    QFile source(path);
    const bool opened = source.open(QIODevice::ReadOnly);
    if (!opened) {
        QMessageBox::information(nullptr, "error", source.errorString());
        return;
    }

    // Remember the file only once it is known to be readable.
    files->append(path);

    QTextStream reader(&source);
    const QString contents = reader.readAll();
    source.close();

    // Update object field
    text = contents;
}
/**
* @brief Parser::calculateStatistics
* Calculates words' occurences in a text
* @param path, wordLength
*/
void Parser::calculateStatistics(int wordLength)
{
QStringList words = text.split(QRegExp("\\s+"), QString::SkipEmptyParts);
// Clean hashMap from words < wordLength
filterHash(wordLength);
//Adding words to the hash map and calculating occurences
//TODO use more efficient hashing?
for(const auto& i : words ){
//Ignoring words which length is less
if (i.length() < wordLength){
continue;
}
if(!hash->contains(i)){
//adding new word
hash->insert(i, 1);
}
else {
//increasing the count
hash->insert(i, hash->value(i)+ 1);
}
}
}
/**
 * @brief Parser::getResult
 * Builds the occurrence report with one "WORD : COUNT" entry per word in the
 * hash map, ordered by descending count; words with equal counts appear in
 * descending word order.
 *
 * The report is also stored in the parser's output field.
 *
 * @return The formatted report; an empty string when the hash map is empty.
 */
QString Parser::getResult()
{
    using Entry = QPair<int, QString>;

    // Count goes first in the pair so that the default pair ordering sorts by count.
    QList<Entry> entries;
    entries.reserve(hash->size());
    for (auto it = hash->constBegin(); it != hash->constEnd(); ++it) {
        entries.append(qMakePair(it.value(), it.key()));
    }

    // Sort directly into descending order instead of sorting ascending and reversing.
    std::sort(entries.begin(), entries.end(),
              [](const Entry &lhs, const Entry &rhs) { return rhs < lhs; });

    QString res;
    for (const Entry &entry : entries) {
        // The word is concatenated rather than passed through QString::arg(): a word
        // that itself contains a place marker such as "%1" would otherwise have the
        // count substituted into it.
        // NOTE(review): words are inserted verbatim; if this string is rendered as rich
        // text, markup characters inside a word are not escaped - confirm whether
        // escaping is needed.
        res += entry.second + " : " + QString::number(entry.first) + " <br></br><br></br>";
    }

    output = res;
    return res;
}
/**
 * @brief Parser::getText
 * @return The current contents of the parser's text field.
 */
QString Parser::getText()
{
    return this->text;
}
/**
 * @brief Parser::getHashMap
 * @return Pointer to the parser's word-count map. The map is deleted in the
 *         parser's destructor, so the pointer must not be used afterwards.
 */
QHash<QString, int>* Parser::getHashMap()
{
    return this->hash;
}
/**
 * @brief Parser::filterHash
 * Removes every entry from the hash map whose word is shorter than wordLength.
 * @param wordLength Minimum word length an entry must have to be kept.
 */
void Parser::filterHash(int wordLength)
{
    // QHash::erase() returns the iterator following the removed entry, so short
    // words can be dropped in a single pass without a temporary list of keys.
    for (auto it = hash->begin(); it != hash->end(); ) {
        if (it.key().length() < wordLength) {
            it = hash->erase(it);
        } else {
            ++it;
        }
    }
}
/**
 * @brief Parser::getFiles
 * @return Pointer to the list of file paths recorded by parseFile(). The list
 *         is deleted in the parser's destructor, so the pointer must not be
 *         used afterwards.
 */
QList<QString>* Parser::getFiles()
{
    return this->files;
}
long long Parser::getElapsedTime()
{
return elapsedTime;
}
void Parser::setElapsedTime(long long time)
{
elapsedTime = time;
}