-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkeyword_extr_model.h
62 lines (47 loc) · 1.61 KB
/
keyword_extr_model.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#ifndef KEYWORD_EXTR_MODEL_H
#define KEYWORD_EXTR_MODEL_H
#include <iostream>
#include <vector>
#include "maxent/maxent.h"
#include "postagger_model.h"
#include "util.h"
// NOTE(review): a using-directive at file scope in a header is inherited by
// every file that includes it. The declarations below are written with
// unqualified names (string, vector, unordered_map), so it is left in place.
using namespace std;
// Declaration only (extern, no initializer): the definition of this
// string -> (string -> string) map is not in this header.
// NOTE(review): the meaning of the outer and inner keys is not visible here;
// confirm against the file that defines `keywords`.
extern unordered_map<string, unordered_map<string, string> > keywords;
/// Keyword extractor that owns a Postagger_Model and an ME_Model classifier.
///
/// The object stores the training and test file paths given at construction.
/// init(), train(), test(), classify_line(), generate_sample() and read_line()
/// are only declared here; their definitions live outside this header.
class Keyword_Extractor_Model {
    Postagger_Model postagger_model;
    ME_Model extractor_model;
    /// Set by get_extractor_model() after "extractor.model" has been read
    /// successfully; out-of-line members may update it as well.
    bool extractor_model_loaded;
    std::string train_file, test_file;

    /// Invoked once from the constructor; defined out of line.
    void init();

public:
    /// Stores both file paths, marks the extractor model as not yet loaded,
    /// and then calls init().
    ///
    /// @param train_file Path kept in the `train_file` member.
    /// @param test_file  Path kept in the `test_file` member.
    Keyword_Extractor_Model(std::string train_file, std::string test_file)
        : extractor_model_loaded(false),
          train_file(train_file),
          test_file(test_file) {
        init();
    }

    /// A word together with its part-of-speech tag and a type label.
    /// NOTE(review): the set of valid `type` values is not visible in this
    /// header; confirm against read_line() / generate_sample().
    struct ClassifiedToken {
        std::string word, pos_tag, type;

        ClassifiedToken(const std::string & w,
                        const std::string & pos,
                        const std::string & t)
            : word(w), pos_tag(pos), type(t) {}
    };

    /// One element of the vector returned by classify_line().
    /// `is_keyword` has no default initializer, so whoever creates a
    /// KeywordToken is responsible for assigning it.
    struct KeywordToken {
        std::string word, pos, keywords, types;
        bool is_keyword;
    };

    void test();
    void train();

    /// Classifies the tokens of `str`; defined out of line.
    std::vector<KeywordToken> classify_line(std::string str);

    /// Returns a copy of the extractor model, preparing it on first use.
    ///
    /// While the model is not marked as loaded, this tries to read it from
    /// "extractor.model"; if that fails it falls back to train().
    ///
    /// @return The extractor model, by value (the ME_Model is copied).
    ME_Model get_extractor_model() {
        if (!extractor_model_loaded) {
            if (extractor_model.load_from_file("extractor.model")) {
                log("loaded extractor.model from file");
                // Remember the successful load so that later calls do not
                // read the model file from disk again.
                extractor_model_loaded = true;
            } else {
                // NOTE(review): train() is defined elsewhere; confirm that it
                // marks the model as loaded, otherwise every call retrains.
                train();
            }
        }
        return extractor_model;
    }

private:
    /// Builds the ME_Sample for position `i` of `classified_words`;
    /// defined out of line.
    ME_Sample generate_sample(std::vector<ClassifiedToken> & classified_words, int i);

    /// Converts `str` into a sequence of ClassifiedToken; defined out of line.
    std::vector<ClassifiedToken> read_line(const std::string & str);
};
#endif