-
Notifications
You must be signed in to change notification settings - Fork 0
/
consts.py
151 lines (144 loc) · 4.6 KB
/
consts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Special label / vocabulary tokens.
# NOTE(review): "O" is presumably the "outside" (no-label) tag -- confirm
# against the callers that consume NONE.
NONE = "O"
PAD = "[PAD]"
UNK = "[UNK]"

# Special tokens for BERT:
#   [CLS]  is placed at the very start of the first sentence; the
#          representation vector C that BERT produces for it can be used for
#          downstream classification tasks.
#   [SEP]  separates two input sentences: for input sentences A and B, a
#          [SEP] token is appended after A and after B.
#   [UNK]  stands for an unknown character.
#   [MASK] masks some words in a sentence; BERT's output vector at the [MASK]
#          position is then used to predict the hidden word. (Mentioned for
#          reference only -- no MASK constant is defined alongside these.)
CLS = "[CLS]"
SEP = "[SEP]"
# Event trigger labels: 33 "Type:Subtype" strings, listed type by type.
# (That is 34 labels if the NONE label is counted as well.
# NOTE(review): confirm which count the callers assume.)
# The element order is kept exactly as it was, since list positions may be
# used as label ids elsewhere -- TODO confirm.
TRIGGERS = [
    # Business
    "Business:Merge-Org",
    "Business:Start-Org",
    "Business:Declare-Bankruptcy",
    "Business:End-Org",
    # Justice
    "Justice:Pardon",
    "Justice:Extradite",
    "Justice:Execute",
    "Justice:Fine",
    "Justice:Trial-Hearing",
    "Justice:Sentence",
    "Justice:Appeal",
    "Justice:Convict",
    "Justice:Sue",
    "Justice:Release-Parole",
    "Justice:Arrest-Jail",
    "Justice:Charge-Indict",
    "Justice:Acquit",
    # Conflict
    "Conflict:Demonstrate",
    "Conflict:Attack",
    # Contact
    "Contact:Phone-Write",
    "Contact:Meet",
    # Personnel
    "Personnel:Start-Position",
    "Personnel:Elect",
    "Personnel:End-Position",
    "Personnel:Nominate",
    # Transaction
    "Transaction:Transfer-Ownership",
    "Transaction:Transfer-Money",
    # Life
    "Life:Marry",
    "Life:Divorce",
    "Life:Be-Born",
    "Life:Die",
    "Life:Injure",
    # Movement
    "Movement:Transport",
]
"""
28 argument roles
There are 35 roles in ACE2005 dataset, but the time-related 8 roles were replaced by 'Time' as the previous work (Yang et al., 2016).
['Time-At-End','Time-Before','Time-At-Beginning','Time-Ending', 'Time-Holds', 'Time-After','Time-Starting', 'Time-Within'] --> 'Time'.
"""
# An argument is an event element, i.e. a participant in the event.
# 28 role labels; the element order is kept exactly as it was.
ARGUMENTS = [
    "Place", "Crime", "Prosecutor", "Sentence",
    "Org", "Seller", "Entity", "Agent",
    "Recipient", "Target", "Defendant", "Plaintiff",
    "Origin", "Artifact", "Giver", "Position",
    "Instrument", "Money", "Destination", "Buyer",
    "Beneficiary", "Attacker", "Adjudicator", "Person",
    "Victim", "Price", "Vehicle", "Time",
]
# 54 entity labels. Most have the form "TYPE:Subtype"; a few ("Crime",
# "Sentence", "Job-Title") carry no subtype. The element order is kept
# exactly as it was.
ENTITIES = [
    "VEH:Water",
    "GPE:Nation",
    "ORG:Commercial",
    "GPE:State-or-Province",
    "Contact-Info:E-Mail",
    "Crime",
    "ORG:Non-Governmental",
    "Contact-Info:URL",
    "Sentence",
    "ORG:Religious",
    "VEH:Underspecified",
    "WEA:Projectile",
    "FAC:Building-Grounds",
    "PER:Group",
    "WEA:Exploding",
    "WEA:Biological",
    "Contact-Info:Phone-Number",
    "WEA:Chemical",
    "LOC:Land-Region-Natural",
    "WEA:Nuclear",
    "LOC:Region-General",
    "PER:Individual",
    "WEA:Sharp",
    "ORG:Sports",
    "ORG:Government",
    "ORG:Media",
    "LOC:Address",
    "WEA:Shooting",
    "LOC:Water-Body",
    "LOC:Boundary",
    "GPE:Population-Center",
    "GPE:Special",
    "LOC:Celestial",
    "FAC:Subarea-Facility",
    "PER:Indeterminate",
    "VEH:Subarea-Vehicle",
    "WEA:Blunt",
    "VEH:Land",
    "TIM:time",
    "Numeric:Money",
    "FAC:Airport",
    "GPE:GPE-Cluster",
    "ORG:Educational",
    "Job-Title",
    "GPE:County-or-District",
    "ORG:Entertainment",
    "Numeric:Percent",
    "LOC:Region-International",
    "WEA:Underspecified",
    "VEH:Air",
    "FAC:Path",
    "ORG:Medical-Science",
    "FAC:Plant",
    "GPE:Continent",
]
# 45 part-of-speech tags: 36 word-level tags followed by 9 punctuation /
# symbol tags. NOTE(review): this looks like the Penn Treebank tag set --
# confirm against the tagger that produces these. The element order is kept
# exactly as it was.
POSTAGS = [
    # word-level tags
    "VBZ", "NNS", "JJR", "VB", "RBR", "WP",
    "NNP", "RP", "RBS", "VBP", "IN", "UH",
    "JJS", "NNPS", "PRP$", "MD", "DT", "WP$",
    "POS", "LS", "CC", "VBN", "EX", "NN",
    "VBG", "SYM", "FW", "TO", "JJ", "VBD",
    "WRB", "CD", "PDT", "WDT", "PRP", "RB",
    # punctuation, symbols and brackets
    ",", "``", "''", ":", ".",
    "$", "#", "-LRB-", "-RRB-",
]