-
Notifications
You must be signed in to change notification settings - Fork 1
/
finnish-pos-tagger-model.props
38 lines (38 loc) · 1.54 KB
/
finnish-pos-tagger-model.props
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Example .props file from an attempt to train the Stanford POS tagger on the FinnTreeBank (FTB) data.
# Comments must stay on their own lines: text after a value on the same line would become part of the value.
## tagger training invoked at Sat Mar 04 00:00:21 EET 2017 with arguments:
# Name of the model this configuration trains.
model = finnish-pos-tagger-model
# Feature templates used by the tagger; see the tagger documentation for the meaning of each extractor.
arch = words(-1,1),unicodeshapes(-1,1),order(2),suffix(4)
wordFunction =
# The column indices below (wordColumn=0, tagColumn=1) match the output of a separate Python script
# that stripped everything else and left just 2 columns (or 3, to work around the parser bug).
# For the raw FTB data, columns 1 and 4 were tried instead.
trainFile = format=TSV,wordColumn=0,tagColumn=1,transformed_train_1M.conllx
closedClassTags =
closedClassTagThreshold = 40
curWordMinFeatureThresh = 2
debug = false
debugPrefix =
tagSeparator = _
# NOTE(review): iso-8859-1 can represent the Finnish letters (ä, ö, å), but confirm that
# transformed_train_1M.conllx is really stored in this encoding and not in UTF-8.
encoding = iso-8859-1
iterations = 100
lang =
learnClosedClassTags = false
minFeatureThresh = 2
openClassTags =
rareWordMinFeatureThresh = 10
rareWordThresh = 5
# NOTE(review): the optimiser is "qn" while sigmaSquared is 0.0 and regL1 is set. Presumably regL1 is
# only consulted by the L1-regularised searches (e.g. owlqn) and sigmaSquared = 0.0 disables the
# quadratic prior, which would make this run unregularised - TODO confirm against the tagger docs.
search = qn
sgml = false
sigmaSquared = 0.0
regL1 = 0.75
tagInside =
# Tokenization is switched off here; tokenizerOptions below is presumably ignored as a result - verify.
tokenize = false
tokenizerFactory =
tokenizerOptions = asciiQuotes
verbose = true
verboseResults = true
veryCommonWordThresh = 250
# NOTE(review): the literal value "null" looks like an artifact of the argument dump rather than an
# intended setting; confirm the tagger does not interpret it as an XML element name.
xmlInput = null
outputFile =
outputFormat = slashTags
outputFormatOptions =
# Number of threads requested.
nthreads = 4