-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathIpadicFormatter.ts
111 lines (101 loc) · 2.81 KB
/
IpadicFormatter.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
"use strict";
import { ViterbiNodeType } from "../viterbi/ViterbiNode.js";
export interface IpadicFormatterToken {
/** 辞書内での単語ID */
word_id: number;
/** 単語タイプ(辞書に登録されている単語ならKNOWN, 未知語ならUNKNOWN) */
word_type: ViterbiNodeType;
/** 単語の開始位置 */
word_position: number;
/** 表層形 */
surface_form: string | Uint8Array;
/** 品詞 */
pos: string;
/** 品詞細分類1 */
pos_detail_1: string;
/** 品詞細分類2 */
pos_detail_2: string;
/** 品詞細分類3 */
pos_detail_3: string;
/** 活用型 */
conjugated_type: string;
/** 活用形 */
conjugated_form: string;
/** 基本形 */
basic_form: string;
/** 読み */
reading?: string;
/** 発音 */
pronunciation?: string;
}
class IpadicFormatter {
/**
* Mappings between IPADIC dictionary features and tokenized results
* @constructor
*/
constructor() {}
formatEntry(
word_id: number,
position: number,
type: ViterbiNodeType,
features: string[]
): IpadicFormatterToken {
let token: IpadicFormatterToken = {
word_id,
word_type: type,
word_position: position,
surface_form: features[0],
pos: features[1],
pos_detail_1: features[2],
pos_detail_2: features[3],
pos_detail_3: features[4],
conjugated_type: features[5],
conjugated_form: features[6],
basic_form: features[7],
reading: features[8],
pronunciation: features[9],
};
return token;
}
formatUnknownEntry(
word_id: number,
position: number,
type: ViterbiNodeType,
features: string[],
surface_form: string | Uint8Array
): IpadicFormatterToken {
let token: IpadicFormatterToken = {
word_id,
word_type: type,
word_position: position,
surface_form: surface_form,
pos: features[1],
pos_detail_1: features[2],
pos_detail_2: features[3],
pos_detail_3: features[4],
conjugated_type: features[5],
conjugated_form: features[6],
basic_form: features[7],
// reading: features[8],
// pronunciation: features[9],
};
return token;
}
}
export default IpadicFormatter;