@@ -109,6 +109,122 @@ func Test_DelimWhiteSpace(t *testing.T) {
109
109
}
110
110
}
111
111
112
+ func Test_DisableSkipWhiteSpace (t * testing.T ) {
113
+ testdata := []struct {
114
+ input string
115
+ expect []string
116
+ }{
117
+ {
118
+ input : "" ,
119
+ expect : []string {},
120
+ },
121
+ {
122
+ input : "あああ" ,
123
+ expect : []string {"あああ" },
124
+ },
125
+ {
126
+ input : " " ,
127
+ expect : []string {" " },
128
+ },
129
+ {
130
+ input : " x x x " ,
131
+ expect : []string {" " , "x " , "x " , "x " },
132
+ },
133
+ {
134
+ input : "こんにちはは「さようなら 」 \U0001f363 が好き" ,
135
+ expect : []string {"こんにちはは「さようなら 」 " , "\U0001f363 が好き" },
136
+ },
137
+ {
138
+ input : "こんにちは \t \t さようなら おはよう おやすみ " ,
139
+ expect : []string {"こんにちは " , "\t \t さようなら " , "おはよう " , "おやすみ " },
140
+ },
141
+ {
142
+ input : "すももも\t \n \n \t ももも\n \n もものうち\n \n " ,
143
+ expect : []string {"すももも\t \n \n " , " " , "\t ももも\n \n " , "もものうち\n \n " },
144
+ },
145
+ }
146
+
147
+ s := filter.SentenceSplitter {
148
+ Delim : []rune {' ' , ' ' }, // white spaces
149
+ Follower : []rune {'.' , '」' , '」' , '』' , ')' , ')' , '}' , '}' , '〉' , '》' },
150
+ SkipWhiteSpace : false ,
151
+ DoubleLineFeedSplit : true ,
152
+ MaxRuneLen : 256 ,
153
+ }
154
+ for _ , d := range testdata {
155
+ scanner := bufio .NewScanner (strings .NewReader (d .input ))
156
+ scanner .Split (s .ScanSentences )
157
+ r := make ([]string , 0 , len (d .expect ))
158
+ for scanner .Scan () {
159
+ r = append (r , scanner .Text ())
160
+ }
161
+ if ! reflect .DeepEqual (r , d .expect ) {
162
+ t .Errorf ("input %q, got %#v, expected %#v" , d .input , r , d .expect )
163
+ }
164
+ if got := strings .Join (r , "" ); got != d .input {
165
+ t .Errorf ("got len=%d %q, input len=%d %q" , len (got ), got , len (d .input ), d .input )
166
+ }
167
+ }
168
+ }
169
+
170
+ func Test_DisableSkipWhiteSpaceAndDoubleLineFeedSplit (t * testing.T ) {
171
+ testdata := []struct {
172
+ input string
173
+ expect []string
174
+ }{
175
+ {
176
+ input : "" ,
177
+ expect : []string {},
178
+ },
179
+ {
180
+ input : "あああ" ,
181
+ expect : []string {"あああ" },
182
+ },
183
+ {
184
+ input : " " ,
185
+ expect : []string {" " },
186
+ },
187
+ {
188
+ input : " x x x " ,
189
+ expect : []string {" " , "x " , "x " , "x " },
190
+ },
191
+ {
192
+ input : "こんにちはは「さようなら 」 \U0001f363 が好き" ,
193
+ expect : []string {"こんにちはは「さようなら 」 " , "\U0001f363 が好き" },
194
+ },
195
+ {
196
+ input : "こんにちは \t \t さようなら おはよう おやすみ " ,
197
+ expect : []string {"こんにちは " , "\t \t さようなら " , "おはよう " , "おやすみ " },
198
+ },
199
+ {
200
+ input : "すももも\t \n \n \t ももも\n \n もものうち\n \n " ,
201
+ expect : []string {"すももも\t \n \n " , "\t ももも\n \n もものうち\n \n " },
202
+ },
203
+ }
204
+
205
+ s := filter.SentenceSplitter {
206
+ Delim : []rune {' ' , ' ' }, // white spaces
207
+ Follower : []rune {'.' , '」' , '」' , '』' , ')' , ')' , '}' , '}' , '〉' , '》' },
208
+ SkipWhiteSpace : false ,
209
+ DoubleLineFeedSplit : false ,
210
+ MaxRuneLen : 256 ,
211
+ }
212
+ for _ , d := range testdata {
213
+ scanner := bufio .NewScanner (strings .NewReader (d .input ))
214
+ scanner .Split (s .ScanSentences )
215
+ r := make ([]string , 0 , len (d .expect ))
216
+ for scanner .Scan () {
217
+ r = append (r , scanner .Text ())
218
+ }
219
+ if ! reflect .DeepEqual (r , d .expect ) {
220
+ t .Errorf ("input %q, got %#v, expected %#v" , d .input , r , d .expect )
221
+ }
222
+ if got := strings .Join (r , "" ); got != d .input {
223
+ t .Errorf ("got len=%d %q, input len=%d %q" , len (got ), got , len (d .input ), d .input )
224
+ }
225
+ }
226
+ }
227
+
112
228
func Test_ScanSentences (t * testing.T ) {
113
229
testdata := []struct {
114
230
atEnd bool
0 commit comments