1
- //! b-p-e for Byte Pair Encoding
1
+ //! b-p-e for Byte Pair Encoding
2
2
3
3
mod algorithm;
4
4
@@ -283,8 +283,8 @@ mod bpe_tests {
283
283
284
284
let encoded: Vec < _ > = bpe. encode ( "abc" ) . into_iter ( ) . collect ( ) ;
285
285
assert_eq ! ( encoded. len( ) , 2 ) ; // Should merge "ab" and leave "c"
286
- assert_eq ! ( encoded[ 0 ] , 3 ) ; // Assuming "ab" is assigned token ID 3
287
- assert_eq ! ( encoded[ 1 ] , 2 ) ; // Assuming "c" is assigned token ID 2
286
+ assert_eq ! ( encoded[ 0 ] , 3 ) ; // Assuming "ab" is assigned token ID 3
287
+ assert_eq ! ( encoded[ 1 ] , 2 ) ; // Assuming "c" is assigned token ID 2
288
288
}
289
289
290
290
#[ test]
@@ -307,7 +307,10 @@ mod bpe_tests {
307
307
308
308
let text = "abcbc" ;
309
309
let encoded: Vec < _ > = bpe. encode ( text) . into_iter ( ) . collect ( ) ;
310
- let decoded: Vec < u8 > = encoded. iter ( ) . flat_map ( |& t| bpe. decode ( t) . iter ( ) . copied ( ) ) . collect ( ) ;
310
+ let decoded: Vec < u8 > = encoded
311
+ . iter ( )
312
+ . flat_map ( |& t| bpe. decode ( t) . iter ( ) . copied ( ) )
313
+ . collect ( ) ;
311
314
assert_eq ! ( String :: from_utf8( decoded) . unwrap( ) , text) ;
312
315
}
313
316
@@ -333,13 +336,22 @@ mod bpe_tests {
333
336
println ! ( "Inaccessible tokens: {:?}" , inaccessible) ;
334
337
335
338
// 'd' is a single character, so it should be accessible
336
- assert ! ( !inaccessible. contains_key( "d" ) , "Token 'd' should be accessible" ) ;
339
+ assert ! (
340
+ !inaccessible. contains_key( "d" ) ,
341
+ "Token 'd' should be accessible"
342
+ ) ;
337
343
338
344
// 'bcd' cannot be formed by merging other tokens, so it should be inaccessible
339
- assert ! ( inaccessible. contains_key( "bcd" ) , "Token 'bcd' should be inaccessible" ) ;
345
+ assert ! (
346
+ inaccessible. contains_key( "bcd" ) ,
347
+ "Token 'bcd' should be inaccessible"
348
+ ) ;
340
349
341
350
// 'ab' can be formed by merging 'a' and 'b', so it should be accessible
342
- assert ! ( !inaccessible. contains_key( "ab" ) , "Token 'ab' should be accessible" ) ;
351
+ assert ! (
352
+ !inaccessible. contains_key( "ab" ) ,
353
+ "Token 'ab' should be accessible"
354
+ ) ;
343
355
}
344
356
345
357
#[ test]
0 commit comments