@@ -78,15 +78,16 @@ def test_corpus_token_counts_split_on_newlines(self):
         u"my": 2,
         u"pajamas": 2,
     }
-    self.assertDictContainsSubset(expected, token_counts)
+    self.assertEqual(token_counts, {**token_counts, **expected})
     self.assertNotIn(u".\n\n", token_counts)
     self.assertNotIn(u"\n", token_counts)
 
   def test_corpus_token_counts_no_split_on_newlines(self):
     token_counts = tokenizer.corpus_token_counts(
         self.corpus_path, corpus_max_lines=0, split_on_newlines=False)
 
-    self.assertDictContainsSubset({u".\n\n": 2, u"\n": 3}, token_counts)
+    expected_subset = {u".\n\n": 2, u"\n": 3}
+    self.assertEqual(token_counts, {**token_counts, **expected_subset})
 
   def test_corpus_token_counts_split_with_max_lines(self):
     token_counts = tokenizer.corpus_token_counts(
@@ -101,11 +102,12 @@ def test_corpus_token_counts_no_split_with_max_lines(self):
 
     self.assertIn(u"slept", token_counts)
     self.assertNotIn(u"Mitch", token_counts)
-    self.assertDictContainsSubset({
+    expected_subset = {
         u".\n\n": 1,
         u"\n": 2,
         u".\n": 1
-    }, token_counts)
+    }
+    self.assertEqual(token_counts, {**token_counts, **expected_subset})
 
   def test_vocab_token_counts(self):
     token_counts = tokenizer.vocab_token_counts(self.vocab_path, 0)
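For context on the change: unittest's assertDictContainsSubset has been deprecated since Python 3.2 and was removed in Python 3.12, and merging the expected subset into the actual dict before comparing for equality is the usual replacement, since the merge leaves the actual dict unchanged exactly when every expected (key, value) pair is already present. A minimal standalone sketch of the idiom follows; the test class and sample data are illustrative, not part of the patch:

import unittest


class DictSubsetIdiomTest(unittest.TestCase):

  def test_merge_equality_holds_for_subset(self):
    # Spreading `expected` last means its pairs overwrite any conflicting
    # values, so the merged dict equals `actual` iff `expected` is a
    # sub-dict of `actual`.
    actual = {u"my": 2, u"pajamas": 2, u"slept": 1}
    expected = {u"my": 2, u"pajamas": 2}
    self.assertEqual(actual, {**actual, **expected})

  def test_merge_equality_fails_on_mismatch(self):
    # A differing value or a missing key changes the merged dict.
    actual = {u"my": 2}
    self.assertNotEqual(actual, {**actual, **{u"my": 3}})
    self.assertNotEqual(actual, {**actual, **{u"pajamas": 2}})


if __name__ == "__main__":
  unittest.main()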