-
-
Notifications
You must be signed in to change notification settings - Fork 11
/
test.py
22 lines (18 loc) · 943 Bytes
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#from natas import ocr_builder
from mikatools import *
#from gensim.models import Word2Vec
#from gensim.models.keyedvectors import Word2VecKeyedVectors
import natas
#print(natas.is_correctly_spelled("cat"))
# Smoke-run of the two natas word-level calls exercised by this script.
# Each call is made twice: first with return_scores=True, then with the
# library defaults, and the raw return value is printed every time.
for extra_kwargs in ({"return_scores": True}, {}):
    # Fresh list literals per call so no list object is shared between calls.
    print(natas.normalize_words(["seacreat", "wiþe"], n_best=5, **extra_kwargs))
    print(natas.ocr_correct_words(["paft", "friendlhip"], **extra_kwargs))
#print(natas.is_correctly_spelled("ca7"))
#model = Word2Vec.load("/Users/mikahama/Downloads/models/model_fi_1820-1917.w2v")
#model = Word2VecKeyedVectors.load_word2vec_format("/mnt/c/Users/Mika/Downloads/enwiki_20180420_100d.txt")
#print("ok")
#seed_words = set(json_load("natas/wiktionary_lemmas.json"))
#print("ok")
#res = ocr_builder.extract_parallel(seed_words, model, dictionary=seed_words, lemmatize=False, use_freq=False)
#json_dump(res, "test.json")