Skip to content

Commit 18799e4

Browse files
committed Jul 31, 2018
example bug fixes
1 parent 5324519 commit 18799e4

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed
 

‎examples/03-en/08-topmine_ngrammer.py

+8 -4
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
import os
77
import sys
8+
import codecs
89
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
910

1011
import pattern.text as text_module
@@ -16,8 +17,11 @@
1617

1718
texts = []
1819
for p in paths:
19-
with open(p, "rb") as f:
20-
texts.append(str(f.read()))
20+
with codecs.open(p, "rb", encoding='latin-1') as f:
21+
if sys.version_info[0] < 3:
22+
texts.append(f.read())
23+
else:
24+
texts.append(str(f.read()))
2125

2226
ng = text_module.train_topmine_ngrammer(texts, threshhold=1, regexp="[^a-zA-Z0-9]")
2327
ngrams = text_module.topmine_ngramms(texts[0], ng, threshhold=1)
@@ -33,11 +37,11 @@
3337
elif len(key.split("_")) == 3:
3438
trigrams.append(key)
3539

36-
print("Extracted {} bigrams (removed stopwords):\n".format(len(bigrams)))
40+
print("Extracted {} bigrams:\n".format(len(bigrams)))
3741
print(bigrams)
3842
print("\n")
3943

40-
print("Extracted {} trigrams (removed stopwords):\n".format(len(trigrams)))
44+
print("Extracted {} trigrams:\n".format(len(trigrams)))
4145
print(trigrams)
4246
print("\n")
4347

‎pattern/web/__init__.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -2971,9 +2971,10 @@ def name(self):
29712971
# http://dbpedia.org/resource/Australia => Australia
29722972
s = re.sub("^http://dbpedia.org/resource/", "", self)
29732973
s = s.replace("_", " ")
2974-
s = encode_utf8(s)
2975-
s = decode_url(s)
2976-
s = decode_utf8(s)
2974+
if sys.version_info[0] < 3:
2975+
s = encode_utf8(s)
2976+
s = decode_url(s)
2977+
s = decode_utf8(s)
29772978
return s
29782979

29792980

0 commit comments

Comments
 (0)
Please sign in to comment.