-
Notifications
You must be signed in to change notification settings - Fork 1
/
QueneauNovel.py
102 lines (68 loc) · 2.27 KB
/
QueneauNovel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
import random
import re
import textwrap
import subprocess
import io
from olipy.queneau import Assembler, CompositeAssembler, WordAssembler
# Names of the eighteen episodes, in order.  An episode's 1-based position in
# this list is the numeric prefix of its data file.
episode_names = [
    "Telemachus",
    "Nestor",
    "Proteus",
    "Calypso",
    "LotusEaters",
    "Hades",
    "Aeolus",
    "Lestrygonians",
    "ScyllaCharybdis",
    "WanderingRocks",
    "Sirens",
    "Cyclops",
    "Nausicaa",
    "OxenOfTheSun",
    "Circe",
    "Eumaeus",
    "Ithaca",
    "Penelope",
]

# One corpus path per episode, built as "data/<NN><lowercased name>.dat",
# e.g. "data/01telemachus.dat".
data_files = [
    "data/%02d%s.dat" % (number, name.lower())
    for number, name in enumerate(episode_names, 1)
]
# Patterns used while assembling paragraphs.  They are compiled once here
# rather than once per episode.
_NO_PUNCTUATION_AT_END = re.compile(r"[a-zA-Z0-9]$")
_STARTS_WITH_LETTER = re.compile(r"^[a-zA-Z0-9].")

# Substring that marks a line as verse and separates the individual verses.
# NOTE(review): this is a single space in the version of the file under
# review, which matches nearly every line; the original may have used a longer
# run of spaces that was collapsed -- confirm against the data files.
_VERSE_DELIMITER = " "


def _assemble_paragraph(corpus, separator):
    """Assemble one paragraph from *corpus* and return it as a string.

    corpus    -- object whose ``assemble("m.l", min_length=4)`` call yields
                 ``(line, source)`` pairs (here: an olipy ``Assembler``).
    separator -- string placed between the collected pieces when joining.

    Each yielded line gets a trailing "." if it ends in a letter or digit.
    Lines containing the verse delimiter are split into verses, each emitted
    on its own line; lines that do not start with a letter or digit are
    preceded by a line break; all other lines are appended unchanged.

    An earlier, disabled variant of this logic skipped the trailing period
    for the Penelope episode; that variant is not reproduced here.
    """
    pieces = []
    for line, _source in corpus.assemble("m.l", min_length=4):
        if _NO_PUNCTUATION_AT_END.search(line):
            line = line + "."

        if _VERSE_DELIMITER in line:
            pieces.append("\n")
            # Empty fragments (from consecutive delimiters) are dropped.
            pieces.extend(
                _VERSE_DELIMITER + verse + "\n"
                for verse in line.split(_VERSE_DELIMITER)
                if verse != ""
            )
        elif not _STARTS_WITH_LETTER.search(line):
            pieces.append("\n")
            pieces.append(line)
        else:
            pieces.append(line)
    return separator.join(pieces)


# Accumulates every chunk of the output text: episode headers and paragraphs.
money = []

for number, data_file in enumerate(data_files, 1):
    episode = episode_names[number - 1]

    # Episode header, surrounded by blank lines.
    money.append("\n\n")
    money.append(" [ %02d %s ]" % (number, episode))
    money.append("\n\n")

    # Pieces of the last episode are joined with no separator.  The original
    # test was `data_file is '18penelope'`: an identity comparison against a
    # string that never equals the full "data/18penelope.dat" path, so the
    # branch could never be taken.  Comparing the episode name makes it live.
    separator = "" if episode == "Penelope" else " "

    # Keep the data file open for as long as the corpus is in use, then close
    # it deterministically (the original never closed it).
    with open(data_file) as handle:
        corpus = Assembler.loadlines(handle, tokens_in='par')

        # Random number of paragraphs per episode, between 0 and 120.
        how_many = int(round(random.random() * 120))
        for _ in range(how_many):
            money.append(_assemble_paragraph(corpus, separator))

# Normalise every chunk to end in exactly one newline; whitespace-only chunks
# collapse to empty lines.
money = [m.strip() + u"\n" for m in money]

with io.open('queneau_novel.txt', 'w', encoding='utf-8') as f:
    f.writelines(money)