# markov-chain-model.py
# coding: utf-8
# Hans Kamin
# Spring 2017
# Bigram Markov Chain Model
def train_markov_chain(lyrics):
    """
    Train a bigram Markov chain on a collection of song lyrics.

    Each song is tokenized on whitespace, with every newline turned into
    the token "<N>" so that line breaks are learned like ordinary words.
    Every song starts in the state (None, "<START>") and is terminated by
    the token "<END>".

    Args:
      - lyrics: a list of strings, where each string represents
        the lyrics of one song by an artist.

    Returns:
      A dict that maps a tuple of 2 words ("bigram") to a list of
      words that follow that bigram, representing the Markov
      chain trained on the lyrics. Followers are stored with
      repetition, so a uniform choice from a list reproduces the
      observed frequencies.
    """
    # The state every song begins in.
    start = (None, "<START>")
    chain = {start: []}

    for lyric in lyrics:
        # Restart from the beginning of the chain for each song.
        last_2 = start
        # Replace newline characters with our tag, then tokenize.
        for word in lyric.replace('\n', ' <N> ').split():
            # Record the word as one that follows the current bigram.
            chain[last_2].append(word)
            # Shift the bigram to include the new word, creating its
            # follower list the first time the bigram is seen.
            last_2 = (last_2[1], word)
            chain.setdefault(last_2, [])
        # Mark where the song ends so generation knows when to stop.
        chain[last_2].append("<END>")

    return chain
# Load the pickled lyrics object that we created earlier.
# NOTE: unpickling can execute arbitrary code, so only load a lyrics.pkl
# file that you produced yourself.
import pickle
# Use a context manager so the file handle is closed after loading.
with open("lyrics.pkl", "rb") as lyrics_file:
    lyrics = pickle.load(lyrics_file)
# Train a Markov Chain over all of Logic's lyrics.
chain = train_markov_chain(lyrics)
import random
def generate_new_lyrics(chain):
    """
    Generate a random song by walking the Markov chain.

    The walk begins at the bigram (None, "<START>"), repeatedly picks a
    random follower of the current bigram, and stops when "<END>" is
    drawn. Each "<N>" token drawn starts a new line in the output.

    Args:
      - chain: a dict representing the Markov chain,
        such as one generated by train_markov_chain()

    Returns:
      A string representing the randomly generated song, with lines
      separated by newline characters. Returns an empty string if the
      chain has no words following the start state.
    """
    # Begin with the first bigram in our chain.
    last_2 = (None, "<START>")
    # A chain trained on no lyrics has nothing to choose from.
    if not chain.get(last_2):
        return ""

    # Words of the song, grouped by output line.
    lines = [[]]
    while True:
        # Generate the next word from the followers of the current bigram.
        # The very first word is drawn here too, so the bigram is always
        # advanced after every draw (no duplicated opening word).
        word = random.choice(chain[last_2])
        if word == "<END>":
            break
        if word == "<N>":
            # A newline tag closes the current line and opens a new one.
            lines.append([])
        else:
            lines[-1].append(word)
        # Shift the current bigram to account for the newly added word.
        last_2 = (last_2[1], word)

    # Join words with spaces and lines with line breaks.
    return "\n".join(" ".join(line) for line in lines)
# Generate one random song from the trained chain and print it.
print(generate_new_lyrics(chain))