forked from wumb0/spotify2playmusic
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspotify2playmusic.py
executable file
·318 lines (296 loc) · 12.3 KB
/
spotify2playmusic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#!/usr/bin/env python -u
#this tool looks up spotify playlists and clones them over to google play music
#with an acceptable degree of accuracy allowing for variations in song info
#this causes it to be pretty slow, but at least you don't have to do it by hand
#read more at the README!
import spotify
from gmusicapi import Mobileclient
from os import system
import threading
from sys import stdout
def levenshtein(a, b):
    """Return the Levenshtein (edit) distance between sequences a and b.

    Adapted from http://hetland.org/coding/python/levenshtein.py.
    Used to compare strings: the more alike the inputs are, the lower the
    returned number (0 for identical inputs).
    """
    # Work along the shorter input so each row stays as small as possible.
    short, longer = (a, b) if len(a) <= len(b) else (b, a)
    width = len(short)

    # Only the previous and the current row of the edit matrix are kept.
    row = list(range(width + 1))
    for i, long_item in enumerate(longer, 1):
        above, row = row, [i] + [0] * width
        for j, short_item in enumerate(short, 1):
            # A differing pair costs one substitution; an equal pair is free.
            substitution = above[j - 1] + (1 if short_item != long_item else 0)
            row[j] = min(above[j] + 1, row[j - 1] + 1, substitution)
    return row[width]
# Helpers for handling the Spotify login.
# logged_in_event is set by logged_in_listener, which login_spotify registers
# for the session's LOGGED_IN event.
logged_in_event = threading.Event()


def logged_in_listener(session, error_type):
    """Session event callback: flag logged_in_event.

    Both arguments are accepted to fit the listener signature and are not
    inspected, so the event is set whatever error_type holds.
    """
    logged_in_event.set()
def get_password(prompt):
    """Read a password from the terminal without echoing it to the screen.

    prompt: text shown to the user before the password is read.
    Returns the text that was typed, without the trailing newline.
    """
    # Imported here so the function does not rely on the file's import list.
    import getpass

    # The previous implementation ran "stty -echo", read a line and then ran
    # "stty echo". If reading was interrupted (Ctrl-C / EOF) the second
    # command never ran and the terminal was left with echo switched off; it
    # also needed the external stty command. getpass handles both concerns.
    return getpass.getpass(prompt)
def login_gpm():
    """Ask for Google Play credentials and return a logged-in Mobileclient.

    The outcome is printed either way; a rejected login ends the program.
    """
    # Alternative kept from the original notes: read the two values from a
    # 'play_creds' file (username on the first line, password on the second)
    # instead of prompting.
    google_user = raw_input("Enter your Google Play username: ")
    google_pass = get_password("Enter your Google Play password: ")

    client = Mobileclient()
    if client.login(google_user, google_pass):
        print("\nLogin success!")
        return client

    print("\nNot a valid login")
    exit()
def login_spotify(spot):
    """Ask for Spotify credentials and log the given session in.

    spot: the spotify.Session to log in.
    Returns the same session once its connection state is LOGGED_IN.
    Prints "Not a valid login" and ends the program when the login fails.
    """
    # Alternative kept from the original notes: read the two values from a
    # 'spotify_creds' file (username on the first line, password on the
    # second) instead of prompting.
    username = raw_input("Enter your Spotify username: ")
    password = get_password("Enter your Spotify password: ")

    # Results delivered with the LOGGED_IN event. Without them a rejected
    # login could not be told apart from one still in progress, and the wait
    # loop below never finished.
    # NOTE(review): assumes the listener's second argument is a
    # spotify.ErrorType, with spotify.ErrorType.OK meaning success - confirm
    # against the installed pyspotify version.
    login_results = []

    def remember_login_result(session, error_type):
        login_results.append(error_type)

    # Attach the listeners before starting the login attempt.
    spot.on(spotify.SessionEvent.LOGGED_IN, logged_in_listener)
    spot.on(spotify.SessionEvent.LOGGED_IN, remember_login_result)
    spot.login(username, password)

    login_failed = False
    while spot.connection.state != spotify.ConnectionState.LOGGED_IN:
        spot.process_events()
        if any(result != spotify.ErrorType.OK for result in login_results):
            # The login attempt completed with an error: stop waiting.
            login_failed = True
            break

    if login_failed or spot.user is None:
        print("\nNot a valid login")
        exit()
    print("\nLogin success!")
    return spot
def get_playlist(spot):
    """List the user's Spotify playlists and return the one they choose.

    spot: a logged-in spotify.Session.
    Returns the chosen playlist object. Entering 0 logs the session out and
    ends the program.
    """
    container = spot.playlist_container
    container.load()

    # Only entries that are actual playlists are offered; anything else found
    # in the container is skipped.
    spotify_playlists = [
        entry for entry in container
        if isinstance(entry, spotify.playlist.Playlist)
    ]
    for number, playlist in enumerate(spotify_playlists, 1):
        print(str(number) + ". " + playlist.load().name)
    count = len(spotify_playlists)

    while True:
        answer = raw_input("Playlist to duplicate (enter a number, 0 to exit): ")
        # Parse the text explicitly: Python 2's input() would evaluate
        # whatever the user typed as an expression.
        try:
            option = int(answer)
        except ValueError:
            option = -1
        # Valid answers are 0 (exit) and 1..count. The old upper bound of
        # count + 1 let one number too many through, which then failed on the
        # list lookup below.
        if 0 <= option <= count:
            break
        print("Invalid option, " + answer.strip() + " try again")

    if option == 0:
        print("Bye!")
        spot.logout()
        exit()
    return spotify_playlists[option - 1]
def _strip_qualifiers(text):
    """Return text cut off at the first "(" or "[" (e.g. drops "(Extended Mix)")."""
    return text.split("(")[0].split("[")[0]


def _threshold_shift(full, alt, short_steps, long_step):
    """Return the (threshold1, threshold2) adjustment contributed by one field.

    full, alt: the field and its stripped alternate form.
    short_steps: (limit, cut1, cut2) tuples; each one applies when either
        form is shorter than limit.
    long_step: one (limit, gain1, gain2) tuple, applied when either form is
        longer than limit.
    """
    shortest = min(len(full), len(alt))
    longest = max(len(full), len(alt))
    shift1 = shift2 = 0
    for limit, cut1, cut2 in short_steps:
        if shortest < limit:
            shift1 -= cut1
            shift2 -= cut2
    limit, gain1, gain2 = long_step
    if longest > limit:
        shift1 += gain1
        shift2 += gain2
    return shift1, shift2


def is_similar(title1, artist1, album1, title2, artist2, album2):
    """Decide whether two songs are a close enough match.

    This is the main comparison algorithm of the program. Each song is given
    as title, artist and album. Returns True for a match, False otherwise;
    a song without a usable title never matches.

    Checks, in order:
      1. title and artist are exactly equal;
      2. the stripped alternates of title and artist are exactly equal;
      3. the summed Levenshtein distances of title, artist and album are
         below threshold1;
      4. the Levenshtein distances of the three alternates are each below
         threshold2.
    """
    threshold1 = 14
    threshold2 = 9

    # Alternates drop everything from the first "(" or "[" onwards, so that
    # "The City (Extended Mix)" can still match "The City". Text after a dash
    # is NOT removed.
    try:
        title1alt = _strip_qualifiers(title1)
        title2alt = _strip_qualifiers(title2)
    except (AttributeError, TypeError):
        # If the song does not have a title then there is nothing to compare.
        return False
    try:
        album1alt = _strip_qualifiers(album1)
        album2alt = _strip_qualifiers(album2)
    except (AttributeError, TypeError):
        # If an album is missing, make both sides identical placeholders.
        album1 = album2 = album1alt = album2alt = "none"
    try:
        artist1alt = _strip_qualifiers(artist1)
        artist2alt = _strip_qualifiers(artist2)
    except (AttributeError, TypeError):
        # Same as above.
        artist1 = artist2 = artist1alt = artist2alt = "none"

    # Songs with short details get tighter thresholds and songs with long
    # details get looser ones. Each entry is (limit, threshold1 change,
    # threshold2 change).
    title_rules = ([(8, 1, 1), (5, 1, 0), (13, 2, 1)], (25, 3, 1))
    other_rules = ([(12, 3, 1)], (25, 2, 1))
    fields = [
        (title1, title1alt, title_rules),
        (title2, title2alt, title_rules),
        (artist1, artist1alt, other_rules),
        (artist2, artist2alt, other_rules),
        (album1, album1alt, other_rules),
        (album2, album2alt, other_rules),
    ]
    for full, alt, (short_steps, long_step) in fields:
        shift1, shift2 = _threshold_shift(full, alt, short_steps, long_step)
        threshold1 += shift1
        threshold2 += shift2

    # Equality, not identity: "is" only compared object identity, so equal
    # strings held in different objects were not recognised as exact matches.
    if title1 == title2 and artist1 == artist2 and artist2 != "none":
        return True
    if (title1alt == title2alt and artist1alt == artist2alt
            and artist2alt != "none"):
        return True

    combined = (levenshtein(title1, title2)
                + levenshtein(artist1, artist2)
                + levenshtein(album1, album2))
    if combined < threshold1:
        return True

    # Last chance: every alternate field individually close enough.
    return (levenshtein(title1alt, title2alt) < threshold2
            and levenshtein(artist1alt, artist2alt) < threshold2
            and levenshtein(album1alt, album2alt) < threshold2)
def _show_progress(done, total, title, artist):
    """Write a percentage plus the current track, ending with a carriage return."""
    stdout.write("%i%% - %s - %s \r" % ((float(done) / float(total)) * 100.0, title, artist))
    # Flush after writing so the line shows up immediately.
    stdout.flush()


def main():
    """Clone one Spotify playlist into a new Google Play Music playlist.

    Steps: log in to both services, let the user pick a Spotify playlist,
    stop if Google Play already has a playlist with that name, match every
    track against the uploaded library first and against All Access second,
    create the new playlist from the matches and finally list the tracks
    that could not be matched.
    """
    # Set up the spotify config and initialize the session.
    config = spotify.Config()
    config.user_agent = 'spotify2playmusic'
    config.tracefile = b'/tmp/libspotify-trace.log'
    spot = spotify.Session(config=config)

    # Log in and get the playlist choice for cloning.
    gpm = login_gpm()
    spot = login_spotify(spot)
    playlist = get_playlist(spot)
    playlist_name = playlist.load().name

    # If Google Play already has a playlist with this name, say so and exit.
    existing_names = [existing['name'] for existing in gpm.get_all_playlists()]
    if playlist_name in existing_names:
        print("Playlist already exists in Google Play... exiting")
        exit()

    tracks_to_match = []    # not found in the uploaded library
    to_add_list = []        # Google Play ids to put into the new playlist
    unmatched_tracks = []   # not found anywhere

    # Search the user's uploaded music first.
    print("Searching in uploaded music first...")
    gpm_local = gpm.get_all_songs()
    print("Database loaded!")
    playlist_tracks = playlist.load().tracks
    total_tracks = len(playlist_tracks)
    for progress, track in enumerate(playlist_tracks, 1):
        track = track.load()
        title = track.name
        artist = track.artists[0].load().name
        album = track.album.load().name
        _show_progress(progress, total_tracks, title, artist)

        # Collect up to five matching library songs (title -> id); the one
        # whose title has the lowest Levenshtein number wins.
        unmatched = True
        top5 = {}
        to_push_id = ""
        for song in gpm_local:
            if is_similar(title, artist, album, song['title'], song['artist'], song['album']):
                print(" - Found a match in uploaded library: " + song['title'] + " - " + song['artist'] + " ")
                unmatched = False
                top5[song['title']] = song['id']
                to_push_id = song['id']
                if len(top5) == 5:
                    break
        if len(top5) > 1:
            winning_track = min(top5, key=lambda name: levenshtein(title, name))
            to_push_id = top5[winning_track]
            if winning_track != "":
                print("Going with the closest match: " + winning_track)
        if unmatched:
            tracks_to_match.append(track)
        if to_push_id != "":
            to_add_list.append(to_push_id)

    # All remaining unmatched tracks are searched in All Access.
    print("Now searching All Access for the remaining tracks...")
    total_remaining = len(tracks_to_match)
    for progress, track in enumerate(tracks_to_match, 1):
        track = track.load()
        title = track.name.lower()
        artist = track.artists[0].load().name.lower()
        album = track.album.load().name.lower()
        # Only title and artist are searched for: Google Play does not return
        # the correct results when the album is included.
        query = title + " " + artist
        # Progress is measured against the tracks handled by this pass.
        _show_progress(progress, total_remaining, title, artist)

        try:
            result = gpm.search_all_access(query, max_results=10)['song_hits'][0]['track']
        except Exception:
            # Without a usable first hit there is nothing to compare. Record
            # the track as unmatched instead of carrying on with a missing or
            # stale result from an earlier track.
            print("Something went wrong while trying to search All Access.")
            unmatched_tracks.append(track)
            continue

        found_title = result['title'].lower()
        found_artist = result['artist'].lower()
        # Lower-cased like the other fields, since the Spotify side is too.
        found_album = result['album'].lower()
        # Two comparisons: one with the real albums and one where both sides
        # share the same dummy album, which increases the chance of matching.
        if (is_similar(title, artist, album, found_title, found_artist, found_album)
                or is_similar(title, artist, "analbumthebest", found_title, found_artist, "analbumthebest")):
            print(" - Found a match in All Access: " + result['title'] + " - " + result['artist'] + " ")
            to_add_list.append(result['nid'])
        else:
            unmatched_tracks.append(track)

    # Add the new playlist and all of the identified songs to it.
    gpm_new_playlist_id = gpm.create_playlist(playlist_name)
    print("Adding matched songs to playlist\n")
    for song_id in to_add_list:
        gpm.add_songs_to_playlist(gpm_new_playlist_id, song_id)

    # Show the unmatched tracks.
    print("Unmatched tracks:")
    for leftover in unmatched_tracks:
        print(leftover.load().name + " - " + leftover.load().artists[0].load().name)
# Script entry point: main() is called as soon as this module is executed.
# There is no __main__ guard, so importing the module runs it as well.
main()