forked from yaph/james-bond-actors
-
Notifications
You must be signed in to change notification settings - Fork 0
/
actors.py
107 lines (87 loc) · 3.23 KB
/
actors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# -*- coding: utf-8 -*-
# Get the actors of all movies in a film series and write their co-starring graph as a GEXF file.
import requests, requests_cache, json, itertools
import networkx as nx
# Install a requests_cache cache named 'freebase' so that HTTP calls made
# through `requests` can be answered from the cache instead of hitting the API
# again (backend and expiry are the library defaults -- TODO confirm).
requests_cache.install_cache('freebase')
# Freebase ids of the film series to export. Ids that are opaque machine ids
# (/m/...) carry a trailing comment naming the series.
series = [
    '/en/james_bond_film_series',
    '/en/the_three_mesquiteers',
    '/en/carry_on_films',
    '/en/the_pink_panther',
    '/m/0hn_rv_',  # Star Trek
    '/en/star_wars',
    '/m/01rb9m',  # Nightmare on Elm Street
    '/wikipedia/en_title/Tomie_$0028film_series$0029',
    '/m/0g_sz63',  # Jerry Cotton
    '/m/0lmfcgx',  # Tsuribaka Nisshi
    '/m/02676m4',  # Harry Potter
    '/en/american_pie_film_series',
    '/en/the_whistler_film_series',
    '/wikipedia/en/The_Fast_and_the_Furious_$0028film_series$0029',
    '/m/0j_l7cw',  # Hellraiser
    '/wikipedia/en_title/The_Texas_Chainsaw_Massacre_$0028franchise$0029',
    '/en/x_men_film_series',
    '/en/superman_film_series',
    '/m/0gyjf4v',  # Batman
    '/m/0j7ylh0',  # Emmanuelle
]
# Film ids that are generally not counted as part of their series; any film
# whose id appears here is skipped when the cast graph is built.
blacklist = (
    '/en/casino_royale_1967',
    '/en/casino_royale_1954',
    '/en/never_say_never_again',
)
# MQL query template, read once at import time from 'actors.mql' (resolved
# against the current working directory). dump_actors_to_gexf() writes the
# series id into query[0] before each request, so the loaded value is a list;
# the former `query = {}` placeholder was a dead store of the wrong type and
# has been dropped.
with open('actors.mql') as f:
    query = json.load(f)
def dump_actors_to_gexf(id):
    """Fetch the cast of every film in a series and write it as a GEXF graph.

    The module-level MQL ``query`` template is pointed at the series ``id``
    and sent to the Freebase ``mqlread`` endpoint.  Every actor becomes a
    node and every pair of actors appearing in the same film becomes an
    edge whose ``weight`` is the number of films they share.

    Node attributes:
      * ``label`` -- the actor's name
      * ``films`` -- the actor's film names joined with ``|``
      * one ``<film name>: True`` attribute per film
      * ``viz``   -- ``{'size': <number of films>}``

    Films whose id is in ``blacklist`` and ``starring`` entries without an
    actor are skipped.  An actor listed several times for the same film is
    counted once for that film, so no self-loops or repeated film names are
    produced.

    Args:
        id: Freebase id of the film series (e.g. '/en/star_wars').

    Returns:
        The name of the written file (series name with spaces removed plus
        '.gexf'), or None when the query returns no results.
    """
    # The template is shared module state; overwrite the series id in place.
    query[0]['!pd:/film/film_series/films_in_series'][0]['id'] = id
    reply = requests.get(
        'https://www.googleapis.com/freebase/v1/mqlread',
        params={'query': json.dumps(query)})
    results = json.loads(reply.text)['result']
    if not results:
        return

    seriesname = results[0]['!pd:/film/film_series/films_in_series'][0]['name']

    actormap = {}  # actor id -> {'label': name, 'films': [film names]}
    edgemap = {}   # (actor id, actor id), sorted -> number of shared films
    for film in results:
        if film['id'] in blacklist:
            continue

        cast = []      # actor ids of this film, in listing order
        seen = set()   # same ids, for cheap duplicate detection
        for performance in film['starring']:
            actor = performance['actor']
            if actor is None:
                continue
            aid = actor['id']
            if aid in seen:
                # Same actor listed again for this film: counting it twice
                # would repeat the film name and create a self-loop.
                continue
            seen.add(aid)
            cast.append(aid)
            if aid not in actormap:
                actormap[aid] = {'label': actor['name'], 'films': []}
            actormap[aid]['films'].append(film['name'])

        for pair in itertools.combinations(cast, 2):
            # Sort so that (a, b) and (b, a) share one undirected edge key.
            edge = tuple(sorted(pair))
            edgemap[edge] = edgemap.get(edge, 0) + 1

    nodes = []
    for aid in actormap:
        titles = actormap[aid]['films']
        attr = {'label': actormap[aid]['label'],
                'films': '|'.join(titles)}
        attr.update(dict((title, True) for title in titles))
        attr['viz'] = {'size': len(titles)}
        nodes.append((aid, attr))

    G = nx.Graph()
    # The (node, attr-dict) / (u, v, attr-dict) tuple forms are used because
    # passing an attribute dict positionally to add_node/add_edge and the
    # Graph.node accessor are not available in NetworkX 2.x and later.
    G.add_nodes_from(nodes)
    G.add_edges_from(
        (edge[0], edge[1], {'weight': edgemap[edge]}) for edge in edgemap)

    outputfile = seriesname.replace(' ', '') + '.gexf'
    nx.write_gexf(G, outputfile, version='1.2draft')
    return outputfile
def main():
    """Export every series in ``series`` to GEXF, reporting progress on stdout.

    For each series id a "Dumping" line is printed before the export and a
    "dumped" line with the returned file name afterwards (the name is None
    when the export produced no file).
    """
    for series_id in series:
        # Single-argument print(...) with %-formatting emits exactly the text
        # the old print statements did and is valid on Python 2 and 3 alike.
        print("Dumping  %s" % (series_id,))
        filename = dump_actors_to_gexf(series_id)
        print(" dumped %s" % (filename,))
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()