-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathload_hop.py
98 lines (90 loc) · 2.72 KB
/
load_hop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import rdflib
import random
PREFIX="http://data.linkedmdb.org/"
def out_degree(url):
    """Count the triples at *url* whose predicate contains ``PREFIX``.

    The resource is loaded only when ``PREFIX`` occurs in *url*; for any
    other URL nothing is parsed and the result is 0.

    Args:
        url: Location of the RDF resource handed to ``rdflib.Graph.parse``.

    Returns:
        The number of triples in the parsed graph whose predicate contains
        ``PREFIX``, or 0 when *url* itself does not contain ``PREFIX``.

    Raises:
        Any exception ``rdflib.Graph.parse`` raises while loading *url*.
    """
    if PREFIX not in url:
        return 0

    graph = rdflib.Graph()
    graph.parse(url)
    # Substring containment (not a startswith test) is the original matching
    # rule and is kept unchanged.
    return sum(1 for _, predicate, _ in graph if PREFIX in predicate)
def weight(url):
    """Sum the out-degrees of the ``PREFIX`` objects referenced from *url*.

    The graph at *url* is parsed once.  For every triple whose object
    contains ``PREFIX``, ``out_degree`` of that object is added to the total.
    Objects whose out-degree cannot be computed are reported on stderr and
    contribute nothing, so one unreachable neighbour does not lose the whole
    result.

    Args:
        url: Location of the RDF resource handed to ``rdflib.Graph.parse``.

    Returns:
        The accumulated out-degree, or 0 when *url* does not contain
        ``PREFIX`` (in which case nothing is loaded, matching ``out_degree``).

    Raises:
        Any exception ``rdflib.Graph.parse`` raises while loading *url*
        itself; failures for individual neighbours are not propagated.
    """
    import sys  # local import: only needed for the skip warning below

    if PREFIX not in url:
        return 0

    # Parse exactly once; loading the same URL twice only doubled the cost.
    graph = rdflib.Graph()
    graph.parse(url)

    total = 0
    for _, _, obj in graph:
        if PREFIX not in obj:
            continue
        try:
            total += out_degree(obj)
        except Exception as exc:
            # Best effort per neighbour.  `Exception` (rather than a bare
            # except) keeps Ctrl-C and SystemExit working.
            print("Skipping", obj, "-", exc, file=sys.stderr)
    return total
def comp(s1):
    """Return ``weight(s1)``.

    Thin alias for ``weight``; presumably meant as a comparison/sort key,
    but it is not called anywhere in the visible part of this file.

    Args:
        s1: URL forwarded unchanged to ``weight``.

    Returns:
        Whatever ``weight`` returns for *s1*.
    """
    score = weight(s1)
    return score
def main():
    """Interactively weigh every labelled entity of a LinkedMDB class.

    Steps:
      1. Prompt for a class name and load
         ``http://data.linkedmdb.org/all/<class name>``.
      2. Collect an entity name and an id for every triple whose predicate
         contains ``'label'`` and print a preview of the first ten.
      3. Compute ``weight`` for every entity URI.  Each result is appended to
         ``temp_csv.csv`` (and flushed) as soon as it is known, so partial
         progress survives an interrupted run.
      4. Write the complete URI -> weight table to ``actor_id_weight.csv``.

    Returns:
        None.  Returns early, before any CSV file is touched, when no
        entities were found.

    Raises:
        Any exception ``rdflib.Graph.parse`` raises for the class listing, or
        ``out_degree``/``weight`` raise for the first entity.  Failures for
        individual entities inside the main loop are reported on stderr and
        skipped.
    """
    import csv
    import sys

    base_url = 'http://data.linkedmdb.org/all/'
    print(" ")
    # str methods return new strings; the normalised value has to be re-bound
    # or the raw user input ends up in the URL.
    class_name = input("Enter Class Name: ").lower().strip()
    url = base_url + class_name
    print("\n")
    print("Extracting Data From "+str(url))
    graph = rdflib.Graph()
    graph.parse(url)

    entities, actor_id = [], []
    for subject, predicate, obj in graph:
        if 'label' not in predicate:
            continue
        try:
            # Keep the text segment just before the last '('.  A label with no
            # '(' has a single segment, so [-2] raises IndexError.
            entity = str(obj.split('(')[-2].strip(' '))
        except IndexError as e:
            print("Error:", e)
            continue
        entities.append(entity)
        actor_id.append(subject.split("/")[-1])

    print(" ")
    print("Total "+str(len(entities))+" entities found! Showing first "+str(min(10, len(entities)))+" entities")
    print(" ")
    print(actor_id[:10])
    for position, entity in enumerate(entities[:10], start=1):
        print(str(position)+": ", entity)
    print("\n")

    if not actor_id:
        # Nothing to index below; avoid an IndexError on actor_id[0].
        print("No entities found; nothing to weigh.")
        return

    # base_url without its trailing "all/" is the site root.
    data_url = base_url[:-4] + "data/" + class_name + "/"
    uri_actor_id = [data_url + identifier for identifier in actor_id]
    print(uri_actor_id[0])
    print(out_degree(uri_actor_id[0]))
    print("Weight of first one :", uri_actor_id[0], weight(uri_actor_id[0]))

    uri_actor_id_dict = {}
    row = 1
    # newline="" is what the csv module expects for files it writes to.
    with open("temp_csv.csv", "a", newline="") as temp_file:
        temp_csv = csv.writer(temp_file)
        for uri in uri_actor_id:
            try:
                uri_weight = weight(uri)
            except Exception as exc:
                # Best effort per entity; `Exception` instead of a bare except
                # so that Ctrl-C still stops this long-running loop.
                print("Skipping", uri, "-", exc, file=sys.stderr)
                continue
            uri_actor_id_dict[uri] = uri_weight
            temp_csv.writerow([row, uri, uri_weight])
            temp_file.flush()
            print(row, uri, uri_weight)
            row += 1

    # Write the collected dictionary as CSV; the with-block guarantees the
    # file is flushed and closed.
    with open("actor_id_weight.csv", "w", newline="") as result_file:
        csv.writer(result_file).writerows(uri_actor_id_dict.items())
# Script entry point: start the interactive run only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()