-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
186 lines (150 loc) · 6.46 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import ast
import csv
import json
import pickle

import networkx as nx
import pandas as pd
import streamlit as st
import streamlit.components.v1 as components
from pyvis.network import Network
# Set page layout to wide mode for better use of screen space
# (applies to every section rendered below).
st.set_page_config(layout="wide")
# Function to visualize the graph using PyVis
def visualize_graph(G):
    """Render directed graph G as an interactive PyVis network.

    Nodes are sized by their PageRank score so that more influential
    users appear larger. The visualization is written to "graph.html".

    Args:
        G: a networkx directed graph.

    Returns:
        str: path of the saved HTML file.
    """
    network = Network(height="700px", width="100%", notebook=False, directed=True)
    network.barnes_hut(gravity=-80000, central_gravity=0.3, spring_length=100,
                       spring_strength=0.001, damping=0.9)
    # Add nodes (scaled into a visible size range by PageRank) and edges.
    scores = nx.pagerank(G)
    for node_id, score in scores.items():
        network.add_node(node_id, size=score * 200 + 10)
    for source, target in G.edges:
        network.add_edge(source, target)
    # Set graph options
    network.set_options("""
    var options = {
      "nodes": {
        "scaling": {
          "min": 10,
          "max": 30
        }
      },
      "edges": {
        "color": {
          "inherit": true
        },
        "smooth": {
          "type": "continuous"
        }
      },
      "physics": {
        "barnesHut": {
          "gravitationalConstant": -80000,
          "springLength": 100,
          "springConstant": 0.001,
          "damping": 0.9
        },
        "minVelocity": 0.75
      }
    }
    """)
    output_path = "graph.html"
    network.save_graph(output_path)
    return output_path
def node_similarity(G, node1, node2):
    """Jaccard similarity of the two nodes' predecessor sets in G.

    Returns a value in [0, 1]; 0 when neither node has any predecessors.
    """
    # You can implement any similarity measure like Jaccard, Adamic-Adar, etc.
    incoming_a = set(G.predecessors(node1))
    incoming_b = set(G.predecessors(node2))
    combined = incoming_a | incoming_b
    if not combined:
        return 0
    shared = incoming_a & incoming_b
    return len(shared) / len(combined)
# Function to detect the community for a user_id using Louvain
def get_community(G, node_id):
    """Return the members of the Louvain community containing node_id.

    Runs Louvain community detection on G and returns the community that
    contains node_id as a list, or [] if the node is in no community.
    """
    detected = nx.algorithms.community.louvain_communities(G)
    match = next((members for members in detected if node_id in members), None)
    return [] if match is None else list(match)
def load_endorsement_data(csv_path: str) -> dict:
    """Load endorsement data from CSV file.

    The file must have a header row followed by rows of
    (user_id, recommenders), where `recommenders` is a Python-literal
    list, e.g. "['12', '34']".

    Args:
        csv_path: path to the CSV file.

    Returns:
        dict mapping each user_id (str) to its parsed recommender list.
    """
    endorsement_data = {}
    # newline='' is the csv module's documented requirement for open().
    with open(csv_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # Skip header
        for user_id, recommenders in reader:
            # ast.literal_eval safely parses the Python-style list. The
            # previous replace("'", '"') + json.loads hack corrupted any
            # value containing an apostrophe.
            endorsement_data[user_id] = ast.literal_eval(recommenders)
    return endorsement_data
# Load data
# Directories holding the raw resume / recommendation-letter files.
# NOTE(review): defined but not used anywhere in this visible chunk.
resumes_dir = "Dataset/Final_Resumes"
recommendations_base_dir = "Dataset/Final_Recommendation_Letters"
# Per-user scores produced by the analysis pipeline (has an 'ID' column,
# read further below).
df = pd.read_csv("./out-data/output.csv")
# Mapping of user_id -> list of recommender ids.
endorsement_data = load_endorsement_data("./Dataset/Final_Persons_And_Recommenders.csv")
# Endorsement graph: one edge user -> recommender per endorsement.
G2 = nx.DiGraph()
# Pre-create nodes "0".."999" so users without endorsements still exist in G2.
# NOTE(review): assumes exactly 1000 users — confirm against the dataset size.
for i in range(1000):
    G2.add_node(str(i))
for user_id, recommender_ids in endorsement_data.items():
    for recommender_id in recommender_ids:
        G2.add_edge(user_id, str(recommender_id))
# Load the directed graph from pickle file
# NOTE(review): pickle.load executes arbitrary code on load — only open
# trusted, locally produced pickles here.
with open("graph.pickle", "rb") as f:
    G = pickle.load(f)
# First Section: Graph Visualization
st.subheader("Graph Visualization")
graph_html_path = visualize_graph(G)
# Use a context manager so the HTML file handle is closed promptly; the
# original open(...).read() left the file object to the garbage collector.
with open(graph_html_path, 'r') as graph_file:
    components.html(graph_file.read(), height=700)
# Add spacing between sections
st.markdown("---")
# Second Section: User Information and Options
st.subheader("User Information")
# Select a node (user_id) from the dropdown
# IDs are cast to str so they match the string node labels used in the graphs.
user_ids = df['ID'].astype(str).unique()
selected_user_id = st.selectbox("Select User ID", user_ids)
st.markdown("---")
# Subsection: Ranking Candidates by Influence
st.subheader(f"Ranking Candidates by Influence for User ID {selected_user_id}")
if st.button("Rank Candidates by Influence"):
    with st.expander("Ranked Nodes"):
        # PageRank over the pickled graph G; recomputed on every click.
        pagerank = nx.pagerank(G)
        ranked_nodes = sorted(pagerank.items(), key=lambda x: x[1], reverse=True)
        st.write("Most Influential Users (Top 5):")
        for rank, (node, score) in enumerate(ranked_nodes[:5], start=1):
            st.write(f"{rank}. Node {node} - Score: {score:.4f}")
            # NOTE(review): neighbor count is taken from the endorsement graph
            # G2, not G — this raises if a top-ranked node of G is missing from
            # G2; confirm both graphs share the same node labels.
            st.write(f'Number of neighbors: {len(list(G2.neighbors(node)))}\n')
st.markdown("---")
# Subsection: Comparing Similarity Between Users
st.subheader(f"Compare Similarity Between Users for User ID {selected_user_id}")
selected_comparison_node = st.selectbox("Compare with another User ID", user_ids)
if st.button(f"Calculate Similarity between {selected_user_id} and {selected_comparison_node}"):
    with st.expander("Similarity Result"):
        # Jaccard index of the two users' predecessor sets in G.
        similarity_score = node_similarity(G, selected_user_id, selected_comparison_node)
        st.write(f"Similarity Score between {selected_user_id} and {selected_comparison_node}: {similarity_score:.4f}")
        # NOTE(review): node_similarity actually compares *predecessors*, not
        # all neighbors as the caption below suggests — confirm intended wording.
        st.write("Note: The similarity score is calculated based on the Jaccard Index of the neighbors of the two nodes.")
st.markdown("---")
# Subsection: Display Community Details
st.subheader(f"Community Details for User ID {selected_user_id}")
if st.button("Show Community Details"):
    with st.expander("Community Information"):
        # Louvain runs on every click; since get_community fixes no random
        # seed, results may vary between clicks.
        community = get_community(G, selected_user_id)
        st.write(f"Community Members for User {selected_user_id}: {community}")
st.markdown("---")
# Subsection: Neighbors and College Mates
st.subheader(f"Neighbors and College Mates for User ID {selected_user_id}")
# Direct endorsement targets from the endorsement graph G2.
neighbors = list(G2.neighbors(selected_user_id))
# Neighbors in the pickled graph G that are not endorsements — presumably
# these extra edges encode shared-college relations; verify how G was built.
college_mates = [x for x in G.neighbors(selected_user_id) if x not in neighbors]
with st.expander("Neighbors and College Mates"):
    st.write(f"Neighbors of User {selected_user_id}: {neighbors}")
    st.write(f"College Mates of User {selected_user_id}: {college_mates}")
st.markdown("---")
# Subsection: Resume Information and Fraud Data
st.subheader(f"Resume and Fraud Information for User ID {selected_user_id}")
# Load fraud score and flag information; the reads below use columns
# 'fraud_score' and 'fraud'.
fraud_data = pd.read_csv("final_df.csv")
with st.expander("Resume Details and Fraud Information"):
    # NOTE(review): rows are fetched positionally via iloc[int(ID)] — correct
    # only if each DataFrame is ordered so that row i belongs to user i;
    # confirm, or switch to a lookup keyed on the 'ID' column.
    st.write(df['Resume Summary'].iloc[int(selected_user_id)])
    st.write("**Recommendation Score**:", df['Resume Score based on Recommendations'].iloc[int(selected_user_id)])
    st.write("**Suspicious Word Score**:", df['Suspicious Wording Score'].iloc[int(selected_user_id)])
    st.write("**Recommendation Redundancy Score**:", df['Recommendation Redundancy Score'].iloc[int(selected_user_id)])
    # Fetch fraud score and flag for the selected user
    fraud_score = fraud_data['fraud_score'].iloc[int(selected_user_id)]
    fraud_flag = fraud_data['fraud'].iloc[int(selected_user_id)]
    st.write("**Fraud Score**:", fraud_score)
    st.write("**Fraud Flag**:", "True" if fraud_flag else "False")