Skip to content

Commit

Permalink
Merge pull request #28 from TogetherCrew/feat/26-limit-graph-analytics
Browse files Browse the repository at this point in the history
feat: limited graph saving just for the latest date!
  • Loading branch information
amindadgar authored Sep 12, 2024
2 parents 1353679 + 7a34956 commit ffbb18f
Show file tree
Hide file tree
Showing 14 changed files with 65 additions and 55 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="tc-analyzer-lib",
version="1.4.6",
version="1.4.7",
author="Mohammad Amin Dadgar, TogetherCrew",
maintainer="Mohammad Amin Dadgar",
maintainer_email="dadgaramin96@gmail.com",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def compute_node_stats_wrapper(self, date: float):

self.save_properties_db(df, date)
_ = self.gds.run_cypher(
"CALL gds.graph.drop($graph_name)",
"CALL gds.graph.drop($graph_name) YIELD graphName",
{
"graph_name": graph_name,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def closeness_computation_wrapper(self, date: float) -> None:
# dropping the computed date
_ = self.neo4j_ops.gds.run_cypher(
"""
CALL gds.graph.drop($graph_projected_name)
CALL gds.graph.drop($graph_projected_name) YIELD graphName
""",
{
"graph_projected_name": graph_projected_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def local_clustering_computation_wrapper(self, date: float) -> None:
# dropping the computed date
_ = self.gds.run_cypher(
"""
CALL gds.graph.drop($graph_projected_name)
CALL gds.graph.drop($graph_projected_name) YIELD graphName
""",
{
"graph_projected_name": graph_projected_name,
Expand Down
2 changes: 1 addition & 1 deletion tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def louvain_computation_wrapper(self, date: float) -> None:
# dropping the computed date
_ = self.neo4j_ops.gds.run_cypher(
"""
CALL gds.graph.drop($graph_projected_name)
CALL gds.graph.drop($graph_projected_name) YIELD graphName
""",
{
"graph_projected_name": graph_projected_name,
Expand Down
15 changes: 15 additions & 0 deletions tc_analyzer_lib/tc_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ async def run_once(self):
member_acitivities_networkx_data,
) = memberactivity_analysis.analysis_member_activity(from_start=False)

member_acitivities_networkx_data = self.get_latest_networkx_graph(
member_acitivities_networkx_data
)

analytics_data = {}
# storing whole data into a dictionary
analytics_data["heatmaps"] = None
Expand Down Expand Up @@ -193,6 +197,10 @@ async def recompute(self):
member_acitivities_networkx_data,
) = memberactivity_analysis.analysis_member_activity(from_start=True)

member_acitivities_networkx_data = self.get_latest_networkx_graph(
member_acitivities_networkx_data
)

# storing whole data into a dictionary
analytics_data = {}
# storing whole data into a dictionary
Expand Down Expand Up @@ -223,3 +231,10 @@ def check_platform(self):
raise ValueError(
f"Platform with platform_id: {self.platform_id} doesn't exist!"
)

def get_latest_networkx_graph(self, member_acitivities_networkx_data: dict) -> dict:
    """
    Keep only the most recent graph from a date-keyed mapping.

    Parameters
    ------------
    member_acitivities_networkx_data : dict
        mapping of analytics date (comparable key, e.g. a timestamp float)
        to its networkx graph object

    Returns
    ---------
    dict
        a single-entry dict holding just the latest date and its graph;
        an empty dict if the input mapping is empty
    """
    # guard: max() over an empty mapping would raise ValueError
    if not member_acitivities_networkx_data:
        return {}
    latest_date = max(member_acitivities_networkx_data.keys())
    return {latest_date: member_acitivities_networkx_data[latest_date]}
16 changes: 7 additions & 9 deletions tests/integration/test_generated_graph_period_1_year_run_once.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ async def test_networkgraph_one_year_period_run_once_available_analytics(self):
rawinfo_samples
)

# we've only saved the latest date
await analyzer.run_once()

graph_schema = analyzer.graph_schema
Expand All @@ -130,19 +131,16 @@ async def test_networkgraph_one_year_period_run_once_available_analytics(self):
)
dates = results.values.squeeze()

print("dates[:2]: ", dates[:2])
print("dates[-2:]: ", dates[-2:])

# our analysis started from 4 days ago
start_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=4)
# # our analysis started from 4 days ago
# start_analytics_date = datetime.now().replace(
# hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
# ) - timedelta(days=4)
end_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)

assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates[0] == end_analytics_date.timestamp() * 1000
# assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates == end_analytics_date.timestamp() * 1000

# results = neo4j_ops.gds.run_cypher(
# f"""
Expand Down
10 changes: 5 additions & 5 deletions tests/integration/test_generated_graph_period_1year.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,15 @@ async def test_networkgraph_one_year_period_recompute_available_analytics(self):

print(dates)

start_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=354)
# start_analytics_date = datetime.now().replace(
# hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
# ) - timedelta(days=354)
end_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)

assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates[0] == end_analytics_date.timestamp() * 1000
# assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates == end_analytics_date.timestamp() * 1000

# results = neo4j_ops.gds.run_cypher(
# f"""
Expand Down
10 changes: 5 additions & 5 deletions tests/integration/test_generated_graph_period_35_days.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,15 @@ async def test_networkgraph_35_days_period_recompute_available_analytics(self):

print(dates)

start_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=29)
# start_analytics_date = datetime.now().replace(
# hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
# ) - timedelta(days=29)
end_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)

assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates[0] == end_analytics_date.timestamp() * 1000
# assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates == end_analytics_date.timestamp() * 1000

# results = neo4j_ops.gds.run_cypher(
# f"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,15 @@ async def test_networkgraph_35_days_period_run_once_available_analytics(self):
print(dates)

# we do run the analytics for 4 days ago
start_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=4)
# start_analytics_date = datetime.now().replace(
# hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
# ) - timedelta(days=4)
end_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)

assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates[0] == end_analytics_date.timestamp() * 1000
# assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates == end_analytics_date.timestamp() * 1000

# results = neo4j_ops.gds.run_cypher(
# f"""
Expand Down
10 changes: 5 additions & 5 deletions tests/integration/test_generated_graph_period_3_months.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,15 @@ async def test_networkgraph_three_months_period_recompute_available_analytics(se

print(dates)

start_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=84)
# start_analytics_date = datetime.now().replace(
# hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
# ) - timedelta(days=84)
end_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)

assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates[0] == end_analytics_date.timestamp() * 1000
# assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates == end_analytics_date.timestamp() * 1000

# results = neo4j_ops.gds.run_cypher(
# f"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,15 @@ async def test_networkgraph_three_months_period_run_once_available_analytics(sel
)
dates = results.values.squeeze()

print("dates[:2]: ", dates[:2])
print("dates[-2:]: ", dates[-2:])

start_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=4)
# start_analytics_date = datetime.now().replace(
# hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
# ) - timedelta(days=4)
end_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)

assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates[0] == end_analytics_date.timestamp() * 1000
# assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates == end_analytics_date.timestamp() * 1000

# results = neo4j_ops.gds.run_cypher(
# f"""
Expand Down
10 changes: 5 additions & 5 deletions tests/integration/test_generated_graph_period_6_months.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,15 @@ async def test_networkgraph_six_months_period_recompute_available_analytics(self

print(dates)

start_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=174)
# start_analytics_date = datetime.now().replace(
# hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
# ) - timedelta(days=174)
end_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)

assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates[0] == end_analytics_date.timestamp() * 1000
# assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates == end_analytics_date.timestamp() * 1000

# for now we've dropped the support for community node creation
# was not required
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,19 +132,19 @@ async def test_networkgraph_six_months_period_run_once_available_analytics(self)
)
dates = results.values.squeeze()

print("dates[:2]: ", dates[:2])
print("dates[-2:]: ", dates[-2:])
# print("dates[:2]: ", dates[:2])
# print("dates[-2:]: ", dates[-2:])

# we do analyzed from 4 days ago
start_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=4)
# # we do analyzed from 4 days ago
# start_analytics_date = datetime.now().replace(
# hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
# ) - timedelta(days=4)
end_analytics_date = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
) - timedelta(days=1)

assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates[0] == end_analytics_date.timestamp() * 1000
# assert dates[-1] == start_analytics_date.timestamp() * 1000
assert dates == end_analytics_date.timestamp() * 1000

# connection to community is deleted for now
# results = neo4j_ops.gds.run_cypher(
Expand Down

0 comments on commit ffbb18f

Please sign in to comment.