From f6723a12a4820b8144e671d7809bad84bd18199b Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Thu, 12 Sep 2024 11:21:03 +0330 Subject: [PATCH 1/5] feat: limited graph saving just for the latest date! --- tc_analyzer_lib/tc_analyzer.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tc_analyzer_lib/tc_analyzer.py b/tc_analyzer_lib/tc_analyzer.py index ebb0732..9cc945c 100644 --- a/tc_analyzer_lib/tc_analyzer.py +++ b/tc_analyzer_lib/tc_analyzer.py @@ -112,6 +112,12 @@ async def run_once(self): member_acitivities_networkx_data, ) = memberactivity_analysis.analysis_member_activity(from_start=False) + # just getting the latest networkx object (latest graph) + latest_date = max(member_acitivities_networkx_data.keys()) + member_acitivities_networkx_data = { + latest_date: member_acitivities_networkx_data[latest_date] + } + analytics_data = {} # storing whole data into a dictinoary analytics_data["heatmaps"] = None @@ -193,6 +199,12 @@ async def recompute(self): member_acitivities_networkx_data, ) = memberactivity_analysis.analysis_member_activity(from_start=True) + # just getting the latest networkx object (latest graph) + latest_date = max(member_acitivities_networkx_data.keys()) + member_acitivities_networkx_data = { + latest_date: member_acitivities_networkx_data[latest_date] + } + # storing whole data into a dictinoary analytics_data = {} # storing whole data into a dictinoary From a3e8c8fd81b1d39cce7d66e8d97c3a48501413e4 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Thu, 12 Sep 2024 13:43:43 +0330 Subject: [PATCH 2/5] fix: graph deprecation warnings! 
the deprecation warnings were raised in the cases where YIELD was not used in gds.graph.drop --- .../algorithms/neo4j_analysis/analyzer_node_stats.py | 2 +- .../algorithms/neo4j_analysis/closeness_centrality.py | 2 +- .../algorithms/neo4j_analysis/local_clustering_coefficient.py | 2 +- tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tc_analyzer_lib/algorithms/neo4j_analysis/analyzer_node_stats.py b/tc_analyzer_lib/algorithms/neo4j_analysis/analyzer_node_stats.py index ff6cc5a..cfdfcac 100644 --- a/tc_analyzer_lib/algorithms/neo4j_analysis/analyzer_node_stats.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/analyzer_node_stats.py @@ -121,7 +121,7 @@ def compute_node_stats_wrapper(self, date: float): self.save_properties_db(df, date) _ = self.gds.run_cypher( - "CALL gds.graph.drop($graph_name)", + "CALL gds.graph.drop($graph_name) YIELD graphName", { "graph_name": graph_name, }, diff --git a/tc_analyzer_lib/algorithms/neo4j_analysis/closeness_centrality.py b/tc_analyzer_lib/algorithms/neo4j_analysis/closeness_centrality.py index 8640e51..0371c7b 100644 --- a/tc_analyzer_lib/algorithms/neo4j_analysis/closeness_centrality.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/closeness_centrality.py @@ -86,7 +86,7 @@ def closeness_computation_wrapper(self, date: float) -> None: # dropping the computed date _ = self.neo4j_ops.gds.run_cypher( """ - CALL gds.graph.drop($graph_projected_name) + CALL gds.graph.drop($graph_projected_name) YIELD graphName """, { "graph_projected_name": graph_projected_name, diff --git a/tc_analyzer_lib/algorithms/neo4j_analysis/local_clustering_coefficient.py b/tc_analyzer_lib/algorithms/neo4j_analysis/local_clustering_coefficient.py index 2323851..43c46f1 100644 --- a/tc_analyzer_lib/algorithms/neo4j_analysis/local_clustering_coefficient.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/local_clustering_coefficient.py @@ -81,7 +81,7 @@ def 
local_clustering_computation_wrapper(self, date: float) -> None: # dropping the computed date _ = self.gds.run_cypher( """ - CALL gds.graph.drop($graph_projected_name) + CALL gds.graph.drop($graph_projected_name) YIELD graphName """, { "graph_projected_name": graph_projected_name, diff --git a/tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py b/tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py index 8a00ddf..a928c50 100644 --- a/tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py @@ -76,7 +76,7 @@ def louvain_computation_wrapper(self, date: float) -> None: # dropping the computed date _ = self.neo4j_ops.gds.run_cypher( """ - CALL gds.graph.drop($graph_projected_name) + CALL gds.graph.drop($graph_projected_name) YIELD graphName """, { "graph_projected_name": graph_projected_name, From c147fbef31be0d12e6194b29836a3b0dba6ac501 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Thu, 12 Sep 2024 13:48:55 +0330 Subject: [PATCH 3/5] feat: update test cases for just saving the recent graph! 
--- ...est_generated_graph_period_1_year_run_once.py | 16 +++++++--------- .../test_generated_graph_period_1year.py | 10 +++++----- .../test_generated_graph_period_35_days.py | 10 +++++----- ...st_generated_graph_period_35_days_run_once.py | 10 +++++----- .../test_generated_graph_period_3_months.py | 10 +++++----- ...t_generated_graph_period_3_months_run_once.py | 13 +++++-------- .../test_generated_graph_period_6_months.py | 10 +++++----- ...t_generated_graph_period_6_months_run_once.py | 16 ++++++++-------- 8 files changed, 45 insertions(+), 50 deletions(-) diff --git a/tests/integration/test_generated_graph_period_1_year_run_once.py b/tests/integration/test_generated_graph_period_1_year_run_once.py index da41381..530a747 100644 --- a/tests/integration/test_generated_graph_period_1_year_run_once.py +++ b/tests/integration/test_generated_graph_period_1_year_run_once.py @@ -116,6 +116,7 @@ async def test_networkgraph_one_year_period_run_once_available_analytics(self): rawinfo_samples ) + # we've only saved the latest date await analyzer.run_once() graph_schema = analyzer.graph_schema @@ -130,19 +131,16 @@ async def test_networkgraph_one_year_period_run_once_available_analytics(self): ) dates = results.values.squeeze() - print("dates[:2]: ", dates[:2]) - print("dates[-2:]: ", dates[-2:]) - - # our analysis started from 4 days ago - start_analytics_date = datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc - ) - timedelta(days=4) + # # our analysis started from 4 days ago + # start_analytics_date = datetime.now().replace( + # hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + # ) - timedelta(days=4) end_analytics_date = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc ) - timedelta(days=1) - assert dates[-1] == start_analytics_date.timestamp() * 1000 - assert dates[0] == end_analytics_date.timestamp() * 1000 + # assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert 
dates == end_analytics_date.timestamp() * 1000 # results = neo4j_ops.gds.run_cypher( # f""" diff --git a/tests/integration/test_generated_graph_period_1year.py b/tests/integration/test_generated_graph_period_1year.py index 182a145..02ff29c 100644 --- a/tests/integration/test_generated_graph_period_1year.py +++ b/tests/integration/test_generated_graph_period_1year.py @@ -133,15 +133,15 @@ async def test_networkgraph_one_year_period_recompute_available_analytics(self): print(dates) - start_analytics_date = datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc - ) - timedelta(days=354) + # start_analytics_date = datetime.now().replace( + # hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + # ) - timedelta(days=354) end_analytics_date = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc ) - timedelta(days=1) - assert dates[-1] == start_analytics_date.timestamp() * 1000 - assert dates[0] == end_analytics_date.timestamp() * 1000 + # assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates == end_analytics_date.timestamp() * 1000 # results = neo4j_ops.gds.run_cypher( # f""" diff --git a/tests/integration/test_generated_graph_period_35_days.py b/tests/integration/test_generated_graph_period_35_days.py index dd665f7..5decee9 100644 --- a/tests/integration/test_generated_graph_period_35_days.py +++ b/tests/integration/test_generated_graph_period_35_days.py @@ -133,15 +133,15 @@ async def test_networkgraph_35_days_period_recompute_available_analytics(self): print(dates) - start_analytics_date = datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc - ) - timedelta(days=29) + # start_analytics_date = datetime.now().replace( + # hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + # ) - timedelta(days=29) end_analytics_date = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc ) - 
timedelta(days=1) - assert dates[-1] == start_analytics_date.timestamp() * 1000 - assert dates[0] == end_analytics_date.timestamp() * 1000 + # assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates == end_analytics_date.timestamp() * 1000 # results = neo4j_ops.gds.run_cypher( # f""" diff --git a/tests/integration/test_generated_graph_period_35_days_run_once.py b/tests/integration/test_generated_graph_period_35_days_run_once.py index c5d982c..6ebf0d9 100644 --- a/tests/integration/test_generated_graph_period_35_days_run_once.py +++ b/tests/integration/test_generated_graph_period_35_days_run_once.py @@ -134,15 +134,15 @@ async def test_networkgraph_35_days_period_run_once_available_analytics(self): print(dates) # we do run the analytics for 4 days ago - start_analytics_date = datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc - ) - timedelta(days=4) + # start_analytics_date = datetime.now().replace( + # hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + # ) - timedelta(days=4) end_analytics_date = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc ) - timedelta(days=1) - assert dates[-1] == start_analytics_date.timestamp() * 1000 - assert dates[0] == end_analytics_date.timestamp() * 1000 + # assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates == end_analytics_date.timestamp() * 1000 # results = neo4j_ops.gds.run_cypher( # f""" diff --git a/tests/integration/test_generated_graph_period_3_months.py b/tests/integration/test_generated_graph_period_3_months.py index fe68ac3..bafc516 100644 --- a/tests/integration/test_generated_graph_period_3_months.py +++ b/tests/integration/test_generated_graph_period_3_months.py @@ -132,15 +132,15 @@ async def test_networkgraph_three_months_period_recompute_available_analytics(se print(dates) - start_analytics_date = datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc - 
) - timedelta(days=84) + # start_analytics_date = datetime.now().replace( + # hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + # ) - timedelta(days=84) end_analytics_date = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc ) - timedelta(days=1) - assert dates[-1] == start_analytics_date.timestamp() * 1000 - assert dates[0] == end_analytics_date.timestamp() * 1000 + # assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates == end_analytics_date.timestamp() * 1000 # results = neo4j_ops.gds.run_cypher( # f""" diff --git a/tests/integration/test_generated_graph_period_3_months_run_once.py b/tests/integration/test_generated_graph_period_3_months_run_once.py index 5bbe50c..fed0434 100644 --- a/tests/integration/test_generated_graph_period_3_months_run_once.py +++ b/tests/integration/test_generated_graph_period_3_months_run_once.py @@ -131,18 +131,15 @@ async def test_networkgraph_three_months_period_run_once_available_analytics(sel ) dates = results.values.squeeze() - print("dates[:2]: ", dates[:2]) - print("dates[-2:]: ", dates[-2:]) - - start_analytics_date = datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc - ) - timedelta(days=4) + # start_analytics_date = datetime.now().replace( + # hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + # ) - timedelta(days=4) end_analytics_date = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc ) - timedelta(days=1) - assert dates[-1] == start_analytics_date.timestamp() * 1000 - assert dates[0] == end_analytics_date.timestamp() * 1000 + # assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates == end_analytics_date.timestamp() * 1000 # results = neo4j_ops.gds.run_cypher( # f""" diff --git a/tests/integration/test_generated_graph_period_6_months.py b/tests/integration/test_generated_graph_period_6_months.py index 323fef3..fc32bdc 100644 --- 
a/tests/integration/test_generated_graph_period_6_months.py +++ b/tests/integration/test_generated_graph_period_6_months.py @@ -133,15 +133,15 @@ async def test_networkgraph_six_months_period_recompute_available_analytics(self print(dates) - start_analytics_date = datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc - ) - timedelta(days=174) + # start_analytics_date = datetime.now().replace( + # hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + # ) - timedelta(days=174) end_analytics_date = datetime.now().replace( hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc ) - timedelta(days=1) - assert dates[-1] == start_analytics_date.timestamp() * 1000 - assert dates[0] == end_analytics_date.timestamp() * 1000 + # assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates == end_analytics_date.timestamp() * 1000 # for now we've dropped the support for community node creation # was not required diff --git a/tests/integration/test_generated_graph_period_6_months_run_once.py b/tests/integration/test_generated_graph_period_6_months_run_once.py index fb39536..c6e2968 100644 --- a/tests/integration/test_generated_graph_period_6_months_run_once.py +++ b/tests/integration/test_generated_graph_period_6_months_run_once.py @@ -132,19 +132,19 @@ async def test_networkgraph_six_months_period_run_once_available_analytics(self) ) dates = results.values.squeeze() - print("dates[:2]: ", dates[:2]) - print("dates[-2:]: ", dates[-2:]) + # print("dates[:2]: ", dates[:2]) + # print("dates[-2:]: ", dates[-2:]) - # we do analyzed from 4 days ago - start_analytics_date = datetime.now().replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc - ) - timedelta(days=4) + # # we do analyzed from 4 days ago + # start_analytics_date = datetime.now().replace( + # hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + # ) - timedelta(days=4) end_analytics_date = datetime.now().replace( hour=0, 
minute=0, second=0, microsecond=0, tzinfo=timezone.utc ) - timedelta(days=1) - assert dates[-1] == start_analytics_date.timestamp() * 1000 - assert dates[0] == end_analytics_date.timestamp() * 1000 + # assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates == end_analytics_date.timestamp() * 1000 # connection to community is deleted for now # results = neo4j_ops.gds.run_cypher( From 324c4ae2a73d4da9ede4b220724365c6f8ea6b15 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Thu, 12 Sep 2024 14:04:49 +0330 Subject: [PATCH 4/5] feat: code cleanup! remove duplicate codes. --- tc_analyzer_lib/tc_analyzer.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tc_analyzer_lib/tc_analyzer.py b/tc_analyzer_lib/tc_analyzer.py index 9cc945c..2a11beb 100644 --- a/tc_analyzer_lib/tc_analyzer.py +++ b/tc_analyzer_lib/tc_analyzer.py @@ -112,11 +112,9 @@ async def run_once(self): member_acitivities_networkx_data, ) = memberactivity_analysis.analysis_member_activity(from_start=False) - # just getting the latest networkx object (latest graph) - latest_date = max(member_acitivities_networkx_data.keys()) - member_acitivities_networkx_data = { - latest_date: member_acitivities_networkx_data[latest_date] - } + member_acitivities_networkx_data = self.get_latest_networkx_graph( + member_acitivities_networkx_data + ) analytics_data = {} # storing whole data into a dictinoary @@ -199,11 +197,9 @@ async def recompute(self): member_acitivities_networkx_data, ) = memberactivity_analysis.analysis_member_activity(from_start=True) - # just getting the latest networkx object (latest graph) - latest_date = max(member_acitivities_networkx_data.keys()) - member_acitivities_networkx_data = { - latest_date: member_acitivities_networkx_data[latest_date] - } + member_acitivities_networkx_data = self.get_latest_networkx_graph( + member_acitivities_networkx_data + ) # storing whole data into a dictinoary analytics_data = {} @@ -235,3 +231,10 @@ 
def check_platform(self): raise ValueError( f"Platform with platform_id: {self.platform_id} doesn't exist!" ) + + def get_latest_networkx_graph(self, member_acitivities_networkx_data: dict): + """ + just getting the latest networkx object (latest graph) + """ + latest_date = max(member_acitivities_networkx_data.keys()) + return {latest_date: member_acitivities_networkx_data[latest_date]} From 7a34956dc09a40e2f276835154502b26bf20dc4f Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Thu, 12 Sep 2024 14:18:16 +0330 Subject: [PATCH 5/5] feat: bump lib version! --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index af49967..4c12d39 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="tc-analyzer-lib", - version="1.4.6", + version="1.4.7", author="Mohammad Amin Dadgar, TogetherCrew", maintainer="Mohammad Amin Dadgar", maintainer_email="dadgaramin96@gmail.com",