feat: computing neo4j analytics for just the only latest date! #34

Merged · 5 commits · Sep 23, 2024
Changes from 1 commit
@@ -42,13 +42,16 @@ def __init__(
self.projection_utils = ProjectionUtils(self.platform_id, self.graph_schema)

def compute_stats(self, from_start: bool) -> None:
"""
from_start is disabled. We would always compute just for the latest date
"""
# possible dates to do the computations
possible_dates = self.projection_utils.get_dates()

# if we didn't want to compute from the day start
if not from_start:
computed_dates = self.get_computed_dates()
possible_dates = possible_dates - computed_dates
# if not from_start:
# computed_dates = self.get_computed_dates()
# possible_dates = possible_dates - computed_dates

for date in possible_dates:
try:
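To make the new behaviour concrete, here is a minimal, self-contained sketch of the loop above (the names run_analytics and compute_for_date are illustrative, not from the PR): with the from_start filtering commented out and get_dates() now returning only the most recent date, the loop body runs exactly once.

import logging

def compute_for_date(date: float) -> None:
    # Stand-in for the per-date analytics wrapper called inside the loop.
    print(f"computing analytics for {date}")

def run_analytics(possible_dates: set[float]) -> None:
    # Mirrors the shape of compute_stats after this change: no date filtering,
    # just iterate whatever get_dates() returned.
    for date in possible_dates:
        try:
            compute_for_date(date)
        except Exception as exp:
            logging.error(f"Computation failed for date {date}: {exp}")

# get_dates() now yields a single-element set (the latest interaction date),
# so only one iteration runs:
run_analytics({1_727_049_600_000.0})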
44 changes: 24 additions & 20 deletions tc_analyzer_lib/algorithms/neo4j_analysis/centrality.py
@@ -31,6 +31,8 @@ def compute_degree_centerality(
the computed_dates will be based on the
network decentrality metric computations

NOTE: `from_start` is disabled and we're always computing for the latest date available

Parameters:
------------
direction : str
@@ -68,30 +70,32 @@
node = self.graph_schema.user_label
weighted = True if "weighted" not in kwargs.keys() else kwargs["weighted"]
normalize = False if "normalize" not in kwargs.keys() else kwargs["normalize"]
preserve_parallel = (
True
if "preserve_parallel" not in kwargs.keys()
else kwargs["preserve_parallel"]
)
preserve_parallel = kwargs.get("preserve_parallel", True)

recompute_dates = None
if "recompute_dates" in kwargs:
recompute_dates = kwargs["recompute_dates"]
# recompute_dates = None
# if "recompute_dates" in kwargs:
# recompute_dates = kwargs["recompute_dates"]
Comment on lines +75 to +77 (Contributor):
Remove commented-out code

The code for handling recompute_dates has been commented out. If this functionality is no longer needed, it's better to remove the code entirely rather than leaving it commented out. This helps maintain code cleanliness and readability.

If you think this functionality might be needed in the future, remember that version control systems can be used to retrieve old code if necessary.
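
In that spirit, the kwargs handling could be reduced to something like the following sketch (illustrative only, not part of the PR), dropping the dead recompute_dates block instead of keeping it as comments:

def read_centrality_kwargs(**kwargs) -> tuple[bool, bool, bool]:
    # Same defaults as in the diff above, with the commented-out
    # recompute_dates handling removed entirely.
    weighted = kwargs.get("weighted", True)
    normalize = kwargs.get("normalize", False)
    preserve_parallel = kwargs.get("preserve_parallel", True)
    return weighted, normalize, preserve_parallel

print(read_centrality_kwargs(weighted=False))  # (False, False, True)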


if weighted and not preserve_parallel:
logging.warn(
logging.warning(
"""preserver_parallel=False with weighted=True
could produce wrong results!"""
)

interacted_with_label = self.graph_schema.interacted_with_rel
query = """
MATCH () -[r:INTERACTED_WITH {platformId: $platform_id}]-()
WITH max(r.date) as latest_date
"""

# determining one line of the query using the direction variable
interaction = f"[r:{interacted_with_label} {{date: latest_date}}]"
if direction == "in_degree":
query = f"MATCH (a:{node})<-[r:{interacted_with_label}]-(b:{node})"
query += f"MATCH (a:{node})<-{interaction}-(b:{node})"
elif direction == "out_degree":
query = f"MATCH (a:{node})-[r:{interacted_with_label}]->(b:{node})"
query += f"MATCH (a:{node})-{interaction}->(b:{node})"
elif direction == "undirected":
query = f"MATCH (a:{node})-[r:{interacted_with_label}]-(b:{node})"
query += f"MATCH (a:{node})-{interaction}-(b:{node})"

results = self.neo4j_ops.gds.run_cypher(
f"""
@@ -107,14 +111,14 @@ def compute_degree_centerality(
)

dates_to_compute = set(results["date"].value_counts().index)
if not from_start:
projection_utils = ProjectionUtils(self.platform_id, self.graph_schema)

dates_to_compute = self._get_dates_to_compute(
projection_utils, dates_to_compute
)
if recompute_dates is not None:
dates_to_compute = dates_to_compute.union(recompute_dates)
# if not from_start:
# projection_utils = ProjectionUtils(self.platform_id, self.graph_schema)

# dates_to_compute = self._get_dates_to_compute(
# projection_utils, dates_to_compute
# )
# if recompute_dates is not None:
# dates_to_compute = dates_to_compute.union(recompute_dates)

degree_centerality = self.count_degrees(
computation_date=dates_to_compute,
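For reference, the two query fragments added above compose, for the undirected branch, roughly as in this illustrative sketch (label names are placeholders; in the code, node and interacted_with_label come from graph_schema, and the RETURN clause sits in the collapsed portion of the diff):

# Sketch of how the query is assembled for direction == "undirected".
interacted_with_label = "INTERACTED_WITH"   # illustrative; from graph_schema
node = "DiscordMember"                      # illustrative; from graph_schema

query = """
MATCH () -[r:INTERACTED_WITH {platformId: $platform_id}]-()
WITH max(r.date) as latest_date
"""
interaction = f"[r:{interacted_with_label} {{date: latest_date}}]"
query += f"MATCH (a:{node})-{interaction}-(b:{node})"

print(query)
# MATCH () -[r:INTERACTED_WITH {platformId: $platform_id}]-()
# WITH max(r.date) as latest_date
# MATCH (a:DiscordMember)-[r:INTERACTED_WITH {date: latest_date}]-(b:DiscordMember)

The first MATCH/WITH pair finds the newest interaction date for the platform; re-declaring r in the second MATCH is fine because WITH only carries latest_date forward, so degree counting is restricted to relationships stamped with that date.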
@@ -34,15 +34,15 @@ def compute(self, from_start: bool = False) -> None:

computable_dates = self.projection_utils.get_dates()

# compute for each date
to_compute: set[float]
if from_start:
to_compute = computable_dates
else:
computed_dates = self.get_computed_dates()
to_compute = computable_dates - computed_dates

for date in to_compute:
# # compute for each date
# to_compute: set[float]
# if from_start:
# to_compute = computable_dates
# else:
# computed_dates = self.get_computed_dates()
# to_compute = computable_dates - computed_dates

for date in computable_dates:
try:
self.closeness_computation_wrapper(date)
except Exception as exp:
@@ -33,17 +33,17 @@ def compute(self, from_start: bool = False) -> None:
# Getting all possible dates
computable_dates = self.projection_utils.get_dates()

computed_dates = self.get_computed_dates()
# computed_dates = self.get_computed_dates()

# compute for each date
to_compute: set[float]
if from_start:
to_compute = computable_dates
else:
to_compute = computable_dates - computed_dates
# to_compute: set[float]
# if from_start:
# to_compute = computable_dates
# else:
# to_compute = computable_dates - computed_dates

# for the computation date
for date in to_compute:
for date in computable_dates:
try:
self.local_clustering_computation_wrapper(date=date)
except Exception as exp:
16 changes: 8 additions & 8 deletions tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py
@@ -35,14 +35,14 @@ def compute(self, from_start: bool = False) -> None:
computable_dates = self.projection_utils.get_dates()

# compute for each date
to_compute: set[float]
if from_start:
to_compute = computable_dates
else:
computed_dates = self.get_computed_dates()
to_compute = computable_dates - computed_dates

for date in to_compute:
# to_compute: set[float]
# if from_start:
# to_compute = computable_dates
# else:
# computed_dates = self.get_computed_dates()
# to_compute = computable_dates - computed_dates

for date in computable_dates:
try:
self.louvain_computation_wrapper(date)
except Exception as exp:
@@ -114,6 +114,7 @@ def project_temp_graph(
def get_dates(self) -> set[float]:
"""
get all the dates we do have on the INTERACTED_WITH relations
Note: returning just the only previous date

Parameters:
------------
@@ -124,8 +125,7 @@ def get_dates(self) -> set[float]:
f"""
MATCH (a:{self.user_label})
-[r:{self.between_user_label} {{platformId: $platform_id}}]-()
WITH DISTINCT(r.date) as dates
RETURN dates
RETURN r.date as dates ORDER BY dates DESC LIMIT 1
Comment (Contributor):
Update return type and variable name to reflect single date return

The method now returns only the latest date, but the return type is still set[float] and the variable name suggests multiple dates. This inconsistency could lead to confusion or errors in code that uses this method.

Consider the following changes:

  1. Update the method signature to return a single float instead of a set[float].
  2. Rename the variable to reflect that it contains a single date.
  3. Modify the return statement to return the single date value.
-    def get_dates(self) -> set[float]:
+    def get_latest_date(self) -> float:
         """
-        get all the dates we do have on the INTERACTED_WITH relations
+        get the most recent date from the INTERACTED_WITH relations
         
-        Note: returning just the only previous date
+        Note: returning only the most recent date
         
         Parameters:
         ------------
         guildId : str
             the guild we do want the dates of relations
         """
         dates = self.gds.run_cypher(
             f"""
             MATCH (a:{self.user_label})
                 -[r:{self.between_user_label} {{platformId: $platform_id}}]-()
             RETURN r.date as dates ORDER BY dates DESC LIMIT 1
             """,
             params={"platform_id": self.platform_id},
         )
-        computable_dates_set = set(dates["dates"].values)
+        latest_date = dates["dates"].values[0] if dates["dates"].values else None

-        return computable_dates_set
+        return latest_date

Also applies to: 130-132
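
If that suggestion were adopted, call sites would shrink from a loop to a None-check; a hypothetical sketch (get_latest_date and compute_latest are the reviewer's proposed/illustrative names, not existing methods):

from typing import Optional

def compute_latest(latest_date: Optional[float]) -> None:
    # Hypothetical caller: with a single Optional[float] instead of a set,
    # the per-date loop disappears and only a None-check remains.
    if latest_date is None:
        return  # no interactions recorded yet, nothing to compute
    print(f"running the metric for {latest_date}")

compute_latest(1_727_049_600_000.0)
compute_latest(None)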

""",
params={"platform_id": self.platform_id},
)