Update features with newer code versions (#107)
arnaudon authored Feb 7, 2024
1 parent 4273a99 commit 8a153a1
Showing 19 changed files with 120 additions and 93 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run-tox.yml
@@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v2
4 changes: 2 additions & 2 deletions examples/cli/run_example.sh
@@ -3,10 +3,10 @@
export OMP_NUM_THREADS=1 # set to one to prevent numpy to run in parallel

echo 'Getting data'
-hcga -v get_data $1
+#hcga -v get_data $1

echo 'Extracting features'
-hcga -v extract_features datasets/$1.pkl -m fast -n 4 --timeout 10.0
+hcga -vvv extract_features datasets/$1.pkl -m fast -n 5 --timeout 10.0

echo 'Run classification'
hcga -v feature_analysis $1
1 change: 0 additions & 1 deletion hcga/app.py
@@ -26,7 +26,6 @@
import click

L = logging.getLogger(__name__)
-L.setLevel(logging.DEBUG)
# pylint: disable=too-many-arguments,too-many-locals


5 changes: 4 additions & 1 deletion hcga/extraction.py
@@ -188,11 +188,13 @@ def feature_extraction(graph, list_feature_classes, with_runtimes=False):
Returns:
(DataFrame): dataframe of calculated features for a given graph.
"""
L.debug("computing %s", graph)
column_indexes = pd.MultiIndex(
levels=[[], []], codes=[[], []], names=["feature_class", "feature_name"]
)
features_df = pd.DataFrame(columns=column_indexes)
-    for feature_class in list_feature_classes:
+    for i, feature_class in enumerate(list_feature_classes):
+        L.debug("computing: %s/ %s, %s", i, len(list_feature_classes), feature_class)
if with_runtimes:
start_time = time.time()

@@ -201,6 +203,7 @@ def feature_extraction(graph, list_feature_classes, with_runtimes=False):
columns = [(feat_class_inst.shortname, col) for col in features.columns]
features_df[columns] = features
del feat_class_inst
L.debug("done with: %s/ %s, %s", i, len(list_feature_classes), feature_class)

if with_runtimes:
features_df[("runtimes", feature_class.name)] = time.time() - start_time
14 changes: 9 additions & 5 deletions hcga/feature_class.py
@@ -41,7 +41,7 @@ def _hmean(dist):

def _mode(dist):
""""""
-    return st.mode(dist)[0][0]
+    return st.mode(dist).mode


def _get_index(args, i=0):
@@ -56,12 +56,16 @@ def _trivial(graph): # pylint: disable=unused-argument

def _feat_N(graph, features):
""""""
-    return features / len(graph.nodes)
+    if features is not None:
+        return features / len(graph.nodes)
+    return None


def _feat_E(graph, features):
""""""
-    return features / len(graph.edges)
+    if features is not None:
+        return features / len(graph.edges)
+    return None


class FeatureClass:
@@ -413,13 +417,13 @@ def _clustering_statistics(self, community_partition, feat_name, feat_desc, feat

self.add_feature(
feat_name + "_coverage",
-            lambda: list(partial(quality.partition_quality, partition=community_partition))[0],
+            lambda graph: quality.partition_quality(graph, partition=community_partition)[0],
"Coverage" + compl_desc,
feat_interpret,
)
self.add_feature(
feat_name + "_performance",
-            lambda: list(partial(quality.partition_quality, partition=community_partition))[1],
+            lambda graph: quality.partition_quality(graph, partition=community_partition)[1],
"Performance" + compl_desc,
feat_interpret,
)
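For reference, a minimal sketch (not part of the commit) of the newer APIs this file now targets: in recent SciPy, scipy.stats.mode returns a ModeResult whose .mode field gives the modal value directly, and networkx's partition_quality returns a (coverage, performance) pair, which is why the lambdas above index [0] and [1].

```python
# Illustration only: the newer SciPy / networkx calls assumed by the updated code.
import networkx as nx
import scipy.stats as st
from networkx.algorithms.community import quality

dist = [1, 2, 2, 3, 2]
modal_value = st.mode(dist).mode  # ModeResult field access instead of [0][0]

graph = nx.karate_club_graph()
partition = [set(range(17)), set(range(17, 34))]
# partition_quality replaces the deprecated coverage()/performance() helpers and
# returns a (coverage, performance) tuple, hence the [0] / [1] indexing above.
coverage, performance = quality.partition_quality(graph, partition)
```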
48 changes: 34 additions & 14 deletions hcga/features/basal_nodes.py
@@ -2,6 +2,8 @@

from functools import lru_cache

+import networkx as nx

from hcga.feature_class import FeatureClass, InterpretabilityScore

featureclass_name = "BasalNodes"
@@ -21,24 +23,34 @@ def basal_nodes_func(graph):

def n_basal_nodes(graph):
"""n_basal_nodes."""
-    return len(basal_nodes_func(graph))
+    if nx.is_directed(graph):
+        return len(basal_nodes_func(graph))
+    return 0


def basal_degrees(graph):
"""basal_degrees"""
-    return [dict(graph.out_degree)[i] for i in basal_nodes_func(graph)]
+    if nx.is_directed(graph):
+        return [dict(graph.out_degree)[i] for i in basal_nodes_func(graph)]
+    return [0]


def n_basal_edges(graph):
"""n_basal_edges"""
-    return sum(dict(graph.out_degree)[i] for i in basal_nodes_func(graph))
+    if nx.is_directed(graph):
+        return sum(dict(graph.out_degree)[i] for i in basal_nodes_func(graph))
+    return 0


def exp_basal_edge(graph):
"""exp_basal_edge"""
-    in_degs = list(dict(graph.in_degree).values())
-    r = sum(dict(graph.out_degree)[i] for i in basal_nodes_func(graph)) / (graph.number_of_edges())
-    return [i * r for i in in_degs]
+    if nx.is_directed(graph):
+        in_degs = list(dict(graph.in_degree).values())
+        r = sum(dict(graph.out_degree)[i] for i in basal_nodes_func(graph)) / (
+            graph.number_of_edges()
+        )
+        return [i * r for i in in_degs]
+    return [0]


@lru_cache(maxsize=None)
@@ -50,26 +62,34 @@ def attracting_nodes_func(graph):

def n_attracting_nodes(graph):
"""n_attracting_nodes"""
-    return len(attracting_nodes_func(graph))
+    if nx.is_directed(graph):
+        return len(attracting_nodes_func(graph))
+    return 0


def attracting_degrees(graph):
"""attracting_degrees"""
-    return [dict(graph.in_degree)[i] for i in attracting_nodes_func(graph)]
+    if nx.is_directed(graph):
+        return [dict(graph.in_degree)[i] for i in attracting_nodes_func(graph)]
+    return [0]


def n_attracting_edges(graph):
"""n_attracting_edges"""
-    return sum(dict(graph.in_degree)[i] for i in attracting_nodes_func(graph))
+    if nx.is_directed(graph):
+        return sum(dict(graph.in_degree)[i] for i in attracting_nodes_func(graph))
+    return 0


def exp_attracting_edge(graph):
"""exp_attracting_edge"""
-    out_degs = list(dict(graph.out_degree).values())
-    r = sum(dict(graph.in_degree)[i] for i in attracting_nodes_func(graph)) / (
-        graph.number_of_edges()
-    )
-    return [i * r for i in out_degs]
+    if nx.is_directed(graph):
+        out_degs = list(dict(graph.out_degree).values())
+        r = sum(dict(graph.in_degree)[i] for i in attracting_nodes_func(graph)) / (
+            graph.number_of_edges()
+        )
+        return [i * r for i in out_degs]
+    return [0]
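A tiny sketch (illustration only, assuming basal nodes are those with zero in-degree) of the nx.is_directed guard pattern introduced throughout this file: directed-only measures now fall back to a neutral value instead of failing on undirected graphs.

```python
# Hypothetical demo of the guard pattern above; n_basal_nodes_demo is not hcga code.
import networkx as nx

def n_basal_nodes_demo(graph):
    """Count zero in-degree nodes on directed graphs, fall back to 0 otherwise."""
    if nx.is_directed(graph):
        return sum(1 for _, deg in graph.in_degree if deg == 0)
    return 0

print(n_basal_nodes_demo(nx.DiGraph([(0, 1), (0, 2), (1, 2)])))  # 1 (node 0)
print(n_basal_nodes_demo(nx.Graph([(0, 1), (1, 2)])))            # 0 (undirected fallback)
```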


class BasalNodes(FeatureClass):
4 changes: 2 additions & 2 deletions hcga/features/centralities_basic.py
@@ -78,12 +78,12 @@ def katz_centrality(graph):

def pagerank(graph):
"""pagerank"""
-    return list(nx.pagerank_numpy(graph).values())
+    return list(nx.pagerank(graph).values())


def weighted_pagerank(graph):
"""weighted_pagerank"""
-    return list(nx.pagerank_numpy(graph, weight="weight").values())
+    return list(nx.pagerank(graph, weight="weight").values())
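nx.pagerank_numpy has been removed from recent networkx releases; a quick sketch (not from the commit) of the plain nx.pagerank call used as the replacement:

```python
# Illustration only: nx.pagerank as the drop-in replacement for nx.pagerank_numpy.
import networkx as nx

graph = nx.les_miserables_graph()  # example graph with "weight" edge attributes
unweighted_scores = list(nx.pagerank(graph).values())
weighted_scores = list(nx.pagerank(graph, weight="weight").values())
assert abs(sum(unweighted_scores) - 1.0) < 1e-6  # PageRank scores sum to one
```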


class CentralitiesBasic(FeatureClass):
2 changes: 1 addition & 1 deletion hcga/features/communities_asyn.py
@@ -22,7 +22,7 @@ def eval_asyn(graph, num_comms):

def sum_density(graph, num_comms):
"""sum_density"""
-    return (sum(eval_asyn(graph, num_comms)[1]),)
+    return sum(eval_asyn(graph, num_comms)[1])


def ratio_density(graph, num_comms):
8 changes: 6 additions & 2 deletions hcga/features/components.py
@@ -66,12 +66,16 @@ def attracting_component_sizes(graph):

def number_basal_components(graph):
"""number_basal_components"""
-    return nx.number_attracting_components(nx.reverse(graph))
+    if nx.is_directed(graph):
+        return nx.number_attracting_components(nx.reverse(graph))
+    return 0


def basal_component_sizes(graph):
"""basal_component_sizes"""
-    return [len(i) for i in nx.attracting_components(nx.reverse(graph))]
+    if nx.is_directed(graph):
+        return [len(i) for i in nx.attracting_components(nx.reverse(graph))]
+    return [0]
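A short sketch (not part of the commit) of why the reversed graph is used here: the attracting components of reverse(G) are exactly the basal, source-only components of G.

```python
# Illustration only: basal components of G via attracting components of reverse(G).
import networkx as nx

graph = nx.DiGraph([(0, 1), (1, 2), (3, 2)])
basal = [set(c) for c in nx.attracting_components(nx.reverse(graph))]
print(basal)  # {0} and {3}: the components of G with no incoming edges
```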


class Components(FeatureClass):
2 changes: 1 addition & 1 deletion hcga/features/distance_measures.py
@@ -35,7 +35,7 @@ def eccentricity(graph):

def extrema_bounding(graph):
"""extrema_bounding"""
-    return nx.extrema_bounding(ensure_connected(graph))
+    return nx.diameter(ensure_connected(graph), usebounds=True)
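nx.extrema_bounding was removed upstream; the same bounds-based algorithm is now reached through nx.diameter with usebounds=True, sketched below (illustration only).

```python
# Illustration only: the bounds-based diameter call replacing nx.extrema_bounding.
import networkx as nx

graph = nx.path_graph(6)
print(nx.diameter(graph, usebounds=True))  # 5, computed with the extrema-bounding method
```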


class DistanceMeasures(FeatureClass):
14 changes: 11 additions & 3 deletions hcga/features/flow_hierarchy.py
@@ -1,6 +1,6 @@
"""Flow hierarchy class."""

-from functools import partial
+from functools import lru_cache, partial

import networkx as nx

@@ -9,6 +9,14 @@
featureclass_name = "FlowHierarchy"


+@lru_cache(maxsize=None)
+def flow_hierarchy(graph, weight=None):
+    """apply flow hierarchy only on digraph"""
+    if isinstance(graph, nx.DiGraph):
+        return nx.flow_hierarchy(graph, weight)
+    return 0.0
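A usage sketch (illustration only; flow_hierarchy_demo mirrors the wrapper above) showing why the guard is needed: nx.flow_hierarchy is only defined for directed graphs, and lru_cache memoises repeated calls on the same graph object.

```python
# Hypothetical demo mirroring the cached, directed-only wrapper added above.
from functools import lru_cache

import networkx as nx

@lru_cache(maxsize=None)
def flow_hierarchy_demo(graph, weight=None):
    if isinstance(graph, nx.DiGraph):
        return nx.flow_hierarchy(graph, weight)
    return 0.0  # placeholder for undirected graphs, where the measure is undefined

directed = nx.DiGraph([(0, 1), (1, 2), (2, 0), (2, 3)])
print(flow_hierarchy_demo(directed))            # 0.25: only edge (2, 3) lies outside a cycle
print(flow_hierarchy_demo(nx.Graph([(0, 1)])))  # 0.0 fallback
```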


class FlowHierarchy(FeatureClass):
"""Flow hierarchy class.
Expand Down Expand Up @@ -38,14 +46,14 @@ def compute_features(self):
# graph clique number
self.add_feature(
"flow_hierarchy",
-            nx.flow_hierarchy,
+            flow_hierarchy,
"fraction of edges not participating in cycles",
InterpretabilityScore(3),
)

self.add_feature(
"flow_hierarchy_weighted",
-            partial(nx.flow_hierarchy, weight="weight"),
+            partial(flow_hierarchy, weight="weight"),
"fraction of edges not participating in cycles",
InterpretabilityScore(3),
)
46 changes: 29 additions & 17 deletions hcga/features/in_out_degrees.py
@@ -9,39 +9,51 @@

def in_degree(graph):
"""in_degree"""
-    return list(dict(graph.in_degree).values())
+    if nx.is_directed(graph):
+        return list(dict(graph.in_degree).values())
+    return [0]


def out_degree(graph):
"""out_degree"""
-    return list(dict(graph.out_degree).values())
+    if nx.is_directed(graph):
+        return list(dict(graph.out_degree).values())
+    return [0]


def in_deg_n(graph):
"""in_deg_n"""
-    return [
-        i / d
-        for i, d in zip(list(dict(graph.in_degree).values()), list(dict(graph.degree).values()))
-    ]
+    if nx.is_directed(graph):
+        return [
+            i / d
+            for i, d in zip(list(dict(graph.in_degree).values()), list(dict(graph.degree).values()))
+        ]
+    return [0]


def out_deg_n(graph):
"""out_deg_n"""
-    return [
-        o / d
-        for o, d in zip(list(dict(graph.out_degree).values()), list(dict(graph.degree).values()))
-    ]
+    if nx.is_directed(graph):
+        return [
+            o / d
+            for o, d in zip(
+                list(dict(graph.out_degree).values()), list(dict(graph.degree).values())
+            )
+        ]
+    return [0]


def in_out_deg(graph):
"""in_out_deg"""
-    return [
-        i / o
-        for i, o in zip(
-            list(dict(graph.in_degree).values()),
-            list(dict(graph.out_degree).values()),
-        )
-    ]
+    if nx.is_directed(graph):
+        return [
+            i / o
+            for i, o in zip(
+                list(dict(graph.in_degree).values()),
+                list(dict(graph.out_degree).values()),
+            )
+        ]
+    return [0]


def in_degree_centrality(graph):
4 changes: 2 additions & 2 deletions hcga/features/jaccard_similarity.py
@@ -84,12 +84,12 @@ def degree_assortativity_coeff(graph):

def graph_clique_number(graph):
"""graph_clique_number"""
-    return nx.graph_clique_number(jaccard_similarity(graph))
+    return max(len(c) for c in nx.clique.find_cliques(jaccard_similarity(graph)))


def num_max_cliques(graph):
"""num_max_cliques"""
-    return nx.graph_number_of_cliques(jaccard_similarity(graph))
+    return sum(1 for _ in nx.clique.find_cliques(jaccard_similarity(graph)))
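nx.graph_clique_number and nx.graph_number_of_cliques were removed from networkx; both values can be recovered from the maximal-clique generator, as in this sketch (illustration only).

```python
# Illustration only: clique number and maximal-clique count via nx.find_cliques.
import networkx as nx

graph = nx.complete_graph(4)
graph.add_edge(3, 4)  # K4 plus one pendant edge

clique_number = max(len(c) for c in nx.find_cliques(graph))  # 4 (the K4)
n_max_cliques = sum(1 for _ in nx.find_cliques(graph))       # 2: {0,1,2,3} and {3,4}
```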


def transitivity(graph):
6 changes: 3 additions & 3 deletions hcga/features/looplessness.py
@@ -47,8 +47,8 @@ def looplessness(graph): # pylint: disable=too-many-locals
n = graph.number_of_nodes()

# Bipartite graphs
-    if nx.is_bipartite(graph):
-        trophic = [1] * n
+    if nx.is_bipartite(graph) or not nx.is_directed(graph):
+        trophic = [1.0] * n
return 0, trophic, 0, 0, 0, 0

# Non-bipartite graphs
@@ -86,7 +86,7 @@ def looplessness(graph): # pylint: disable=too-many-locals
trophic[j] = s[i]

# Convert all weights to 1 in order to compute trophic levels
-    a = np.where(nx.adj_matrix(graph).toarray() > 0, 1, 0)
+    a = np.where(nx.adjacency_matrix(graph).toarray() > 0, 1, 0)

LHS = [(tr - 1) * k for tr, k in zip(trophic, in_degrees)]
RHS = list(np.dot(a, np.array(trophic)))
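nx.adj_matrix was a deprecated alias that newer networkx drops; nx.adjacency_matrix is the surviving name. A small sketch (illustration only) of the binarisation step above:

```python
# Illustration only: binarising a weighted adjacency matrix with nx.adjacency_matrix.
import networkx as nx
import numpy as np

graph = nx.DiGraph()
graph.add_weighted_edges_from([(0, 1, 2.5), (1, 2, 0.5)])
a = np.where(nx.adjacency_matrix(graph).toarray() > 0, 1, 0)
print(a)  # 0/1 structure of the weighted adjacency matrix
```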