Skip to content

Commit

Permalink
fixed layer pruning after refactor
Browse files (browse the repository at this point in the history)
  • Loading branch information
DillonZChen committed Jan 15, 2025
1 parent acd7a7a commit c201f38
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 43 deletions.
11 changes: 6 additions & 5 deletions include/feature_generation/features.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ namespace feature_generation {
int get_colour_hash(const std::vector<int> &colour);

// reformat colour hash based on colours to throw out
std::map<int, int> reformat_colour_hash(const std::vector<int> &to_prune);
void init_layer_to_colours();
std::map<int, int> reformat_colour_hash(const std::set<int> &to_prune);
virtual std::vector<int> reformat_neighbour_colours(const std::vector<int> &colours,
const std::map<int, int> &remap) = 0;

Expand Down Expand Up @@ -117,13 +118,13 @@ namespace feature_generation {

/* Pruning functions */

std::vector<int> features_to_prune_this_iteration(int iteration,
std::set<int> features_to_prune_this_iteration(int iteration,
std::vector<std::vector<int>> &cur_colours);
std::vector<int> features_to_prune(std::vector<Embedding> X);
std::set<int> features_to_prune(const std::vector<graph::Graph> &graphs);

std::vector<int> greedy_iteration_pruner(int iteration,
std::set<int> greedy_iteration_pruner(int iteration,
std::vector<std::vector<int>> &cur_colours);
std::vector<int> greedy_all_pruner(std::vector<Embedding> X);
std::set<int> greedy_all_pruner(std::vector<Embedding> X);

/* Prediction functions */

Expand Down
35 changes: 22 additions & 13 deletions src/feature_generation/feature_generators/wl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,17 @@ namespace feature_generation {

std::vector<int> WLFeatures::reformat_neighbour_colours(const std::vector<int> &colours,
const std::map<int, int> &remap) {
// check neighbour_container for definitions
std::vector<int> new_colours(colours.size());
for (size_t i = 0; i < colours.size(); i++) {
if (i == 0 || i % 2 == 1) {
new_colours[i] = remap.at(colours[i]);

// colours should always show up in remap by their construction
new_colours[0] = remap.at(colours[0]);
for (size_t i = 1; i < colours.size(); i++) {
int colour = colours[i];
if ((multiset_hash && (i % 3 == 2)) || (!multiset_hash && (i % 2 == 0))) {
new_colours[i] = remap.at(colour);
} else {
new_colours[i] = colours[i];
new_colours[i] = colour;
}
}
return new_colours;
Expand Down Expand Up @@ -90,7 +95,10 @@ namespace feature_generation {
std::vector<std::vector<int>> graph_colours;
std::vector<std::vector<int>> graph_colours_tmp;

// init colours
n_seen_graphs += graphs.size();
cur_collecting_layer = 0;
std::cout << "collecting iteration " << cur_collecting_layer << std::endl;
for (size_t graph_i = 0; graph_i < graphs.size(); graph_i++) {
const auto graph = std::make_shared<graph::Graph>(graphs[graph_i]);
int n_nodes = graph->nodes.size();
Expand All @@ -99,9 +107,6 @@ namespace feature_generation {
n_seen_edges += n_edges;

std::vector<int> colours(n_nodes, 0);

// init colours
cur_collecting_layer = 0;
for (int node_i = 0; node_i < n_nodes; node_i++) {
int col = get_colour_hash({graph->nodes[node_i]});
colours[node_i] = col;
Expand All @@ -114,31 +119,35 @@ namespace feature_generation {
// main WL loop
for (int iteration = 1; iteration < iterations + 1; iteration++) {
cur_collecting_layer = iteration;
std::cout << "collecting iteration " << cur_collecting_layer << std::endl;

for (size_t graph_i = 0; graph_i < graphs.size(); graph_i++) {
const auto graph = std::make_shared<graph::Graph>(graphs[graph_i]);
refine(graph, graph_colours[graph_i], graph_colours_tmp[graph_i]);
}

// layer pruning
std::vector<int> features_to_prune =
features_to_prune_this_iteration(iteration, graph_colours);
if (features_to_prune.size() != 0) {
std::map<int, int> remap = reformat_colour_hash(features_to_prune);
std::set<int> to_prune = features_to_prune_this_iteration(iteration, graph_colours);
if (to_prune.size() != 0) {
std::map<int, int> remap = reformat_colour_hash(to_prune);
for (size_t graph_i = 0; graph_i < graphs.size(); graph_i++) {
for (size_t node_i = 0; node_i < graph_colours[graph_i].size(); node_i++) {
int col = graph_colours[graph_i][node_i];
if (remap.count(col) > 0) {
graph_colours[graph_i][node_i] = remap[col];
} else {
graph_colours[graph_i][node_i] = UNSEEN_COLOUR;
}
}
}
}
}

// bulk pruning
std::vector<int> to_prune = features_to_prune(graph_colours);
reformat_colour_hash(to_prune);
std::set<int> to_prune = features_to_prune(graphs);
if (to_prune.size() != 0) {
reformat_colour_hash(to_prune);
}
layer_redundancy_check();
}

Expand Down
72 changes: 65 additions & 7 deletions src/feature_generation/features.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@

using json = nlohmann::json;

// Debug helper: prints every element of the iterable `k`, each followed by
// ".", then " : ", the value `v`, and a newline (flushed through std::endl).
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly
// one statement. Without the wrapper the expansion is two statements (the loop
// and the trailing print), which silently detaches the print from an unbraced
// `if` and fails to compile when an `else` follows. Call sites supply the
// trailing semicolon themselves: `debug_hash(key, value);`.
//
// Both arguments are parenthesised so arbitrary expressions can be passed, and
// the loop variable has a macro-private name so it does not shadow a variable
// named `i` at the call site.
#define debug_hash(k, v)                          \
  do {                                            \
    for (const int debug_hash_elem_ : (k)) {      \
      std::cout << debug_hash_elem_ << ".";       \
    }                                             \
    std::cout << " : " << (v) << std::endl;       \
  } while (0)

namespace feature_generation {
Features::Features(const std::string feature_name,
const planning::Domain &domain,
Expand All @@ -40,6 +46,10 @@ namespace feature_generation {
n_seen_nodes = 0;
n_seen_edges = 0;
seen_initial_colours = std::set<int>();
init_layer_to_colours();
}

void Features::init_layer_to_colours() {
  // Reset the per-layer colour index to one empty set per layer.
  // Layers are indexed 0..iterations inclusive: the zeroth iteration counts as
  // a layer of its own, so there is one more slot than `iterations`.
  const auto n_layers = iterations + 1;
  layer_to_colours = std::vector<std::set<int>>(n_layers);
}
Expand Down Expand Up @@ -137,40 +147,88 @@ namespace feature_generation {
colour_hash[colour] = hash;
colour_to_layer[hash] = cur_collecting_layer;
layer_to_colours[cur_collecting_layer].insert(hash);

// debug_hash(colour, hash);
}
return colour_hash[colour];
}

std::map<int, int> Features::reformat_colour_hash(const std::vector<int> &to_prune) {
if (to_prune.size() == 0) {
return std::map<int, int>();
}
std::map<int, int> Features::reformat_colour_hash(const std::set<int> &to_prune) {
// TODO can be optimised if we have layer information for colours

// remap values
std::map<int, int> remap;
std::vector<std::pair<std::vector<int>, int>> new_hash_vec;
std::set<int> to_prune_set(to_prune.begin(), to_prune.end());
std::unordered_map<int, int> new_colour_layer;

// layer 0 colours (init colours) should remain consistent
for (const auto &[key, val] : colour_hash) {
int layer = colour_to_layer[val];
if (seen_initial_colours.count(val) == 0) {
if (layer == 0) {
std::cout << "error: encountered refined colour with layer = " << layer << std::endl;
exit(-1);
}
continue;
} else {
if (layer != 0) {
std::cout << "error: encountered initial colour with layer = " << layer << std::endl;
exit(-1);
}
// keep the same for initial colours
new_hash_vec.push_back(std::make_pair(key, val));
new_colour_layer[val] = layer;
remap[val] = val;
}
}

// deal with layer 1+ colours
for (const auto &[key, val] : colour_hash) {
if (to_prune_set.count(val) > 0) {
if (seen_initial_colours.count(val) > 0 || to_prune.count(val) > 0) {
continue;
}
int new_val = (int)new_hash_vec.size();
remap[val] = new_val;
new_hash_vec.push_back(std::make_pair(key, new_val));
new_colour_layer[new_val] = colour_to_layer[val];
}

// // debug
// std::cout << "initial_colours" << std::endl;
// for (const int i : seen_initial_colours) {
// std::cout << i << std::endl;
// }
// std::cout << "to_prune" << std::endl;
// for (const int i : to_prune) {
// std::cout << i << std::endl;
// }
// std::cout << "remap" << std::endl;
// for (const auto &[key, val] : remap) {
// std::cout << key << " -> " << val << std::endl;
// }

// remap keys
ColourHash new_colour_hash;
for (size_t i = 0; i < new_hash_vec.size(); i++) {
std::vector<int> key = new_hash_vec[i].first;
int val = new_hash_vec[i].second;
key = reformat_neighbour_colours(key, remap);
if (new_colour_layer[val] > 0) {
debug_hash(key, val);
key = reformat_neighbour_colours(key, remap);
}
new_colour_hash[key] = val;
}

// remap hash
colour_hash = new_colour_hash;

// remap colours
colour_to_layer = new_colour_layer;
init_layer_to_colours();
for (const auto &[key, val] : colour_hash) {
layer_to_colours[colour_to_layer[val]].insert(val);
}

return remap;
}

Expand Down
18 changes: 9 additions & 9 deletions src/feature_generation/neighbour_container.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ namespace feature_generation {
std::string str = "";
if (multiset_hash) {
for (const auto &kv : neighbours_mset) {
str += "." + std::to_string(kv.first.first);
str += "." + std::to_string(kv.first.second);
str += "." + std::to_string(kv.first.first); // edge label
str += "." + std::to_string(kv.first.second); // node colour
str += "." + std::to_string(kv.second); // count in multiset
}
} else {
for (const auto &kv : neighbours_set) {
str += "." + std::to_string(kv.first);
str += "." + std::to_string(kv.second);
str += "." + std::to_string(kv.first); // edge label
str += "." + std::to_string(kv.second); // node colour
}
}
return str;
Expand All @@ -44,14 +44,14 @@ namespace feature_generation {
std::vector<int> vec;
if (multiset_hash) {
for (const auto &kv : neighbours_mset) {
vec.push_back(kv.first.first);
vec.push_back(kv.first.second);
vec.push_back(kv.second); // count in multiset
vec.push_back(kv.first.first); // edge label i % 3 == 1
vec.push_back(kv.first.second); // node colour i % 3 == 2
vec.push_back(kv.second); // count in multiset i % 3 == 0
}
} else {
for (const auto &kv : neighbours_set) {
vec.push_back(kv.first);
vec.push_back(kv.second);
vec.push_back(kv.first); // edge label
vec.push_back(kv.second); // node colour
}
}
return vec;
Expand Down
10 changes: 6 additions & 4 deletions src/feature_generation/pruning/bulk_pruners.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ const int DISTINCT = -1;

namespace feature_generation {

std::vector<int> Features::features_to_prune(std::vector<Embedding> X) {
std::set<int> Features::features_to_prune(const std::vector<graph::Graph> &graphs) {
if (pruning == PruningOptions::COLLAPSE_ALL) {
collected = true;
std::vector<Embedding> X = embed_graphs(graphs);
return greedy_all_pruner(X);
} else {
return std::vector<int>();
return std::set<int>();
}
}

Expand Down Expand Up @@ -39,7 +41,7 @@ namespace feature_generation {
<< std::endl;
}

std::vector<int> Features::greedy_all_pruner(std::vector<Embedding> X) {
std::set<int> Features::greedy_all_pruner(std::vector<Embedding> X) {
std::cout << "Minimising equivalent features..." << std::endl;
FeatureDependencyGraph fdg = FeatureDependencyGraph(colour_hash);

Expand Down Expand Up @@ -109,6 +111,6 @@ namespace feature_generation {

std::cout << "Equivalent features minimised!" << std::endl;

return std::vector<int>();
return std::set<int>();
}
} // namespace feature_generation
10 changes: 5 additions & 5 deletions src/feature_generation/pruning/layer_pruners.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@

namespace feature_generation {

std::vector<int>
std::set<int>
Features::features_to_prune_this_iteration(int iteration,
std::vector<std::vector<int>> &cur_colours) {
if (pruning == PruningOptions::COLLAPSE_LAYER) {
return greedy_iteration_pruner(iteration, cur_colours);
} else {
return std::vector<int>();
return std::set<int>();
}
}

std::vector<int> Features::greedy_iteration_pruner(int iteration,
std::set<int> Features::greedy_iteration_pruner(int iteration,
std::vector<std::vector<int>> &cur_colours) {
std::set<int> colours = get_iteration_colours(iteration);
std::vector<int> features_to_prune;
std::set<int> features_to_prune;

std::map<int, std::vector<int>> columns;
size_t n_graphs = cur_colours.size();
Expand All @@ -42,7 +42,7 @@ namespace feature_generation {
unique_features.insert(column);
} else {
// throw out because not unique
features_to_prune.push_back(colour);
features_to_prune.insert(colour);
}
}

Expand Down

0 comments on commit c201f38

Please sign in to comment.