diff --git a/docs/examples/blocksworld.ipynb b/docs/examples/blocksworld.ipynb index 81f33fd..d88a984 100644 --- a/docs/examples/blocksworld.ipynb +++ b/docs/examples/blocksworld.ipynb @@ -6,8 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install numpy scikit-learn wlplan\n", - "%pip install -i https://test.pypi.org/simple/ pymimir-dzc-fork==0.1.3" + "%pip install numpy scikit-learn wlplan" ] }, { @@ -19,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -43,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -166,18 +165,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X.shape=(1348, 10442)\n", - "y.shape=(1348,)\n" - ] - } - ], + "outputs": [], "source": [ "feature_generator = WLFeatures(domain=wlplan_domain, iterations=4)\n", "feature_generator.collect(dataset)\n", @@ -196,17 +186,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loss=1.0842255191254774e-17\n" - ] - } - ], + "outputs": [], "source": [ "linear_kernel = DotProduct(sigma_0=0, sigma_0_bounds=\"fixed\")\n", "model = GaussianProcessRegressor(kernel=linear_kernel, alpha=1e-7, random_state=0)\n", @@ -240,7 +222,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/include/data/dataset.hpp b/include/data/dataset.hpp index 753f981..f46f011 100644 --- a/include/data/dataset.hpp +++ b/include/data/dataset.hpp @@ -27,6 +27,8 @@ namespace data { Dataset(const planning::Domain &domain, const std::vector &data); + size_t get_size() const; + private: std::unordered_map predicate_to_arity; diff --git a/include/feature_generation/wl_features.hpp b/include/feature_generation/wl_features.hpp index 4bda7be..87b09d3 100644 --- a/include/feature_generation/wl_features.hpp +++ b/include/feature_generation/wl_features.hpp @@ -14,7 +14,6 @@ class int_vector_hasher { public: - // https://stackoverflow.com/a/27216842 std::size_t operator()(std::vector const &vec) const { std::size_t seed = vec.size(); @@ -71,6 +70,12 @@ namespace feature_generation { // for iteration j = 0, ..., iterations - 1 std::vector> seen_colour_statistics; + // training statistics + int n_seen_graphs; + int n_seen_nodes; + int n_seen_edges; + std::set seen_initial_colours; + public: WLFeatures(const planning::Domain &domain, std::string graph_representation, @@ -85,11 +90,18 @@ namespace feature_generation { // collect training colours void collect(const data::Dataset dataset); + void collect(const planning::State state); + void collect(const std::vector &graphs); // set problem for graph generator if it exists void set_problem(const planning::Problem &problem); + // get string representation of WL colours agnostic to the number of collected colours + std::string get_string_representation(const Embedding &embedding); + + std::string get_string_representation(const planning::State &state); + // assumes training is done, and returns a feature matrix X std::vector embed(const data::Dataset &dataset); @@ -121,6 +133,16 @@ namespace feature_generation { std::vector get_unseen_counts() const { return seen_colour_statistics[0]; }; + int get_n_seen_graphs() const { return n_seen_graphs; } + + int get_n_seen_nodes() const { return n_seen_nodes; } + + int get_n_seen_edges() const { return n_seen_edges; } + + int get_n_seen_initial_colours() const { return seen_initial_colours.size(); } + + int get_n_seen_refined_colours() const { return (int)colour_hash.size(); } + /* Other useful functions */ std::unordered_map, int, int_vector_hasher> diff --git a/include/graph/ilg_generator.hpp b/include/graph/ilg_generator.hpp index 6c7367f..f9a8c58 100644 --- a/include/graph/ilg_generator.hpp +++ b/include/graph/ilg_generator.hpp @@ -30,7 +30,7 @@ enum class ILGFactDescription { ILG_FACT_DESCRIPTIONS }; namespace graph { class ILGGenerator : public GraphGenerator { public: - ILGGenerator(const planning::Domain &domain); + ILGGenerator(const planning::Domain &domain, bool differentiate_constant_objects); // Change the base graph based on the input problem void set_problem(const planning::Problem &problem) override; @@ -54,16 +54,17 @@ namespace graph { void dump_graph() const override; private: + /* The following variables remain constant for all problems */ + const planning::Domain &domain; + const std::unordered_map predicate_to_colour; + bool differentiate_constant_objects; + /* These variables get reset every time a new problem is set */ std::shared_ptr base_graph; std::unordered_set positive_goal_names; std::unordered_set negative_goal_names; std::shared_ptr problem; - /* The following variables remain constant for all problems */ - const planning::Domain &domain; - const std::unordered_map predicate_to_colour; - // Do not use a vector here because colours can be negative, i.e. constant objects std::map colour_to_description; int fact_colour(const int predicate_idx, const ILGFactDescription &fact_description) const; diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..6d6920f --- /dev/null +++ b/install.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Show commands +set -x +# Exit on the first error +set -e + +# Build with all cores +export MAKEFLAGS="-j$(nproc)" + +# Install the package from sources +mkdir -p _wlplan +pip install . -v + +# Make sure required tools are installed +pip install pybind11-stubgen + +# Generate stubs +rm -rf _wlplan/*.pyi +pybind11-stubgen _wlplan -o . + +# Generate documentation +pip install sphinx sphinx_rtd_theme +cd docs +rm -rf _build/ +make html +cd .. diff --git a/src/data/dataset.cpp b/src/data/dataset.cpp index 633dbb2..a2d2016 100644 --- a/src/data/dataset.cpp +++ b/src/data/dataset.cpp @@ -63,4 +63,12 @@ namespace data { } } } + + size_t Dataset::get_size() const { + size_t ret = 0; + for (const auto &problem_states : data) { + ret += problem_states.states.size(); + } + return ret; + } } // namespace data diff --git a/src/feature_generation/wl_features.cpp b/src/feature_generation/wl_features.cpp index b4e6918..11afedc 100644 --- a/src/feature_generation/wl_features.cpp +++ b/src/feature_generation/wl_features.cpp @@ -29,6 +29,11 @@ namespace feature_generation { neighbour_container = std::make_shared(multiset_hash); seen_colour_statistics = std::vector>(2, std::vector(iterations, 0)); store_weights = false; + + n_seen_graphs = 0; + n_seen_nodes = 0; + n_seen_edges = 0; + seen_initial_colours = std::set(); } WLFeatures::WLFeatures(const std::string &filename) { @@ -202,6 +207,9 @@ namespace feature_generation { const auto &graph = graphs[graph_i]; std::unordered_map histogram; int n_nodes = graph.nodes.size(); + n_seen_graphs++; + n_seen_nodes += n_nodes; + n_seen_edges += graph.get_n_edges(); std::vector colours(n_nodes); for (int node_i = 0; node_i < n_nodes; node_i++) { cur_collecting_layer = 0; @@ -211,6 +219,7 @@ namespace feature_generation { } histogram[col]++; colours[node_i] = col; + seen_initial_colours.insert(col); } graph_histograms.push_back(histogram); graph_colours.push_back(colours); @@ -293,6 +302,23 @@ namespace feature_generation { } } + std::string WLFeatures::get_string_representation(const Embedding &embedding) { + std::string str_embed = ""; + for (size_t i = 0; i < embedding.size(); i++) { + int count = embedding[i]; + if (count == 0) { + continue; + } + str_embed += std::to_string(i) + "." + std::to_string(count) + "."; + } + return str_embed; + } + + std::string WLFeatures::get_string_representation(const planning::State &state) { + Embedding x = embed(state); + return get_string_representation(x); + } + std::vector WLFeatures::embed(const data::Dataset &dataset) { std::vector graphs = convert_to_graphs(dataset); if (graphs.size() == 0) { @@ -422,8 +448,8 @@ namespace feature_generation { return int_colour_hash; } - std::unordered_map - WLFeatures::int_to_str_colour_hash(std::unordered_map, int, int_vector_hasher> int_colour_hash) const { + std::unordered_map WLFeatures::int_to_str_colour_hash( + std::unordered_map, int, int_vector_hasher> int_colour_hash) const { std::unordered_map str_colour_hash; for (const auto &pair : int_colour_hash) { std::string colour_str = ""; diff --git a/src/graph/graph_generator_factory.cpp b/src/graph/graph_generator_factory.cpp index 81de4f7..acc1119 100644 --- a/src/graph/graph_generator_factory.cpp +++ b/src/graph/graph_generator_factory.cpp @@ -6,7 +6,7 @@ namespace graph { std::shared_ptr create_graph_generator(const std::string &name, const planning::Domain &domain) { if (name == "ilg") { - return std::make_shared(domain); + return std::make_shared(domain, false); } else if (name == "custom") { return NULL; } else { diff --git a/src/graph/ilg_generator.cpp b/src/graph/ilg_generator.cpp index 53498d5..c22bca4 100644 --- a/src/graph/ilg_generator.cpp +++ b/src/graph/ilg_generator.cpp @@ -8,13 +8,17 @@ char const *fact_description_name[] = {ILG_FACT_DESCRIPTIONS}; #define to_atom_node(x) x.to_string() namespace graph { - ILGGenerator::ILGGenerator(const planning::Domain &domain) - : domain(domain), predicate_to_colour(domain.predicate_to_colour) { - /// initialise initial node colours - // add constant object colours - for (size_t i = 0; i < domain.constant_objects.size(); i++) { - int colour = -(i + 1); - colour_to_description[colour] = "_constant_object_ " + domain.constant_objects[i]; + ILGGenerator::ILGGenerator(const planning::Domain &domain, bool differentiate_constant_objects) + : domain(domain), + predicate_to_colour(domain.predicate_to_colour), + differentiate_constant_objects(differentiate_constant_objects) { + // initialise initial node colours + if (differentiate_constant_objects) { + // add constant object colours + for (size_t i = 0; i < domain.constant_objects.size(); i++) { + int colour = -(i + 1); + colour_to_description[colour] = "_constant_object_ " + domain.constant_objects[i]; + } } colour_to_description[0] = "_object_"; @@ -37,31 +41,37 @@ namespace graph { this->problem = std::make_shared(problem); /// add nodes + int colour; + // add constant object nodes for (size_t i = 0; i < problem.get_constant_objects().size(); i++) { std::string node = to_obj_node(domain.constant_objects[i]); - int colour = -(i + 1); + if (differentiate_constant_objects) { + colour = -(i + 1); + } else { + colour = 0; + } graph.add_node(node, colour); } // objects for (const auto &object : problem.get_problem_objects()) { std::string node = to_obj_node(object); - int colour = 0; + colour = 0; graph.add_node(node, colour); } // atoms for (const auto &atom : problem.get_positive_goals()) { std::string node = to_atom_node(atom); - int colour = fact_colour(atom, ILGFactDescription::F_POS_GOAL); + colour = fact_colour(atom, ILGFactDescription::F_POS_GOAL); graph.add_node(node, colour); positive_goal_names.insert(node); } for (const auto &atom : problem.get_negative_goals()) { std::string node = to_atom_node(atom); - int colour = fact_colour(atom, ILGFactDescription::F_NEG_GOAL); + colour = fact_colour(atom, ILGFactDescription::F_NEG_GOAL); graph.add_node(node, colour); negative_goal_names.insert(node); } @@ -116,7 +126,8 @@ namespace graph { neg_goal_changed_pred.push_back(pred_idx); } } else { - atom_node = graph->add_node(atom_node_str, fact_colour(pred_idx, ILGFactDescription::NON_GOAL)); + atom_node = + graph->add_node(atom_node_str, fact_colour(pred_idx, ILGFactDescription::NON_GOAL)); if (store_changes) { n_nodes_added++; } @@ -138,11 +149,14 @@ namespace graph { void ILGGenerator::reset_graph() const { for (size_t i = 0; i < pos_goal_changed.size(); i++) { - base_graph->change_node_colour(pos_goal_changed[i], fact_colour(pos_goal_changed_pred[i], ILGFactDescription::F_POS_GOAL)); + base_graph->change_node_colour( + pos_goal_changed[i], + fact_colour(pos_goal_changed_pred[i], ILGFactDescription::F_POS_GOAL)); } for (const auto &node : neg_goal_changed) { - base_graph->change_node_colour(node, fact_colour(neg_goal_changed_pred[node], ILGFactDescription::F_NEG_GOAL)); + base_graph->change_node_colour( + node, fact_colour(neg_goal_changed_pred[node], ILGFactDescription::F_NEG_GOAL)); } for (int i = 0; i < n_nodes_added; i++) { diff --git a/src/main.cpp b/src/main.cpp index 0fb49eb..62a02e3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -199,6 +199,10 @@ wl_features "graphs"_a) .def("set_problem", &feature_generation::WLFeatures::set_problem, "problem"_a) + .def("get_string_representation", py::overload_cast(&feature_generation::WLFeatures::get_string_representation), + "embedding"_a) + .def("get_string_representation", py::overload_cast(&feature_generation::WLFeatures::get_string_representation), + "state"_a) .def("embed", py::overload_cast(&feature_generation::WLFeatures::embed), "dataset"_a) .def("embed", py::overload_cast &>(&feature_generation::WLFeatures::embed), @@ -208,6 +212,11 @@ wl_features .def("get_n_features", &feature_generation::WLFeatures::get_n_features) .def("get_seen_counts", &feature_generation::WLFeatures::get_seen_counts) .def("get_unseen_counts", &feature_generation::WLFeatures::get_unseen_counts) + .def("get_n_seen_graphs", &feature_generation::WLFeatures::get_n_seen_graphs) + .def("get_n_seen_nodes", &feature_generation::WLFeatures::get_n_seen_nodes) + .def("get_n_seen_edges", &feature_generation::WLFeatures::get_n_seen_edges) + .def("get_n_seen_initial_colours", &feature_generation::WLFeatures::get_n_seen_initial_colours) + .def("get_n_seen_refined_colours", &feature_generation::WLFeatures::get_n_seen_refined_colours) .def("set_weights", &feature_generation::WLFeatures::set_weights, "weights"_a) .def("get_weights", &feature_generation::WLFeatures::get_weights) diff --git a/tests/custom_graph_test.py b/tests/custom_graph_test.py index 317c657..cf354af 100644 --- a/tests/custom_graph_test.py +++ b/tests/custom_graph_test.py @@ -13,7 +13,9 @@ def test_blocksworld_random_path(): random.seed(0) + LOGGER.info("Getting raw dataset") domain, dataset, _ = get_raw_dataset(domain_name="blocksworld", keep_statics=False) + LOGGER.info("Constructing feature generator") feature_generator = WLFeatures( domain=domain, graph_representation=None, @@ -21,6 +23,7 @@ def test_blocksworld_random_path(): prune_features=None, ) graphs = [] + LOGGER.info("Converting to random path graphs") for _, states in dataset: for state in states: G = nx.Graph() @@ -31,7 +34,9 @@ def test_blocksworld_random_path(): G = from_networkx(G) graphs.append(G) G.dump() + LOGGER.info("Collecting features") feature_generator.collect(graphs) + LOGGER.info("Embedding") X = np.array(feature_generator.embed(graphs)).astype(float) n_features = feature_generator.get_n_features() assert X.shape[1] == n_features diff --git a/wlplan/__version__.py b/wlplan/__version__.py index dd9b22c..7225152 100644 --- a/wlplan/__version__.py +++ b/wlplan/__version__.py @@ -1 +1 @@ -__version__ = "0.5.1" +__version__ = "0.5.2" diff --git a/wlplan/feature_generation.py b/wlplan/feature_generation.py index a3cc8a4..6ce230c 100644 --- a/wlplan/feature_generation.py +++ b/wlplan/feature_generation.py @@ -20,7 +20,7 @@ class WLFeatures(_WLFeatures): iterations : int, default=2 The number of WL iterations to perform. - prune_features : "collapse", "collapse_by_layer" or None, default="collapse" + prune_features : "collapse", "collapse_by_layer" or None, default=None How to detect and prune duplicate features. If None, no pruning is done. multiset_hash : bool, default=False @@ -55,6 +55,21 @@ class WLFeatures(_WLFeatures): get_unseen_counts(self) -> List[int] Returns a list of length `iterations` with the count of unseen colours at each iteration. Counts are from colours not seen from `collect` calls. The values are collected over all `embed` calls from the initialisation of this class. + get_n_seen_graphs -> int + Returns the number of training graphs collected from `collect` calls. + + get_n_seen_nodes -> int + Returns the number of training nodes collected from `collect` calls. + + get_n_seen_edges -> int + Returns the number of training edges collected from `collect` calls. + + get_n_seen_initial_colours -> int + Returns the number of initial colours collected from `collect` calls. + + get_n_seen_refined_colours -> int + Returns the number of refined colours collected from `collect` calls. + set_weights(self, weights: Union[list[float], list[int]]) -> None Set the weights to predict heuristics directly with this class. The weights must be a list of floats, integers or a numpy array of floats. The length of the weights must be the same as the number of features collected. @@ -76,7 +91,7 @@ def __init__( domain: Domain, graph_representation: Optional[str] = "ilg", iterations: int = 2, - prune_features: Optional[str] = "no_prune", + prune_features: Optional[str] = None, multiset_hash: bool = False, **kwargs, ) -> None: