diff --git a/docs/examples/blocksworld.ipynb b/docs/examples/blocksworld.ipynb
index 81f33fd..d88a984 100644
--- a/docs/examples/blocksworld.ipynb
+++ b/docs/examples/blocksworld.ipynb
@@ -6,8 +6,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install numpy scikit-learn wlplan\n",
-    "%pip install -i https://test.pypi.org/simple/ pymimir-dzc-fork==0.1.3"
+    "%pip install numpy scikit-learn wlplan"
    ]
   },
   {
@@ -19,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -43,7 +42,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -69,7 +68,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -166,18 +165,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "X.shape=(1348, 10442)\n",
-      "y.shape=(1348,)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "feature_generator = WLFeatures(domain=wlplan_domain, iterations=4)\n",
     "feature_generator.collect(dataset)\n",
@@ -196,17 +186,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "loss=1.0842255191254774e-17\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "linear_kernel = DotProduct(sigma_0=0, sigma_0_bounds=\"fixed\")\n",
     "model = GaussianProcessRegressor(kernel=linear_kernel, alpha=1e-7, random_state=0)\n",
@@ -240,7 +222,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.14"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
diff --git a/include/data/dataset.hpp b/include/data/dataset.hpp
index 753f981..f46f011 100644
--- a/include/data/dataset.hpp
+++ b/include/data/dataset.hpp
@@ -27,6 +27,8 @@ namespace data {
 
     Dataset(const planning::Domain &domain, const std::vector<ProblemStates> &data);
 
+    size_t get_size() const;
+
    private:
     std::unordered_map<std::string, int> predicate_to_arity;
 
diff --git a/include/feature_generation/wl_features.hpp b/include/feature_generation/wl_features.hpp
index 4bda7be..87b09d3 100644
--- a/include/feature_generation/wl_features.hpp
+++ b/include/feature_generation/wl_features.hpp
@@ -14,7 +14,6 @@
 
 class int_vector_hasher {
  public:
-
   // https://stackoverflow.com/a/27216842
   std::size_t operator()(std::vector<int> const &vec) const {
     std::size_t seed = vec.size();
@@ -71,6 +70,12 @@ namespace feature_generation {
     // for iteration j = 0, ..., iterations - 1
     std::vector<std::vector<long>> seen_colour_statistics;
 
+    // training statistics; zero-initialised here so that every constructor starts from 0
+    int n_seen_graphs = 0;
+    int n_seen_nodes = 0;
+    int n_seen_edges = 0;
+    std::set<int> seen_initial_colours;
+
    public:
     WLFeatures(const planning::Domain &domain,
                std::string graph_representation,
@@ -85,11 +90,18 @@ namespace feature_generation {
     // collect training colours
     void collect(const data::Dataset dataset);
 
+    void collect(const planning::State state);
+
     void collect(const std::vector<graph::Graph> &graphs);
 
     // set problem for graph generator if it exists
     void set_problem(const planning::Problem &problem);
 
+    // get string representation of WL colours agnostic to the number of collected colours
+    std::string get_string_representation(const Embedding &embedding);
+
+    std::string get_string_representation(const planning::State &state);
+
     // assumes training is done, and returns a feature matrix X
     std::vector<Embedding> embed(const data::Dataset &dataset);
 
@@ -121,6 +133,16 @@ namespace feature_generation {
 
     std::vector<long> get_unseen_counts() const { return seen_colour_statistics[0]; };
 
+    // Read-only accessors for the training statistics.
+    int get_n_seen_graphs() const { return n_seen_graphs; }
+    int get_n_seen_nodes() const { return n_seen_nodes; }
+    int get_n_seen_edges() const { return n_seen_edges; }
+    // Container sizes are size_t; narrow explicitly to keep the int return type.
+    int get_n_seen_initial_colours() const {
+      return static_cast<int>(seen_initial_colours.size());
+    }
+    int get_n_seen_refined_colours() const { return static_cast<int>(colour_hash.size()); }
+
     /* Other useful functions */
 
     std::unordered_map<std::vector<int>, int, int_vector_hasher>
diff --git a/include/graph/ilg_generator.hpp b/include/graph/ilg_generator.hpp
index 6c7367f..f9a8c58 100644
--- a/include/graph/ilg_generator.hpp
+++ b/include/graph/ilg_generator.hpp
@@ -30,7 +30,7 @@ enum class ILGFactDescription { ILG_FACT_DESCRIPTIONS };
 namespace graph {
   class ILGGenerator : public GraphGenerator {
    public:
-    ILGGenerator(const planning::Domain &domain);
+    ILGGenerator(const planning::Domain &domain, bool differentiate_constant_objects);
 
     // Change the base graph based on the input problem
     void set_problem(const planning::Problem &problem) override;
@@ -54,16 +54,17 @@ namespace graph {
     void dump_graph() const override;
 
    private:
+    /* The following variables remain constant for all problems */
+    const planning::Domain &domain;
+    const std::unordered_map<std::string, int> predicate_to_colour;
+    bool differentiate_constant_objects;
+
     /* These variables get reset every time a new problem is set */
     std::shared_ptr<Graph> base_graph;
     std::unordered_set<std::string> positive_goal_names;
     std::unordered_set<std::string> negative_goal_names;
     std::shared_ptr<planning::Problem> problem;
 
-    /* The following variables remain constant for all problems */
-    const planning::Domain &domain;
-    const std::unordered_map<std::string, int> predicate_to_colour;
-
     // Do not use a vector here because colours can be negative, i.e. constant objects
     std::map<int, std::string> colour_to_description;
     int fact_colour(const int predicate_idx, const ILGFactDescription &fact_description) const;
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..6d6920f
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Echo every command before it runs
+set -x
+# Abort on the first failing command
+set -e
+
+# Build with all cores; without nproc fall back to sysctl, then 1 (a bare -j is unlimited)
+export MAKEFLAGS="-j$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)"
+
+# Install the package from sources
+mkdir -p _wlplan
+pip install . -v
+
+# Make sure the stub generator is installed
+pip install pybind11-stubgen
+
+# Regenerate the type stubs for the compiled module
+rm -rf _wlplan/*.pyi
+pybind11-stubgen _wlplan -o .
+
+# Rebuild the HTML documentation from a clean state
+pip install sphinx sphinx_rtd_theme
+cd docs
+rm -rf _build/
+make html
+cd ..
diff --git a/src/data/dataset.cpp b/src/data/dataset.cpp
index 633dbb2..a2d2016 100644
--- a/src/data/dataset.cpp
+++ b/src/data/dataset.cpp
@@ -63,4 +63,12 @@ namespace data {
       }
     }
   }
+
+  // Counts the states held by the dataset, summed over all of its problems.
+  // A dataset without any problems yields 0.
+  size_t Dataset::get_size() const {
+    size_t n_states = 0;
+    for (const auto &entry : data) n_states += entry.states.size();
+    return n_states;
+  }
 }  // namespace data
diff --git a/src/feature_generation/wl_features.cpp b/src/feature_generation/wl_features.cpp
index b4e6918..11afedc 100644
--- a/src/feature_generation/wl_features.cpp
+++ b/src/feature_generation/wl_features.cpp
@@ -29,6 +29,11 @@ namespace feature_generation {
     neighbour_container = std::make_shared<NeighbourContainer>(multiset_hash);
     seen_colour_statistics = std::vector<std::vector<long>>(2, std::vector<long>(iterations, 0));
     store_weights = false;
+
+    n_seen_graphs = 0;
+    n_seen_nodes = 0;
+    n_seen_edges = 0;
+    seen_initial_colours = std::set<int>();
   }
 
   WLFeatures::WLFeatures(const std::string &filename) {
@@ -202,6 +207,9 @@ namespace feature_generation {
       const auto &graph = graphs[graph_i];
       std::unordered_map<int, int> histogram;
       int n_nodes = graph.nodes.size();
+      n_seen_graphs++;
+      n_seen_nodes += n_nodes;
+      n_seen_edges += graph.get_n_edges();
       std::vector<int> colours(n_nodes);
       for (int node_i = 0; node_i < n_nodes; node_i++) {
         cur_collecting_layer = 0;
@@ -211,6 +219,7 @@ namespace feature_generation {
         }
         histogram[col]++;
         colours[node_i] = col;
+        seen_initial_colours.insert(col);
       }
       graph_histograms.push_back(histogram);
       graph_colours.push_back(colours);
@@ -293,6 +302,23 @@ namespace feature_generation {
     }
   }
 
+  std::string WLFeatures::get_string_representation(const Embedding &embedding) {
+    // Sparse "<index>.<count>." encoding; entries whose count is zero are left out.
+    std::string repr;
+    for (size_t idx = 0; idx < embedding.size(); idx++) {
+      const int n_occurrences = embedding[idx];
+      if (n_occurrences != 0) {
+        repr += std::to_string(idx) + "." + std::to_string(n_occurrences) + ".";
+      }
+    }
+    return repr;
+  }
+
+  // Convenience overload: embeds the state, then stringifies the resulting embedding.
+  std::string WLFeatures::get_string_representation(const planning::State &state) {
+    return get_string_representation(embed(state));
+  }
+
   std::vector<Embedding> WLFeatures::embed(const data::Dataset &dataset) {
     std::vector<graph::Graph> graphs = convert_to_graphs(dataset);
     if (graphs.size() == 0) {
@@ -422,8 +448,8 @@ namespace feature_generation {
     return int_colour_hash;
   }
 
-  std::unordered_map<std::string, int>
-  WLFeatures::int_to_str_colour_hash(std::unordered_map<std::vector<int>, int, int_vector_hasher> int_colour_hash) const {
+  std::unordered_map<std::string, int> WLFeatures::int_to_str_colour_hash(
+      std::unordered_map<std::vector<int>, int, int_vector_hasher> int_colour_hash) const {
     std::unordered_map<std::string, int> str_colour_hash;
     for (const auto &pair : int_colour_hash) {
       std::string colour_str = "";
diff --git a/src/graph/graph_generator_factory.cpp b/src/graph/graph_generator_factory.cpp
index 81de4f7..acc1119 100644
--- a/src/graph/graph_generator_factory.cpp
+++ b/src/graph/graph_generator_factory.cpp
@@ -6,7 +6,7 @@ namespace graph {
   std::shared_ptr<GraphGenerator> create_graph_generator(const std::string &name,
                                                          const planning::Domain &domain) {
     if (name == "ilg") {
-      return std::make_shared<ILGGenerator>(domain);
+      return std::make_shared<ILGGenerator>(domain, false);
     } else if (name == "custom") {
       return NULL;
     } else {
diff --git a/src/graph/ilg_generator.cpp b/src/graph/ilg_generator.cpp
index 53498d5..c22bca4 100644
--- a/src/graph/ilg_generator.cpp
+++ b/src/graph/ilg_generator.cpp
@@ -8,13 +8,17 @@ char const *fact_description_name[] = {ILG_FACT_DESCRIPTIONS};
 #define to_atom_node(x) x.to_string()
 
 namespace graph {
-  ILGGenerator::ILGGenerator(const planning::Domain &domain)
-      : domain(domain), predicate_to_colour(domain.predicate_to_colour) {
-    /// initialise initial node colours
-    // add constant object colours
-    for (size_t i = 0; i < domain.constant_objects.size(); i++) {
-      int colour = -(i + 1);
-      colour_to_description[colour] = "_constant_object_ " + domain.constant_objects[i];
+  ILGGenerator::ILGGenerator(const planning::Domain &domain, bool differentiate_constant_objects)
+      : domain(domain),
+        predicate_to_colour(domain.predicate_to_colour),
+        differentiate_constant_objects(differentiate_constant_objects) {
+    // initialise initial node colours
+    if (differentiate_constant_objects) {
+      // add constant object colours
+      for (size_t i = 0; i < domain.constant_objects.size(); i++) {
+        int colour = -(i + 1);
+        colour_to_description[colour] = "_constant_object_ " + domain.constant_objects[i];
+      }
     }
 
     colour_to_description[0] = "_object_";
@@ -37,31 +41,37 @@ namespace graph {
     this->problem = std::make_shared<planning::Problem>(problem);
 
     /// add nodes
+    int colour;
+
     // add constant object nodes
     for (size_t i = 0; i < problem.get_constant_objects().size(); i++) {
       std::string node = to_obj_node(domain.constant_objects[i]);
-      int colour = -(i + 1);
+      if (differentiate_constant_objects) {
+        colour = -(i + 1);
+      } else {
+        colour = 0;
+      }
       graph.add_node(node, colour);
     }
 
     // objects
     for (const auto &object : problem.get_problem_objects()) {
       std::string node = to_obj_node(object);
-      int colour = 0;
+      colour = 0;
       graph.add_node(node, colour);
     }
 
     // atoms
     for (const auto &atom : problem.get_positive_goals()) {
       std::string node = to_atom_node(atom);
-      int colour = fact_colour(atom, ILGFactDescription::F_POS_GOAL);
+      colour = fact_colour(atom, ILGFactDescription::F_POS_GOAL);
       graph.add_node(node, colour);
       positive_goal_names.insert(node);
     }
 
     for (const auto &atom : problem.get_negative_goals()) {
       std::string node = to_atom_node(atom);
-      int colour = fact_colour(atom, ILGFactDescription::F_NEG_GOAL);
+      colour = fact_colour(atom, ILGFactDescription::F_NEG_GOAL);
       graph.add_node(node, colour);
       negative_goal_names.insert(node);
     }
@@ -116,7 +126,8 @@ namespace graph {
           neg_goal_changed_pred.push_back(pred_idx);
         }
       } else {
-        atom_node = graph->add_node(atom_node_str, fact_colour(pred_idx, ILGFactDescription::NON_GOAL));
+        atom_node =
+            graph->add_node(atom_node_str, fact_colour(pred_idx, ILGFactDescription::NON_GOAL));
         if (store_changes) {
           n_nodes_added++;
         }
@@ -138,11 +149,14 @@ namespace graph {
 
   void ILGGenerator::reset_graph() const {
     for (size_t i = 0; i < pos_goal_changed.size(); i++) {
-      base_graph->change_node_colour(pos_goal_changed[i], fact_colour(pos_goal_changed_pred[i], ILGFactDescription::F_POS_GOAL));
+      base_graph->change_node_colour(
+          pos_goal_changed[i],
+          fact_colour(pos_goal_changed_pred[i], ILGFactDescription::F_POS_GOAL));
     }
 
     for (const auto &node : neg_goal_changed) {
-      base_graph->change_node_colour(node, fact_colour(neg_goal_changed_pred[node], ILGFactDescription::F_NEG_GOAL));
+      base_graph->change_node_colour(
+          node, fact_colour(neg_goal_changed_pred[node], ILGFactDescription::F_NEG_GOAL));
     }
 
     for (int i = 0; i < n_nodes_added; i++) {
diff --git a/src/main.cpp b/src/main.cpp
index 0fb49eb..62a02e3 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -199,6 +199,10 @@ wl_features
         "graphs"_a)
   .def("set_problem", &feature_generation::WLFeatures::set_problem,
         "problem"_a)
+  .def("get_string_representation", py::overload_cast<const feature_generation::Embedding &>(&feature_generation::WLFeatures::get_string_representation),
+        "embedding"_a)
+  .def("get_string_representation", py::overload_cast<const planning::State &>(&feature_generation::WLFeatures::get_string_representation),
+        "state"_a)
   .def("embed", py::overload_cast<const data::Dataset &>(&feature_generation::WLFeatures::embed), 
         "dataset"_a)
   .def("embed", py::overload_cast<const std::vector<graph::Graph> &>(&feature_generation::WLFeatures::embed),
@@ -208,6 +212,11 @@ wl_features
   .def("get_n_features", &feature_generation::WLFeatures::get_n_features)
   .def("get_seen_counts", &feature_generation::WLFeatures::get_seen_counts)
   .def("get_unseen_counts", &feature_generation::WLFeatures::get_unseen_counts)
+  .def("get_n_seen_graphs", &feature_generation::WLFeatures::get_n_seen_graphs)
+  .def("get_n_seen_nodes", &feature_generation::WLFeatures::get_n_seen_nodes)
+  .def("get_n_seen_edges", &feature_generation::WLFeatures::get_n_seen_edges)
+  .def("get_n_seen_initial_colours", &feature_generation::WLFeatures::get_n_seen_initial_colours)
+  .def("get_n_seen_refined_colours", &feature_generation::WLFeatures::get_n_seen_refined_colours)
   .def("set_weights", &feature_generation::WLFeatures::set_weights,
         "weights"_a)
   .def("get_weights", &feature_generation::WLFeatures::get_weights)
diff --git a/tests/custom_graph_test.py b/tests/custom_graph_test.py
index 317c657..cf354af 100644
--- a/tests/custom_graph_test.py
+++ b/tests/custom_graph_test.py
@@ -13,7 +13,9 @@
 
 def test_blocksworld_random_path():
     random.seed(0)
+    LOGGER.info("Getting raw dataset")
     domain, dataset, _ = get_raw_dataset(domain_name="blocksworld", keep_statics=False)
+    LOGGER.info("Constructing feature generator")
     feature_generator = WLFeatures(
         domain=domain,
         graph_representation=None,
@@ -21,6 +23,7 @@ def test_blocksworld_random_path():
         prune_features=None,
     )
     graphs = []
+    LOGGER.info("Converting to random path graphs")
     for _, states in dataset:
         for state in states:
             G = nx.Graph()
@@ -31,7 +34,9 @@ def test_blocksworld_random_path():
             G = from_networkx(G)
             graphs.append(G)
             G.dump()
+    LOGGER.info("Collecting features")
     feature_generator.collect(graphs)
+    LOGGER.info("Embedding")
     X = np.array(feature_generator.embed(graphs)).astype(float)
     n_features = feature_generator.get_n_features()
     assert X.shape[1] == n_features
diff --git a/wlplan/__version__.py b/wlplan/__version__.py
index dd9b22c..7225152 100644
--- a/wlplan/__version__.py
+++ b/wlplan/__version__.py
@@ -1 +1 @@
-__version__ = "0.5.1"
+__version__ = "0.5.2"
diff --git a/wlplan/feature_generation.py b/wlplan/feature_generation.py
index a3cc8a4..6ce230c 100644
--- a/wlplan/feature_generation.py
+++ b/wlplan/feature_generation.py
@@ -20,7 +20,7 @@ class WLFeatures(_WLFeatures):
         iterations : int, default=2
             The number of WL iterations to perform.
 
-        prune_features : "collapse", "collapse_by_layer" or None, default="collapse"
+        prune_features : "collapse", "collapse_by_layer" or None, default=None
             How to detect and prune duplicate features. If None, no pruning is done.
 
         multiset_hash : bool, default=False
@@ -55,6 +55,21 @@ class WLFeatures(_WLFeatures):
         get_unseen_counts(self) -> List[int]
             Returns a list of length `iterations` with the count of unseen colours at each iteration. Counts are from colours not seen from `collect` calls. The values are collected over all `embed` calls from the initialisation of this class.
 
+        get_n_seen_graphs(self) -> int
+            Returns the number of graphs seen over all `collect` calls.
+
+        get_n_seen_nodes(self) -> int
+            Returns the total number of nodes, summed over all graphs seen in `collect` calls.
+
+        get_n_seen_edges(self) -> int
+            Returns the total number of edges, summed over all graphs seen in `collect` calls.
+
+        get_n_seen_initial_colours(self) -> int
+            Returns the number of distinct initial node colours seen in `collect` calls.
+
+        get_n_seen_refined_colours(self) -> int
+            Returns the number of refined colours collected from `collect` calls.
+
         set_weights(self, weights: Union[list[float], list[int]]) -> None
             Set the weights to predict heuristics directly with this class. The weights must be a list of floats, integers or a numpy array of floats. The length of the weights must be the same as the number of features collected.
 
@@ -76,7 +91,7 @@ def __init__(
         domain: Domain,
         graph_representation: Optional[str] = "ilg",
         iterations: int = 2,
-        prune_features: Optional[str] = "no_prune",
+        prune_features: Optional[str] = None,
         multiset_hash: bool = False,
         **kwargs,
     ) -> None: