Skip to content

Commit

Permalink
Perform batch hydration in O(N)
Browse files Browse the repository at this point in the history
This hadn't been noticed previously since N is generally small, eg. 50
or 60 at most. Working with a Metabase dashboard that loaded 10000
fields (100 each from 100 tables) in a single `select` + `hydrate`, I
found this was consuming about 3 seconds.

The slow path was effectively a very slow `conj`: taking the `acc`
vector, and then for each `annotated-instances` doing:

```clojure
(recur (vec (concat acc nil [(first hydrated-instances)]))
       ...)
```

which is pouring the whole vector into a lazy sequence and back into a
vector, once for each instance in the list.

The new version first creates an "index" of instances that need hydration, and
later uses that index to map the instances in the hydrated instances batch to
their places in the initial collection.

Co-authored-by: Braden Shepherdson <braden@metabase.com>
  • Loading branch information
alexander-yakushev and bshepherdson committed Sep 20, 2024
1 parent e14a45e commit e5f311d
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 63 deletions.
59 changes: 20 additions & 39 deletions src/toucan2/tools/hydrate.clj
Original file line number Diff line number Diff line change
Expand Up @@ -430,48 +430,29 @@
;;; `:needs-hydration?`, replace it with the first hydrated instance; repeat this process until we have spliced all
;;; the hydrated instances back in.

(defn- merge-hydrated-instances
"Merge the annotated instances as returned by [[annotate-instances]] and the hydrated instances as returned
by [[hydrate-annotated-instances]] back into a single un-annotated sequence.
(merge-hydrated-instances
[{:needs-hydration? false, :instance {:x 1, :y 1}}
{:needs-hydration? false, :instance {:x 2, :y 2}}
{:needs-hydration? true, :instance {:x 3}}
{:needs-hydration? true, :instance {:x 4}}
{:needs-hydration? false, :instance {:x 5, :y 5}}
{:needs-hydration? true, :instance {:x 6}}]
[{:x 3, :y 3} {:x 4, :y 4} {:x 6, :y 6}])
=>
[{:x 1, :y 1}
{:x 2, :y 2}
{:x 3, :y 3}
{:x 4, :y 4}
{:x 5, :y 5}
{:x 6, :y 6}]"
[annotated-instances hydrated-instances]
(loop [acc [], annotated-instances annotated-instances, hydrated-instances hydrated-instances]
(if (empty? hydrated-instances)
(concat acc (map :instance annotated-instances))
(let [[not-hydrated [_needed-hydration & more]] (split-with (complement :needs-hydration?) annotated-instances)]
(recur (vec (concat acc (map :instance not-hydrated) [(first hydrated-instances)]))
more
(rest hydrated-instances))))))

(defn- annotate-instances [model k instances]
(for [instance instances]
{:needs-hydration? (needs-hydration? model k instance)
:instance instance}))

(defn- hydrate-annotated-instances [model k annotated-instances]
(when-let [instances-that-need-hydration (not-empty (map :instance (filter :needs-hydration? annotated-instances)))]
(batched-hydrate model k instances-that-need-hydration)))
(defn- index-instances-needing-hydration
"Given a list of instances, return a list of the same length and order where each element is a tuple `[i instance]`.
For instances that need hydration, `i` represents the serial number of instances needing hydration. For instances that
don't need hydration, `i` is nil."
[model k instances]
(let [idx (volatile! -1)]
(mapv (fn [instance]
[(when (needs-hydration? model k instance)
(vswap! idx inc))
instance])
instances)))

(m/defmethod hydrate-with-strategy ::multimethod-batched
[model _strategy k instances]
(let [annotated-instances (annotate-instances model k instances)
hydrated-instances (hydrate-annotated-instances model k annotated-instances)]
(merge-hydrated-instances annotated-instances hydrated-instances)))
(let [indexed (index-instances-needing-hydration model k instances)
instances-to-hydrate (not-empty (map second (filter first indexed))) ;; Important: keep doesn't work here.
hydrated-instances (when instances-to-hydrate
(vec (batched-hydrate model k instances-to-hydrate)))]
(mapv (fn [[i unhydrated-instance]]
(if i
(nth hydrated-instances i)
unhydrated-instance))
indexed)))


;;; Method-Based Simple Hydration (using impls of [[simple-hydrate]])
Expand Down
40 changes: 16 additions & 24 deletions test/toucan2/tools/hydrate_test.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
(ns toucan2.tools.hydrate-test
(:require
[clojure.math.combinatorics :as math.combo]
[clojure.test :refer :all]
[methodical.core :as m]
[toucan2.connection :as conn]
Expand Down Expand Up @@ -352,29 +351,22 @@
(is (= {:f [:a 100]}
(hydrate/hydrate {:f [:a 100]} :p)))))

(deftest ^:parallel merge-hydrated-instances-test
(doseq [[a b c d e f :as order] (take 1 (math.combo/permutations (range 1 7)))
:let [annotated-instances (map (fn [i]
(case (long i)
1 {:needs-hydration? false, :instance {:x 1, :y 1}}
2 {:needs-hydration? false, :instance {:x 2, :y 2}}
3 {:needs-hydration? true, :instance {:x 3}}
4 {:needs-hydration? true, :instance {:x 4}}
5 {:needs-hydration? false, :instance {:x 5, :y 5}}
6 {:needs-hydration? true, :instance {:x 6}}))
order)
hydrated-instances (->> order
(filter #{3 4 6})
(map (fn [i]
{:x i, :y i})))]]
(testing (pr-str order)
(is (= [{:x a, :y a}
{:x b, :y b}
{:x c, :y c}
{:x d, :y d}
{:x e, :y e}
{:x f, :y f}]
(#'hydrate/merge-hydrated-instances annotated-instances hydrated-instances))))))
(deftest ^:synchronized index-instances-needing-hydration-test
(let [instances [{:x 1, :y 1}
{:x 2, :y 2}
{:x 3}
{:x 4}
{:x 5, :y 5}
{:x 6}]]
(with-redefs [hydrate/needs-hydration? (fn [_ _ instance]
(= (count instance) 1))]
(is (= [[nil {:x 1, :y 1}]
[nil {:x 2, :y 2}]
[0 {:x 3}]
[1 {:x 4}]
[nil {:x 5, :y 5}]
[2 {:x 6}]]
(#'hydrate/index-instances-needing-hydration nil nil instances))))))

(m/defmethod hydrate/batched-hydrate [:default ::is-bird?]
[_model _k rows]
Expand Down

0 comments on commit e5f311d

Please sign in to comment.