From f0bb380c9cb0a86e4d471a0399b1842d1603b11b Mon Sep 17 00:00:00 2001
From: Waqas Ali <waqas.abbasi@outlook.com>
Date: Wed, 12 Jan 2022 15:00:06 +0800
Subject: [PATCH] Fix bug where g/->dataset doesn't work for more than 8
 columns (#340)

---
 .../zero_one/geni/core/dataset_creation.clj   |  8 +++++++-
 test/zero_one/geni/dataset_creation_test.clj  | 20 ++++++++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/clojure/zero_one/geni/core/dataset_creation.clj b/src/clojure/zero_one/geni/core/dataset_creation.clj
index ebf0236..14621ef 100644
--- a/src/clojure/zero_one/geni/core/dataset_creation.clj
+++ b/src/clojure/zero_one/geni/core/dataset_creation.clj
@@ -164,6 +164,8 @@
    For list of maps, it returns a list of one map with first non-nil value for each nested key.
      
      Examples:
+     []                                          => []
+     [nil nil]                                   => []
      [1 2 3]                                     => [1]
      [nil [1 2]]                                 => [[1]]
      [{:a 1} {:a 3 :b true}]                     => [{:a 1 :b true}]
@@ -186,6 +188,10 @@
    The sample non-nil value can be generated using first-non-nil function above.
    
      Examples:
+     [] | []
+      => []
+     [nil nil] | []
+      => [nil nil]
      [1 2 3] | [1]
       => [1 2 3]
      [nil [1 2]] | [[1]]
@@ -235,7 +241,7 @@
      (let [col-names  (map name col-names)
            transposed (transpose table)
            values     (map first-non-nil transposed)
-           table      (transpose (map (partial apply fill-missing-nested-keys) (zipmap transposed values)))
+           table      (transpose (map (partial apply fill-missing-nested-keys) (map vector transposed values)))
            rows       (interop/->java-list (map interop/->spark-row (transform-maps table)))
            schema     (infer-schema col-names (map first values))]
        (.createDataFrame spark rows schema)))))
diff --git a/test/zero_one/geni/dataset_creation_test.clj b/test/zero_one/geni/dataset_creation_test.clj
index 5fb8054..7480749 100644
--- a/test/zero_one/geni/dataset_creation_test.clj
+++ b/test/zero_one/geni/dataset_creation_test.clj
@@ -227,7 +227,25 @@
       (instance? Dataset dataset) => true
       (g/collect-vals dataset) => [[0 [[{:z 1 :h nil :g nil} {:z 2 :h nil :g nil}]
                                        [{:z nil :h true :g nil}]]]
-                                   [1 [[{:z nil :h nil :g 3.0}]]]])))
+                                   [1 [[{:z nil :h nil :g 3.0}]]]]))
+  (fact "should work for several number of columns" ; regression case for the >8-column failure (#340)
+    (let [dataset (g/records->dataset
+                   @tr/spark
+                   [{:a 1  :b 2  :c 3  :d 4  :e 5  :f 6  :g 7  :h 8  :i 9} ; 9 keys per record: one past the 8-column limit named in the fix
+                    {:a 10 :b 11 :c 12 :d 13 :e 14 :f 15 :g 16 :h 17 :i 18}])]
+      (instance? Dataset dataset) => true
+      (g/collect dataset) => [{:a 1  :b 2  :c 3  :d 4  :e 5  :f 6  :g 7  :h 8  :i 9} ; collected rows must equal the input records key-for-key
+                              {:a 10 :b 11 :c 12 :d 13 :e 14 :f 15 :g 16 :h 17 :i 18}]))
+  (fact "should work for nil and empty values" ; columns with no non-nil sample value must still build a dataset
+    (let [dataset (g/records->dataset
+                   @tr/spark
+                   [{:i nil :s []        :b []} ; :i is nil in every row; :b is an empty vector in every row
+                    {:i nil :s [nil nil] :b []} ; :s mixes an empty vector, an all-nil vector and nil across rows
+                    {:i nil :s nil       :b []}])]
+      (instance? Dataset dataset) => true
+      (g/collect-vals dataset) => [[nil []        []] ; expected values mirror the input: nils and empty vectors come back unchanged
+                                   [nil [nil nil] []]
+                                   [nil nil       []]])))
 
 (facts "On table->dataset"
   (fact "should create the right dataset"