diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c1e9646
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/target
+/*.iml
+/.*
+!/.gitignore
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..1914ab6
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,6 @@
+# Change Log
+
+## 0.1.0 - 2018-03-16
+### Added
+- Conversion of MSD files into H2
+- README with usage instructions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..ad88984
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,16 @@
+Copyright (c) 2018 Nicolas Duchenne, Belove Ltd, London, UK
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+and associated documentation files (the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge, publish, distribute,
+sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or
+substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..dfb6b71
--- /dev/null
+++ b/README.md
@@ -0,0 +1,119 @@
+# MSD Lyrics SQL database 
+
+A command line tool to load the lyrics subset of the [Million Song Dataset](https://labrosa.ee.columbia.edu/millionsong/) 
+into an [H2 SQL database](http://www.h2database.com/html/main.html).
+
+A SQL database makes it easy to inspect, clean, aggregate, filter and slice the dataset, via a GUI or programmatically.
+ 
+ 
+## Installation
+
+ 
+Install [Java](http://java.com/en/download/), download the jar file from the release page of this repository and follow the instructions below. 
+
+Alternatively, you can clone this repository and run the code with [Leiningen](http://leiningen.org/), the build automation tool 
+for [Clojure](http://clojure.org). Start by editing the `msd.edn` file in the project root (see further below) and then execute 
+the code with `lein run`.
+
+You don't need to install H2 on your machine to run the program. The database engine is embedded in the program. 
+
+However, you'll need some H2 compatible tool to view the data. An option is to [install H2](http://www.h2database.com/html/download.html) 
+and use the included [console application](http://www.h2database.com/html/quickstart.html).
+Another is to use an H2 compatible front-end such as [DataGrip](https://www.jetbrains.com/datagrip/).
+
+
+
+
+## Usage
+
+Gather the following files from the [Million Song Dataset](https://labrosa.ee.columbia.edu/millionsong/) and place them in 
+the same directory (while you're browsing the websites, check the licensing/citing terms for the various subsets):
+
+- from https://labrosa.ee.columbia.edu/millionsong/musixmatch:
+  - mxm_779k_matches.txt
+  - mxm_dataset_test.txt
+  - mxm_dataset_train.txt
+  - mxm_reverse_mapping.txt
+- from https://labrosa.ee.columbia.edu/millionsong/pages/getting-dataset:   
+  - tracks_per_year.txt
+- from http://www.ifs.tuwien.ac.at/mir/msd/download.html#groundtruth: 
+  - msd-MASD-styleAssignment.cls
+
+Place the jar file into the same directory and run:
+
+    $ java -jar msd-to-h2-0.1.0-standalone.jar
+
+Give it a few minutes to create the output files:
+
+    Creating the csv files...
+    Creating the database. Just a bit of patience.
+    Creating primary tables...
+    Creating indexes...
+    Creating derived tables...
+    All Done!
+    The 3 artists with the largest vocabulary in the Million Song Dataset are Aesop Rock with 2555 words, Eminem with 2526 words, Cypress Hill with 2476 words
+
+
+
+
+## Outputs
+
+The program runs in two stages. 
+
+Firstly, the program converts the original MSD files into CSV files. Words and tracks are given new unique integer ids, 
+and files that relate to each other are consolidated (e.g. tracks + track years + track genres).
+  
+Secondly, the program uploads the resulting CSV files into a new H2 database. Tables are created for tracks, words and the track/word matrix. 
+The tool also creates a table of artists (based on the MusicXMatch artist names in the dataset) with aggregate track count, vocabulary count, 
+and year range for each artist. This list of artists is preliminary and is meant to help prioritize data cleaning, rather than being used as is. 
+
+
+
+
+## Options
+
+Rather than having all input and output files in the same directory, it is possible to specify different locations for the input files, the csv output files 
+and the database. To do this, create an [edn file](https://learnxinyminutes.com/docs/edn/) called `msd.edn` with the following keys:
+
+- `:in`  directory of the MSD input files 
+- `:csv` directory of the csv output files
+- `:db` output database file according to 
+[H2's URL format for an embedded database](http://www.h2database.com/html/features.html#database_url), without the `jdbc:h2:` prefix. 
+The code was tested for a location relative to the `msd.edn` file (`./`) 
+or to the user's home directory (`~/`).
+
+For example: 
+     
+     {:in   "/data/msd/source/" 
+      :csv  "/data/msd/csv/" 
+      :db   "~/h2data/msd"}
+ 
+You can then run `java -jar msd-to-h2-0.1.0-standalone.jar` at the location of the `msd.edn` file.
+
+If there is no `msd.edn` file (like earlier) then the program defaults to the following parameters:
+
+    {:in   "./" 
+     :csv  "./" 
+     :db   "./msd"}
+
+
+
+
+## Other SQL Engines
+
+Amending the code to accommodate other SQL engines should be straightforward. 
+You'll have to:
+
+- [ ] change the database driver dependency in `project.clj` 
+- [ ] adapt the database spec in `src/intoh2/core.clj` (look for `db-spec` and `create-database`)
+- [ ] adapt the scripts in `resources/sql` to the SQL dialect of the new database  
+
+
+
+
+## License
+
+Copyright © 2018 Nicolas Duchenne, Belove Ltd, London, UK
+
+Released under the [MIT License](https://opensource.org/licenses/MIT).
diff --git a/msd.edn b/msd.edn
new file mode 100644
index 0000000..6403ad7
--- /dev/null
+++ b/msd.edn
@@ -0,0 +1,3 @@
+{:in  "/data/millionsong/dataset/extract"
+ :csv "/data/millionsong/dataset/csv"
+ :db "~/data/millionsong/dataset/h2/msd;AUTO_SERVER=TRUE;AUTO_SERVER_PORT=9091"}
\ No newline at end of file
diff --git a/project.clj b/project.clj
new file mode 100644
index 0000000..f360119
--- /dev/null
+++ b/project.clj
@@ -0,0 +1,18 @@
+; Leiningen project definition for the msd-to-h2 command line tool.
+(defproject msd-to-h2 "0.1.0"
+  :description "Converts lyrics files in the Million Song Dataset into an H2 database."
+  :url "https://github.com/belovehq/msd-to-h2"
+  :license {:name "MIT License"
+            :url  "https://opensource.org/licenses/MIT"}
+  ; NOTE(review): src/intoh2/core.clj requires clojure.java.jdbc, which is not declared
+  ; below - presumably it arrives transitively through hugsql; confirm.
+  :dependencies [[org.clojure/clojure "1.9.0"]
+                 [com.layerware/hugsql "0.4.8"]
+                 [com.h2database/h2 "1.4.195"]]
+
+  :source-paths ["src"]
+  :target-path "target/%s"
+  ; the SQL scripts under resources/sql are loaded from the classpath at run time
+  :resource-paths ["resources"]
+
+  ; entry point: intoh2.core/-main
+  :main ^:skip-aot intoh2.core
+
+  ; everything is AOT-compiled when building the standalone jar
+  :profiles {:uberjar {:aot :all}})
+
+
diff --git a/resources/sql/derivedtables.sql b/resources/sql/derivedtables.sql
new file mode 100644
index 0000000..d10e5f1
--- /dev/null
+++ b/resources/sql/derivedtables.sql
@@ -0,0 +1,39 @@
+-- Derived tables, built from msdtracks and matrix (the Clojure code runs this script
+-- after primarytables.sql and indexes.sql).
+
+-- Create and populate the artists table: one row per distinct mxmartistname found on
+-- tracks that have at least one entry in the track/word matrix.
+
+CREATE TABLE msdartists (
+  artistid      INT PRIMARY KEY AUTO_INCREMENT,
+  mxmartistname VARCHAR(55),
+  trackcount    INT,
+  vocabulary    INT,
+  fromyear      INT,
+  toyear        INT
+);
+
+
+-- trackcount = number of distinct tracks, vocabulary = number of distinct word ids,
+-- fromyear/toyear = earliest and latest trackyear of the artist's tracks.
+INSERT INTO msdartists (mxmartistname, trackcount, vocabulary, fromyear, toyear)
+  SELECT
+    t.mxmartistname           AS mxmartistname,
+    COUNT(DISTINCT t.trackid) AS trackcount,
+    COUNT(DISTINCT m.wordid)  AS vocabulary,
+    MIN(t.trackyear)          AS fromyear,
+    MAX(t.trackyear)          AS toyear
+  FROM msdtracks t
+    INNER JOIN matrix M ON M.trackid = t.trackid
+  GROUP BY mxmartistname
+  ORDER BY mxmartistname;
+
+CREATE INDEX IX_msdartists_artistname
+  ON msdartists (mxmartistname);
+
+-- Create and populate the genres table: one row per distinct non-null masdgenre.
+
+CREATE TABLE msdgenres (
+  genreid   INT AUTO_INCREMENT PRIMARY KEY,
+  masdgenre VARCHAR(20)
+);
+
+INSERT INTO msdgenres (masdgenre)
+  SELECT DISTINCT masdgenre
+  FROM msdtracks
+  WHERE masdgenre IS NOT NULL
+  ORDER BY masdgenre;
diff --git a/resources/sql/hello.sql b/resources/sql/hello.sql
new file mode 100644
index 0000000..067a0ce
--- /dev/null
+++ b/resources/sql/hello.sql
@@ -0,0 +1,7 @@
+-- :name hello :query :one
+-- :doc Return a single row whose column "out" is a sentence naming the 3 artists of msdartists with the largest vocabulary.
+SELECT CONCAT('The 3 artists with the largest vocabulary in the Million Song Dataset are ',
+              GROUP_CONCAT(s SEPARATOR ', ')) out
+FROM
+  (SELECT TOP 3 CONCAT(mxmartistname, ' with ', vocabulary, ' words') s
+   FROM msdartists
+   ORDER BY vocabulary DESC);
\ No newline at end of file
diff --git a/resources/sql/indexes.sql b/resources/sql/indexes.sql
new file mode 100644
index 0000000..d434f2d
--- /dev/null
+++ b/resources/sql/indexes.sql
@@ -0,0 +1,12 @@
+-- Keys and indexes are added in this separate script, which the Clojure code runs
+-- after the primary tables have been created from the csv files.
+
+-- indexes on msdtracks: primary key, unique entrackid, and lookups by genre and artist name
+CREATE PRIMARY KEY ON msdtracks (trackid);
+CREATE UNIQUE INDEX ix_msdtracks_entrackid ON msdtracks (entrackid);
+CREATE INDEX ix_msdtracks_masdgenre ON msdtracks (masdgenre);
+CREATE INDEX ix_msdtracks_mxmartistname ON msdtracks (mxmartistname);
+
+-- index on msdwords
+CREATE PRIMARY KEY ON msdwords (wordid);
+
+-- indexes on matrix: primary key on (trackid, wordid) plus the reverse (wordid, trackid) index
+CREATE PRIMARY KEY ON matrix (trackid, wordid);
+CREATE INDEX ix_matrix_wordid_trackid ON matrix (wordid, trackid);
diff --git a/resources/sql/primarytables.sql b/resources/sql/primarytables.sql
new file mode 100644
index 0000000..72cf966
--- /dev/null
+++ b/resources/sql/primarytables.sql
@@ -0,0 +1,37 @@
+-- Command definitions for HugSQL.
+
+-- :name create-tracks-table :execute :raw
+-- :doc Create msdtracks and populate it from a CSV file. The raw SQL parameter :file must be an already-quoted SQL string literal holding the CSV path. Columns are presumably matched to the CSV by position (the CSV header names differ, e.g. "test" vs istest) - confirm.
+CREATE TABLE msdtracks (
+  trackid       INT NOT NULL,
+  entrackid     VARCHAR(18),
+  mxmtrackid    INT,
+  istest        INT,
+  entracktitle  VARCHAR(250),
+  mxmtracktitle VARCHAR(180),
+  enartistname  VARCHAR(100),
+  mxmartistname VARCHAR(55),
+  trackyear     INT,
+  masdgenre     VARCHAR(20)
+) AS
+  SELECT *
+  FROM CSVREAD(:sql:file);
+
+-- :name create-words-table :execute :raw
+-- :doc Create msdwords and populate it from a CSV file. The raw SQL parameter :file must be an already-quoted SQL string literal holding the CSV path.
+CREATE TABLE msdwords (
+  wordid INT NOT NULL,
+  stem   VARCHAR(15),
+  word   VARCHAR(15)
+) AS
+  SELECT *
+  FROM CSVREAD(:sql:file);
+
+-- :name create-matrix-table :execute :raw
+-- :doc Create the track/word matrix and populate it from a CSV file. The raw SQL parameter :file must be an already-quoted SQL string literal holding the CSV path.
+CREATE TABLE matrix (
+  trackid INT NOT NULL,
+  wordid  INT NOT NULL,
+  count   INT NOT NULL
+) AS
+  SELECT *
+  FROM CSVREAD(:sql:file);
+
+
diff --git a/resources/sql/safety.sql b/resources/sql/safety.sql
new file mode 100644
index 0000000..265c994
--- /dev/null
+++ b/resources/sql/safety.sql
@@ -0,0 +1,3 @@
+-- Re-enable the database logs that speed.sql switched off; the Clojure code runs this
+-- script once the tables, indexes and derived tables have been created.
+SET LOG 1;
+SET UNDO_LOG 1;
\ No newline at end of file
diff --git a/resources/sql/speed.sql b/resources/sql/speed.sql
new file mode 100644
index 0000000..efb8632
--- /dev/null
+++ b/resources/sql/speed.sql
@@ -0,0 +1,3 @@
+-- Momentarily disable the database logs while the tables are bulk-loaded;
+-- safety.sql switches them back on afterwards.
+SET LOG 0;
+SET UNDO_LOG 0;
diff --git a/src/intoh2/core.clj b/src/intoh2/core.clj
new file mode 100644
index 0000000..313032a
--- /dev/null
+++ b/src/intoh2/core.clj
@@ -0,0 +1,316 @@
+(ns intoh2.core
+  (:require [clojure.edn]
+            [clojure.java.io :refer [reader writer file resource as-file]]
+            [clojure.java.jdbc :refer [with-db-connection]]
+            [clojure.string :as s]
+            [intoh2.sql :as sql])
+  (:gen-class))
+
+
+
+; specs
+
+(def msd-spec
+  "The map of source MSD files, keyed by dataset.
+  Each entry holds the file :name (looked up by msd-full-path) and, optionally, the
+  field :separator of the file; msd-separator falls back to \"<SEP>\" when none is given."
+  {:years   {:name "tracks_per_year.txt"}
+   :stems   {:name "mxm_reverse_mapping.txt"}
+   :matches {:name "mxm_779k_matches.txt"}
+   :genres  {:name "msd-MASD-styleAssignment.cls" :separator "\t"}
+   :train   {:name "mxm_dataset_train.txt"}
+   :test    {:name "mxm_dataset_test.txt"}})
+
+
+(def csv-spec
+  "The map of CSV files that will be produced from the MSD files before loading into H2.
+  Each entry holds the output file :name (looked up by csv-full-path) and the :header,
+  a vector of column names that csv-header turns into the first line of the file."
+  ; straight conversions of the tabular MSD files (same keys as in msd-spec)
+  {:years        {:name   "msdyears.csv"
+                  :header ["year" "entrackid" "artistname" "tracktitle"]}
+   :stems        {:name   "mxmstems.csv"
+                  :header ["stem" "word"]}
+   :matches      {:name   "mxmmatches.csv"
+                  :header ["entrackid" "enartistname" "entracktitle"
+                           "mxmtrackid" "mxmartistname" "mxmtracktitle"]}
+   :genres       {:name   "masdgenres.csv"
+                  :header ["entrackid", "masdgenre"]}
+   ; intermediate files extracted from the lyrics datasets
+   :words        {:name   "mxmwords.csv"
+                  :header ["wordid" "stem"]}
+   :tracks       {:name   "mxmtracks.csv"
+                  :header ["trackid" "entrackid" "mxmtrackid" "test"]}
+   ; final files, loaded into the database by create-database
+   :final-pairs  {:name   "h2matrix.csv"
+                  :header ["trackid" "wordid" "count"]}
+   :final-words  {:name   "h2words.csv"
+                  :header ["wordid" "stem" "word"]}
+   :final-tracks {:name   "h2tracks.csv"
+                  :header ["trackid" "entrackid" "mxmtrackid" "test" "entracktitle"
+                           "mxmtracktitle" "enartistname" "mxmartistname" "trackyear" "masdgenre"]}})
+
+
+(def db-spec
+  "Base H2 database spec. The :subname is left empty here; create-database fills it in
+  with the :db entry of path-spec before opening the connection."
+  {:classname      "org.h2.Driver"
+   :subprotocol    "h2"
+   :subname        ""
+   :user           ""
+   :password       ""
+   ; NOTE(review): presumably handed to H2 as a connection setting - confirm
+   :DB_CLOSE_DELAY "-1"})
+
+
+(defn pathify
+  "Trim the string s and append a forward slash to it, unless the trimmed string is
+  empty or already ends with a slash or a backslash. Return the resulting string."
+  [s]
+  (let [t (s/trim s)]
+    (if (or (= t "") (#{\/ \\} (last t)))
+      t
+      (str t "/"))))
+
+
+(def path-spec
+  "The map of input and output paths:
+  :in the path of the input msd files
+  :csv the path where to create csv files
+  :db the database file to create
+
+  Read from the msd.edn file of the working directory. When there is no such file,
+  everything defaults to the working directory. When the file exists but cannot be
+  read or parsed (or lacks :in / :csv), the value is an empty map, so that the key
+  check in -main reports the problem instead of silently running with the defaults."
+  (if (.exists (file "msd.edn"))
+    (try
+      (-> (clojure.edn/read-string (slurp "msd.edn"))
+          (update-in [:in] pathify)
+          (update-in [:csv] pathify))
+      (catch Exception e
+        ; deliberately not the defaults: the user asked for specific locations
+        {}))
+    {:in  "./"
+     :csv "./"
+     :db  "./msd"}))
+
+
+
+; pure functions
+
+
+(defn vec-to-csv
+  "Join the items of v with commas and terminate the result with a line break."
+  [v]
+  (str (s/join "," v) "\n"))
+
+
+(defn cs-to-vec
+  "Split the string s on commas and return the pieces as a vector of strings."
+  [s]
+  (vec (.split #"," s)))
+
+
+(defn msd-full-path
+  "Return the full path of the source MSD file that has key k: the :in directory of
+  path-spec followed by the file name registered in msd-spec."
+  [k]
+  (let [dir (:in path-spec)
+        filename (:name (get msd-spec k))]
+    (str dir filename)))
+
+
+(defn csv-full-path
+  "Return the full path of the csv file that has key k: the :csv directory of
+  path-spec followed by the file name registered in csv-spec."
+  [k]
+  (let [dir (:csv path-spec)
+        filename (:name (get csv-spec k))]
+    (str dir filename)))
+
+
+(defn csv-header
+  "Return the header line (comma separated, with a trailing line break) registered
+  in csv-spec for the csv file that has key k."
+  [k]
+  (-> csv-spec
+      (get k)
+      :header
+      vec-to-csv))
+
+
+(defn msd-separator
+  "Return the field separator registered in msd-spec for the MSD file that has key k,
+  or \"<SEP>\" when the file has none registered."
+  [k]
+  (if-some [sep (:separator (get msd-spec k))]
+    sep
+    "<SEP>"))
+
+
+(defn tabular-to-csv
+  "Turn one line of a tabular file into one CSV line, line break included.
+  sep: the field separator used in line; every occurrence becomes a comma
+  line: the input line; a line starting with # yields the empty string
+  id: when truthy, it is written as an extra first column
+  Double quotes in the line are replaced with single quotes."
+  [sep line id]
+  (if (= \# (first line))
+    ""
+    (let [prefix (when id (str id ","))
+          fields (s/replace (s/replace line sep ",") \" \')]
+      (str prefix fields "\n"))))
+
+
+(defn seq-of-ids
+  "Return an infinite sequence: the integers 1, 2, 3... when b is truthy,
+  an endless run of nil otherwise."
+  [b]
+  (if b
+    (iterate inc 1)
+    (repeat nil)))
+
+
+
+; read and write
+
+
+(defn convert-msd-file-to-csv
+  "Convert a tabular MSD file into a CSV file.
+   in: path of the msd file
+   out: path of the csv file to produce
+   sep: separator in the MSD file
+   header: header of the csv file, comma separated and including line break (skipped when nil)
+   insertids: if true then insert an incremental id at the beginning of each row"
+  [in out sep header insertids]
+  (with-open [rdr (reader in)
+              wtr (writer out)]
+    (when header
+      (.write wtr header))
+    (let [to-csv (partial tabular-to-csv sep)]
+      ; each input line is paired with its id (or nil) before being converted
+      (doseq [[line id] (map vector (line-seq rdr) (seq-of-ids insertids))]
+        (.write wtr (to-csv line id))))))
+
+
+(defn convert-msd-files-to-csv
+  "Convert several tabular MSD files into CSV files, one after the other.
+   keys: sequence of file keys; each key selects the input file, its separator,
+   the output file and its header. No ids are inserted."
+  [keys]
+  (doseq [k keys]
+    (convert-msd-file-to-csv (msd-full-path k)
+                             (csv-full-path k)
+                             (msd-separator k)
+                             (csv-header k)
+                             nil)))
+
+
+(defn write-string-to-csv
+  "Write the entries of a comma separated string into a CSV file, one entry per line.
+  out: path of the destination file
+  s: input string
+  header: header of the csv file, comma separated and including line break (skipped when nil)
+  insertids: if true then the first column of the csv file will be an incremental integer id"
+  [out s header insertids]
+  (with-open [wtr (writer out)]
+    (when header
+      (.write wtr header))
+    (doseq [[entry id] (map vector (cs-to-vec s) (seq-of-ids insertids))]
+      (.write wtr (str (when id (str id ",")) entry "\n")))))
+
+
+
+(defn csv-file-to-map
+  "Read a csv file into a map {key1 {:col1 ... :col2 ...} key2 {:col1 ... :col2 ...} ... }.
+  The first line of the file supplies the column names, which become keywords.
+  file: full path of the csv file
+  key-col: the column (as a keyword) whose value becomes the key of each row
+  include-cols: which columns to include in the output (if nil, include all columns)
+  When several rows share a key, the last one wins."
+  ([file key-col] (csv-file-to-map file key-col nil))
+  ([file key-col include-cols]
+   (with-open [rdr (reader file)]
+     (let [[header-line & data-lines] (line-seq rdr)
+           columns (map keyword (cs-to-vec header-line))
+           to-entry (fn [line]
+                      (let [row (zipmap columns (cs-to-vec line))]
+                        [(key-col row)
+                         (if include-cols (select-keys row include-cols) row)]))]
+       ; into is eager, so the whole file is consumed before the reader is closed
+       (into {} (map to-entry data-lines))))))
+
+
+(defn ^:private write-lyricsdataset-to-csv
+  "Convert a single musicXmatch lyrics matrix and append it to CSV files.
+  in: the musicXmatch lyrics matrix file name, either the training file or the test file
+  tw, pw: writers to the tracks csv file and the pairs csv file
+  seed: the point from which to start the tracks numbering; the first track gets seed + 1
+  Returns the id given to the last track, or seed if the file holds no track."
+  [in tw pw seed]
+  ; the file counts as the test set as soon as tracks have been numbered before it
+  (let [is-test-set (> seed 0)
+
+        test-flag (if is-test-set 1 0)
+
+        ; write the comma separated word list to the words csv file, with incremental ids
+        write-words (fn [s] (write-string-to-csv (csv-full-path :words) s (csv-header :words) true))
+
+        ; a track line holds two track ids followed by wordid:count pairs, all comma separated
+        write-track-data (fn [s trackid] (let [[msdtrackid mxmtrackid & word-counts] (cs-to-vec s)]
+                                           (.write tw (vec-to-csv [trackid msdtrackid mxmtrackid test-flag]))
+                                           (doseq [word-count word-counts]
+                                             (let [[wordid n] (s/split word-count #":")]
+                                               (.write pw (vec-to-csv [trackid wordid n]))))))
+
+        ; reducer over the lines: # lines are skipped, the % line is the word list (only
+        ; written when this is not the test set), any other line is a track that gets the next id
+        do-track (fn [oldid s] (case (first s)
+                                 \# oldid
+                                 \% (do (if (not is-test-set) (write-words (subs s 1))) oldid)
+                                 (let [trackid (inc oldid)] (write-track-data s trackid) trackid)))]
+
+    (with-open [rdr (reader in)]
+      (reduce do-track seed (line-seq rdr)))))
+
+
+(defn write-lyrics-to-csv
+  "Convert the two musicXmatch lyrics matrices into aggregated CSV files: one tracks
+  file and one track/word pairs file covering both datasets. The training file is
+  processed first; the track numbering of the test file continues where the training
+  file stopped. The words csv file is written along the way by the per-dataset conversion."
+  []
+  (let [out-tracks (csv-full-path :tracks)
+        out-pairs (csv-full-path :final-pairs)]
+    (with-open [tw (writer out-tracks)
+                pw (writer out-pairs)]
+      (.write tw (csv-header :tracks))
+      (.write pw (csv-header :final-pairs))
+      ; the id of the last training track seeds the numbering of the test tracks
+      (let [seed (write-lyricsdataset-to-csv (msd-full-path :train) tw pw 0)]
+        (write-lyricsdataset-to-csv (msd-full-path :test) tw pw seed)))))
+
+
+(defn join-track-files
+  "Consolidate track data into a single csv file. Sources are:
+  a) the tracks found in the mxm training and test datasets
+  b) the mxm/msd track matching list
+  c) the list of tracks with years
+  d) the list of tracks with genres
+  Every track of a) yields one output line; values missing from b), c) or d) are left empty."
+  []
+  (let [outfile (csv-full-path :final-tracks)
+        matches (csv-file-to-map (csv-full-path :matches) :entrackid)
+        years (csv-file-to-map (csv-full-path :years) :entrackid [:year])
+        styles (csv-file-to-map (csv-full-path :genres) :entrackid [:masdgenre])]
+    (with-open [rdr (reader (csv-full-path :tracks)) wtr (writer outfile)]
+      (.write wtr (csv-header :final-tracks))
+      ; skip the header of the tracks file; the track id is the second column of each line
+      (doseq [line (rest (line-seq rdr))]
+        (let [entrackid (second (cs-to-vec line))
+              mx (get matches entrackid)
+              ; the original line is kept as is and the extra columns are appended to it
+              s (vec-to-csv [line
+                             (get mx :entracktitle "")
+                             (get mx :mxmtracktitle "")
+                             (get mx :enartistname "")
+                             (get mx :mxmartistname "")
+                             (get-in years [entrackid :year] "")
+                             (get-in styles [entrackid :masdgenre] "")])]
+          (.write wtr s))))))
+
+
+(defn join-word-files
+  "Consolidate word data into a single csv file. Sources are:
+  a) the list of stemmed words found in the mxm training dataset
+  b) the stem/word matching list
+  Every stem of a) yields one output line; the word is left empty when b) has no match."
+  []
+  (let [outfile (csv-full-path :final-words)
+        stems (csv-file-to-map (csv-full-path :stems) :stem [:word])]
+    (with-open [rdr (reader (csv-full-path :words)) wtr (writer outfile)]
+      (.write wtr (csv-header :final-words))
+      ; skip the header of the words file; the stem is the second column of each line
+      (doseq [line (rest (line-seq rdr))]
+        (let [stem (second (cs-to-vec line))]
+          (.write wtr (vec-to-csv [line (get-in stems [stem :word] "")])))))))
+
+
+
+; orchestration
+
+
+(defn create-csv-files
+  "Produce every csv file from the MSD files: first the straight conversions, then the
+  lyrics datasets, and finally the consolidated track and word files."
+  []
+  (convert-msd-files-to-csv [:years :stems :matches :genres])
+  (write-lyrics-to-csv)
+  (join-track-files)
+  (join-word-files))
+
+
+(defn create-database
+  "Create the database based on all csv files.
+  Opens a connection to the database named by the :db entry of path-spec, loads the three
+  final csv files into the primary tables, then runs the index and derived table scripts
+  and prints the result of the hello query."
+  []
+  (let [db (assoc db-spec :subname (:db path-spec))
+        ; The csv path is spliced into the statement as raw SQL, so it is rendered as a SQL
+        ; string literal; single quotes within the path are doubled to keep the literal valid.
+        sql-literal (fn [k] (str "'" (s/replace (csv-full-path k) "'" "''") "'"))]
+    (with-db-connection [con db]
+      (sql/runscript con "sql/speed.sql")
+      (println "Creating primary tables...")
+      (sql/create-tracks-table con {:file (sql-literal :final-tracks)})
+      (sql/create-words-table con {:file (sql-literal :final-words)})
+      (sql/create-matrix-table con {:file (sql-literal :final-pairs)})
+      (println "Creating indexes...")
+      (sql/runscript con "sql/indexes.sql")
+      (println "Creating derived tables...")
+      (sql/runscript con "sql/derivedtables.sql")
+      (sql/runscript con "sql/safety.sql")
+      (println "All Done!")
+      (println (:out (sql/hello con))))))
+
+
+(defn -main
+  "Create the csv files and the database.
+  Command line arguments are ignored; the locations come from path-spec, which must hold
+  exactly the keys :in, :csv and :db. Any exception is caught and reported on standard output."
+  [& args]
+  (try
+    (when (not= #{:csv :db :in} (set (keys path-spec)))
+      (throw (Exception. "msd.edn is missing or invalid.")))
+    (println "Creating the csv files...")
+    (create-csv-files)
+    (println "Creating the database. Just a bit of patience.")
+    (create-database)
+    (catch Exception e
+      ; some exceptions carry no message (e.g. NullPointerException): fall back to the
+      ; exception itself so that the report is never empty
+      (println (str "An error occured: " (or (.getMessage e) (str e)))))))
+
diff --git a/src/intoh2/sql.clj b/src/intoh2/sql.clj
new file mode 100644
index 0000000..6f1a78f
--- /dev/null
+++ b/src/intoh2/sql.clj
@@ -0,0 +1,14 @@
+(ns intoh2.sql
+  (:require [hugsql.core :as h]
+            [clojure.java.io :refer [resource]]))
+
+; Define one function in this namespace per named HugSQL statement found in the
+; resource files below (create-tracks-table, create-words-table, create-matrix-table
+; and hello).
+(h/def-db-fns "sql/primarytables.sql")
+(h/def-db-fns "sql/hello.sql")
+
+
+(defn runscript
+  "Run a script from a resource file.
+  db: the database spec or connection to run the script against
+  r: the name of the resource file holding the script
+  Throws an ex-info naming the resource when it cannot be found."
+  [db r]
+  (if-let [url (resource r)]
+    (h/db-run db (slurp url) {} :execute)
+    (throw (ex-info (str "SQL script resource not found: " r) {:resource r}))))
+