diff --git a/history-script.py b/history-script.py
new file mode 100644
--- /dev/null
+++ b/history-script.py
@@ -0,0 +1,108 @@
+"""Record addon download counts from a catalogue snapshot in SQLite.
+
+usage: history-script.py TIMESTAMP CATALOGUE PREVIOUS-CATALOGUE
+
+One row per addon in CATALOGUE is inserted into the `addon` table of
+`history.sqlite3`, holding its download count and how much that count
+changed relative to the same addon in PREVIOUS-CATALOGUE.
+"""
+import json
+import os
+import sqlite3
+import sys
+from contextlib import closing
+
+# path to the SQLite database file, relative to the working directory.
+database = 'history.sqlite3'
+
+def select_keys(data, key_list):
+    """Return the values of `data` for each key in `key_list`, in order."""
+    return [data[key] for key in key_list]
+
+def keyfn(addon):
+    """Return the `(source, source-id)` pair that identifies `addon`."""
+    return (addon['source'], addon['source-id'])
+
+def mkidx(addon_list):
+    """Return a dict mapping `keyfn(addon)` to `addon` for each addon."""
+    return {keyfn(addon): addon for addon in addon_list}
+
+def popularity_data(curr_addon, prev_addon):
+    """Return the download count of `curr_addon` and its change since `prev_addon`.
+
+    `prev_addon` may be an empty dict (addon not seen before), in which
+    case the difference is zero rather than the full download count.
+    """
+    num = curr_addon['download-count']
+    prev_num = prev_addon.get('download-count', num)
+    return {
+        'count': num,
+        'count_difference': num - prev_num,
+    }
+
+def _load_json(fname):
+    """Return the parsed contents of the JSON file `fname`."""
+    # `with` closes the handle promptly instead of leaving it to the GC.
+    with open(fname, 'r', encoding='utf-8') as fh:
+        return json.load(fh)
+
+def db_connection():
+    """Return a new connection to `database`. The caller must close it."""
+    return sqlite3.connect(database)
+
+def insert(db, addon):
+    """Insert `addon` as one row of the `addon` table. Does not commit."""
+    sql = """INSERT INTO addon
+    (timestamp, source, source_id, name, count, count_difference)
+    VALUES
+    (?,?,?,?,?,?);"""
+    data = select_keys(addon, ['timestamp', 'source', 'source-id', 'name', 'count', 'count_difference'])
+    db.execute(sql, tuple(data))
+
+def init_db():
+    """Create the `addon` table in `database` unless it already exists."""
+    sql = """
+    CREATE TABLE IF NOT EXISTS addon(
+        timestamp DATETIME NOT NULL,
+        source TEXT NOT NULL,
+        source_id INTEGER NOT NULL,
+        name TEXT NOT NULL,
+        count INTEGER NOT NULL,
+        count_difference INTEGER NOT NULL,
+
+        PRIMARY KEY (timestamp, source, source_id)
+    );"""
+    with closing(sqlite3.connect(database)) as db:
+        db.execute(sql)
+        db.commit()
+
+def main(timestamp, curr_catalogue_fname, prev_catalogue_fname):
+    """Insert every addon of the current catalogue under `timestamp`.
+
+    `prev_catalogue_fname` need not exist: it won't on the first call to
+    the script, and every count difference is then zero.
+    """
+    curr_catalogue = _load_json(curr_catalogue_fname)
+    prev_catalogue = {'addon-summary-list': []}
+    if os.path.exists(prev_catalogue_fname):
+        prev_catalogue = _load_json(prev_catalogue_fname)
+
+    addon_list = curr_catalogue['addon-summary-list']
+    prev_addon_idx = mkidx(prev_catalogue['addon-summary-list'])
+
+    # idempotent, so a database file left without its table by an
+    # interrupted run is repaired instead of failing on the first insert.
+    init_db()
+
+    with closing(db_connection()) as db:
+        for addon in addon_list:
+            prev_addon = prev_addon_idx.get(keyfn(addon), {})
+            row = {**addon, **popularity_data(addon, prev_addon), 'timestamp': timestamp}
+            insert(db, row)
+        # single commit: a snapshot is stored completely or not at all.
+        db.commit()
+
+if __name__ == '__main__':
+    if len(sys.argv) < 4:
+        sys.exit("usage: history-script.py TIMESTAMP CATALOGUE PREVIOUS-CATALOGUE")
+    main(*sys.argv[1:4])
diff --git a/history-script.sh b/history-script.sh
new file mode 100755
--- /dev/null
+++ b/history-script.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# goes through git history, oldest to newest, and creates a database of
+# popularity data for addons over time.
+#
+# usage: ./history-script.sh [catalogue]
+#   catalogue: file read at each commit, defaults to "full-catalogue.json"
+set -e
+
+catalogue=${1:-"full-catalogue.json"}
+
+# runs on any exit, so a failure half way through the history doesn't leave
+# the repository on a detached HEAD or a stale .prev file behind.
+cleanup() {
+    rm -f "$catalogue.prev"
+    git checkout master --quiet || echo "could not return to master" >&2
+}
+
+# clean up from any previous runs.
+git checkout master
+# the glob is left unquoted, otherwise it is never expanded.
+rm -f ./*.json.prev "$catalogue.prev" history.sqlite3
+trap cleanup EXIT
+
+# list of commit references, oldest to newest
+ref_list=$(git rev-list --reverse HEAD)
+
+# checkout ref, call script with its timestamp and the catalogue files
+for ref in $ref_list; do
+    git checkout "$ref" --quiet
+    timestamp=$(TZ=UTC git --no-pager log -1 --format=%cd --date=iso-strict-local)
+    echo "$timestamp: $ref"
+    python history-script.py "$timestamp" "$catalogue" "$catalogue.prev"
+    # so the next iteration reads from previous iteration's catalogue
+    cp "$catalogue" "$catalogue.prev"
+    echo "---"
+done
+
+# top 50 addons per host, ranked by summed change in download count
+for source in curseforge wowinterface; do
+    output="most-popular-$source.json"
+    sqlite3 -json history.sqlite3 "select sum(count_difference) as pop, source, source_id as 'source-id', name from addon where source = '$source' group by source, source_id, name order by pop desc limit 50;" > "$output"
+    echo "wrote $output"
+done