Skip to content

Commit

Permalink
adds a script for navigating the catalogue history.
Browse files Browse the repository at this point in the history
I'm using it to build a database to calculate popularity since cataloguing began, but it could have other uses.
  • Loading branch information
Torkus committed Dec 13, 2020
1 parent b7c6f61 commit 4f4ccde
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 0 deletions.
73 changes: 73 additions & 0 deletions history-script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os, sys, json
import sqlite3

# Path of the SQLite database file. It is relative, so it resolves against
# the directory the script is run from.
database = 'history.sqlite3'

def select_keys(data, key_list):
    """Return the values of `data` for the keys in `key_list`, in that order.

    Raises `KeyError` if any key in `key_list` is missing from `data`.
    """
    values = []
    for wanted in key_list:
        values.append(data[wanted])
    return values

def keyfn(addon):
    """Return the `(source, source-id)` tuple used to identify `addon`.

    Raises `KeyError` if `addon` lacks either the 'source' or 'source-id' key.
    """
    source = addon['source']
    source_id = addon['source-id']
    return (source, source_id)

def mkidx(addon_list):
    """Return a dict mapping each addon's `keyfn` key to the addon itself.

    When several addons in `addon_list` share a key, the last one wins.
    """
    index = {}
    for entry in addon_list:
        index[keyfn(entry)] = entry
    return index

def popularity_data(curr_addon, prev_addon):
    """Return an addon's download count and its change since the previous catalogue.

    `curr_addon` must have a 'download-count' key. `prev_addon` may omit it
    (for example when it is an empty dict), in which case the difference is 0.

    Returns a dict with the keys 'count' and 'count_difference'.
    """
    current = curr_addon['download-count']
    # Fall back to the current count so a missing previous count gives a
    # difference of zero.
    previous = prev_addon.get('download-count', current)
    return {'count': current, 'count_difference': current - previous}

def db_connection():
    """Open and return a new sqlite3 connection to the `database` file."""
    connection = sqlite3.connect(database)
    return connection

def insert(db, addon):
    """Insert one row into the `addon` table using the values in `addon`.

    `db` is any object with an `execute(sql, params)` method; `main` passes a
    sqlite3 connection. `addon` must contain the keys 'timestamp', 'source',
    'source-id', 'name', 'count' and 'count_difference'.

    The statement is executed but not committed here.
    """
    # Order must match the column list in the statement below.
    columns = ['timestamp', 'source', 'source-id', 'name', 'count', 'count_difference']
    row = tuple(select_keys(addon, columns))
    statement = """INSERT INTO addon
(timestamp, source, source_id, name, count, count_difference)
VALUES
(?,?,?,?,?,?);"""
    db.execute(statement, row)

def init_db():
    """Create the `addon` table in the `database` file if it does not exist.

    The table has one row per (timestamp, source, source_id), holding the
    addon's name, its download count and the change in that count.

    Safe to call more than once: an existing `addon` table is left untouched.
    """
    sql = """
    CREATE TABLE IF NOT EXISTS addon(
        timestamp DATETIME NOT NULL,
        source TEXT NOT NULL,
        source_id INTEGER NOT NULL,
        name TEXT NOT NULL,
        count INTEGER NOT NULL,
        count_difference INTEGER NOT NULL,
        PRIMARY KEY (timestamp, source, source_id)
    );"""
    db = db_connection()
    try:
        db.execute(sql)
        db.commit()
    finally:
        # Always release the connection, even if the statement fails.
        db.close()

def main(timestamp, curr_catalogue_fname, prev_catalogue_fname):
    """Record one catalogue snapshot's download counts in the history database.

    Loads the catalogue at `curr_catalogue_fname` and, if it exists, the one at
    `prev_catalogue_fname`. For every addon in the current catalogue's
    'addon-summary-list' it inserts a row stamped with `timestamp`, holding the
    download count and its difference from the same addon in the previous
    catalogue. The database is created first if its file does not exist.

    All rows are committed together at the end, so a failure part-way through
    leaves the database without any rows from this call.
    """
    with open(curr_catalogue_fname, 'r') as fh:
        curr_catalogue = json.load(fh)

    prev_catalogue = {'addon-summary-list': []}
    # previous catalogue won't exist on first call to script.
    if os.path.exists(prev_catalogue_fname):
        with open(prev_catalogue_fname, 'r') as fh:
            prev_catalogue = json.load(fh)

    addon_list = curr_catalogue['addon-summary-list']
    prev_addon_idx = mkidx(prev_catalogue['addon-summary-list'])

    if not os.path.exists(database):
        init_db()

    db = db_connection()
    try:
        for addon in addon_list:
            # An addon absent from the previous catalogue is compared against
            # an empty dict.
            prev_addon = prev_addon_idx.get(keyfn(addon), {})
            addon.update(popularity_data(addon, prev_addon))
            addon['timestamp'] = timestamp
            insert(db, addon)
        db.commit()
    finally:
        # Always release the connection, even if an insert fails.
        db.close()

if __name__ == '__main__':
    # Expects: <timestamp> <current catalogue file> <previous catalogue file>.
    # Any further arguments are ignored.
    args = sys.argv[1:4]
    if len(args) != 3:
        sys.exit("usage: history-script.py <timestamp> <catalogue-file> <previous-catalogue-file>")
    main(*args)
32 changes: 32 additions & 0 deletions history-script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# goes through git history of master, oldest to newest, and creates a database
# of popularity data for addons over time.
#
# usage: history-script.sh [catalogue-file]    (default: full-catalogue.json)
#
# writes: history.sqlite3, most-popular-curseforge.json, most-popular-wowinterface.json
set -e

catalogue=${1:-"full-catalogue.json"}

# clean up from any previous runs.
# the glob must stay unquoted, otherwise it is never expanded and nothing is removed.
git checkout master
rm -f -- ./*.json.prev "$catalogue.prev" history.sqlite3

# checking out an older commit removes tracked files that commit doesn't have,
# which can include the python script itself, so run it from a private copy.
workdir=$(mktemp -d)
cp history-script.py "$workdir/history-script.py"

# always remove temporary files and return to master, even when a step below
# fails and `set -e` aborts the run part-way through the history.
cleanup() {
    rm -rf -- "$workdir"
    rm -f -- "$catalogue.prev"
    git checkout master
}
trap cleanup EXIT

# list of commit references, oldest to newest
ref_list=$(git rev-list --reverse HEAD)

# checkout ref, call script with ref, timestamp and catalogue files
for ref in $ref_list; do
    git checkout --quiet "$ref"
    # the catalogue may not exist at every point in the history
    if [ ! -f "$catalogue" ]; then
        echo "skipping $ref: $catalogue not found" >&2
        continue
    fi
    timestamp=$(TZ=UTC git --no-pager log -1 --format=%cd --date=iso-strict-local)
    echo "$timestamp: $ref"
    python "$workdir/history-script.py" "$timestamp" "$catalogue" "$catalogue.prev"
    # so the next iteration reads from previous iteration's catalogue
    cp "$catalogue" "$catalogue.prev"
    echo "---"
done

# writes the 50 addons of source $1 with the largest summed count_difference
# to most-popular-$1.json
write_most_popular() {
    local source=$1
    local outfile="most-popular-$source.json"
    sqlite3 -json history.sqlite3 "select sum(count_difference) as pop, source, source_id as 'source-id', name from addon where source = '$source' group by source, source_id, name order by pop desc limit 50;" > "$outfile"
    echo "wrote $outfile"
}

write_most_popular curseforge
write_most_popular wowinterface

0 comments on commit 4f4ccde

Please sign in to comment.