-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds a script for navigating the catalogue history.
I'm using it to build a database to calculate popularity since cataloguing began, but it could have other uses.
- Loading branch information
Torkus
committed
Dec 13, 2020
1 parent
b7c6f61
commit 4f4ccde
Showing
2 changed files
with
105 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import os, sys, json | ||
import sqlite3 | ||
|
||
# Path of the sqlite3 file that popularity rows are written to.
database = "history.sqlite3"
|
||
def select_keys(data, key_list):
    """Return the values of `data` for each key in `key_list`, in that order.

    Raises KeyError if `data` lacks any of the requested keys.
    """
    return [data[name] for name in key_list]
|
||
def keyfn(addon):
    """Return the `(source, source-id)` pair used to identify `addon`."""
    source = addon['source']
    source_id = addon['source-id']
    return (source, source_id)
|
||
def mkidx(addon_list):
    """Build a dict mapping each addon's `keyfn` key to the addon itself.

    If several addons share a key, the one appearing last in `addon_list` wins.
    """
    index = {}
    for entry in addon_list:
        index[keyfn(entry)] = entry
    return index
|
||
def popularity_data(curr_addon, prev_addon):
    """Return the current download count and its change since `prev_addon`.

    `prev_addon` may have no 'download-count' key (e.g. an empty dict for an
    addon that was not in the previous catalogue); the difference is then 0.
    """
    current = curr_addon['download-count']
    # fall back to the current count so a missing previous entry yields a zero difference.
    previous = prev_addon.get('download-count', current)
    return {'count': current, 'count_difference': current - previous}
|
||
def db_connection():
    """Open and return a new sqlite3 connection to the file named by `database`."""
    connection = sqlite3.connect(database)
    return connection
|
||
def insert(db, addon):
    """Execute an INSERT of one `addon` row on `db`.

    The values are read from `addon` in column order. Nothing is committed
    here; that is left to the caller.
    """
    columns = ['timestamp', 'source', 'source-id', 'name', 'count', 'count_difference']
    row = tuple(select_keys(addon, columns))
    statement = """INSERT INTO addon
(timestamp, source, source_id, name, count, count_difference)
VALUES
(?,?,?,?,?,?);"""
    db.execute(statement, row)
|
||
def init_db():
    """Create the `addon` table in the sqlite3 file named by `database`.

    Raises sqlite3.OperationalError if the table already exists, as the
    statement is a plain CREATE TABLE.
    """
    sql = """
CREATE TABLE addon(
timestamp DATETIME NOT NULL,
source TEXT NOT NULL,
source_id INTEGER NOT NULL,
name TEXT NOT NULL,
count INTEGER NOT NULL,
count_difference INTEGER NOT NULL,
PRIMARY KEY (timestamp, source, source_id)
);"""
    db = db_connection()
    try:
        db.execute(sql)
        db.commit()
    finally:
        # always release the connection, even if table creation fails.
        db.close()
|
||
def main(timestamp, curr_catalogue_fname, prev_catalogue_fname):
    """Record one popularity row per addon in the current catalogue.

    Each addon in the current catalogue's 'addon-summary-list' is matched
    against the previous catalogue by `keyfn`, annotated with its download
    count and the change since the previous catalogue, stamped with
    `timestamp` and inserted into the database. The database is created
    first if its file does not exist yet.

    timestamp            -- value stored in the `timestamp` column of every row.
    curr_catalogue_fname -- path to the current catalogue JSON file.
    prev_catalogue_fname -- path to the previous catalogue JSON file; it may
                            not exist, in which case every difference is 0.
    """
    # JSON is UTF-8 by specification; don't depend on the locale's default encoding.
    with open(curr_catalogue_fname, 'r', encoding='utf-8') as fh:
        curr_catalogue = json.load(fh)

    # previous catalogue won't exist on first call to script.
    prev_catalogue = {'addon-summary-list': []}
    if os.path.exists(prev_catalogue_fname):
        with open(prev_catalogue_fname, 'r', encoding='utf-8') as fh:
            prev_catalogue = json.load(fh)

    addon_list = curr_catalogue['addon-summary-list']
    prev_addon_idx = mkidx(prev_catalogue['addon-summary-list'])

    if not os.path.exists(database):
        init_db()

    db = db_connection()
    try:
        for addon in addon_list:
            # an addon missing from the previous catalogue is compared against an empty dict.
            prev_addon = prev_addon_idx.get(keyfn(addon), {})
            addon.update(popularity_data(addon, prev_addon))
            addon['timestamp'] = timestamp
            insert(db, addon)
        # single commit so a catalogue is recorded all-or-nothing.
        db.commit()
    finally:
        db.close()
|
||
if __name__ == '__main__':
    # main() takes exactly three positional arguments; any extras are ignored.
    args = sys.argv[1:4]
    if len(args) != 3:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit('usage: %s <timestamp> <catalogue-file> <previous-catalogue-file>' % sys.argv[0])
    main(*args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/bin/bash
# Goes through the git history, oldest to newest, and creates a database of
# popularity data for addons over time.
#
# usage: <this script> [catalogue-file]    (default: full-catalogue.json)
set -e

catalogue=${1:-"full-catalogue.json"}

# clean up from any previous runs.
# the glob must stay unquoted or it is treated as a literal file name.
git checkout master
rm -f -- *.json.prev "$catalogue.prev" history.sqlite3

# Checking out an older commit removes tracked files that commit doesn't have,
# and the helper may not exist in the commits being visited. Run it from a
# snapshot taken while master is checked out.
helper=$(mktemp)
cp history-script.py "$helper"

# restore the working tree and remove temporary files, on success or failure.
cleanup() {
    rm -f -- "$catalogue.prev" "$helper"
    git checkout master
}
trap cleanup EXIT

# list of commit references, oldest to newest
ref_list=$(git rev-list --reverse HEAD)

# checkout ref, call script with timestamp and catalogue files
for ref in $ref_list; do
    git checkout "$ref" --quiet
    timestamp=$(TZ=UTC git --no-pager log -1 --format=%cd --date=iso-strict-local)
    echo "$timestamp: $ref"
    python "$helper" "$timestamp" "$catalogue" "$catalogue.prev"
    # so the next iteration reads from previous iteration's catalogue
    cp "$catalogue" "$catalogue.prev"
    echo "---"
done

# top 50 addons per host, by total download-count growth across the history
for addon_source in curseforge wowinterface; do
    outfile="most-popular-$addon_source.json"
    sqlite3 -json history.sqlite3 "select sum(count_difference) as pop, source, source_id as 'source-id', name from addon where source = '$addon_source' group by source, source_id, name order by pop desc limit 50;" > "$outfile"
    echo "wrote $outfile"
done