diff --git a/.github/workflows/run_data_sync.yml b/.github/workflows/run_data_sync.yml
index a774c35021b..16752008c4c 100644
--- a/.github/workflows/run_data_sync.yml
+++ b/.github/workflows/run_data_sync.yml
@@ -30,8 +30,6 @@ env:
   TITLE: Yihong0618 Running
   MIN_GRID_DISTANCE: 10 # change min distance here
   TITLE_GRID: Over 10km Runs # also here
-  GITHUB_NAME: "action@github.com" # default Actions bot you can change it to yours
-  GITHUB_EMAIL: "GitHub Action" # default Actions bot you can change it to yours
 
   # IGNORE_BEFORE_SAVING: True # if you want to ignore some data before saving, set this to True
   IGNORE_START_END_RANGE: 10 # Unit meter
@@ -41,6 +39,7 @@ env:
   SAVE_DATA_IN_GITHUB_CACHE: false # if you deploy in the vercal, check the README
   DATA_CACHE_PREFIX: 'track_data'
   BUILD_GH_PAGES: true # If you do not need GitHub Page please set it to `false`
+  SAVE_TO_PARQENT: true # Set it to `true` to export the activities to run_page/data.parquet and commit it to the repo
 
 jobs:
   sync:
@@ -210,11 +209,17 @@ jobs:
           python run_page/gen_svg.py --from-db --type circular --use-localtime
           python run_page/gen_svg.py --from-db --year $(date +"%Y") --language zh_CN --title "$(date +"%Y") Running" --type github --athlete "${{ env.ATHLETE }}" --special-distance 10 --special-distance2 20 --special-color yellow --special-color2 red --output assets/github_$(date +"%Y").svg --use-localtime --min-distance 0.5
 
+      - name: Save data to Parquet
+        if: env.SAVE_TO_PARQENT == 'true'
+        run: |
+          pip install duckdb
+          python run_page/save_to_parqent.py
+
       - name: Push new runs
         if: env.SAVE_DATA_IN_GITHUB_CACHE != 'true'
         run: |
-          git config --local user.email "${{ env.GITHUB_EMAIL }}"
-          git config --local user.name "${{ env.GITHUB_NAME }}"
+          git config --local user.email "action@github.com"
+          git config --local user.name "GitHub Action"
           git add .
           git commit -m 'update new runs' || echo "nothing to commit"
           git push || echo "nothing to push"
diff --git a/requirements.txt b/requirements.txt
index 124aa41936d..24f8c00d953 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,3 +27,4 @@ garmin-fit-sdk
 haversine==2.8.0
 garth
 pycryptodome
+duckdb
diff --git a/run_page/save_to_parqent.py b/run_page/save_to_parqent.py
new file mode 100644
index 00000000000..a8da406df33
--- /dev/null
+++ b/run_page/save_to_parqent.py
@@ -0,0 +1,117 @@
+"""Export the ``activities`` table of the SQLite database to Parquet.
+
+Run from the repository root (the paths below are relative to it):
+
+    python run_page/save_to_parqent.py
+
+It reads ``run_page/data.db`` and writes ``run_page/data.parquet``.
+
+examples:
+
+duckdb.sql("select regexp_extract(location_country, '[\u4e00-\u9fa5]{2,}(市|自治州|特别行政区)') as run_location, concat(try_cast(sum(distance/1000) as integer)::varchar,' km') as run_distance from read_parquet('https://github.com/yihong0618/run/raw/refs/heads/master/run_page/data.parquet') where run_location is not NULL group by run_location order by sum(distance) desc;").show(max_rows=50)
+
+┌──────────────┬──────────────┐
+│ run_location │ run_distance │
+│   varchar    │   varchar    │
+├──────────────┼──────────────┤
+│ 大连市       │ 9328 km      │
+│ 沈阳市       │ 2030 km      │
+│ 北京市       │ 61 km        │
+│ 长沙市       │ 24 km        │
+│ 扬州市       │ 21 km        │
+│ 盘锦市       │ 21 km        │
+│ 烟台市       │ 21 km        │
+│ 上海市       │ 12 km        │
+│ 北九州市     │ 7 km         │
+│ 丹东市       │ 5 km         │
+│ 瓦房店市     │ 4 km         │
+│ 竹田市       │ 3 km         │
+│ 伊万里市     │ 2 km         │
+│ 长春市       │ 1 km         │
+│ 锦州市       │ 1 km         │
+│              │ 0 km         │
+├──────────────┴──────────────┤
+│ 16 rows           2 columns │
+└─────────────────────────────┘
+
+duckdb.sql("select start_date_local, distance, name, location_country from read_parquet('https://github.com/yihong0618/run/raw/refs/heads/master/run_page/data.parquet') order by run_id desc limit 1;")
+
+
+duckdb.sql("select start_date_local[:4] as year, sum(distance/1000)::integer from read_parquet('https://github.com/yihong0618/run/raw/refs/heads/master/run_page/data.parquet') group by year order by year desc;").show(max_rows=50)
+
+┌─────────┬─────────────────────────────────────────┐
+│  year   │ CAST(sum((distance / 1000)) AS INTEGER) │
+│ varchar │                  int32                  │
+├─────────┼─────────────────────────────────────────┤
+│ 2024    │                                    1605 │
+│ 2023    │                                     696 │
+│ 2022    │                                     758 │
+│ 2021    │                                    1244 │
+│ 2020    │                                    1284 │
+│ 2019    │                                    1344 │
+│ 2018    │                                     405 │
+│ 2017    │                                     964 │
+│ 2016    │                                     901 │
+│ 2015    │                                     436 │
+│ 2014    │                                     823 │
+│ 2013    │                                     790 │
+│ 2012    │                                     387 │
+├─────────┴─────────────────────────────────────────┤
+│ 13 rows                                 2 columns │
+└───────────────────────────────────────────────────┘
+
+duckdb.sql("SELECT concat(try_cast(distance/1000 as integer)::varchar,' km') as distance_km,count(*) FROM read_parquet('https://github.com/yihong0618/run/raw/refs/heads/master/run_page/data.parquet') GROUP BY distance_km order by count(*) desc;").show(max_rows=50)
+
+┌─────────────┬──────────────┐
+│ distance_km │ count_star() │
+│   varchar   │    int64     │
+├─────────────┼──────────────┤
+│ 2 km        │          706 │
+│ 3 km        │          639 │
+│ 1 km        │          493 │
+│ 5 km        │          391 │
+│ 4 km        │          337 │
+│ 6 km        │          164 │
+│ 10 km       │           84 │
+│ 8 km        │           55 │
+│ 7 km        │           54 │
+│ 0 km        │           29 │
+│ 12 km       │           25 │
+│ 11 km       │           17 │
+│ 9 km        │           17 │
+│ 15 km       │           15 │
+│ 21 km       │            8 │
+│ 16 km       │            7 │
+│ 14 km       │            6 │
+│ 20 km       │            6 │
+│ 17 km       │            4 │
+│ 18 km       │            3 │
+│ 19 km       │            2 │
+│ 13 km       │            2 │
+│ 43 km       │            2 │
+│ 24 km       │            1 │
+│ 41 km       │            1 │
+│ 28 km       │            1 │
+├─────────────┴──────────────┤
+│ 26 rows          2 columns │
+└────────────────────────────┘
+"""
+
+import duckdb
+
+
+def main() -> None:
+    """Copy every row of ``activities`` into ``run_page/data.parquet``."""
+    with duckdb.connect() as conn:
+        # The sqlite extension is what lets DuckDB ATTACH a SQLite file.
+        conn.install_extension("sqlite")
+        conn.load_extension("sqlite")
+        # Attach the database and make it the default for the COPY below.
+        conn.sql("ATTACH 'run_page/data.db' (TYPE SQLITE);USE data;")
+        conn.sql(
+            "COPY (SELECT * FROM activities) TO 'run_page/data.parquet' (FORMAT PARQUET);"
+        )
+
+
+if __name__ == "__main__":
+    main()