-
Notifications
You must be signed in to change notification settings - Fork 14
132 lines (129 loc) · 5.04 KB
/
fetch_filter_resources.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
name: Weekly resource fetching and community filtering

# Triggers: manual dispatch plus a weekly schedule.
on:
  workflow_dispatch:
  schedule:
    # Every Sunday at 08:00 (GitHub Actions evaluates cron expressions in UTC)
    - cron: "0 8 * * 0"
# Allow only one run of this workflow at a time: a newly queued run waits for
# the run in progress, and runs queued between the in-progress run and the
# latest queued one are skipped.
# Do NOT cancel an in-progress run, so that a fetch that has started can finish.
concurrency:
  group: "tools"
  cancel-in-progress: false
jobs:
  # Job 1: list the available public servers and publish the CSV as an
  # artifact for the following jobs.
  fetch-servers:
    # The ubuntu-20.04 hosted runner image has been retired by GitHub;
    # pin a currently supported image instead.
    runs-on: ubuntu-22.04
    name: Fetch servers
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install requirement
        run: |
          python -m pip install -r requirements.txt
          # -y: never wait for a confirmation prompt in non-interactive CI
          sudo apt-get install -y jq
      - name: Fetch list of all available servers
        run: |
          python sources/bin/get_public_galaxy_servers.py -o sources/data/available_public_servers.csv
      - name: Archive available servers
        uses: actions/upload-artifact@v4
        with:
          name: available-servers
          path: sources/data/available_public_servers.csv

  # Job 2: run the tool extraction once per repository sublist (matrix) and
  # upload each sublist's result files as a separate artifact.
  fetch-tools-stepwise:
    runs-on: ubuntu-22.04
    name: Fetch tool stepwise
    environment: fetch-tools
    needs: fetch-servers
    strategy:
      # max-parallel: 1  # need to run one after another, since otherwise there
      # is a chance that multiple jobs want to push to the results branch at
      # the same time (which fails due to merge)
      matrix:
        subset:
          - repositories01.list
          - repositories02.list
          - repositories03.list
          - repositories04.list
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install requirement
        run: python -m pip install -r requirements.txt
      - name: Download available servers
        uses: actions/download-artifact@v4
        with:
          name: available-servers
          path: sources/data/
      - name: Fetch all tool stepwise
        run: |
          bash sources/bin/extract_all_tools.sh "${{ matrix.subset }}"
        env:
          GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }}
      - name: Archive tool sublists production artifacts
        uses: actions/upload-artifact@v4
        with:
          name: tools-${{ matrix.subset }}
          path: communities/all/resources/${{ matrix.subset }}_tools.*

  # Job 3: merge the per-sublist tool files, fetch workflows and tutorials,
  # filter everything per community and open a pull request with the result.
  merge-fetch-filter:
    runs-on: ubuntu-22.04
    needs: fetch-tools-stepwise
    name: Merge tools, fetch tutorials and filter the resources for communities
    # create-pull-request pushes a branch and opens a PR with the default
    # GITHUB_TOKEN, which needs these scopes when the repository default
    # token permissions are read-only.
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install requirement
        run: |
          python -m pip install -r requirements.txt
          # -y: never wait for a confirmation prompt in non-interactive CI
          sudo apt-get install -y jq
      - name: Download stepwise tool lists
        uses: actions/download-artifact@v4
        with:
          pattern: tools-*
          merge-multiple: true
          path: communities/all/resources/
      - name: Display structure of downloaded files
        run: ls -R communities/all/resources/
      # Merge the per-sublist outputs into single files.
      # TSV: keep the header of the first file only ->
      #   https://stackoverflow.com/questions/16890582/unixmerge-multiple-csv-files-with-same-header-by-keeping-the-header-of-the-firs
      # JSON: map(.[]) yields one flat array, one tool per entry ->
      #   https://stackoverflow.com/questions/42011086/merge-arrays-of-json
      - name: Merge all tools
        run: |
          awk 'FNR==1 && NR!=1{next;}{print}' communities/all/resources/repositories*.list_tools.tsv > communities/all/resources/tools.tsv
          jq -s 'map(.[])' communities/all/resources/repositories*.list_tools.json > communities/all/resources/tools.json
          rm communities/all/resources/repositories*.list_tools.json
          rm communities/all/resources/repositories*.list_tools.tsv
      - name: Generate wordcloud and interactive table
        run: |
          bash sources/bin/format_tools.sh
      - name: Fetch all workflows
        run: |
          bash sources/bin/extract_all_workflows.sh
      - name: Filter workflows for communities
        run: |
          bash sources/bin/get_community_workflows.sh
      - name: Fetch all tutorials
        run: |
          bash sources/bin/extract_all_tutorials.sh
        env:
          PLAUSIBLE_API_KEY: ${{ secrets.PLAUSIBLE_API_TOKEN }}
      - name: Filter tutorials for communities
        run: |
          bash sources/bin/get_community_tutorials.sh
      # - name: Update tool to keep and exclude for communities
      #   run: |
      #     bash sources/bin/update_tools_to_keep_exclude.sh
      - name: Filter tools for communities
        run: |
          bash sources/bin/get_community_tools.sh
      # NOTE(review): v4 of this action is several major versions old; consider
      # upgrading after checking its release notes for input changes.
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v4
        with:
          commit-message: Update resources
          title: Automatic resources update
          body: Automatic resource update done via GitHub Action once a week
          base: main
          branch: resource-update
          delete-branch: true