Skip to content

Commit 1a267ad

Browse files
committed
Add column for Google Assured OSS
1 parent 498ac21 commit 1a267ad

File tree

2 files changed

+28
-6
lines changed

2 files changed

+28
-6
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ ackg|0.0.5||0|0|2021-01-21 04:37:10
3131
- Maintainers on PyPI
3232
- URLs used by packages
3333
- OpenSSF scorecard data
34+
- Google Assured OSS
3435

3536
### Database Schemas
3637

@@ -46,7 +47,8 @@ CREATE TABLE packages (
4647
recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
4748
downloads INTEGER,
4849
scorecard_overall FLOAT,
49-
PRIMARY KEY (name, version)
50+
in_google_assured_oss BOOLEAN,
51+
PRIMARY KEY (name)
5052
);
5153

5254
-- Dependencies --

main.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
# logger.setLevel(logging.DEBUG)
2626
# logger.addHandler(logging.StreamHandler())
2727

28+
MAX_WORKERS = 16
29+
GOOGLE_ASSURED_OSS_PACKAGES = set()
2830

2931
@contextlib.contextmanager
3032
def locked_db():
@@ -132,7 +134,7 @@ def get_metadata_by_install(package, resp):
132134

133135
def get_maintainers_from_pypi(package: str):
134136
for _ in range(5):
135-
resp = http.request("GET", f"https://pypi.org/project/{package}")
137+
resp = http.request("GET", f"https://pypi.org/project/{package}/")
136138
if resp.status == 404:
137139
return set()
138140
elif resp.status != 200:
@@ -200,7 +202,7 @@ def get_project_urls(info: dict) -> list[tuple[str, str, str]]:
200202

201203

202204
def update_data_for_package(package: str) -> None:
203-
global downloads, db_lock
205+
global downloads, db_lock, GOOGLE_ASSURED_OSS_PACKAGES
204206

205207
resp = http.request("GET", f"https://pypi.org/pypi/{package}/json")
206208

@@ -290,8 +292,8 @@ def update_data_for_package(package: str) -> None:
290292
db.execute(
291293
"""
292294
INSERT OR IGNORE INTO packages (
293-
name, version, requires_python, has_binary_wheel, uploaded_at, downloads, scorecard_overall
294-
) VALUES (?, ?, ?, ?, ?, ?, ?);
295+
name, version, requires_python, has_binary_wheel, uploaded_at, downloads, scorecard_overall, in_google_assured_oss
296+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?);
295297
""",
296298
(
297299
package,
@@ -301,6 +303,7 @@ def update_data_for_package(package: str) -> None:
301303
uploaded_at,
302304
package_downloads,
303305
scorecard_overall,
306+
package.lower() in GOOGLE_ASSURED_OSS_PACKAGES
304307
),
305308
)
306309

@@ -420,9 +423,23 @@ def update_data_from_pypi():
420423
pass
421424

422425

426+
def get_google_assured_oss_packages(http: "urllib3.PoolManager") -> set[str]:
    """Return the Python package names listed on the Google Assured OSS page.

    Fetches the Assured OSS "supported packages" page and scrapes the first
    ``<ul>`` that follows the ``<h2 id="python"`` heading.

    Args:
        http: Pool manager (or any object exposing a compatible ``request``
            method) used to issue the GET request.

    Returns:
        The stripped, lowercased text of every ``<li>`` in that list. An empty
        set is returned when the response status is not 200 or when the
        expected heading/list markup cannot be located.
    """
    resp = http.request(
        "GET",
        "https://cloud.google.com/assured-open-source-software/docs/supported-packages",
    )
    # An error page would not contain the package list; don't try to parse it.
    if resp.status != 200:
        return set()
    html = resp.data.decode("utf-8")

    # Start after the Python heading, then look for the first list.
    heading = html.find('<h2 id="python"')
    if heading == -1:
        return set()
    start = html.find("<ul>", heading)
    if start == -1:
        return set()
    # Search for the closing tag *after* the opening one so that an unrelated
    # "</ul>" between the heading and the list cannot yield an empty slice.
    end = html.find("</ul>", start)
    if end == -1:
        return set()

    # Strip whitespace so entries match lookups done with ``name.lower()``.
    names = (
        item.strip().lower()
        for item in re.findall(r"<li>([^<]+)</li>", html[start:end])
    )
    return {name for name in names if name}
435+
436+
423437
if __name__ == "__main__":
424438
base_dir = os.path.dirname((os.path.abspath(__file__)))
425439
http = urllib3.PoolManager(
440+
block=True,
441+
strict=True,
442+
maxsize=MAX_WORKERS,
426443
headers=urllib3.util.make_headers(
427444
keep_alive=True,
428445
accept_encoding=True,
@@ -434,6 +451,8 @@ def update_data_from_pypi():
434451
)
435452
wheel_re = re.compile(r"-([^\-]+-[^\-]+-[^\-]+)\.whl$")
436453

454+
GOOGLE_ASSURED_OSS_PACKAGES = get_google_assured_oss_packages(http)
455+
437456
tmp_dir = tempfile.mkdtemp()
438457
os.system(f"virtualenv {tmp_dir}/venv > /dev/null")
439458
venv_python = os.path.join(tmp_dir, "venv/bin/python")
@@ -460,6 +479,7 @@ def update_data_from_pypi():
460479
recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
461480
downloads INTEGER,
462481
scorecard_overall FLOAT,
482+
in_google_assured_oss BOOLEAN,
463483
PRIMARY KEY (name)
464484
);
465485
"""
@@ -536,7 +556,7 @@ def update_data_from_pypi():
536556
)
537557
_DB.commit()
538558
db_lock = threading.Lock()
539-
pool = ThreadPoolExecutor()
559+
pool = ThreadPoolExecutor(max_workers=MAX_WORKERS)
540560

541561
packages = get_all_package_names()
542562

0 commit comments

Comments
 (0)