@@ -1,13 +1,16 @@
-from bs4 import BeautifulSoup
-import grequests
-import markdown
+import asyncio
 import os
 import unittest
+
+import httpx
+import markdown
+from bs4 import BeautifulSoup
+
 from . import (
+    DOC_NAMES,
+    DOCS_DIR,
     SCHEMA_NAMES,
     all_properties,
-    DOCS_DIR,
-    DOC_NAMES,
     property_doc_name,
     schema_enum_registry,
 )
@@ -117,44 +120,45 @@ def error_msg(schema_name, value, enum):
                     schema_name, v, enum
                 ) # noqa

-    def test_urls_in_docs(s):
-        def exception(request, exception):
-            return f"{request} - {exception}"
+    def test_urls_in_docs(self):
+        async def async_requests(urls):
+            async with httpx.AsyncClient(timeout=60) as client:
+                responses = (client.get(url) for url in urls)
+                results = await asyncio.gather(*responses, return_exceptions=True)

-        def async_requests(urls):
-            results = grequests.map(
-                (grequests.get(u) for u in urls), exception_handler=exception, size=100
-            )
             return results

-        urls = []
+        urls = ["https://www.google.com/", "https://www.google.com/404", "sdfghjk"]

-        for docname in DOC_NAMES:
-            filename = os.path.join(DOCS_DIR, f"{docname}.md")
-            with open(filename) as f:
-                doc_html = markdown.markdown(f.read())
-                soup = BeautifulSoup(doc_html, features="html.parser")
-                links = soup.find_all("a")
-                for link in links:
-                    url = link.get("href")
-                    if not url.startswith("http"):
-                        raise ValueError(f"Invalid URL in {docname}: {url}")
+        # for docname in DOC_NAMES:
+        #     filename = os.path.join(DOCS_DIR, f"{docname}.md")
+        #     with open(filename) as f:
+        #         doc_html = markdown.markdown(f.read())
+        #         soup = BeautifulSoup(doc_html, features="html.parser")
+        #         links = soup.find_all("a")
+        #         for link in links:
+        #             url = link.get("href")
+        #             if not url.startswith("http"):
+        #                 raise ValueError(f"Invalid URL in {docname}: {url}")

-                    urls.append(url)
+        #             urls.append(url)

-        results = async_requests(urls)
+        results = asyncio.run(async_requests(urls))

         warns = []
         not_founds = []
-        for resp in results:
-            if not resp.ok:
-                warns.append(f"failed {resp.status_code}: {resp.url}")
-                if resp.status_code in [404]:
-                    not_founds.append(resp.url)
+        for response in results:
+            if isinstance(response, httpx.HTTPError):
+                warns.append(f"failed {response!s}: {response.request.url!s}")
+            else:
+                if not response.is_success:
+                    warns.append(f"failed {response.status_code}: {response.url!s}")

-        if not_founds:
-            raise ValueError(f"URLs not found: \n {not_founds}")
+                    if response.status_code in (404,):
+                        not_founds.append(str(response.url))

         print("\n=== Minor URL link warnings ===\n")
         for w in warns:
             print(w)
+
+        assert not not_founds, f"URLs not found: \n {not_founds}"
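
Below is a minimal, self-contained sketch of the pattern this change adopts: one shared httpx.AsyncClient, requests fanned out with asyncio.gather(return_exceptions=True), and transport errors separated from non-2xx responses afterwards. The URLs and function names here are illustrative placeholders, not part of the repository.

import asyncio

import httpx


async def fetch_all(urls):
    # One client is reused for every request; gather runs the GETs
    # concurrently and, with return_exceptions=True, returns raised
    # exceptions as result items instead of cancelling the whole batch.
    async with httpx.AsyncClient(timeout=60) as client:
        return await asyncio.gather(
            *(client.get(url) for url in urls), return_exceptions=True
        )


def report(urls):
    results = asyncio.run(fetch_all(urls))
    for url, result in zip(urls, results):
        if isinstance(result, Exception):
            # Transport-level failure: DNS error, invalid URL, timeout, etc.
            print(f"error  {url}: {result!r}")
        elif not result.is_success:
            # A response arrived, but with a non-2xx status code.
            print(f"failed {result.status_code}: {url}")
        else:
            print(f"ok     {url}")


if __name__ == "__main__":
    report(["https://www.example.com/", "https://www.example.com/nope"])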