Skip to content

Commit 95282c1

Browse files
committed
anyio
1 parent 1d817a5 commit 95282c1

File tree

2 files changed

+3
-15
lines changed

2 files changed

+3
-15
lines changed

examples/refresh_chroma/refresh_collection.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,7 @@
16 16
def html_parser(html: str) -> str:
17 17
import trafilatura
18 18

19-
trafilatura_config = trafilatura.settings.use_config()
20-
# disable signal, so it can run in a worker thread
21-
# https://github.com/adbar/trafilatura/issues/202
22-
trafilatura_config.set("DEFAULT", "EXTRACTION_TIMEOUT", "0")
23-
return (
24-
trafilatura.extract(html, config=trafilatura_config)
25-
or BeautifulSoup(html, "html.parser").get_text()
26-
)
19+
return trafilatura.extract(html) or BeautifulSoup(html, "html.parser").get_text()
27 20

28 21

29 22
raggy.settings.html_parser = html_parser

pyproject.toml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ keywords = ["scraping", "nlp", "ai", "document-processing"]
15 15
requires-python = ">=3.10"
16 16
dependencies = [
17 17
"aiofiles",
18-
"anyio<4.0.0",
19 18
"bs4",
20 19
"chardet",
21 20
"fake-useragent",
@@ -86,11 +85,7 @@ asyncio_mode = 'auto'
86 85
filterwarnings = [
87 86
"ignore:'crypt' is deprecated and slated for removal in Python 3.13:DeprecationWarning",
88 87
]
89-
env = [
90-
'D:RAGGY_LOG_VERBOSE=1',
91-
'D:RAGGY_LOG_LEVEL=DEBUG',
92-
'PYTEST_TIMEOUT=20',
93-
]
88+
env = ['D:RAGGY_LOG_VERBOSE=1', 'D:RAGGY_LOG_LEVEL=DEBUG', 'PYTEST_TIMEOUT=20']
94 89

95 90
[tool.ruff]
96 91
extend-select = ["I"]
@@ -106,4 +101,4 @@ skip-magic-trailing-comma = false
106 101
'tests/fixtures/*.py' = ['F403']
107 102

108 103
[tool.setuptools_scm]
109-
write_to = "src/raggy/_version.py"
104+
write_to = "src/raggy/_version.py"

0 commit comments

Comments
 (0)