Skip to content

Commit 828914a

Browse files
committed
Add support for 'knockout' selectors that get filtered from output
1 parent ff4f61c commit 828914a

File tree

3 files changed

+65
-2
lines changed

3 files changed

+65
-2
lines changed

frozen_soup/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Optional, Union
1+
from typing import Optional, Union, List
22

33
import requests
44

@@ -15,6 +15,7 @@ def freeze_to_string(
1515
session: Optional[requests.Session] = None,
1616
timeout: Union[float, tuple[float, float], None] = 900.0,
1717
formatter: str = 'html5',
18+
knockouts: Optional[List[str]] = None,
1819
) -> str:
1920
if session is None:
2021
session = requests.Session()
@@ -23,6 +24,12 @@ def freeze_to_string(
2324

2425
soup = BeautifulSoup(r.text, 'html.parser')
2526

27+
# Process the knockouts first so we don't do any extra work on elements that get removed from the output
28+
if knockouts is not None:
29+
for selector in knockouts:
30+
for tag in soup.css.select(selector):
31+
tag.decompose()
32+
2633
base_url = url
2734

2835
# Find the first <base href="">, which could follow a <base target="">

frozen_soup/__main__.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,26 +29,35 @@ def main() -> int:
2929
'-T', '--timeout',
3030
type=float,
3131
default=900.0,
32+
metavar= 'SECONDS',
3233
help='default connect and read timeout in seconds'
3334
)
3435
parser.add_argument(
3536
'--connect-timeout',
3637
type=float,
38+
metavar= 'SECONDS',
3739
help='default connect timeout in seconds (will override --timeout)'
3840
)
3941
parser.add_argument(
4042
'--read-timeout',
4143
type=float,
44+
metavar= 'SECONDS',
4245
help='default read timeout in seconds (will override --timeout)'
4346
)
47+
parser.add_argument(
48+
'--knockout',
49+
action= 'append',
50+
metavar= 'SELECTOR',
51+
help='knock out elements matching the given CSS selector'
52+
)
4453

4554
args = parser.parse_args()
4655

4756
timeout = args.timeout
4857
if (args.connect_timeout or args.read_timeout):
4958
timeout = (args.connect_timeout or timeout, args.read_timeout or timeout)
5059

51-
print(freeze_to_string(args.url, timeout=timeout))
60+
print(freeze_to_string(args.url, timeout=timeout, knockouts=args.knockout))
5261

5362
return 0
5463

tests/test_knockout.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import pytest
2+
3+
import requests
4+
from requests_testadapter import TestAdapter, TestSession
5+
6+
from frozen_soup import freeze_to_string
7+
8+
@pytest.fixture
9+
def session() -> requests.Session:
10+
s = TestSession()
11+
12+
s.mount("http://test/content", TestAdapter(
13+
b'/* WONTON */',
14+
headers= { 'Content-type' : 'text/plain' }
15+
))
16+
17+
s.mount(
18+
"http://test/html",
19+
TestAdapter(b'<i class="ko">pow!</i><img src="/content">')
20+
)
21+
s.mount(
22+
"http://test/multiple",
23+
TestAdapter(b'<i class="ko">pow!</i><b class="ko">bang!</b><img src="/content">')
24+
)
25+
s.mount(
26+
"http://test/bad-img",
27+
TestAdapter(b'<i class="ko">pow!</i><img src="/error">')
28+
)
29+
30+
return s
31+
32+
def test_knockout(session):
33+
out = freeze_to_string('http://test/html', session, knockouts=['.ko'])
34+
assert out == '<img src="data:text/plain;base64,LyogV09OVE9OICov">'
35+
36+
def test_knockout_multiple_elements(session):
37+
out = freeze_to_string('http://test/multiple', session, knockouts=['.ko'])
38+
assert out == '<img src="data:text/plain;base64,LyogV09OVE9OICov">'
39+
40+
def test_knockout_multiple_selectors(session):
41+
out = freeze_to_string('http://test/multiple', session, knockouts=['i', 'b'])
42+
assert out == '<img src="data:text/plain;base64,LyogV09OVE9OICov">'
43+
44+
# If the knockout doesn't remove the <img>, fetching its src ("/error") will raise an exception
45+
def test_knockout_img(session):
46+
out = freeze_to_string('http://test/bad-img', session, knockouts=['img'])
47+
assert out == '<i class="ko">pow!</i>'

0 commit comments

Comments
 (0)