Skip to content

Commit 2b452ff

Browse files
author
s-pace
committed
feat(meta): do not jsonize version meta
1 parent f4d9ff5 commit 2b452ff

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

scraper/src/strategies/default_strategy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def get_records_from_dom(self, current_page_url=None):
177177
record[name] = content
178178

179179
if name == "version":
180-
record[name] = str(record[name])
180+
record[name] = str(content)
181181

182182
if current_page_url is not None:
183183
# Add variables to the record

scraper/src/tests/default_strategy/meta_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,37 @@ def test_meta_numbered_version(self):
187187
assert actual[0]['version'] == "1.0"
188188
assert actual[1]['version'] != 1
189189

190+
def test_meta_decimal_version(self):
191+
# Given
192+
strategy = get_strategy({
193+
'selectors': {
194+
'lvl0': "h1",
195+
'content': 'p'
196+
}
197+
})
198+
strategy.dom = lxml.html.fromstring("""
199+
<html>
200+
<header>
201+
<meta name="docsearch:version" content='5.20'>
202+
</header>
203+
<body>
204+
<h1>Foo</h1>
205+
<p>text</p>
206+
<h2>Bar</h2>
207+
<h3>Baz</h3>
208+
</body>
209+
</html>
210+
""")
211+
212+
# When
213+
actual = strategy.get_records_from_dom()
214+
215+
# Then
216+
217+
assert len(actual) == 2
218+
assert actual[0]['version'] == "5.20"
219+
assert actual[1]['version'] != 5.2
220+
190221
def test_meta_escaped_string(self):
191222
# Given
192223
strategy = get_strategy({

0 commit comments

Comments
 (0)