Skip to content

Commit

Permalink
Fix wordCount and illustrationType
Browse files Browse the repository at this point in the history
  • Loading branch information
wragge committed Aug 29, 2023
1 parent ac616f4 commit defa895
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 187 deletions.
120 changes: 20 additions & 100 deletions 00_parser.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The dotenv extension is already loaded. To reload it, use:\n",
" %reload_ext dotenv\n"
]
}
],
"outputs": [],
"source": [
"#hide\n",
"from nbdev.showdoc import *\n",
Expand Down Expand Up @@ -90,7 +81,7 @@
" new_params = parse_qs(parsed_url.query)\n",
" else:\n",
" # These params can be accepted as is.\n",
" safe = ['l-category', 'l-title', 'l-decade', 'l-year', 'l-month', 'l-state', 'l-word', 'include']\n",
" safe = ['l-category', 'l-title', 'l-decade', 'l-year', 'l-month', 'l-state', 'include']\n",
" new_params = {}\n",
" dates = {}\n",
" keywords = []\n",
Expand All @@ -102,8 +93,11 @@
" new_params[key].append(value)\n",
" except KeyError:\n",
" new_params[key] = [value]\n",
" elif key == 'l-advWord':\n",
" new_params['l-word'] = value\n",
" elif key in ['l-word', 'l-advWord']:\n",
" if api_version == 2:\n",
" new_params['l-word'] = value\n",
" elif api_version == 3:\n",
" new_params['l-wordCount'] = value\n",
" elif key == 'l-advstate':\n",
" try:\n",
" new_params['l-state'].append(value)\n",
Expand All @@ -121,10 +115,10 @@
" new_params['l-title'] = [value]\n",
" elif key in ['l-illustrationType', 'l-advIllustrationType']:\n",
" new_params['l-illustrated'] = 'true'\n",
" try:\n",
" new_params['l-illtype'].append(value)\n",
" except KeyError:\n",
" if api_version == 2:\n",
" new_params['l-illtype'] = [value]\n",
" elif api_version == 3:\n",
" new_params['l-illustrationType'] = [value]\n",
" elif key == 'date.from':\n",
" dates['from'] = value\n",
" elif key == 'date.to':\n",
Expand Down Expand Up @@ -174,31 +168,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"<h4 id=\"parse_query\" class=\"doc_header\"><code>parse_query</code><a href=\"__main__.py#L21\" class=\"source_link\" style=\"float:right\">[source]</a></h4>\n",
"\n",
"> <code>parse_query</code>(**`query`**, **`api_version`**=*`2`*)\n",
"\n",
"Converts the parameters of a search using the Trove web interface into a form the API will understand.\n",
"\n",
"Parameters: \n",
"* `query` – the url of a search in the Trove newspapers & gazettes category\n",
"* `api_version` – Trove API version (default is 2)\n",
"\n",
"Returns: \n",
"* a dict containing the parameters (multiple values will be in a list)"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"show_doc(parse_query)"
]
Expand All @@ -223,24 +193,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'q': 'wragge',\n",
" 'l-artType': 'newspapers',\n",
" 'l-state': ['Queensland'],\n",
" 'l-category': ['Article'],\n",
" 'l-illustrated': 'true',\n",
" 'l-illtype': ['Cartoon'],\n",
" 'category': 'newspaper'}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-state=Queensland&l-category=Article&l-illustrationType=Cartoon', 3)\n",
"params"
Expand Down Expand Up @@ -379,18 +332,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'q': 'wragge date:[1901 TO 1903]', 'zone': 'newspaper'}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge%20date%3A%5B1901%20TO%201903%5D&l-artType=newspapers')"
]
Expand Down Expand Up @@ -470,18 +412,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'q': 'wragge', 'zone': 'newspaper', 'l-decade': ['190']}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-decade=190')"
]
Expand All @@ -497,18 +428,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'q': 'wragge', 'zone': 'newspaper', 'l-decade': ['190'], 'l-year': ['1903']}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-decade=190&l-year=1903')"
]
Expand Down Expand Up @@ -542,7 +462,7 @@
"metadata": {},
"outputs": [],
"source": [
"assert {'q': 'wragge', 'zone': 'newspaper', 'l-word': ['1000+ Words']} == parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-word=1000%2B%20Words')"
"assert {'q': 'wragge', 'zone': 'newspaper', 'l-word': '1000+ Words'} == parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-word=1000%2B%20Words')"
]
},
{
Expand Down Expand Up @@ -910,7 +830,7 @@
"outputs": [],
"source": [
"params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-illustrationType=Photo', 3)\n",
"assert {'q': 'wragge', 'category': 'newspaper', 'l-illustrated': 'true', 'l-illtype': ['Photo'], 'l-artType': 'newspapers'} == params\n",
"assert {'q': 'wragge', 'category': 'newspaper', 'l-illustrated': 'true', 'l-illustrationType': ['Photo'], 'l-artType': 'newspapers'} == params\n",
"assert query_api(params) == 200"
]
},
Expand All @@ -928,7 +848,7 @@
"outputs": [],
"source": [
"params = parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&l-artType=newspapers&l-word=1000%2B%20Words', 3)\n",
"assert {'q': 'wragge', 'category': 'newspaper', 'l-word': ['1000+ Words'], 'l-artType': 'newspapers'} == params\n",
"assert {'q': 'wragge', 'category': 'newspaper', 'l-wordCount': '1000+ Words', 'l-artType': 'newspapers'} == params\n",
"assert query_api(params) == 200"
]
},
Expand Down Expand Up @@ -1079,7 +999,7 @@
"outputs": [],
"source": [
"params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advIllustrationType=Photo', 3)\n",
"assert {'q': 'wragge', 'category': 'newspaper', 'l-illustrated': 'true', 'l-illtype': ['Photo'], 'l-artType': 'newspapers'} == params\n",
"assert {'q': 'wragge', 'category': 'newspaper', 'l-illustrated': 'true', 'l-illustrationType': ['Photo'], 'l-artType': 'newspapers'} == params\n",
"assert query_api(params) == 200"
]
},
Expand All @@ -1097,7 +1017,7 @@
"outputs": [],
"source": [
"params = parse_query('https://trove.nla.gov.au/search/advanced/category/newspapers?keyword=wragge&l-advArtType=newspapers&l-advWord=100%20-%201000%20Words', 3)\n",
"assert {'q': 'wragge', 'category': 'newspaper', 'l-word': '100 - 1000 Words', 'l-artType': 'newspapers'} == params\n",
"assert {'q': 'wragge', 'category': 'newspaper', 'l-wordCount': '100 - 1000 Words', 'l-artType': 'newspapers'} == params\n",
"assert query_api(params) == 200"
]
},
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ parse_query('https://trove.nla.gov.au/search/category/newspapers?keyword=wragge&
'l-state': ['Queensland'],
'l-category': ['Article'],
'l-illustrated': 'true',
'l-illtype': ['Cartoon'],
'l-illustrationType': ['Cartoon'],
'category': 'newspaper'}


Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ <h2 id="How-to-use">How to use<a class="anchor-link" href="#How-to-use"> </a></h
&#39;l-state&#39;: [&#39;Queensland&#39;],
&#39;l-category&#39;: [&#39;Article&#39;],
&#39;l-illustrated&#39;: &#39;true&#39;,
&#39;l-illtype&#39;: [&#39;Cartoon&#39;],
&#39;l-illustrationType&#39;: [&#39;Cartoon&#39;],
&#39;category&#39;: &#39;newspaper&#39;}</pre>
</div>

Expand Down
Loading

0 comments on commit defa895

Please sign in to comment.