Skip to content

Commit

Permalink
Scraper update
Browse files (browse the repository at this point in the history)
Year values
Input path
Add key 'year' to json output
  • Loading branch information
michaelglenister committed Jan 30, 2024
1 parent c38eac0 commit 1e345d3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 27 deletions.
5 changes: 3 additions & 2 deletions pombola/south_africa/data/members-interests/2022.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"date": "2021-12-31",
"date": "2022-12-31",
"source": "",
"register": [
{
Expand Down Expand Up @@ -45423,5 +45423,6 @@
}
]
}
]
],
"year": "2022"
}
5 changes: 3 additions & 2 deletions pombola/south_africa/data/members-interests/2023.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"date": "2021-12-31",
"date": "2023-12-31",
"source": "",
"register": [
{
Expand Down Expand Up @@ -44974,5 +44974,6 @@
}
]
}
]
],
"year": "2023"
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 426,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 427,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -29,24 +29,12 @@
},
{
"cell_type": "code",
"execution_count": 428,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHABANE MOSA STEVE ANC has no RAW-SHARES AND OTHER FINANCIAL INTERESTS data\n",
"DLAMINI-ZUMA NKOSAZANA CLARICE ANC has no RAW-TRAVEL data\n",
"DLAMINI-ZUMA NKOSAZANA CLARICE ANC has no RAW-LAND AND PROPERTY data\n",
"DLAMINI-ZUMA NKOSAZANA CLARICE ANC has no RAW-PENSIONS data\n",
"DLAMINI-ZUMA NKOSAZANA CLARICE ANC\n"
]
}
],
"outputs": [],
"source": [
"# read html content from members.html\n",
"with open('members.html', 'r') as f:\n",
"with open('../scraper/main_html_file.html', 'r') as f:\n",
" html_content = f.read()\n",
"\n",
"html_content = html_content.replace(\"1. SHARES AND OTHER FINANCIAL INTERESTS (Family and other trusts)\", \"<p><strong>SHARES AND OTHER FINANCIAL INTERESTS</strong></p>\")\n",
Expand Down Expand Up @@ -187,7 +175,7 @@
},
{
"cell_type": "code",
"execution_count": 429,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -318,14 +306,15 @@
},
{
"cell_type": "code",
"execution_count": 430,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"register = {\n",
" \"date\": \"2021-12-31\", \n",
" \"date\": \"2023-12-31\", \n",
" \"source\": \"\", \n",
" \"register\": []\n",
" \"register\": [],\n",
" \"year\": \"2023\"\n",
"}\n",
"\n",
"for person in data:\n",
Expand All @@ -339,7 +328,7 @@
},
{
"cell_type": "code",
"execution_count": 431,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -365,7 +354,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.10.12"
},
"orig_nbformat": 4
},
Expand Down

0 comments on commit 1e345d3

Please sign in to comment.