Skip to content

Commit

Permalink
Scrape data
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelglenister committed Feb 14, 2024
1 parent 6b5f41c commit 980ba5c
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 15 deletions.
14 changes: 7 additions & 7 deletions pombola/south_africa/data/members-interests/2022.json
Original file line number Diff line number Diff line change
Expand Up @@ -1842,7 +1842,7 @@
]
},
{
"mp": "BONGO ADVOCATE BONGANI THOMAS ANC",
"mp": "BONGO BONGANI THOMAS ANC",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -6978,7 +6978,7 @@
]
},
{
"mp": "HOLOMISA ADVOCATE SANGO PATEKILE ANC",
"mp": "HOLOMISA SANGO PATEKILE ANC",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -13998,7 +13998,7 @@
]
},
{
"mp": "MASUTHA ADVOCATE TSHILILO MICHEAL ANC",
"mp": "MASUTHA TSHILILO MICHEAL ANC",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -29032,7 +29032,7 @@
]
},
{
"mp": "BREYTENBACH ADVOCATE GLYNNIS DA",
"mp": "BREYTENBACH GLYNNIS DA",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -36281,7 +36281,7 @@
]
},
{
"mp": "TERBLANCHE MAJOR GENERAL OCKERT STEFANUS DA",
"mp": "TERBLANCHE OCKERT STEFANUS DA",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -43740,7 +43740,7 @@
]
},
{
"mp": "BUTHELEZI PRINCE MANGOSUTHU IFP",
"mp": "BUTHELEZI MANGOSUTHU IFP",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -45252,7 +45252,7 @@
]
},
{
"mp": "HOLOMISA MAJOR GENERAL BANTUBONKE HARRINGTON UDM",
"mp": "HOLOMISA BANTUBONKE HARRINGTON UDM",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down
12 changes: 6 additions & 6 deletions pombola/south_africa/data/members-interests/2023.json
Original file line number Diff line number Diff line change
Expand Up @@ -1750,7 +1750,7 @@
]
},
{
"mp": "BONGO ADVOCATE BONGANI THOMAS ANC",
"mp": "BONGO BONGANI THOMAS ANC",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -6636,7 +6636,7 @@
]
},
{
"mp": "HOLOMISA ADVOCATE SANGO PATEKILE ANC",
"mp": "HOLOMISA SANGO PATEKILE ANC",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -13616,7 +13616,7 @@
]
},
{
"mp": "MASUTHA ADVOCATE TSHILILO MICHEAL ANC",
"mp": "MASUTHA TSHILILO MICHEAL ANC",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -28946,7 +28946,7 @@
]
},
{
"mp": "BREYTENBACH ADVOCATE GLYNNIS DA",
"mp": "BREYTENBACH GLYNNIS DA",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -36172,7 +36172,7 @@
]
},
{
"mp": "TERBLANCHE MAJOR GENERAL OCKERT STEFANUS DA",
"mp": "TERBLANCHE OCKERT STEFANUS DA",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down Expand Up @@ -44798,7 +44798,7 @@
]
},
{
"mp": "HOLOMISA MAJOR GENERAL BANTUBONKE HARRINGTON UDM",
"mp": "HOLOMISA BANTUBONKE HARRINGTON UDM",
"content": "",
"SHARES AND OTHER FINANCIAL INTERESTS": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ class Converter(object):
"\" \"",
]

parties = ["ACDP", "AIC", "AL JAMA-AH", "ANC", "ATM", "COPE", "DA", "EFF", "FF PLUS", "GOOD", "IFP", "NFP", "PAC", "UDM"]
unique_case_surname = ["BODLANI MOTSHIDI", "LE GOFF", "MAZZONE MICHAEL", "MC GLUWA", "VAN ZYL", "NTLANGWINI LOUW", "DE BRUYN", "DENNER JORDAAN", "DU TOIT", "VAN STADEN"]
# Change this to True to enable little bits of helper code for finding new
# slug corrections:
finding_slug_corrections = True
Expand Down Expand Up @@ -661,8 +663,16 @@ def extract_entries(self, data):

def mp_to_person_slug(self, mp):
# NOTE: 2020 no longer has the party in the name and the names are rearranged
name = re.sub(r'(.*?) (.*)', r'\2 \1', mp.rsplit(' ', 1)[0])
slug = slugify(name)
pattern = r'\b(?:{})\b'.format('|'.join(map(re.escape, self.parties)))
name_only = re.sub(pattern, '', mp)
# special case surnames
for surname in self.unique_case_surname:
if name_only.startswith(surname):
name_ordered = re.sub(r'^(\w+\b\s+\w+\b)\s+(.*)$', r'\2 \1', name_only)
break
else:
name_ordered = re.sub(r'(.*?) (.*)', r'\2 \1', name_only)
slug = slugify(name_ordered)

# Check if there is a known correction for this slug
slug = self.slug_corrections.get(slug, slug)
Expand Down

0 comments on commit 980ba5c

Please sign in to comment.