Skip to content

Commit

Permalink
Adjust downloader to new bic-code.org layout
Browse files Browse the repository at this point in the history
  • Loading branch information
mrclmr committed Dec 1, 2023
1 parent b0baee3 commit 5e83458
Show file tree
Hide file tree
Showing 2 changed files with 305 additions and 4 deletions.
263 changes: 260 additions & 3 deletions http/owners_downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,14 @@ func parseOwners(body io.Reader) ([]cont.Owner, error) {

companyTdNode := nextTdSibling(n)
company := parseHTMLtdData(companyTdNode, "Company:")
cityNode := nextTdSibling(companyTdNode)
cityNode := nextTdSibling(nextTdSibling(companyTdNode))
city := parseHTMLtdData(cityNode, "City:")
countryNode := nextTdSibling(cityNode)
country := parseHTMLtdData(countryNode, "Country:")
countryNode := nextTdSibling(nextTdSibling(cityNode))
countryCode := parseHTMLtdData(countryNode, "Country:")
country, ok := countryCodeMap[countryCode]
if !ok {
country = countryCode
}

owners = append(owners,
cont.Owner{
Expand Down Expand Up @@ -130,3 +134,256 @@ func nextTdSibling(td *html.Node) *html.Node {
}
return nil
}

// countryCodeMap maps two-letter country codes to English country names.
//
// parseOwners looks up the value parsed from the "Country:" table cell in
// this map; when a code has no entry here, the raw code is kept as the
// country value instead.
//
// The entries were copied from Wikipedia "ISO 3166-1 alpha-2" and manually
// adjusted.
var countryCodeMap = map[string]string{
"AD": "Andorra",
"AE": "United Arab Emirates",
"AF": "Afghanistan",
"AG": "Antigua and Barbuda",
"AI": "Anguilla",
"AL": "Albania",
"AM": "Armenia",
"AO": "Angola",
"AQ": "Antarctica",
"AR": "Argentina",
"AS": "American Samoa",
"AT": "Austria",
"AU": "Australia",
"AW": "Aruba",
"AX": "Åland Islands",
"AZ": "Azerbaijan",
"BA": "Bosnia and Herzegovina",
"BB": "Barbados",
"BD": "Bangladesh",
"BE": "Belgium",
"BF": "Burkina Faso",
"BG": "Bulgaria",
"BH": "Bahrain",
"BI": "Burundi",
"BJ": "Benin",
"BL": "Saint Barthélemy",
"BM": "Bermuda",
"BN": "Brunei Darussalam",
"BO": "Bolivia",
"BQ": "Bonaire, Sint Eustatius and Saba",
"BR": "Brazil",
"BS": "Bahamas",
"BT": "Bhutan",
"BV": "Bouvet Island",
"BW": "Botswana",
"BY": "Belarus",
"BZ": "Belize",
"CA": "Canada",
"CC": "Cocos (Keeling) Islands",
"CD": "Democratic Republic of the Congo",
"CF": "Central African Republic",
"CG": "Congo",
"CH": "Switzerland",
"CI": "Côte d'Ivoire",
"CK": "Cook Islands",
"CL": "Chile",
"CM": "Cameroon",
"CN": "China",
"CO": "Colombia",
"CR": "Costa Rica",
"CU": "Cuba",
"CV": "Cabo Verde",
"CW": "Curaçao",
"CX": "Christmas Island",
"CY": "Cyprus",
"CZ": "Czechia",
"DE": "Germany",
"DJ": "Djibouti",
"DK": "Denmark",
"DM": "Dominica",
"DO": "Dominican Republic",
"DZ": "Algeria",
"EC": "Ecuador",
"EE": "Estonia",
"EG": "Egypt",
"EH": "Western Sahara",
"ER": "Eritrea",
"ES": "Spain",
"ET": "Ethiopia",
"FI": "Finland",
"FJ": "Fiji",
"FK": "Falkland Islands (Malvinas)",
"FM": "Micronesia",
"FO": "Faroe Islands",
"FR": "France",
"GA": "Gabon",
"GB": "United Kingdom",
"GD": "Grenada",
"GE": "Georgia",
"GF": "French Guiana",
"GG": "Guernsey",
"GH": "Ghana",
"GI": "Gibraltar",
"GL": "Greenland",
"GM": "Gambia",
"GN": "Guinea",
"GP": "Guadeloupe",
"GQ": "Equatorial Guinea",
"GR": "Greece",
"GS": "South Georgia and the South Sandwich Islands",
"GT": "Guatemala",
"GU": "Guam",
"GW": "Guinea-Bissau",
"GY": "Guyana",
"HK": "Hong Kong",
"HM": "Heard Island and McDonald Islands",
"HN": "Honduras",
"HR": "Croatia",
"HT": "Haiti",
"HU": "Hungary",
"ID": "Indonesia",
"IE": "Ireland",
"IL": "Israel",
"IM": "Isle of Man",
"IN": "India",
"IO": "British Indian Ocean Territory",
"IQ": "Iraq",
"IR": "Iran",
"IS": "Iceland",
"IT": "Italy",
"JE": "Jersey",
"JM": "Jamaica",
"JO": "Jordan",
"JP": "Japan",
"KE": "Kenya",
"KG": "Kyrgyzstan",
"KH": "Cambodia",
"KI": "Kiribati",
"KM": "Comoros",
"KN": "Saint Kitts and Nevis",
"KP": "Korea (Democratic People's Republic of)",
"KR": "Korea, Republic of",
"KW": "Kuwait",
"KY": "Cayman Islands",
"KZ": "Kazakhstan",
"LA": "Lao",
"LB": "Lebanon",
"LC": "Saint Lucia",
"LI": "Liechtenstein",
"LK": "Sri Lanka",
"LR": "Liberia",
"LS": "Lesotho",
"LT": "Lithuania",
"LU": "Luxembourg",
"LV": "Latvia",
"LY": "Libya",
"MA": "Morocco",
"MC": "Monaco",
"MD": "Moldova",
"ME": "Montenegro",
"MF": "Saint Martin (French part)",
"MG": "Madagascar",
"MH": "Marshall Islands",
"MK": "North Macedonia",
"ML": "Mali",
"MM": "Myanmar",
"MN": "Mongolia",
"MO": "Macao",
"MP": "Northern Mariana Islands",
"MQ": "Martinique",
"MR": "Mauritania",
"MS": "Montserrat",
"MT": "Malta",
"MU": "Mauritius",
"MV": "Maldives",
"MW": "Malawi",
"MX": "Mexico",
"MY": "Malaysia",
"MZ": "Mozambique",
"NA": "Namibia",
"NC": "New Caledonia",
"NE": "Niger",
"NF": "Norfolk Island",
"NG": "Nigeria",
"NI": "Nicaragua",
"NL": "Netherlands",
"NO": "Norway",
"NP": "Nepal",
"NR": "Nauru",
"NU": "Niue",
"NZ": "New Zealand",
"OM": "Oman",
"PA": "Panama",
"PE": "Peru",
"PF": "French Polynesia",
"PG": "Papua New Guinea",
"PH": "Philippines",
"PK": "Pakistan",
"PL": "Poland",
"PM": "Saint Pierre and Miquelon",
"PN": "Pitcairn",
"PR": "Puerto Rico",
"PS": "Palestine",
"PT": "Portugal",
"PW": "Palau",
"PY": "Paraguay",
"QA": "Qatar",
"RE": "Réunion",
"RO": "Romania",
"RS": "Serbia",
"RU": "Russian Federation",
"RW": "Rwanda",
"SA": "Saudi Arabia",
"SB": "Solomon Islands",
"SC": "Seychelles",
"SD": "Sudan",
"SE": "Sweden",
"SG": "Singapore",
"SH": "Saint Helena, Ascension and Tristan da Cunha",
"SI": "Slovenia",
"SJ": "Svalbard and Jan Mayen",
"SK": "Slovakia",
"SL": "Sierra Leone",
"SM": "San Marino",
"SN": "Senegal",
"SO": "Somalia",
"SR": "Suriname",
"SS": "South Sudan",
"ST": "Sao Tome and Principe",
"SV": "El Salvador",
"SX": "Sint Maarten (Dutch part)",
"SY": "Syrian Arab Republic",
"SZ": "Eswatini",
"TC": "Turks and Caicos Islands",
"TD": "Chad",
"TF": "French Southern Territories",
"TG": "Togo",
"TH": "Thailand",
"TJ": "Tajikistan",
"TK": "Tokelau",
"TL": "Timor-Leste",
"TM": "Turkmenistan",
"TN": "Tunisia",
"TO": "Tonga",
"TR": "Türkiye",
"TT": "Trinidad and Tobago",
"TV": "Tuvalu",
"TW": "Taiwan (Republic of China)",
"TZ": "Tanzania",
"UA": "Ukraine",
"UG": "Uganda",
"UM": "United States Minor Outlying Islands",
"US": "United States of America",
"UY": "Uruguay",
"UZ": "Uzbekistan",
"VA": "Holy See",
"VC": "Saint Vincent and the Grenadines",
"VE": "Venezuela",
"VG": "Virgin Islands (British)",
"VI": "Virgin Islands (U.S.)",
"VN": "Viet Nam",
"VU": "Vanuatu",
"WF": "Wallis and Futuna",
"WS": "Samoa",
"YE": "Yemen",
"YT": "Mayotte",
"ZA": "South Africa",
"ZM": "Zambia",
"ZW": "Zimbabwe",
}
46 changes: 45 additions & 1 deletion http/owners_downloader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,18 @@ func codeInvalidLength() io.Reader {
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Company:</span>
<span>A Company</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Address:</span>
<span>6 KAUFMAN STREET 16TH FLOOR</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">City:</span>
<span>A City</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Zip:</span>
<span>68012</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Country:</span>
Expand All @@ -121,8 +129,12 @@ func missingTdTags() io.Reader {
<span>AAAU</span>
<span class="hideOnDesktop tdHeading">Company:</span>
<span>A Company</span>
<span class="hideOnDesktop tdHeading">Address:</span>
<span>An address</span>
<span class="hideOnDesktop tdHeading">City:</span>
<span>A City</span>
<span>A zip code</span>
<span class="hideOnDesktop tdHeading">Zip:</span>
<span>An address</span>
<span class="hideOnDesktop tdHeading">Country:</span>
<span>A Country</span>
<span class="hideOnDesktop tdHeading">Details:</span>
Expand Down Expand Up @@ -158,10 +170,18 @@ func validBody() io.Reader {
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Company:</span>
<span>A Company</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Address:</span>
<span>6 KAUFMAN STREET 16TH FLOOR</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">City:</span>
<span>A City</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Zip:</span>
<span>68012</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Country:</span>
Expand All @@ -180,10 +200,18 @@ func validBody() io.Reader {
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Company:</span>
<span></span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Address:</span>
<span>6 KAUFMAN STREET 16TH FLOOR</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">City:</span>
<span>B City</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Zip:</span>
<span>68012</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Country:</span>
Expand All @@ -202,10 +230,18 @@ func validBody() io.Reader {
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Company:</span>
<span>C Company</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Address:</span>
<span>6 KAUFMAN STREET 16TH FLOOR</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">City:</span>
<span></span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Zip:</span>
<span>68012</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Country:</span>
Expand All @@ -224,10 +260,18 @@ func validBody() io.Reader {
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Company:</span>
<span>D Company</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Address:</span>
<span>6 KAUFMAN STREET 16TH FLOOR</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">City:</span>
<span>D City</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Zip:</span>
<span>68012</span>
</td>
<td class="flexMobile align-items-center">
<span class="hideOnDesktop tdHeading">Country:</span>
Expand Down

0 comments on commit 5e83458

Please sign in to comment.