Skip to content

Commit bd9d8c1

Browse files
author
Jay Vala
committed
Fix downloading of ISIN file
1 parent b85a0d0 commit bd9d8c1

File tree

7 files changed

+48
-92
lines changed

7 files changed

+48
-92
lines changed

requirements/ci.txt

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
1-
black>=19.10b0 ; python_version >= "3.6"
2-
isort~=4.3.21
3-
flake8~=3.7.9
4-
flake8-absolute-import~=1.0 ; python_version >= "3.6"
5-
flake8-black~=0.1.1 ; python_version >= "3.6"
6-
flake8-blind-except~=0.1.1 ; python_version >= "3.6"
7-
flake8-builtins~=1.5.2 ; python_version >= "3.6"
8-
flake8-comprehensions~=3.2.2 ; python_version >= "3.6"
9-
flake8-docstrings~=1.5.0 ; python_version >= "3.6"
10-
flake8-mutable~=1.2.0 ; python_version >= "3.6"
11-
flake8-print~=3.1.4 ; python_version >= "3.6"
12-
flake8-quotes~=3.0.0 ; python_version >= "3.6"
13-
flake8-tuple~=0.4.1 ; python_version >= "3.6"
14-
pytest~=4.6 # pytest 5 requires py3
15-
pytest-cov~=2.8.1
16-
pytest-env~=0.6.2
17-
pytest-sugar~=0.9.2
18-
testfixtures~=6.14.0
1+
black
2+
isort
3+
flake8
4+
flake8-absolute-import
5+
flake8-black
6+
flake8-blind-except
7+
flake8-comprehensions
8+
flake8-docstrings
9+
flake8-mutable
10+
flake8-print
11+
flake8-quotes
12+
flake8-tuple
13+
pytest
14+
pytest-cov
15+
pytest-env
16+
pytest-sugar
17+
testfixtures

requirements/prod.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
pandas
22
texttable
3-
requests
3+
requests
4+
bs4

src/python_lei/isin_lei.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import pandas as pd
2-
import requests
31
from python_lei.exceptions import InvalidISIN, InvalidLEI
42
from python_lei.utils import load_data
53

src/python_lei/utils.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import pandas as pd
1010
import requests
11+
from bs4 import BeautifulSoup
1112

1213
logger = logging.getLogger(__name__)
1314
logging.basicConfig(level=logging.INFO)
@@ -30,8 +31,7 @@ def __init__(self, _is_actions=False):
3031
Args:
3132
_is_actions (bool): For setting path of downloaded resources on Github Actions
3233
"""
33-
self.data_url = f"https://isinmapping.gleif.org/file-by-date/{TODAY}"
34-
34+
self.data_url = "https://www.gleif.org/en/lei-data/lei-mapping/download-isin-to-lei-relationship-files"
3535
self._download(_is_actions)
3636

3737
def _download(self, _is_actions):
@@ -42,8 +42,13 @@ def _download(self, _is_actions):
4242
logger.info(f"No resources directory found, creating resources directory.")
4343
os.mkdir(RESOURCE_DIR)
4444

45+
download_link = self._scrape_isin_file()
46+
47+
if not download_link:
48+
raise ValueError("Downloading of isin file not available.")
49+
4550
try:
46-
response = requests.get(self.data_url)
51+
response = requests.get(download_link)
4752
except requests.exceptions as err:
4853
logger.error(
4954
"Connection Error, Unable to download data at this time. Please check you have working internet connection or try again later."
@@ -52,19 +57,31 @@ def _download(self, _is_actions):
5257
logger.error("No response from GLEIF server.")
5358

5459
logger.info("The file could be over 50 Mb.")
55-
# TODO: Add progress bar
5660
zipped_content = zipfile.ZipFile(io.BytesIO(response.content))
57-
# TODO: Remove this
5861
if _is_actions:
5962
zipped_content.extractall(
6063
"/home/runner/work/python-lei/python-lei/resources"
6164
)
6265
else:
6366
zipped_content.extractall(RESOURCE_DIR)
6467
logger.info(f"Extraction complete in {RESOURCE_DIR}")
68+
69+
def _scrape_isin_file(self):
    """
    Scrape the download page for the link to the ISIN-to-LEI mapping file.

    Fetches ``self.data_url``, parses the returned HTML and reads the
    ``href`` of the first anchor found in the second cell of the second
    table row on the page.

    Returns:
        str or None: The ``href`` value of that anchor, or ``None`` when the
        page could not be fetched or does not have the expected table
        layout. ``_download`` treats a falsy result as "download not
        available" and raises ``ValueError``.
    """
    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(self.data_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        # RequestException is the base class, so this covers connection
        # failures, timeouts and the HTTPError raised by raise_for_status().
        logger.error("Error connecting to %s: %s", self.data_url, err)
        return None

    # Name the parser explicitly: without it BeautifulSoup warns and picks
    # whichever parser happens to be installed, which can change the tree.
    soup = BeautifulSoup(response.text, "html.parser")

    try:
        # Second <tr>, second <td>, first <a>.
        # NOTE(review): presumably row 0 is the table header and row 1 the
        # most recent file -- confirm against the live page.
        table_rows = soup.find_all("tr")
        download_link = table_rows[1].find_all("td")[1].find("a")["href"]
    except (IndexError, KeyError, TypeError) as err:
        # IndexError: too few rows/cells; TypeError: no <a> in the cell
        # (find() returned None); KeyError: the <a> has no href attribute.
        logger.error(
            "Unexpected page layout at %s, no download link found: %r",
            self.data_url,
            err,
        )
        return None

    return download_link
84+
6885

6986
class Update:
7087
"""
@@ -83,12 +100,12 @@ def __init__(self):
83100
logger.info(
84101
"Resource directory not found or LEI ISIN mappings not found. Downloading now."
85102
)
86-
download = Download()
103+
Download()
87104

88105
if os.listdir(RESOURCE_DIR) != []:
89106
shutil.rmtree(RESOURCE_DIR)
90107
logger.info(f"Downloading Data in {RESOURCE_DIR}")
91-
download = Download()
108+
Download()
92109

93110

94111
def load_data():

tests/test_isin_lei.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

tests/test_lei_search.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
from pathlib import Path
2-
from unittest.mock import Mock, patch
1+
from unittest.mock import patch
32

4-
import pandas as pd
53
import pytest
6-
from python_lei.exceptions import NotFound
74
from python_lei.lei_search import SearchLEI
85

96

tests/test_pylei.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
from pathlib import Path
2-
from unittest.mock import Mock, patch
1+
from unittest.mock import patch
32

43
import pandas as pd
54
import pytest
6-
from python_lei.exceptions import InvalidISIN, InvalidLEI
5+
from python_lei.exceptions import InvalidLEI
76
from python_lei.pylei import pyLEI
87
from python_lei.utils import PROJECT_ROOT
98

0 commit comments

Comments
 (0)