Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix downloading of ISIN file #12

Merged
merged 6 commits into from
Oct 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,24 @@ on:
tags: ["*"]
jobs:
CI:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"] # Specify Python versions here
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v2
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: 3.7
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements/ci.txt
pip install -e .

- name: Download Data
run: python -c "from python_lei.utils import Download; Download(_is_actions=True)"

Expand All @@ -38,12 +44,12 @@ jobs:
if: startsWith(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Set up Python 3.7
uses: actions/setup-python@v2
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
python-version: 3.7
python-version: "3.10"  # quoted on purpose: a bare 3.10 is a YAML float and is read as 3.1

- name: PyPi Deploy preparation
run: |
Expand All @@ -53,4 +59,4 @@ jobs:
uses: pypa/gh-action-pypi-publish@v1.0.0a0
with:
user: ${{ secrets.PYPI_USER }}
password: ${{ secrets.PYPI_PASSWORD }}
password: ${{ secrets.PYPI_PASSWORD }}
35 changes: 17 additions & 18 deletions requirements/ci.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
black>=19.10b0 ; python_version >= "3.6"
isort~=4.3.21
flake8~=3.7.9
flake8-absolute-import~=1.0 ; python_version >= "3.6"
flake8-black~=0.1.1 ; python_version >= "3.6"
flake8-blind-except~=0.1.1 ; python_version >= "3.6"
flake8-builtins~=1.5.2 ; python_version >= "3.6"
flake8-comprehensions~=3.2.2 ; python_version >= "3.6"
flake8-docstrings~=1.5.0 ; python_version >= "3.6"
flake8-mutable~=1.2.0 ; python_version >= "3.6"
flake8-print~=3.1.4 ; python_version >= "3.6"
flake8-quotes~=3.0.0 ; python_version >= "3.6"
flake8-tuple~=0.4.1 ; python_version >= "3.6"
pytest~=4.6 # pytest 5 requires py3
pytest-cov~=2.8.1
pytest-env~=0.6.2
pytest-sugar~=0.9.2
testfixtures~=6.14.0
black
isort
flake8
flake8-absolute-import
flake8-black
flake8-blind-except
flake8-comprehensions
flake8-docstrings
flake8-mutable
flake8-print
flake8-quotes
flake8-tuple
pytest
pytest-cov
pytest-env
pytest-sugar
testfixtures
3 changes: 2 additions & 1 deletion requirements/prod.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pandas
texttable
requests
requests
bs4
2 changes: 0 additions & 2 deletions src/python_lei/isin_lei.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import pandas as pd
import requests
from python_lei.exceptions import InvalidISIN, InvalidLEI
from python_lei.utils import load_data

Expand Down
35 changes: 26 additions & 9 deletions src/python_lei/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pandas as pd
import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
Expand All @@ -30,8 +31,7 @@ def __init__(self, _is_actions=False):
Args:
_is_actions (bool): For setting path of downloaded resources on Github Actions
"""
self.data_url = f"https://isinmapping.gleif.org/file-by-date/{TODAY}"

self.data_url = "https://www.gleif.org/en/lei-data/lei-mapping/download-isin-to-lei-relationship-files"
self._download(_is_actions)

def _download(self, _is_actions):
Expand All @@ -42,8 +42,13 @@ def _download(self, _is_actions):
logger.info(f"No resources directory found, creating resources directory.")
os.mkdir(RESOURCE_DIR)

download_link = self._scrape_isin_file()

if not download_link:
raise ValueError("Downloading of isin file not available.")

try:
response = requests.get(self.data_url)
response = requests.get(download_link)
except requests.exceptions as err:
logger.error(
"Connection Error, Unable to download data at this time. Please check you have working internet connection or try again later."
Expand All @@ -52,19 +57,31 @@ def _download(self, _is_actions):
logger.error("No response from GLEIF server.")

logger.info("The file could be over 50 Mb.")
# TODO: Add progress bar
zipped_content = zipfile.ZipFile(io.BytesIO(response.content))
# TODO: Remove this
if _is_actions:
zipped_content.extractall(
"/home/runner/work/python-lei/python-lei/resources"
)
else:
zipped_content.extractall(RESOURCE_DIR)
logger.info(f"Extraction complete in {RESOURCE_DIR}")

def _scrape_isin_file(self):
    """
    Scrape the download page for the link to the ISIN file.

    Requests ``self.data_url`` and reads the ``href`` of the first anchor
    found in the second cell of the second table row of the returned page.

    Returns:
        str or None: The ``href`` value, or ``None`` when the page could not
        be fetched or the expected table layout was not present. The caller
        is expected to treat a falsy result as "download not available".
    """
    # TODO: Convert the dataframe to parquet and use it.
    try:
        response = requests.get(self.data_url)
        response.raise_for_status()
    except requests.RequestException:
        # RequestException also covers the HTTPError raised by
        # raise_for_status(), which a ConnectionError-only handler lets
        # escape.
        logger.error(f"Error connecting to {self.data_url}")
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's
    # GuessedAtParserWarning).
    soup = BeautifulSoup(response.text, "html.parser")

    try:
        # find all the tr and td and get to the href
        return soup.find_all("tr")[1].find_all("td")[1].find("a")["href"]
    except (IndexError, TypeError, KeyError):
        # IndexError: fewer rows/cells than expected; TypeError: no <a> in
        # the cell (find() returned None); KeyError: <a> without an href.
        logger.error(
            "Could not locate the ISIN download link on %s", self.data_url
        )
        return None


class Update:
"""
Expand All @@ -83,12 +100,12 @@ def __init__(self):
logger.info(
"Resource directory not found or LEI ISIN mappings not found. Downloading now."
)
download = Download()
Download()

if os.listdir(RESOURCE_DIR) != []:
shutil.rmtree(RESOURCE_DIR)
logger.info(f"Downloading Data in {RESOURCE_DIR}")
download = Download()
Download()


def load_data():
Expand Down
55 changes: 0 additions & 55 deletions tests/test_isin_lei.py

This file was deleted.

5 changes: 1 addition & 4 deletions tests/test_lei_search.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from pathlib import Path
from unittest.mock import Mock, patch
from unittest.mock import patch

import pandas as pd
import pytest
from python_lei.exceptions import NotFound
from python_lei.lei_search import SearchLEI


Expand Down
5 changes: 2 additions & 3 deletions tests/test_pylei.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from pathlib import Path
from unittest.mock import Mock, patch
from unittest.mock import patch

import pandas as pd
import pytest
from python_lei.exceptions import InvalidISIN, InvalidLEI
from python_lei.exceptions import InvalidLEI
from python_lei.pylei import pyLEI
from python_lei.utils import PROJECT_ROOT

Expand Down