Skip to content

Commit

Permalink
Add download option to skip accession numbers (#142)
Browse files Browse the repository at this point in the history
* Add download option to skip accession numbers

This improves efficiency when some filings have already been downloaded
to another location.

* Use `Set` from typing and improve var name

* Fix typing annotation

* Add test for skipping accession numbers

* Apply linter

It was previously not fully initialized
  • Loading branch information
spolcyn authored Jul 26, 2024
1 parent 40db529 commit 15ebd0d
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 2 deletions.
5 changes: 4 additions & 1 deletion sec_edgar_downloader/_Downloader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sys
from pathlib import Path
from typing import ClassVar, List, Optional
from typing import ClassVar, List, Optional, Set

from ._constants import DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE
from ._constants import SUPPORTED_FORMS as _SUPPORTED_FORMS
Expand Down Expand Up @@ -67,6 +67,7 @@ def get(
before: Optional[Date] = None,
include_amends: bool = False,
download_details: bool = False,
accession_numbers_to_skip: Optional[Set[str]] = None,
) -> int:
"""Download filings and save them to disk.
Expand All @@ -84,6 +85,7 @@ def get(
Defaults to False.
:param download_details: denotes whether to download human-readable and easily
parseable filing detail documents (e.g. form 4 XML, 8-K HTML). Defaults to False.
:param accession_numbers_to_skip: Set of accession numbers to skip when downloading.
:return: number of filings downloaded.
Usage::
Expand Down Expand Up @@ -173,6 +175,7 @@ def get(
download_details,
# Save ticker if passed in to form file system path for saving filings
ticker=ticker_or_cik if not is_cik(ticker_or_cik) else None,
accession_numbers_to_skip=accession_numbers_to_skip,
),
self.user_agent,
)
Expand Down
7 changes: 7 additions & 0 deletions sec_edgar_downloader/_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ def get_to_download(cik: str, acc_num: str, doc: str) -> ToDownload:
def fetch_and_save_filings(download_metadata: DownloadMetadata, user_agent: str) -> int:
successfully_downloaded = 0
to_download = aggregate_filings_to_download(download_metadata, user_agent)
if download_metadata.accession_numbers_to_skip is not None:
to_download = [
td
for td in to_download
if td.accession_number not in download_metadata.accession_numbers_to_skip
]

for td in to_download:
try:
save_location = get_save_location(
Expand Down
3 changes: 2 additions & 1 deletion sec_edgar_downloader/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import dataclass
from datetime import date, datetime
from pathlib import Path
from typing import Optional, Union
from typing import Optional, Set, Union

from ._constants import DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE

Expand All @@ -20,6 +20,7 @@ class DownloadMetadata:
include_amends: bool = False
download_details: bool = False
ticker: Optional[str] = None
accession_numbers_to_skip: Optional[Set[str]] = None


@dataclass
Expand Down
41 changes: 41 additions & 0 deletions tests/test_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,47 @@ def test_fetch_and_save_filings_given_paths_that_already_exist(
assert mock_save_document.call_count == 0


def test_fetch_and_save_filings_given_accession_numbers_to_skip(
    user_agent, form_10k, apple_cik
):
    """Filings whose accession number is in the skip set are not downloaded.

    Two candidate filings are offered by the (patched) aggregation step and
    one of them is listed in ``accession_numbers_to_skip``; only the other
    one should be fetched and saved.
    """
    limit = 2
    skipped = {"acc_num_0"}

    # Candidate filings "acc_num_0" .. "acc_num_{limit - 1}".
    candidates = []
    for idx in range(limit):
        candidates.append(
            ToDownload(
                raw_filing_uri=f"raw_{idx}",
                primary_doc_uri=f"pd_{idx}",
                accession_number=f"acc_num_{idx}",
                details_doc_suffix=".xml",
            )
        )

    download_metadata = DownloadMetadata(
        download_folder=Path("."),
        form=form_10k,
        cik=apple_cik,
        limit=limit,
        after=DEFAULT_AFTER_DATE,
        before=DEFAULT_BEFORE_DATE,
        include_amends=False,
        download_details=False,
        accession_numbers_to_skip=skipped,
    )

    def _stub_aggregate(_metadata, _agent):
        # Stand-in for the real aggregation so no network lookup happens.
        return candidates

    with patch(
        "sec_edgar_downloader._orchestrator.aggregate_filings_to_download",
        new=_stub_aggregate,
    ):
        with patch(
            "sec_edgar_downloader._orchestrator.download_filing", autospec=True
        ) as mock_download_filing:
            with patch(
                "sec_edgar_downloader._orchestrator.save_document", autospec=True
            ) as mock_save_document:
                num_downloaded = fetch_and_save_filings(
                    download_metadata, user_agent
                )

    # Exactly one of the two candidates survives the skip filter.
    assert num_downloaded == 1
    assert mock_download_filing.call_count == 1
    assert mock_save_document.call_count == 1


def test_fetch_and_save_filings_given_exception(user_agent, form_10k, apple_cik):
limit = 2
download_metadata = DownloadMetadata(
Expand Down

0 comments on commit 15ebd0d

Please sign in to comment.