diff --git a/sec_edgar_downloader/_Downloader.py b/sec_edgar_downloader/_Downloader.py index d88e2e5..fee84b9 100644 --- a/sec_edgar_downloader/_Downloader.py +++ b/sec_edgar_downloader/_Downloader.py @@ -1,6 +1,6 @@ import sys from pathlib import Path -from typing import ClassVar, List, Optional +from typing import ClassVar, List, Optional, Set from ._constants import DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE from ._constants import SUPPORTED_FORMS as _SUPPORTED_FORMS @@ -67,6 +67,7 @@ def get( before: Optional[Date] = None, include_amends: bool = False, download_details: bool = False, + accession_numbers_to_skip: Optional[Set[str]] = None, ) -> int: """Download filings and save them to disk. @@ -84,6 +85,7 @@ def get( Defaults to False. :param download_details: denotes whether to download human-readable and easily parseable filing detail documents (e.g. form 4 XML, 8-K HTML). Defaults to False. + :param accession_numbers_to_skip: set of accession numbers of filings to skip when downloading. Defaults to None (no filings are skipped by accession number). :return: number of filings downloaded. 
Usage:: @@ -173,6 +175,7 @@ def get( download_details, # Save ticker if passed in to form file system path for saving filings ticker=ticker_or_cik if not is_cik(ticker_or_cik) else None, + accession_numbers_to_skip=accession_numbers_to_skip, ), self.user_agent, ) diff --git a/sec_edgar_downloader/_orchestrator.py b/sec_edgar_downloader/_orchestrator.py index 223f827..ca9f532 100644 --- a/sec_edgar_downloader/_orchestrator.py +++ b/sec_edgar_downloader/_orchestrator.py @@ -130,6 +130,13 @@ def get_to_download(cik: str, acc_num: str, doc: str) -> ToDownload: def fetch_and_save_filings(download_metadata: DownloadMetadata, user_agent: str) -> int: successfully_downloaded = 0 to_download = aggregate_filings_to_download(download_metadata, user_agent) + if download_metadata.accession_numbers_to_skip is not None: + to_download = [ + td + for td in to_download + if td.accession_number not in download_metadata.accession_numbers_to_skip + ] + for td in to_download: try: save_location = get_save_location( diff --git a/sec_edgar_downloader/_types.py b/sec_edgar_downloader/_types.py index 5dd6c4f..7345a3b 100644 --- a/sec_edgar_downloader/_types.py +++ b/sec_edgar_downloader/_types.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from datetime import date, datetime from pathlib import Path -from typing import Optional, Union +from typing import Optional, Set, Union from ._constants import DEFAULT_AFTER_DATE, DEFAULT_BEFORE_DATE @@ -20,6 +20,7 @@ class DownloadMetadata: include_amends: bool = False download_details: bool = False ticker: Optional[str] = None + accession_numbers_to_skip: Optional[Set[str]] = None @dataclass diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 3e40a68..34ff520 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -313,6 +313,47 @@ def test_fetch_and_save_filings_given_paths_that_already_exist( assert mock_save_document.call_count == 0 +def test_fetch_and_save_filings_given_accession_numbers_to_skip( + 
user_agent, form_10k, apple_cik +): + limit = 2 + download_metadata = DownloadMetadata( + download_folder=Path("."), + form=form_10k, + cik=apple_cik, + limit=limit, + after=DEFAULT_AFTER_DATE, + before=DEFAULT_BEFORE_DATE, + include_amends=False, + download_details=False, + accession_numbers_to_skip={"acc_num_0"}, + ) + + to_download_list = [ + ToDownload( + raw_filing_uri=f"raw_{i}", + primary_doc_uri=f"pd_{i}", + accession_number=f"acc_num_{i}", + details_doc_suffix=".xml", + ) + for i in range(limit) + ] + + with patch( + "sec_edgar_downloader._orchestrator.aggregate_filings_to_download", + new=lambda x, y: to_download_list, + ), patch( + "sec_edgar_downloader._orchestrator.download_filing", autospec=True + ) as mock_download_filing, patch( + "sec_edgar_downloader._orchestrator.save_document", autospec=True + ) as mock_save_document: + num_downloaded = fetch_and_save_filings(download_metadata, user_agent) + + assert num_downloaded == 1 + assert mock_download_filing.call_count == 1 + assert mock_save_document.call_count == 1 + + def test_fetch_and_save_filings_given_exception(user_agent, form_10k, apple_cik): limit = 2 download_metadata = DownloadMetadata(