Skip to content

Commit

Permalink
SorrTask612_Extract_signal
Browse files Browse the repository at this point in the history
  • Loading branch information
why2000 committed Nov 29, 2023
1 parent 05fd1fe commit 184daa1
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
"investors_csv_save_path = \"../result_csv/signal_fintech_investors.csv\"\n",
"# Source data page URL.\n",
"baseurl = \"https://signal.nfx.com/investor-lists/top-fintech-seed-investors\"\n",
"# Specifying the range of data to be extracted.\n",
"start_idx = 0\n",
"length = 40\n",
"# Get Dataframe of investors from Signal page URL.\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
investors_csv_save_path = "../result_csv/signal_fintech_investors.csv"
# Source data page URL.
baseurl = "https://signal.nfx.com/investor-lists/top-fintech-seed-investors"
# Specify the range of data to be extracted.
start_idx = 0
length = 40
# Get Dataframe of investors from Signal page URL.
Expand Down
13 changes: 10 additions & 3 deletions marketing/signal/extract_investors_from_signal_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
import marketing.signal.extract_investors_from_signal_list as mseifsili
"""

# TODO(Henry): These packages need to be installed manually until they are
# added to the container.
# Running the following line in any notebook installs them:
# !sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade selenium webdriver-manager)"

import math
import time

Expand All @@ -21,9 +25,12 @@ def extract_investors_from_signal_url(
baseurl: str, start_idx: int, length: int
) -> pd.DataFrame:
"""
Extract a dataframe of investor information from a signal investors list
page. e.g. https://signal.nfx.com/investor-lists/top-fintech-seed-investors
Available lists are in this page: https://signal.nfx.com/investor-lists/
Extract a dataframe of investor information from a signal investors list page.
e.g. https://signal.nfx.com/investor-lists/top-fintech-seed-investors
Available lists are in this page: https://signal.nfx.com/investor-lists/
The page loads only a few items per click on the loading button,
so please use the params to specify the range of data to be extracted
and avoid an unpredictable waiting time.
:param baseurl: The page url to be extracted
:param start_idx: The index of the first item to be extracted (start from 0)
Expand Down

0 comments on commit 184daa1

Please sign in to comment.