Skip to content

Commit

Permalink
improve Jupyter Notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
do-me committed Oct 22, 2023
1 parent 58b89e0 commit e96c508
Showing 1 changed file with 3 additions and 14 deletions.
17 changes: 3 additions & 14 deletions copernicus_services_miner.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -74,30 +74,19 @@
" href = a.get(\"href\")\n",
" if href and href.startswith(\"/en/access-data/\"):\n",
" full_url = url\n",
" data_list.append({\"Page Number\": page, \"Full URL\": full_url, \"Link\": href})\n",
" data_list.append({\"Page Number\": page, \"Page_URL\": full_url, \"Link\": href})\n",
" else:\n",
" print(f\"No <div> with specified classes found on page {page}\")\n",
" else:\n",
" print(f\"Failed to fetch data from page {page}. Status code: {response.status_code}\")\n",
"\n",
"# Convert the list of dictionaries to a Pandas DataFrame\n",
"df = pd.DataFrame(data_list)\n",
"\n",
"print(\"Mining and extraction complete.\")\n",
"print(\"Links starting with '/en/access-data/' have been added to the DataFrame.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[\"Service_URL\"] = \"https://www.copernicus.eu\" + df[\"Link\"]\n",
"del df[\"Link\"]\n",
"\n",
"df[\"Page_URL\"] = df[\"Full URL\"]\n",
"del df[\"Full URL\"]"
"print(\"Mining and extraction complete.\")\n",
"print(\"Links starting with '/en/access-data/' have been added to the DataFrame.\")"
]
},
{
Expand Down

0 comments on commit e96c508

Please sign in to comment.