From 56debdb36d716940555b296be2ef92bdc5232717 Mon Sep 17 00:00:00 2001
From: FDauphin
Date: Thu, 12 Sep 2024 11:29:22 -0400
Subject: [PATCH] Add function to safely extract tarfile

Replace the bare `tar.extractall()` call with a filtered extraction.
`get_safe_members` skips archive members whose name is an absolute
path or contains a `..` component, and members that are symbolic or
hard links, since any of these can write outside the extraction
directory. `path_mast` is taken from the first member that is actually
extracted.

---
 .../mast_api_psf/download_psf_cutouts.ipynb   | 29 ++++++++++++++++----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/notebooks/WFC3/mast_api_psf/download_psf_cutouts.ipynb b/notebooks/WFC3/mast_api_psf/download_psf_cutouts.ipynb
index 6aa6f16c5..08a9222b0 100644
--- a/notebooks/WFC3/mast_api_psf/download_psf_cutouts.ipynb
+++ b/notebooks/WFC3/mast_api_psf/download_psf_cutouts.ipynb
@@ -295,7 +295,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "With the `.tar.gz` file downloaded, we extract the cutouts."
+    "With the `.tar.gz` file downloaded, we safely extract the cutouts. `get_safe_members` skips any archive member that could be written outside the extraction directory: absolute paths, names containing a `..` component, and symbolic or hard links."
    ]
   },
   {
@@ -304,10 +304,29 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tar = tarfile.open(filename_bundle, 'r:gz')\n",
-    "path_mast = tar.getnames()[0] # path of extracted directory\n",
-    "tar.extractall()\n",
-    "tar.close()"
+    "def get_safe_members(members):\n",
+    "    \"\"\"Yield only the tar members that are safe to extract.\n",
+    "\n",
+    "    Skips members whose name is an absolute path or contains a `..`\n",
+    "    component, and members that are symbolic or hard links, because\n",
+    "    any of these can write outside the extraction directory.\n",
+    "    \"\"\"\n",
+    "    for member in members:\n",
+    "        # Treat Windows and POSIX separators alike when splitting.\n",
+    "        name = member.name.replace('\\\\', '/')\n",
+    "        is_absolute = name.startswith('/')\n",
+    "        has_parent_ref = '..' in name.split('/')\n",
+    "        is_link = member.issym() or member.islnk()\n",
+    "        if not (is_absolute or has_parent_ref or is_link):\n",
+    "            yield member\n",
+    "\n",
+    "with tarfile.open(filename_bundle, 'r:gz') as tar:\n",
+    "    safe_members = list(get_safe_members(tar.getmembers()))\n",
+    "    if not safe_members:\n",
+    "        raise ValueError(f'No safe members found in {filename_bundle}')\n",
+    "    path_mast = safe_members[0].name # path of extracted directory\n",
+    "    print(f'Path to MAST PSF Cutouts: {path_mast}')\n",
+    "    tar.extractall(members=safe_members)"
    ]
   },
   {