From b5c80649ab645cd27c8bf7ed0a21ddd355e5b014 Mon Sep 17 00:00:00 2001 From: Lei Ma Date: Wed, 11 Sep 2024 15:06:39 -0400 Subject: [PATCH] day3 student minor edits --- .../Python/Python-Day3-student.ipynb | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/resources/Workshops/Python/Python-Day3-student.ipynb b/docs/resources/Workshops/Python/Python-Day3-student.ipynb index 23e78c8..5b6b5c7 100644 --- a/docs/resources/Workshops/Python/Python-Day3-student.ipynb +++ b/docs/resources/Workshops/Python/Python-Day3-student.ipynb @@ -516,12 +516,6 @@ "# Hint: it might be easiest to explicitly create the boolean mask for this\n", "# The operator & is the element-wise AND operator, it returns True if both elements in an array are True\n", "# Your code here\n", - "\n", - "\n", - "\n", - "# could also do: mask = (arr != np.max(arr)) & (arr != np.min(arr))\n", - "# could also do: mask = np.logical_and(arr != np.max(arr), arr != np.min(arr))\n", - "# could also do: mask = np.all([arr != np.max(arr), arr != np.min(arr)], axis=0)\n", "\n" ] }, @@ -733,7 +727,7 @@ "metadata": {}, "outputs": [], "source": [ - "filename = 'data/bird_observations.csv'\n", + "filename = 'https://informatics.fas.harvard.edu/resources/Workshops/Python/data/bird_observations.csv'\n", "\n", "bird_observations = dict()\n", "\n", @@ -1458,7 +1452,7 @@ ">**Exercise**: write a function that takes a sample ID and a file path and does the following:\n", ">\n", ">1. use the `read_csv()` function in pandas to read in a file as a pandas dataframe (remember: our input is TAB separated, not comma separated!)\n", - ">2. re-name the 'target_id' column to 'transcriptID' and the 'tpm' column to the `sampleID` (use the `.rename()` method of your dataframe)\n", + ">2. 
re-name the 'target_id' column to 'transcriptID' and the 'tpm' column to the `sampleID` (use the `.rename()` method of your dataframe; see the [documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html))\n", ">3. subsets the data frame to only keep the two renamed columns\n", ">4. return the simplified data frame" ] @@ -1543,11 +1537,7 @@ "source": [ "pandasDFs = [] #initialize a list to hold the pandas dataframes\n", "\n", - "# Your code goes here:\n", - "for sample in samplesDict:\n", - " filePath = samplesDict[sample]\n", - "\n", - " pandasDFs.append(makePandasDF(sample,filePath))" + "# Your code goes here:\n" ] }, { @@ -1645,7 +1635,18 @@ "source": [ "Now for the final step, getting the gene name for each transcript! Recall that this information lives in the file `data/geneIDs.tsv`, so we are first going to have to read in this file as well. There are several ways we could do this, depending on what our specific goals are, but as we only want to retain the transcripts that have a corresponding gene ID in the table, the easiest way is again using the `merge()` function.\n", "\n", - ">Exercise: read in the file `data/geneIDs.tsv` as a pandas dataframe and merge it with our combined dataframe to assign a gene name to each transcript " + "Here's a sample of what the file looks like:\n", + "\n", + "```\n", + "TCONS_00000001\tgene-SNX18_gGal-2\n", + "TCONS_00000002\tgene-LOC116806879_tGut-like-2\n", + "TCONS_00000004\tgene-KCMF1_gGal-like-1\n", + "TCONS_00000003\tgene-KCMF1_gGal-like-1\n", + "```\n", + "\n", + ">**Exercise**: read in the file `data/geneIDs.tsv` as a pandas dataframe and merge it with our combined dataframe to assign a gene name to each transcript.\n", + ">\n", + "> Note that this file has no header row, so calling `pd.read_csv()` with its default settings would treat the first row of data as the column names. Check the [documentation](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html) to figure out how to supply your own column names instead." ] }, {