Skip to content

Commit

Permalink
another fix
Browse files Browse the repository at this point in the history
  • Loading branch information
GeorgWa committed Nov 7, 2024
1 parent b7714b9 commit ae84223
Showing 1 changed file with 75 additions and 46 deletions.
121 changes: 75 additions & 46 deletions nbs_tests/psm_reader/dia_psm_reader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -32,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -76,7 +76,7 @@
" 'genes': ['Genes', 'Gene', 'GeneName', 'GeneNames']}"
]
},
"execution_count": null,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -101,7 +101,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -115,7 +115,7 @@
" 'FullUniModPeptideName']"
]
},
"execution_count": null,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -133,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand All @@ -155,7 +155,7 @@
" 'fdr': 'Q.Value'}"
]
},
"execution_count": null,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -180,7 +180,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -189,7 +189,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -291,7 +291,7 @@
"3 HLLNQAVGEEEVPK 14 1.000000 521.610617 "
]
},
"execution_count": null,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -314,7 +314,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -427,7 +427,7 @@
"2 MAP7 Acetyl@Protein_N-term;Phospho@S 0;4 11 1.0 371.282739 "
]
},
"execution_count": null,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -457,7 +457,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -484,7 +484,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -521,7 +521,7 @@
" <th>scan_num</th>\n",
" <th>score</th>\n",
" <th>fdr</th>\n",
" <th>spec_idx</th>\n",
" <th>diann_spec_idx</th>\n",
" <th>mods</th>\n",
" <th>mod_sites</th>\n",
" <th>nAA</th>\n",
Expand Down Expand Up @@ -890,21 +890,21 @@
"12 AAAAAAAPSGGGGGGEEERLEEK 3 7.28562 7.23794 7.33338 1.01500 \n",
"13 AAAAAAAPSGGGGGGEEERLEEK 3 7.26825 7.22055 7.31601 1.01208 \n",
"\n",
" proteins uniprot_ids genes scan_num score fdr spec_idx \\\n",
"0 P28482 MAPK1 11191 0.843331 0.006937 11190 \n",
"1 P28482 MAPK1 11239 0.951820 0.001225 11238 \n",
"2 Q9UH36 SRRD 30053 0.999997 0.000040 30052 \n",
"3 Q9UH36 SRRD 30029 0.995505 0.000184 30028 \n",
"4 Q9UH36 SRRD 30005 0.997286 0.000185 30004 \n",
"5 Q9UH36 SRRD 29981 0.996593 0.000153 29980 \n",
"6 Q96P70 IPO9 22187 0.999999 0.000040 22186 \n",
"7 Q96P70 IPO9 22091 0.999996 0.000050 22090 \n",
"8 Q96P70 IPO9 22067 0.999999 0.000061 22066 \n",
"9 Q96P70 IPO9 21947 0.999997 0.000044 21946 \n",
"10 P51608-2 MECP2 11077 0.998266 0.000142 11076 \n",
"11 P51608-2 MECP2 11029 0.994097 0.000201 11028 \n",
"12 P51608-2 MECP2 10981 0.999939 0.000070 10980 \n",
"13 P51608-2 MECP2 10957 0.971834 0.000604 10956 \n",
" proteins uniprot_ids genes scan_num score fdr diann_spec_idx \\\n",
"0 P28482 MAPK1 11191 0.843331 0.006937 11190 \n",
"1 P28482 MAPK1 11239 0.951820 0.001225 11238 \n",
"2 Q9UH36 SRRD 30053 0.999997 0.000040 30052 \n",
"3 Q9UH36 SRRD 30029 0.995505 0.000184 30028 \n",
"4 Q9UH36 SRRD 30005 0.997286 0.000185 30004 \n",
"5 Q9UH36 SRRD 29981 0.996593 0.000153 29980 \n",
"6 Q96P70 IPO9 22187 0.999999 0.000040 22186 \n",
"7 Q96P70 IPO9 22091 0.999996 0.000050 22090 \n",
"8 Q96P70 IPO9 22067 0.999999 0.000061 22066 \n",
"9 Q96P70 IPO9 21947 0.999997 0.000044 21946 \n",
"10 P51608-2 MECP2 11077 0.998266 0.000142 11076 \n",
"11 P51608-2 MECP2 11029 0.994097 0.000201 11028 \n",
"12 P51608-2 MECP2 10981 0.999939 0.000070 10980 \n",
"13 P51608-2 MECP2 10957 0.971834 0.000604 10956 \n",
"\n",
" mods mod_sites nAA rt_norm precursor_mz \\\n",
"0 Acetyl@Any_N-term;Oxidation@M 0;12 14 0.372721 650.819344 \n",
Expand Down Expand Up @@ -939,7 +939,7 @@
"13 612.071553 "
]
},
"execution_count": null,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -999,26 +999,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['S[UniMod:21]',\n",
" 'S[Phospho (S)]',\n",
"['S[ph]',\n",
" 'S[Phospho (STY)]',\n",
" 'S(Phospho (S))',\n",
" 'S[Phospho (STYDH)]',\n",
" 'S(ph)',\n",
" 'S(UniMod:21)',\n",
" 'S[Phospho (S)]',\n",
" 'S[Phospho (ST)]',\n",
" 'S(Phospho (ST))',\n",
" 'S[Phospho (STY)]',\n",
" 'S[UniMod:21]',\n",
" 'pS',\n",
" 'S(UniMod:21)',\n",
" 'S(ph)',\n",
" 'S[ph]',\n",
" 'S(Phospho (STY))',\n",
" 'S[Phospho (ST)]']"
" 'S(Phospho (STYDH))']"
]
},
"execution_count": null,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1029,9 +1031,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "AssertionError",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[13], line 25\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m np\u001b[38;5;241m.\u001b[39msum(\u001b[38;5;241m~\u001b[39mdiann_reader\u001b[38;5;241m.\u001b[39mpsm_df\u001b[38;5;241m.\u001b[39mmods\u001b[38;5;241m.\u001b[39mstr\u001b[38;5;241m.\u001b[39mcontains(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAcetyl@Any_N-term\u001b[39m\u001b[38;5;124m'\u001b[39m)) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m4\u001b[39m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m np\u001b[38;5;241m.\u001b[39msum(diann_reader\u001b[38;5;241m.\u001b[39mpsm_df\u001b[38;5;241m.\u001b[39mmods\u001b[38;5;241m.\u001b[39mstr\u001b[38;5;241m.\u001b[39mcontains(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOxidation@M\u001b[39m\u001b[38;5;124m'\u001b[39m)) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mset\u001b[39m(diann_reader\u001b[38;5;241m.\u001b[39mmodification_mapping[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPhospho@S\u001b[39m\u001b[38;5;124m'\u001b[39m])\u001b[38;5;241m==\u001b[39m\u001b[38;5;28mset\u001b[39m([\n\u001b[1;32m 26\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpS\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 27\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(ph)\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 28\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(UniMod:21)\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(Phospho (S))\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(Phospho (ST))\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(Phospho (STY))\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 32\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[ph]\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 33\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[UniMod:21]\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 34\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[Phospho (S)]\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 35\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[Phospho (ST)]\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[Phospho (STY)]\u001b[39m\u001b[38;5;124m'\u001b[39m])\n",
"\u001b[0;31mAssertionError\u001b[0m: "
]
}
],
"source": [
"tsv = StringIO('''File.Name\tRun\tProtein.Group\tProtein.Ids\tProtein.Names\tGenes\tPG.Quantity\tPG.Normalised\tPG.MaxLFQ\tGenes.Quantity\tGenes.Normalised\tGenes.MaxLFQ\tGenes.MaxLFQ.Unique\tModified.Sequence\tStripped.Sequence\tPrecursor.Id\tPrecursor.Charge\tQ.Value\tGlobal.Q.Value\tProtein.Q.Value\tPG.Q.Value\tGlobal.PG.Q.Value\tGG.Q.Value\tTranslated.Q.Value\tProteotypic\tPrecursor.Quantity\tPrecursor.Normalised\tPrecursor.Translated\tQuantity.Quality\tRT\tRT.Start\tRT.Stop\tiRT\tPredicted.RT\tPredicted.iRT\tLib.Q.Value\tMs1.Profile.Corr\tMs1.Area\tEvidence\tSpectrum.Similarity\tMass.Evidence\tCScore\tDecoy.Evidence\tDecoy.CScore\tFragment.Quant.Raw\tFragment.Quant.Corrected\tFragment.Correlations\tMS2.Scan\tIM\tiIM\tPredicted.IM\tPredicted.iIM\n",
"F:\\XXX\\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d\t20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636\tQ9UH36\tQ9UH36\t\tSRRD\t3296.49\t3428.89\t3428.89\t3296.49\t3428.89\t3428.89\t3428.89\t(UniMod:1)AAAAAAALESWQAAAPR\tAAAAAAALESWQAAAPR\t(UniMod:1)AAAAAAALESWQAAAPR2\t2\t3.99074e-05\t1.96448e-05\t0.000159821\t0.000159821\t0.000146135\t0.000161212\t0\t1\t3296.49\t3428.89\t3296.49\t0.852479\t19.9208\t19.8731\t19.9685\t123.9\t19.8266\t128.292\t0\t0.960106\t5308.05\t1.96902\t0.683134\t0.362287\t0.999997\t1.23691\t3.43242e-05\t1212.01;2178.03;1390.01;1020.01;714.008;778.008;\t1212.01;1351.73;887.591;432.92;216.728;732.751;\t0.956668;0.757581;0.670497;0.592489;0.47072;0.855203;\t30053\t1.19708\t1.19328\t1.19453\t1.19469\n",
Expand Down Expand Up @@ -1064,16 +1078,19 @@
" 'S(Phospho (S))',\n",
" 'S(Phospho (ST))',\n",
" 'S(Phospho (STY))',\n",
" 'S(Phospho (STYDH))',\n",
" 'S[ph]',\n",
" 'S[UniMod:21]',\n",
" 'S[Phospho (S)]',\n",
" 'S[Phospho (ST)]',\n",
" 'S[Phospho (STY)]'])"
" 'S[Phospho (STY)]',\n",
" 'S[Phospho (STYDH)]'\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {},
"outputs": [
{
Expand All @@ -1082,7 +1099,7 @@
"'Acetyl@Any_N-term'"
]
},
"execution_count": null,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -1111,6 +1128,18 @@
"display_name": "python3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down

0 comments on commit ae84223

Please sign in to comment.