diff --git a/alphabase/constants/const_files/psm_reader.yaml b/alphabase/constants/const_files/psm_reader.yaml
index d2577b93..03ce74ee 100644
--- a/alphabase/constants/const_files/psm_reader.yaml
+++ b/alphabase/constants/const_files/psm_reader.yaml
@@ -38,6 +38,7 @@ maxquant:
'scan_num':
- 'Scan number'
- 'MS/MS scan number'
+ - 'MS/MS Scan Number'
- 'Scan index'
'raw_name': 'Raw file'
'precursor_mz': 'm/z'
@@ -57,6 +58,8 @@ maxquant:
'Acetyl@Protein_N-term':
- '_(Acetyl (Protein_N-term))'
- '_(ac)'
+ 'Acetyl@K':
+ - 'K(ac)'
'Carbamidomethyl@C':
- 'C(Carbamidomethyl (C))'
- 'C(Carbamidomethyl)'
@@ -68,22 +71,38 @@ maxquant:
- 'S(Phospho (S))'
- 'S(Phospho (ST))'
- 'S(Phospho (STY))'
+ - 'S(Phospho (STYDH))'
- 'S(ph)'
- 'pS'
'Phospho@T':
- 'T(Phospho (T))'
- 'T(Phospho (ST))'
- 'T(Phospho (STY))'
+ - 'T(Phospho (STYDH))'
- 'T(ph)'
- 'pT'
'Phospho@Y':
- 'Y(Phospho (Y))'
- 'Y(Phospho (STY))'
+ - 'Y(Phospho (STYDH))'
- 'Y(ph)'
- 'pY'
+ 'Phospho@D':
+ - 'D(Phospho (STYDH))'
+ - 'pD'
+ 'Phospho@H':
+ - 'H(Phospho (STYDH))'
+ - 'pH'
+ 'Crotonyl@K':
+ - 'K(cr)'
+ 'Lactylation@K':
+ - 'K(la)'
+ 'Succinyl@K':
+ - 'K(su)'
'Deamidated@N': ['N(Deamidation (NQ))','N(de)']
'Deamidated@Q': ['Q(Deamidation (NQ))','Q(de)']
'GlyGly@K': ['K(GlyGly (K))', 'K(gl)']
+ 'hydroxyisobutyryl@K': ['K(2-)']
pfind:
reader_type: pfind
diff --git a/nbs_tests/psm_reader/dia_psm_reader.ipynb b/nbs_tests/psm_reader/dia_psm_reader.ipynb
index cbb6f0c6..2d15c477 100644
--- a/nbs_tests/psm_reader/dia_psm_reader.ipynb
+++ b/nbs_tests/psm_reader/dia_psm_reader.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -49,7 +49,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -76,7 +76,7 @@
" 'genes': ['Genes', 'Gene', 'GeneName', 'GeneNames']}"
]
},
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -101,7 +101,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -115,7 +115,7 @@
" 'FullUniModPeptideName']"
]
},
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -133,7 +133,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -155,7 +155,7 @@
" 'fdr': 'Q.Value'}"
]
},
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -180,7 +180,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -189,7 +189,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -291,7 +291,7 @@
"3 HLLNQAVGEEEVPK 14 1.000000 521.610617 "
]
},
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -314,7 +314,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -427,7 +427,7 @@
"2 MAP7 Acetyl@Protein_N-term;Phospho@S 0;4 11 1.0 371.282739 "
]
},
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -457,7 +457,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -484,7 +484,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -521,7 +521,7 @@
"
scan_num | \n",
" score | \n",
" fdr | \n",
- " spec_idx | \n",
+ " diann_spec_idx | \n",
" mods | \n",
" mod_sites | \n",
" nAA | \n",
@@ -890,21 +890,21 @@
"12 AAAAAAAPSGGGGGGEEERLEEK 3 7.28562 7.23794 7.33338 1.01500 \n",
"13 AAAAAAAPSGGGGGGEEERLEEK 3 7.26825 7.22055 7.31601 1.01208 \n",
"\n",
- " proteins uniprot_ids genes scan_num score fdr spec_idx \\\n",
- "0 P28482 MAPK1 11191 0.843331 0.006937 11190 \n",
- "1 P28482 MAPK1 11239 0.951820 0.001225 11238 \n",
- "2 Q9UH36 SRRD 30053 0.999997 0.000040 30052 \n",
- "3 Q9UH36 SRRD 30029 0.995505 0.000184 30028 \n",
- "4 Q9UH36 SRRD 30005 0.997286 0.000185 30004 \n",
- "5 Q9UH36 SRRD 29981 0.996593 0.000153 29980 \n",
- "6 Q96P70 IPO9 22187 0.999999 0.000040 22186 \n",
- "7 Q96P70 IPO9 22091 0.999996 0.000050 22090 \n",
- "8 Q96P70 IPO9 22067 0.999999 0.000061 22066 \n",
- "9 Q96P70 IPO9 21947 0.999997 0.000044 21946 \n",
- "10 P51608-2 MECP2 11077 0.998266 0.000142 11076 \n",
- "11 P51608-2 MECP2 11029 0.994097 0.000201 11028 \n",
- "12 P51608-2 MECP2 10981 0.999939 0.000070 10980 \n",
- "13 P51608-2 MECP2 10957 0.971834 0.000604 10956 \n",
+ " proteins uniprot_ids genes scan_num score fdr diann_spec_idx \\\n",
+ "0 P28482 MAPK1 11191 0.843331 0.006937 11190 \n",
+ "1 P28482 MAPK1 11239 0.951820 0.001225 11238 \n",
+ "2 Q9UH36 SRRD 30053 0.999997 0.000040 30052 \n",
+ "3 Q9UH36 SRRD 30029 0.995505 0.000184 30028 \n",
+ "4 Q9UH36 SRRD 30005 0.997286 0.000185 30004 \n",
+ "5 Q9UH36 SRRD 29981 0.996593 0.000153 29980 \n",
+ "6 Q96P70 IPO9 22187 0.999999 0.000040 22186 \n",
+ "7 Q96P70 IPO9 22091 0.999996 0.000050 22090 \n",
+ "8 Q96P70 IPO9 22067 0.999999 0.000061 22066 \n",
+ "9 Q96P70 IPO9 21947 0.999997 0.000044 21946 \n",
+ "10 P51608-2 MECP2 11077 0.998266 0.000142 11076 \n",
+ "11 P51608-2 MECP2 11029 0.994097 0.000201 11028 \n",
+ "12 P51608-2 MECP2 10981 0.999939 0.000070 10980 \n",
+ "13 P51608-2 MECP2 10957 0.971834 0.000604 10956 \n",
"\n",
" mods mod_sites nAA rt_norm precursor_mz \\\n",
"0 Acetyl@Any_N-term;Oxidation@M 0;12 14 0.372721 650.819344 \n",
@@ -939,7 +939,7 @@
"13 612.071553 "
]
},
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -986,36 +986,41 @@
" 'S(Phospho (S))',\n",
" 'S(Phospho (ST))',\n",
" 'S(Phospho (STY))',\n",
+ " 'S(Phospho (STYDH))',\n",
" 'S[ph]',\n",
" 'S[UniMod:21]',\n",
" 'S[Phospho (S)]',\n",
" 'S[Phospho (ST)]',\n",
- " 'S[Phospho (STY)]'])\n",
+ " 'S[Phospho (STY)]',\n",
+ " 'S[Phospho (STYDH)]'],\n",
+ " )\n",
"_df"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "['S[UniMod:21]',\n",
- " 'S[Phospho (S)]',\n",
+ "['S[ph]',\n",
+ " 'S[Phospho (STY)]',\n",
" 'S(Phospho (S))',\n",
+ " 'S[Phospho (STYDH)]',\n",
+ " 'S(ph)',\n",
+ " 'S(UniMod:21)',\n",
+ " 'S[Phospho (S)]',\n",
+ " 'S[Phospho (ST)]',\n",
" 'S(Phospho (ST))',\n",
- " 'S[Phospho (STY)]',\n",
+ " 'S[UniMod:21]',\n",
" 'pS',\n",
- " 'S(UniMod:21)',\n",
- " 'S(ph)',\n",
- " 'S[ph]',\n",
" 'S(Phospho (STY))',\n",
- " 'S[Phospho (ST)]']"
+ " 'S(Phospho (STYDH))']"
]
},
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -1026,9 +1031,9 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"tsv = StringIO('''File.Name\tRun\tProtein.Group\tProtein.Ids\tProtein.Names\tGenes\tPG.Quantity\tPG.Normalised\tPG.MaxLFQ\tGenes.Quantity\tGenes.Normalised\tGenes.MaxLFQ\tGenes.MaxLFQ.Unique\tModified.Sequence\tStripped.Sequence\tPrecursor.Id\tPrecursor.Charge\tQ.Value\tGlobal.Q.Value\tProtein.Q.Value\tPG.Q.Value\tGlobal.PG.Q.Value\tGG.Q.Value\tTranslated.Q.Value\tProteotypic\tPrecursor.Quantity\tPrecursor.Normalised\tPrecursor.Translated\tQuantity.Quality\tRT\tRT.Start\tRT.Stop\tiRT\tPredicted.RT\tPredicted.iRT\tLib.Q.Value\tMs1.Profile.Corr\tMs1.Area\tEvidence\tSpectrum.Similarity\tMass.Evidence\tCScore\tDecoy.Evidence\tDecoy.CScore\tFragment.Quant.Raw\tFragment.Quant.Corrected\tFragment.Correlations\tMS2.Scan\tIM\tiIM\tPredicted.IM\tPredicted.iIM\n",
"F:\\XXX\\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d\t20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636\tQ9UH36\tQ9UH36\t\tSRRD\t3296.49\t3428.89\t3428.89\t3296.49\t3428.89\t3428.89\t3428.89\t(UniMod:1)AAAAAAALESWQAAAPR\tAAAAAAALESWQAAAPR\t(UniMod:1)AAAAAAALESWQAAAPR2\t2\t3.99074e-05\t1.96448e-05\t0.000159821\t0.000159821\t0.000146135\t0.000161212\t0\t1\t3296.49\t3428.89\t3296.49\t0.852479\t19.9208\t19.8731\t19.9685\t123.9\t19.8266\t128.292\t0\t0.960106\t5308.05\t1.96902\t0.683134\t0.362287\t0.999997\t1.23691\t3.43242e-05\t1212.01;2178.03;1390.01;1020.01;714.008;778.008;\t1212.01;1351.73;887.591;432.92;216.728;732.751;\t0.956668;0.757581;0.670497;0.592489;0.47072;0.855203;\t30053\t1.19708\t1.19328\t1.19453\t1.19469\n",
@@ -1061,16 +1066,19 @@
" 'S(Phospho (S))',\n",
" 'S(Phospho (ST))',\n",
" 'S(Phospho (STY))',\n",
+ " 'S(Phospho (STYDH))',\n",
" 'S[ph]',\n",
" 'S[UniMod:21]',\n",
" 'S[Phospho (S)]',\n",
" 'S[Phospho (ST)]',\n",
- " 'S[Phospho (STY)]'])"
+ " 'S[Phospho (STY)]',\n",
+ " 'S[Phospho (STYDH)]'\n",
+ "])"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -1079,7 +1087,7 @@
"'Acetyl@Any_N-term'"
]
},
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -1108,6 +1116,18 @@
"display_name": "python3",
"language": "python",
"name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
}
},
"nbformat": 4,
diff --git a/nbs_tests/psm_reader/maxquant_reader.ipynb b/nbs_tests/psm_reader/maxquant_reader.ipynb
index 90c9c5f0..16b5f745 100644
--- a/nbs_tests/psm_reader/maxquant_reader.ipynb
+++ b/nbs_tests/psm_reader/maxquant_reader.ipynb
@@ -174,11 +174,14 @@
" 'S(Phospho (S))',\n",
" 'S(Phospho (ST))',\n",
" 'S(Phospho (STY))',\n",
+ " 'S(Phospho (STYDH))',\n",
" 'S[ph]',\n",
" 'S[UniMod:21]',\n",
" 'S[Phospho (S)]',\n",
" 'S[Phospho (ST)]',\n",
- " 'S[Phospho (STY)]'])"
+ " 'S[Phospho (STY)]',\n",
+ " 'S[Phospho (STYDH)]'\n",
+ "])"
]
},
{
diff --git a/nbs_tests/psm_reader/psm_reader.ipynb b/nbs_tests/psm_reader/psm_reader.ipynb
index 3af8cfb1..bb3481c1 100644
--- a/nbs_tests/psm_reader/psm_reader.ipynb
+++ b/nbs_tests/psm_reader/psm_reader.ipynb
@@ -211,11 +211,14 @@
" 'S(Phospho (S))',\n",
" 'S(Phospho (ST))',\n",
" 'S(Phospho (STY))',\n",
+ " 'S(Phospho (STYDH))',\n",
" 'S[ph]',\n",
" 'S[UniMod:21]',\n",
" 'S[Phospho (S)]',\n",
" 'S[Phospho (ST)]',\n",
- " 'S[Phospho (STY)]'])\n",
+ " 'S[Phospho (STY)]',\n",
+ " 'S[Phospho (STYDH)]'\n",
+ "])\n",
"try:\n",
" psm_reader_provider.get_reader_by_yaml(psm_reader_yaml['unknown'])\n",
"except Exception as e:\n",