diff --git a/nbs_tests/hla/hla_class1.ipynb b/nbs_tests/hla/hla_class1.ipynb
new file mode 100644
index 00000000..d0fa0eb3
--- /dev/null
+++ b/nbs_tests/hla/hla_class1.ipynb
@@ -0,0 +1,329 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%reload_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install -q pydivsufsort"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from peptdeep.hla.hla_class1 import HLA1_Binding_Classifier\n",
+    "\n",
+    "model = HLA1_Binding_Classifier()\n",
+    "model.load_pretrained_hla_model()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prot1 = 'MABCDEKFGHIJKLMNOPQRST'\n",
+    "prot2 = 'FGHIJKLMNOPQR'\n",
+    "protein_dict = {\n",
+    "    'xx': {\n",
+    "        'protein_id': 'xx',\n",
+    "        'gene_name': '',\n",
+    "        'sequence': prot1\n",
+    "    },\n",
+    "    'yy': {\n",
+    "        'protein_id': 'yy',\n",
+    "        'gene_name': 'gene',\n",
+    "        'sequence': prot2\n",
+    "    }\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 1/1 [00:00<00:00, 14.32it/s]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
start_posend_posnAAHLA_prob_predsequence
01980.124847MABCDEKF
1142280.040122LMNOPQRS
2132180.674667KLMNOPQR
3122080.119722JKLMNOPQ
4111980.104152IJKLMNOP
..................
79519140.163758DEKFGHIJKLMNOP
80418140.000618CDEKFGHIJKLMNO
81317140.000773BCDEKFGHIJKLMN
82923140.525840GHIJKLMNOPQRST
83620140.156962EKFGHIJKLMNOPQ
\n", + "

84 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " start_pos end_pos nAA HLA_prob_pred sequence\n", + "0 1 9 8 0.124847 MABCDEKF\n", + "1 14 22 8 0.040122 LMNOPQRS\n", + "2 13 21 8 0.674667 KLMNOPQR\n", + "3 12 20 8 0.119722 JKLMNOPQ\n", + "4 11 19 8 0.104152 IJKLMNOP\n", + ".. ... ... ... ... ...\n", + "79 5 19 14 0.163758 DEKFGHIJKLMNOP\n", + "80 4 18 14 0.000618 CDEKFGHIJKLMNO\n", + "81 3 17 14 0.000773 BCDEKFGHIJKLMN\n", + "82 9 23 14 0.525840 GHIJKLMNOPQRST\n", + "83 6 20 14 0.156962 EKFGHIJKLMNOPQ\n", + "\n", + "[84 rows x 5 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict_from_proteins(protein_data=protein_dict, prob_threshold=0.0)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sequencenAAHLA_prob_pred
0MABCDEKF80.124847
1KLMNOPQR80.674667
2DEKFGHIJKLMNOP140.163758
\n", + "
" + ], + "text/plain": [ + " sequence nAA HLA_prob_pred\n", + "0 MABCDEKF 8 0.124847\n", + "1 KLMNOPQR 8 0.674667\n", + "2 DEKFGHIJKLMNOP 14 0.163758" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "peptide_df = pd.DataFrame({\n", + " \"sequence\": [\"MABCDEKF\",\"KLMNOPQR\",\"DEKFGHIJKLMNOP\"]\n", + "})\n", + "model.predict_peptide_df_(peptide_df=peptide_df)\n", + "peptide_df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/peptdeep/constants/default_settings.yaml b/peptdeep/constants/default_settings.yaml index 29ddb833..d01acbc2 100644 --- a/peptdeep/constants/default_settings.yaml +++ b/peptdeep/constants/default_settings.yaml @@ -10,9 +10,11 @@ model: PEPTDEEP_HOME: "~/peptdeep" # ~ refers to user folder (e.g. C:/Users/username) local_model_zip_name: "pretrained_models.zip" +local_hla_model_zip_name: "hla_model.zip" # overwritable config model_url: "https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/pretrained_models.zip" +hla_model_url: "https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/hla_model.zip" task_workflow: [library] task_choices: diff --git a/peptdeep/hla/__init__.py b/peptdeep/hla/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/peptdeep/hla/hla_class1.py b/peptdeep/hla/hla_class1.py new file mode 100644 index 00000000..3a9f5789 --- /dev/null +++ b/peptdeep/hla/hla_class1.py @@ -0,0 +1,392 @@ +import os +import torch +import pandas as pd +import tqdm + +from typing import Union + +import peptdeep.model.building_block as building_block +from peptdeep.model.model_interface import ModelInterface +from peptdeep.model.featurize import get_ascii_indices +from peptdeep.pretrained_models import pretrain_dir, download_models, global_settings + +from .hla_utils import ( + get_random_sequences, + load_prot_df, + cat_proteins, + get_seq_series, + nonspecific_digest_cat_proteins, +) + + +class HLA_Class_I_LSTM(torch.nn.Module): + """ + HLA-I-binding peptide prediction model using LSTM. + """ + + def __init__( + self, + *, + hidden_dim=256, + input_dim=128, + n_lstm_layers=4, + dropout=0.1, + ): + """ + Parameters + ---------- + hidden_dim : int, optional + hidden dimension, by default 256 + input_dim : int, optional + input dimension, by default 128 (ASCII) + n_lstm_layers : int, optional + number of LSTM layers, by default 4 + dropout : float, optional + dropout rate, by default 0.1 + """ + super().__init__() + self.dropout = torch.nn.Dropout(dropout) + + self.nn = torch.nn.Sequential( + torch.nn.Embedding(input_dim, hidden_dim // 4), + building_block.SeqCNN(hidden_dim // 4), + self.dropout, + building_block.SeqLSTM(hidden_dim, hidden_dim, rnn_layer=n_lstm_layers), + building_block.SeqAttentionSum(hidden_dim), + self.dropout, + torch.nn.Linear(hidden_dim, 64), + torch.nn.GELU(), + torch.nn.Linear(64, 1), + torch.nn.Sigmoid(), + ) + + def forward(self, x): + return self.nn(x).squeeze(-1) + + +class HLA_Class_I_Bert(torch.nn.Module): + """ + Model based on a transformer Architecture from + Huggingface's BertEncoder class. 
+ """ + + def __init__( + self, + nlayers=4, + input_dim=128, + hidden_dim=256, + output_attentions=False, + dropout=0.1, + **kwargs, + ): + super().__init__() + + self.dropout = torch.nn.Dropout(dropout) + + self.input_nn = torch.nn.Sequential( + torch.nn.Embedding(input_dim, hidden_dim), + building_block.PositionalEncoding(hidden_dim), + ) + + self._output_attentions = output_attentions + + self.hidden_nn = building_block.Hidden_HFace_Transformer( + hidden_dim, + nlayers=nlayers, + dropout=dropout, + output_attentions=output_attentions, + ) + + self.output_nn = torch.nn.Sequential( + building_block.SeqAttentionSum(hidden_dim), + torch.nn.PReLU(), + self.dropout, + torch.nn.Linear(hidden_dim, 1), + torch.nn.Sigmoid(), + ) + + @property + def output_attentions(self): + return self._output_attentions + + @output_attentions.setter + def output_attentions(self, val: bool): + self._output_attentions = val + self.hidden_nn.output_attentions = val + + def forward(self, x): + x = self.dropout(self.input_nn(x)) + + x = self.hidden_nn(x) + if self.output_attentions: + self.attentions = x[1] + else: + self.attentions = None + x = self.dropout(x[0]) + + return self.output_nn(x).squeeze(1) + + +class HLA1_Binding_Classifier(ModelInterface): + """ + Class to predict HLA-binding probabilities of peptides. + """ + + _model_zip_name = global_settings["local_hla_model_zip_name"] + _model_url = global_settings["hla_model_url"] + _model_zip = os.path.join(pretrain_dir, _model_zip_name) + + def __init__( + self, + dropout: float = 0.1, + model_class: type = HLA_Class_I_LSTM, # model defined above + device: str = "gpu", + min_peptide_length: int = 8, + max_peptide_length: int = 14, + **kwargs, + ): + """ + Parameters + ---------- + dropout : float, optional + dropout rate of the model, by default 0.1 + model_class : torch.nn.Module, optional + The model class type, can be :class:`HLA_Class_I_LSTM` or + :class:`HLA_Class_I_Bert`, by default :class:`HLA_Class_I_LSTM` + min_peptide_length : int, optional + minimal peptide length after digestion, by default 8 + max_peptide_length : int, optional + maximal peptide length after digestion, by default 14 + """ + super().__init__(device=device) + self.build(model_class, dropout=dropout, **kwargs) + self.loss_func = torch.nn.BCELoss() + self.target_column_to_predict = "HLA_prob_pred" + self.min_peptide_length = min_peptide_length + self.max_peptide_length = max_peptide_length + self._n_neg_per_pos_training = 1 + + self.predict_batch_size = 4096 + + def _prepare_predict_data_df( + self, + precursor_df: pd.DataFrame, + ): + """ + Prepare the predicting data from `precursor_df`. + + Parameters + ---------- + precursor_df : pd.DataFrame + The dataframe to predict. + """ + self.__training = False + precursor_df[self.target_column_to_predict] = 0.0 + self.predict_df = precursor_df + + def _prepare_train_data_df(self, precursor_df: pd.DataFrame, **kwargs): + """ + Prepare data for training from precursor_df. + + Parameters + ---------- + precursor_df : pd.DataFrame + The dataframe for training. 
+ """ + self.__training = True + precursor_df["nAA"] = precursor_df.sequence.str.len() + precursor_df.drop( + index=precursor_df[ + (precursor_df.nAA < self.min_peptide_length) + | (precursor_df.nAA > self.max_peptide_length) + ].index, + inplace=True, + ) + precursor_df.reset_index(inplace=True, drop=True) + + def _get_features_from_batch_df( + self, + batch_df: pd.DataFrame, + **kwargs, + ) -> torch.LongTensor: + """ + Convert AA sequences to tokens, which are `torch.LongTensor` of AA ASCII code array. + + Parameters + ---------- + batch_df : pd.DataFrame + The batch dataframe containing the `sequence` column. + All sequences in batch_df are treated as positive. + When training, negative sequences are sampled from self.protein_df. + + Returns + ------- + torch.LongTensor + The ASCII tokens of AA sequences. + """ + aa_indices = self._as_tensor( + get_ascii_indices(batch_df["sequence"].values.astype("U")), dtype=torch.long + ) + + if self.__training: + rnd_seqs = get_random_sequences( + self.protein_df, + n=int(len(batch_df) * self._n_neg_per_pos_training), + pep_len=batch_df.nAA.values[0], + ) + aa_indices = torch.cat( + [ + aa_indices, + self._as_tensor(get_ascii_indices(rnd_seqs), dtype=torch.long), + ], + axis=0, + ) + + return aa_indices + + def _get_targets_from_batch_df( + self, batch_df: pd.DataFrame, **kwargs + ) -> torch.Tensor: + """ + Get target (y) value for training from batch_df. + + Parameters + ---------- + batch_df : pd.DataFrame + All sequences in batch_df are positive. + Random sequences are negative. + + Returns + ------- + torch.Tensor + Tensor with 0-1 binary values. + """ + x = torch.zeros( + len(batch_df) + + ( + int(len(batch_df) * self._n_neg_per_pos_training) + if self.__training + else 0 + ), + device=self.device, + ) + x[: len(batch_df)] = 1 + return x + + def load_proteins( + self, + protein_data: Union[pd.DataFrame, str, list, dict], + ): + """ + Load proteins, and generate :attr:`protein_df` and + :attr:`_cat_protein_sequence` in this object. + + Parameters + ---------- + protein_data : pd.DataFrame | str | list | dict + pd.DataFrame: protein_df with a `sequence` column + str : absolute or relative fasta file path + list: list of fasta file path + dict: protein dict structure + """ + + if isinstance(protein_data, pd.DataFrame): + self.protein_df = protein_data + self._cat_protein_sequence = cat_proteins( + self.protein_df["sequence"].to_numpy() + ) + else: + self.protein_df = load_prot_df(protein_data) + self._cat_protein_sequence = cat_proteins( + self.protein_df["sequence"].to_numpy() + ) + + def _digest_proteins(self): + """ + Unspecific digestion of proteins generates :attr:`digested_idxes_df`. + """ + self.digested_idxes_df = nonspecific_digest_cat_proteins( + self._cat_protein_sequence, self.min_peptide_length, self.max_peptide_length + ) + + def _predict_all_probs(self, digest_batch_size: int): + """ + Predict probabilities for self.digested_idxes_df. 
+ """ + for i in tqdm.tqdm(range(0, len(self.digested_idxes_df), digest_batch_size)): + _df = self.digested_idxes_df.iloc[i : i + digest_batch_size] + seq_df = get_seq_series(_df, self._cat_protein_sequence).to_frame( + "sequence" + ) + seq_df["nAA"] = _df.nAA + self.predict(seq_df, batch_size=self.predict_batch_size) + self.digested_idxes_df[self.target_column_to_predict].values[ + i : i + digest_batch_size + ] = seq_df[self.target_column_to_predict] + + def predict_peptide_df_( + self, + peptide_df: pd.DataFrame, + ): + """ + Predict HLA probabilities for the given peptide dataframe + Probabilities are predicted inplace in `peptide_df` with + the predicted `HLA_prob_pred` column. + + Parameters + ---------- + peptide_df : pd.DataFrame + peptide dataframe with `sequence` column. + """ + peptide_df = self.predict(peptide_df, batch_size=self.predict_batch_size) + + def predict_from_proteins( + self, + protein_data: Union[pd.DataFrame, str, list, dict], + prob_threshold: float = 0.7, + digest_batch_size: int = 1024000, + ) -> pd.DataFrame: + """ + Digest peptides from :attr:`protein_df`. + + Parameters + ---------- + protein_data : pd.DataFrame | str | list | dict + pd.DataFrame: protein_df with a `sequence` column + str : absolute or relative fasta file path + list: list of fasta file path + dict: protein dict structure + prob_threshold : float, optional + Peptides above this probability are kept, by default 0.7 + digest_batch_size : int, optional + Batch size for digestion, by default 1024000 + + Returns + ------- + pd.DataFrame + The peptide dataframe in alphabase format. + """ + self.load_proteins(protein_data=protein_data) + + self._digest_proteins() + self.digested_idxes_df[self.target_column_to_predict] = 0.0 + + self._predict_all_probs(digest_batch_size) + + peptide_df = self.digested_idxes_df[ + self.digested_idxes_df[self.target_column_to_predict] >= prob_threshold + ].reset_index(drop=True) + + peptide_df["sequence"] = get_seq_series(peptide_df, self._cat_protein_sequence) + return peptide_df + + def _download_pretrained_hla_model(self): + download_models(url=self._model_url, target_path=self._model_zip) + + def load_pretrained_hla_model(self): + """ + Load pretrained `HLA1_IEDB.pt` model. + """ + if not os.path.exists(self._model_zip): + self._download_pretrained_hla_model() + self.load(model_file=self._model_zip, model_path_in_zip="HLA1_IEDB.pt") diff --git a/peptdeep/hla/hla_utils.py b/peptdeep/hla/hla_utils.py new file mode 100644 index 00000000..d74d36b0 --- /dev/null +++ b/peptdeep/hla/hla_utils.py @@ -0,0 +1,187 @@ +import pandas as pd +import numpy as np +import numba + +import os + +from typing import Union, List + +from alphabase.protein.lcp_digest import get_substring_indices + +from alphabase.protein.fasta import load_all_proteins + + +def load_prot_df( + protein_data: Union[str, list, tuple, set, dict], +) -> pd.DataFrame: + """ + Load protein dataframe from input protein_data. + + Parameters + ---------- + protein_data : Union[str,list,tuple,set,dict] + str: fasta file + list (tuple, or set): a list of fasta files + dict: protein dict + + Returns + ------- + pd.DataFrame + protein dataframe + + Raises + ------ + TypeError + protein_data type is not one of str, list, tuple, set, or dict. 
+ """ + if isinstance(protein_data, str): + protein_dict = load_all_proteins([protein_data]) + elif isinstance(protein_data, (list, tuple, set)): + protein_dict = load_all_proteins(protein_data) + elif isinstance(protein_data, dict): + protein_dict = protein_data + else: + raise TypeError( + "`protein_data` must be str, list, tuple, set or dict, " + f"`{type(protein_data)}` is given." + ) + prot_df = pd.DataFrame().from_dict(protein_dict, orient="index") + prot_df["nAA"] = prot_df.sequence.str.len() + return prot_df + + +def cat_proteins(sequences: List[str], sep: str = "$") -> str: + """ + Concatenate protein sequences in `prot_df` into a single sequence. + + Parameters + ---------- + sequences : list + List-like sequence list. + sep : str, optional + Separater of the concat string, by default '$' + + Returns + ------- + str + The concat protein sequence. + + Example + ------- + >>> sequences = ["ABC","DEF"] + >>> cat_proteins(sequences, sep="$") + '$ABC$DEF$' + """ + return sep + sep.join(sequences) + sep + + +def nonspecific_digest_cat_proteins( + cat_sequence: str, min_len: int, max_len: int +) -> pd.DataFrame: + """ + Digest the concat protein sequence to non-specific peptides. + + Parameters + ---------- + cat_sequence : str + The concat protein sequence generated by :func:`cat_proteins` + min_len : int + Min peptide length + max_len : int + Max peptide length + + Returns + ------- + pd.DataFrame + A dataframe sorted by `nAA` with three columns: + `start_pos`: the start index of the peptide in cat_protein + `end_pos`: the stop/end index of the peptide in cat_protein + `nAA`: the number of amino acids (peptide length). + """ + pos_starts, pos_ends = get_substring_indices(cat_sequence, min_len, max_len) + digest_df = pd.DataFrame(dict(start_pos=pos_starts, end_pos=pos_ends)) + digest_df["nAA"] = digest_df.end_pos - digest_df.start_pos + digest_df.sort_values("nAA", inplace=True) + digest_df.reset_index(inplace=True, drop=True) + return digest_df + + +def _get_rnd_subseq(prot_seq_len: tuple, pep_len: int) -> str: + """ + Get random subsequence from a protein sequence. + This function is only used by :func:`get_random_sequences`. + + Parameters + ---------- + prot_seq_len : tuple + (protein sequence, sequence length) + pep_len : int + peptide length to get + + Returns + ------- + str + The peptide sequence. + """ + sequence, prot_len = prot_seq_len + if prot_len <= pep_len: + return ( + "".join([sequence] * (pep_len // prot_len)) + sequence[: pep_len % prot_len] + ) + start = np.random.randint(0, prot_len - pep_len) + return sequence[start : start + pep_len] + + +def get_random_sequences(prot_df: pd.DataFrame, n: int, pep_len: int): + """ + Random peptide sampling from proteins + """ + return ( + prot_df.sample(n, replace=True, weights="nAA")[["sequence", "nAA"]] + .apply(_get_rnd_subseq, pep_len=pep_len, axis=1) + .values.astype("U") + ) + + +@numba.njit +def _check_sty(seq: str) -> bool: + """ + If a sequence contains STY. + """ + for aa in seq: + if aa in "STY": + return True + return False + + +def get_seq_series(idxes_df: pd.DataFrame, cat_prot: str) -> pd.Series: + """ + Get sub-sequence pd.Series from a concat protein sequence based on `idxes_df`. + + Parameters + ---------- + idxes_df : pd.DataFrame + a dataframe with `start_pos` and `stop_pos` columns of `cat_prot`. + cat_prot : str + The concat protein sequence. + + Returns + ------- + pd.Series + pd.Series with sub-sequences (peptide sequences). 
+ """ + return idxes_df[["start_pos", "end_pos"]].apply( + lambda x: cat_prot[slice(*x)], axis=1 + ) + + +def check_is_file(file_path: str): + """ + Check if a file_path exists. + """ + if os.path.isfile(file_path): + print(f"Loading `{file_path}`") + return True + else: + print(f"`{file_path}` does not exist, ignore it.") + return False diff --git a/peptdeep/pretrained_models.py b/peptdeep/pretrained_models.py index 1e82a240..3d6f8ef2 100644 --- a/peptdeep/pretrained_models.py +++ b/peptdeep/pretrained_models.py @@ -64,13 +64,17 @@ def is_model_zip(downloaded_zip): return any(x == "generic/ms2.pth" for x in zip.namelist()) -def download_models(url: str = model_url, overwrite=True): +def download_models(url: str = model_url, target_path: str = model_zip, overwrite=True): """ Parameters ---------- url : str, optional Remote or local path. - Defaults to `peptdeep.pretrained_models.model_url` + Defaults to :data:`peptdeep.pretrained_models.model_url` + + target_path : str, optional + Target file path after download. + Defaults to :data:`peptdeep.pretrained_models.model_zip` overwrite : bool, optional overwirte old model files. @@ -82,13 +86,13 @@ def download_models(url: str = model_url, overwrite=True): If remote url is not accessible. """ if not os.path.isfile(url): - logging.info(f"Downloading {model_zip_name} ...") + logging.info(f"Downloading {url} ...") try: context = ssl._create_unverified_context() requests = urllib.request.urlopen(url, context=context, timeout=10) - with open(model_zip, "wb") as f: + with open(target_path, "wb") as f: f.write(requests.read()) - except (socket.timeout, urllib.error.URLError, urllib.error.HTTPError) as e: + except (socket.timeout, urllib.error.URLError, urllib.error.HTTPError): raise FileNotFoundError( "Downloading model failed! Please download the " f'zip or tar file by yourself from "{url}",' @@ -97,8 +101,8 @@ def download_models(url: str = model_url, overwrite=True): " to install the models" ) else: - shutil.copy(url, model_zip) - logging.info(f"The pretrained models had been downloaded in {model_zip}") + shutil.copy(url, target_path) + logging.info(f"The pretrained models had been downloaded in {target_path}") if not os.path.exists(model_zip): @@ -486,7 +490,7 @@ def _load_file(model, model_file): return else: model.load(model_file) - except (UnpicklingError, TypeError, ValueError, KeyError) as e: + except (UnpicklingError, TypeError, ValueError, KeyError): logging.info( f"Cannot load {model_file} as {model.__class__} model, peptdeep will use the pretrained model instead." ) diff --git a/requirements/requirements_hla.txt b/requirements/requirements_hla.txt new file mode 100644 index 00000000..f6012f1f --- /dev/null +++ b/requirements/requirements_hla.txt @@ -0,0 +1 @@ +pydivsufsort # used by alphabase.protein.lcp_digest