diff --git a/aaanalysis/data_handling_pro/_backend/cd_hit.py b/aaanalysis/data_handling_pro/_backend/cd_hit.py
index 380cca63..af30aa83 100644
--- a/aaanalysis/data_handling_pro/_backend/cd_hit.py
+++ b/aaanalysis/data_handling_pro/_backend/cd_hit.py
@@ -1,4 +1,4 @@
-"""This is a script for the backend of the cd-hit method for the filtering_seq function."""
+"""This is a script for the backend of the CH-HIT method for the filter_seq() function."""
 import pandas as pd
 import os
 
@@ -64,7 +64,7 @@ def run_cd_hit(df_seq=None,
     file_in = os.path.join(temp_dir, f"_{result_prefix}_in")
     save_entries_to_fasta(df_seq=df_seq, file_path=file_in)
     file_out = os.path.join(temp_dir, f"_{result_prefix}_out")
-    # Create CD-hit command
+    # Create CD-HIT command
     if word_size is None:
         word_size = _select_word_size(st=similarity_threshold)
     cmd = ["cd-hit", "-i", file_in,
@@ -87,9 +87,9 @@ def run_cd_hit(df_seq=None,
     if sort_clusters:
         cmd.extend(["-sc", "1"])
 
-    # Run CD-Hit command
+    # Run CD-HIT command
     if verbose:
-        ut.print_out("Run CD-Hit filtering")
+        ut.print_out("Run CD-HIT filtering")
     run_command(cmd=cmd, verbose=verbose, temp_dir=temp_dir)
 
     # Convert CD-Hit output to clustering DataFrame
@@ -98,6 +98,3 @@ def run_cd_hit(df_seq=None,
     # Remove temporary file
     remove_temp(path=temp_dir)
     return df_clust
-
-
-
diff --git a/aaanalysis/data_handling_pro/_backend/mmseq2.py b/aaanalysis/data_handling_pro/_backend/mmseq2.py
index 38f2ac2e..0adbfffe 100644
--- a/aaanalysis/data_handling_pro/_backend/mmseq2.py
+++ b/aaanalysis/data_handling_pro/_backend/mmseq2.py
@@ -1,4 +1,4 @@
-"""This is a script for the backend of the MMseqs2 method for the filtering_seq function."""
+"""This is a script for the backend of the MMseqs2 method for the filter_seq() function."""
 import os
 import pandas as pd
 
diff --git a/aaanalysis/data_handling_pro/_filter_seq.py b/aaanalysis/data_handling_pro/_filter_seq.py
index 198bf5ec..087f63c1 100644
--- a/aaanalysis/data_handling_pro/_filter_seq.py
+++ b/aaanalysis/data_handling_pro/_filter_seq.py
@@ -1,5 +1,5 @@
 """
-This is a script for a wrapper function called filter_seq that provides an Python interface to the
+This is a script for a wrapper function called filter_seq that provides a Python interface to the
 redundancy-reduction algorithms CD-Hit and MMseqs2.
 """
 from typing import Optional, List, Literal
@@ -39,7 +39,7 @@ def filter_seq(df_seq: pd.DataFrame = None,
                verbose: bool = False
                ) -> pd.DataFrame:
     """
-    UNDER CONSTRUCTION: Redundancy reduction of sequences using clustering-based algorithms.
+    Redundancy reduction of sequences using clustering-based algorithms.
 
     This functions performs redundancy reduction of sequences by clustering and selecting representative sequences using
     the CD-HIT ([Li06]_) or MMseqs2 ([Steinegger17]_) algorithms locally. It allows for adjustable filtering strictness:
@@ -47,7 +47,7 @@ def filter_seq(df_seq: pd.DataFrame = None,
     * Strict filtering results in smaller, more homogeneous clusters, suitable when high sequence similarity is required.
     * Non-strict filtering creates larger, more diverse clusters, enhancing sequence representation.
 
-    CD-Hit and MMseq2 are standalone software tools, each requiring separate installation. CD-Hit is more
+    CD-HIT and MMseq2 are standalone software tools, each requiring separate installation. CD-Hit is more
     resource-efficient and easier to install, while MMseq2 is a larger multi-purpose tool. Pairwise sequence similarities
     for the MMseq2 clustering results were computed using the Biopython :class:`Bio.Align.PairwiseAligner` class.
 
@@ -64,7 +64,7 @@ def filter_seq(df_seq: pd.DataFrame = None,
     similarity_threshold : float, default=0.9
         Defines the minimum sequence identity [0.4-1.0] for clustering. Higher values increase strictness.
     word_size : int, optional
-        The size of the 'word' (in CD-Hit, [2-5]) or 'k-mer' (in MMseqs, [5-7]) used for the initial screening step in clustering.
+        The size of the 'word' (in CD-HIT, [2-5]) or 'k-mer' (in MMseqs, [5-7]) used for the initial screening step in clustering.
         Effect on strictness is dataset-dependent. If ``None``, optimized based on ``similarity_threshold`` (CD-Hit).
     global_identity : bool, default=True
         Whether to use global (True) or local (False) sequence identity for 'cd-hit' clustering. Global is stricter.
@@ -117,7 +117,8 @@ def filter_seq(df_seq: pd.DataFrame = None,
 
     Warnings
     --------
-    * CD-Hit and MMseq2 must be installed separately.
+    * CD-HIT and MMseq2 must be installed separately.
+    * CD-HIT is not available for Windows.
     * This function requires `biopython`, which is automatically installed via `pip install aaanalysis[pro]`.
 
     Examples
diff --git a/examples/data_handling/filter_seq.ipynb b/examples/data_handling/filter_seq.ipynb
index 2bfef3e7..e78b3008 100644
--- a/examples/data_handling/filter_seq.ipynb
+++ b/examples/data_handling/filter_seq.ipynb
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "outputs": [],
    "source": [
     "import aaanalysis as aa\n",
@@ -22,8 +22,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-06-27T13:43:08.448843861Z",
-     "start_time": "2024-06-27T13:43:05.447608778Z"
+     "end_time": "2024-06-27T20:22:29.817583849Z",
+     "start_time": "2024-06-27T20:22:29.631806177Z"
     }
    },
    "id": "f6652f89954b8969"
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "outputs": [
     {
      "name": "stdout",
@@ -53,7 +53,7 @@
     {
      "data": {
       "text/plain": "<IPython.core.display.HTML object>",
-      "text/html": "<style type=\"text/css\">\n#T_d31f6 thead th {\n  background-color: white;\n  color: black;\n}\n#T_d31f6 tbody tr:nth-child(odd) {\n  background-color: #f2f2f2;\n}\n#T_d31f6 tbody tr:nth-child(even) {\n  background-color: white;\n}\n#T_d31f6 th {\n  padding: 5px;\n  white-space: nowrap;\n}\n#T_d31f6  td {\n  padding: 5px;\n  white-space: nowrap;\n}\n</style>\n<table id=\"T_d31f6\" style='display:block; max-height: 300px; max-width: 100%; overflow-x: auto; overflow-y: auto;'>\n  <thead>\n    <tr>\n      <th class=\"blank level0\" >&nbsp;</th>\n      <th id=\"T_d31f6_level0_col0\" class=\"col_heading level0 col0\" >entry</th>\n      <th id=\"T_d31f6_level0_col1\" class=\"col_heading level0 col1\" >cluster</th>\n      <th id=\"T_d31f6_level0_col2\" class=\"col_heading level0 col2\" >identity_with_rep</th>\n      <th id=\"T_d31f6_level0_col3\" class=\"col_heading level0 col3\" >is_representative</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th id=\"T_d31f6_level0_row0\" class=\"row_heading level0 row0\" >1</th>\n      <td id=\"T_d31f6_row0_col0\" class=\"data row0 col0\" >CAPSID_4457</td>\n      <td id=\"T_d31f6_row0_col1\" class=\"data row0 col1\" >0</td>\n      <td id=\"T_d31f6_row0_col2\" class=\"data row0 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row0_col3\" class=\"data row0 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row1\" class=\"row_heading level0 row1\" >2</th>\n      <td id=\"T_d31f6_row1_col0\" class=\"data row1 col0\" >CAPSID_4855</td>\n      <td id=\"T_d31f6_row1_col1\" class=\"data row1 col1\" >1</td>\n      <td id=\"T_d31f6_row1_col2\" class=\"data row1 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row1_col3\" class=\"data row1 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row2\" class=\"row_heading level0 row2\" >3</th>\n      <td id=\"T_d31f6_row2_col0\" class=\"data row2 col0\" >CAPSID_915</td>\n      <td id=\"T_d31f6_row2_col1\" class=\"data row2 col1\" >2</td>\n      <td id=\"T_d31f6_row2_col2\" class=\"data row2 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row2_col3\" class=\"data row2 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row3\" class=\"row_heading level0 row3\" >4</th>\n      <td id=\"T_d31f6_row3_col0\" class=\"data row3 col0\" >CAPSID_258</td>\n      <td id=\"T_d31f6_row3_col1\" class=\"data row3 col1\" >3</td>\n      <td id=\"T_d31f6_row3_col2\" class=\"data row3 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row3_col3\" class=\"data row3 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row4\" class=\"row_heading level0 row4\" >5</th>\n      <td id=\"T_d31f6_row4_col0\" class=\"data row4 col0\" >CAPSID_4301</td>\n      <td id=\"T_d31f6_row4_col1\" class=\"data row4 col1\" >4</td>\n      <td id=\"T_d31f6_row4_col2\" class=\"data row4 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row4_col3\" class=\"data row4 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row5\" class=\"row_heading level0 row5\" >6</th>\n      <td id=\"T_d31f6_row5_col0\" class=\"data row5 col0\" >CAPSID_4167</td>\n      <td id=\"T_d31f6_row5_col1\" class=\"data row5 col1\" >5</td>\n      <td id=\"T_d31f6_row5_col2\" class=\"data row5 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row5_col3\" class=\"data row5 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row6\" class=\"row_heading level0 row6\" >7</th>\n      <td id=\"T_d31f6_row6_col0\" class=\"data row6 col0\" >CAPSID_4574</td>\n      <td id=\"T_d31f6_row6_col1\" class=\"data row6 col1\" >6</td>\n      <td id=\"T_d31f6_row6_col2\" class=\"data row6 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row6_col3\" class=\"data row6 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row7\" class=\"row_heading level0 row7\" >8</th>\n      <td id=\"T_d31f6_row7_col0\" class=\"data row7 col0\" >CAPSID_4191</td>\n      <td id=\"T_d31f6_row7_col1\" class=\"data row7 col1\" >7</td>\n      <td id=\"T_d31f6_row7_col2\" class=\"data row7 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row7_col3\" class=\"data row7 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row8\" class=\"row_heading level0 row8\" >9</th>\n      <td id=\"T_d31f6_row8_col0\" class=\"data row8 col0\" >CAPSID_4951</td>\n      <td id=\"T_d31f6_row8_col1\" class=\"data row8 col1\" >8</td>\n      <td id=\"T_d31f6_row8_col2\" class=\"data row8 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row8_col3\" class=\"data row8 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_d31f6_level0_row9\" class=\"row_heading level0 row9\" >10</th>\n      <td id=\"T_d31f6_row9_col0\" class=\"data row9 col0\" >CAPSID_4380</td>\n      <td id=\"T_d31f6_row9_col1\" class=\"data row9 col1\" >9</td>\n      <td id=\"T_d31f6_row9_col2\" class=\"data row9 col2\" >100.000000</td>\n      <td id=\"T_d31f6_row9_col3\" class=\"data row9 col3\" >1</td>\n    </tr>\n  </tbody>\n</table>\n"
+      "text/html": "<style type=\"text/css\">\n#T_bc503 thead th {\n  background-color: white;\n  color: black;\n}\n#T_bc503 tbody tr:nth-child(odd) {\n  background-color: #f2f2f2;\n}\n#T_bc503 tbody tr:nth-child(even) {\n  background-color: white;\n}\n#T_bc503 th {\n  padding: 5px;\n  white-space: nowrap;\n}\n#T_bc503  td {\n  padding: 5px;\n  white-space: nowrap;\n}\n</style>\n<table id=\"T_bc503\" style='display:block; max-height: 300px; max-width: 100%; overflow-x: auto; overflow-y: auto;'>\n  <thead>\n    <tr>\n      <th class=\"blank level0\" >&nbsp;</th>\n      <th id=\"T_bc503_level0_col0\" class=\"col_heading level0 col0\" >entry</th>\n      <th id=\"T_bc503_level0_col1\" class=\"col_heading level0 col1\" >cluster</th>\n      <th id=\"T_bc503_level0_col2\" class=\"col_heading level0 col2\" >identity_with_rep</th>\n      <th id=\"T_bc503_level0_col3\" class=\"col_heading level0 col3\" >is_representative</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th id=\"T_bc503_level0_row0\" class=\"row_heading level0 row0\" >1991</th>\n      <td id=\"T_bc503_row0_col0\" class=\"data row0 col0\" >CAPSID_162</td>\n      <td id=\"T_bc503_row0_col1\" class=\"data row0 col1\" >1990</td>\n      <td id=\"T_bc503_row0_col2\" class=\"data row0 col2\" >100.000000</td>\n      <td id=\"T_bc503_row0_col3\" class=\"data row0 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row1\" class=\"row_heading level0 row1\" >1992</th>\n      <td id=\"T_bc503_row1_col0\" class=\"data row1 col0\" >CAPSID_4775</td>\n      <td id=\"T_bc503_row1_col1\" class=\"data row1 col1\" >1991</td>\n      <td id=\"T_bc503_row1_col2\" class=\"data row1 col2\" >100.000000</td>\n      <td id=\"T_bc503_row1_col3\" class=\"data row1 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row2\" class=\"row_heading level0 row2\" >1993</th>\n      <td id=\"T_bc503_row2_col0\" class=\"data row2 col0\" >CAPSID_5005</td>\n      <td id=\"T_bc503_row2_col1\" class=\"data row2 col1\" >1992</td>\n      <td id=\"T_bc503_row2_col2\" class=\"data row2 col2\" >100.000000</td>\n      <td id=\"T_bc503_row2_col3\" class=\"data row2 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row3\" class=\"row_heading level0 row3\" >1994</th>\n      <td id=\"T_bc503_row3_col0\" class=\"data row3 col0\" >CAPSID_4701</td>\n      <td id=\"T_bc503_row3_col1\" class=\"data row3 col1\" >1993</td>\n      <td id=\"T_bc503_row3_col2\" class=\"data row3 col2\" >100.000000</td>\n      <td id=\"T_bc503_row3_col3\" class=\"data row3 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row4\" class=\"row_heading level0 row4\" >1995</th>\n      <td id=\"T_bc503_row4_col0\" class=\"data row4 col0\" >CAPSID_4962</td>\n      <td id=\"T_bc503_row4_col1\" class=\"data row4 col1\" >1994</td>\n      <td id=\"T_bc503_row4_col2\" class=\"data row4 col2\" >100.000000</td>\n      <td id=\"T_bc503_row4_col3\" class=\"data row4 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row5\" class=\"row_heading level0 row5\" >1996</th>\n      <td id=\"T_bc503_row5_col0\" class=\"data row5 col0\" >CAPSID_4517</td>\n      <td id=\"T_bc503_row5_col1\" class=\"data row5 col1\" >1995</td>\n      <td id=\"T_bc503_row5_col2\" class=\"data row5 col2\" >100.000000</td>\n      <td id=\"T_bc503_row5_col3\" class=\"data row5 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row6\" class=\"row_heading level0 row6\" >1997</th>\n      <td id=\"T_bc503_row6_col0\" class=\"data row6 col0\" >CAPSID_4516</td>\n      <td id=\"T_bc503_row6_col1\" class=\"data row6 col1\" >1996</td>\n      <td id=\"T_bc503_row6_col2\" class=\"data row6 col2\" >100.000000</td>\n      <td id=\"T_bc503_row6_col3\" class=\"data row6 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row7\" class=\"row_heading level0 row7\" >1998</th>\n      <td id=\"T_bc503_row7_col0\" class=\"data row7 col0\" >CAPSID_4300</td>\n      <td id=\"T_bc503_row7_col1\" class=\"data row7 col1\" >1997</td>\n      <td id=\"T_bc503_row7_col2\" class=\"data row7 col2\" >100.000000</td>\n      <td id=\"T_bc503_row7_col3\" class=\"data row7 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row8\" class=\"row_heading level0 row8\" >1999</th>\n      <td id=\"T_bc503_row8_col0\" class=\"data row8 col0\" >CAPSID_4108</td>\n      <td id=\"T_bc503_row8_col1\" class=\"data row8 col1\" >1998</td>\n      <td id=\"T_bc503_row8_col2\" class=\"data row8 col2\" >100.000000</td>\n      <td id=\"T_bc503_row8_col3\" class=\"data row8 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_bc503_level0_row9\" class=\"row_heading level0 row9\" >2000</th>\n      <td id=\"T_bc503_row9_col0\" class=\"data row9 col0\" >CAPSID_4984</td>\n      <td id=\"T_bc503_row9_col1\" class=\"data row9 col1\" >1999</td>\n      <td id=\"T_bc503_row9_col2\" class=\"data row9 col2\" >100.000000</td>\n      <td id=\"T_bc503_row9_col3\" class=\"data row9 col3\" >1</td>\n    </tr>\n  </tbody>\n</table>\n"
      },
      "metadata": {},
      "output_type": "display_data"
@@ -67,11 +67,11 @@
     }
    ],
    "source": [
-    "# Filtering using cd-hit (default)\n",
+    "# Filtering using CD-HIT (default)\n",
     "df_clust = aa.filter_seq(df_seq=df_seq)\n",
     "n_clust = df_clust[\"cluster\"].nunique()\n",
     "print(f\"Number of CD-HIT clusters: {n_clust}\")\n",
-    "aa.display_df(df_clust, n_rows=10, show_shape=True)\n",
+    "aa.display_df(df_clust, n_rows=-10, show_shape=True)\n",
     "\n",
     "# Filtering using MMSeqs\n",
     "df_clust = aa.filter_seq(df_seq=df_seq, method=\"mmseqs\")\n",
@@ -81,8 +81,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-06-27T13:43:26.967403360Z",
-     "start_time": "2024-06-27T13:43:13.028873821Z"
+     "end_time": "2024-06-27T20:22:45.025340662Z",
+     "start_time": "2024-06-27T20:22:30.433050007Z"
     }
    },
    "id": "b9e4f7053030fd3f"
@@ -90,7 +90,51 @@
   {
    "cell_type": "markdown",
    "source": [
-    "The decrease the number of clusters, you can decrease the sequence ``similarity_threshold`` (default=0.9): "
+    "You can obtain a redundancy-reduced set of protein sequences by selecting the representative sequence of each cluster:"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "7a2d9bf2bf0b9de6"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DataFrame shape: (2000, 4)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": "<IPython.core.display.HTML object>",
+      "text/html": "<style type=\"text/css\">\n#T_71982 thead th {\n  background-color: white;\n  color: black;\n}\n#T_71982 tbody tr:nth-child(odd) {\n  background-color: #f2f2f2;\n}\n#T_71982 tbody tr:nth-child(even) {\n  background-color: white;\n}\n#T_71982 th {\n  padding: 5px;\n  white-space: nowrap;\n}\n#T_71982  td {\n  padding: 5px;\n  white-space: nowrap;\n}\n</style>\n<table id=\"T_71982\" style='display:block; max-height: 300px; max-width: 100%; overflow-x: auto; overflow-y: auto;'>\n  <thead>\n    <tr>\n      <th class=\"blank level0\" >&nbsp;</th>\n      <th id=\"T_71982_level0_col0\" class=\"col_heading level0 col0\" >entry</th>\n      <th id=\"T_71982_level0_col1\" class=\"col_heading level0 col1\" >cluster</th>\n      <th id=\"T_71982_level0_col2\" class=\"col_heading level0 col2\" >identity_with_rep</th>\n      <th id=\"T_71982_level0_col3\" class=\"col_heading level0 col3\" >is_representative</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th id=\"T_71982_level0_row0\" class=\"row_heading level0 row0\" >1991</th>\n      <td id=\"T_71982_row0_col0\" class=\"data row0 col0\" >CAPSID_4774</td>\n      <td id=\"T_71982_row0_col1\" class=\"data row0 col1\" >1988</td>\n      <td id=\"T_71982_row0_col2\" class=\"data row0 col2\" >100.000000</td>\n      <td id=\"T_71982_row0_col3\" class=\"data row0 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row1\" class=\"row_heading level0 row1\" >1992</th>\n      <td id=\"T_71982_row1_col0\" class=\"data row1 col0\" >CAPSID_4806</td>\n      <td id=\"T_71982_row1_col1\" class=\"data row1 col1\" >1989</td>\n      <td id=\"T_71982_row1_col2\" class=\"data row1 col2\" >100.000000</td>\n      <td id=\"T_71982_row1_col3\" class=\"data row1 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row2\" class=\"row_heading level0 row2\" >1993</th>\n      <td id=\"T_71982_row2_col0\" class=\"data row2 col0\" >CAPSID_4839</td>\n      <td id=\"T_71982_row2_col1\" class=\"data row2 col1\" >1990</td>\n      <td id=\"T_71982_row2_col2\" class=\"data row2 col2\" >100.000000</td>\n      <td id=\"T_71982_row2_col3\" class=\"data row2 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row3\" class=\"row_heading level0 row3\" >1994</th>\n      <td id=\"T_71982_row3_col0\" class=\"data row3 col0\" >CAPSID_4871</td>\n      <td id=\"T_71982_row3_col1\" class=\"data row3 col1\" >1991</td>\n      <td id=\"T_71982_row3_col2\" class=\"data row3 col2\" >100.000000</td>\n      <td id=\"T_71982_row3_col3\" class=\"data row3 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row4\" class=\"row_heading level0 row4\" >1995</th>\n      <td id=\"T_71982_row4_col0\" class=\"data row4 col0\" >CAPSID_4904</td>\n      <td id=\"T_71982_row4_col1\" class=\"data row4 col1\" >1992</td>\n      <td id=\"T_71982_row4_col2\" class=\"data row4 col2\" >100.000000</td>\n      <td id=\"T_71982_row4_col3\" class=\"data row4 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row5\" class=\"row_heading level0 row5\" >1996</th>\n      <td id=\"T_71982_row5_col0\" class=\"data row5 col0\" >CAPSID_4936</td>\n      <td id=\"T_71982_row5_col1\" class=\"data row5 col1\" >1993</td>\n      <td id=\"T_71982_row5_col2\" class=\"data row5 col2\" >100.000000</td>\n      <td id=\"T_71982_row5_col3\" class=\"data row5 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row6\" class=\"row_heading level0 row6\" >1997</th>\n      <td id=\"T_71982_row6_col0\" class=\"data row6 col0\" >CAPSID_4968</td>\n      <td id=\"T_71982_row6_col1\" class=\"data row6 col1\" >1994</td>\n      <td id=\"T_71982_row6_col2\" class=\"data row6 col2\" >100.000000</td>\n      <td id=\"T_71982_row6_col3\" class=\"data row6 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row7\" class=\"row_heading level0 row7\" >1998</th>\n      <td id=\"T_71982_row7_col0\" class=\"data row7 col0\" >CAPSID_5002</td>\n      <td id=\"T_71982_row7_col1\" class=\"data row7 col1\" >1995</td>\n      <td id=\"T_71982_row7_col2\" class=\"data row7 col2\" >100.000000</td>\n      <td id=\"T_71982_row7_col3\" class=\"data row7 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row8\" class=\"row_heading level0 row8\" >1999</th>\n      <td id=\"T_71982_row8_col0\" class=\"data row8 col0\" >CAPSID_5037</td>\n      <td id=\"T_71982_row8_col1\" class=\"data row8 col1\" >1996</td>\n      <td id=\"T_71982_row8_col2\" class=\"data row8 col2\" >100.000000</td>\n      <td id=\"T_71982_row8_col3\" class=\"data row8 col3\" >1</td>\n    </tr>\n    <tr>\n      <th id=\"T_71982_level0_row9\" class=\"row_heading level0 row9\" >2000</th>\n      <td id=\"T_71982_row9_col0\" class=\"data row9 col0\" >CAPSID_5069</td>\n      <td id=\"T_71982_row9_col1\" class=\"data row9 col1\" >1997</td>\n      <td id=\"T_71982_row9_col2\" class=\"data row9 col2\" >100.000000</td>\n      <td id=\"T_71982_row9_col3\" class=\"data row9 col3\" >1</td>\n    </tr>\n  </tbody>\n</table>\n"
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Select redundancy-reduced sequences\n",
+    "df_selected = df_clust[df_clust[\"is_representative\"] == 1]\n",
+    "aa.display_df(df_clust, n_rows=-10, show_shape=True)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-06-27T20:22:45.110610021Z",
+     "start_time": "2024-06-27T20:22:45.020134758Z"
+    }
+   },
+   "id": "50a17ad119d34446"
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "To reduce the number of clusters, you can decrease the sequence ``similarity_threshold`` (default=0.9): "
    ],
    "metadata": {
     "collapsed": false
@@ -99,7 +143,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 7,
    "outputs": [
     {
      "name": "stdout",
@@ -124,8 +168,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-06-27T13:44:16.530809132Z",
-     "start_time": "2024-06-27T13:43:33.611598651Z"
+     "end_time": "2024-06-27T20:23:26.767419508Z",
+     "start_time": "2024-06-27T20:22:45.081053810Z"
     }
    },
    "id": "ef5135c86e164027"
@@ -142,7 +186,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 8,
    "outputs": [
     {
      "name": "stdout",
@@ -160,8 +204,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-06-27T13:44:47.307324800Z",
-     "start_time": "2024-06-27T13:44:25.611748437Z"
+     "end_time": "2024-06-27T20:23:47.145309577Z",
+     "start_time": "2024-06-27T20:23:26.755456935Z"
     }
    },
    "id": "7d55920a4d8a2183"
@@ -178,7 +222,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 9,
    "outputs": [
     {
      "name": "stdout",
@@ -197,8 +241,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-06-27T13:44:55.331701921Z",
-     "start_time": "2024-06-27T13:44:51.223695188Z"
+     "end_time": "2024-06-27T20:23:50.951949901Z",
+     "start_time": "2024-06-27T20:23:47.156300620Z"
     }
    },
    "id": "62d0351a3c25f270"
@@ -215,7 +259,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 10,
    "outputs": [
     {
      "name": "stdout",
@@ -240,8 +284,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-06-27T13:45:17.853935207Z",
-     "start_time": "2024-06-27T13:45:02.144846759Z"
+     "end_time": "2024-06-27T20:24:06.633475548Z",
+     "start_time": "2024-06-27T20:23:50.959943351Z"
     }
    },
    "id": "5da69ca3ad47a131"
@@ -258,7 +302,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 11,
    "outputs": [],
    "source": [
     "# Sort sequences by clusters\n",
@@ -267,8 +311,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-06-27T13:45:25.181395528Z",
-     "start_time": "2024-06-27T13:45:24.395434611Z"
+     "end_time": "2024-06-27T20:24:07.190489991Z",
+     "start_time": "2024-06-27T20:24:06.648321841Z"
     }
    },
    "id": "ce1a5c401c42fe9d"
@@ -285,14 +329,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 12,
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Time without multiprocessing: 0.8 seconds\n",
-      "Time with multiprocessing. 0.91 seconds\n"
+      "Time without multiprocessing: 0.54 seconds\n",
+      "Time with multiprocessing. 0.68 seconds\n"
      ]
     }
    ],
@@ -314,8 +358,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-06-27T13:45:27.414166029Z",
-     "start_time": "2024-06-27T13:45:25.691803391Z"
+     "end_time": "2024-06-27T20:24:08.416840033Z",
+     "start_time": "2024-06-27T20:24:07.208454665Z"
     }
    },
    "id": "87a6ef861d00ab12"
@@ -329,6 +373,16 @@
     "collapsed": false
    },
    "id": "9328b8f044d63fc8"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "d0993d1b5f9aa2b"
   }
  ],
  "metadata": {