diff --git a/external/popl/popl.hpp b/external/popl/popl.hpp
index 8fd668b..9cce497 100644
--- a/external/popl/popl.hpp
+++ b/external/popl/popl.hpp
@@ -3,9 +3,11 @@
     (  _ \ /  \(  _ \(  )
      ) __/(  O )) __// (_/\
     (__)   \__/(__)  \____/
-    version 1.3.0
+    version 1.4.0
     https://github.com/badaix/popl
 
+    modified by Zilong-Li Nov 03, 2024
+
     This file is part of popl (program options parser lib)
     Copyright (C) 2015-2021 Johannes Pohl
 
@@ -41,7 +43,7 @@
 namespace popl
 {
 
-#define POPL_VERSION "1.3.0"
+#define POPL_VERSION "1.4.0"
 
 
 /// Option's argument type
@@ -62,6 +64,7 @@ enum class Argument
 /**
  * inactive: Option is not set and will not be parsed
  * hidden:   Option is active, but will not show up in the help message
+ * headline: Option is a headline that marks the start of a new section in the help message
  * required: Option must be set on the command line. Otherwise an exception will be thrown
  * optional: Option must not be set. Default attribute.
  * advanced: Option is advanced and will only show up in the advanced help message
@@ -69,8 +72,9 @@ enum class Argument
  */
 enum class Attribute
 {
-    inactive = 0,
-    hidden = 1,
+    inactive = -1,
+    hidden = 0,
+    headline = 1,
     required = 2,
     optional = 3,
     advanced = 4,
@@ -1183,7 +1187,10 @@ inline std::string ConsoleOptionPrinter::print(const Attribute& max_attribute) c
             optionStr.resize(optionRightMargin, ' ');
         else
             optionStr += "\n" + std::string(optionRightMargin, ' ');
-        s << optionStr;
+        if (option->attribute() != Attribute::headline)
+            s << optionStr;
+        else
+            s << "\n";
 
         std::string line;
         std::vector<std::string> lines;
diff --git a/src/Cmd.cpp b/src/Cmd.cpp
index 3dc7724..94594ea 100644
--- a/src/Cmd.cpp
+++ b/src/Cmd.cpp
@@ -5,10 +5,6 @@
  ******************************************************************************/
 
 #include "Cmd.hpp"
-
-#include <cstdlib>
-#include <iostream>
-
 #include "popl/popl.hpp"
 
 using namespace popl;
@@ -24,40 +20,64 @@ Param::Param(int argc, char **argv) {
                     "       use csv file as input and apply the Implicitly Restarted Arnoldi Method\n" +
                     "       PCAone --csv csv.zst --svd 0 \n" +
   "\033[0m\n"};
-  OptionParser opts(copyr + "Main options");
+  OptionParser opts(copyr + "General options");
   auto help_opt = opts.add<Switch>("h", "help", "print all options including hidden advanced options");
+  opts.add<Value<double>>("m", "memory", "RAM usage in GB unit for out-of-core mode. default is in-core mode", memory, &memory);
+  opts.add<Value<uint>>("n", "threads", "the number of threads to be used", threads, &threads);
+  opts.add<Switch>("v", "verbose", "verbose message output", &verbose);
+  opts.add<Value<std::string>, Attribute::headline>("","PCA","PCA algorithms:");
   auto svd_opt = opts.add<Value<uint>>("d", "svd", "SVD method to be applied. default 2 is recommended for big data.\n"
                                        "0: the Implicitly Restarted Arnoldi Method (IRAM)\n"
                                        "1: the Yu's single-pass Randomized SVD with power iterations\n"
                                        "2: the accurate window-based Randomized SVD method (PCAone)\n"
                                        "3: the full Singular Value Decomposition.", 2);
-  auto plinkfile = opts.add<Value<std::string>>("b", "bfile", "prefix to PLINK .bed/.bim/.fam files", "", &filein);
-  auto binfile = opts.add<Value<std::string>>("B", "binary", "path of binary file", "", &filein);
-  auto csvfile = opts.add<Value<std::string>>("c", "csv", "path of comma seperated CSV file compressed by zstd", "", &filein);
-  auto bgenfile = opts.add<Value<std::string>>("g", "bgen", "path of BGEN file compressed by gzip/zstd", "", &filein);
-  auto beaglefile = opts.add<Value<std::string>>("G", "beagle", "path of BEAGLE file compressed by gzip", "", &filein);
-  auto usvprefix = opts.add<Value<std::string>>("", "USV", "prefix to PCAone .eigvecs/.eigvals/.loadings/.mbim");
-  opts.add<Value<std::string>>("", "read-U", "path of file with left singular vectors (.eigvecs)", "", &fileU);
-  opts.add<Value<std::string>>("", "read-V", "path of file with right singular vectors (.loadings)", "", &fileV);
-  opts.add<Value<std::string>>("", "read-S", "path of file with eigen values (.eigvals)", "", &fileS);
   opts.add<Value<uint>>("k", "pc", "top k principal components (PCs) to be calculated", k, &k);
-  opts.add<Value<double>>("m", "memory", "RAM usage in GB unit for out-of-core mode. default is in-core mode", memory, &memory);
-  opts.add<Value<uint>>("n", "threads", "the number of threads to be used", threads, &threads);
-  opts.add<Value<std::string>>("o", "out", "prefix to output files. default [pcaone]", fileout, &fileout);
-  opts.add<Value<uint>>("p", "maxp", "maximum number of power iterations for RSVD algorithm", maxp, &maxp);
-  opts.add<Switch>("S", "no-shuffle", "do not shuffle columns of data for --svd 2 (if not locally correlated)", &noshuffle);
-  opts.add<Switch>("v", "verbose", "verbose message output", &verbose);
-  opts.add<Switch>("V", "printv", "output the right eigenvectors with suffix .loadings", &printv);
-  opts.add<Value<uint>, Attribute::advanced>("w", "batches", "the number of mini-batches used by --svd 2", bands, &bands);
   opts.add<Value<uint>>("C", "scale", "do scaling for input file.\n"
                         "0: do just centering\n"
                         "1: do log transformation eg. log(x+0.01) for RNA-seq data\n"
                         "2: do count per median log transformation (CPMED) for scRNAs",
                         scale,  &scale);
+  opts.add<Value<uint>>("p", "maxp", "maximum number of power iterations for RSVD algorithm", maxp, &maxp);
+  opts.add<Switch>("S", "no-shuffle", "do not shuffle columns of data for --svd 2 (if not locally correlated)", &noshuffle);
+  opts.add<Value<uint>, Attribute::advanced>("w", "batches", "the number of mini-batches used by --svd 2", bands, &bands);
   opts.add<Switch>("", "emu", "use EMU algorithm for genotype input with missingness", &emu);
   opts.add<Switch>("", "pcangsd", "use PCAngsd algorithm for genotype likelihood input", &pcangsd);
-  opts.add<Value<double>>("", "maf", "exclude variants with MAF lower than this value", maf, &maf);
+  opts.add<Value<uint>, Attribute::advanced>("", "M", "the number of features (eg. SNPs) if already known", 0, &nsnps);
+  opts.add<Value<uint>, Attribute::advanced>("", "N", "the number of samples if already known", 0, &nsamples);
+  // opts.add<Switch, Attribute::advanced>("", "debug", "turn on debugging mode", &debug);
+  opts.add<Value<uint>, Attribute::advanced>("", "buffer", "memory buffer in GB unit for permuting the data", buffer, &buffer);
+  opts.add<Value<uint>, Attribute::advanced>("", "imaxiter", "maximum number of IRAM iterations", imaxiter, &imaxiter);
+  opts.add<Value<double>, Attribute::advanced>("", "itol", "stopping tolerance for IRAM algorithm", itol, &itol);
+  opts.add<Value<uint>, Attribute::advanced>("", "ncv", "the number of Lanzcos basis vectors for IRAM", ncv, &ncv);
+  opts.add<Value<uint>, Attribute::advanced>("", "oversamples", "the number of oversampling columns for RSVD", oversamples, &oversamples);
+  opts.add<Value<uint>, Attribute::advanced>("", "rand", "the random matrix type. 0: uniform, 1: guassian", rand, &rand);
+  opts.add<Value<uint>, Attribute::advanced>("", "maxiter", "maximum number of EM iterations", maxiter, &maxiter);
+  opts.add<Value<double>, Attribute::advanced>("", "tol-rsvd", "tolerance for RSVD algorithm", tol, &tol);
+  opts.add<Value<double>, Attribute::advanced>("", "tol-em", "tolerance for EMU/PCAngsd algorithm", tolem, &tolem);
+  opts.add<Value<double>, Attribute::advanced>("", "tol-maf", "tolerance for MAF estimation by EM", tolmaf, &tolmaf);
+  opts.add<Switch, Attribute::hidden>("", "printu", "output eigen vector of each epoch (for tests)", &printu);
+  
+  opts.add<Value<std::string>, Attribute::headline>("","INPUT","Input options:");
+  auto plinkfile = opts.add<Value<std::string>>("b", "bfile", "prefix of PLINK .bed/.bim/.fam files", "", &filein);
+  opts.add<Switch, Attribute::advanced>("", "haploid", "the plink format represents haploid data", &haploid);
+  auto binfile = opts.add<Value<std::string>>("B", "binary", "path of binary file", "", &filein);
+  auto csvfile = opts.add<Value<std::string>>("c", "csv", "path of comma seperated CSV file compressed by zstd", "", &filein);
+  auto bgenfile = opts.add<Value<std::string>>("g", "bgen", "path of BGEN file compressed by gzip/zstd", "", &filein);
+  auto beaglefile = opts.add<Value<std::string>>("G", "beagle", "path of BEAGLE file compressed by gzip", "", &filein);
   opts.add<Value<std::string>>("", "match-bim", "the .mbim file to be matched, where the 7th column is allele frequency", "", &filebim);
+  auto usvprefix = opts.add<Value<std::string>>("", "USV", "prefix of PCAone .eigvecs/.eigvals/.loadings/.mbim");
+  opts.add<Value<std::string>, Attribute::advanced>("", "read-U", "path of file with left singular vectors (.eigvecs)", "", &fileU);
+  opts.add<Value<std::string>, Attribute::advanced>("", "read-V", "path of file with right singular vectors (.loadings)", "", &fileV);
+  opts.add<Value<std::string>, Attribute::advanced>("", "read-S", "path of file with eigen values (.eigvals)", "", &fileS);
+  
+  opts.add<Value<std::string>, Attribute::headline>("","OUTPUT","Output options:");
+  opts.add<Value<std::string>>("o", "out", "prefix of output files. default [pcaone]", fileout, &fileout);
+  opts.add<Switch>("V", "printv", "output the right eigenvectors with suffix .loadings", &printv);
+  opts.add<Switch>("", "ld", "output a binary matrix for downstream LD related analysis", &ld);
+  opts.add<Switch>("", "print-r2", "print LD r2 to *.ld.gz file for pairwise SNPs within a window", &print_r2);
+  
+  opts.add<Value<std::string>, Attribute::headline>("","MISC","Misc options:");
+  opts.add<Value<double>>("", "maf", "exclude variants with MAF lower than this value", maf, &maf);
   opts.add<Value<int>>("", "project", "project the new samples onto the existing PCs.\n"
                                       "0: disabled\n"
                                       "1: by multiplying the loadings with mean imputation for missing genotypes\n"
@@ -68,36 +88,20 @@ Param::Param(int argc, char **argv) {
                                       "0: disabled\n"
                                       "1: compute per-site inbreeding coefficient and HWE test\n",
                        inbreed, &inbreed);
-  opts.add<Switch>("", "ld", "output a binary matrix for downstream LD related analysis", &ld);
   opts.add<Value<double>>("", "ld-r2", "r2 cutoff for LD-based pruning. (usually 0.2)", ld_r2, &ld_r2);
   opts.add<Value<uint>>("", "ld-bp", "physical distance threshold in bases for LD. (usually 1000000)", ld_bp, &ld_bp);
   opts.add<Value<int>>("", "ld-stats", "statistics to calculate LD r2 for pairwise SNPs.\n"
                                        "0: the ancestry adjusted, i.e. correlation between residuals\n"
                                        "1: the standard, i.e. correlation between two alleles\n",
                        ld_stats, &ld_stats);
-  opts.add<Switch>("", "print-r2", "print LD r2 to *.ld.gz file for pairwise SNPs within a window", &print_r2);
   auto clumpfile = opts.add<Value<std::string>>("", "clump", "assoc-like file with target variants and pvalues for clumping", "", &clump);
   auto assocnames = opts.add<Value<std::string>>("", "clump-names", "column names in assoc-like file for locating chr, pos and pvalue", "CHR,BP,P", &assoc_colnames);
   opts.add<Value<double>>("", "clump-p1", "significance threshold for index SNPs", clump_p1, &clump_p1);
   opts.add<Value<double>>("", "clump-p2", "secondary significance threshold for clumped SNPs", clump_p2, &clump_p2);
   opts.add<Value<double>>("", "clump-r2", "r2 cutoff for LD-based clumping", clump_r2, &clump_r2);
   opts.add<Value<uint>>("", "clump-bp", "physical distance threshold in bases for clumping", clump_bp, &clump_bp);
-  opts.add<Switch, Attribute::advanced>("", "printu", "output eigen vector of each epoch (for tests)", &printu);
-  opts.add<Value<uint>, Attribute::advanced>("", "M", "the number of features (eg. SNPs) if already known", 0, &nsnps);
-  opts.add<Value<uint>, Attribute::advanced>("", "N", "the number of samples if already known", 0, &nsamples);
-  // opts.add<Switch, Attribute::advanced>("", "debug", "turn on debugging mode", &debug);
-  opts.add<Switch, Attribute::advanced>("", "haploid", "the plink format represents haploid data", &haploid);
-  opts.add<Value<uint>, Attribute::advanced>("", "buffer", "memory buffer in GB unit for permuting the data", buffer, &buffer);
-  opts.add<Value<uint>, Attribute::advanced>("", "imaxiter", "maximum number of IRAM iterations", imaxiter, &imaxiter);
-  opts.add<Value<double>, Attribute::advanced>("", "itol", "stopping tolerance for IRAM algorithm", itol, &itol);
-  opts.add<Value<uint>, Attribute::advanced>("", "ncv", "the number of Lanzcos basis vectors for IRAM", ncv, &ncv);
-  opts.add<Value<uint>, Attribute::advanced>("", "oversamples", "the number of oversampling columns for RSVD", oversamples, &oversamples);
-  opts.add<Value<uint>, Attribute::advanced>("", "rand", "the random matrix type. 0: uniform, 1: guassian", rand, &rand);
-  opts.add<Value<uint>, Attribute::advanced>("", "maxiter", "maximum number of EM iterations", maxiter, &maxiter);
-  opts.add<Value<double>, Attribute::advanced>("", "tol-rsvd", "tolerance for RSVD algorithm", tol, &tol);
-  opts.add<Value<double>, Attribute::advanced>("", "tol-em", "tolerance for EMU/PCAngsd algorithm", tolem, &tolem);
-  opts.add<Value<double>, Attribute::advanced>("", "tol-maf", "tolerance for MAF estimation by EM", tolmaf, &tolmaf);
   opts.add<Switch, Attribute::hidden>("", "groff", "print groff formatted help message", &groff);
+  
   // collect command line options acutal in effect
   ss << (std::string) "PCAone (v" + VERSION + ")    https://github.com/Zilong-Li/PCAone\n";
   ss << "Options in effect:\n";