From 0ab4e236da8c7c63198c9e5a87d2e5e44c18b856 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 06:29:07 -0500 Subject: [PATCH 01/31] Re-run using `0.0.7` (#46) --- examples/bib_example.ipynb | 444 ++++++++++++++++++------------------- 1 file changed, 211 insertions(+), 233 deletions(-) diff --git a/examples/bib_example.ipynb b/examples/bib_example.ipynb index b86222a..d72479e 100644 --- a/examples/bib_example.ipynb +++ b/examples/bib_example.ipynb @@ -22,7 +22,7 @@ { "data": { "text/plain": [ - "'0.0.4'" + "'0.0.7'" ] }, "execution_count": 1, @@ -49,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "url = 'https://raw.githubusercontent.com/neurodata/neurodata.io/deploy/content/bibs/people.bib'" + "url = 'https://raw.githubusercontent.com/Qinka/random-bib/master/references-1a.bib'" ] }, { @@ -91,175 +91,218 @@ " \n", " type\n", " alias\n", - " author\n", - " usera\n", - " userw\n", - " month\n", + " title\n", " year\n", + " journal\n", + " author\n", " number\n", - " series\n", - " abstract\n", - " userb\n", - " userc\n", - " userd\n", - " keywords\n", - " doi\n", - " note\n", + " volume\n", " url\n", - " usere\n", - " file\n", + " doi\n", + " ...\n", + " arxivId\n", + " editor\n", + " publisher\n", + " address\n", + " month\n", + " pmid\n", + " institution\n", + " series\n", + " translator\n", + " edition\n", " \n", " \n", " \n", " \n", " 0\n", - " incollection\n", - " jovo\n", - " Joshua Vogelstein\n", - " Director\n", - " Chief Unity Ninja\n", - " \n", - " \n", - " --\n", - " \n", - " \n", - " \n", - " BME, JHU\n", - " director\n", - " director\n", - " jovo\n", - " jovo@jhu.edu\n", - " neurodata.io/about/jovo\n", - " safe-zone\n", - " vogelstein_joshua.jpg\n", + " article\n", + " Klein2012\n", + " 101 Labeled Brain Images and a Consistent Huma...\n", + " 2012\n", + " Frontiers in Neuroscience\n", + " Klein, Arno and Tourville, Jason\n", + " DEC\n", + " 6\n", + " http://journal.frontiersin.org/article/10.3389...\n", + " 10.3389/fnins.2012.00171\n", + " ...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", " 1\n", - " incollection\n", - " tylertomita\n", - " Tyler Tomita\n", - " Postdoctoral Fellow\n", - " \n", - " 8\n", - " 2014\n", - " 08/14 --\n", - " \n", - " Developed Sparse Projection Oblique Randomer F...\n", - " MSE\n", - " BME, JHU\n", - " postdoc\n", - " postdoc\n", - " ttomita\n", - " \n", - " \n", - " \n", - " tomita_tyler.jpg\n", + " inproceedings\n", + " Zhong2017b\n", + " 3D alpha matting based co-segmentation of tumo...\n", + " 2017\n", + " NaN\n", + " Zhong, Zisha and Kim, Yusung and Buatti, John ...\n", + " NaN\n", + " 10555 LNCS\n", + " NaN\n", + " 10.1007/978-3-319-67564-0_4\n", + " ...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", " 2\n", - " incollection\n", - " jongshin\n", - " Jong Shin\n", - " Software Engineer\n", - " Tech Support\n", - " 9\n", - " 2020\n", - " 09/20 --\n", - " \n", - " Currently investigating the effect of inductiv...\n", - " MS\n", - " BME, JHU\n", - " staff\n", - " staffresearch\n", - " jshinm\n", - " jshin69@jhu.edu\n", - " \n", - " safe-zone\n", - " jong_shin.png\n", + " article\n", + " Khvostikov2018\n", + " 3D CNN-based classification using sMRI and MD-...\n", + " 2018\n", + " NaN\n", + " Khvostikov, Alexander and Aderghal, Karim and ...\n", + " NaN\n", + " NaN\n", + " http://arxiv.org/abs/1801.05968\n", + " NaN\n", + " ...\n", + " 
1801.05968\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", " 3\n", - " incollection\n", - " aligeisa\n", - " Ali Geisa\n", - " Research Assistant\n", - " \n", - " 3\n", - " 2020\n", - " 03/20 --\n", - " \n", - " Researching progressive and lifelong learning ...\n", - " MS\n", - " BME, JHU\n", - " faculty - research\n", - " staffresearch\n", - " \n", - " \n", - " \n", - " \n", - " ali_geisa.jpg\n", + " inproceedings\n", + " Dou2016\n", + " 3D deeply supervised network for automatic liv...\n", + " 2016\n", + " NaN\n", + " Dou, Qi and Chen, Hao and Jin, Yueming and Yu,...\n", + " NaN\n", + " 9901 LNCS\n", + " NaN\n", + " 10.1007/978-3-319-46723-8_18\n", + " ...\n", + " 1607.00582\n", + " Ourselin, Sebastien and Joskowicz, Leo and Sab...\n", + " Springer International Publishing\n", + " Cham\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", " 4\n", - " incollection\n", - " kareefullah\n", - " Kareef Ullah\n", - " Undergraduate Researcher\n", - " Cup Stacker\n", - " 9\n", - " 2021\n", - " 09/21 --\n", - " \n", - " Assisted with fixing issues in graspologic and...\n", - " \n", - " BME, JHU\n", - " undergrad\n", - " undergrad\n", - " \n", - " kullah2@jhu.edu\n", - " \n", - " \n", - " kareef_ullah.jpg\n", + " inproceedings\n", + " Zhong2018\n", + " 3D fully convolutional networks for co-segment...\n", + " 2018\n", + " NaN\n", + " Zhong, Z and Kim, Y and Zhou, L and Plichta, K...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 10.1109/ISBI.2018.8363561\n", + " ...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 4\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", + "

5 rows × 25 columns

\n", "" ], "text/plain": [ - " type alias author usera \\\n", - "0 incollection jovo Joshua Vogelstein Director \n", - "1 incollection tylertomita Tyler Tomita Postdoctoral Fellow \n", - "2 incollection jongshin Jong Shin Software Engineer \n", - "3 incollection aligeisa Ali Geisa Research Assistant \n", - "4 incollection kareefullah Kareef Ullah Undergraduate Researcher \n", + " type alias \\\n", + "0 article Klein2012 \n", + "1 inproceedings Zhong2017b \n", + "2 article Khvostikov2018 \n", + "3 inproceedings Dou2016 \n", + "4 inproceedings Zhong2018 \n", "\n", - " userw month year number series \\\n", - "0 Chief Unity Ninja -- \n", - "1 8 2014 08/14 -- \n", - "2 Tech Support 9 2020 09/20 -- \n", - "3 3 2020 03/20 -- \n", - "4 Cup Stacker 9 2021 09/21 -- \n", + " title year \\\n", + "0 101 Labeled Brain Images and a Consistent Huma... 2012 \n", + "1 3D alpha matting based co-segmentation of tumo... 2017 \n", + "2 3D CNN-based classification using sMRI and MD-... 2018 \n", + "3 3D deeply supervised network for automatic liv... 2016 \n", + "4 3D fully convolutional networks for co-segment... 2018 \n", "\n", - " abstract userb userc \\\n", - "0 BME, JHU \n", - "1 Developed Sparse Projection Oblique Randomer F... MSE BME, JHU \n", - "2 Currently investigating the effect of inductiv... MS BME, JHU \n", - "3 Researching progressive and lifelong learning ... MS BME, JHU \n", - "4 Assisted with fixing issues in graspologic and... BME, JHU \n", + " journal \\\n", + "0 Frontiers in Neuroscience \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", "\n", - " userd keywords doi note \\\n", - "0 director director jovo jovo@jhu.edu \n", - "1 postdoc postdoc ttomita \n", - "2 staff staffresearch jshinm jshin69@jhu.edu \n", - "3 faculty - research staffresearch \n", - "4 undergrad undergrad kullah2@jhu.edu \n", + " author number volume \\\n", + "0 Klein, Arno and Tourville, Jason DEC 6 \n", + "1 Zhong, Zisha and Kim, Yusung and Buatti, John ... NaN 10555 LNCS \n", + "2 Khvostikov, Alexander and Aderghal, Karim and ... NaN NaN \n", + "3 Dou, Qi and Chen, Hao and Jin, Yueming and Yu,... NaN 9901 LNCS \n", + "4 Zhong, Z and Kim, Y and Zhou, L and Plichta, K... NaN NaN \n", "\n", - " url usere file \n", - "0 neurodata.io/about/jovo safe-zone vogelstein_joshua.jpg \n", - "1 tomita_tyler.jpg \n", - "2 safe-zone jong_shin.png \n", - "3 ali_geisa.jpg \n", - "4 kareef_ullah.jpg " + " url \\\n", + "0 http://journal.frontiersin.org/article/10.3389... \n", + "1 NaN \n", + "2 http://arxiv.org/abs/1801.05968 \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " doi ... arxivId \\\n", + "0 10.3389/fnins.2012.00171 ... NaN \n", + "1 10.1007/978-3-319-67564-0_4 ... NaN \n", + "2 NaN ... 1801.05968 \n", + "3 10.1007/978-3-319-46723-8_18 ... 1607.00582 \n", + "4 10.1109/ISBI.2018.8363561 ... NaN \n", + "\n", + " editor \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 Ourselin, Sebastien and Joskowicz, Leo and Sab... 
\n", + "4 NaN \n", + "\n", + " publisher address month pmid institution series \\\n", + "0 NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN NaN \n", + "3 Springer International Publishing Cham NaN NaN NaN NaN \n", + "4 NaN NaN 4 NaN NaN NaN \n", + "\n", + " translator edition \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + "[5 rows x 25 columns]" ] }, "execution_count": 4, @@ -284,7 +327,6 @@ "metadata": {}, "outputs": [], "source": [ - "# url_beta = 'https://raw.githubusercontent.com/neurodata/neurodata.io/deploy/content/bibs/talks.bib'\n", "url_beta = 'https://raw.githubusercontent.com/neurodata/neurodata.io/deploy/content/bibs/press.bib'" ] }, @@ -305,7 +347,7 @@ { "data": { "text/plain": [ - "'@misc{Prazosin2020,\\n year={2020},\\n title={Prazosin Might Be A Treatment for COVID-19. More Data Is'" + "'@misc{Prazosin2020, year={2020}, title={Prazosin Might Be A Treatment for COVID-19. More Data Is Urg'" ] }, "execution_count": 7, @@ -341,7 +383,7 @@ { "data": { "text/plain": [ - "'@misc{Prazosin2020,\\n year={2020},\\n title={Prazosin Might Be A Treatment for COVID-19. More Data Is'" + "'@misc{Prazosin2020, year={2020}, title={Prazosin Might Be A Treatment for COVID-19. More Data Is Urg'" ] }, "execution_count": 9, @@ -530,7 +572,7 @@ { "data": { "text/plain": [ - "\"% ****************************************\\r\\n% * File: SAMPLE.BIB *\\r\\n% ****************************************\\r\\n% * An invented bib file *\\r\\n% * For the sample texts *\\r\\n% * The order is unimportant and there *\\r\\n% * may be more entries than references *\\r\\n% * in the text *\\r\\n% ****************************************\\r\\n% \\r\\n@ARTICLE{smit54,\\r\\n\\tAUTHOR = {J. G. Smith and H. K. Weston},\\r\\n\\tTITLE = {Nothing Particular in this Year's History},\\r\\n\\tYEAR = {1954},\\r\\n\\tJOURNAL = {J. Geophys. Res.},\\r\\n\\tVOLUME = {2},\\r\\n\\tPAGES = {14-15}\\r\\n}\\r\\n@BOOK{colu92,\\r\\n\\tAUTHOR = {Christopher Columbus},\\r\\n\\tTITLE = {How {I} Discovered {America}},\\r\\n\\tYEAR = {1492},\\r\\n\\tPUBLISHER = {Hispanic Press},\\r\\n\\tADDRESS = {Barcelona}\\r\\n}\\r\\n@ARTICLE{gree00,\\r\\n\\tAUTHOR = {R. J. Green and U. P. Fred and W. P. Norbert},\\r\\n\\tTITLE = {Things that Go Bump in the Night},\\r\\n\\tYEAR = {1900},\\r\\n\\tJOURNAL = {Psych. Today},\\r\\n\\tVOLUME = {46},\\r\\n\\tPAGES = {345-678}\\r\\n}\\r\\n@ART\"" + "\"% ****************************************% * File: SAMPLE.BIB *% ****************************************% * An invented bib file *% * For the sample texts *% * The order is unimportant and there *% * may be more entries than references *% * in the text *% ****************************************% @ARTICLE{smit54,\\tAUTHOR = {J. G. Smith and H. K. Weston},\\tTITLE = {Nothing Particular in this Year's History},\\tYEAR = {1954},\\tJOURNAL = {J. Geophys. Res.},\\tVOLUME = {2},\\tPAGES = {14-15}}@BOOK{colu92,\\tAUTHOR = {Christopher Columbus},\\tTITLE = {How {I} Discovered {America}},\\tYEAR = {1492},\\tPUBLISHER = {Hispanic Press},\\tADDRESS = {Barcelona}}@ARTICLE{gree00,\\tAUTHOR = {R. J. Green and U. P. Fred and W. P. Norbert},\\tTITLE = {Things that Go Bump in the Night},\\tYEAR = {1900},\\tJOURNAL = {Psych. Today},\\tVOLUME = {46},\\tPAGES = {345-678}}@ARTICLE{phil99,\\tAUTHOR = {T. P. Phillips},\\tTITLE = {Possible Influence of the Magnetosphere on {American} History},\\tYEAR = {1999},\\tJOURNAL = {J. 
Oddball Res.},\\tVOLUME \"" ] }, "execution_count": 14, @@ -608,33 +650,7 @@ " BOOK\n", " colu92\n", " Christopher Columbus\n", - " I\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 2\n", - " BOOK\n", - " colu92\n", - " Christopher Columbus\n", - " I\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 3\n", - " BOOK\n", - " colu92\n", - " Christopher Columbus\n", - " I\n", + " How I Discovered America\n", " 1492\n", " NaN\n", " NaN\n", @@ -643,7 +659,7 @@ " Barcelona\n", " \n", " \n", - " 4\n", + " 2\n", " ARTICLE\n", " gree00\n", " R. J. Green and U. P. Fred and W. P. Norbert\n", @@ -656,24 +672,11 @@ " NaN\n", " \n", " \n", - " 5\n", - " ARTICLE\n", - " phil99\n", - " T. P. Phillips\n", - " American\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 6\n", + " 3\n", " ARTICLE\n", " phil99\n", " T. P. Phillips\n", - " American\n", + " Possible Influence of the Magnetosphere on Ame...\n", " 1999\n", " J. Oddball Res.\n", " 98\n", @@ -682,24 +685,11 @@ " NaN\n", " \n", " \n", - " 7\n", - " ARTICLE\n", - " jame76\n", - " Kelly James and Harris, Jr., George and Wilby ...\n", - " American\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 8\n", + " 4\n", " ARTICLE\n", " jame76\n", " Kelly James and Harris, Jr., George and Wilby ...\n", - " American\n", + " American Independence and Magnetism\n", " 1776\n", " Revol. Tracts\n", " 32\n", @@ -715,35 +705,23 @@ " type alias AUTHOR \\\n", "0 ARTICLE smit54 J. G. Smith and H. K. Weston \n", "1 BOOK colu92 Christopher Columbus \n", - "2 BOOK colu92 Christopher Columbus \n", - "3 BOOK colu92 Christopher Columbus \n", - "4 ARTICLE gree00 R. J. Green and U. P. Fred and W. P. Norbert \n", - "5 ARTICLE phil99 T. P. Phillips \n", - "6 ARTICLE phil99 T. P. Phillips \n", - "7 ARTICLE jame76 Kelly James and Harris, Jr., George and Wilby ... \n", - "8 ARTICLE jame76 Kelly James and Harris, Jr., George and Wilby ... \n", + "2 ARTICLE gree00 R. J. Green and U. P. Fred and W. P. Norbert \n", + "3 ARTICLE phil99 T. P. Phillips \n", + "4 ARTICLE jame76 Kelly James and Harris, Jr., George and Wilby ... \n", "\n", - " TITLE YEAR JOURNAL VOLUME \\\n", - "0 Nothing Particular in this Year's History 1954 J. Geophys. Res. 2 \n", - "1 I NaN NaN NaN \n", - "2 I NaN NaN NaN \n", - "3 I 1492 NaN NaN \n", - "4 Things that Go Bump in the Night 1900 Psych. Today 46 \n", - "5 American NaN NaN NaN \n", - "6 American 1999 J. Oddball Res. 98 \n", - "7 American NaN NaN NaN \n", - "8 American 1776 Revol. Tracts 32 \n", + " TITLE YEAR JOURNAL \\\n", + "0 Nothing Particular in this Year's History 1954 J. Geophys. Res. \n", + "1 How I Discovered America 1492 NaN \n", + "2 Things that Go Bump in the Night 1900 Psych. Today \n", + "3 Possible Influence of the Magnetosphere on Ame... 1999 J. Oddball Res. \n", + "4 American Independence and Magnetism 1776 Revol. 
Tracts \n", "\n", - " PAGES PUBLISHER ADDRESS \n", - "0 14-15 NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN Hispanic Press Barcelona \n", - "4 345-678 NaN NaN \n", - "5 NaN NaN NaN \n", - "6 1000-1003 NaN NaN \n", - "7 NaN NaN NaN \n", - "8 34-55 NaN NaN " + " VOLUME PAGES PUBLISHER ADDRESS \n", + "0 2 14-15 NaN NaN \n", + "1 NaN NaN Hispanic Press Barcelona \n", + "2 46 345-678 NaN NaN \n", + "3 98 1000-1003 NaN NaN \n", + "4 32 34-55 NaN NaN " ] }, "execution_count": 16, @@ -765,7 +743,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('pdr')", + "display_name": "Python 3.9.15 ('pandarize')", "language": "python", "name": "python3" }, @@ -779,12 +757,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.15" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "ec3ba36b413de325dd38d751e7a8a6eca65f333d1619da92dd7e369649a08d52" + "hash": "dad1adfa8d136b42bc671de9edd435b00b948a75d2c3d60fa318daadbc489ee6" } } }, From 0f22611dc3bb740978e92e1442093deba0e671e6 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 06:29:51 -0500 Subject: [PATCH 02/31] Add postprocessing switch (#46) --- pandarize/_util.py | 32 +++++++++++++++++++++----------- pandarize/frame.py | 4 ++-- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pandarize/_util.py b/pandarize/_util.py index fa83f29..a392aa0 100644 --- a/pandarize/_util.py +++ b/pandarize/_util.py @@ -72,7 +72,7 @@ def bib_preprocessing(raw): return raw -def bib_parser(raw, idxkey): +def bib_parser(raw, idxkey, postprocess): '''Main bib parsing logic''' all_lst = [] lst = [] @@ -122,7 +122,8 @@ def bib_parser(raw, idxkey): standby = False df = pd.DataFrame(all_lst) - df = postprocessing(df) + if postprocess: + df = postprocessing(df) return df @@ -254,10 +255,16 @@ def bib_parser_old(raw): return df_out -def check_names(string, connector): +def check_names(string, sep, connector): '''Checks for valid author names''' if connector in string: return True + + # skip in case at least one name is already converted + # or there's misformatting issue + if sep in string: + return True + return False def convert_names(string, sep=',', connector='and'): @@ -265,20 +272,23 @@ def convert_names(string, sep=',', connector='and'): ''' padded_connector = f' {connector} ' - if check_names(string, connector=padded_connector): + if check_names(string, sep=sep, connector=padded_connector): return string names = '' lst = string.split(sep) for i, nms in enumerate(lst): - nm = nms.strip().split(' ') - names += f'{nm[-1]}, {nm[0]}' - if len(nm) > 2: - for mname in nm[1:-1]: - names += f' {mname[0].upper()}.' - if i+1 != len(lst): - names += f'{padded_connector}' + try: + nm = nms.strip().split(' ') + names += f'{nm[-1]}, {nm[0]}' + if len(nm) > 2: + for mname in nm[1:-1]: + names += f' {mname[0].upper()}.' 
+ if i+1 != len(lst): + names += f'{padded_connector}' + except Exception as e: + print(f'{e} for {nms} at {i}th index') return names diff --git a/pandarize/frame.py b/pandarize/frame.py index cfa42e8..00e9086 100644 --- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -14,11 +14,11 @@ def load(self, source=None, savefile=None): self.raw = bib_preprocessing(raw=self.raw) self.idxkey = rfindall_matched(self.raw, r'[.*]?@[^}]*{*[,]', '@') - def fit(self, kind='bib'): + def fit(self, kind='bib', postprocess=False): '''Method that infers data structure (in the future) ''' if kind == 'bib': - self.df = bib_parser(raw=self.raw, idxkey=self.idxkey) + self.df = bib_parser(raw=self.raw, idxkey=self.idxkey, postprocess=postprocess) def transform(self, formats='bib', types=None, alias=None, dirs=None): '''Transform loaded data into a specified data type From b2aaf3afa2d7e6c94ffb76251be83fb3c8cd9935 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 07:36:37 -0500 Subject: [PATCH 03/31] Add basic stdout for metadata (#50) --- pandarize/frame.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pandarize/frame.py b/pandarize/frame.py index 00e9086..bd8b840 100644 --- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -24,4 +24,20 @@ def transform(self, formats='bib', types=None, alias=None, dirs=None): '''Transform loaded data into a specified data type ''' if formats == 'bib': - bib_writer(df=self.df, types=types, alias=alias, dirs=dirs) \ No newline at end of file + bib_writer(df=self.df, types=types, alias=alias, dirs=dirs) + + def describe(self): + '''Reports basic metadata''' + + if not self.df: + print('No file is loaded. Please load() and fit() to retrieve metadata.') + return 0 + + if self.df.shape[0] == 0 or self.df.shape[1] == 0: + print('The file has not been loaded successfully. Please check the file path and/or make sure that file is not corrupted.') + return 0 + + print(f'''The loaded file has {df.shape[0]} rows and {df.shape[1]} columns.\n + ''') + + ## Config Setting \ No newline at end of file From 0bf0f9827256469bcae0f73c6128d244f0181171 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 07:39:19 -0500 Subject: [PATCH 04/31] Stop returning integers (#50) --- pandarize/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandarize/frame.py b/pandarize/frame.py index bd8b840..c8a33ac 100644 --- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -30,12 +30,12 @@ def describe(self): '''Reports basic metadata''' if not self.df: - print('No file is loaded. Please load() and fit() to retrieve metadata.') - return 0 + print('No file is loaded. Please load() and fit() to create metadata.') + return if self.df.shape[0] == 0 or self.df.shape[1] == 0: print('The file has not been loaded successfully. 
Please check the file path and/or make sure that file is not corrupted.') - return 0 + return print(f'''The loaded file has {df.shape[0]} rows and {df.shape[1]} columns.\n ''') From 8969ccfa0766c765cca577ce47f6f0a2f5d8ebd6 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 14:22:04 -0500 Subject: [PATCH 05/31] Fix conditional for empty df (#50) --- pandarize/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandarize/frame.py b/pandarize/frame.py index c8a33ac..86ea48e 100644 --- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -27,9 +27,9 @@ def transform(self, formats='bib', types=None, alias=None, dirs=None): bib_writer(df=self.df, types=types, alias=alias, dirs=dirs) def describe(self): - '''Reports basic metadata''' + '''Generates basic metadata''' - if not self.df: + if self.df is None: print('No file is loaded. Please load() and fit() to create metadata.') return @@ -37,7 +37,7 @@ def describe(self): print('The file has not been loaded successfully. Please check the file path and/or make sure that file is not corrupted.') return - print(f'''The loaded file has {df.shape[0]} rows and {df.shape[1]} columns.\n + print(f'''The loaded file has {self.df.shape[0]} rows and {self.df.shape[1]} columns.\n ''') ## Config Setting \ No newline at end of file From 2c1fb900916077c9cb0fa79498dcf7b9810cd228 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 14:22:30 -0500 Subject: [PATCH 06/31] Display `describe` (#50) --- examples/bib_example.ipynb | 69 ++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/examples/bib_example.ipynb b/examples/bib_example.ipynb index d72479e..0c556cc 100644 --- a/examples/bib_example.ipynb +++ b/examples/bib_example.ipynb @@ -63,10 +63,43 @@ "pdr.fit()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Add basic description of loaded and fitted DataFrame" + ] + }, { "cell_type": "code", "execution_count": 4, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The loaded file has 374 rows and 25 columns.\n", + "\n", + " \n" + ] + } + ], + "source": [ + "pdr.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Head view of the DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, "outputs": [ { "data": { @@ -305,7 +338,7 @@ "[5 rows x 25 columns]" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -323,7 +356,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -332,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -341,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -350,7 +383,7 @@ "'@misc{Prazosin2020, year={2020}, title={Prazosin Might Be A Treatment for COVID-19. 
More Data Is Urg'" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -368,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -377,7 +410,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -386,7 +419,7 @@ "'@misc{Prazosin2020, year={2020}, title={Prazosin Might Be A Treatment for COVID-19. More Data Is Urg'" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -397,7 +430,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -406,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -530,7 +563,7 @@ "4 Mone, Amy and Mehl, Valerie press " ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -548,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -557,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -566,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -575,7 +608,7 @@ "\"% ****************************************% * File: SAMPLE.BIB *% ****************************************% * An invented bib file *% * For the sample texts *% * The order is unimportant and there *% * may be more entries than references *% * in the text *% ****************************************% @ARTICLE{smit54,\\tAUTHOR = {J. G. Smith and H. K. Weston},\\tTITLE = {Nothing Particular in this Year's History},\\tYEAR = {1954},\\tJOURNAL = {J. Geophys. Res.},\\tVOLUME = {2},\\tPAGES = {14-15}}@BOOK{colu92,\\tAUTHOR = {Christopher Columbus},\\tTITLE = {How {I} Discovered {America}},\\tYEAR = {1492},\\tPUBLISHER = {Hispanic Press},\\tADDRESS = {Barcelona}}@ARTICLE{gree00,\\tAUTHOR = {R. J. Green and U. P. Fred and W. P. Norbert},\\tTITLE = {Things that Go Bump in the Night},\\tYEAR = {1900},\\tJOURNAL = {Psych. Today},\\tVOLUME = {46},\\tPAGES = {345-678}}@ARTICLE{phil99,\\tAUTHOR = {T. P. Phillips},\\tTITLE = {Possible Influence of the Magnetosphere on {American} History},\\tYEAR = {1999},\\tJOURNAL = {J. 
Oddball Res.},\\tVOLUME \"" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -586,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -595,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -724,7 +757,7 @@ "4 32 34-55 NaN NaN " ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } From af0b76b8ac82e6d23703d3703a68f1070f4f3458 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 21:52:23 -0500 Subject: [PATCH 07/31] Add config loader (#51) --- pandarize/_util.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandarize/_util.py b/pandarize/_util.py index a392aa0..7f5bb4a 100644 --- a/pandarize/_util.py +++ b/pandarize/_util.py @@ -4,6 +4,8 @@ from pylatexenc.latex2text import LatexNodes2Text import re import os +import yaml as pyyaml +import pkgutil def source_loader(source, savefile): if check_url(string=source): @@ -33,6 +35,32 @@ def source_loader(source, savefile): return raw +def validate_config(obj): + '''Validates yaml config files''' + pass + +def load_config(yaml, path, ftype='bib'): + '''Loads yaml config file and returns a yaml object''' + def load(data): + try: + dic = {} + for i in pyyaml.safe_load(data)[ftype]: + for key, val in i.items(): + dic[key] = val + + print('Configuration applied. Please change the setting via .settings as needed.') + return dic + + except: + print('The config file is either not found or corrupted.') + + if yaml and path: + with open(path) as f: + return load(f) + else: + data = pkgutil.get_data(__name__, "/config/config.yaml").decode('utf-8') + return load(data) + def rfindall(string, pattern): '''Find index of all occurrence of the pattern''' From 3a6c8c9040def4b3da87df54e59ad4567d4b5bbf Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 21:52:48 -0500 Subject: [PATCH 08/31] Load settings when initialize (#51) --- pandarize/frame.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandarize/frame.py b/pandarize/frame.py index 86ea48e..56b1f73 100644 --- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -1,11 +1,21 @@ import pandas as pd from pandarize._util import * + class Pandarizer: def __init__(self): self.raw = None self.df = None self.idxkey = None + self.settings = None + + + def initialize(self, yaml=False, path=None): + '''Initializes the setting either for the first time by + loading a default yaml config file in system dir or + load from an user-specified existing the file in `path` + ''' + self.settings = load_config(yaml=yaml, path=path) def load(self, source=None, savefile=None): '''Loads raw data from either local file or the url From 0a5fc1e201f308210bd8bb01327443c2c4f620a9 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 21:53:37 -0500 Subject: [PATCH 09/31] Find package automatically and include all yaml (#51) --- setup.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 8fcbd26..44bb137 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ from distutils.core import setup +from setuptools import find_packages import pandarize VERSION = pandarize.__version__ @@ -10,18 +11,19 @@ version=VERSION, author='Jong M. 
Shin', author_email='jshinm@gmail.com', - packages=['pandarize'], + packages=find_packages(), + package_data = {"": ['*.yaml']}, url='https://github.com/jshinm/pandarize/', license='MIT', description='Turns data into panda dataframe', readme='README.md', long_description_content_type='text/markdown', long_description=README, - requires=['pandas', 'requests'], - install_requires=["pandas", 'requests'], + requires=['pandas', 'requests', 'pyyaml'], + install_requires=["pandas", 'requests', 'pyyaml'], classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", -] + "Operating System :: OS Independent" + ] ) \ No newline at end of file From 6de706475d8c6f994ec8a6ccac4eb527c3397e9b Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sun, 25 Dec 2022 21:53:55 -0500 Subject: [PATCH 10/31] Add yaml config file (#51) --- pandarize/config/config.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 pandarize/config/config.yaml diff --git a/pandarize/config/config.yaml b/pandarize/config/config.yaml new file mode 100644 index 0000000..12dde3b --- /dev/null +++ b/pandarize/config/config.yaml @@ -0,0 +1,4 @@ +- bib: + convert_names: True + remove_latex: True + remove_empty_entries: True \ No newline at end of file From 409ddf7c8fb7379a77a26ed90b76a81118c9dfb5 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Mon, 26 Dec 2022 15:43:00 -0500 Subject: [PATCH 11/31] Add an entry for `truncate_author_list` (#39) --- pandarize/config/config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandarize/config/config.yaml b/pandarize/config/config.yaml index 12dde3b..9671280 100644 --- a/pandarize/config/config.yaml +++ b/pandarize/config/config.yaml @@ -1,4 +1,5 @@ - bib: convert_names: True remove_latex: True - remove_empty_entries: True \ No newline at end of file + remove_empty_entries: True + truncate_author_list: False \ No newline at end of file From deed76260d0fe64a244e7f37f9feacb4ed10075e Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Tue, 27 Dec 2022 19:13:32 -0500 Subject: [PATCH 12/31] Fix yaml file structure (#51) --- pandarize/config/config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandarize/config/config.yaml b/pandarize/config/config.yaml index 9671280..d962b6d 100644 --- a/pandarize/config/config.yaml +++ b/pandarize/config/config.yaml @@ -1,5 +1,5 @@ -- bib: - convert_names: True - remove_latex: True - remove_empty_entries: True - truncate_author_list: False \ No newline at end of file +bib: + - convert_names: True + - remove_latex: True + - remove_empty_entries: True + - truncate_author_list: False #happens after `fit` \ No newline at end of file From 4146588d122d13ea2d3e79b160dee9578445547a Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Wed, 28 Dec 2022 11:23:54 -0500 Subject: [PATCH 13/31] Add comments (#51) --- pandarize/config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandarize/config/config.yaml b/pandarize/config/config.yaml index d962b6d..0dfd0ba 100644 --- a/pandarize/config/config.yaml +++ b/pandarize/config/config.yaml @@ -2,4 +2,4 @@ bib: - convert_names: True - remove_latex: True - remove_empty_entries: True - - truncate_author_list: False #happens after `fit` \ No newline at end of file + - truncate_author_list: False #applied after `fit` \ No newline at end of file From 59d3aed4b843753e39929651643b075bb20dbed7 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Wed, 28 Dec 2022 11:50:12 -0500 Subject: [PATCH 
14/31] Add placeholder for `truncate_names` (#51) --- pandarize/_util.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandarize/_util.py b/pandarize/_util.py index 7f5bb4a..dd121b4 100644 --- a/pandarize/_util.py +++ b/pandarize/_util.py @@ -155,6 +155,10 @@ def bib_parser(raw, idxkey, postprocess): return df +def truncate_names(srs): + '''Truncates names in Pandas series''' + pass + def _itemize_bib(lst): '''Itemizes bib structured string into a json format''' new_lst = [] From 285898e97439adb2209e36bcc15b1e2452c612ce Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Thu, 29 Dec 2022 21:12:59 -0500 Subject: [PATCH 15/31] Add docstring for `convert_names` (#39) --- pandarize/_util.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandarize/_util.py b/pandarize/_util.py index dd121b4..74fa4ce 100644 --- a/pandarize/_util.py +++ b/pandarize/_util.py @@ -300,8 +300,17 @@ def check_names(string, sep, connector): return False def convert_names(string, sep=',', connector='and'): - '''Convert First MI Last names to Last, First MI format. - ''' + """Convert First MI Last names to Last, First MI format. + + Args: + string (str): parsed string that contains names with (name)(sep)(name) format + sep (str, optional): original string separator between names. Defaults to ','. + connector (str, optional): new name connector that will connect converted names. Defaults to 'and'. + + Returns: + str: converted names connected by `connector` + """ + padded_connector = f' {connector} ' if check_names(string, sep=sep, connector=padded_connector): @@ -322,6 +331,9 @@ def convert_names(string, sep=',', connector='and'): except Exception as e: print(f'{e} for {nms} at {i}th index') + # conditional here for truncate author list + + return names def bib_writer(df, types, alias, dirs): From 076c5eefe20b91ea15c3b5decc9f0c5c0bbbbad0 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 16:28:19 -0500 Subject: [PATCH 16/31] Remove old parser (#52) --- pandarize/_util.py | 49 ---------------------------------------------- 1 file changed, 49 deletions(-) diff --git a/pandarize/_util.py b/pandarize/_util.py index 74fa4ce..35563c9 100644 --- a/pandarize/_util.py +++ b/pandarize/_util.py @@ -238,55 +238,6 @@ def postprocessing(df): return df -def bib_parser_old(raw): - '''Old bib parsing logic (deprecated and replaced by the new logic)''' - df_out = pd.DataFrame() - raw = manual_drop(raw, keys=['\n']) - raw = check_string(raw) - is_newRow = True - - for i, char in enumerate(raw[:]): - - if char == '@' and is_newRow: - new_row = {} - get_type = i+1 - elif char == '{': - if get_type: - new_row['type'] = raw[get_type:i].strip() - get_type = None - get_alias = i+1 #get the alias - elif curr_name != None: - get_item = i+1 - else: - pass - elif char == '}': - if get_item: - new_row[curr_name] = raw[get_item:i] - get_item = None - curr_name = None - else: - df_row = pd.DataFrame.from_dict(new_row, orient='index').T - df_out = pd.concat([df_out, df_row]) - is_newRow = True - elif char == '=' and get_name: - curr_name = raw[get_name:i].strip() - new_row[curr_name] = None - get_name = None - elif char == ',': - if get_alias: - new_row['alias'] = raw[get_alias:i] - get_alias = None - is_newRow = False - elif curr_name: - continue #edge case to handle comma (,) in the content - get_name = i+1 - else: - pass - - df_out.reset_index(drop=True, inplace=True) - - return df_out - def check_names(string, sep, connector): '''Checks for valid author names''' if connector in string: From 
c7554b7aaa5666ee536a1b569c0701ad284e0cca Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 22:47:37 -0500 Subject: [PATCH 17/31] Create a separate loader class for modularization (#52) --- pandarize/loader.py | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 pandarize/loader.py diff --git a/pandarize/loader.py b/pandarize/loader.py new file mode 100644 index 0000000..38ef81e --- /dev/null +++ b/pandarize/loader.py @@ -0,0 +1,64 @@ +import os +import yaml as pyyaml +import requests +import pkgutil +from ._util import * + +class Loader: + def __init__(self): + self.settings = None + self.raw = None + + def source_loader(self, source, savefile): + if check_url(string=source): + r = requests.get(url=source) + r = r.content + else: + try: + with open(source, 'r', encoding='UTF-8', newline='') as f: + r = f.read() + except Exception as e: + print('Error while reading from local file') + + if isinstance(r, bytes): + raw = r.decode('utf-8') + elif isinstance(r, str): + raw = r + else: + raise Exception('The source cannot be parsed') + + if savefile: + folder, files = os.path.split(savefile) + if not os.path.exists(path=folder): + os.mkdir(path=folder) + + with open(savefile, 'w', encoding='UTF-8', newline='') as f: + f.write(raw) + + self.raw = raw + + def validate_config(self, obj): + '''Validates yaml config files''' + pass + + def load_config(self, yaml=None, path=None, ftype='bib'): + '''Loads yaml config file and returns a yaml object''' + def load(data): + try: + dic = {} + for i in pyyaml.safe_load(data)[ftype]: + for key, val in i.items(): + dic[key] = val + + print('Configuration applied. Please change the setting via .settings as needed.') + return dic + + except: + print('The config file is either not found or corrupted.') + + if yaml and path: + with open(path) as f: + return load(f) + else: + data = pkgutil.get_data(__name__, "/config/config.yaml").decode('utf-8') + self.settings = load(data) \ No newline at end of file From 57eda06a4c0fe9c0c00fa621340be68490ac82d7 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 22:49:35 -0500 Subject: [PATCH 18/31] Create a parser class that parse out a bib format (#52) --- pandarize/parser.py | 151 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 pandarize/parser.py diff --git a/pandarize/parser.py b/pandarize/parser.py new file mode 100644 index 0000000..46671d1 --- /dev/null +++ b/pandarize/parser.py @@ -0,0 +1,151 @@ +import re +from ._util import * +from pylatexenc.latex2text import LatexNodes2Text + +class Parser: + def __init__(self): + self.df = None + + def bib_preprocessing(self): + '''Pre-processes raw bib file''' + + raw = self.raw.replace('\n', '').replace('\r', '') #remove linebreaks and linefeed + raw = re.sub(' +', ' ', raw) #contract whitespace + + self.raw = raw + # return raw + + def postprocessing(self, df): + '''Post-process of constructed pandas DataFrame. 
Runs multiple checks.''' + + # Author Name Check for Biber + df['author'] = df['author'].apply(lambda x: convert_names(x)) + + return df + + def bib_parser(self, raw, idxkey, postprocess): + '''Main bib parsing logic''' + all_lst = [] + lst = [] + start = None + standby = None + + for i, c in enumerate(raw): + if c == '@': + if not i in idxkey: #skip if not true start + continue + + if lst: + # fixes cases when extra comma is added to the last key:value item + fix = raw[curr_idx:last_pair-2] + raw[last_pair-2:last_pair+1].replace(',', '') + lst.append(fix) #edge case for last key:value pair + all_lst.append(self._itemize_bib(lst)) + lst = [] + curr_idx = i + start = True + elif c == ',' and start: + lst.append(raw[curr_idx:i+1]) + start = False + curr_idx = i+1 + elif c == '}' and i != len(raw)-1: + last_pair = i #catches last pair and saves position as index + standby = True + elif c == ',' and standby: + # second check to account for misused bracket edge cases + # e.g., author = {A and B and C and {D} and F} + standby = False + + for check_i in raw[i+1:]: + if check_i == '}': + break + elif check_i == '=': + if raw[curr_idx:i+1]: + lst.append(raw[curr_idx:i+1]) #remove linebreak + curr_idx = i+1 + else: + break + elif i == len(raw)-1: + lst.append(raw[curr_idx:i+1]) + all_lst.append(self._itemize_bib(lst)) + elif c == ' ': + pass + else: + standby = False + + df = pd.DataFrame(all_lst) + if postprocess: + df = postprocessing(df) + + self.df = df + + def bib_writer(self, df, types, alias, dirs): + '''bib writer and formatter that converts pandas + dataframe into a bib file + ''' + + def parse(row, types=types, alias=alias): + items = [] + + for i, (idx, item) in enumerate(zip(row.index, row)): + if pd.isnull(item) or item == '': + continue + item = str(item) + if idx == types: + header = f'@{item}' + '{' + elif idx == alias: + alias = item + ',\n' + else: + item_i = f'\t{idx} = ' + '{' + f'{item}' + '},\n' + items.append(item_i) + + out_text = header + alias + for i in items: + out_text += i + out_text = out_text[:-2] #remove last comma + out_text += '\n},\n' + + return out_text + + N = df.shape[0] + + # Add stamper before the first header + out = stamper(target='bib') + + for i in range(N): + if i == N-1: #remove the very last comma + out += parse(df.iloc[i,:])[:-3] + parse(df.iloc[i,:])[-3:].replace(',', '') + '\n' + else: + out += parse(df.iloc[i,:]) + '\n' + + if not os.path.exists(path=dirs): + os.mkdir(path=dirs) + + with open(f'{dirs}output.bib', 'w', encoding='utf-8') as f: + f.write(out) + + @staticmethod + def _itemize_bib(lst): + '''Itemizes bib structured string into a json format''' + new_lst = [] + dic = {} + + for i, s in enumerate(lst): + if i == 0: + ii = s.rfind('@') + jj = s.rfind('{') + kk = s.rfind(',') + dic['type'] = s[ii:jj].replace('@', '') + dic['alias'] = s[jj:kk].replace('{', '') + else: + if s: + # print(s, sorted(rfindall(s, '='))) + ii = sorted(rfindall(s, '='))[0] + if s[-1] == ',': + s = s[:-1] + out = LatexNodes2Text().latex_to_text(s[ii+1:]).strip() + dic[s[:ii].strip()] = out + + for i in lst: + new_lst.append(LatexNodes2Text().latex_to_text(i)) + + return dic \ No newline at end of file From e13e128c2fa44db17fb1bb114f9da126924c6872 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 22:50:53 -0500 Subject: [PATCH 19/31] Refactor code to use other modules (#52) --- pandarize/frame.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/pandarize/frame.py b/pandarize/frame.py index 56b1f73..ea7c1f8 100644 
--- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -1,40 +1,37 @@ import pandas as pd -from pandarize._util import * +from ._util import * +from .loader import Loader +from .parser import Parser - -class Pandarizer: +class Pandarizer(Loader, Parser): def __init__(self): - self.raw = None - self.df = None self.idxkey = None - self.settings = None - def initialize(self, yaml=False, path=None): '''Initializes the setting either for the first time by loading a default yaml config file in system dir or load from an user-specified existing the file in `path` ''' - self.settings = load_config(yaml=yaml, path=path) + self.load_config(yaml=yaml, path=path) def load(self, source=None, savefile=None): '''Loads raw data from either local file or the url ''' - self.raw = source_loader(source=source, savefile=savefile) - self.raw = bib_preprocessing(raw=self.raw) + self.source_loader(source=source, savefile=savefile) + self.bib_preprocessing() self.idxkey = rfindall_matched(self.raw, r'[.*]?@[^}]*{*[,]', '@') def fit(self, kind='bib', postprocess=False): '''Method that infers data structure (in the future) ''' if kind == 'bib': - self.df = bib_parser(raw=self.raw, idxkey=self.idxkey, postprocess=postprocess) + self.bib_parser(raw=self.raw, idxkey=self.idxkey, postprocess=postprocess) def transform(self, formats='bib', types=None, alias=None, dirs=None): '''Transform loaded data into a specified data type ''' if formats == 'bib': - bib_writer(df=self.df, types=types, alias=alias, dirs=dirs) + self.bib_writer(df=self.df, types=types, alias=alias, dirs=dirs) def describe(self): '''Generates basic metadata''' From a4a8f2dba68efb8a3cfd9b0d33a11c627da60c2e Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 22:52:14 -0500 Subject: [PATCH 20/31] Move functions to their corresponding modules (#52) --- pandarize/_util.py | 203 --------------------------------------------- 1 file changed, 203 deletions(-) diff --git a/pandarize/_util.py b/pandarize/_util.py index 35563c9..dae8fcc 100644 --- a/pandarize/_util.py +++ b/pandarize/_util.py @@ -1,65 +1,5 @@ -import requests -import pandas as pd from datetime import datetime -from pylatexenc.latex2text import LatexNodes2Text import re -import os -import yaml as pyyaml -import pkgutil - -def source_loader(source, savefile): - if check_url(string=source): - r = requests.get(url=source) - r = r.content - else: - try: - with open(source, 'r', encoding='UTF-8', newline='') as f: - r = f.read() - except Exception as e: - print('Error while reading from local file') - - if isinstance(r, bytes): - raw = r.decode('utf-8') - elif isinstance(r, str): - raw = r - else: - raise Exception('The source cannot be parsed') - - if savefile: - folder, files = os.path.split(savefile) - if not os.path.exists(path=folder): - os.mkdir(path=folder) - - with open(savefile, 'w', encoding='UTF-8', newline='') as f: - f.write(raw) - - return raw - -def validate_config(obj): - '''Validates yaml config files''' - pass - -def load_config(yaml, path, ftype='bib'): - '''Loads yaml config file and returns a yaml object''' - def load(data): - try: - dic = {} - for i in pyyaml.safe_load(data)[ftype]: - for key, val in i.items(): - dic[key] = val - - print('Configuration applied. 
Please change the setting via .settings as needed.') - return dic - - except: - print('The config file is either not found or corrupted.') - - if yaml and path: - with open(path) as f: - return load(f) - else: - data = pkgutil.get_data(__name__, "/config/config.yaml").decode('utf-8') - return load(data) def rfindall(string, pattern): '''Find index of all occurrence of the pattern''' @@ -92,99 +32,10 @@ def rfindall_matched(string, pattern, key): match_index.append(match.start() + match.group().rfind(key)) return match_index -def bib_preprocessing(raw): - '''Pre-processes raw bib file''' - - raw = raw.replace('\n', '').replace('\r', '') #remove linebreaks and linefeed - raw = re.sub(' +', ' ', raw) #contract whitespace - - return raw - -def bib_parser(raw, idxkey, postprocess): - '''Main bib parsing logic''' - all_lst = [] - lst = [] - start = None - standby = None - - for i, c in enumerate(raw): - if c == '@': - if not i in idxkey: #skip if not true start - continue - - if lst: - # fixes cases when extra comma is added to the last key:value item - fix = raw[curr_idx:last_pair-2] + raw[last_pair-2:last_pair+1].replace(',', '') - lst.append(fix) #edge case for last key:value pair - all_lst.append(_itemize_bib(lst)) - lst = [] - curr_idx = i - start = True - elif c == ',' and start: - lst.append(raw[curr_idx:i+1]) - start = False - curr_idx = i+1 - elif c == '}' and i != len(raw)-1: - last_pair = i #catches last pair and saves position as index - standby = True - elif c == ',' and standby: - # second check to account for misused bracket edge cases - # e.g., author = {A and B and C and {D} and F} - standby = False - - for check_i in raw[i+1:]: - if check_i == '}': - break - elif check_i == '=': - if raw[curr_idx:i+1]: - lst.append(raw[curr_idx:i+1]) #remove linebreak - curr_idx = i+1 - else: - break - elif i == len(raw)-1: - lst.append(raw[curr_idx:i+1]) - all_lst.append(_itemize_bib(lst)) - elif c == ' ': - pass - else: - standby = False - - df = pd.DataFrame(all_lst) - if postprocess: - df = postprocessing(df) - - return df - def truncate_names(srs): '''Truncates names in Pandas series''' pass -def _itemize_bib(lst): - '''Itemizes bib structured string into a json format''' - new_lst = [] - dic = {} - - for i, s in enumerate(lst): - if i == 0: - ii = s.rfind('@') - jj = s.rfind('{') - kk = s.rfind(',') - dic['type'] = s[ii:jj].replace('@', '') - dic['alias'] = s[jj:kk].replace('{', '') - else: - if s: - # print(s, sorted(rfindall(s, '='))) - ii = sorted(rfindall(s, '='))[0] - if s[-1] == ',': - s = s[:-1] - out = LatexNodes2Text().latex_to_text(s[ii+1:]).strip() - dic[s[:ii].strip()] = out - - for i in lst: - new_lst.append(LatexNodes2Text().latex_to_text(i)) - - return dic - def check_string(string): '''Screens for misinterpreted strings that interferes parsing (deprecated)''' @@ -230,14 +81,6 @@ def manual_drop(raw, keys): return raw -def postprocessing(df): - '''Post-process of constructed pandas DataFrame. 
Runs multiple checks.''' - - # Author Name Check for Biber - df['author'] = df['author'].apply(lambda x: convert_names(x)) - - return df - def check_names(string, sep, connector): '''Checks for valid author names''' if connector in string: @@ -283,51 +126,5 @@ def convert_names(string, sep=',', connector='and'): print(f'{e} for {nms} at {i}th index') # conditional here for truncate author list - return names - -def bib_writer(df, types, alias, dirs): - '''bib writer and formatter that converts pandas - dataframe into a bib file - ''' - - def parse(row, types=types, alias=alias): - items = [] - - for i, (idx, item) in enumerate(zip(row.index, row)): - if pd.isnull(item) or item == '': - continue - item = str(item) - if idx == types: - header = f'@{item}' + '{' - elif idx == alias: - alias = item + ',\n' - else: - item_i = f'\t{idx} = ' + '{' + f'{item}' + '},\n' - items.append(item_i) - - out_text = header + alias - for i in items: - out_text += i - out_text = out_text[:-2] #remove last comma - out_text += '\n},\n' - - return out_text - - N = df.shape[0] - - # Add stamper before the first header - out = stamper(target='bib') - - for i in range(N): - if i == N-1: #remove the very last comma - out += parse(df.iloc[i,:])[:-3] + parse(df.iloc[i,:])[-3:].replace(',', '') + '\n' - else: - out += parse(df.iloc[i,:]) + '\n' - - if not os.path.exists(path=dirs): - os.mkdir(path=dirs) - - with open(f'{dirs}output.bib', 'w', encoding='utf-8') as f: - f.write(out) \ No newline at end of file From 8248ba5df2c18d28b53fe72a62b61a76da75a0c7 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 22:54:26 -0500 Subject: [PATCH 21/31] Remove return statement and save as class variables (#52) --- pandarize/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandarize/loader.py b/pandarize/loader.py index 38ef81e..649222e 100644 --- a/pandarize/loader.py +++ b/pandarize/loader.py @@ -58,7 +58,7 @@ def load(data): if yaml and path: with open(path) as f: - return load(f) + self.settings = load(f) else: data = pkgutil.get_data(__name__, "/config/config.yaml").decode('utf-8') self.settings = load(data) \ No newline at end of file From 1a0a103f3f76c32adac3a764af9db6df726aa36e Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 23:39:55 -0500 Subject: [PATCH 22/31] Refactor `bib_parser` to use class variable (#52) --- pandarize/frame.py | 4 ++-- pandarize/parser.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandarize/frame.py b/pandarize/frame.py index ea7c1f8..f3040e1 100644 --- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -19,13 +19,13 @@ def load(self, source=None, savefile=None): ''' self.source_loader(source=source, savefile=savefile) self.bib_preprocessing() - self.idxkey = rfindall_matched(self.raw, r'[.*]?@[^}]*{*[,]', '@') + # self.idxkey = rfindall_matched(self.raw, r'[.*]?@[^}]*{*[,]', '@') def fit(self, kind='bib', postprocess=False): '''Method that infers data structure (in the future) ''' if kind == 'bib': - self.bib_parser(raw=self.raw, idxkey=self.idxkey, postprocess=postprocess) + self.bib_parser(postprocess=postprocess) def transform(self, formats='bib', types=None, alias=None, dirs=None): '''Transform loaded data into a specified data type diff --git a/pandarize/parser.py b/pandarize/parser.py index 46671d1..1cac45c 100644 --- a/pandarize/parser.py +++ b/pandarize/parser.py @@ -1,4 +1,6 @@ import re +import os +import pandas as pd from ._util import * from pylatexenc.latex2text import LatexNodes2Text @@ -23,12 +25,14 @@ 
def postprocessing(self, df): return df - def bib_parser(self, raw, idxkey, postprocess): + def bib_parser(self, postprocess): '''Main bib parsing logic''' all_lst = [] lst = [] start = None standby = None + raw = self.raw + idxkey = rfindall_matched(raw, r'[.*]?@[^}]*{*[,]', '@') for i, c in enumerate(raw): if c == '@': From 4bebdd61ed54acb13e688f739d3924bdc84bec7b Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 23:41:50 -0500 Subject: [PATCH 23/31] Refactor `bib_writer` to use class variable (#52) --- pandarize/frame.py | 2 +- pandarize/parser.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandarize/frame.py b/pandarize/frame.py index f3040e1..4c14144 100644 --- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -31,7 +31,7 @@ def transform(self, formats='bib', types=None, alias=None, dirs=None): '''Transform loaded data into a specified data type ''' if formats == 'bib': - self.bib_writer(df=self.df, types=types, alias=alias, dirs=dirs) + self.bib_writer(types=types, alias=alias, dirs=dirs) def describe(self): '''Generates basic metadata''' diff --git a/pandarize/parser.py b/pandarize/parser.py index 1cac45c..648ef4b 100644 --- a/pandarize/parser.py +++ b/pandarize/parser.py @@ -82,11 +82,13 @@ def bib_parser(self, postprocess): self.df = df - def bib_writer(self, df, types, alias, dirs): + def bib_writer(self, types, alias, dirs): '''bib writer and formatter that converts pandas dataframe into a bib file ''' + df = self.df + def parse(row, types=types, alias=alias): items = [] From 89ceef3be593b3eba9dfcbd8fcf40cc4e89b0e14 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 23:43:54 -0500 Subject: [PATCH 24/31] Remove frame init (#52) --- pandarize/frame.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandarize/frame.py b/pandarize/frame.py index 4c14144..9201412 100644 --- a/pandarize/frame.py +++ b/pandarize/frame.py @@ -4,9 +4,7 @@ from .parser import Parser class Pandarizer(Loader, Parser): - def __init__(self): - self.idxkey = None - + def initialize(self, yaml=False, path=None): '''Initializes the setting either for the first time by loading a default yaml config file in system dir or @@ -19,7 +17,6 @@ def load(self, source=None, savefile=None): ''' self.source_loader(source=source, savefile=savefile) self.bib_preprocessing() - # self.idxkey = rfindall_matched(self.raw, r'[.*]?@[^}]*{*[,]', '@') def fit(self, kind='bib', postprocess=False): '''Method that infers data structure (in the future) @@ -46,5 +43,3 @@ def describe(self): print(f'''The loaded file has {self.df.shape[0]} rows and {self.df.shape[1]} columns.\n ''') - - ## Config Setting \ No newline at end of file From 2481cce731efb5353905db4ef3eaf6c80d1aa181 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 23:47:38 -0500 Subject: [PATCH 25/31] Add comments for each switch (#51) --- pandarize/config/config.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandarize/config/config.yaml b/pandarize/config/config.yaml index 0dfd0ba..c1c191c 100644 --- a/pandarize/config/config.yaml +++ b/pandarize/config/config.yaml @@ -1,5 +1,6 @@ bib: - - convert_names: True - - remove_latex: True - - remove_empty_entries: True + - convert_names: True #change name format to Last, First MI + - remove_latex: True #remove latex syntax + - remove_html: False #TODO: remove html tags + - remove_empty_entries: True #empty entries are removed after `transform` - truncate_author_list: False #applied after `fit` \ No newline at end 
of file From ce55b3b9684e6daad74b52be7b952545733a066a Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 23:53:03 -0500 Subject: [PATCH 26/31] Add conditional for `convert_names` option (#51) --- pandarize/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandarize/parser.py b/pandarize/parser.py index 648ef4b..0a11e14 100644 --- a/pandarize/parser.py +++ b/pandarize/parser.py @@ -15,13 +15,13 @@ def bib_preprocessing(self): raw = re.sub(' +', ' ', raw) #contract whitespace self.raw = raw - # return raw def postprocessing(self, df): '''Post-process of constructed pandas DataFrame. Runs multiple checks.''' # Author Name Check for Biber - df['author'] = df['author'].apply(lambda x: convert_names(x)) + if self.settings['convert_names']: + df['author'] = df['author'].apply(lambda x: convert_names(x)) return df From f9103bc9cafc4a4f6990154a41ed74375ebf2b35 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Fri, 30 Dec 2022 23:54:28 -0500 Subject: [PATCH 27/31] Add comments (#51) --- pandarize/config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandarize/config/config.yaml b/pandarize/config/config.yaml index c1c191c..5d0dfb8 100644 --- a/pandarize/config/config.yaml +++ b/pandarize/config/config.yaml @@ -3,4 +3,4 @@ bib: - remove_latex: True #remove latex syntax - remove_html: False #TODO: remove html tags - remove_empty_entries: True #empty entries are removed after `transform` - - truncate_author_list: False #applied after `fit` \ No newline at end of file + - truncate_author_list: False #shorten names in the list by only stating the first person followed by optional `suffix` \ No newline at end of file From 26560686b1945746d4b0bb904ed7e892da6d8535 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sat, 31 Dec 2022 00:14:21 -0500 Subject: [PATCH 28/31] Removed latex option (#51) --- pandarize/config/config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/pandarize/config/config.yaml b/pandarize/config/config.yaml index 5d0dfb8..5ab03b8 100644 --- a/pandarize/config/config.yaml +++ b/pandarize/config/config.yaml @@ -1,6 +1,5 @@ bib: - convert_names: True #change name format to Last, First MI - - remove_latex: True #remove latex syntax - remove_html: False #TODO: remove html tags - remove_empty_entries: True #empty entries are removed after `transform` - truncate_author_list: False #shorten names in the list by only stating the first person followed by optional `suffix` \ No newline at end of file From bc8741399a1284ef4b49e3b9426126d88ec760a2 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sat, 31 Dec 2022 00:14:48 -0500 Subject: [PATCH 29/31] Removed outdated lines (#51) --- pandarize/parser.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandarize/parser.py b/pandarize/parser.py index 0a11e14..7e9aacc 100644 --- a/pandarize/parser.py +++ b/pandarize/parser.py @@ -144,14 +144,10 @@ def _itemize_bib(lst): dic['alias'] = s[jj:kk].replace('{', '') else: if s: - # print(s, sorted(rfindall(s, '='))) ii = sorted(rfindall(s, '='))[0] if s[-1] == ',': s = s[:-1] out = LatexNodes2Text().latex_to_text(s[ii+1:]).strip() dic[s[:ii].strip()] = out - - for i in lst: - new_lst.append(LatexNodes2Text().latex_to_text(i)) - + return dic \ No newline at end of file From e3a10f9ab296b407aa2b4305bf0cd0307c88d5d6 Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sat, 31 Dec 2022 01:09:41 -0500 Subject: [PATCH 30/31] Add `remove_empty_entries` option and fix bug (#51) --- pandarize/parser.py | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/pandarize/parser.py b/pandarize/parser.py index 7e9aacc..b0c57e2 100644 --- a/pandarize/parser.py +++ b/pandarize/parser.py @@ -69,7 +69,8 @@ def bib_parser(self, postprocess): else: break elif i == len(raw)-1: - lst.append(raw[curr_idx:i+1]) + fix = raw[curr_idx:-3] + raw[-3:].replace(',', '') + lst.append(fix) all_lst.append(self._itemize_bib(lst)) elif c == ' ': pass @@ -93,7 +94,7 @@ def parse(row, types=types, alias=alias): items = [] for i, (idx, item) in enumerate(zip(row.index, row)): - if pd.isnull(item) or item == '': + if pd.isnull(item) or item == '' and self.settings['remove_empty_entries']: continue item = str(item) if idx == types: From 38a3a4ee5e42370698cdd0851b10a15d41ade2fb Mon Sep 17 00:00:00 2001 From: Jong Shin Date: Sat, 31 Dec 2022 01:11:36 -0500 Subject: [PATCH 31/31] Update version (#50, #51, #52) --- pandarize/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandarize/__init__.py b/pandarize/__init__.py index 7d53ea3..71eb32d 100644 --- a/pandarize/__init__.py +++ b/pandarize/__init__.py @@ -1 +1 @@ -__version__ = "0.0.7" \ No newline at end of file +__version__ = "0.0.8" \ No newline at end of file
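
Taken together, the series above leaves the Pandarizer class at version 0.0.8 split across the Loader and Parser mixins, with the yaml switches from config/config.yaml exposed as a plain settings dict. A minimal usage sketch of that API follows; the import path is the package layout shown in the diffs, while the input file name, the types/alias column arguments, and the output directory are illustrative assumptions rather than values taken from the patches.

    # Minimal usage sketch of the 0.0.8 API assembled by the patches above.
    # 'sample.bib', 'output/', and the types/alias arguments are placeholder
    # assumptions for illustration; only the method names come from the diffs.
    from pandarize.frame import Pandarizer

    pdr = Pandarizer()
    pdr.initialize()                 # load the bundled config/config.yaml into pdr.settings
    print(pdr.settings)              # plain dict of switches, e.g. {'convert_names': True, ...}
    pdr.load(source='sample.bib')    # local path or URL; pass savefile=... to keep a raw copy
    pdr.fit(kind='bib')              # parse the raw bib text into pdr.df (a pandas DataFrame)
    pdr.describe()                   # print basic row/column counts of the parsed frame
    pdr.transform(formats='bib',     # write the frame back out as output/output.bib
                  types='type', alias='alias', dirs='output/')

Because initialize() copies the packaged yaml into pdr.settings as a dict, individual options such as convert_names or remove_empty_entries can be flipped per run via pdr.settings without editing the packaged config file.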