diff --git a/.github/workflows/publish-test.yml b/.github/workflows/publish-test.yml index 2128983..7199b5f 100644 --- a/.github/workflows/publish-test.yml +++ b/.github/workflows/publish-test.yml @@ -3,7 +3,7 @@ on: pull_request: types: [closed] paths: - - 'pandarize/__init__.py' + - 'bibReader/__init__.py' jobs: deploy: runs-on: ubuntu-latest diff --git a/README.md b/README.md index 913ad3e..18d9b08 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,18 @@ -# Pandarize -This project aims to turn all kinds of data structure/types into a nice tabulated pandas DataFrame +[![PyPI](https://github.com/jshinm/pandarize/actions/workflows/publish-package.yml/badge.svg)](https://github.com/jshinm/pandarize/actions/workflows/publish-package.yml) +# bibReader +The bibReader reads non-standard bib format from files/url and converts them into pandas DataFrames to easily work with the data, and then transforms them back into a standardized bib file. # Installation ``` -pip install pandarize +pip install bibReader ``` # Basic Usage Guide ```python -from pandarize.frame import Pandarizer +from bibReader.frame import bReader -pdr = Pandarizer() #instantiate Pandarizer class -pdr.load(source='https://somewebsite.com/filename.bib') #it can load from url or local source -pdr.fit() #infers data types and parse it into pandas dataframe -pdr.transform() #changes pandas dataframe into different mode of data types +bib = bReader() #instantiate bReader class +bib.load(source='https://somewebsite.com/filename.bib') #it can load from url or local source +bib.fit() #infers data types and parses it into pandas dataframe +bib.transform() #changes pandas dataframe into different mode of data types ``` - -# Currently Supported Data Types -- bib - diff --git a/pandarize/__init__.py b/bibReader/__init__.py similarity index 100% rename from pandarize/__init__.py rename to bibReader/__init__.py diff --git a/pandarize/_util.py b/bibReader/_util.py similarity index 95% rename from pandarize/_util.py rename 
to bibReader/_util.py index 78c8df1..ed635db 100644 --- a/pandarize/_util.py +++ b/bibReader/_util.py @@ -1,5 +1,6 @@ from datetime import datetime import re +import bibReader def rfindall(string, pattern): '''Find index of all occurrence of the pattern''' @@ -62,13 +63,13 @@ def check_url(string): return False -def stamper(target, name, url, marker='%'): +def stamper(target, marker='%'): '''Creates head stamp on the transformed dataframe ''' msg = f'{marker}'*60 + '\n' - msg += f'{marker} This {target} file is created and processed by {name}\n' + msg += f'{marker} This {target} file is created and processed by bibReader-{bibReader.__version__}\n' msg += f'{marker} Date: {datetime.today().date()}\n' - msg += f'{marker} Webpage: {url}\n' + msg += f'{marker} Webpage: https://github.com/jshinm/bibReader/\n' msg += f'{marker}'*60 + '\n\n' return msg diff --git a/pandarize/config/config.yaml b/bibReader/config/config.yaml similarity index 100% rename from pandarize/config/config.yaml rename to bibReader/config/config.yaml diff --git a/pandarize/frame.py b/bibReader/frame.py similarity index 97% rename from pandarize/frame.py rename to bibReader/frame.py index 263818a..6c07a6e 100644 --- a/pandarize/frame.py +++ b/bibReader/frame.py @@ -3,7 +3,7 @@ from .loader import Loader from .parser import Parser -class Pandarizer(Loader, Parser): +class bReader(Loader, Parser): def __init__(self): self.initialize() diff --git a/pandarize/loader.py b/bibReader/loader.py similarity index 100% rename from pandarize/loader.py rename to bibReader/loader.py diff --git a/pandarize/parser.py b/bibReader/parser.py similarity index 100% rename from pandarize/parser.py rename to bibReader/parser.py diff --git a/examples/ndd_member_update.ipynb b/examples/ndd_member_update.ipynb index da36cbf..ee2e8d6 100644 --- a/examples/ndd_member_update.ipynb +++ b/examples/ndd_member_update.ipynb @@ -16,7 +16,7 @@ { "data": { "text/plain": [ - "'0.0.8.7'" + "'0.0.8.12'" ] }, "execution_count": 1, @@ 
-25,10 +25,10 @@ } ], "source": [ - "from pandarize.frame import Pandarizer\n", + "from bibReader.frame import bReader\n", "import pandas as pd\n", - "import pandarize\n", - "pandarize.__version__" + "import bibReader\n", + "bibReader.__version__" ] }, { @@ -45,8 +45,7 @@ } ], "source": [ - "pdr = Pandarizer()\n", - "pdr.initialize()" + "bib = bReader()" ] }, { @@ -55,8 +54,8 @@ "metadata": {}, "outputs": [], "source": [ - "pdr.settings['convert_names'] = False\n", - "pdr.settings['remove_empty_entries'] = False" + "bib.settings['convert_names'] = False\n", + "bib.settings['remove_empty_entries'] = False" ] }, { @@ -65,8 +64,8 @@ "metadata": {}, "outputs": [], "source": [ - "pdr.load('https://raw.githubusercontent.com/neurodata/neurodata.io/deploy/content/bibs/people.bib')\n", - "pdr.fit()" + "bib.load('https://raw.githubusercontent.com/neurodata/neurodata.io/deploy/content/bibs/people.bib')\n", + "bib.fit()" ] }, { @@ -186,7 +185,7 @@ " 09/20 –\n", " \n", " Currently investigating the effect of inductiv...\n", - " MS\n", + " MSE\n", " BME, JHU\n", " staff\n", " staffresearch\n", @@ -265,7 +264,7 @@ " abstract userb \\\n", "0 \n", "1 \n", - "2 Currently investigating the effect of inductiv... MS \n", + "2 Currently investigating the effect of inductiv... MSE \n", "3 Developed Sparse Projection Oblique Randomer F... MSE \n", "4 Javier studies how larval zebrafish learn how ... Ph.D. 
\n", "\n", @@ -290,7 +289,7 @@ } ], "source": [ - "pdr.df.head()" + "bib.df.head()" ] }, { @@ -299,7 +298,7 @@ "metadata": {}, "outputs": [], "source": [ - "pdr.df.to_excel(filepath, index=False, engine='xlsxwriter')" + "bib.df.to_excel(filepath, index=False, engine='xlsxwriter')" ] }, { @@ -315,7 +314,7 @@ "metadata": {}, "outputs": [], "source": [ - "# pdr.df = pd.read_excel(ufilepath, dtype=object, na_filter=False)" + "bib.df = pd.read_excel(ufilepath, dtype=object, na_filter=False)" ] }, { @@ -425,13 +424,13 @@ " 09/20 –\n", " \n", " Currently investigating the effect of inductiv...\n", - " MS\n", + " MSE\n", " BME, JHU\n", " staff\n", " staffresearch\n", " jshinm\n", " jshin69@jhu.edu\n", - " \n", + " modrev.org/jshinm\n", " safe-zone\n", " jong_shin.png\n", " 2.0\n", @@ -504,7 +503,7 @@ " abstract userb \\\n", "0 \n", "1 \n", - "2 Currently investigating the effect of inductiv... MS \n", + "2 Currently investigating the effect of inductiv... MSE \n", "3 Developed Sparse Projection Oblique Randomer F... MSE \n", "4 Javier studies how larval zebrafish learn how ... Ph.D. 
\n", "\n", @@ -518,7 +517,7 @@ " url usere file priority \n", "0 neurodata.io/about/jovo/ safe-zone vogelstein_joshua.jpg 0.0 \n", "1 priebe_carey.jpg 1.0 \n", - "2 safe-zone jong_shin.png 2.0 \n", + "2 modrev.org/jshinm safe-zone jong_shin.png 2.0 \n", "3 tomita_tyler.jpg 3.0 \n", "4 safe-zone javier_how.jpg 3.0 " ] @@ -529,26 +528,26 @@ } ], "source": [ - "pdr.df.head(5)" + "bib.df.head(5)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "pdr.df.priority = pdr.df.priority.astype(float)\n", - "pdr.df.sort_values(['priority'], ascending=True, inplace=True)" + "bib.df.priority = bib.df.priority.astype(float)\n", + "bib.df.sort_values(['priority'], ascending=True, inplace=True)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "pdr.transform(filename='people')" + "bib.transform(filename='people')" ] }, { diff --git a/examples/ndd_pubs_update.ipynb b/examples/ndd_pubs_update.ipynb index e5d4a7f..3e204e7 100644 --- a/examples/ndd_pubs_update.ipynb +++ b/examples/ndd_pubs_update.ipynb @@ -16,7 +16,7 @@ { "data": { "text/plain": [ - "'0.0.8.7'" + "'0.0.8.12'" ] }, "execution_count": 1, @@ -25,22 +25,29 @@ } ], "source": [ - "from pandarize.frame import Pandarizer\n", - "import pandarize\n", + "from bibReader.frame import bReader\n", "import pandas as pd\n", - "pandarize.__version__" + "import bibReader\n", + "bibReader.__version__" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Configuration applied. 
Please change the setting via .settings as needed.\n" + ] + } + ], "source": [ - "pdr = Pandarizer()\n", - "pdr.load('https://raw.githubusercontent.com/neurodata/neurodata.io/deploy/content/bibs/pubs.bib')\n", - "# pdr.load('pubs.bib') #for local file\n", - "pdr.fit()" + "bib = bReader()\n", + "bib.load('https://raw.githubusercontent.com/neurodata/neurodata.io/deploy/content/bibs/pubs.bib')\n", + "bib.fit()" ] }, { @@ -59,7 +66,7 @@ "metadata": {}, "outputs": [], "source": [ - "pdr.df.to_excel(filepath, index=False, engine='xlsxwriter')" + "bib.df.to_excel(filepath, index=False, engine='xlsxwriter')" ] }, { @@ -71,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -106,16 +113,16 @@ " month\n", " address\n", " ...\n", - " adsnote\n", + " elocation-id\n", " urldate\n", - " tag\n", " howpublished\n", + " tag\n", + " note\n", + " day\n", " annotation\n", " acmid\n", " articleno\n", " issue_date\n", - " note\n", - " day\n", " \n", " \n", " \n", @@ -126,19 +133,19 @@ ], "text/plain": [ "Empty DataFrame\n", - "Columns: [type, alias, title, author, author+an, year, keywords, url, month, address, journal, pages, volume, number, booktitle, doi, isbn, abstract, organization, publisher, issn, pmid, numpages, location, series, eprint, archiveprefix, arxivid, editor, chapter, annote, archivePrefix, primaryClass, primaryclass, eprinttype, copyright, URL, elocation-id, shorttitle, eid, adsurl, adsnote, urldate, tag, howpublished, annotation, acmid, articleno, issue_date, note, day]\n", + "Columns: [type, alias, title, author, author+an, year, keywords, url, month, address, volume, pages, number, journal, booktitle, doi, isbn, abstract, issn, eprint, archiveprefix, arxivid, chapter, publisher, pmid, annote, numpages, location, series, organization, editor, primaryclass, eprinttype, copyright, shorttitle, archivePrefix, primaryClass, eid, adsurl, adsnote, URL, elocation-id, urldate, howpublished, tag, note, day, 
annotation, acmid, articleno, issue_date]\n", "Index: []\n", "\n", "[0 rows x 51 columns]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pdr.df[pdr.df['alias'].duplicated()]" + "bib.df[bib.df['alias'].duplicated()]" ] }, { @@ -150,16 +157,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# pdr.df = pd.read_excel(ufilepath, dtype=object, na_filter=False)" + "bib.df = pd.read_excel(ufilepath, dtype=object, na_filter=False)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -194,16 +201,16 @@ " month\n", " address\n", " ...\n", - " adsnote\n", + " elocation-id\n", " urldate\n", - " tag\n", " howpublished\n", + " tag\n", + " note\n", + " day\n", " annotation\n", " acmid\n", " articleno\n", " issue_date\n", - " note\n", - " day\n", " \n", " \n", " \n", @@ -220,112 +227,112 @@ " 6\n", " OHBM, Rome Italy\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " 1\n", " inproceedings\n", - " Allen2015synaptome\n", - " The Open Synaptome Project: Toward a Microscop...\n", - " Smith, Stephen J. 
and Burns, Randal and Chevil...\n", - " 8=highlight\n", - " 2015\n", + " ThomasNeuro2020\n", + " Automated Neuron Tracing of Sparse Fluorescent...\n", + " Athey, Thomas L and Sulam, Jeremias and Vogels...\n", + " 3=highlight; 1=trainee\n", + " 2020\n", " abspos\n", - " https://figshare.com/articles/Open_Synaptome_P...\n", - " 10\n", - " Society for Neuroscience, Chicago, IL, USA\n", + " \n", + " 11\n", + " Neuromatch 3\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " 2\n", " inproceedings\n", - " XBrain2015\n", - " X-Brain: Quantifying Mesoscale Neuroanatomy Us...\n", - " Deyer, Eva L. and Fernandes, Hugo L. and Ronca...\n", - " 5=highlight;3=trainee\n", - " 2015\n", + " hayden_naisys_2020\n", + " A Biological Implementation of Lifelong Learni...\n", + " Vogelstein, Joshua T. and Helm, Hayden and Ped...\n", + " 2=trainee;3=trainee;4=trainee;1=highlight\n", + " 2020\n", " abspos\n", - " https://figshare.com/articles/X_Brain_Quantify...\n", - " NaN\n", - " Figshare\n", + " \n", + " 11\n", + " NAIsys, Cold Spring Harbor, NY, USA\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " 3\n", " inproceedings\n", - " Design2015\n", - " Optimal Design for Discovery Science: Applicat...\n", - " Wang, Shangsi and Yang, Zhi and Zuo, Xi-Nian a...\n", - " 1=trainee;7=highlight\n", - " 2015\n", + " berlin_2017\n", + " Processing and Analyzing Terascale Conjugate A...\n", + " Baden, Alex and Perlman, Eric and Collman, For...\n", + " 1=trainee;5=highlight\n", + " 2017\n", " abspos\n", - " https://figshare.com/articles/Optimal_Design_f...\n", - " NaN\n", - " Figshare\n", + " 
https://neurodata.io/talks/berlin_2017.pdf\n", + " \n", + " Berlin, Germany\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " 4\n", " inproceedings\n", - " Sparse2015\n", - " A Sparse High Dimensional State-Space Model wi...\n", - " Chen, Shaojie and Liu, Kai and Yuguang, Yang a...\n", - " 1=trainee;7=highlight\n", - " 2015\n", + " falk_open_data2019\n", + " NeuroData's Open Data Cloud Ecosystem\n", + " Falk, Benjamin and Vogelstein, Joshua T.\n", + " 2=highlight\n", + " 2019\n", " abspos\n", - " https://figshare.com/articles/A_Sparse_High_Di...\n", - " NaN\n", - " Figshare\n", + " https://neurodata.io/talks/25_NeuroDatas_Open_...\n", + " 7\n", + " Harvard University, Cambridge, MA, USA\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "\n", @@ -335,78 +342,78 @@ "text/plain": [ " type alias \\\n", "0 inproceedings j12019 \n", - "1 inproceedings Allen2015synaptome \n", - "2 inproceedings XBrain2015 \n", - "3 inproceedings Design2015 \n", - "4 inproceedings Sparse2015 \n", + "1 inproceedings ThomasNeuro2020 \n", + "2 inproceedings hayden_naisys_2020 \n", + "3 inproceedings berlin_2017 \n", + "4 inproceedings falk_open_data2019 \n", "\n", " title \\\n", "0 Clustering Multi-Modal Connectomes \n", - "1 The Open Synaptome Project: Toward a Microscop... \n", - "2 X-Brain: Quantifying Mesoscale Neuroanatomy Us... \n", - "3 Optimal Design for Discovery Science: Applicat... \n", - "4 A Sparse High Dimensional State-Space Model wi... \n", + "1 Automated Neuron Tracing of Sparse Fluorescent... \n", + "2 A Biological Implementation of Lifelong Learni... 
\n", + "3 Processing and Analyzing Terascale Conjugate A... \n", + "4 NeuroData's Open Data Cloud Ecosystem \n", "\n", " author \\\n", "0 Chung, Jaewon and Pedigo, Benjamin D. and Prie... \n", - "1 Smith, Stephen J. and Burns, Randal and Chevil... \n", - "2 Deyer, Eva L. and Fernandes, Hugo L. and Ronca... \n", - "3 Wang, Shangsi and Yang, Zhi and Zuo, Xi-Nian a... \n", - "4 Chen, Shaojie and Liu, Kai and Yuguang, Yang a... \n", + "1 Athey, Thomas L and Sulam, Jeremias and Vogels... \n", + "2 Vogelstein, Joshua T. and Helm, Hayden and Ped... \n", + "3 Baden, Alex and Perlman, Eric and Collman, For... \n", + "4 Falk, Benjamin and Vogelstein, Joshua T. \n", "\n", - " author+an year keywords \\\n", - "0 1=trainee;2=trainee;4=highlight 2019 abspos \n", - "1 8=highlight 2015 abspos \n", - "2 5=highlight;3=trainee 2015 abspos \n", - "3 1=trainee;7=highlight 2015 abspos \n", - "4 1=trainee;7=highlight 2015 abspos \n", + " author+an year keywords \\\n", + "0 1=trainee;2=trainee;4=highlight 2019 abspos \n", + "1 3=highlight; 1=trainee 2020 abspos \n", + "2 2=trainee;3=trainee;4=trainee;1=highlight 2020 abspos \n", + "3 1=trainee;5=highlight 2017 abspos \n", + "4 2=highlight 2019 abspos \n", "\n", " url month \\\n", "0 https://figshare.com/articles/Clustering_Multi... 6 \n", - "1 https://figshare.com/articles/Open_Synaptome_P... 10 \n", - "2 https://figshare.com/articles/X_Brain_Quantify... NaN \n", - "3 https://figshare.com/articles/Optimal_Design_f... NaN \n", - "4 https://figshare.com/articles/A_Sparse_High_Di... NaN \n", + "1 11 \n", + "2 11 \n", + "3 https://neurodata.io/talks/berlin_2017.pdf \n", + "4 https://neurodata.io/talks/25_NeuroDatas_Open_... 7 \n", "\n", - " address ... adsnote urldate tag \\\n", - "0 OHBM, Rome Italy ... NaN NaN NaN \n", - "1 Society for Neuroscience, Chicago, IL, USA ... NaN NaN NaN \n", - "2 Figshare ... NaN NaN NaN \n", - "3 Figshare ... NaN NaN NaN \n", - "4 Figshare ... NaN NaN NaN \n", + " address ... 
elocation-id urldate \\\n", + "0 OHBM, Rome Italy ... \n", + "1 Neuromatch 3 ... \n", + "2 NAIsys, Cold Spring Harbor, NY, USA ... \n", + "3 Berlin, Germany ... \n", + "4 Harvard University, Cambridge, MA, USA ... \n", "\n", - " howpublished annotation acmid articleno issue_date note day \n", - "0 NaN NaN NaN NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN NaN NaN NaN \n", + " howpublished tag note day annotation acmid articleno issue_date \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", "[5 rows x 51 columns]" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pdr.df.head(5)" + "bib.df.head(5)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# pdr.df.priority = pdr.df.priority.astype(float)\n", - "pdr.df.sort_values(['keywords'], ascending=True, inplace=True)" + "bib.df.sort_values(['keywords'], ascending=True, inplace=True)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -441,16 +448,16 @@ " month\n", " address\n", " ...\n", - " adsnote\n", + " elocation-id\n", " urldate\n", - " tag\n", " howpublished\n", + " tag\n", + " note\n", + " day\n", " annotation\n", " acmid\n", " articleno\n", " issue_date\n", - " note\n", - " day\n", " \n", " \n", " \n", @@ -467,112 +474,112 @@ " 6\n", " OHBM, Rome Italy\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " 1\n", " inproceedings\n", - " ThomasNeuro2020\n", - " Automated Neuron Tracing of Sparse Fluorescent...\n", - " Athey, Thomas L and Sulam, Jeremias and Vogels...\n", - " 3=highlight; 1=trainee\n", - " 2020\n", + " 
Airan2013\n", + " Reproducible differentiation of individual of ...\n", + " D, Raag and Vogelstein, Airan A. and Caffo, Jo...\n", + " 2=highlight\n", + " 2013\n", " abspos\n", - " NaN\n", - " 11\n", - " Neuromatch 3\n", + " http://dx.doi.org/10.6084/m9.figshare.1284146\n", + " 4\n", + " Proc ISMRM, Salt Lake City, UT, USA\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " 2\n", " inproceedings\n", - " hayden_naisys_2020\n", - " A Biological Implementation of Lifelong Learni...\n", - " Vogelstein, Joshua T. and Helm, Hayden and Ped...\n", - " 2=trainee;3=trainee;4=trainee;1=highlight\n", - " 2020\n", + " Allen2015synaptome\n", + " The Open Synaptome Project: Toward a Microscop...\n", + " Smith, Stephen J. and Burns, Randal and Chevil...\n", + " 8=highlight\n", + " 2015\n", " abspos\n", - " NaN\n", - " 11\n", - " NAIsys, Cold Spring Harbor, NY, USA\n", + " https://figshare.com/articles/Open_Synaptome_P...\n", + " 10\n", + " Society for Neuroscience, Chicago, IL, USA\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " 3\n", " inproceedings\n", - " berlin_2017\n", - " Processing and Analyzing Terascale Conjugate A...\n", - " Baden, Alex and Perlman, Eric and Collman, For...\n", - " 1=trainee;5=highlight\n", - " 2017\n", + " XBrain2015\n", + " X-Brain: Quantifying Mesoscale Neuroanatomy Us...\n", + " Deyer, Eva L. and Fernandes, Hugo L. 
and Ronca...\n", + " 5=highlight;3=trainee\n", + " 2015\n", " abspos\n", - " https://neurodata.io/talks/berlin_2017.pdf\n", - " NaN\n", - " Berlin, Germany\n", + " https://figshare.com/articles/X_Brain_Quantify...\n", + " \n", + " Figshare\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " 4\n", " inproceedings\n", - " falk_open_data2019\n", - " NeuroData's Open Data Cloud Ecosystem\n", - " Falk, Benjamin and Vogelstein, Joshua T.\n", - " 2=highlight\n", - " 2019\n", + " Design2015\n", + " Optimal Design for Discovery Science: Applicat...\n", + " Wang, Shangsi and Yang, Zhi and Zuo, Xi-Nian a...\n", + " 1=trainee;7=highlight\n", + " 2015\n", " abspos\n", - " https://neurodata.io/talks/25_NeuroDatas_Open_...\n", - " 7\n", - " Harvard University, Cambridge, MA, USA\n", + " https://figshare.com/articles/Optimal_Design_f...\n", + " \n", + " Figshare\n", " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "\n", @@ -582,72 +589,72 @@ "text/plain": [ " type alias \\\n", "0 inproceedings j12019 \n", - "1 inproceedings ThomasNeuro2020 \n", - "2 inproceedings hayden_naisys_2020 \n", - "3 inproceedings berlin_2017 \n", - "4 inproceedings falk_open_data2019 \n", + "1 inproceedings Airan2013 \n", + "2 inproceedings Allen2015synaptome \n", + "3 inproceedings XBrain2015 \n", + "4 inproceedings Design2015 \n", "\n", " title \\\n", "0 Clustering Multi-Modal Connectomes \n", - "1 Automated Neuron Tracing of Sparse Fluorescent... \n", - "2 A Biological Implementation of Lifelong Learni... \n", - "3 Processing and Analyzing Terascale Conjugate A... 
\n", - "4 NeuroData's Open Data Cloud Ecosystem \n", + "1 Reproducible differentiation of individual of ... \n", + "2 The Open Synaptome Project: Toward a Microscop... \n", + "3 X-Brain: Quantifying Mesoscale Neuroanatomy Us... \n", + "4 Optimal Design for Discovery Science: Applicat... \n", "\n", " author \\\n", "0 Chung, Jaewon and Pedigo, Benjamin D. and Prie... \n", - "1 Athey, Thomas L and Sulam, Jeremias and Vogels... \n", - "2 Vogelstein, Joshua T. and Helm, Hayden and Ped... \n", - "3 Baden, Alex and Perlman, Eric and Collman, For... \n", - "4 Falk, Benjamin and Vogelstein, Joshua T. \n", + "1 D, Raag and Vogelstein, Airan A. and Caffo, Jo... \n", + "2 Smith, Stephen J. and Burns, Randal and Chevil... \n", + "3 Deyer, Eva L. and Fernandes, Hugo L. and Ronca... \n", + "4 Wang, Shangsi and Yang, Zhi and Zuo, Xi-Nian a... \n", "\n", - " author+an year keywords \\\n", - "0 1=trainee;2=trainee;4=highlight 2019 abspos \n", - "1 3=highlight; 1=trainee 2020 abspos \n", - "2 2=trainee;3=trainee;4=trainee;1=highlight 2020 abspos \n", - "3 1=trainee;5=highlight 2017 abspos \n", - "4 2=highlight 2019 abspos \n", + " author+an year keywords \\\n", + "0 1=trainee;2=trainee;4=highlight 2019 abspos \n", + "1 2=highlight 2013 abspos \n", + "2 8=highlight 2015 abspos \n", + "3 5=highlight;3=trainee 2015 abspos \n", + "4 1=trainee;7=highlight 2015 abspos \n", "\n", " url month \\\n", "0 https://figshare.com/articles/Clustering_Multi... 6 \n", - "1 NaN 11 \n", - "2 NaN 11 \n", - "3 https://neurodata.io/talks/berlin_2017.pdf NaN \n", - "4 https://neurodata.io/talks/25_NeuroDatas_Open_... 7 \n", + "1 http://dx.doi.org/10.6084/m9.figshare.1284146 4 \n", + "2 https://figshare.com/articles/Open_Synaptome_P... 10 \n", + "3 https://figshare.com/articles/X_Brain_Quantify... \n", + "4 https://figshare.com/articles/Optimal_Design_f... \n", "\n", - " address ... adsnote urldate tag \\\n", - "0 OHBM, Rome Italy ... NaN NaN NaN \n", - "1 Neuromatch 3 ... 
NaN NaN NaN \n", - "2 NAIsys, Cold Spring Harbor, NY, USA ... NaN NaN NaN \n", - "3 Berlin, Germany ... NaN NaN NaN \n", - "4 Harvard University, Cambridge, MA, USA ... NaN NaN NaN \n", + " address ... elocation-id urldate \\\n", + "0 OHBM, Rome Italy ... \n", + "1 Proc ISMRM, Salt Lake City, UT, USA ... \n", + "2 Society for Neuroscience, Chicago, IL, USA ... \n", + "3 Figshare ... \n", + "4 Figshare ... \n", "\n", - " howpublished annotation acmid articleno issue_date note day \n", - "0 NaN NaN NaN NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN NaN NaN NaN \n", + " howpublished tag note day annotation acmid articleno issue_date \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", "[5 rows x 51 columns]" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pdr.df.reset_index(drop=True).head(5)" + "bib.df.reset_index(drop=True).head(5)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "pdr.transform(filename='pubs')" + "bib.transform(filename='pubs')" ] }, { diff --git a/setup.py b/setup.py index 81e7c77..f580921 100644 --- a/setup.py +++ b/setup.py @@ -1,20 +1,20 @@ from distutils.core import setup from setuptools import find_packages -import pandarize +import bibReader -VERSION = pandarize.__version__ +VERSION = bibReader.__version__ with open('README.md', 'r') as f: README = f.read() setup( - name='Pandarize', + name='bibReader', version=VERSION, author='Jong M. 
Shin', author_email='jshinm@gmail.com', packages=find_packages(), - package_data = {"": ['pandarize/config/config.yaml']}, + package_data = {"": ['bibReader/config/config.yaml']}, include_package_data=True, - url='https://github.com/jshinm/pandarize/', + url='https://github.com/jshinm/bibReader/', license='MIT', description='Turns data into panda dataframe', readme='README.md',