Skip to content

Commit

Permalink
Merge pull request #12 from gaelforget/v0p1p3b
Browse files Browse the repository at this point in the history
V0p1p3b
  • Loading branch information
gaelforget authored Jun 19, 2022
2 parents cc50484 + 9c48eed commit fc3c144
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 83 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Dataverse"
uuid = "9c0b9be8-e31e-490f-90fe-77697562404d"
authors = ["gaelforget <gforget@mit.edu> and contributors"]
version = "0.1.2"
version = "0.1.3"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
1 change: 0 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ makedocs(;
),
pages=[
"Home" => "index.md",
"APIs" => "dataverse_access.md",
],
)

Expand Down
2 changes: 1 addition & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ This package is about interfaces to the [Dataverse](https://dataverse.org) proje
## Functionalities

```@autodocs
Modules = [pyDataverse, Dataverse.downloads]
Modules = [pyDataverse, DataverseDownloads]
```
5 changes: 1 addition & 4 deletions src/Dataverse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ include("pyDataverse.jl")
export pyDataverse

include("downloads.jl")
export get_from_dataverse, dataverse_lists, example_lists
get_from_dataverse=downloads.get_from_dataverse
dataverse_lists=downloads.dataverse_lists
example_lists=downloads.example_lists
export DataverseDownloads

end
45 changes: 14 additions & 31 deletions src/downloads.jl
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
module downloads
module DataverseDownloads

using Downloads, OceanStateEstimation, CSV, DataFrames
import Dataverse.pyDataverse.dataset_file_list
using Downloads, DataFrames

##

"""
get_from_dataverse(lst::String,nam::String,pth::String)
download_files(lists::NamedTuple,nam::String,pth::String)
```
lst=example_lists.OCCA_list
nams=example_lists.OCCA_files.name
[get_from_dataverse(lst,string(nam),tempdir()) for nam in nams[:]]
lst=DataverseDownloads.download_urls(pyDataverse.dataset_file_list(:OCCA_clim))
DataverseDownloads.download_files(lst,lst.name[1],tempdir())
```
"""
function get_from_dataverse(lst::String,nam::String,pth::String)
lists=dataverse_lists(lst)
function download_files(lists::NamedTuple,nam::String,pth::String)
ii = findall([occursin("$nam", lists.name[i]) for i=1:length(lists.ID)])
for i in ii
nam1=Downloads.download(lists.URL[i])
Expand All @@ -30,35 +29,19 @@ function get_from_dataverse(lst::String,nam::String,pth::String)
end

"""
dataverse_lists(lst::String)
download_urls(df::DataFrame)
Read and derive lists (ID,name,URL) from csv file (ID,name) and return as tuple
```
lst=example_lists.OCCA_list
ECCO_files=dataverse_lists(lst)
```
Add download URL (using df.id) and return as NamedTuple.
"""
function dataverse_lists(lst::String)
tmp=readlines(lst)
ID=[parse(Int,tmp[j][1:findfirst(isequal(','),tmp[j])-1]) for j=2:length(tmp)]
name=[tmp[j][findfirst(isequal(','),tmp[j])+1:end] for j=2:length(tmp)]
download_urls(df::DataFrame) = begin
tmp="https://dataverse.harvard.edu/api/access/datafile/"
URL=[tmp*"$(ID[j])" for j=1:length(ID)]
return (ID=ID,name=name,URL=URL)
URL=[tmp*"$(df.id[j])" for j=1:length(df.id)]
(ID=df.id,name=df.filename,URL=URL)
end

##

pth=dirname(pathof(OceanStateEstimation))

OCCA_list=joinpath(pth,"../examples/OCCA_climatology.csv")
OCCA_files=dataverse_lists(OCCA_list)

ECCO_list=joinpath(pth,"../examples/nctiles_climatology.csv")
ECCO_files=dataverse_lists(ECCO_list)

example_lists=( OCCA_list=OCCA_list,OCCA_files=OCCA_files,
ECCO_list=ECCO_list,ECCO_files=ECCO_files)
# File list (ID, name, URL) for the `:OCCA_clim` demo data set.
function OCCA_files()
    return download_urls(dataset_file_list(:OCCA_clim))
end
# File list (ID, name, URL) for the `:ECCO_clim` demo data set.
function ECCO_files()
    return download_urls(dataset_file_list(:ECCO_clim))
end

end
101 changes: 69 additions & 32 deletions src/pyDataverse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,32 @@ end
"""
demo(option::String)
- call `demo_basic` if `option=="basic"`
- call `demo_ECCO` if `option=="ECCO"`
"""
function demo(option="basic")
if option=="basic"
demo_basic()
elseif option=="ECCO"
demo_ECCO()
- call `demo_download` if `option=="download"`
- call `demo_metadata` if `option=="metadata"`
"""
function demo(option="download")
    # Dispatch on the option name; the selected demo's return value is passed through.
    option=="download" && return demo_download()
    option=="metadata" && return demo_metadata()
    # Any other value: report it; the result of `println` (nothing) is returned.
    println("unknown option")
end

"""
demo_basic(;path=tempdir(),DOI = "doi:10.7910/DVN/KBHLOD")
demo_download(;path=tempdir(),DOI = "doi:10.7910/DVN/KBHLOD")
Replicate the workflow example from
<https://pydataverse.readthedocs.io/en/latest/user/basic-usage.html#download-and-save-a-dataset-to-disk>
```
pyDataverse.demo_basic()
pyDataverse.demo_download()
```
"""
function demo_basic(;path=tempdir(),DOI = "doi:10.7910/DVN/KBHLOD")
(DataAccessApi,NativeApi)=pyDataverse.APIs()
function demo_download(;path=tempdir(),DOI = "doi:10.7910/DVN/KBHLOD")
(DataAccessApi,NativeApi)=pyDataverse.APIs(do_install=false)
dataset = NativeApi.get_dataset(DOI)
files_list = dataset.json()["data"]["latestVersion"]["files"]
filenames=String[]
Expand All @@ -59,11 +61,7 @@ function demo_basic(;path=tempdir(),DOI = "doi:10.7910/DVN/KBHLOD")
filenames
end

"""
tree_children_to_DataFrame(files)
Convert output of e.g. `tree[1]["children"]` to DataFrame. See notebook for a more complete example.
"""
#Deprecated : see `dataset_file_list`+`files_to_DataFrame` instead
function tree_children_to_DataFrame(files)
nf=length(files)
filename=[files[ff]["filename"] for ff in 1:nf]
Expand All @@ -73,40 +71,79 @@ function tree_children_to_DataFrame(files)
end

"""
dataset_children_to_DataFrame(files)
files_to_DataFrame(files)
Convert output of e.g. `dataset.json()["data"]["latestVersion"]["files"]` to DataFrame. See notebook for a more complete example.
Convert output from `dataset.json()["data"]["latestVersion"]["files"]` to DataFrame.
See notebook for a more complete example.
"""
function dataset_files_to_DataFrame(files)
function files_to_DataFrame(files)
nf=length(files)
filename=[files[ff]["dataFile"]["filename"] for ff in 1:nf]
filesize=[files[ff]["dataFile"]["filesize"] for ff in 1:nf]
id=[files[ff]["dataFile"]["id"] for ff in 1:nf]
pidURL=[files[ff]["dataFile"]["pidURL"] for ff in 1:nf]
DataFrame(filename=filename,filesize=filesize,pidURL=pidURL)
DataFrame(filename=filename,filesize=filesize,id=id,pidURL=pidURL)
end


"""
demo_ECCO()
demo_metadata()
```
pyDataverse.demo_metadata()
```
"""
function demo_metadata()
    # First element: file list of the `:OCCA_clim` data set.
    # Second element: result of `dataverse_file_list` for `:ECCOv4r2`.
    return (dataset_file_list(:OCCA_clim), dataverse_file_list(:ECCOv4r2))
end

##

"""
dataset_file_list(nam::Symbol=:OCCA_clim)
Lookup DOI from list of demo data sets.
```
pyDataverse.demo_ECCO()
dataset_file_list(:OCCA_clim)
```
"""
function demo_ECCO()
(DataAccessApi,NativeApi)=pyDataverse.APIs()
function dataset_file_list(nam::Symbol=:OCCA_clim)
    # Demo data sets known to this method, keyed by short name.
    DOI=(OCCA_clim="doi:10.7910/DVN/RNXA2A",ECCO_clim="doi:10.7910/DVN/3HPRZI")
    # Fail early, naming the valid keys, instead of surfacing a bare NamedTuple lookup error.
    haskey(DOI,nam) || throw(ArgumentError(
        "unknown data set $(repr(nam)); expected one of $(keys(DOI))"))
    # No API handles are requested here: the `DOI::String` method obtains them itself.
    return dataset_file_list(DOI[nam])
end

DOI="doi:10.7910/DVN/AVVGYX"
"""
dataset_file_list(DOI::String)
Use `NativeApi.get_dataset` to derive the list of files (name, etc) via `files_to_DataFrame`.
```
dataset_file_list("doi:10.7910/DVN/ODM2IQ")
```
"""
function dataset_file_list(DOI::String)
(DataAccessApi,NativeApi)=pyDataverse.APIs(do_install=false)
dataset = NativeApi.get_dataset(DOI)
dataset_files = dataset.json()["data"]["latestVersion"]["files"]
df1=dataset_files_to_DataFrame(dataset_files)
files_to_DataFrame(dataset_files)
end

NAME="ECCOv4r2"
tree = NativeApi.get_children(NAME, children_types= ["datasets", "datafiles"])
files=tree[1]["children"]
df2=tree_children_to_DataFrame(files)
"""
dataverse_file_list(nam::Symbol=:ECCOv4r2)
df1,df2
- Use `NativeApi.get_children` to get the tree of datasets
- Loop through and return vector of `dataset_file_list` output
"""
function dataverse_file_list(nam::Symbol=:ECCOv4r2)
# Obtain both API handles; only `NativeApi` is used below.
(DataAccessApi,NativeApi)=pyDataverse.APIs(do_install=false)
# Query the children of the dataverse named `string(nam)`, passing
# `children_types=["datasets", "datafiles"]` to `get_children`.
tree = NativeApi.get_children(string(nam), children_types= ["datasets", "datafiles"])
# Earlier variant, kept for reference (built the tables from each entry's "children"):
#[tree_children_to_DataFrame(leaf["children"]) for leaf in tree]
# One `dataset_file_list` result per entry of `tree`, looked up via the entry's "pid"
# value (presumably a DOI string -- TODO confirm against the API output).
[dataset_file_list(leaf["pid"]) for leaf in tree]
end

end
21 changes: 8 additions & 13 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,16 @@ using UUIDs
pyDataverse.APIs(do_install=true)

@testset "Dataverse.jl" begin
lst=example_lists.OCCA_list
nams=example_lists.OCCA_files.name

pth=joinpath(tempdir(),string(UUIDs.uuid4()))
lst=DataverseDownloads.OCCA_files()
pth=joinpath(tempdir(),string(UUIDs.uuid4()))
mkdir(pth)
DataverseDownloads.download_files(lst,lst.name[1],pth)
@test isfile(joinpath(pth,lst.name[1]))

nam=nams[2]
get_from_dataverse(lst,string(nam),pth)
@test isfile(joinpath(pth,nam))

tmp=pyDataverse.demo("basic")
tmp=pyDataverse.demo("download")
@test isfile(tmp[1])

df1,df2=pyDataverse.demo("ECCO")
@test size(df1,1)==3
@test size(df2,1)==12

df1,df2=pyDataverse.demo("metadata")
@test size(df1,1)==56
@test size(df2,1)==11
end

0 comments on commit fc3c144

Please sign in to comment.