From 26a5d7ab1c0267f8079a09e7cd5f9ec0aa45f238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=AD=20Bosch?= Date: Mon, 24 Jun 2024 14:02:52 +0200 Subject: [PATCH 1/2] feat: hive partitioning for id/var (TODO: make it optional?) --- tstore/archive/io.py | 80 ++++++++++++++++++++++++++++++++---- tstore/tests/test_tsdf.py | 8 ++-- tstore/tests/test_tslong.py | 6 ++- tstore/tsdf/pandas.py | 65 +++++++++++++++++++++++------ tstore/tsdf/reader.py | 48 +++++++++++++++++++--- tstore/tsdf/writer.py | 82 ++++++++++++++++++++++++++++++------- tstore/tslong/pandas.py | 48 +++++++++++++++++----- tstore/tslong/polars.py | 3 ++ tstore/tslong/pyarrow.py | 8 +++- 9 files changed, 292 insertions(+), 56 deletions(-) diff --git a/tstore/archive/io.py b/tstore/archive/io.py index ba8386f..c6b1392 100644 --- a/tstore/archive/io.py +++ b/tstore/archive/io.py @@ -3,6 +3,7 @@ import glob import os import shutil +from pathlib import Path def check_tstore_structure(tstore_structure): @@ -31,12 +32,43 @@ def define_attributes_filepath(base_dir): return fpath -def define_tsarray_filepath(base_dir, tstore_id, ts_variable, tstore_structure): - """Define filepath of a TStore TS.""" +def define_tsarray_filepath( + base_dir: Path | str, + tstore_id: str, + ts_variable: str, + tstore_structure: str, + id_prefix: str, + var_prefix: str, +) -> str: + """ + Define filepath of a TStore TS. + + Parameters + ---------- + base_dir : path-like + Base directory of the TStore. + tstore_id : str + Value of the time series ID. + ts_variable : str + Name of the time series variable. + tstore_structure : ["id-var", "var-id"] + TStore structure, either "id-var" or "var-id". + id_prefix : str + Prefix for the ID directory in the TStore. + var_prefix : str + Prefix for the variable directory in the TStore. + + Returns + ------- + fpath : str + Filepath for the time series. 
+ """ + id_dir_basename = f"{id_prefix}={tstore_id}" + var_dir_basename = f"{var_prefix}={ts_variable}" if tstore_structure == "id-var": - fpath = os.path.join(base_dir, tstore_id, ts_variable) + fpath = os.path.join(base_dir, id_dir_basename, var_dir_basename) elif tstore_structure == "var-id": - fpath = os.path.join(base_dir, ts_variable, tstore_id) + fpath = os.path.join(base_dir, var_dir_basename, id_dir_basename) else: raise ValueError("Valid tstore_structure are 'id-var' and 'var-id'.") return fpath @@ -82,16 +114,48 @@ def get_partitions(base_dir, ts_variable): return partitions -def get_ts_info(base_dir, ts_variable): - """Retrieve filepaths and tstore_ids for a specific ts_variable.""" +def get_ts_info( + base_dir: Path | str, + ts_variable: str, + var_prefix: str, +): + """ + Retrieve filepaths and tstore_ids for a specific ts_variable. + + Parameters + ---------- + base_dir : path-like + Base directory of the TStore. + ts_variable : str + Name of the time series variable. + var_prefix : str + Prefix for the variable directory in the TStore. + + Returns + ------- + fpaths : list of str + List of filepaths for the time series. + tstore_ids : list of str + List of time series IDs. + partitions : list of str + List of partitions. + """ tstore_structure = get_tstore_structure(base_dir) + + # TODO: DRY with `define_tsarray_filepath`? 
+ var_dir_basename = f"{var_prefix}={ts_variable}" + if tstore_structure == "id-var": - fpaths = glob.glob(os.path.join(base_dir, "*", ts_variable)) + fpaths = glob.glob(os.path.join(base_dir, "*", var_dir_basename)) tstore_ids = [os.path.basename(os.path.dirname(fpath)) for fpath in fpaths] elif tstore_structure == "var-id": - fpaths = glob.glob(os.path.join(base_dir, ts_variable, "*")) + fpaths = glob.glob(os.path.join(base_dir, var_dir_basename, "*")) tstore_ids = [os.path.basename(fpath) for fpath in fpaths] else: raise ValueError("Valid tstore_structure are 'id-var' and 'var-id'.") + # get only id values (remove prefix from hive prefix=value notation) + tstore_ids = [tstore_id.split("=")[1] for tstore_id in tstore_ids] + partitions = get_partitions(base_dir, ts_variable) + return fpaths, tstore_ids, partitions diff --git a/tstore/tests/test_tsdf.py b/tstore/tests/test_tsdf.py index f606d8a..4e3ee4a 100644 --- a/tstore/tests/test_tsdf.py +++ b/tstore/tests/test_tsdf.py @@ -96,13 +96,15 @@ def test_store( assert dirpath.is_dir() # Check directory content - assert sorted(os.listdir(dirpath / "1" / "ts_variable")) == [ + assert sorted(os.listdir(dirpath / "tstore_id=1" / "variable=ts_variable")) == [ "_common_metadata", "_metadata", "part.0.parquet", "part.1.parquet", ] - assert sorted(os.listdir(dirpath)) == ["1", "2", "3", "4", "_attributes.parquet", "tstore_metadata.yaml"] + assert sorted(os.listdir(dirpath)) == ["_attributes.parquet"] + [f"tstore_id={i}" for i in ["1", "2", "3", "4"]] + [ + "tstore_metadata.yaml", + ] class TestLoad: @@ -116,4 +118,4 @@ def test_pandas( tsdf = tstore.open_tsdf(tstore_path, backend="pandas") assert type(tsdf) is TSDFPandas assert type(tsdf._df) is pd.DataFrame - assert tsdf.shape == (4, 3) + assert tsdf.shape == (4, 2) diff --git a/tstore/tests/test_tslong.py b/tstore/tests/test_tslong.py index cbbc25c..6850f87 100644 --- a/tstore/tests/test_tslong.py +++ b/tstore/tests/test_tslong.py @@ -101,8 +101,10 @@ def test_store( assert 
dirpath.is_dir() # Check directory content - assert sorted(os.listdir(dirpath)) == ["1", "2", "3", "4", "_attributes.parquet", "tstore_metadata.yaml"] - assert os.listdir(dirpath / "1" / "ts_variable" / "year=2000" / "month=1") == ["part-0.parquet"] + assert sorted(os.listdir(dirpath)) == ["_attributes.parquet"] + [f"store_id={i}" for i in ["1", "2", "3", "4"]] + [ + "tstore_metadata.yaml", + ] + assert os.listdir(dirpath / "store_id=1" / "variable=ts_variable" / "year=2000" / "month=1") == ["part-0.parquet"] class TestLoad: diff --git a/tstore/tsdf/pandas.py b/tstore/tsdf/pandas.py index 92ed05f..8316b94 100644 --- a/tstore/tsdf/pandas.py +++ b/tstore/tsdf/pandas.py @@ -1,5 +1,7 @@ """TSDF class wrapping a Pandas dataframe of TSArray objects.""" +from pathlib import Path + from tstore.archive.metadata.readers import read_tstore_metadata from tstore.tsdf.reader import _read_tsarrays from tstore.tsdf.tsdf import TSDF @@ -11,28 +13,67 @@ class TSDFPandas(TSDF): def to_tstore( self, - base_dir, - id_var, - time_var, # likely not needed ! - partitioning=None, - tstore_structure="id-var", - overwrite=True, # append functionality? + base_dir: Path | str, + id_var: str | None = None, + time_var: str | None = None, # TODO: likely not needed ! + partitioning: str | None = None, + tstore_structure: str = "id-var", + var_prefix: str = "variable", + overwrite: bool = True, # append functionality? # geometry - ): - """Write TStore from TSDF object.""" - _ = write_tstore( + ) -> None: + """ + Write TSDF into a TStore. + + Parameters + ---------- + base_dir : path-like + Base directory of the TStore. + id_var : str, optional + Name of the id variable. + time_var : str, optional + Name of the time variable. + ts_variables : list-like of str + List of time series variables to write. + static_variables : list-like of str, optional + List of static variables to write. + partitioning : str, optional + Time partitioning string. 
+ tstore_structure : ["id-var", "var-id"], default "id-var" + TStore structure, either "id-var" or "var-id". + var_prefix : str, default "variable" + Prefix for the variable directory in the TStore. + overwrite : bool, default True + Overwrite existing TStore. + """ + write_tstore( self._df, base_dir=base_dir, id_var=id_var, time_var=time_var, partitioning=partitioning, tstore_structure=tstore_structure, + var_prefix=var_prefix, overwrite=overwrite, ) @staticmethod - def from_tstore(base_dir: str) -> "TSDFPandas": - """Read TStore into TSDF object.""" + def from_tstore(base_dir: Path | str, var_prefix: str = "variable") -> "TSDFPandas": + """ + Read TStore into TSDF object. + + Parameters + ---------- + base_dir : path-like + Base directory of the TStore. + var_prefix : str, default "variable" + Prefix for the variable directory in the TStore. + + Returns + ------- + TSDFPandas + TSDF object with pandas backend. + """ # TODO: enable specify subset of TSArrays, attribute columns and rows to load # TODO: read_attributes using geopandas --> geoparquet # TODO: separate TSDF class if geoparquet (TSDF inherit from geopandas.GeoDataFrame ?) 
@@ -45,7 +86,7 @@ def from_tstore(base_dir: str) -> "TSDFPandas": df = read_attributes(base_dir).set_index(metadata["id_var"]) # Get list of TSArrays - list_ts_series = _read_tsarrays(base_dir, metadata) + list_ts_series = _read_tsarrays(base_dir, metadata, var_prefix) # Join TSArrays to dataframe for ts_series in list_ts_series: diff --git a/tstore/tsdf/reader.py b/tstore/tsdf/reader.py index 0022b99..050ea4e 100644 --- a/tstore/tsdf/reader.py +++ b/tstore/tsdf/reader.py @@ -5,6 +5,8 @@ @author: ghiggi """ +from pathlib import Path + import pandas as pd from tstore.archive.io import get_ts_info @@ -12,10 +14,26 @@ from tstore.tsdf.tsarray import TSArray -def _read_tsarray(base_dir, ts_variable): - """Read a TSArray into a pd.Series.""" +def _read_tsarray(base_dir: Path | str, ts_variable: str, var_prefix: str) -> pd.Series: + """ + Read a TSArray into a pd.Series. + + Parameters + ---------- + base_dir : path-like + Base directory of the TStore. + ts_variable : str + Name of the time series variable. + var_prefix : str + Prefix for the variable directory in the TStore. + + Returns + ------- + pd.Series + TSArray Series. + """ # Retrieve TS fpaths and associated tstore_ids - ts_fpaths, tstore_ids, partitions = get_ts_info(base_dir=base_dir, ts_variable=ts_variable) + ts_fpaths, tstore_ids, partitions = get_ts_info(base_dir=base_dir, ts_variable=ts_variable, var_prefix=var_prefix) # Read TS objects # TODO: add option for TS format (dask, pandas, ...) list_ts = [TS.from_file(fpath, partitions=partitions) for fpath in ts_fpaths] @@ -26,8 +44,26 @@ def _read_tsarray(base_dir, ts_variable): return ts_series -def _read_tsarrays(base_dir, metadata): - """Read list of TSArrays.""" +def _read_tsarrays(base_dir: Path | str, metadata: dict, var_prefix: str) -> list[pd.Series]: + """ + Read list of TSArrays. + + Parameters + ---------- + base_dir : path-like + Base directory of the TStore. + metadata : dict-like + Metadata dictionary. 
+ var_prefix : str + Prefix for the variable directory in the TStore. + + Returns + ------- + list of pd.Series + List of TSArray Series. + """ ts_variables = metadata["ts_variables"] - list_ts_series = [_read_tsarray(base_dir=base_dir, ts_variable=ts_variable) for ts_variable in ts_variables] + list_ts_series = [ + _read_tsarray(base_dir=base_dir, ts_variable=ts_variable, var_prefix=var_prefix) for ts_variable in ts_variables + ] return list_ts_series diff --git a/tstore/tsdf/writer.py b/tstore/tsdf/writer.py index f838df9..a2e4ce9 100644 --- a/tstore/tsdf/writer.py +++ b/tstore/tsdf/writer.py @@ -5,7 +5,10 @@ @author: ghiggi """ +from pathlib import Path + import numpy as np +import pandas as pd from tstore.archive.io import ( check_tstore_directory, @@ -41,8 +44,21 @@ def _write_attributes(df, base_dir): write_attributes(df=df_attributes, base_dir=base_dir) -def _write_ts_series(ts_series, base_dir, tstore_structure): - """Write TSDF TSArray.""" +def _write_ts_series(ts_series: pd.Series, base_dir: Path | str, tstore_structure: str, var_prefix: str) -> None: + """ + Write TSDF TSArray. + + Parameters + ---------- + ts_series : pd.Series of tstore.TS objects + Series of TS objects. + base_dir : path-like + Base directory of the TStore. + tstore_structure : ["id-var", "var-id"] + TStore structure, either "id-var" or "var-id". + var_prefix : str + Prefix for the variable directory. 
+ """ ts_variable = ts_series.name tstore_ids = ts_series.index.array.astype(str) for tstore_id, ts in zip(tstore_ids, ts_series): @@ -52,18 +68,34 @@ def _write_ts_series(ts_series, base_dir, tstore_structure): tstore_id=tstore_id, ts_variable=ts_variable, tstore_structure=tstore_structure, + id_prefix=ts_series.index.name, + var_prefix=var_prefix, ) ts.to_disk(ts_fpath) -def _write_tsarrays(df, base_dir, tstore_structure): - """Write TSDF TSArrays.""" +def _write_tsarrays(df: pd.DataFrame, base_dir: Path | str, tstore_structure: str, var_prefix: str) -> None: + """ + Write TSDF TSArrays. + + Parameters + ---------- + df : pd.DataFrame + Data frame with TSArray as columns. + base_dir : path-like + Base directory of the TStore. + tstore_structure : ["id-var", "var-id"] + TStore structure, either "id-var" or "var-id". + var_prefix : str + Prefix for the variable directory. + """ tsarray_columns = _get_ts_variables(df) for column in tsarray_columns: _write_ts_series( ts_series=df[column], base_dir=base_dir, tstore_structure=tstore_structure, + var_prefix=var_prefix, ) @@ -82,15 +114,37 @@ def _write_metadata(base_dir, tstore_structure, id_var, time_var, ts_variables, def write_tstore( - df, - base_dir, - id_var, - time_var, # maybe not needed for TSDF? - partitioning, - tstore_structure="id-var", - overwrite=True, -): - """Write TStore from TSDF object.""" + df: pd.DataFrame, + base_dir: Path | str, + id_var: str, + time_var: str, # maybe not needed for TSDF? + partitioning: str, + tstore_structure: str, + var_prefix: str, + overwrite: bool, +) -> None: + """ + Write TStore from TSDF object. + + Parameters + ---------- + df : pd.DataFrame + Data frame with TSArray as columns. + base_dir : path-like + Base directory of the TStore. + id_var : str + Name of the id variable. + time_var : str + Name of the time variable. + partitioning : str + Time partitioning string. 
+ tstore_structure : ["id-var", "var-id"] + TStore structure, either "id-var" or "var-id". + var_prefix : str + Prefix for the variable directory in the TStore. + overwrite : bool + Overwrite existing TStore. + """ # Checks tstore_structure = check_tstore_structure(tstore_structure) base_dir = check_tstore_directory(base_dir, overwrite=overwrite) @@ -102,7 +156,7 @@ def write_tstore( _write_attributes(df, base_dir=base_dir) # Write TSArrays - _write_tsarrays(df, base_dir=base_dir, tstore_structure=tstore_structure) + _write_tsarrays(df, base_dir=base_dir, tstore_structure=tstore_structure, var_prefix=var_prefix) # Write TSArrays metadata ts_variables = _get_ts_variables(df) diff --git a/tstore/tslong/pandas.py b/tstore/tslong/pandas.py index b6dc1b7..6d63a58 100644 --- a/tstore/tslong/pandas.py +++ b/tstore/tslong/pandas.py @@ -1,5 +1,7 @@ """Module defining the TSLongPandas wrapper.""" +from pathlib import Path + import pandas as pd import pyarrow as pa @@ -22,18 +24,42 @@ class TSLongPandas(TSLong): def to_tstore( self, # TSTORE options - base_dir, + base_dir: Path | str, # DFLONG attributes - id_var, - time_var, - ts_variables, - static_variables=None, + id_var: str, + time_var: str, + ts_variables: list[str], + static_variables: list[str] | None = None, # TSTORE options - partitioning=None, - tstore_structure="id-var", - overwrite=True, - ): - """Write the wrapped dataframe as a TStore structure.""" + partitioning: str | None = None, + tstore_structure: str = "id-var", + var_prefix: str = "variable", + overwrite: bool = True, + ) -> None: + """ + Write the wrapped long data frame into a TStore. + + Parameters + ---------- + base_dir : path-like + Base directory of the TStore. + id_var : str + Name of the id variable. + time_var : str + Name of the time variable. + ts_variables : list-like of str + List of time series variables to write. + static_variables : list-like of str, optional + List of static variables to write. 
+ partitioning : str, optional + Time partitioning string. + tstore_structure : ["id-var", "var-id"], default "id-var" + TStore structure, either "id-var" or "var-id". + var_prefix : str, default "variable" + Prefix for the variable directory in the TStore. + overwrite : bool, default True + Overwrite existing TStore. + """ # If index time, remove if time_var not in self._df.columns: self._df = self._df.reset_index(names=time_var) @@ -117,6 +143,8 @@ def to_tstore( tstore_id=tstore_id, ts_variable=ts_variable, tstore_structure=tstore_structure, + id_prefix=id_var, + var_prefix=var_prefix, ) # ----------------------------------------------- diff --git a/tstore/tslong/polars.py b/tstore/tslong/polars.py index 2718e91..87e0e27 100644 --- a/tstore/tslong/polars.py +++ b/tstore/tslong/polars.py @@ -31,6 +31,7 @@ def to_tstore( # TSTORE options partitioning=None, tstore_structure="id-var", + var_prefix="variable", overwrite=True, ) -> None: """Write the wrapped dataframe as a TStore structure.""" @@ -110,6 +111,8 @@ def to_tstore( tstore_id=tstore_id, ts_variable=ts_variable, tstore_structure=tstore_structure, + id_prefix=id_var, + var_prefix=var_prefix, ) # TODO; Maybe create TS object and use TS.to_parquet() once implemented diff --git a/tstore/tslong/pyarrow.py b/tstore/tslong/pyarrow.py index 48c98ea..0d5cf2b 100644 --- a/tstore/tslong/pyarrow.py +++ b/tstore/tslong/pyarrow.py @@ -28,6 +28,7 @@ def to_tstore(self) -> None: def from_tstore( base_dir, ts_variables=None, + var_prefix="variable", start_time=None, end_time=None, tstore_ids=None, @@ -50,6 +51,7 @@ def from_tstore( id_var=id_var, time_var=time_var, ts_variables=ts_variables, + var_prefix=var_prefix, start_time=start_time, end_time=end_time, columns=columns, @@ -104,6 +106,7 @@ def _read_ts_variable( base_dir, id_var, ts_variable, + var_prefix, start_time=None, end_time=None, columns=None, @@ -112,7 +115,8 @@ def _read_ts_variable( ): """Read a TStore ts_variable into pyarrow long-format.""" # Find TS and 
associated TStore IDs - fpaths, tstore_ids, partitions = get_ts_info(base_dir=base_dir, ts_variable=ts_variable) + fpaths, tstore_ids, partitions = get_ts_info(base_dir=base_dir, ts_variable=ts_variable, var_prefix=var_prefix) + # Read each TS list_tables = [ _read_ts( @@ -147,6 +151,7 @@ def _read_ts_variables( id_var, time_var, ts_variables, + var_prefix, start_time=None, end_time=None, columns=None, @@ -160,6 +165,7 @@ def _read_ts_variables( base_dir=base_dir, id_var=id_var, ts_variable=ts_variable, + var_prefix=var_prefix, start_time=start_time, end_time=end_time, columns=columns, # columns[ts_variable] in future From e5f7b83f358954dce4e3d486ca8d4144e5e56653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=AD=20Bosch?= Date: Wed, 26 Jun 2024 14:27:50 +0200 Subject: [PATCH 2/2] feat: rationale for id_var in tsdf --- tstore/tsdf/pandas.py | 5 ++++- tstore/tsdf/writer.py | 43 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/tstore/tsdf/pandas.py b/tstore/tsdf/pandas.py index 8316b94..96b2262 100644 --- a/tstore/tsdf/pandas.py +++ b/tstore/tsdf/pandas.py @@ -30,7 +30,10 @@ def to_tstore( base_dir : path-like Base directory of the TStore. id_var : str, optional - Name of the id variable. + Name of the id variable. If the provided value matches a column of the wrapped data frame, the corresponding + column will be used as id. If the provided value is not None but does not match any column of the wrapped + data frame, the id values will be taken from the index, which will be named using the provided value. + Otherwise, the id values and name will be taken from the index. time_var : str, optional Name of the time variable. 
ts_variables : list-like of str diff --git a/tstore/tsdf/writer.py b/tstore/tsdf/writer.py index a2e4ce9..b74bf47 100644 --- a/tstore/tsdf/writer.py +++ b/tstore/tsdf/writer.py @@ -40,11 +40,16 @@ def _write_attributes(df, base_dir): static_columns = _get_static_columns(df) df_attributes = df[static_columns] - df_attributes.index.name = "tstore_id" write_attributes(df=df_attributes, base_dir=base_dir) -def _write_ts_series(ts_series: pd.Series, base_dir: Path | str, tstore_structure: str, var_prefix: str) -> None: +def _write_ts_series( + ts_series: pd.Series, + base_dir: Path | str, + tstore_structure: str, + id_prefix: str, + var_prefix: str, +) -> None: """ Write TSDF TSArray. @@ -56,8 +61,10 @@ def _write_ts_series(ts_series: pd.Series, base_dir: Path | str, tstore_structur Base directory of the TStore. tstore_structure : ["id-var", "var-id"] TStore structure, either "id-var" or "var-id". + id_prefix : str + Prefix for the id directory in the TStore. var_prefix : str - Prefix for the variable directory. + Prefix for the variable directory in the TStore. """ ts_variable = ts_series.name tstore_ids = ts_series.index.array.astype(str) @@ -68,13 +75,19 @@ def _write_ts_series(ts_series: pd.Series, base_dir: Path | str, tstore_structur tstore_id=tstore_id, ts_variable=ts_variable, tstore_structure=tstore_structure, - id_prefix=ts_series.index.name, + id_prefix=id_prefix, var_prefix=var_prefix, ) ts.to_disk(ts_fpath) -def _write_tsarrays(df: pd.DataFrame, base_dir: Path | str, tstore_structure: str, var_prefix: str) -> None: +def _write_tsarrays( + df: pd.DataFrame, + base_dir: Path | str, + tstore_structure: str, + id_prefix: str, + var_prefix: str, +) -> None: """ Write TSDF TSArrays. @@ -86,8 +99,10 @@ def _write_tsarrays(df: pd.DataFrame, base_dir: Path | str, tstore_structure: st Base directory of the TStore. tstore_structure : ["id-var", "var-id"] TStore structure, either "id-var" or "var-id". + id_prefix : str + Prefix for the id directory in the TStore. 
var_prefix : str - Prefix for the variable directory. + Prefix for the variable directory in the TStore. """ tsarray_columns = _get_ts_variables(df) for column in tsarray_columns: @@ -95,6 +110,7 @@ def _write_tsarrays(df: pd.DataFrame, base_dir: Path | str, tstore_structure: st ts_series=df[column], base_dir=base_dir, tstore_structure=tstore_structure, + id_prefix=id_prefix, var_prefix=var_prefix, ) @@ -152,11 +168,24 @@ def write_tstore( ts_variables = _get_ts_variables(df) partitioning = check_partitioning(partitioning, ts_variables=ts_variables) + # id var + if id_var is None: + # if no `id_var` value is passed, the values are taken from the index. + # TODO: enforce that index has a non-None name? + id_var = df.index.name + elif id_var not in df: + # if a non-None `id_var` is passed but does not match a column of the data frame, take the values from the + # index but take `id_var` as name + df = df.reset_index(names=id_var) + else: + # if a non-None `id_var` is passed and matches a column of the data frame, there is nothing to do here + pass + # Write static attributes _write_attributes(df, base_dir=base_dir) # Write TSArrays - _write_tsarrays(df, base_dir=base_dir, tstore_structure=tstore_structure, var_prefix=var_prefix) + _write_tsarrays(df, base_dir=base_dir, tstore_structure=tstore_structure, id_prefix=id_var, var_prefix=var_prefix) # Write TSArrays metadata ts_variables = _get_ts_variables(df)