
5 cleanup project structure and workflows #16

Merged · 42 commits · Aug 10, 2024
Commits
82cc502
Update README.md
elphick May 30, 2024
ee2d58f
initial set-up
elphick May 30, 2024
3a8f69a
initial set-up (#2)
elphick May 30, 2024
8993ad7
initial content migration with tests; moisture, pandas, base, sample
elphick Jun 5, 2024
7cb2491
cleaned up some examples, added elphick namespace, added api doc, MIT…
elphick Jun 6, 2024
3f0b41c
EOD progress
elphick Jun 7, 2024
7e61400
EOD - function flowsheet. working example.
elphick Jun 9, 2024
686ce8b
EOD - example cleanup, started ion validation. fixed to/from omf
elphick Jun 12, 2024
526d8ff
Work on the readers.
elphick Jun 16, 2024
6ce7a07
Added packages to pyproject.toml
elphick Jun 17, 2024
dbc5c36
initial setup - tests failing
elphick Jun 18, 2024
4253930
Merge branch 'main' into 1-set-up-project
elphick Jun 18, 2024
87b2d97
1 set up project (#4)
elphick Jun 18, 2024
19210ce
initial setup - tests failing
elphick Jun 19, 2024
4f671db
Merge remote-tracking branch 'origin/main' into main
elphick Jun 19, 2024
68a23ad
pyvista offscreen=True
elphick Jun 19, 2024
3ab8c5e
update env
elphick Jun 19, 2024
2f51395
update env #2
elphick Jun 19, 2024
ca75863
PYVISTA_OFF_SCREEN in conf.py
elphick Jun 19, 2024
384d7a2
PYVISTA_OFF_SCREEN using env:
elphick Jun 19, 2024
26110f4
change to build docs off main branch
elphick Jun 19, 2024
04b2df3
PYVISTA_OFF_SCREEN True as string
elphick Jun 19, 2024
673e6f0
removed autoclose in pv plots
elphick Jun 19, 2024
9152d8f
added pyvista/setup-headless-display-action
elphick Jun 19, 2024
ac5ef10
5 cleanup project structure and workflows (#6)
elphick Jun 19, 2024
3f32bf8
Merge branch 'main' into 5-cleanup-project-structure-and-workflows
elphick Jun 19, 2024
1c57ceb
passing tests
elphick Jun 19, 2024
693ca77
headless for tests
elphick Jun 19, 2024
0885099
skipped a test
elphick Jun 19, 2024
2c5647a
skip examples test in ci-cd
elphick Jun 19, 2024
8c692fa
5 cleanup project structure and workflows (#7)
elphick Jun 19, 2024
b66d70b
Merge branch 'main' into 5-cleanup-project-structure-and-workflows
elphick Jun 19, 2024
fbef883
import fix
elphick Jun 19, 2024
1928776
install the project to fix doc version import issue
elphick Jun 19, 2024
2329403
5 cleanup project structure and workflows (#8)
elphick Jun 19, 2024
d4d82e4
Merge branch 'main' into 5-cleanup-project-structure-and-workflows
elphick Jun 19, 2024
5c76039
docs on main branch only
elphick Jun 19, 2024
32a531a
5 cleanup project structure and workflows (#9)
elphick Jun 19, 2024
083b046
Merge branch 'main' into 5-cleanup-project-structure-and-workflows
elphick Jun 19, 2024
c01f985
skipped both tests for block models
elphick Jun 19, 2024
9f67319
set env var to skip example tests
elphick Jun 19, 2024
ab50908
Merge branch 'main' into 5-cleanup-project-structure-and-workflows
elphick Aug 10, 2024
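Several of the later commits ("set env var to skip example tests", "skip examples test in ci-cd") wire up an environment variable so the slow example tests can be disabled on CI runners. A minimal sketch of that pattern follows; the variable name and test name are assumptions for illustration, not necessarily what the project uses.

```python
import os

import pytest

# Assumed variable name; the project may spell this differently.
SKIP_EXAMPLES = os.environ.get("SKIP_EXAMPLE_TESTS", "false").lower() == "true"


@pytest.mark.skipif(SKIP_EXAMPLES, reason="example tests disabled in CI")
def test_example_runs():
    # Stand-in for executing a gallery example script.
    assert 1 + 1 == 2
```

Setting `SKIP_EXAMPLE_TESTS=true` in the workflow's `env:` block then skips these tests without touching the test code.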
6 changes: 4 additions & 2 deletions docs/source/conf.py
@@ -5,14 +5,16 @@

import os
import sys

sys.path.insert(0, os.path.abspath('../..'))

from pathlib import Path

import numpy as np
import pyvista
import plotly
from plotly.io._sg_scraper import plotly_sg_scraper

sys.path.insert(0, os.path.abspath('../..'))
from sphinx_gallery.sorting import FileNameSortKey
from elphick import geomet

os.environ["PYVISTA_OFF_SCREEN"] = "True"
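The `conf.py` change above centres on forcing PyVista to render off-screen so the Sphinx-Gallery build works on headless CI runners, as the `PYVISTA_OFF_SCREEN` commits describe. A standalone sketch of the pattern (not the project's exact `conf.py`):

```python
import os

# Must be set before pyvista is imported anywhere in the Sphinx build,
# since PyVista consults it to decide on off-screen rendering.
os.environ["PYVISTA_OFF_SCREEN"] = "True"

# conf.py or the examples can check the flag to avoid opening GUI windows.
headless = os.environ.get("PYVISTA_OFF_SCREEN", "False").lower() == "true"
```

Note the value is the string `"True"` (one of the commit messages, "PYVISTA_OFF_SCREEN True as string", suggests this tripped up an earlier attempt): environment variables are always strings, so a boolean comparison must go through a string check.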
28 changes: 14 additions & 14 deletions docs/source/sg_execution_times.rst
@@ -6,7 +6,7 @@

Computation times
=================
**01:11.367** total execution time for 11 files **from all galleries**:
**00:36.744** total execution time for 11 files **from all galleries**:

.. container::

@@ -33,35 +33,35 @@ Computation times
- Time
- Mem (MB)
* - :ref:`sphx_glr_auto_examples_examples_04_block_model_02_create_block_model.py` (``..\..\examples\04_block_model\02_create_block_model.py``)
- 00:46.906
- 00:25.667
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_04_block_model_03_load_block_model.py` (``..\..\examples\04_block_model\03_load_block_model.py``)
- 00:08.296
- 00:04.207
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_01_getting_started_03_plot_demo.py` (``..\..\examples\01_getting_started\03_plot_demo.py``)
- 00:06.896
- 00:02.871
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_03_flowsheet_01_flowsheet_basics.py` (``..\..\examples\03_flowsheet\01_flowsheet_basics.py``)
- 00:03.337
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_01_getting_started_02_math_operations.py` (``..\..\examples\01_getting_started\02_math_operations.py``)
- 00:01.986
- 00:01.544
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_02_interval_sample_01_interval_sample.py` (``..\..\examples\02_interval_sample\01_interval_sample.py``)
- 00:01.957
- 00:00.961
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_01_getting_started_02_math_operations.py` (``..\..\examples\01_getting_started\02_math_operations.py``)
- 00:00.608
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_02_interval_sample_02_interval_data_sink_float.py` (``..\..\examples\02_interval_sample\02_interval_data_sink_float.py``)
- 00:00.702
- 00:00.356
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_01_getting_started_01_create_sample.py` (``..\..\examples\01_getting_started\01_create_sample.py``)
- 00:00.498
- 00:00.209
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_04_block_model_01_consuming_omf.py` (``..\..\examples\04_block_model\01_consuming_omf.py``)
- 00:00.450
- 00:00.187
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_06_map_01_mapping.py` (``..\..\examples\06_map\01_mapping.py``)
- 00:00.187
- 00:00.069
- 0.0
* - :ref:`sphx_glr_auto_examples_examples_05_mass_balance_01_mass_balance.py` (``..\..\examples\05_mass_balance\01_mass_balance.py``)
- 00:00.152
- 00:00.065
- 0.0
4 changes: 2 additions & 2 deletions examples/03_flowsheet/01_flowsheet_basics.py
@@ -19,7 +19,7 @@
# %%
#
# Create some Sample objects
# --------------------------
# -----------------------------------
#
# Create an object, and split it to create two more objects.

@@ -164,7 +164,7 @@
# Sometimes the network that is automatically created may not be what you are after - for example flow may be in
# the wrong direction. We'll learn how to modify an existing network, by picking up the network above.
#
# Let's break the links for the stream 1.
# Let's break the links for the _stream 1_.

fs.reset_stream_nodes(stream="stream 1")
fig = fs.table_plot()
Empty file added geomet/__init__.py
Empty file.
46 changes: 46 additions & 0 deletions tests/test_010_geoh5.py.hide
@@ -0,0 +1,46 @@
from pathlib import Path

from geoh5py import Workspace
from geoh5py.data import Data
from geoh5py.groups import ContainerGroup
from geoh5py.objects import NoTypeObject


def test_project_load():
# load an existing geoh5 workspace
workspace_path = (Path(__file__).parents[1] / "Geoscience_ANALYST_demo_workspace_and_data" /
"GeoscienceANALYST_demo.geoh5")
if not workspace_path.exists():
raise FileNotFoundError(f"File not found: {workspace_path}")

workspace = Workspace(workspace_path)
print('done')

def test_create_new_project():
# create a new geoh5 workspace
if Path("data/test_workspace.geoh5").exists():
Path("data/test_workspace.geoh5").unlink()
workspace: Workspace = Workspace.create("data/test_workspace.geoh5")

# create a pandas dataframe
import pandas as pd
df = pd.DataFrame({
"column1": [5, 10, 20],
"column2": ["a", "b", "c"],
"column3": pd.to_datetime(["2010", "2011", "2012"]),
})

# create a group
group = ContainerGroup.create(workspace, name='my group')

# create an Object
obj = NoTypeObject.create(workspace, name='my object', parent=group)

# create some data
data1 = Data.create(workspace, name='column1', values=[1, 2, 3], entity=obj)
data2 = Data.create(workspace, name='column2', values=['a', 'b', 'c'], entity=obj)
data3 = Data.create(workspace, name='column3', values=[10, 20, 30], entity=obj)

# save the workspace
workspace.save_as("data/test_workspace_2.geoh5")
print('done')
94 changes: 94 additions & 0 deletions tests/test_011_file_readers.py.hide
@@ -0,0 +1,94 @@
from pathlib import Path

import pandas as pd
import pyarrow.parquet as pq

from elphick.geomet.readers import ParquetFileReader, OMFFileReader


def create_parquet(num_cols=20, num_rows=10000, num_object_vars=2) -> Path:
import pandas as pd
import numpy as np
import pyarrow as pa

# Create num_cols - num_object_vars number of float columns
df = pd.DataFrame({f"column{i}": np.random.rand(num_rows) for i in range(num_cols - num_object_vars)})

# Create num_object_vars number of object columns
for i in range(num_object_vars):
df[f"column{num_cols - num_object_vars + i}"] = ['object_data'] * num_rows

table = pa.Table.from_pandas(df)
file_path = Path(f'test.{num_rows}x{num_cols}.parquet')
pq.write_table(table, file_path)
return file_path


# create_parquet()

def test_read_parquet():
file_path = Path('data/test.10000x20.parquet')
df = ParquetFileReader(file_path).read(columns=['column1', 'column2'])
assert not df.empty
assert len(df.columns) == 2
assert 'column1' in df.columns
assert 'column2' in df.columns
assert len(df) == 10000
assert df['column1'].dtype == float
assert df['column2'].dtype == float


def test_read_parquet_with_object_cols():
file_path = Path('data/test.10000x20.parquet')
df = ParquetFileReader(file_path).read(columns=['column1', 'column2', 'column18', 'column19'])
assert not df.empty
assert len(df.columns) == 4
assert 'column1' in df.columns
assert 'column2' in df.columns
assert 'column18' in df.columns
assert 'column19' in df.columns
assert len(df) == 10000
assert df['column1'].dtype == float
assert df['column2'].dtype == float
assert df['column18'].dtype == object
assert df['column19'].dtype == object
assert df['column18'].unique() == ['object_data']
assert df['column19'].unique() == ['object_data']


def test_read_parquet_with_query():
file_path = Path('data/test.10000x20.parquet')
df = ParquetFileReader(file_path).read(query="column1 > 0.5")
assert not df.empty
assert len(df) < 10000
assert df['column1'].dtype == float
assert (df['column1'] > 0.5).all()
assert len(df.columns) == 20


def test_read_parquet_with_query_and_columns():
file_path = Path('data/test.10000x20.parquet')
df = ParquetFileReader(file_path).read(columns=['column1', 'column2', 'column19'], query="column1 > 0.5")
assert not df.empty
assert len(df) < 10000
assert df['column1'].dtype == float
assert (df['column1'] > 0.5).all()
assert len(df.columns) == 3
assert 'column1' in df.columns
assert 'column2' in df.columns
assert 'column19' in df.columns
assert (df['column1'] > 0.5).all()
assert df['column19'].unique() == ['object_data']


def test_read_bm_parquet():
file_path = Path('data/block_model_copper.parquet')
df = ParquetFileReader(file_path).read(columns=['CU_pct'], query="CU_pct > 0.1")
assert not df.empty
assert len(df) < ParquetFileReader(file_path).records_in_file


def test_read_omf():
file_path = Path('data/test_model.omf')
df: pd.DataFrame = OMFFileReader(file_path, element='Block Model').read()
assert not df.empty