# Patch summary (free-text preamble placed before the first `diff --git` header).
#
# polars_bio/__init__.py
#   - The package now re-exports `ctx` alongside `overlap` from `.overlap`.
#
# polars_bio/overlap.py
#   - Adds `singleton(cls)`: a decorator that keeps one instance per decorated
#     class in a dict and returns a `get_instance(*args, **kwargs)` closure.
#     Arguments are only used on the first call; later calls return the stored
#     instance unchanged.
#   - Adds `Context`, decorated with `@singleton`. Its `__init__` creates a
#     `BioSessionContext` and calls
#     `set_option("datafusion.execution.target_partitions", "1")` on it.
#     Because the decorator returns `get_instance`, the module-level name
#     `Context` is bound to that function, not to the class itself.
#   - The module-level `ctx = BioSessionContext()` is removed and replaced by
#     `ctx = Context().ctx`, placed after `DEFAULT_INTERVAL_COLUMNS`.
#
# src/lib.rs
#   - Adds `use datafusion::catalog::Session;` and
#     `use datafusion::common::ScalarValue;`.
#     NOTE(review): neither name is referenced in the hunks visible here;
#     confirm they are used elsewhere in the file.
#   - Adds `PyBioSessionContext::set_option(key, value)`, which takes the
#     context's state via `state_ref()`, obtains a write guard, and calls
#     `config_mut().options_mut().set(key, value)`. The result is `.unwrap()`ed,
#     so a failed `set` panics instead of returning an error to the caller.
#   - `create_context` no longer sets target partitions: the
#     `("datafusion.execution.target_partitions", "1")` tuning entry, the
#     `// FIXME` comment above the list, and `.with_target_partitions(1)` on
#     the `SessionConfig` builder are all removed.
#   - `overlap_frame` gains a `println!` of
#     `ctx.state().config().options().execution.target_partitions`.
#     NOTE(review): this looks like leftover debug output; confirm before merge.
#
# NOTE(review): the patch text below appears to have had its line breaks
# collapsed into spaces, so it presumably will not apply as-is; regenerate it
# from the repository if it needs to be applied.
diff --git a/polars_bio/__init__.py b/polars_bio/__init__.py index 3c84add..4b83d78 100644 --- a/polars_bio/__init__.py +++ b/polars_bio/__init__.py @@ -1 +1 @@ -from .overlap import overlap +from .overlap import overlap, ctx diff --git a/polars_bio/overlap.py b/polars_bio/overlap.py index f06b8da..aac792b 100644 --- a/polars_bio/overlap.py +++ b/polars_bio/overlap.py @@ -9,13 +9,27 @@ import pyarrow.compute as pc from polars.io.plugins import register_io_source from typing_extensions import TYPE_CHECKING, Union +from .polars_bio import overlap_scan, overlap_frame +from .polars_bio import BioSessionContext +def singleton(cls): + """Decorator to make a class a singleton.""" + instances = {} -from .polars_bio import overlap_scan, overlap_frame + def get_instance(*args, **kwargs): + if cls not in instances: + instances[cls] = cls(*args, **kwargs) + return instances[cls] + + return get_instance + +@singleton +class Context: + def __init__(self): + self.ctx = BioSessionContext() + self.ctx.set_option("datafusion.execution.target_partitions", "1") -from .polars_bio import BioSessionContext -ctx = BioSessionContext() if TYPE_CHECKING: @@ -24,6 +38,7 @@ DEFAULT_INTERVAL_COLUMNS = ["contig", "pos_start", "pos_end"] +ctx = Context().ctx def overlap(df1 : Union[str, pl.DataFrame, pl.LazyFrame, pd.DataFrame], df2 : Union[str, pl.DataFrame, pl.LazyFrame, pd.DataFrame], diff --git a/src/lib.rs b/src/lib.rs index 0475ede..4623efb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,8 @@ use datafusion::arrow::array::{ArrayData, RecordBatch}; use datafusion::arrow::error::ArrowError; use datafusion::arrow::ffi_stream::ArrowArrayStreamReader; use datafusion::arrow::pyarrow::PyArrowType; - +use datafusion::catalog::Session; +use datafusion::common::ScalarValue; use datafusion::config::ConfigOptions; use datafusion::datasource::MemTable; use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; @@ -39,14 +40,17 @@ impl PyBioSessionContext { ctx, }) } + #[pyo3(signature = 
(key, value))] + pub fn set_option(&mut self, key: &str, value: &str) { + let mut state = self.ctx.state_ref(); + state.write().config_mut().options_mut().set(key, value).unwrap(); + } } fn create_context(algorithm: Algorithm) -> SessionContext { let mut options = ConfigOptions::new(); - // FIXME let tuning_options = vec![ - ("datafusion.execution.target_partitions", "1"), ("datafusion.optimizer.repartition_joins", "false"), ("datafusion.execution.coalesce_batches", "false"), ]; @@ -61,8 +65,7 @@ fn create_context(algorithm: Algorithm) -> SessionContext { let config = SessionConfig::from(options) .with_option_extension(sequila_config) - .with_information_schema(true) - .with_target_partitions(1); + .with_information_schema(true); SessionContext::new_with_sequila(config) } @@ -119,6 +122,7 @@ fn overlap_frame(py_ctx: &PyBioSessionContext, df1: PyArrowType PyResult { let rt = Runtime::new().unwrap(); let ctx = &py_ctx.ctx; + println!("{}", ctx.state().config().options().execution.target_partitions); let s1_path = &df_path1; let s2_path = &df_path2; rt.block_on(register_parquet(&ctx, s1_path, "s1"));