Skip to content

Commit

Permalink
Parameters changes
Browse files Browse the repository at this point in the history
  • Loading branch information
mwiewior committed Dec 11, 2024
1 parent fe0d60a commit c6a0867
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 9 deletions.
2 changes: 1 addition & 1 deletion polars_bio/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .overlap import overlap
from .overlap import overlap, ctx
21 changes: 18 additions & 3 deletions polars_bio/overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,27 @@
import pyarrow.compute as pc
from polars.io.plugins import register_io_source
from typing_extensions import TYPE_CHECKING, Union
from .polars_bio import overlap_scan, overlap_frame
from .polars_bio import BioSessionContext

def singleton(cls):
    """Decorator to make a class a singleton.

    The decorated name becomes a factory function: the first call builds
    the instance with whatever arguments it receives, and every later call
    returns that same object (later arguments are ignored).

    Args:
        cls: The class to wrap.

    Returns:
        A callable that accepts the constructor arguments of ``cls`` and
        returns the single shared instance.
    """
    # Imported locally so the decorator does not depend on a module-level import.
    import functools

    instances = {}

    # updated=() keeps the class __dict__ from being merged into the wrapper;
    # only the name, qualname, module and docstring are carried over, so the
    # decorated class is still identifiable when introspected.
    @functools.wraps(cls, updated=())
    def get_instance(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return get_instance

@singleton
class Context:
    """Holder for a single shared ``BioSessionContext``.

    On construction it creates the session and sets the
    ``datafusion.execution.target_partitions`` option to ``"1"``.
    The session is exposed as the ``ctx`` attribute.
    """

    def __init__(self):
        session = BioSessionContext()
        session.set_option("datafusion.execution.target_partitions", "1")
        self.ctx = session

from .polars_bio import BioSessionContext

ctx = BioSessionContext()


if TYPE_CHECKING:
Expand All @@ -24,6 +38,7 @@

DEFAULT_INTERVAL_COLUMNS = ["contig", "pos_start", "pos_end"]

ctx = Context().ctx

def overlap(df1 : Union[str, pl.DataFrame, pl.LazyFrame, pd.DataFrame],
df2 : Union[str, pl.DataFrame, pl.LazyFrame, pd.DataFrame],
Expand Down
14 changes: 9 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ use datafusion::arrow::array::{ArrayData, RecordBatch};
use datafusion::arrow::error::ArrowError;
use datafusion::arrow::ffi_stream::ArrowArrayStreamReader;
use datafusion::arrow::pyarrow::PyArrowType;

use datafusion::catalog::Session;
use datafusion::common::ScalarValue;
use datafusion::config::ConfigOptions;
use datafusion::datasource::MemTable;
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
Expand Down Expand Up @@ -39,14 +40,17 @@ impl PyBioSessionContext {
ctx,
})
}
#[pyo3(signature = (key, value))]
pub fn set_option(&mut self, key: &str, value: &str) {
let mut state = self.ctx.state_ref();
state.write().config_mut().options_mut().set(key, value).unwrap();
}
}


fn create_context(algorithm: Algorithm) -> SessionContext {
let mut options = ConfigOptions::new();
// FIXME
let tuning_options = vec![
("datafusion.execution.target_partitions", "1"),
("datafusion.optimizer.repartition_joins", "false"),
("datafusion.execution.coalesce_batches", "false"),
];
Expand All @@ -61,8 +65,7 @@ fn create_context(algorithm: Algorithm) -> SessionContext {

let config = SessionConfig::from(options)
.with_option_extension(sequila_config)
.with_information_schema(true)
.with_target_partitions(1);
.with_information_schema(true);

SessionContext::new_with_sequila(config)
}
Expand Down Expand Up @@ -119,6 +122,7 @@ fn overlap_frame(py_ctx: &PyBioSessionContext, df1: PyArrowType<ArrowArrayStream
fn overlap_scan(py_ctx: &PyBioSessionContext, df_path1: String, df_path2: String) -> PyResult<PyDataFrame> {
let rt = Runtime::new().unwrap();
let ctx = &py_ctx.ctx;
println!("{}", ctx.state().config().options().execution.target_partitions);
let s1_path = &df_path1;
let s2_path = &df_path2;
rt.block_on(register_parquet(&ctx, s1_path, "s1"));
Expand Down

0 comments on commit c6a0867

Please sign in to comment.