Skip to content

Commit bb7ac88

Browse files
committed
feat: support pyspark 3 (via a databricks.koalas stub)
1 parent d7b443c commit bb7ac88

File tree

4 files changed

+7
-14
lines changed

4 files changed

+7
-14
lines changed

changelog.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,10 @@
22

33
## Unreleased
44

5-
### Changed
5+
### Added
66

77
- Support for pyarrow > 0.17.0
8+
- Support for Python 3.7 to 3.10 (3.11 or higher is not tested)
89
- Support for pyspark 3 (to force pyspark 2, use `pip install eds-scikit[spark2]`)
910

1011
### Fixed

eds_scikit/biology/viz/aggregate.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -135,8 +135,6 @@ def aggregate_concepts_set(
135135

136136
# Extract concept-set
137137
measurement_std_filtered = get_measurement_std(measurement_valid, src_to_std)
138-
if is_koalas(measurement_std_filtered):
139-
measurement_std_filtered.spark.cache()
140138
measurement_std_filtered = measurement_std_filtered.drop(
141139
columns="source_concept_id"
142140
)

tests/conftest.py

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,6 @@ def spark_session(pytestconfig, tmpdir_factory):
8080
print("!! Creating spark session !!")
8181

8282
from pyspark import SparkConf
83-
from pyspark import __version__ as pyspark_version
8483

8584
temp_warehouse_dir = tmpdir_factory.mktemp("spark")
8685
conf = (
@@ -100,17 +99,8 @@ def spark_session(pytestconfig, tmpdir_factory):
10099
f"jdbc:derby:;databaseName={temp_warehouse_dir}/metastore_db;create=true",
101100
)
102101
.set("spark.executor.cores", 1)
103-
.set("spark.driver.memory", "2g")
104102
)
105103

106-
if pyspark_version < "3":
107-
108-
# used to overwrite hive tables
109-
conf = conf.set(
110-
"spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation",
111-
"true",
112-
)
113-
114104
session, _, _ = improve_performances(to_add_conf=list(conf.getAll()))
115105

116106
# session is ready

tests/test_biology.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,10 @@ def tmp_biology_dir(tmp_path_factory):
1414

1515
@pytest.fixture
1616
def data():
17-
return load_biology_data(seed=42)
17+
return load_biology_data(
18+
seed=42,
19+
mean_measurement=500,
20+
)
1821

1922

2023
@pytest.fixture
@@ -73,6 +76,7 @@ def test_biology_summary(data, concepts_sets, module, tmp_biology_dir):
7376
limit_count=("AnaBio", 500),
7477
stats_only=True,
7578
save_folder_path=tmp_biology_dir,
79+
pd_limit_size=0,
7680
)
7781

7882

0 commit comments

Comments
 (0)