13 files changed: +71 −39
CI workflow (GitHub Actions test matrix):

@@ -49,6 +49,20 @@
     needs: check_skip
     if: ${{ needs.check_skip.outputs.skip == 'false' }}
     runs-on: "ubuntu-latest"
+    strategy:
+      fail-fast: true
+      matrix:
+        include:
+          - python-version: "3.7"
+            spark: "spark2"
+          - python-version: "3.7"
+            spark: "spark3"
+          - python-version: "3.8"
+            spark: "spark3"
+          - python-version: "3.9"
+            spark: "spark3"
+          - python-version: "3.10"
+            spark: "spark3"
     name: 'Testing on ubuntu'
     defaults:
       run:
@@ -61,13 +75,15 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.7'
-      - name: Install eds-scikit
-        shell: bash {0}
-        run: ./build_tools/github/install.sh
-      - name: Run tests
-        shell: bash {0}
-        run: ./build_tools/github/test.sh
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+      - name: Install dependencies
+        run: |
+          pip install -U "pip<23"
+          pip install --progress-bar off ".[${{ matrix.spark }}, dev, doc]"
+      - name: Run pytest
+        run: |
+          python -m pytest --pyargs tests -m "" --cov=eds_scikit
       - name: Upload coverage to CodeCov
         uses: codecov/codecov-action@v3
         if: success()
Two files were deleted (diffs not shown).
Changelog:

@@ -2,9 +2,11 @@
 
 ## Unreleased
 
-### Changed
+### Added
 
 - Support for pyarrow > 0.17.0
+- Support for Python 3.7 to 3.10 (3.11 or higher is not tested)
+- Support for pyspark 3 (to force pyspark 2, use `pip install eds-scikit[spark2]`)
 
 ### Fixed
 - Caching in spark instead of koalas to improve speed
Documentation (Koalas introduction):

@@ -124,6 +124,7 @@ The goal of **Koalas** is precisely to avoid this issue. It aims at allowing code
 
 ```python
 from databricks import koalas as ks
+# or from pyspark import pandas as ks, if you have spark 3
 
 # Converting the Spark DataFrame into a Koalas DataFrame
 visit_occurrence_koalas = visit_occurrence_spark.to_koalas()
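
For context on that new comment, here is a minimal, hypothetical import shim (not part of the diff) showing how one code base can target either backend; it assumes that either the standalone koalas package or pyspark >= 3 is installed:

```python
# Hypothetical version-agnostic import, for illustration only.
try:
    from databricks import koalas as ks  # Spark 2 + standalone koalas package
except ImportError:
    from pyspark import pandas as ks  # pyspark >= 3 bundles the pandas API

# The rest of the tutorial code is unchanged, e.g.:
# visit_occurrence_koalas = visit_occurrence_spark.to_koalas()
```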
Package initialization module:

@@ -26,15 +26,15 @@
 from pyspark import SparkContext
 from pyspark.sql import SparkSession
 
-import eds_scikit.biology  # noqa: F401 --> To register functions
-
 pyarrow.open_stream = pyarrow.ipc.open_stream
 
 sys.path.insert(
     0, (pathlib.Path(__file__).parent / "package-override").absolute().as_posix()
 )
 os.environ["PYTHONPATH"] = ":".join(sys.path)
 
+import eds_scikit.biology  # noqa: F401 --> To register functions
+
 # Remove SettingWithCopyWarning
 pd.options.mode.chained_assignment = None
 
New file: package-override shim for databricks.koalas:

@@ -0,0 +1,17 @@
+# This file is used to override the databricks.koalas package with the pyspark.pandas
+# package, if the databricks.koalas package is not available (python >= 3.8)
+import sys
+import pyarrow  # noqa: E402, F401
+
+old_sys_path = sys.path.copy()
+sys.path.remove(next((p for p in sys.path if "package-override" in p), None))
+databricks = sys.modules.pop("databricks")
+sys.modules.pop("databricks.koalas")
+try:
+    from databricks.koalas import *  # noqa: E402, F401, F403
+except ImportError:
+    from pyspark.pandas import *  # noqa: E402, F401, F403
+
+sys.modules["databricks"] = databricks
+sys.modules["databricks.koalas"] = sys.modules["pyspark.pandas"]
+sys.path[:] = old_sys_path
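
A rough usage sketch of what this shim enables, assuming the package-override directory has been placed on sys.path by the package __init__ shown above (none of this appears in the diff itself):

```python
# Hypothetical downstream usage: the legacy import keeps working even when the
# standalone koalas distribution is absent (Python >= 3.8), because the shim
# re-points databricks.koalas at pyspark.pandas.
from databricks import koalas as ks

kdf = ks.DataFrame({"person_id": [1, 2, 3]})
print(type(kdf))  # koalas DataFrame on Spark 2, pandas-on-Spark DataFrame on Spark 3
```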
Package-override shim for pyarrow:

@@ -21,18 +21,17 @@
 is the only one that resolves to this very module, still gets what it asked for:
 the pyarrow module's content.
 """
-
 import sys
 
+old_sys_path = sys.path.copy()
 sys.path.remove(next((p for p in sys.path if "package-override" in p), None))
 del sys.modules["pyarrow"]
-import pyarrow  # noqa: E402, F401
 
-try:
-    import pyarrow.ipc
+import pyarrow  # noqa: E402, F401
+from pyarrow.ipc import open_stream  # noqa: E402, F401
 
-    pyarrow.open_stream = pyarrow.ipc.open_stream
-except ImportError:
-    pass
+pyarrow.open_stream = open_stream
 
 from pyarrow import *  # noqa: F401, F403, E402
+
+sys.path[:] = old_sys_path
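
To illustrate what the restored alias is for: recent pyarrow releases dropped the top-level pyarrow.open_stream in favour of pyarrow.ipc.open_stream, so legacy callers need the assignment above. A self-contained sketch follows (the alias is set inline here so it runs on a stock pyarrow install; in eds-scikit the override module takes care of it):

```python
import io

import pyarrow as pa

# Restore the legacy alias inline, mirroring the override module above.
pa.open_stream = pa.ipc.open_stream

# Write a tiny record-batch stream to memory, then read it back through the
# legacy call path.
table = pa.table({"a": [1, 2, 3]})
sink = io.BytesIO()
with pa.ipc.new_stream(sink, table.schema) as writer:
    writer.write_table(table)

reader = pa.open_stream(sink.getvalue())
print(reader.read_all().equals(table))  # True
```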
pyproject.toml:

@@ -35,19 +35,18 @@ dependencies = [
     "pgpasslib>=1.1.0, <2.0.0",
     "psycopg2-binary>=2.9.0, <3.0.0",
     "pandas>=1.3.0, <2.0.0",
-    "numpy>=1.0.0, <1.20",
-    "koalas>=1.8.1, <2.0.0",
+    "numpy>=1.0.0",
     "altair>=5.0.0, <6.0.0",
     "loguru==0.7.0",
     "pypandoc==1.7.5",
-    "pyspark==2.4.3",
+    "pyspark",
     "pyarrow>=0.10.0",
     "pretty-html-table>=0.9.15, <0.10.0",
     "catalogue",
     "schemdraw>=0.15.0, <1.0.0",
-    "ipython>=7.32.0, <8.0.0",
-    "packaging==21.3",
-    "tomli==2.0.1",
+    "ipython>=7.32.0",
+    "packaging>=21.3",
+    "tomli>=2.0.1",
 ]
 dynamic = ['version']
 
@@ -66,6 +65,10 @@ Documentation = "https://aphp.github.io/eds-scikit"
 "Bug Tracker" = "https://github.com/aphp/eds-scikit/issues"
 
 [project.optional-dependencies]
+spark2 = [
+    "pyspark==2.4.3",
+    "koalas>=1.8.1,<2.0.0",
+]
 dev = [
     "black>=22.3.0, <23.0.0",
     "flake8==3.9.2",