Skip to content

Commit c1c9af5

Browse files
mjziolkomjziolko
and
mjziolko
authored
[CIVIS-6409] Change civis_file_to_table to not use table ids to validate that a table exists (#467)
* [CIVIS-6409] Change civis_file_to_table to not use table ids to validate that a table exists * add changelog notes, fix vulns * joblib 1.3 doesn't work * requests > 2.29.0 installs an incompatible version of urllib3 * problem with urllib3 > 2 for tests, bump patch version * remove safety, change numpy version for python 3.7 --------- Co-authored-by: mjziolko <mjziolko@civisanalytics.com>
1 parent 96a31a7 commit c1c9af5

File tree

10 files changed

+44
-32
lines changed

10 files changed

+44
-32
lines changed

.circleci/config.yml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,6 @@ workflows:
8080
python setup.py sdist bdist_wheel && \
8181
twine check dist/`ls dist/ | grep .tar.gz` && \
8282
twine check dist/`ls dist/ | grep .whl`
83-
- pre-build:
84-
name: safety
85-
command-run: |
86-
pip install -e . && \
87-
pip install --upgrade safety && \
88-
safety --version && \
89-
safety check
9083
- pre-build:
9184
name: bandit
9285
command-run: |
@@ -98,7 +91,6 @@ workflows:
9891
- flake8
9992
- sphinx-build
10093
- twine
101-
- safety
10294
- bandit
10395
matrix:
10496
parameters:

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1313
### Fixed
1414
### Security
1515

16+
## 1.16.1 - 2023-07-10
17+
### Changed
18+
- Changed civis_file_to_table to not rely on table ids for determining a table's existence (#464)
19+
1620
## 1.16.0 - 2021-12-14
1721
### Added
1822
- Added documentation around testing code using mocking (#447)

civis/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.16.0"
1+
__version__ = "1.16.1"

civis/cli/_cli_commands.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def notebooks_download_cmd(notebook_id, path):
240240
"""Download a notebook to a specified local path."""
241241
client = civis.APIClient()
242242
info = client.notebooks.get(notebook_id)
243-
response = requests.get(info['notebook_url'], stream=True)
243+
response = requests.get(info['notebook_url'], stream=True, timeout=60)
244244
response.raise_for_status()
245245
chunk_size = 32 * 1024
246246
chunked = response.iter_content(chunk_size)

civis/io/_files.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def _post():
110110
# requests will not stream multipart/form-data, but _single_upload
111111
# is only used for small file objects or non-seekable file objects
112112
# which can't be streamed with using requests-toolbelt anyway
113-
response = requests.post(url, files=form_key)
113+
response = requests.post(url, files=form_key, timeout=60)
114114

115115
if not response.ok:
116116
msg = _get_aws_error_message(response)
@@ -151,7 +151,8 @@ def _upload_part_base(item, file_path, part_size, file_size):
151151
with open(file_path, 'rb') as fin:
152152
fin.seek(offset)
153153
partial_buf = BufferedPartialReader(fin, num_bytes)
154-
part_response = requests.put(part_url, data=partial_buf)
154+
part_response = requests.put(part_url, data=partial_buf,
155+
timeout=60)
155156

156157
if not part_response.ok:
157158
msg = _get_aws_error_message(part_response)
@@ -368,7 +369,7 @@ def _download_url_to_buf():
368369
# Reset the buffer in case we had to retry.
369370
buf.seek(buf_orig_position)
370371

371-
response = requests.get(url, stream=True)
372+
response = requests.get(url, stream=True, timeout=60)
372373
response.raise_for_status()
373374
chunked = response.iter_content(CHUNK_SIZE)
374375
for lines in chunked:

civis/io/_tables.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from civis import APIClient
2020
from civis._utils import maybe_get_random_name
21-
from civis.base import EmptyResultError, CivisImportError
21+
from civis.base import EmptyResultError, CivisImportError, CivisAPIError
2222
from civis.futures import CivisFuture
2323
from civis.io import civis_to_file, file_to_civis
2424
from civis.utils import run_job
@@ -347,7 +347,7 @@ def f(x):
347347

348348
data = pd.read_csv(url, **kwargs)
349349
else:
350-
response = requests.get(url, stream=True)
350+
response = requests.get(url, stream=True, timeout=60)
351351
response.raise_for_status()
352352

353353
with io.StringIO() as buf:
@@ -1063,12 +1063,16 @@ def civis_file_to_table(file_id, database, table, client=None,
10631063
)
10641064

10651065
try:
1066-
client.get_table_id(table, database)
1066+
client.databases.get_schemas_tables(db_id, schema, table_name)
10671067
log.debug('Table {table} already exists - skipping column '
10681068
'detection'.format(table=table))
10691069
table_exists = True
1070-
except ValueError:
1070+
except CivisAPIError as e:
10711071
table_exists = False
1072+
if e.status_code != 404:
1073+
warnings.warn("Unexpected error when checking if table %s.%s "
1074+
"exists on database %d:\n%s"
1075+
% (schema, table_name, db_id, str(e)))
10721076

10731077
sql_types_provided = False
10741078
if table_columns:
@@ -1184,7 +1188,7 @@ def _decompress_stream(response, buf, write_bytes=True, encoding="utf-8"):
11841188

11851189

11861190
def _download_file(url, local_path, headers, compression):
1187-
response = requests.get(url, stream=True)
1191+
response = requests.get(url, stream=True, timeout=60)
11881192
response.raise_for_status()
11891193

11901194
# gzipped buffers can be concatenated so write headers as gzip

civis/resources/civis_api_spec.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

civis/tests/test_io.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,9 @@ def test_civis_file_to_table_table_exists(self,
222222
self.mock_client.get_database_id.return_value = 42
223223
self.mock_client.default_credential = 713
224224

225-
self.mock_client.get_table_id.return_value = 42
225+
self.mock_client.databases.get_schemas_tables.return_value = Response({
226+
'name': 'table1'
227+
})
226228
m_process_cleaning_results.return_value = (
227229
[mock_cleaned_file_id],
228230
True, # headers
@@ -320,7 +322,8 @@ def test_civis_file_to_table_table_doesnt_exist(
320322
self.mock_client.get_database_id.return_value = 42
321323
self.mock_client.default_credential = 713
322324

323-
self.mock_client.get_table_id.side_effect = ValueError('no table')
325+
self.mock_client.databases.get_schemas_tables.side_effect =\
326+
MockAPIError(404)
324327
mock_columns = [{'name': 'foo', 'sql_type': 'INTEGER'}]
325328
m_process_cleaning_results.return_value = (
326329
[mock_cleaned_file_id],
@@ -418,7 +421,8 @@ def test_civis_file_to_table_table_doesnt_exist_all_sql_types_missing(
418421
.id = mock_import_id
419422
self.mock_client.get_database_id.return_value = 42
420423
self.mock_client.default_credential = 713
421-
self.mock_client.get_table_id.side_effect = ValueError('no table')
424+
self.mock_client.databases.get_schemas_tables.side_effect =\
425+
MockAPIError(404)
422426
table_columns = [{'name': 'a', 'sql_type': ''},
423427
{'name': 'b', 'sql_type': ''}]
424428
detected_columns = [{'name': 'a', 'sql_type': 'INTEGER'},
@@ -519,7 +523,8 @@ def test_civis_file_to_table_table_does_not_exist_some_sql_types_missing(
519523
.id = mock_import_id
520524
self.mock_client.get_database_id.return_value = 42
521525
self.mock_client.default_credential = 713
522-
self.mock_client.get_table_id.side_effect = ValueError('no table')
526+
self.mock_client.databases.get_schemas_tables.side_effect =\
527+
MockAPIError(404)
523528
table_columns = [{'name': 'a', 'sql_type': 'INT'},
524529
{'name': 'b', 'sql_type': ''}]
525530

@@ -553,7 +558,8 @@ def test_civis_file_to_table_table_columns_keys_misspelled(
553558
.id = mock_import_id
554559
self.mock_client.get_database_id.return_value = 42
555560
self.mock_client.default_credential = 713
556-
self.mock_client.get_table_id.side_effect = ValueError('no table')
561+
self.mock_client.databases.get_schemas_tables.side_effect =\
562+
MockAPIError(404)
557563
table_columns = [{'name': 'a', 'sqlType': 'INT'},
558564
{'name': 'b', 'bad_type': ''}]
559565

@@ -594,7 +600,8 @@ def run_subtest(mock_file_ids):
594600
self.mock_client.get_database_id.return_value = 42
595601
self.mock_client.default_credential = 713
596602

597-
self.mock_client.get_table_id.side_effect = ValueError('no table')
603+
self.mock_client.databases.get_schemas_tables.side_effect =\
604+
MockAPIError(404)
598605
table_columns = [{'name': 'foo', 'sql_type': 'INTEGER'},
599606
{'name': 'bar', 'sql_type': 'VARCHAR(42)'}]
600607
m_process_cleaning_results.return_value = (
@@ -707,7 +714,8 @@ def test_civis_file_to_table_multi_file(
707714
self.mock_client.get_database_id.return_value = 42
708715
self.mock_client.default_credential = 713
709716

710-
self.mock_client.get_table_id.side_effect = ValueError('no table')
717+
self.mock_client.databases.get_schemas_tables.side_effect =\
718+
MockAPIError(404)
711719
mock_columns = [{'name': 'foo', 'sql_type': 'INTEGER'}]
712720
m_process_cleaning_results.return_value = (
713721
mock_cleaned_file_ids,
@@ -1325,7 +1333,7 @@ def test_civis_to_file_local(mock_requests):
13251333
assert _fin.read() == 'abcdef'
13261334
mock_civis.files.get.assert_called_once_with(137)
13271335
mock_requests.get.assert_called_once_with(
1328-
mock_civis.files.get.return_value.file_url, stream=True)
1336+
mock_civis.files.get.return_value.file_url, stream=True, timeout=60)
13291337

13301338

13311339
@pytest.mark.civis_to_file
@@ -1366,7 +1374,7 @@ def mock_iter_content(_):
13661374
mock_civis.files.get.assert_called_once_with(137)
13671375
assert mock_requests.get.call_count == 2
13681376
mock_requests.get.assert_called_with(
1369-
mock_civis.files.get.return_value.file_url, stream=True)
1377+
mock_civis.files.get.return_value.file_url, stream=True, timeout=60)
13701378

13711379

13721380
@pytest.mark.file_to_civis
@@ -1387,7 +1395,8 @@ def test_file_to_civis(mock_requests, input_filename):
13871395
assert fid == expected_id
13881396
mock_civis.files.post.assert_called_once_with(civis_name, expires_at=None)
13891397
mock_requests.post.assert_called_once_with(
1390-
mock_civis.files.post.return_value.upload_url, files=mock.ANY)
1398+
mock_civis.files.post.return_value.upload_url, files=mock.ANY,
1399+
timeout=60)
13911400

13921401

13931402
@pytest.mark.split_schema_tablename

dev-requirements.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# Testing and linting
2-
pytest==6.2.5
2+
pytest==7.2.1
33
flake8==4.0.1
44
pytest-cov==3.0.0
5+
urllib3==1.26.16 # version 2 is incompatible with our vcr version
56
vcrpy==1.11.1 # higher versions would break tests
67
vcrpy-unittest==0.1.7
78
twine==3.6.0
@@ -13,7 +14,8 @@ sphinx_rtd_theme==1.0.0
1314
numpydoc==1.1.0
1415

1516
# CivisML
16-
numpy==1.21.4
17+
numpy==1.21.4 ; python_version <= '3.7'
18+
numpy==1.22.4 ; python_version >= '3.8'
1719
scikit-learn==1.0.1
1820
scipy==1.7.2
1921
feather-format==0.4.1

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@ click>=6.0,<9
33
jsonref>=0.1,<=0.2.99
44
requests>=2.12.0,<3
55
jsonschema>=2.5.1,<5
6-
joblib>=0.11,<2
6+
joblib>=0.11,<1.3
77
cloudpickle>=0.2,<3
88
tenacity>=6.2,<9

0 commit comments

Comments
 (0)