From afd946967fc9f7cb8cb6b99d278c3f0e502ba1c4 Mon Sep 17 00:00:00 2001 From: Ying Chen Date: Mon, 28 Aug 2023 17:08:16 -0700 Subject: [PATCH] Revert "[importer] Avoid creating scratchdir outside of encryption zone (#3447)" This reverts commit 3d38dc89deee2d8e6c92787a0af4bde0edbb2112. Change-Id: Iae508907cc23c6e381d3dfa6929eb877d028fca7 --- .../hadoop/src/hadoop/fs/webhdfs_types.py | 1 - .../libs/indexer/src/indexer/indexers/sql.py | 5 +- .../indexer/src/indexer/indexers/sql_tests.py | 120 ------------------ 3 files changed, 1 insertion(+), 125 deletions(-) diff --git a/desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py b/desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py index d687495462e..999bd9e0cce 100644 --- a/desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py +++ b/desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py @@ -48,7 +48,6 @@ def __init__(self, file_status, parent_path): self.blockSize = file_status['blockSize'] self.replication = file_status['replication'] self.aclBit = file_status.get('aclBit') - self.encBit = file_status.get('encBit') self.fileId = file_status.get('fileId') self.mode = int(file_status['permission'], 8) diff --git a/desktop/libs/indexer/src/indexer/indexers/sql.py b/desktop/libs/indexer/src/indexer/indexers/sql.py index 8ea85e425c5..9881aaea4f7 100644 --- a/desktop/libs/indexer/src/indexer/indexers/sql.py +++ b/desktop/libs/indexer/src/indexer/indexers/sql.py @@ -173,10 +173,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco split = urlparse(source_path) # Only for HDFS, import data and non-external table if split.scheme in ('', 'hdfs') and oct(stats["mode"])[-1] != '7': - # check if the csv file is in encryption zone (encBit), then the scratch dir will be - # in the same directory - base_dir = parent_path if stats.encBit else self.fs.get_home_dir() - user_scratch_dir = base_dir + '/.scratchdir/%s' % str(uuid.uuid4()) # Make sure it's unique. + user_scratch_dir = self.fs.get_home_dir() + '/.scratchdir/%s' % str(uuid.uuid4()) # Make sure it's unique. self.fs.do_as_user(self.user, self.fs.mkdir, user_scratch_dir, 0o0777) self.fs.do_as_user(self.user, self.fs.rename, source['path'], user_scratch_dir) if editor_type == 'impala' and impala_conf and impala_conf.USER_SCRATCH_DIR_PERMISSION.get(): diff --git a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py index e5d885773d4..f83c66eb148 100644 --- a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py +++ b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py @@ -36,8 +36,6 @@ else: from mock import patch, Mock, MagicMock -def mock_uuid(): - return '52f840a8-3dde-434d-934a-2d6e06f3687e' class TestSQLIndexer(object): @@ -107,124 +105,6 @@ def destination_dict(key): [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')] ) - @patch('uuid.uuid4', mock_uuid) - def test_create_table_from_a_file_to_csv_for_kms_encryption(self): - def mock_parent_path(path): - return '/'.join(path.split('/')[:-1]) - - class MockStat: - def __init__(self, encBit=True, mode=16877): - self.encBit = encBit - self.mode = mode - - def __getitem__(self, key): - if key == 'mode': - return 16877 - - def enc_source_dict(key): - return { - 'path': '/enc_zn/upload_dir/data.csv', - 'format': {'quoteChar': '"', 'fieldSeparator': ','}, - 'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}], - 'sourceType': 'hive' - }.get(key, Mock()) - source = MagicMock() - source.__getitem__.side_effect = enc_source_dict - - def destination_dict(key): - return { - 'name': 'default.export_table', - 'tableFormat': 'csv', - 'importData': True, - 'isIceberg': False, - 'nonDefaultLocation': '/warehouse/tablespace/managed/hive/customer_stats.csv', - 'columns': [{'name': 'id', 'type': 'int'}], - 'partitionColumns': [{'name': 'day', 'type': 'date', 'partitionValue': '20200101'}], - 'description': 'No comment!', - 'sourceType': 'hive-1' - }.get(key, Mock()) - destination = MagicMock() - destination.__getitem__.side_effect = destination_dict - - fs = Mock( - stats=Mock( - return_value=MockStat() - ), - parent_path=mock_parent_path, - get_home_dir=Mock(return_value='/user/test'), - ) - - notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination) - - ### source dir is in encryption zone, so the scratch dir is in the same dir - assert_equal( - [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; -CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table` -( - `id` int ) COMMENT "No comment!" -PARTITIONED BY ( - `day` date ) -ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' - WITH SERDEPROPERTIES ("separatorChar" = ",", - "quoteChar" = """, - "escapeChar" = "\\\\" - ) - STORED AS TextFile TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false') -; -LOAD DATA INPATH '/enc_zn/upload_dir/.scratchdir/52f840a8-3dde-434d-934a-2d6e06f3687e/data.csv' INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101'); -CREATE TABLE `default`.`export_table` COMMENT "No comment!" - STORED AS csv -TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only') - AS SELECT * - FROM `default`.`hue__tmp_export_table`; -DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')], - [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')] - ) - - fs = Mock( - stats=Mock( - return_value=MockStat(encBit=False) - ), - parent_path=mock_parent_path, - get_home_dir=Mock(return_value='/user/test'), - ) - - def source_dict(key): - return { - 'path': '/user/test/data.csv', - 'format': {'quoteChar': '"', 'fieldSeparator': ','}, - 'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}], - 'sourceType': 'hive' - }.get(key, Mock()) - source = MagicMock() - source.__getitem__.side_effect = source_dict - - notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination) - - ### source dir is not in encryption zone, so the scratch dir is in user's home dir - assert_equal( - [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; -CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table` -( - `id` int ) COMMENT "No comment!" -PARTITIONED BY ( - `day` date ) -ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' - WITH SERDEPROPERTIES ("separatorChar" = ",", - "quoteChar" = """, - "escapeChar" = "\\\\" - ) - STORED AS TextFile TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false') -; -LOAD DATA INPATH '/user/test/.scratchdir/52f840a8-3dde-434d-934a-2d6e06f3687e/data.csv' INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101'); -CREATE TABLE `default`.`export_table` COMMENT "No comment!" - STORED AS csv -TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only') - AS SELECT * - FROM `default`.`hue__tmp_export_table`; -DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')], - [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')] - ) class MockRequest(object): def __init__(self, fs=None, user=None):