Skip to content

Commit

Permalink
Revert "[importer] Avoid creating scratchdir outside of encryption zone (#3447)"
Browse files Browse the repository at this point in the history

This reverts commit 3d38dc8.

Change-Id: Iae508907cc23c6e381d3dfa6929eb877d028fca7
  • Loading branch information
wing2fly committed Aug 29, 2023
1 parent 3d38dc8 commit afd9469
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 125 deletions.
1 change: 0 additions & 1 deletion desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def __init__(self, file_status, parent_path):
self.blockSize = file_status['blockSize']
self.replication = file_status['replication']
self.aclBit = file_status.get('aclBit')
self.encBit = file_status.get('encBit')
self.fileId = file_status.get('fileId')

self.mode = int(file_status['permission'], 8)
Expand Down
5 changes: 1 addition & 4 deletions desktop/libs/indexer/src/indexer/indexers/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco
split = urlparse(source_path)
# Only for HDFS, import data and non-external table
if split.scheme in ('', 'hdfs') and oct(stats["mode"])[-1] != '7':
# check if the csv file is in encryption zone (encBit), then the scratch dir will be
# in the same directory
base_dir = parent_path if stats.encBit else self.fs.get_home_dir()
user_scratch_dir = base_dir + '/.scratchdir/%s' % str(uuid.uuid4()) # Make sure it's unique.
user_scratch_dir = self.fs.get_home_dir() + '/.scratchdir/%s' % str(uuid.uuid4()) # Make sure it's unique.
self.fs.do_as_user(self.user, self.fs.mkdir, user_scratch_dir, 0o0777)
self.fs.do_as_user(self.user, self.fs.rename, source['path'], user_scratch_dir)
if editor_type == 'impala' and impala_conf and impala_conf.USER_SCRATCH_DIR_PERMISSION.get():
Expand Down
120 changes: 0 additions & 120 deletions desktop/libs/indexer/src/indexer/indexers/sql_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
else:
from mock import patch, Mock, MagicMock

def mock_uuid():
  """Return a fixed UUID string.

  Used as a replacement for ``uuid.uuid4`` in tests so that generated names are
  the same on every run.
  """
  fixed_uuid = '52f840a8-3dde-434d-934a-2d6e06f3687e'
  return fixed_uuid

class TestSQLIndexer(object):

Expand Down Expand Up @@ -107,124 +105,6 @@ def destination_dict(key):
[statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]
)

@patch('uuid.uuid4', mock_uuid)
def test_create_table_from_a_file_to_csv_for_kms_encryption(self):
  """Verify the scratch dir location used when importing a CSV file into a table.

  Two scenarios are run against mocked filesystems:

  * the source file's stats report ``encBit`` set: the generated LOAD DATA path must
    point to a ``.scratchdir`` located next to the source file;
  * ``encBit`` is not set: the generated LOAD DATA path must point to a ``.scratchdir``
    under the directory returned by ``fs.get_home_dir()``.

  ``uuid.uuid4`` is patched with ``mock_uuid`` so the scratch dir name is predictable.
  """
  def mock_parent_path(path):
    # Drop the last path component ('/a/b/c.csv' -> '/a/b').
    return '/'.join(path.split('/')[:-1])

  class MockStat:
    """Minimal stand-in for file stats: attribute access plus ``stats['mode']``."""

    def __init__(self, encBit=True, mode=16877):
      self.encBit = encBit
      self.mode = mode

    def __getitem__(self, key):
      # Return the configured mode instead of a hard-coded constant so the
      # constructor argument is honoured; other keys still yield None.
      if key == 'mode':
        return self.mode
      return None

  def make_source(path):
    """Build a source mock whose item lookups describe a CSV file at `path`."""
    def source_dict(key):
      # A fresh dict (and fallback Mock) per lookup, so values are never shared between calls.
      return {
        'path': path,
        'format': {'quoteChar': '"', 'fieldSeparator': ','},
        'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}],
        'sourceType': 'hive'
      }.get(key, Mock())
    source = MagicMock()
    source.__getitem__.side_effect = source_dict
    return source

  def make_fs(enc_bit):
    """Build a filesystem mock whose stats report the given encryption bit."""
    return Mock(
      stats=Mock(
        return_value=MockStat(encBit=enc_bit)
      ),
      parent_path=mock_parent_path,
      get_home_dir=Mock(return_value='/user/test'),
    )

  def expected_statements(scratch_base):
    """Return the expected stripped statements, loading data from under `scratch_base`."""
    load_path = '%s/.scratchdir/%s/data.csv' % (scratch_base, mock_uuid())
    sql = u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table`
(
`id` int ) COMMENT "No comment!"
PARTITIONED BY (
`day` date )
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES ("separatorChar" = ",",
"quoteChar" = """,
"escapeChar" = "\\\\"
)
STORED AS TextFile TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false')
;
LOAD DATA INPATH '%s' INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101');
CREATE TABLE `default`.`export_table` COMMENT "No comment!"
STORED AS csv
TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only')
AS SELECT *
FROM `default`.`hue__tmp_export_table`;
DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;''' % load_path
    return [statement.strip() for statement in sql.split(';')]

  def destination_dict(key):
    return {
      'name': 'default.export_table',
      'tableFormat': 'csv',
      'importData': True,
      'isIceberg': False,
      'nonDefaultLocation': '/warehouse/tablespace/managed/hive/customer_stats.csv',
      'columns': [{'name': 'id', 'type': 'int'}],
      'partitionColumns': [{'name': 'day', 'type': 'date', 'partitionValue': '20200101'}],
      'description': 'No comment!',
      'sourceType': 'hive-1'
    }.get(key, Mock())
  destination = MagicMock()
  destination.__getitem__.side_effect = destination_dict

  # (source file path, encBit reported by stats, directory expected to hold .scratchdir)
  scenarios = (
    # source dir is in encryption zone, so the scratch dir is in the same dir
    ('/enc_zn/upload_dir/data.csv', True, '/enc_zn/upload_dir'),
    # source dir is not in encryption zone, so the scratch dir is in user's home dir
    ('/user/test/data.csv', False, '/user/test'),
  )

  for source_path, enc_bit, scratch_base in scenarios:
    source = make_source(source_path)
    fs = make_fs(enc_bit)

    notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination)

    assert_equal(
      expected_statements(scratch_base),
      [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]
    )

class MockRequest(object):
def __init__(self, fs=None, user=None):
Expand Down

0 comments on commit afd9469

Please sign in to comment.