From afd946967fc9f7cb8cb6b99d278c3f0e502ba1c4 Mon Sep 17 00:00:00 2001
From: Ying Chen <yingchen@cloudera.com>
Date: Mon, 28 Aug 2023 17:08:16 -0700
Subject: [PATCH] Revert "[importer] Avoid creating scratchdir outside of
 encryption zone (#3447)"

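This reverts commit 3d38dc89deee2d8e6c92787a0af4bde0edbb2112.

After this change the importer again always creates its scratch directory
under the user's home directory, the `encBit` attribute is no longer read
from the WebHDFS file status, and the KMS-encryption importer test is
removed.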

Change-Id: Iae508907cc23c6e381d3dfa6929eb877d028fca7
---
 .../hadoop/src/hadoop/fs/webhdfs_types.py     |   1 -
 .../libs/indexer/src/indexer/indexers/sql.py  |   5 +-
 .../indexer/src/indexer/indexers/sql_tests.py | 120 ------------------
 3 files changed, 1 insertion(+), 125 deletions(-)

diff --git a/desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py b/desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py
index d687495462e..999bd9e0cce 100644
--- a/desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py
+++ b/desktop/libs/hadoop/src/hadoop/fs/webhdfs_types.py
@@ -48,7 +48,6 @@ def __init__(self, file_status, parent_path):
     self.blockSize = file_status['blockSize']
     self.replication = file_status['replication']
     self.aclBit = file_status.get('aclBit')
-    self.encBit = file_status.get('encBit')
     self.fileId = file_status.get('fileId')
 
     self.mode = int(file_status['permission'], 8)
diff --git a/desktop/libs/indexer/src/indexer/indexers/sql.py b/desktop/libs/indexer/src/indexer/indexers/sql.py
index 8ea85e425c5..9881aaea4f7 100644
--- a/desktop/libs/indexer/src/indexer/indexers/sql.py
+++ b/desktop/libs/indexer/src/indexer/indexers/sql.py
@@ -173,10 +173,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco
       split = urlparse(source_path)
       # Only for HDFS, import data and non-external table
       if split.scheme in ('', 'hdfs') and oct(stats["mode"])[-1] != '7':
-        # check if the csv file is in encryption zone (encBit), then the scratch dir will be
-        # in the same directory
-        base_dir = parent_path if stats.encBit else self.fs.get_home_dir()
-        user_scratch_dir = base_dir + '/.scratchdir/%s' % str(uuid.uuid4()) # Make sure it's unique.
+        user_scratch_dir = self.fs.get_home_dir() + '/.scratchdir/%s' % str(uuid.uuid4()) # Make sure it's unique.
         self.fs.do_as_user(self.user, self.fs.mkdir, user_scratch_dir, 0o0777)
         self.fs.do_as_user(self.user, self.fs.rename, source['path'], user_scratch_dir)
         if editor_type == 'impala' and impala_conf and impala_conf.USER_SCRATCH_DIR_PERMISSION.get():
diff --git a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py
index e5d885773d4..f83c66eb148 100644
--- a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py
+++ b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py
@@ -36,8 +36,6 @@
 else:
   from mock import patch, Mock, MagicMock
 
-def mock_uuid():
-  return '52f840a8-3dde-434d-934a-2d6e06f3687e'
 
 class TestSQLIndexer(object):
 
@@ -107,124 +105,6 @@ def destination_dict(key):
     [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]
   )
 
-  @patch('uuid.uuid4', mock_uuid)
-  def test_create_table_from_a_file_to_csv_for_kms_encryption(self):
-    def mock_parent_path(path):
-      return '/'.join(path.split('/')[:-1])
-
-    class MockStat:
-      def __init__(self, encBit=True, mode=16877):
-        self.encBit = encBit
-        self.mode = mode
-
-      def __getitem__(self, key):
-        if key == 'mode':
-          return 16877
-
-    def enc_source_dict(key):
-      return {
-        'path': '/enc_zn/upload_dir/data.csv',
-        'format': {'quoteChar': '"', 'fieldSeparator': ','},
-        'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}],
-        'sourceType': 'hive'
-      }.get(key, Mock())
-    source = MagicMock()
-    source.__getitem__.side_effect = enc_source_dict
-
-    def destination_dict(key):
-      return {
-        'name': 'default.export_table',
-        'tableFormat': 'csv',
-        'importData': True,
-        'isIceberg': False,
-        'nonDefaultLocation': '/warehouse/tablespace/managed/hive/customer_stats.csv',
-        'columns': [{'name': 'id', 'type': 'int'}],
-        'partitionColumns': [{'name': 'day', 'type': 'date', 'partitionValue': '20200101'}],
-        'description': 'No comment!',
-        'sourceType': 'hive-1'
-      }.get(key, Mock())
-    destination = MagicMock()
-    destination.__getitem__.side_effect = destination_dict
-
-    fs = Mock(
-        stats=Mock(
-          return_value=MockStat()
-        ),
-        parent_path=mock_parent_path,
-        get_home_dir=Mock(return_value='/user/test'),
-    )
-
-    notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination)
-
-    ### source dir is in encryption zone, so the scratch dir is in the same dir
-    assert_equal(
-      [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
-CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table`
-(
-  `id` int ) COMMENT "No comment!"
-PARTITIONED BY (
-  `day` date )
-ROW FORMAT   SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
-  WITH SERDEPROPERTIES ("separatorChar" = ",",
-    "quoteChar"     = """,
-    "escapeChar"    = "\\\\"
-    )
-  STORED AS TextFile TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false')
-;
-LOAD DATA INPATH '/enc_zn/upload_dir/.scratchdir/52f840a8-3dde-434d-934a-2d6e06f3687e/data.csv' INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101');
-CREATE TABLE `default`.`export_table` COMMENT "No comment!"
-        STORED AS csv
-TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only')
-        AS SELECT *
-        FROM `default`.`hue__tmp_export_table`;
-DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')],
-    [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]
-  )
-
-    fs = Mock(
-        stats=Mock(
-          return_value=MockStat(encBit=False)
-        ),
-        parent_path=mock_parent_path,
-        get_home_dir=Mock(return_value='/user/test'),
-    )
-
-    def source_dict(key):
-      return {
-        'path': '/user/test/data.csv',
-        'format': {'quoteChar': '"', 'fieldSeparator': ','},
-        'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}],
-        'sourceType': 'hive'
-      }.get(key, Mock())
-    source = MagicMock()
-    source.__getitem__.side_effect = source_dict
-
-    notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination)
-
-    ### source dir is not in encryption zone, so the scratch dir is in user's home dir
-    assert_equal(
-      [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
-CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table`
-(
-  `id` int ) COMMENT "No comment!"
-PARTITIONED BY (
-  `day` date )
-ROW FORMAT   SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
-  WITH SERDEPROPERTIES ("separatorChar" = ",",
-    "quoteChar"     = """,
-    "escapeChar"    = "\\\\"
-    )
-  STORED AS TextFile TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false')
-;
-LOAD DATA INPATH '/user/test/.scratchdir/52f840a8-3dde-434d-934a-2d6e06f3687e/data.csv' INTO TABLE `default`.`hue__tmp_export_table` PARTITION (day='20200101');
-CREATE TABLE `default`.`export_table` COMMENT "No comment!"
-        STORED AS csv
-TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only')
-        AS SELECT *
-        FROM `default`.`hue__tmp_export_table`;
-DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')],
-      [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]
-    )
 
 class MockRequest(object):
   def __init__(self, fs=None, user=None):