Skip to content

Commit

Permalink
[Importer] Adding an option to copy and not just move files
Browse files Browse the repository at this point in the history
  • Loading branch information
agl29 committed Nov 7, 2023
1 parent ea3a7ba commit 115f487
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 16 deletions.
6 changes: 5 additions & 1 deletion desktop/libs/indexer/src/indexer/indexers/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco
source_path = source['path']
load_data = destination['importData']
isIceberg = destination['isIceberg']
copyFile = destination['useCopy']

external = not destination['useDefaultLocation']
external_path = destination['nonDefaultLocation']
Expand Down Expand Up @@ -166,7 +167,10 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco
# If the location is a directory and not just the file, create a data dir and move (or copy) the file there. Make sure it's unique.
external_path = external_path + '/%s%s_table' % (external_file_name, str(uuid.uuid4()))
self.fs.mkdir(external_path)
self.fs.rename(source_path, external_path)
if copyFile:
self.fs.copy(source_path, external_path)
else:
self.fs.rename(source_path, external_path)
elif load_data: # We'll use load data command
parent_path = self.fs.parent_path(source_path)
stats = self.fs.stats(parent_path)
Expand Down
24 changes: 12 additions & 12 deletions desktop/libs/indexer/src/indexer/indexers/sql_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def test_generate_create_text_table_with_data_partition():
u'outputFormats': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr index', u'value': u'index'}],
u'customMapDelimiter': u'\\003', u'showProperties': False, u'useDefaultLocation': True, u'description': u'',
u'primaryKeyObjects': [], u'customFieldDelimiter': u',', u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False,
u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1,
u'useCopy': False, u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1,
u'name': u'VALUES', u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [],
u'outputFormat': u'table', u'nonDefaultLocation': u'/user/romain/customer_stats.csv', u'name': u'default.customer_stats',
u'tableFormat': u'text', 'ouputFormat': u'table',
Expand Down Expand Up @@ -371,7 +371,7 @@ def test_generate_create_kudu_table_with_data():
u'description': u'Big Data', u'primaryKeyObjects': [{u'operations': [], u'comment': u'', u'name': u'id', u'level': 0,
u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False,
u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}], u'customFieldDelimiter': u',',
u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, u'databaseName': u'default',
u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, u'useCopy': False, u'databaseName': u'default',
u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES',
u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [u'id'],
u'outputFormat': u'table', u'nonDefaultLocation': u'/user/admin/index_data.csv', u'name': u'index_data',
Expand Down Expand Up @@ -527,9 +527,9 @@ def test_generate_create_parquet_table():
'''"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},{"value":"csv","name":"Csv"},'''
'''{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},{"value":"orc",'''
'''"name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],"importData":true,'''
'''"isIceberg":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,'''
'''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",'''
'''"customRegexp":""}'''
'''"isIceberg":false,"useCopy":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv",'''
'''"hasHeader":true,"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002",'''
'''"customMapDelimiter":"\\\\003","customRegexp":""}'''
)

path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
Expand Down Expand Up @@ -619,9 +619,9 @@ def test_generate_create_iceberg_table():
'''"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},{"value":"csv","name":"Csv"},'''
'''{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},{"value":"orc",'''
'''"name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],"importData":true,'''
'''"isIceberg":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,'''
'''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",'''
'''"customRegexp":""}'''
'''"isIceberg":true,"useCopy":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv",'''
'''"hasHeader":true,"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002",'''
'''"customMapDelimiter":"\\\\003","customRegexp":""}'''
)

path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
Expand Down Expand Up @@ -715,7 +715,7 @@ def test_generate_create_orc_table_transactional():
'''{"value":"orc","name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],'''
'''"importData":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,'''
'''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",'''
'''"customRegexp":"","isIceberg":false}'''
'''"customRegexp":"","isIceberg":false,"useCopy":false}'''
)

path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
Expand Down Expand Up @@ -781,7 +781,7 @@ def test_generate_create_empty_kudu_table():
'''"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys": ["acct_client"],"primaryKeyObjects":[],"importData":false,'''
'''"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":false,"useCustomDelimiters":'''
'''false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003","customRegexp":"",'''
'''"isIceberg":false}'''
'''"isIceberg":false,"useCopy":false}'''
)

path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
Expand Down Expand Up @@ -899,8 +899,8 @@ def test_create_ddl_with_nonascii():
u'rdbmsSplitByColumn': [], u'existingTargetUrl': u'', u'channelSinkTypes':
[{u'name': u'This topic', u'value': u'kafka'}, {u'name': u'Solr', u'value': u'solr'},
{u'name': u'HDFS', u'value': u'hdfs'}], u'defaultName': u'default.renamed_chinese_cities_gb2312',
u'isTransactionalUpdateEnabled': False, u'importData': True, u'isIceberg': False, u'databaseName': u'default',
u'indexerRunJob': False, u'indexerReplicationFactor': 1, u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN':
u'isTransactionalUpdateEnabled': False, u'importData': True, u'isIceberg': False, u'useCopy': False, u'databaseName':
u'default', u'indexerRunJob': False, u'indexerReplicationFactor': 1, u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN':
{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=',
u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [], u'indexerConfigSet': u'',
u'sqoopJobLibPaths': [{u'path': u''}], u'outputFormat': u'table',
Expand Down
17 changes: 14 additions & 3 deletions desktop/libs/indexer/src/indexer/templates/importer.mako
Original file line number Diff line number Diff line change
Expand Up @@ -660,12 +660,12 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode }
<span data-bind="visible: showProperties">
<div class="control-group">
<label class="checkbox inline-block" data-bind="visible: tableFormat() != 'kudu'">
<input data-hue-analytics="importer:store-in-default-localtion-checkbox-interaction" type="checkbox" data-bind="checked: useDefaultLocation, disable: isIceberg"> ${_('Store in Default location')}
<input data-hue-analytics="importer:store-in-default-localtion-checkbox-interaction" type="checkbox" data-bind="checked: useDefaultLocation, disable: isIceberg() || useCopy()"> ${_('Store in Default location')}
</label>
</div>
<div class="control-group" data-bind="visible: isTransactionalVisible">
<label class="checkbox inline-block">
<input type="checkbox" data-hue-analytics="importer:is-transactional-checkbox-interaction" data-bind="checked: isTransactional, disable: isIceberg"> ${_('Transactional table')}
<input type="checkbox" data-hue-analytics="importer:is-transactional-checkbox-interaction" data-bind="checked: isTransactional, disable: isIceberg() || useCopy()"> ${_('Transactional table')}
</label>
<label class="checkbox inline-block" title="${_('Full transactional support available in Hive with ORC')}">
<input type="checkbox" data-bind="checked: isInsertOnly, enable: isTransactionalUpdateEnabled"> ${_('Insert only')}
Expand All @@ -690,6 +690,15 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode }
</label>
</div>
<div class="control-group" data-bind="visible: !useDefaultLocation() && !isTransactional() && $root.createWizard.source.inputFormat() === 'file'">
<label class="checkbox inline-block">
<input data-hue-analytics="importer:useCopy-checkbox-interaction" type="checkbox" data-bind="checked: useCopy"> ${_('Copy file')}
</label>
<a href="javascript:void(0)" style="display: inline" data-trigger="hover" data-toggle="popover" data-placement="right" rel="popover" title="${ _('Choosing this option will copy the file instead of moving it to the new location for the table importer, and ensuring the original file remains unchanged.') }">
<i class="fa fa-info-circle"></i>
</a>
</div>
<div class="control-group">
<label><div>${ _('Description') }</div>
<input type="text" class="form-control input-xxlarge" data-bind="value: description, valueUpdate: 'afterkeydown'" placeholder="${ _('Description') }">
Expand Down Expand Up @@ -2650,7 +2659,7 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode }
if ( ['avro', 'orc'].indexOf(self.tableFormat()) === -1 || vm.sourceType === 'impala') {
self.tableFormat('parquet');
}
}
}
else {
self.useDefaultLocation(true);
self.isTransactional(self.isTransactionalVisible());
Expand All @@ -2659,6 +2668,8 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode }
window.hueAnalytics.log('importer', 'is-iceberg/' + val);
});
self.useCopy = ko.observable(false);
self.hasHeader = ko.observable(false);
self.useCustomDelimiters = ko.observable(false);
Expand Down

0 comments on commit 115f487

Please sign in to comment.