%endif
diff --git a/desktop/core/src/desktop/templates/hue.mako b/desktop/core/src/desktop/templates/hue.mako
index 50dee07a29c..c012831c5f5 100644
--- a/desktop/core/src/desktop/templates/hue.mako
+++ b/desktop/core/src/desktop/templates/hue.mako
@@ -20,7 +20,7 @@
from desktop import conf
from desktop.auth.backend import is_admin
from desktop.conf import ENABLE_HUE_5, has_multi_clusters
- from desktop.lib.i18n import smart_unicode
+ from desktop.lib.i18n import smart_str
from desktop.models import hue_version
from desktop.views import _ko, commonshare, login_modal
from desktop.webpack_utils import get_hue_bundles
@@ -328,7 +328,7 @@ ${ hueAceAutocompleter.hueAceAutocompleter() }
${ commonHeaderFooterComponents.header_pollers(user, is_s3_enabled, apps) }
% if request is not None:
-${ smart_unicode(login_modal(request).content) | n,unicode }
+${ smart_str(login_modal(request).content) | n,unicode }
% endif
diff --git a/desktop/core/src/desktop/templates/ko_editor.mako b/desktop/core/src/desktop/templates/ko_editor.mako
index ddd96dd39d8..6296fec1639 100644
--- a/desktop/core/src/desktop/templates/ko_editor.mako
+++ b/desktop/core/src/desktop/templates/ko_editor.mako
@@ -17,7 +17,6 @@
<%!
import sys
from desktop import conf
-from desktop.lib.i18n import smart_unicode
from desktop.views import _ko
if sys.version_info[0] > 2:
diff --git a/desktop/core/src/desktop/templates/ko_metastore.mako b/desktop/core/src/desktop/templates/ko_metastore.mako
index 8712a29f176..8c66d65d514 100644
--- a/desktop/core/src/desktop/templates/ko_metastore.mako
+++ b/desktop/core/src/desktop/templates/ko_metastore.mako
@@ -17,7 +17,6 @@
<%!
import sys
from desktop import conf
-from desktop.lib.i18n import smart_unicode
if sys.version_info[0] > 2:
from django.utils.translation import gettext as _
else:
diff --git a/desktop/core/src/desktop/templates/logs.mako b/desktop/core/src/desktop/templates/logs.mako
index a059e8ebc2f..f9394d680c5 100644
--- a/desktop/core/src/desktop/templates/logs.mako
+++ b/desktop/core/src/desktop/templates/logs.mako
@@ -18,7 +18,7 @@ import re
import sys
from desktop.lib.conf import BoundConfig
-from desktop.lib.i18n import smart_unicode
+from desktop.lib.i18n import smart_str
from desktop.views import commonheader, commonfooter
if sys.version_info[0] > 2:
@@ -101,7 +101,7 @@ ${ layout.menubar(section='log_view') }
diff --git a/desktop/core/src/desktop/templates/popup_error.mako b/desktop/core/src/desktop/templates/popup_error.mako
index eebe7d2c0cb..2b38c8338fd 100644
--- a/desktop/core/src/desktop/templates/popup_error.mako
+++ b/desktop/core/src/desktop/templates/popup_error.mako
@@ -18,7 +18,7 @@
import sys
from desktop.views import commonheader, commonfooter
-from desktop.lib.i18n import smart_unicode
+from desktop.lib.i18n import smart_str
from desktop.auth.backend import is_admin
if sys.version_info[0] > 2:
@@ -38,9 +38,9 @@ ${ commonheader(title, "", user, request, "40px") | n,unicode }
-${ smart_unicode(message) }
+${ smart_str(message) }
% if detail:
-${ smart_unicode(detail) }
+${ smart_str(detail) }
% endif
@@ -65,9 +65,9 @@ ${ commonheader(title, "", user, request, "40px") | n,unicode }
% for (file_name, line_number, function_name, text) in traceback:
- ${smart_unicode(file_name) or ""} |
- ${smart_unicode(line_number) or ""} |
- ${smart_unicode(function_name) or ""} |
+ ${smart_str(file_name) or ""} |
+ ${smart_str(line_number) or ""} |
+ ${smart_str(function_name) or ""} |
% endfor
diff --git a/desktop/core/src/desktop/test_data/hue_5.10.db b/desktop/core/src/desktop/test_data/hue_5.10.db
index b89f25024ea..cf0179c357a 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.10.db and b/desktop/core/src/desktop/test_data/hue_5.10.db differ
diff --git a/desktop/core/src/desktop/test_data/hue_5.11.db b/desktop/core/src/desktop/test_data/hue_5.11.db
index 9c5c99c5f14..f4ee9c6b5dd 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.11.db and b/desktop/core/src/desktop/test_data/hue_5.11.db differ
diff --git a/desktop/core/src/desktop/test_data/hue_5.12.db b/desktop/core/src/desktop/test_data/hue_5.12.db
index e380f0f20f7..7abc31a993d 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.12.db and b/desktop/core/src/desktop/test_data/hue_5.12.db differ
diff --git a/desktop/core/src/desktop/test_data/hue_5.13.db b/desktop/core/src/desktop/test_data/hue_5.13.db
index 1ae8fd78ca2..92599865759 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.13.db and b/desktop/core/src/desktop/test_data/hue_5.13.db differ
diff --git a/desktop/core/src/desktop/test_data/hue_5.14.db b/desktop/core/src/desktop/test_data/hue_5.14.db
index af0075caa63..fad3211eb6c 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.14.db and b/desktop/core/src/desktop/test_data/hue_5.14.db differ
diff --git a/desktop/core/src/desktop/test_data/hue_5.15.db b/desktop/core/src/desktop/test_data/hue_5.15.db
index ff640ec121e..14c1f0a1629 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.15.db and b/desktop/core/src/desktop/test_data/hue_5.15.db differ
diff --git a/desktop/core/src/desktop/test_data/hue_5.7.db b/desktop/core/src/desktop/test_data/hue_5.7.db
index 6ed7c12f57a..efa6a19caed 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.7.db and b/desktop/core/src/desktop/test_data/hue_5.7.db differ
diff --git a/desktop/core/src/desktop/test_data/hue_5.8.db b/desktop/core/src/desktop/test_data/hue_5.8.db
index f5d90beab0d..f8224c236e4 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.8.db and b/desktop/core/src/desktop/test_data/hue_5.8.db differ
diff --git a/desktop/core/src/desktop/test_data/hue_5.9.db b/desktop/core/src/desktop/test_data/hue_5.9.db
index 81f83f62768..f97346c362f 100644
Binary files a/desktop/core/src/desktop/test_data/hue_5.9.db and b/desktop/core/src/desktop/test_data/hue_5.9.db differ
diff --git a/desktop/core/src/desktop/tests.py b/desktop/core/src/desktop/tests.py
index 52b3a634105..e367d992110 100644
--- a/desktop/core/src/desktop/tests.py
+++ b/desktop/core/src/desktop/tests.py
@@ -1455,7 +1455,7 @@ def test_db_migrations_sqlite():
'PASSWORD': '',
'HOST': '',
'PORT': '',
- 'OPTIONS': {} if sys.version_info[0] > 2 else '',
+ 'OPTIONS': {},
'ATOMIC_REQUESTS': True,
'CONN_MAX_AGE': 0,
}
diff --git a/desktop/core/src/desktop/views.py b/desktop/core/src/desktop/views.py
index c033cacd2cb..7a14bd113dc 100644
--- a/desktop/core/src/desktop/views.py
+++ b/desktop/core/src/desktop/views.py
@@ -343,9 +343,9 @@ def download_log_view(request):
# in case it is rather big. So we write it to a file line by line
# and pass that file to zipfile, which might follow a more efficient path.
tmp = tempfile.NamedTemporaryFile()
- log_tmp = tempfile.NamedTemporaryFile("w+t") if sys.version_info[0] == 2 else tempfile.NamedTemporaryFile("w+t", encoding='utf-8')
+ log_tmp = tempfile.NamedTemporaryFile("w+t", encoding='utf-8')
for line in buffer:
- log_tmp.write(smart_str(l, errors='replace'))
+ log_tmp.write(smart_str(line, errors='replace'))
# This is not just for show - w/out flush, we often get truncated logs
log_tmp.flush()
t = time.time()
@@ -530,7 +530,7 @@ def serve_500_error(request, *args, **kwargs):
else:
tb = traceback.extract_tb(exc_info[2])
if is_ajax(request):
- tb = '\n'.join(tb.format() if sys.version_info[0] > 2 else [str(t) for t in tb])
+ tb = '\n'.join(tb.format())
return render("500.mako", request, {'traceback': tb})
else:
# exc_info could be empty
diff --git a/desktop/libs/aws/src/aws/conf.py b/desktop/libs/aws/src/aws/conf.py
index f72158e3e9d..99a7906289b 100644
--- a/desktop/libs/aws/src/aws/conf.py
+++ b/desktop/libs/aws/src/aws/conf.py
@@ -16,21 +16,15 @@
import os
import re
-import sys
import logging
import requests
+from django.utils.translation import gettext as _t, gettext_lazy as _
from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool, coerce_password_from_script
from desktop.lib.idbroker import conf as conf_idbroker
from hadoop.core_site import get_raz_api_url, get_raz_s3_default_bucket, get_s3a_access_key, get_s3a_secret_key, get_s3a_session_token
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _t, gettext_lazy as _
-else:
- from django.utils.translation import ugettext as _t, ugettext_lazy as _
-
-
LOG = logging.getLogger()
diff --git a/desktop/libs/aws/src/aws/conf_tests.py b/desktop/libs/aws/src/aws/conf_tests.py
index f8ad9f99621..be23732095c 100644
--- a/desktop/libs/aws/src/aws/conf_tests.py
+++ b/desktop/libs/aws/src/aws/conf_tests.py
@@ -13,26 +13,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import
import logging
-import pytest
-import sys
-import unittest
-from aws import conf
+import pytest
from django.test import TestCase
+from aws import conf
from desktop.conf import RAZ
from desktop.lib.django_test_util import make_logged_in_client
-
from useradmin.models import User
-if sys.version_info[0] > 2:
- from unittest.mock import patch
-else:
- from mock import patch
-
LOG = logging.getLogger()
diff --git a/desktop/libs/aws/src/aws/s3/s3connection.py b/desktop/libs/aws/src/aws/s3/s3connection.py
index a5dba706683..5d6d6578d76 100644
--- a/desktop/libs/aws/src/aws/s3/s3connection.py
+++ b/desktop/libs/aws/src/aws/s3/s3connection.py
@@ -14,31 +14,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import boto
import logging
-import requests
-import sys
-import xml.sax
-
-if sys.version_info[0] > 2:
- from urllib.parse import unquote, urlparse as lib_urlparse, parse_qs, urlencode
-else:
- from urllib import unquote, urlencode
- from urlparse import urlparse as lib_urlparse, parse_qs
+from urllib.parse import parse_qs, unquote, urlencode
+import boto
from boto.connection import HTTPRequest
from boto.exception import BotoClientError
from boto.regioninfo import connect
from boto.resultset import ResultSet
from boto.s3 import S3RegionInfo
from boto.s3.bucket import Bucket, Key
-from boto.s3.connection import S3Connection, NoHostProvided
+from boto.s3.connection import NoHostProvided, S3Connection
from boto.s3.prefix import Prefix
from desktop.conf import RAZ
from desktop.lib.raz.clients import S3RazClient
-
LOG = logging.getLogger()
@@ -92,7 +83,6 @@ def __init__(self, username, aws_access_key_id=None, aws_secret_access_key=None,
suppress_consec_slashes=suppress_consec_slashes, anon=anon,
validate_certs=validate_certs, profile_name=profile_name)
-
def make_request(self, method, bucket='', key='', headers=None, data='',
query_args=None, sender=None, override_num_retries=None,
retry_handler=None):
@@ -113,7 +103,7 @@ def make_request(self, method, bucket='', key='', headers=None, data='',
if query_args:
# Clean prefix to remove s3a%3A//[S3_BUCKET]/ for sending correct relative path to RAZ
if 'prefix=s3a%3A//' in query_args:
- qs_parsed = parse_qs(query_args) # all strings will be unquoted
+ qs_parsed = parse_qs(query_args) # all strings will be unquoted
prefix_relative_path = qs_parsed['prefix'][0].partition(bucket + '/')[2]
qs_parsed['prefix'][0] = prefix_relative_path
@@ -149,13 +139,11 @@ def make_request(self, method, bucket='', key='', headers=None, data='',
return self._mexe(http_request, sender, override_num_retries,
retry_handler=retry_handler)
-
def get_signed_url(self, action='GET', url=None, headers=None, data=None):
raz_client = S3RazClient(username=self.username)
return raz_client.get_url(action, url, headers, data)
-
def _required_auth_capability(self):
"""
Force AnonAuthHandler when Raz is enabled
diff --git a/desktop/libs/aws/src/aws/s3/s3connection_test.py b/desktop/libs/aws/src/aws/s3/s3connection_test.py
index bdf573a1cda..0c958b0fdfc 100644
--- a/desktop/libs/aws/src/aws/s3/s3connection_test.py
+++ b/desktop/libs/aws/src/aws/s3/s3connection_test.py
@@ -14,22 +14,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-import requests
-import six
import sys
+import logging
+from unittest.mock import Mock, patch
-from desktop.conf import RAZ
+import six
+import requests
from aws.client import _make_client
from aws.s3.s3connection import RazS3Connection
from aws.s3.s3test_utils import S3TestBase
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock
-else:
- from mock import patch, Mock
-
+from desktop.conf import RAZ
LOG = logging.getLogger()
@@ -68,9 +63,7 @@ def test_list_buckets(self):
raise SkipTest() # Incorrect in Py3 CircleCi
assert 'GET' == http_request.method
- assert (
- ('s3-us-west-1.amazonaws.com:443' if sys.version_info[0] > 2 else 's3-us-west-1.amazonaws.com') ==
- http_request.host)
+ assert 's3-us-west-1.amazonaws.com:443' == http_request.host
assert '/' == http_request.path
assert '/' == http_request.auth_path
assert ({
diff --git a/desktop/libs/aws/src/aws/s3/s3fs.py b/desktop/libs/aws/src/aws/s3/s3fs.py
index 2718e7f6403..c890c2a18c7 100644
--- a/desktop/libs/aws/src/aws/s3/s3fs.py
+++ b/desktop/libs/aws/src/aws/s3/s3fs.py
@@ -14,21 +14,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import
-
import os
import re
-import sys
import time
import logging
import itertools
import posixpath
+import urllib.error
+import urllib.request
from builtins import object, str
+from urllib.parse import urlparse as lib_urlparse
from boto.exception import BotoClientError, S3ResponseError
from boto.s3.connection import Location
from boto.s3.key import Key
from boto.s3.prefix import Prefix
+from django.utils.translation import gettext as _
from aws import s3
from aws.conf import AWS_ACCOUNTS, PERMISSION_ACTION_S3, get_default_region, get_locations, is_raz_s3
@@ -36,18 +37,6 @@
from aws.s3.s3stat import S3Stat
from filebrowser.conf import REMOTE_STORAGE_HOME
-if sys.version_info[0] > 2:
- import urllib.error
- import urllib.request
- from urllib.parse import quote as urllib_quote, urlparse as lib_urlparse
-
- from django.utils.translation import gettext as _
-else:
- from urllib import quote as urllib_quote
-
- from django.utils.translation import ugettext as _
- from urlparse import urlparse as lib_urlparse
-
DEFAULT_READ_SIZE = 1024 * 1024 # 1MB
BUCKET_NAME_PATTERN = re.compile(
r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$")
diff --git a/desktop/libs/aws/src/aws/s3/upload.py b/desktop/libs/aws/src/aws/s3/upload.py
index 9e2edc34e22..e3df72d38a7 100644
--- a/desktop/libs/aws/src/aws/s3/upload.py
+++ b/desktop/libs/aws/src/aws/s3/upload.py
@@ -21,13 +21,13 @@
See http://docs.djangoproject.com/en/1.9/topics/http/file-uploads/
"""
-import io
-import sys
import logging
import unicodedata
+from io import BytesIO as stream_io
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.files.uploadhandler import FileUploadHandler, SkipFile, StopFutureHandlers, StopUpload, UploadFileException
+from django.utils.translation import gettext as _
from aws.s3 import parse_uri
from aws.s3.s3fs import S3FileSystemException
@@ -36,16 +36,6 @@
from desktop.lib.fsmanager import get_client
from filebrowser.utils import calculate_total_size, generate_chunks
-if sys.version_info[0] > 2:
- from io import BytesIO as stream_io
-else:
- from cStringIO import StringIO as stream_io
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
DEFAULT_WRITE_SIZE = 1024 * 1024 * 128 # TODO: set in configuration (currently 128 MiB)
LOG = logging.getLogger()
diff --git a/desktop/libs/azure/src/azure/abfs/abfs.py b/desktop/libs/azure/src/azure/abfs/abfs.py
index 6c963e7dbc2..6a22f640a67 100644
--- a/desktop/libs/azure/src/azure/abfs/abfs.py
+++ b/desktop/libs/azure/src/azure/abfs/abfs.py
@@ -20,8 +20,6 @@
"""
import os
-import re
-import sys
import logging
import threading
import urllib.error
@@ -585,9 +583,6 @@ def rename(self, old, new):
Renames a file
"""
rename_source = Init_ABFS.strip_scheme(old)
- if sys.version_info[0] < 3 and isinstance(rename_source, unicode):
- rename_source = rename_source.encode('utf-8')
-
headers = {'x-ms-rename-source': '/' + urllib_quote(rename_source)}
try:
@@ -660,7 +655,7 @@ def _local_copy_file(self, local_src, remote_dst, chunk_size=UPLOAD_CHUCK_SIZE):
offset += size
chunk = src.read(chunk_size)
self.flush(remote_dst, params={'position': offset})
- except:
+ except Exception:
LOG.exception(_('Copying %s -> %s failed.') % (local_src, remote_dst))
raise
finally:
diff --git a/desktop/libs/azure/src/azure/abfs/upload.py b/desktop/libs/azure/src/azure/abfs/upload.py
index 730e58a505f..f5c76390f5e 100644
--- a/desktop/libs/azure/src/azure/abfs/upload.py
+++ b/desktop/libs/azure/src/azure/abfs/upload.py
@@ -14,12 +14,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import sys
import logging
import unicodedata
+from io import StringIO as string_io
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.files.uploadhandler import FileUploadHandler, SkipFile, StopFutureHandlers, StopUpload, UploadFileException
+from django.utils.translation import gettext as _
from azure.abfs.__init__ import parse_uri
from azure.abfs.abfs import ABFSFileSystemException
@@ -28,16 +29,6 @@
from desktop.lib.fsmanager import get_client
from filebrowser.utils import calculate_total_size, generate_chunks
-if sys.version_info[0] > 2:
- from io import BytesIO, StringIO as string_io
-else:
- from cStringIO import StringIO as string_io
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
DEFAULT_WRITE_SIZE = 100 * 1024 * 1024 # As per Azure doc, maximum blob size is 100MB
LOG = logging.getLogger()
diff --git a/desktop/libs/azure/src/azure/conf.py b/desktop/libs/azure/src/azure/conf.py
index 5bcdf051b5f..858f267bc13 100644
--- a/desktop/libs/azure/src/azure/conf.py
+++ b/desktop/libs/azure/src/azure/conf.py
@@ -18,15 +18,12 @@
import sys
import logging
+from django.utils.translation import gettext_lazy as _t
+
from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool, coerce_password_from_script
from desktop.lib.idbroker import conf as conf_idbroker
from hadoop import core_site
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t
-else:
- from django.utils.translation import ugettext_lazy as _t
-
LOG = logging.getLogger()
PERMISSION_ACTION_ABFS = "abfs_access"
diff --git a/desktop/libs/dashboard/src/dashboard/api.py b/desktop/libs/dashboard/src/dashboard/api.py
index 12f594ad557..2f94fc8970c 100644
--- a/desktop/libs/dashboard/src/dashboard/api.py
+++ b/desktop/libs/dashboard/src/dashboard/api.py
@@ -15,42 +15,39 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import filter
-import hashlib
import json
-import logging
-import sys
import uuid
+import hashlib
+import logging
+
+from django.utils.encoding import force_str
+from django.utils.translation import gettext as _
+from dashboard.conf import USE_GRIDSTER, get_engines
+from dashboard.controller import can_edit_index
+from dashboard.dashboard_api import get_engine
+from dashboard.data_export import download as export_download
+from dashboard.decorators import allow_viewer_only
+from dashboard.facet_builder import _guess_gap, _new_range_facet, _zoom_range_facet
+from dashboard.models import (
+ COMPARE_FACET,
+ NESTED_FACET_FORM,
+ QUERY_FACET,
+ Collection2,
+ augment_solr_exception,
+ augment_solr_response,
+ extract_solr_exception_message,
+ pairwise2,
+)
from desktop.conf import ENABLE_DOWNLOAD
from desktop.lib.django_util import JsonResponse
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.rest.http_client import RestException
from desktop.models import Document2
from desktop.views import serve_403_error
-
from libsolr.api import SolrApi
-
-from notebook.connectors.base import get_api
-from notebook.dashboard_api import MockRequest
from search.conf import SOLR_URL
-from dashboard.conf import get_engines, USE_GRIDSTER
-from dashboard.controller import can_edit_index
-from dashboard.dashboard_api import get_engine
-from dashboard.data_export import download as export_download
-from dashboard.decorators import allow_viewer_only
-from dashboard.facet_builder import _guess_gap, _zoom_range_facet, _new_range_facet
-from dashboard.models import Collection2, augment_solr_response, pairwise2, augment_solr_exception,\
- NESTED_FACET_FORM, COMPARE_FACET, QUERY_FACET, extract_solr_exception_message
-
-if sys.version_info[0] > 2:
- from django.utils.encoding import force_str
- from django.utils.translation import gettext as _
-else:
- from django.utils.encoding import force_unicode as force_str
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
@@ -122,12 +119,12 @@ def index_fields_dynamic(request):
result['message'] = ''
result['fields'] = [
- Collection2._make_field(name, properties)
- for name, properties in dynamic_fields['fields'].items() if 'dynamicBase' in properties
+ Collection2._make_field(name, properties) for name, properties in dynamic_fields['fields'].items() if 'dynamicBase' in properties
]
result['gridlayout_header_fields'] = [
- Collection2._make_gridlayout_header_field({'name': name, 'type': properties.get('type')}, True)
- for name, properties in dynamic_fields['fields'].items() if 'dynamicBase' in properties
+ Collection2._make_gridlayout_header_field({'name': name, 'type': properties.get('type')}, True)
+ for name, properties in dynamic_fields['fields'].items()
+ if 'dynamicBase' in properties
]
result['status'] = 0
except Exception as e:
@@ -197,9 +194,9 @@ def update_document(request):
if document['hasChanged']:
edits = {
- "id": doc_id,
+ "id": doc_id,
}
- version = None # If there is a version, use it to avoid potential concurrent update conflicts
+ version = None # If there is a version, use it to avoid potential concurrent update conflicts
for field in document['details']:
if field['hasChanged'] and field['key'] != '_version_':
@@ -207,7 +204,9 @@ def update_document(request):
if field['key'] == '_version_':
version = field['value']
- result['update'] = SolrApi(SOLR_URL.get(), request.user).update(collection['name'], json.dumps([edits]), content_type='json', version=version)
+ result['update'] = SolrApi(SOLR_URL.get(), request.user).update(
+ collection['name'], json.dumps([edits]), content_type='json', version=version
+ )
result['message'] = _('Document successfully updated.')
result['status'] = 0
else:
@@ -216,7 +215,7 @@ def update_document(request):
except RestException as e:
try:
result['message'] = json.loads(e.message)['error']['msg']
- except:
+ except Exception:
LOG.exception('Failed to parse json response')
result['message'] = force_str(e)
except Exception as e:
@@ -271,7 +270,7 @@ def get_terms(request):
# maxcount
}
if analysis['terms']['prefix']:
- properties['terms.regex'] = '.*%(prefix)s.*' % analysis['terms'] # Use regexp instead of case sensitive 'terms.prefix'
+ properties['terms.regex'] = '.*%(prefix)s.*' % analysis['terms'] # Use regexp instead of case sensitive 'terms.prefix'
properties['terms.regex.flag'] = 'case_insensitive'
result['terms'] = SolrApi(SOLR_URL.get(), request.user).terms(collection['name'], field, properties)
@@ -380,7 +379,6 @@ def new_facet(request):
widget_type = request.POST.get('widget_type')
window_size = request.POST.get('window_size')
-
result['message'] = ''
result['facet'] = _create_facet(collection, request.user, facet_id, facet_label, facet_field, widget_type, window_size)
result['status'] = 0
@@ -400,35 +398,37 @@ def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_t
'missing': False,
'isDate': False,
'slot': 0,
- 'aggregate': {'function': 'unique', 'formula': '', 'plain_formula': '', 'percentile': 50}
+ 'aggregate': {'function': 'unique', 'formula': '', 'plain_formula': '', 'percentile': 50},
}
template = {
- "showFieldList": True,
- "showGrid": False,
- "showChart": True,
- "chartSettings" : {
- 'chartType': 'pie' if widget_type == 'pie2-widget' else ('timeline' if widget_type == 'timeline-widget' else ('gradientmap' if widget_type == 'gradient-map-widget' else 'bars')),
- 'chartSorting': 'none',
- 'chartScatterGroup': None,
- 'chartScatterSize': None,
- 'chartScope': 'world',
- 'chartX': None,
- 'chartYSingle': None,
- 'chartYMulti': [],
- 'chartData': [],
- 'chartMapLabel': None,
- 'chartSelectorType': 'bar'
- },
- "fieldsAttributes": [],
- "fieldsAttributesFilter": "",
- "filteredAttributeFieldsAll": True,
- "fields": [],
- "fieldsSelected": [],
- "leafletmap": {'latitudeField': None, 'longitudeField': None, 'labelField': None}, # Use own?
- 'leafletmapOn': False,
- 'isGridLayout': False,
- "hasDataForChart": True,
- "rows": 25,
+ "showFieldList": True,
+ "showGrid": False,
+ "showChart": True,
+ "chartSettings": {
+ 'chartType': 'pie'
+ if widget_type == 'pie2-widget'
+ else ('timeline' if widget_type == 'timeline-widget' else ('gradientmap' if widget_type == 'gradient-map-widget' else 'bars')),
+ 'chartSorting': 'none',
+ 'chartScatterGroup': None,
+ 'chartScatterSize': None,
+ 'chartScope': 'world',
+ 'chartX': None,
+ 'chartYSingle': None,
+ 'chartYMulti': [],
+ 'chartData': [],
+ 'chartMapLabel': None,
+ 'chartSelectorType': 'bar',
+ },
+ "fieldsAttributes": [],
+ "fieldsAttributesFilter": "",
+ "filteredAttributeFieldsAll": True,
+ "fields": [],
+ "fieldsSelected": [],
+ "leafletmap": {'latitudeField': None, 'longitudeField': None, 'labelField': None}, # Use own?
+ 'leafletmapOn': False,
+ 'isGridLayout': False,
+ "hasDataForChart": True,
+ "rows": 25,
}
if widget_type in ('tree-widget', 'heatmap-widget', 'map-widget'):
facet_type = 'pivot'
@@ -438,14 +438,27 @@ def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_t
properties['statementUuid'] = collection['selectedDocument'].get('uuid')
doc = Document2.objects.get_by_uuid(user=user, uuid=collection['selectedDocument']['uuid'], perm_type='read')
snippets = doc.data_dict.get('snippets', [])
- properties['result'] = {'handle': {'statement_id': 0, 'statements_count': 1, 'previous_statement_hash': hashlib.sha224(str(uuid.uuid4())).hexdigest()}}
+ properties['result'] = {
+ 'handle': {'statement_id': 0, 'statements_count': 1, 'previous_statement_hash': hashlib.sha224(str(uuid.uuid4())).hexdigest()}
+ }
if snippets:
properties['engine'] = snippets[0]['type']
else:
properties['statementUuid'] = ''
properties['statement'] = ''
properties['uuid'] = facet_field
- properties['facets'] = [{'canRange': False, 'field': 'blank', 'limit': 10, 'mincount': 0, 'sort': 'desc', 'aggregate': {'function': 'count'}, 'isDate': False, 'type': 'field'}]
+ properties['facets'] = [
+ {
+ 'canRange': False,
+ 'field': 'blank',
+ 'limit': 10,
+ 'mincount': 0,
+ 'sort': 'desc',
+ 'aggregate': {'function': 'count'},
+ 'isDate': False,
+ 'type': 'field',
+ }
+ ]
facet_type = 'statement'
else:
api = get_engine(user, collection)
@@ -460,7 +473,15 @@ def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_t
else:
facet_type = 'field'
- if widget_type in ('bucket-widget', 'pie2-widget', 'timeline-widget', 'tree2-widget', 'text-facet-widget', 'hit-widget', 'gradient-map-widget'):
+ if widget_type in (
+ 'bucket-widget',
+ 'pie2-widget',
+ 'timeline-widget',
+ 'tree2-widget',
+ 'text-facet-widget',
+ 'hit-widget',
+ 'gradient-map-widget',
+ ):
# properties = {'canRange': False, 'stacked': False, 'limit': 10} # TODO: Lighter weight top nested facet
properties['facets_form'] = NESTED_FACET_FORM
@@ -546,7 +567,7 @@ def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_t
'properties': properties,
# Hue 4+
'template': template,
- 'queryResult': {}
+ 'queryResult': {},
}
@@ -564,7 +585,7 @@ def get_range_facet(request):
if action == 'select':
properties = _guess_gap(solr_api, collection, facet, facet['properties']['start'], facet['properties']['end'])
else:
- properties = _zoom_range_facet(solr_api, collection, facet) # Zoom out
+ properties = _zoom_range_facet(solr_api, collection, facet) # Zoom out
result['properties'] = properties
result['status'] = 0
diff --git a/desktop/libs/dashboard/src/dashboard/conf.py b/desktop/libs/dashboard/src/dashboard/conf.py
index d262655b3fa..a8562ae8875 100644
--- a/desktop/libs/dashboard/src/dashboard/conf.py
+++ b/desktop/libs/dashboard/src/dashboard/conf.py
@@ -17,16 +17,12 @@
import sys
-from desktop.lib.conf import Config, UnspecifiedConfigSection, ConfigSection, coerce_bool
-from desktop.appmanager import get_apps_dict
+from django.utils.translation import gettext as _, gettext_lazy as _t
+from desktop.appmanager import get_apps_dict
+from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool
from notebook.conf import get_ordered_interpreters
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _, gettext_lazy as _t
-else:
- from django.utils.translation import ugettext as _, ugettext_lazy as _t
-
def is_enabled():
"""Automatic when search is enabled."""
@@ -91,6 +87,7 @@ def is_enabled():
default=False
)
+
def get_properties():
if ENGINES.get():
engines = ENGINES.get()
@@ -112,6 +109,7 @@ def get_properties():
},
}
+
def get_engines(user):
engines = []
apps = get_apps_dict(user=user)
@@ -139,7 +137,6 @@ def get_engines(user):
return engines
-
ENGINES = UnspecifiedConfigSection(
"engines",
help="One entry for each type of snippet.",
diff --git a/desktop/libs/dashboard/src/dashboard/decorators.py b/desktop/libs/dashboard/src/dashboard/decorators.py
index 337dde06752..116d62caca8 100644
--- a/desktop/libs/dashboard/src/dashboard/decorators.py
+++ b/desktop/libs/dashboard/src/dashboard/decorators.py
@@ -15,21 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-import json
import sys
+import json
+import logging
from django.utils.functional import wraps
+from django.utils.translation import gettext as _
from desktop.conf import USE_NEW_EDITOR
from desktop.lib.exceptions_renderable import PopupException
from desktop.models import Document2
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
diff --git a/desktop/libs/dashboard/src/dashboard/facet_builder.py b/desktop/libs/dashboard/src/dashboard/facet_builder.py
index 42303889f2c..72aea6d2d4f 100644
--- a/desktop/libs/dashboard/src/dashboard/facet_builder.py
+++ b/desktop/libs/dashboard/src/dashboard/facet_builder.py
@@ -16,30 +16,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import division, print_function
-from future import standard_library
-standard_library.install_aliases()
-
-from builtins import str, range
-
-import logging
+import re
import math
+import logging
import numbers
-import re
-import sys
-import urllib.request, urllib.parse, urllib.error
-
+import urllib.error
+import urllib.parse
+import urllib.request
from datetime import datetime, timedelta
-from math import ceil
-from math import log
+from math import ceil, log
from time import mktime
-from dateutil.relativedelta import *
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from dateutil.relativedelta import *
+from django.utils.translation import gettext as _
LOG = logging.getLogger()
@@ -59,17 +48,9 @@
'DAYS': DAY_MS,
'WEEKS': WEEK_MS,
'MONTHS': MONTH_MS,
- 'YEARS': YEAR_MS
-}
-TIME_INTERVALS_STARTING_VALUE = {
- 'microsecond': 0,
- 'second': 0,
- 'minute': 0,
- 'hour': 0,
- 'day': 1,
- 'month': 1,
- 'year': 0
+ 'YEARS': YEAR_MS,
}
+TIME_INTERVALS_STARTING_VALUE = {'microsecond': 0, 'second': 0, 'minute': 0, 'hour': 0, 'day': 1, 'month': 1, 'year': 0}
TIME_INTERVAL_SORTED = ['microsecond', 'second', 'minute', 'hour', 'day', 'month']
TIME_INTERVALS = [
{'coeff': 1, 'unit': 'SECONDS'},
@@ -97,20 +78,24 @@
{'coeff': 1, 'unit': 'MONTHS'},
{'coeff': 3, 'unit': 'MONTHS'},
{'coeff': 6, 'unit': 'MONTHS'},
- {'coeff': 1, 'unit': 'YEARS'}];
+ {'coeff': 1, 'unit': 'YEARS'},
+]
for interval in TIME_INTERVALS:
interval['ms'] = TIME_INTERVALS_MS[interval['unit']] * interval['coeff']
+
def utf_quoter(what):
return urllib.parse.quote(str(what).encode('utf-8'), safe='~@#$&()*!+=:;,.?/\'')
-def _guess_range_facet(widget_type, solr_api, collection, facet_field, properties, start=None, end=None, gap=None, window_size=None, slot = 0):
+def _guess_range_facet(
+ widget_type, solr_api, collection, facet_field, properties, start=None, end=None, gap=None, window_size=None, slot=0
+):
try:
stats_json = solr_api.stats(collection['name'], [facet_field])
stat_facet = stats_json['stats']['stats_fields'][facet_field]
- _compute_range_facet(widget_type, stat_facet, properties, start, end, gap, window_size = window_size, SLOTS = slot)
+ _compute_range_facet(widget_type, stat_facet, properties, start, end, gap, window_size=window_size, SLOTS=slot)
except Exception as e:
print(e)
LOG.info('Stats not supported on all the fields, like text: %s' % e)
@@ -120,7 +105,7 @@ def _get_interval(domain_ms, SLOTS):
biggest_interval = TIME_INTERVALS[len(TIME_INTERVALS) - 1]
biggest_interval_is_too_small = math.floor(domain_ms / biggest_interval['ms']) > SLOTS
if biggest_interval_is_too_small:
- coeff = min(ceil(math.floor(domain_ms / SLOTS)), 100) # If we go over 100 years, something has gone wrong.
+ coeff = min(ceil(math.floor(domain_ms / SLOTS)), 100) # If we go over 100 years, something has gone wrong.
return {'ms': YEAR_MS * coeff, 'coeff': coeff, 'unit': 'YEARS'}
for i in range(len(TIME_INTERVALS) - 2, 0, -1):
@@ -130,11 +115,13 @@ def _get_interval(domain_ms, SLOTS):
return TIME_INTERVALS[0]
+
def _format_interval(interval):
return '+' + str(interval['coeff']) + interval['unit']
+
def _get_interval_duration(text):
- regex = re.search('.*-(\d*)(.*)', text)
+ regex = re.search(r'.*-(\d*)(.*)', text)
if regex:
groups = regex.groups()
@@ -142,6 +129,7 @@ def _get_interval_duration(text):
return TIME_INTERVALS_MS[groups[1]] * int(groups[0])
return 0
+
def _clamp_date(interval, time):
gap_duration_lower = interval['unit'].lower()
gap_duration_lowers = gap_duration_lower[:-1] # Removes 's'
@@ -153,6 +141,7 @@ def _clamp_date(interval, time):
break
return time
+
def _get_next_interval(interval, start_time, do_at_least_once):
time = start_time
if interval.get('start_relative'):
@@ -161,107 +150,119 @@ def _get_next_interval(interval, start_time, do_at_least_once):
gap_duration_lower = interval['unit'].lower()
gap_duration_lowers = gap_duration_lower[:-1] # Removes 's'
gap_duration = int(interval['coeff'])
- while (getattr(time, gap_duration_lowers) - TIME_INTERVALS_STARTING_VALUE[gap_duration_lowers]) % gap_duration or (do_at_least_once and time == start_time): # Do while
+ while (getattr(time, gap_duration_lowers) - TIME_INTERVALS_STARTING_VALUE[gap_duration_lowers]) % gap_duration or (
+ do_at_least_once and time == start_time
+ ): # Do while
kwargs = {gap_duration_lower: 1}
time = time + relativedelta(**kwargs)
return time
+
def _remove_duration(interval, nb_slot, time):
gap_duration_lower = interval['unit'].lower()
gap_duration = int(interval['coeff']) * nb_slot
kwargs = {gap_duration_lower: -1 * gap_duration}
return time + relativedelta(**kwargs)
+
def _compute_range_facet(widget_type, stat_facet, properties, start=None, end=None, gap=None, SLOTS=0, window_size=None):
- if SLOTS == 0:
- if widget_type == 'pie-widget' or widget_type == 'pie2-widget':
- SLOTS = 5
- elif widget_type == 'facet-widget' or widget_type == 'text-facet-widget' or widget_type == 'histogram-widget' or widget_type == 'bar-widget' or widget_type == 'bucket-widget' or widget_type == 'timeline-widget':
- if window_size:
- SLOTS = math.floor(int(window_size) / 75) # Value is determined as the thinnest space required to display a timestamp on x axis
- else:
- SLOTS = 10
+ if SLOTS == 0:
+ if widget_type == 'pie-widget' or widget_type == 'pie2-widget':
+ SLOTS = 5
+ elif (
+ widget_type == 'facet-widget'
+ or widget_type == 'text-facet-widget'
+ or widget_type == 'histogram-widget'
+ or widget_type == 'bar-widget'
+ or widget_type == 'bucket-widget'
+ or widget_type == 'timeline-widget'
+ ):
+ if window_size:
+ SLOTS = math.floor(int(window_size) / 75) # Value is determined as the thinnest space required to display a timestamp on x axis
else:
- SLOTS = 100
-
- is_date = widget_type == 'timeline-widget'
-
- if isinstance(stat_facet['min'], numbers.Number):
- stats_min = int(stat_facet['min']) # Cast floats to int currently
- stats_max = int(stat_facet['max'])
- if start is None:
- if widget_type == 'line-widget':
- start, _ = _round_thousand_range(stats_min)
- else:
- start, _ = _round_number_range(stats_min)
+ SLOTS = 10
+ else:
+ SLOTS = 100
+
+ is_date = widget_type == 'timeline-widget'
+
+ if isinstance(stat_facet['min'], numbers.Number):
+ stats_min = int(stat_facet['min']) # Cast floats to int currently
+ stats_max = int(stat_facet['max'])
+ if start is None:
+ if widget_type == 'line-widget':
+ start, _ = _round_thousand_range(stats_min)
else:
- start = int(start)
- if end is None:
- if widget_type == 'line-widget':
- _, end = _round_thousand_range(stats_max)
- else:
- _, end = _round_number_range(stats_max)
+ start, _ = _round_number_range(stats_min)
+ else:
+ start = int(start)
+ if end is None:
+ if widget_type == 'line-widget':
+ _, end = _round_thousand_range(stats_max)
else:
- end = int(end)
-
- if gap is None:
- gap = int(math.floor((end - start) / SLOTS))
- if gap < 1:
- gap = 1
-
- end = max(end, stats_max)
- elif re.match('\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d\d?\d?)?Z', stat_facet['min']):
- is_date = True
- stats_min = stat_facet['min']
- stats_max = stat_facet['max']
- start_was_none = False
- if start is None:
- start_was_none = True
- start = stats_min
- start = re.sub('\.\d\d?\d?Z$', 'Z', start)
- try:
- start_ts = datetime.strptime(start, '%Y-%m-%dT%H:%M:%SZ')
- start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') # Check for dates before 1900
- except Exception as e:
- LOG.error('Bad date: %s' % e)
- start_ts = datetime.strptime('1970-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
-
- if end is None:
- end = stats_max
- end = re.sub('\.\d\d?\d?Z$', 'Z', end)
- try:
- end_ts = datetime.strptime(end, '%Y-%m-%dT%H:%M:%SZ')
- end_ts.strftime('%Y-%m-%dT%H:%M:%SZ') # Check for dates before 1900
- except Exception as e:
- LOG.error('Bad date: %s' % e)
- end_ts = datetime.strptime('2050-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
- end = end_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
- domain_ms = (mktime(end_ts.timetuple()) - mktime(start_ts.timetuple())) * 1000
- interval = _get_interval(domain_ms, SLOTS)
+ _, end = _round_number_range(stats_max)
+ else:
+ end = int(end)
+
+ if gap is None:
+ gap = int(math.floor((end - start) / SLOTS))
+ if gap < 1:
+ gap = 1
+
+ end = max(end, stats_max)
+ elif re.match(r'\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d\d?\d?)?Z', stat_facet['min']):
+ is_date = True
+ stats_min = stat_facet['min']
+ stats_max = stat_facet['max']
+ start_was_none = False
+ if start is None:
+ start_was_none = True
+ start = stats_min
+ start = re.sub(r'\.\d\d?\d?Z$', 'Z', start)
+ try:
+ start_ts = datetime.strptime(start, '%Y-%m-%dT%H:%M:%SZ')
+ start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') # Check for dates before 1900
+ except Exception as e:
+ LOG.error('Bad date: %s' % e)
+ start_ts = datetime.strptime('1970-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
+
+ if end is None:
+ end = stats_max
+ end = re.sub(r'\.\d\d?\d?Z$', 'Z', end)
+ try:
+ end_ts = datetime.strptime(end, '%Y-%m-%dT%H:%M:%SZ')
+ end_ts.strftime('%Y-%m-%dT%H:%M:%SZ') # Check for dates before 1900
+ except Exception as e:
+ LOG.error('Bad date: %s' % e)
+ end_ts = datetime.strptime('2050-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
+ end = end_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
+ domain_ms = (mktime(end_ts.timetuple()) - mktime(start_ts.timetuple())) * 1000
+ interval = _get_interval(domain_ms, SLOTS)
+ start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
+ gap = _format_interval(interval)
+ if start_was_none:
+ start_ts = _clamp_date(interval, start_ts)
start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
- gap = _format_interval(interval)
- if start_was_none:
- start_ts = _clamp_date(interval, start_ts)
- start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
- stats_max = end
- stats_min = start
- else:
- start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
- elif stat_facet['max'] == 'NOW':
- is_date = True
- domain_ms = _get_interval_duration(stat_facet['min'])
- interval = _get_interval(domain_ms, SLOTS)
- nb_slot = math.floor(domain_ms / interval['ms'])
- gap = _format_interval(interval)
- end_ts = datetime.utcnow()
- end_ts_clamped = _clamp_date(interval, end_ts)
- end_ts = _get_next_interval(interval, end_ts_clamped, end_ts_clamped != end_ts)
- start_ts = _remove_duration(interval, nb_slot, end_ts)
- stats_max = end = end_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
- stats_min = start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
-
- properties.update({
+ stats_max = end
+ stats_min = start
+ else:
+ start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
+ elif stat_facet['max'] == 'NOW':
+ is_date = True
+ domain_ms = _get_interval_duration(stat_facet['min'])
+ interval = _get_interval(domain_ms, SLOTS)
+ nb_slot = math.floor(domain_ms / interval['ms'])
+ gap = _format_interval(interval)
+ end_ts = datetime.utcnow()
+ end_ts_clamped = _clamp_date(interval, end_ts)
+ end_ts = _get_next_interval(interval, end_ts_clamped, end_ts_clamped != end_ts)
+ start_ts = _remove_duration(interval, nb_slot, end_ts)
+ stats_max = end = end_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
+ stats_min = start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ')
+
+ properties.update(
+ {
'min': stats_min,
'max': stats_max,
'start': start,
@@ -270,13 +271,11 @@ def _compute_range_facet(widget_type, stat_facet, properties, start=None, end=No
'slot': SLOTS,
'canRange': True,
'isDate': is_date,
- })
+ }
+ )
- if widget_type == 'histogram-widget':
- properties.update({
- 'timelineChartType': 'bar',
- 'enableSelection': True
- })
+ if widget_type == 'histogram-widget':
+ properties.update({'timelineChartType': 'bar', 'enableSelection': True})
def _round_date_range(tm):
@@ -284,33 +283,38 @@ def _round_date_range(tm):
end = start + timedelta(seconds=60)
return start, end
+
def _round_number_range(n):
if n <= 10:
return n, n + 1
else:
i = int(log(n, 10))
end = int(round(n, -i))
- start = end - 10 ** i
+ start = end - 10**i
return start, end
+
def _round_thousand_range(n):
if n <= 10:
return 0, 0
else:
i = int(log(n, 10))
- start = 10 ** i
+ start = 10**i
end = 10 ** (i + 1)
return start, end
+
def _guess_gap(solr_api, collection, facet, start=None, end=None):
properties = {}
- _guess_range_facet(facet['widgetType'], solr_api, collection, facet['field'], properties, start=start, end=end, slot = facet.get('properties', facet)['slot'])
+ _guess_range_facet(
+ facet['widgetType'], solr_api, collection, facet['field'], properties, start=start, end=end, slot=facet.get('properties', facet)['slot']
+ )
return properties
def _new_range_facet(solr_api, collection, facet_field, widget_type, window_size):
properties = {}
- _guess_range_facet(widget_type, solr_api, collection, facet_field, properties, window_size = window_size)
+ _guess_range_facet(widget_type, solr_api, collection, facet_field, properties, window_size=window_size)
return properties
diff --git a/desktop/libs/dashboard/src/dashboard/models.py b/desktop/libs/dashboard/src/dashboard/models.py
index d0a241876a7..754574d5c22 100644
--- a/desktop/libs/dashboard/src/dashboard/models.py
+++ b/desktop/libs/dashboard/src/dashboard/models.py
@@ -17,33 +17,25 @@
from __future__ import division
-from builtins import next
-from builtins import str
-from builtins import zip
-from builtins import object
-import collections
-import datetime
-import dateutil
-import itertools
+import re
+import sys
import json
import logging
import numbers
-import re
-import sys
+import datetime
+import itertools
+import collections
+from builtins import next, object, str, zip
+import dateutil
from django.urls import reverse
from django.utils.html import escape
-
-from desktop.lib.i18n import smart_unicode, smart_str, force_unicode
-from desktop.models import get_data_link, Document2
-from notebook.connectors.base import Notebook, _get_snippet_name
+from django.utils.translation import gettext as _
from dashboard.dashboard_api import get_engine
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from desktop.lib.i18n import force_unicode, smart_str
+from desktop.models import Document2, get_data_link
+from notebook.connectors.base import Notebook, _get_snippet_name
LOG = logging.getLogger()
@@ -142,11 +134,11 @@ def get_props(self, user):
for facet in props['collection']['facets']:
properties = facet['properties']
- if 'gap' in properties and not 'initial_gap' in properties:
+ if 'gap' in properties and 'initial_gap' not in properties:
properties['initial_gap'] = properties['gap']
- if 'start' in properties and not 'initial_start' in properties:
+ if 'start' in properties and 'initial_start' not in properties:
properties['initial_start'] = properties['start']
- if 'end' in properties and not 'initial_end' in properties:
+ if 'end' in properties and 'initial_end' not in properties:
properties['initial_end'] = properties['end']
if 'domain' not in properties:
properties['domain'] = {'blockParent': [], 'blockChildren': []}
@@ -192,7 +184,7 @@ def get_default(self, user, name, engine='solr', source='data'):
if id_field:
id_field = id_field[0]
else:
- id_field = '' # Schemaless might not have an id
+ id_field = '' # Schemaless might not have an id
if source == 'query':
nb_doc = Document2.objects.document(user=user, doc_id=name)
@@ -332,6 +324,7 @@ def get_field_list(cls, collection):
else:
return ['*']
+
def get_facet_field(category, field, facets):
if category in ('nested', 'function'):
id_pattern = '%(id)s'
@@ -345,6 +338,7 @@ def get_facet_field(category, field, facets):
else:
return None
+
def pairwise2(field, fq_filter, iterable):
pairs = []
selected_values = [f['value'] for f in fq_filter]
@@ -359,11 +353,12 @@ def pairwise2(field, fq_filter, iterable):
})
return pairs
+
def range_pair(field, cat, fq_filter, iterable, end, collection_facet):
# e.g. counts":["0",17430,"1000",1949,"2000",671,"3000",404,"4000",243,"5000",165],"gap":1000,"start":0,"end":6000}
pairs = []
selected_values = [f['value'] for f in fq_filter]
- is_single_unit_gap = re.match('^[\+\-]?1[A-Za-z]*$', str(collection_facet['properties']['gap'])) is not None
+ is_single_unit_gap = re.match(r'^[\+\-]?1[A-Za-z]*$', str(collection_facet['properties']['gap'])) is not None
is_up = collection_facet['properties']['sort'] == 'asc'
if collection_facet['properties']['sort'] == 'asc' and (
@@ -422,7 +417,7 @@ def range_pair2(facet_field, cat, fq_filter, iterable, end, facet, collection_fa
# e.g. counts":["0",17430,"1000",1949,"2000",671,"3000",404,"4000",243,"5000",165],"gap":1000,"start":0,"end":6000}
pairs = []
selected_values = [f['value'] for f in fq_filter]
- is_single_unit_gap = re.match('^[\+\-]?1[A-Za-z]*$', str(facet['gap'])) is not None
+ is_single_unit_gap = re.match(r'^[\+\-]?1[A-Za-z]*$', str(facet['gap'])) is not None
is_up = facet['sort'] == 'asc'
if facet['sort'] == 'asc' and facet['type'] == 'range-up':
@@ -541,7 +536,7 @@ def augment_solr_response(response, collection, query):
if response and response.get('facets'):
for facet in collection['facets']:
category = facet['type']
- name = facet['id'] # Nested facets can only have one name
+ name = facet['id'] # Nested facets can only have one name
if category == 'function' and name in response['facets']:
collection_facet = get_facet_field(category, name, collection['facets'])
@@ -614,7 +609,7 @@ def augment_solr_response(response, collection, query):
legend = agg_keys[0].split(':', 2)[1]
column = agg_keys[0]
else:
- legend = facet['field'] # 'count(%s)' % legend
+ legend = facet['field'] # 'count(%s)' % legend
agg_keys = [column]
_augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
@@ -670,9 +665,9 @@ def augment_solr_response(response, collection, query):
agg_keys.insert(0, 'count')
counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
- #_convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2)
+ # _convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2)
dimension = len(facet_fields)
- elif len(collection_facet['properties']['facets']) == 1 or (len(collection_facet['properties']['facets']) == 2 and \
+ elif len(collection_facet['properties']['facets']) == 1 or (len(collection_facet['properties']['facets']) == 2 and
collection_facet['properties']['facets'][1]['aggregate']['function'] != 'count'):
# Dimension 1 with 1 count or agg
dimension = 1
@@ -713,7 +708,7 @@ def augment_solr_response(response, collection, query):
'counts': counts,
'extraSeries': extraSeries,
'dimension': dimension,
- 'response': {'response': {'start': 0, 'numFound': num_bucket}}, # Todo * nested buckets + offsets
+ 'response': {'response': {'start': 0, 'numFound': num_bucket}}, # Todo * nested buckets + offsets
'docs': [dict(list(zip(cols, row))) for row in rows],
'fieldsAttributes': [
Collection2._make_gridlayout_header_field(
@@ -738,6 +733,7 @@ def augment_solr_response(response, collection, query):
return augmented
+
def _get_agg_keys(counts):
for count in counts:
keys = [key for key, value in list(count.items()) if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
@@ -745,6 +741,7 @@ def _get_agg_keys(counts):
return keys
return []
+
def augment_response(collection, query, response):
# HTML escaping
if not query.get('download'):
@@ -762,18 +759,18 @@ def augment_response(collection, query, response):
for field, value in doc.items():
if isinstance(value, numbers.Number):
escaped_value = value
- elif field == '_childDocuments_': # Nested documents
+ elif field == '_childDocuments_': # Nested documents
escaped_value = value
- elif isinstance(value, list): # Multivalue field
- escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
+ elif isinstance(value, list): # Multivalue field
+ escaped_value = [smart_str(escape(val), errors='replace') for val in value]
else:
- value = smart_unicode(value, errors='replace')
+ value = smart_str(value, errors='replace')
escaped_value = escape(value)
doc[field] = escaped_value
doc['externalLink'] = link
doc['details'] = []
- doc['hueId'] = smart_unicode(doc.get(id_field, ''))
+ doc['hueId'] = smart_str(doc.get(id_field, ''))
if 'moreLikeThis' in response and response['moreLikeThis'][doc['hueId']].get('numFound'):
_doc = response['moreLikeThis'][doc['hueId']]
doc['_childDocuments_'] = _doc['docs']
@@ -785,14 +782,14 @@ def augment_response(collection, query, response):
id_field = collection.get('idField')
if id_field:
for doc in response['response']['docs']:
- if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
- highlighting = response['highlighting'][smart_unicode(doc[id_field])]
+ if id_field in doc and smart_str(doc[id_field]) in highlighted_fields:
+ highlighting = response['highlighting'][smart_str(doc[id_field])]
if highlighting:
escaped_highlighting = {}
for field, hls in highlighting.items():
_hls = [
- escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>')
+ escape(smart_str(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>')
for hl in hls
]
escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls
@@ -857,7 +854,7 @@ def __augment_stats_2d(counts, label, fq_fields, fq_values, fq_filter, _selected
count = bucket['count']
dim_row = [val]
- _fq_fields = fq_fields + _fields[0:1] # Pick dim field if there is one
+ _fq_fields = fq_fields + _fields[0:1] # Pick dim field if there is one
_fq_values = fq_values + [val]
for agg_key in agg_keys:
@@ -866,18 +863,18 @@ def __augment_stats_2d(counts, label, fq_fields, fq_values, fq_filter, _selected
augmented.append(_get_augmented(count, val, label, _fq_values, _fq_fields, fq_filter, _selected_values))
elif agg_key.startswith('agg_'):
label = fq_values[0] if len(_fq_fields) >= 2 else agg_key.split(':', 2)[1]
- if agg_keys.index(agg_key) == 0: # One count by dimension
+ if agg_keys.index(agg_key) == 0: # One count by dimension
dim_row.append(count)
- if not agg_key in bucket: # No key if value is 0
+ if agg_key not in bucket: # No key if value is 0
bucket[agg_key] = 0
dim_row.append(bucket[agg_key])
augmented.append(_get_augmented(bucket[agg_key], val, label, _fq_values, _fq_fields, fq_filter, _selected_values))
else:
- augmented.append(_get_augmented(count, val, label, _fq_values, _fq_fields, fq_filter, _selected_values)) # Needed?
+ augmented.append(_get_augmented(count, val, label, _fq_values, _fq_fields, fq_filter, _selected_values)) # Needed?
# List nested fields
_agg_keys = []
- if agg_key in bucket and bucket[agg_key]['buckets']: # Protect against empty buckets
+ if agg_key in bucket and bucket[agg_key]['buckets']: # Protect against empty buckets
for key, value in list(bucket[agg_key]['buckets'][0].items()):
if key.lower().startswith('agg_') or key.lower().startswith('dim_'):
_agg_keys.append(key)
@@ -904,7 +901,7 @@ def __augment_stats_2d(counts, label, fq_fields, fq_values, fq_filter, _selected
new_rows.append(dim_row + row)
dim_row = new_rows
- if dim_row and type(dim_row[0]) == list:
+ if dim_row and type(dim_row[0]) is list:
rows.extend(dim_row)
else:
rows.append(dim_row)
@@ -997,7 +994,6 @@ def augment_solr_exception(response, collection):
})
-
def extract_solr_exception_message(e):
response = {}
diff --git a/desktop/libs/dashboard/src/dashboard/urls.py b/desktop/libs/dashboard/src/dashboard/urls.py
index a034c9191f9..c05b4224849 100644
--- a/desktop/libs/dashboard/src/dashboard/urls.py
+++ b/desktop/libs/dashboard/src/dashboard/urls.py
@@ -17,13 +17,9 @@
import sys
-from dashboard import views as dashboard_views
-from dashboard import api as dashboard_api
+from django.urls import re_path
-if sys.version_info[0] > 2:
- from django.urls import re_path
-else:
- from django.conf.urls import url as re_path
+from dashboard import api as dashboard_api, views as dashboard_views
urlpatterns = [
re_path(r'^$', dashboard_views.index, name='index'),
diff --git a/desktop/libs/dashboard/src/dashboard/views.py b/desktop/libs/dashboard/src/dashboard/views.py
index 2a66a5d212e..08411b6e809 100644
--- a/desktop/libs/dashboard/src/dashboard/views.py
+++ b/desktop/libs/dashboard/src/dashboard/views.py
@@ -15,60 +15,53 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import json
-import logging
import re
import sys
+import json
+import logging
from django.urls import reverse
from django.utils.html import escape
+from django.utils.translation import gettext as _
+from dashboard.conf import get_engines
+from dashboard.controller import DashboardController, can_edit_index
+from dashboard.dashboard_api import get_engine
+from dashboard.decorators import allow_owner_only
+from dashboard.models import Collection2
from desktop.conf import USE_NEW_EDITOR
from desktop.lib.django_util import JsonResponse, render
from desktop.lib.exceptions_renderable import PopupException
-from desktop.models import Document2, Document
+from desktop.models import Document, Document2
from desktop.views import antixss
-
-from search.conf import LATEST
from indexer.views import importer
-
-from dashboard.dashboard_api import get_engine
-from dashboard.decorators import allow_owner_only
-from dashboard.conf import get_engines
-from dashboard.controller import DashboardController, can_edit_index
-from dashboard.models import Collection2
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from search.conf import LATEST
LOG = logging.getLogger()
DEFAULT_LAYOUT = [
- {"size":2,"rows":[{"widgets":[]}],"drops":["temp"],"klass":"card card-home card-column span2"},
- {"size":10,"rows":[{"widgets":[
- {"size":12,"name":"Filter Bar","widgetType":"filter-widget", "id":"99923aef-b233-9420-96c6-15d48293532b",
- "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]},
- {"widgets":[
- {"size":12,"name":"Grid Results","widgetType":"resultset-widget", "id":"14023aef-b233-9420-96c6-15d48293532b",
- "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]}],
- "drops":["temp"],"klass":"card card-home card-column span10"},
+ {"size": 2, "rows": [{"widgets": []}], "drops": ["temp"], "klass": "card card-home card-column span2"},
+ {"size": 10, "rows": [{"widgets": [
+ {"size": 12, "name": "Filter Bar", "widgetType": "filter-widget", "id": "99923aef-b233-9420-96c6-15d48293532b",
+ "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]},
+ {"widgets": [
+ {"size": 12, "name": "Grid Results", "widgetType": "resultset-widget", "id": "14023aef-b233-9420-96c6-15d48293532b",
+ "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]}],
+ "drops": ["temp"], "klass": "card card-home card-column span10"},
]
REPORT_LAYOUT = [
- {u'klass': u'card card-home card-column span12', u'rows': [{"widgets":[]}], u'id': u'7e0c0a45-ae90-43a6-669a-2a852ef4a449', u'drops': [u'temp'], u'size': 12}
+ {u'klass': u'card card-home card-column span12', u'rows': [{"widgets": []}], u'id': u'7e0c0a45-ae90-43a6-669a-2a852ef4a449', u'drops': [u'temp'], u'size': 12} # noqa: E501
]
QUERY_BUILDER_LAYOUT = [
{u'klass': u'card card-home card-column span12', u'rows': [
{u'widgets': [
- {u'name': u'Filter Bar', u'widgetType': u'filter-widget', u'properties': {}, u'isLoading': False, u'offset': 0, u'klass': u'card card-widget span12', u'id': u'abe50df3-a5a0-408a-8122-019d779b4354', u'size': 12}],
+ {u'name': u'Filter Bar', u'widgetType': u'filter-widget', u'properties': {}, u'isLoading': False, u'offset': 0, u'klass': u'card card-widget span12', u'id': u'abe50df3-a5a0-408a-8122-019d779b4354', u'size': 12}], # noqa: E501
u'id': u'22532a0a-8e43-603a-daa9-77d5d233fd7f', u'columns': []},
{u'widgets': [], u'id': u'ebb7fe4d-64c5-c660-bdc0-02a77ff8321e', u'columns': []},
- {u'widgets': [{u'name': u'Grid Results', u'widgetType': u'resultset-widget', u'properties': {}, u'isLoading': False, u'offset': 0, u'klass': u'card card-widget span12', u'id': u'14023aef-b233-9420-96c6-15d48293532b', u'size': 12}],
+ {u'widgets': [{u'name': u'Grid Results', u'widgetType': u'resultset-widget', u'properties': {}, u'isLoading': False, u'offset': 0, u'klass': u'card card-widget span12', u'id': u'14023aef-b233-9420-96c6-15d48293532b', u'size': 12}], # noqa: E501
u'id': u'2bfa8b4b-f7f3-1491-4de0-282130c6ab61', u'columns': []}
],
u'id': u'7e0c0a45-ae90-43a6-669a-2a852ef4a449', u'drops': [u'temp'], u'size': 12
@@ -76,19 +69,19 @@
]
TEXT_SEARCH_LAYOUT = [
- {"size":12,"rows":[{"widgets":[
- {"size":12,"name":"Filter Bar","widgetType":"filter-widget", "id":"99923aef-b233-9420-96c6-15d48293532b",
- "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]},
- {"widgets":[
- {"size":12,"name":"HTML Results","widgetType":"html-resultset-widget", "id":"14023aef-b233-9420-96c6-15d48293532b",
- "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]}],
- "drops":["temp"],"klass":"card card-home card-column span12"},
+ {"size": 12, "rows": [{"widgets": [
+ {"size": 12, "name": "Filter Bar", "widgetType": "filter-widget", "id": "99923aef-b233-9420-96c6-15d48293532b",
+ "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]},
+ {"widgets": [
+ {"size": 12, "name": "HTML Results", "widgetType": "html-resultset-widget", "id": "14023aef-b233-9420-96c6-15d48293532b",
+ "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]}],
+ "drops": ["temp"], "klass": "card card-home card-column span12"},
]
def index(request, is_mobile=False):
engine = request.GET.get('engine', 'solr')
- cluster = request.POST.get('cluster','""')
+ cluster = request.POST.get('cluster', '""')
collection_id = request.GET.get('collection')
collections = get_engine(request.user, engine, cluster=cluster).datasets() if engine != 'report' else ['default']
@@ -140,12 +133,14 @@ def index(request, is_mobile=False):
'is_report': collection.data['collection'].get('engine') == 'report'
})
+
def index_m(request):
return index(request, True)
+
def new_search(request):
engine = request.GET.get('engine', 'solr')
- cluster = request.POST.get('cluster','""')
+ cluster = request.POST.get('cluster', '""')
collections = get_engine(request.user, engine, cluster=cluster).datasets() if engine != 'report' else ['default']
@@ -190,12 +185,13 @@ def new_search(request):
'is_report': engine == 'report'
})
+
def browse(request, name, is_mobile=False):
engine = request.GET.get('engine', 'solr')
source = request.GET.get('source', 'data')
if engine == 'solr':
- name = re.sub('^default\.', '', name)
+ name = re.sub(r'^default\.', '', name)
database = name.split('.', 1)[0]
collections = get_engine(request.user, engine, source=source).datasets(database=database)
@@ -217,10 +213,10 @@ def browse(request, name, is_mobile=False):
'autoLoad': True,
'collections': collections,
'layout': [
- {"size":12,"rows":[{"widgets":[
- {"size":12,"name":"Grid Results","id":"52f07188-f30f-1296-2450-f77e02e1a5c0","widgetType":"resultset-widget",
- "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]}],
- "drops":["temp"],"klass":"card card-home card-column span10"}
+ {"size": 12, "rows": [{"widgets": [
+ {"size": 12, "name": "Grid Results", "id": "52f07188-f30f-1296-2450-f77e02e1a5c0", "widgetType": "resultset-widget",
+ "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]}],
+ "drops": ["temp"], "klass": "card card-home card-column span10"}
],
'qb_layout': QUERY_BUILDER_LAYOUT,
'text_search_layout': TEXT_SEARCH_LAYOUT,
@@ -252,8 +248,12 @@ def save(request):
if collection['id']:
dashboard_doc = Document2.objects.get(id=collection['id'])
else:
- dashboard_doc = Document2.objects.create(name=collection['name'], uuid=collection['uuid'], type='search-dashboard', owner=request.user, description=collection['label'])
- Document.objects.link(dashboard_doc, owner=request.user, name=collection['name'], description=collection['label'], extra='search-dashboard')
+ dashboard_doc = Document2.objects.create(
+ name=collection['name'], uuid=collection['uuid'], type='search-dashboard', owner=request.user, description=collection['label']
+ )
+ Document.objects.link(
+ dashboard_doc, owner=request.user, name=collection['name'], description=collection['label'], extra='search-dashboard'
+ )
dashboard_doc.update_data({
'collection': collection,
diff --git a/desktop/libs/hadoop/src/hadoop/conf.py b/desktop/libs/hadoop/src/hadoop/conf.py
index 3b2a8efbabb..ba0509349bc 100644
--- a/desktop/libs/hadoop/src/hadoop/conf.py
+++ b/desktop/libs/hadoop/src/hadoop/conf.py
@@ -15,19 +15,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import fnmatch
-import logging
import os
import sys
+import fnmatch
+import logging
-from desktop.conf import default_ssl_validate, has_connectors
-from desktop.lib.conf import Config, UnspecifiedConfigSection, ConfigSection, coerce_bool
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t
-else:
- from django.utils.translation import ugettext_lazy as _t
+from django.utils.translation import gettext_lazy as _t
+from desktop.conf import default_ssl_validate, has_connectors
+from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool
LOG = logging.getLogger()
DEFAULT_NN_HTTP_PORT = 50070
@@ -94,13 +90,13 @@ def get_hadoop_conf_dir_default():
),
NN_KERBEROS_PRINCIPAL=Config(
"nn_kerberos_principal",
- help="Kerberos principal for NameNode", # Unused
+ help="Kerberos principal for NameNode", # Unused
default="hdfs",
type=str
),
DN_KERBEROS_PRINCIPAL=Config(
"dn_kerberos_principal",
- help="Kerberos principal for DataNode", # Unused
+ help="Kerberos principal for DataNode", # Unused
default="hdfs",
type=str
),
@@ -124,8 +120,7 @@ def get_hadoop_conf_dir_default():
HADOOP_CONF_DIR=Config(
key="hadoop_conf_dir",
dynamic_default=get_hadoop_conf_dir_default,
- help=
- "Directory of the Hadoop configuration. Defaults to the environment variable HADOOP_CONF_DIR when set, "
+ help="Directory of the Hadoop configuration. Defaults to the environment variable HADOOP_CONF_DIR when set, "
"or '/etc/hadoop/conf'.",
type=str
),
@@ -182,7 +177,7 @@ def get_hadoop_conf_dir_default():
help="Whether Hue should use this cluster to run jobs",
default=True,
type=coerce_bool
- ), # True here for backward compatibility
+ ), # True here for backward compatibility
)
)
)
@@ -196,6 +191,7 @@ def get_spark_history_server_from_cm():
return ManagerApi().get_spark_history_server_url()
return None
+
def get_spark_history_server_url():
"""
Try to get Spark history server URL from Cloudera Manager API, otherwise give default URL
@@ -203,6 +199,7 @@ def get_spark_history_server_url():
url = get_spark_history_server_from_cm()
return url if url else 'http://localhost:18088'
+
def get_spark_history_server_security_enabled():
"""
Try to get Spark history server URL from Cloudera Manager API, otherwise give default URL
@@ -234,7 +231,7 @@ def get_spark_history_server_security_enabled():
SECURITY_ENABLED=Config("security_enabled", help="Is running with Kerberos authentication",
default=False, type=coerce_bool),
SUBMIT_TO=Config('submit_to', help="Whether Hue should use this cluster to run jobs",
- default=False, type=coerce_bool), # False here for backward compatibility
+ default=False, type=coerce_bool), # False here for backward compatibility
IS_YARN=Config("is_yarn", help="Attribute set only on YARN clusters and not MR1 ones.",
default=True, type=coerce_bool),
RESOURCE_MANAGER_API_URL=Config("resourcemanager_api_url",
@@ -309,7 +306,7 @@ def test_spark_configuration(user):
try:
spark_hs_api.get_history_server_api().applications()
status = 'OK'
- except:
+ except Exception:
LOG.exception('failed to get spark history server status')
return status
@@ -319,7 +316,7 @@ def test_yarn_configurations(user):
result = []
try:
- from jobbrowser.api import get_api # Required for cluster HA testing
+ from jobbrowser.api import get_api # Required for cluster HA testing
except Exception as e:
LOG.warning('Jobbrowser is disabled, skipping test_yarn_configurations')
return result
diff --git a/desktop/libs/hadoop/src/hadoop/core_site.py b/desktop/libs/hadoop/src/hadoop/core_site.py
index 3e075f2ca19..e2b03b79893 100644
--- a/desktop/libs/hadoop/src/hadoop/core_site.py
+++ b/desktop/libs/hadoop/src/hadoop/core_site.py
@@ -15,21 +15,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import
+import re
import errno
import logging
-import re
-import sys
-
-from hadoop import conf
-from hadoop import confparse
from desktop.lib.paths import get_config_root_hadoop
-
-if sys.version_info[0] > 2:
- open_file = open
-else:
- open_file = file
+from hadoop import confparse
__all = ['get_conf', 'get_trash_interval', 'get_s3a_access_key', 'get_s3a_secret_key']
@@ -63,6 +54,7 @@
_CNF_SECURITY = 'hadoop.security.authentication'
+
def reset():
"""Reset the cached conf"""
global _CORE_SITE_DICT
@@ -85,7 +77,7 @@ def _parse_core_site():
try:
_CORE_SITE_PATH = get_config_root_hadoop('core-site.xml')
- data = open_file(_CORE_SITE_PATH, 'r').read()
+ data = open(_CORE_SITE_PATH, 'r').read()
except IOError as err:
if err.errno != errno.ENOENT:
LOG.error('Cannot read from "%s": %s' % (_CORE_SITE_PATH, err))
@@ -104,6 +96,7 @@ def get_trash_interval():
"""
return get_conf().get(_CNF_TRASH_INTERVAL, 0)
+
def get_s3a_access_key():
"""
Get S3A AWS access key ID
@@ -111,6 +104,7 @@ def get_s3a_access_key():
"""
return get_conf().get(_CNF_S3A_ACCESS_KEY)
+
def get_s3a_secret_key():
"""
Get S3A AWS secret key
@@ -118,6 +112,7 @@ def get_s3a_secret_key():
"""
return get_conf().get(_CNF_S3A_SECRET_KEY)
+
def get_s3a_session_token():
return get_conf().get(_CNF_S3A_SESSION_TOKEN)
@@ -134,12 +129,14 @@ def get_raz_api_url():
return s3a_raz_url or adls_raz_url
+
def get_raz_cluster_name():
"""
Get the name of the Cluster where Raz is running.
"""
return get_conf().get(_CNF_S3A_RAZ_CLUSTER_NAME, '') or get_conf().get(_CNF_ADLS_RAZ_CLUSTER_NAME, '')
+
def get_raz_s3_default_bucket():
"""
Get the name of the default S3 bucket of Raz
@@ -153,9 +150,11 @@ def get_raz_s3_default_bucket():
'bucket': match.group('bucket')
}
+
def get_default_fs():
return get_conf().get(_CNF_DEFAULT_FS)
+
def get_adls_client_id():
"""
Get ADLS client id
@@ -163,6 +162,7 @@ def get_adls_client_id():
"""
return get_conf().get(_CNF_ADLS_CLIENT_ID)
+
def get_adls_authentication_code():
"""
Get ADLS secret key
@@ -170,6 +170,7 @@ def get_adls_authentication_code():
"""
return get_conf().get(_CNF_ADLS_AUTHENTICATION_CODE)
+
def get_adls_refresh_url():
"""
Get ADLS secret key
@@ -177,6 +178,7 @@ def get_adls_refresh_url():
"""
return get_conf().get(_CNF_ADLS_REFRESH_URL)
+
def get_adls_grant_type():
"""
Get ADLS provider type
@@ -184,14 +186,18 @@ def get_adls_grant_type():
"""
return get_conf().get(_CNF_ADLS_GRANT_TYPE)
+
def is_kerberos_enabled():
return get_conf().get(_CNF_SECURITY) == 'kerberos'
+
def get_azure_client_id():
return get_conf().get(_CNF_AZURE_CLIENT_ID)
+
def get_azure_client_secret():
return get_conf().get(_CNF_AZURE_CLIENT_SECRET)
+
def get_azure_client_endpoint():
return get_conf().get(_CNF_AZURE_CLIENT_ENDPOINT)
diff --git a/desktop/libs/hadoop/src/hadoop/core_site_tests.py b/desktop/libs/hadoop/src/hadoop/core_site_tests.py
index f7f0d82ac92..d48d05e0fda 100644
--- a/desktop/libs/hadoop/src/hadoop/core_site_tests.py
+++ b/desktop/libs/hadoop/src/hadoop/core_site_tests.py
@@ -15,21 +15,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import
-
import os
-import sys
import logging
import tempfile
from desktop.models import get_remote_home_storage
from hadoop import conf, core_site
-if sys.version_info[0] > 2:
- open_file = open
-else:
- open_file = file
-
LOG = logging.getLogger()
@@ -78,7 +70,7 @@ def test_core_site():
"""
- open_file(os.path.join(hadoop_home, 'core-site.xml'), 'w').write(xml)
+ open(os.path.join(hadoop_home, 'core-site.xml'), 'w').write(xml)
finish = (
conf.HDFS_CLUSTERS.set_for_testing({'default': {}}),
diff --git a/desktop/libs/hadoop/src/hadoop/fs/__init__.py b/desktop/libs/hadoop/src/hadoop/fs/__init__.py
index 1a560131a27..1c7513def3a 100644
--- a/desktop/libs/hadoop/src/hadoop/fs/__init__.py
+++ b/desktop/libs/hadoop/src/hadoop/fs/__init__.py
@@ -29,35 +29,25 @@
When possible, the interfaces here have fidelity to the
native python interfaces.
"""
-from __future__ import division
-from future import standard_library
-from functools import reduce
-standard_library.install_aliases()
-from builtins import map
-from builtins import range
-from builtins import object
-import errno
-import grp
-import logging
-import math
import os
-import posixpath
-import pwd
import re
-import shutil
+import grp
+import pwd
+import math
import stat
-import sys
-
-if sys.version_info[0] > 2:
- from builtins import open as builtins_open
-else:
- from __builtin__ import open as builtins_open
+import errno
+import shutil
+import logging
+import posixpath
+from builtins import map, object, open as builtins_open, range
+from functools import reduce
SEEK_SET, SEEK_CUR, SEEK_END = os.SEEK_SET, os.SEEK_CUR, os.SEEK_END
# The web (and POSIX) always uses forward slash as a separator
-LEADING_DOUBLE_SEPARATORS = re.compile("^" + posixpath.sep*2)
+LEADING_DOUBLE_SEPARATORS = re.compile("^" + posixpath.sep * 2)
+
def normpath(path):
"""
@@ -73,6 +63,7 @@ def normpath(path):
class IllegalPathException(Exception):
pass
+
class LocalSubFileSystem(object):
"""
Facade around normal python filesystem calls, for a temporary/local
@@ -106,7 +97,7 @@ def __init__(self, root):
self.root = root
self.name = "file://%s" % self.root
if not os.path.isdir(root):
- logging.fatal("Root(%s) not found." % root +
+ logging.fatal("Root(%s) not found." % root +
" Perhaps you need to run manage.py create_test_fs")
def _resolve_path(self, path):
@@ -162,6 +153,7 @@ def open(self, name, mode="r"):
paths = [0]
# complicated way of taking the intersection of three lists.
assert not reduce(set.intersection, list(map(set, [paths, users, groups])))
+
def wrapped(*args):
self = args[0]
newargs = list(args[1:])
@@ -172,7 +164,7 @@ def wrapped(*args):
for i in groups:
newargs[i] = grp.getgrnam(newargs[i]).gr_gid
- if f == builtins_open and sys.version_info[0] > 2:
+ if f == builtins_open:
return f(*newargs, encoding='utf-8')
return f(*newargs)
@@ -185,7 +177,7 @@ def wrapped(*args):
mkdir = _wrap(os.mkdir)
rmdir = _wrap(os.rmdir)
listdir = _wrap(os.listdir)
- rename = _wrap(os.rename, paths=[0,1])
+ rename = _wrap(os.rename, paths=[0, 1])
exists = _wrap(os.path.exists)
isfile = _wrap(os.path.isfile)
isdir = _wrap(os.path.isdir)
@@ -235,6 +227,7 @@ def listdir_stats(self, path):
def __repr__(self):
return "LocalFileSystem(%s)" % repr(self.root)
+
class FakeStatus(object):
"""
A fake implementation of HDFS health RPCs.
@@ -246,16 +239,16 @@ class FakeStatus(object):
def get_messages(self):
"""Warnings/lint checks."""
return [
- dict(type="WARNING",message="All your base belong to us."),
+ dict(type="WARNING", message="All your base belong to us."),
dict(type="INFO", message="Hamster Dance!")
]
def get_health(self):
o = dict()
- GB = 1024*1024*1024
- o["bytesTotal"] = 5*GB
- o["bytesUsed"] = math.floor(5*GB / 2)
- o["bytesRemaining"] = 2*GB
+ GB = 1024 * 1024 * 1024
+ o["bytesTotal"] = 5 * GB
+ o["bytesUsed"] = math.floor(5 * GB / 2)
+ o["bytesRemaining"] = 2 * GB
o["bytesNonDfs"] = math.floor(GB / 2)
o["liveDataNodes"] = 13
o["deadDataNodes"] = 2
@@ -269,8 +262,8 @@ def get_datanode_report(self):
dinfo["name"] = "fake-%d" % i
dinfo["storageID"] = "fake-id-%d" % i
dinfo["host"] = "fake-host-%d" % i
- dinfo["capacity"] = 123456789
- dinfo["dfsUsed"] = 23456779
+ dinfo["capacity"] = 123456789
+ dinfo["dfsUsed"] = 23456779
dinfo["remaining"] = 100000010
dinfo["xceiverCount"] = 3
dinfo["state"] = "NORMAL_STATE"
@@ -280,8 +273,8 @@ def get_datanode_report(self):
dinfo["name"] = "fake-dead-%d" % i
dinfo["storageID"] = "fake-dead-id-%d" % i
dinfo["host"] = "fake-dead-host-%d" % i
- dinfo["capacity"] = 523456789
- dinfo["dfsUsed"] = 23456779
+ dinfo["capacity"] = 523456789
+ dinfo["dfsUsed"] = 23456779
dinfo["remaining"] = 500000010
dinfo["xceiverCount"] = 3
dinfo["state"] = "DECOMISSION_INPROGRESS"
diff --git a/desktop/libs/hadoop/src/hadoop/fs/hadoopfs.py b/desktop/libs/hadoop/src/hadoop/fs/hadoopfs.py
index c762066047b..8912e46c646 100644
--- a/desktop/libs/hadoop/src/hadoop/fs/hadoopfs.py
+++ b/desktop/libs/hadoop/src/hadoop/fs/hadoopfs.py
@@ -22,47 +22,36 @@
Interfaces for Hadoop filesystem access via the HADOOP-4707 Thrift APIs.
"""
-from __future__ import division
-from past.builtins import cmp
-from future import standard_library
-standard_library.install_aliases()
-from builtins import object
-import codecs
+import os
+import sys
+import math
import errno
+import codecs
+import random
import logging
-import math
-import os
import posixpath
-import random
import subprocess
-import sys
-
-from django.utils.encoding import smart_str
+from builtins import object
+from urllib.parse import urlsplit as lib_urlsplit
-from desktop.lib import i18n
+from django.utils.encoding import force_str, smart_str
+from django.utils.translation import gettext as _
+from past.builtins import cmp
import hadoop.conf
-from hadoop.fs import normpath, SEEK_SET, SEEK_CUR, SEEK_END
+from desktop.lib import i18n
+from hadoop.fs import SEEK_CUR, SEEK_END, SEEK_SET, normpath
from hadoop.fs.exceptions import PermissionDeniedException
-if sys.version_info[0] > 2:
- from django.utils.encoding import force_str
- from urllib.parse import urlsplit as lib_urlsplit
- from django.utils.translation import gettext as _
-else:
- from django.utils.encoding import force_unicode as force_str
- from urlparse import urlsplit as lib_urlsplit
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
DEFAULT_USER = "webui"
# The number of bytes to read if not specified
-DEFAULT_READ_SIZE = 1024*1024 # 1MB
+DEFAULT_READ_SIZE = 1024 * 1024 # 1MB
# The buffer size of the pipe to hdfs -put during upload
-WRITE_BUFFER_SIZE = 128*1024 # 128K
+WRITE_BUFFER_SIZE = 128 * 1024 # 128K
# Class that we translate into PermissionDeniedException
HADOOP_ACCESSCONTROLEXCEPTION = "org.apache.hadoop.security.AccessControlException"
@@ -78,10 +67,12 @@
textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})
is_binary_string = lambda bytes: bool(bytes.translate(None, textchars))
+
def encode_fs_path(path):
"""encode_fs_path(path) -> byte string in utf8"""
return smart_str(path, HDFS_ENCODING, errors='strict')
+
def decode_fs_path(path):
"""decode_fs_path(bytestring) -> unicode path"""
return force_str(path, HDFS_ENCODING, errors='strict')
@@ -158,7 +149,7 @@ def urlsplit(url):
if schema not in ('hdfs', 'viewfs'):
# Default to standard for non-hdfs
return lib_urlsplit(url)
- url = url[i+3:]
+ url = url[i + 3:]
i = url.find('/')
if i == -1:
# Everything is netloc. Assume path is root.
@@ -185,9 +176,9 @@ def create_home_dir(self, home_path=None):
if home_path is None:
home_path = self.get_home_dir()
+ from desktop.conf import DEFAULT_HDFS_SUPERUSER
from hadoop.hdfs_site import get_umask_mode
from useradmin.conf import HOME_DIR_PERMISSIONS, USE_HOME_DIR_PERMISSIONS
- from desktop.conf import DEFAULT_HDFS_SUPERUSER
mode = int(HOME_DIR_PERMISSIONS.get(), 8) if USE_HOME_DIR_PERMISSIONS.get() else (0o777 & (0o1777 ^ get_umask_mode()))
if not self.exists(home_path):
user = self.user
@@ -242,7 +233,7 @@ def _copy_binary_file(self, local_src, remote_dst, chunk_size):
self.append(remote_dst, chunk)
chunk = src.read(chunk_size)
LOG.info(_('Copied %s -> %s.') % (local_src, remote_dst))
- except:
+ except Exception:
LOG.exception(_('Copying %s -> %s failed.') % (local_src, remote_dst))
raise
finally:
@@ -251,10 +242,8 @@ def _copy_binary_file(self, local_src, remote_dst, chunk_size):
def _copy_non_binary_file(self, local_src, remote_dst, chunk_size):
for data_format in ("ascii", "utf-8", "latin-1", "iso-8859"):
src_copied = False
- if sys.version_info[0] > 2:
- src = open(local_src, encoding=data_format)
- else:
- src = codecs.open(local_src, encoding=data_format)
+ src = open(local_src, encoding=data_format)
+
try:
self.create(remote_dst, permission=0o755)
chunk = src.read(chunk_size)
@@ -262,7 +251,7 @@ def _copy_non_binary_file(self, local_src, remote_dst, chunk_size):
self.append(remote_dst, chunk)
chunk = src.read(chunk_size)
src_copied = True
- except:
+ except Exception:
LOG.exception(_('Copying %s -> %s failed with %s encoding format') % (local_src, remote_dst, data_format))
self.remove(remote_dst)
finally:
@@ -295,7 +284,6 @@ def _copy_file(self, local_src, remote_dst, chunk_size=1024 * 1024 * 64):
else:
LOG.info(_('Skipping %s (not a file).') % local_src)
-
@_coerce_exceptions
def mktemp(self, subdir='', prefix='tmp', basedir=None):
"""
@@ -350,9 +338,6 @@ def listdir_stats(self):
raise NotImplementedError(_("%(function)s has not been implemented.") % {'function': 'listdir_stats'})
-
-
-
def require_open(func):
"""
Decorator that ensures that the file instance isn't closed when the
@@ -365,8 +350,6 @@ def wrapper(self, *args, **kwargs):
return wrapper
-
-
class File(object):
""" Represents an open file on HDFS. """
@@ -378,7 +361,7 @@ def __init__(self, fs, path, mode="r", buffering=False):
self._block_cache = BlockCache()
if buffering or mode != "r":
- raise Exception("buffering and write support not yet implemented") # NYI
+ raise Exception("buffering and write support not yet implemented") # NYI
stat = self._stat()
@@ -386,7 +369,7 @@ def __init__(self, fs, path, mode="r", buffering=False):
raise IOError(errno.ENOENT, "No such file or directory: '%s'" % path)
if stat.isDir:
raise IOError(errno.EISDIR, "Is a directory: '%s'" % path)
- #TODO(todd) somehow we need to check permissions here - maybe we need an access() call?
+ # TODO(todd) somehow we need to check permissions here - maybe we need an access() call?
# Minimal context manager implementation.
# See: http://www.python.org/doc/2.5.2/lib/typecontextmanager.html
@@ -395,7 +378,7 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
- return False # don't supress exceptions.
+    return False  # don't suppress exceptions.
@require_open
def seek(self, offset, whence=0):
@@ -413,7 +396,6 @@ def seek(self, offset, whence=0):
def tell(self):
return self.pos
-
def _get_block(self, pos):
"""Return the Block instance that contains the given offset"""
cached_block = self._block_cache.find_block(pos)
@@ -421,7 +403,7 @@ def _get_block(self, pos):
return cached_block
# Cache "miss" - fetch ahead 500MB worth of blocks
- new_blocks = self.fs._get_blocks(self.path, pos, 500*1024*1024)
+ new_blocks = self.fs._get_blocks(self.path, pos, 500 * 1024 * 1024)
self._block_cache.insert_new_blocks(new_blocks)
result = self._block_cache.find_block(pos)
if not result:
@@ -463,7 +445,7 @@ def read(self, length=DEFAULT_READ_SIZE):
read_so_far = 0
while read_so_far < length:
this_data = self._read_in_block(length - read_so_far)
- if this_data == "": # eof
+ if this_data == "": # eof
break
read_so_far += len(this_data)
result.append(this_data)
@@ -515,6 +497,7 @@ def __init__(self, fs, path, mode="w", block_size=None):
close_fds=True,
env=self.subprocess_env,
bufsize=WRITE_BUFFER_SIZE)
+
@require_open
def write(self, data):
"""May raise IOError, particularly EPIPE"""
diff --git a/desktop/libs/hadoop/src/hadoop/fs/test_webhdfs.py b/desktop/libs/hadoop/src/hadoop/fs/test_webhdfs.py
index 8bbd213940f..19c58039379 100644
--- a/desktop/libs/hadoop/src/hadoop/fs/test_webhdfs.py
+++ b/desktop/libs/hadoop/src/hadoop/fs/test_webhdfs.py
@@ -16,25 +16,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import map
-from builtins import zip
-from builtins import range
-from builtins import object
-import logging
-import pytest
import os
-import random
import sys
+import random
+import logging
import threading
-import unittest
+from builtins import map, object, range, zip
+from functools import reduce
+
+import pytest
from django.test import TestCase
from hadoop import pseudo_hdfs4
from hadoop.fs.exceptions import WebHdfsException
from hadoop.fs.hadoopfs import Hdfs
from hadoop.pseudo_hdfs4 import is_live_cluster
-from functools import reduce
-
LOG = logging.getLogger()
@@ -65,7 +61,7 @@ def test_webhdfs(self):
try:
f.write("hello")
f.close()
- assert (b"hello" if sys.version_info[0] > 2 else "hello") == fs.open(test_file).read()
+ assert (b"hello") == fs.open(test_file).read()
assert 5 == fs.stats(test_file)["size"]
assert fs.isfile(test_file)
assert not fs.isfile("/")
@@ -96,14 +92,14 @@ def test_seek(self):
f = fs.open(test_file, "r")
f.seek(0, os.SEEK_SET)
- assert (b"he" if sys.version_info[0] > 2 else "he") == f.read(2)
+ assert (b"he") == f.read(2)
f.seek(1, os.SEEK_SET)
- assert (b"el" if sys.version_info[0] > 2 else "el") == f.read(2)
+ assert (b"el") == f.read(2)
f.seek(-1, os.SEEK_END)
- assert (b"o" if sys.version_info[0] > 2 else "o") == f.read()
+ assert (b"o") == f.read()
f.seek(0, os.SEEK_SET)
f.seek(2, os.SEEK_CUR)
- assert (b"ll" if sys.version_info[0] > 2 else "ll") == f.read(2)
+ assert (b"ll") == f.read(2)
finally:
fs.remove(test_file)
@@ -122,14 +118,14 @@ def test_seek_across_blocks(self):
f.close()
for i in range(1, 10):
- f = fs.open(test_file, "rt" if sys.version_info[0] > 2 else "r")
+ f = fs.open(test_file, "rt")
for j in range(1, 100):
offset = random.randint(0, len(data) - 1)
f.seek(offset, os.SEEK_SET)
- t = data[offset:offset+50]
- if sys.version_info[0] > 2:
- t = t.encode('utf-8')
+ t = data[offset:offset + 50]
+ t = t.encode('utf-8')
+
assert t == f.read(50)
f.close()
@@ -191,7 +187,6 @@ def test_umask(self):
finally:
fs._umask = fs_umask
-
def test_umask_overriden(self):
fs = self.cluster.fs
@@ -211,7 +206,6 @@ def test_umask_overriden(self):
finally:
fs._umask = fs_umask
-
def test_umask_without_sticky(self):
fs = self.cluster.fs
@@ -231,7 +225,6 @@ def test_umask_without_sticky(self):
finally:
fs._umask = fs_umask
-
def test_copy_remote_dir(self):
fs = self.cluster.fs
@@ -245,7 +238,7 @@ def test_copy_remote_dir(self):
f2.close()
new_owner = 'testcopy'
- new_owner_dir = self.prefix + '/' + new_owner + '/test-copy'
+ new_owner_dir = self.prefix + '/' + new_owner + '/test-copy'
fs.copy_remote_dir(src_dir, new_owner_dir, dir_mode=0o755, owner=new_owner)
@@ -310,7 +303,7 @@ def check_existence(name, parent, present=True):
if present:
assert name in listing, f"{name} should be in {listing}"
else:
- assert not name in listing, f"{name} should not be in {listing}"
+ assert name not in listing, f"{name} should not be in {listing}"
name = u'''pt-Olá_ch-你好_ko-안녕_ru-Здравствуйте%20,.<>~`!@$%^&()_-+='"'''
prefix = self.prefix + '/tmp/i18n'
@@ -547,8 +540,10 @@ def test_trash_users(self):
class test_local(object):
def __getattribute__(self, name):
return object.__getattribute__(self, name)
+
def __setattr__(self, name, value):
return object.__setattr__(self, name, value)
+
def __delattr__(self, name):
return object.__delattr__(self, name)
@@ -587,13 +582,11 @@ def __delattr__(self, name):
def test_check_access(self):
# Set user to owner
self.cluster.fs.setuser('test')
- assert ((b'' if sys.version_info[0] > 2 else '') ==
- self.cluster.fs.check_access(path='/user/test', aclspec='rw-')) # returns zero-length content
+ assert ((b'') == self.cluster.fs.check_access(path='/user/test', aclspec='rw-')) # returns zero-length content
# Set user to superuser
self.cluster.fs.setuser(self.cluster.superuser)
- assert ((b'' if sys.version_info[0] > 2 else '') ==
- self.cluster.fs.check_access(path='/user/test', aclspec='rw-')) # returns zero-length content
+ assert ((b'') == self.cluster.fs.check_access(path='/user/test', aclspec='rw-')) # returns zero-length content
# Set user to non-authorized, non-superuser user
self.cluster.fs.setuser('nonadmin')
diff --git a/desktop/libs/hadoop/src/hadoop/fs/upload.py b/desktop/libs/hadoop/src/hadoop/fs/upload.py
index 75a169b769b..32c0627fa7e 100644
--- a/desktop/libs/hadoop/src/hadoop/fs/upload.py
+++ b/desktop/libs/hadoop/src/hadoop/fs/upload.py
@@ -35,6 +35,7 @@
from builtins import object
from django.core.files.uploadhandler import FileUploadHandler, SkipFile, StopFutureHandlers, StopUpload, UploadFileException
+from django.utils.translation import gettext as _
import hadoop.cluster
from desktop.lib import fsmanager
@@ -44,11 +45,6 @@
from hadoop.conf import UPLOAD_CHUNK_SIZE
from hadoop.fs.exceptions import WebHdfsException
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
UPLOAD_SUBDIR = 'hue-uploads'
diff --git a/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py b/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py
index 537775f5402..cb5f2d698c9 100644
--- a/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py
+++ b/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py
@@ -19,45 +19,34 @@
Interfaces for Hadoop filesystem access via HttpFs/WebHDFS
"""
-from future import standard_library
-standard_library.install_aliases()
-from builtins import oct
-from builtins import object
+import stat
+import time
import errno
import logging
import posixpath
-import stat
-import sys
import threading
-import time
-import urllib.request, urllib.error
+import urllib.error
+import urllib.request
+from builtins import object, oct
+from urllib.parse import unquote as urllib_unquote, urlparse
from django.utils.encoding import smart_str
+from django.utils.translation import gettext as _
+from past.builtins import long
import hadoop.conf
import desktop.conf
-
from desktop.lib.rest import http_client, resource
-from past.builtins import long
-from hadoop.fs import normpath as fs_normpath, SEEK_SET, SEEK_CUR, SEEK_END
-from hadoop.fs.hadoopfs import Hdfs
+from hadoop.fs import SEEK_CUR, SEEK_END, SEEK_SET, normpath as fs_normpath
from hadoop.fs.exceptions import WebHdfsException
-from hadoop.fs.webhdfs_types import WebHdfsStat, WebHdfsContentSummary
-from hadoop.hdfs_site import get_nn_sentry_prefixes, get_umask_mode, get_supergroup, get_webhdfs_ssl
-
-if sys.version_info[0] > 2:
- from urllib.parse import unquote as urllib_unquote, urlparse
- from django.utils.translation import gettext as _
-else:
- from urllib import unquote as urllib_unquote
- from urlparse import urlparse
- from django.utils.translation import ugettext as _
-
+from hadoop.fs.hadoopfs import Hdfs
+from hadoop.fs.webhdfs_types import WebHdfsContentSummary, WebHdfsStat
+from hadoop.hdfs_site import get_nn_sentry_prefixes, get_supergroup, get_umask_mode, get_webhdfs_ssl
DEFAULT_HDFS_SUPERUSER = desktop.conf.DEFAULT_HDFS_SUPERUSER.get()
# The number of bytes to read if not specified
-DEFAULT_READ_SIZE = 1024 * 1024 # 1MB
+DEFAULT_READ_SIZE = 1024 * 1024 # 1MB
LOG = logging.getLogger()
@@ -244,7 +233,7 @@ def normpath(self, path):
@staticmethod
def norm_path(path):
path = fs_normpath(path)
- #fs_normpath clears scheme:/ to scheme: which doesn't make sense
+ # fs_normpath clears scheme:/ to scheme: which doesn't make sense
split = urlparse(path)
if not split.path:
path = split._replace(path="/").geturl()
@@ -296,7 +285,6 @@ def get_content_summary(self, path):
json = self._root.get(path, params, headers)
return WebHdfsContentSummary(json['ContentSummary'])
-
def _stats(self, path):
"""This version of stats returns None if the entry is not found"""
path = self.strip_normpath(path)
@@ -374,7 +362,6 @@ def _trash(self, path, recursive=False):
self.mkdir(self.dirname(trash_path))
self.rename(path, trash_path)
-
def _delete(self, path, recursive=False):
"""
_delete(path, recursive=False)
@@ -515,7 +502,6 @@ def chown(self, path, user=None, group=None, recursive=False):
else:
self._root.put(path, params, headers=headers)
-
def chmod(self, path, mode, recursive=False):
"""
chmod(path, mode, recursive=False)
@@ -533,7 +519,6 @@ def chmod(self, path, mode, recursive=False):
else:
self._root.put(path, params, headers=headers)
-
def get_home_dir(self):
"""get_home_dir() -> Home directory for the current user"""
params = self._getparams()
@@ -595,7 +580,6 @@ def read(self, path, offset, length, bufsize=None):
return ""
raise ex
-
def open(self, path, mode='r'):
"""
DEPRECATED!
@@ -606,15 +590,12 @@ def open(self, path, mode='r'):
"""
return File(self, path, mode)
-
def getDefaultFilePerms(self):
return 0o666 & (0o1777 ^ self._umask)
-
def getDefaultDirPerms(self):
return 0o1777 & (0o1777 ^ self._umask)
-
def create(self, path, overwrite=False, blocksize=None, replication=None, permission=None, data=None):
"""
create(path, overwrite=False, blocksize=None, replication=None, permission=None)
@@ -636,7 +617,6 @@ def create(self, path, overwrite=False, blocksize=None, replication=None, permis
headers = self._getheaders()
self._invoke_with_redirect('PUT', path, params, data, headers)
-
def append(self, path, data):
"""
append(path, data)
@@ -649,7 +629,6 @@ def append(self, path, data):
headers = self._getheaders()
self._invoke_with_redirect('POST', path, params, data, headers)
-
# e.g. ACLSPEC = user:joe:rwx,user::rw-
def modify_acl_entries(self, path, aclspec):
path = self.strip_normpath(path)
@@ -659,7 +638,6 @@ def modify_acl_entries(self, path, aclspec):
headers = self._getheaders()
return self._root.put(path, params, headers=headers)
-
def remove_acl_entries(self, path, aclspec):
path = self.strip_normpath(path)
params = self._getparams()
@@ -668,7 +646,6 @@ def remove_acl_entries(self, path, aclspec):
headers = self._getheaders()
return self._root.put(path, params, headers=headers)
-
def remove_default_acl(self, path):
path = self.strip_normpath(path)
params = self._getparams()
@@ -676,7 +653,6 @@ def remove_default_acl(self, path):
headers = self._getheaders()
return self._root.put(path, params, headers=headers)
-
def remove_acl(self, path):
path = self.strip_normpath(path)
params = self._getparams()
@@ -684,7 +660,6 @@ def remove_acl(self, path):
headers = self._getheaders()
return self._root.put(path, params, headers=headers)
-
def set_acl(self, path, aclspec):
path = self.strip_normpath(path)
params = self._getparams()
@@ -693,7 +668,6 @@ def set_acl(self, path, aclspec):
headers = self._getheaders()
return self._root.put(path, params, headers=headers)
-
def get_acl_status(self, path):
path = self.strip_normpath(path)
params = self._getparams()
@@ -701,7 +675,6 @@ def get_acl_status(self, path):
headers = self._getheaders()
return self._root.get(path, params, headers=headers)
-
def check_access(self, path, aclspec='rw-'):
path = self.strip_normpath(path)
params = self._getparams()
@@ -758,7 +731,6 @@ def copyfile(self, src, dst, skip_header=False):
offset += cnt
-
def copy_remote_dir(self, source, destination, dir_mode=None, owner=None):
if owner is None:
owner = self.DEFAULT_USER
@@ -777,7 +749,6 @@ def copy_remote_dir(self, source, destination, dir_mode=None, owner=None):
else:
self.do_as_user(owner, self.copyfile, source_file, destination_file)
-
def copy(self, src, dest, recursive=False, dir_mode=None, owner=None):
"""
Copy file, or directory, in HDFS to another location in HDFS.
@@ -840,16 +811,13 @@ def copy(self, src, dest, recursive=False, dir_mode=None, owner=None):
else:
self.copyfile(src, dest)
-
@staticmethod
def urlsplit(url):
return Hdfs.urlsplit(url)
-
def get_hdfs_path(self, path):
return posixpath.join(self.fs_defaultfs, path.lstrip('/'))
-
def _invoke_with_redirect(self, method, path, params=None, data=None, headers=None):
"""
Issue a request, and expect a redirect, and then submit the data to
@@ -879,7 +847,6 @@ def _invoke_with_redirect(self, method, path, params=None, data=None, headers=No
headers["Content-Type"] = 'application/octet-stream'
return resource.Resource(client).invoke(method, data=data, headers=headers)
-
def _get_redirect_url(self, webhdfs_ex):
"""Retrieve the redirect url from an exception object"""
try:
@@ -909,7 +876,6 @@ def get_delegation_token(self, renewer):
res = self._root.get(params=params, headers=headers)
return res['Token'] and res['Token']['urlString']
-
def do_as_user(self, username, fn, *args, **kwargs):
prev_user = self.user
try:
@@ -918,11 +884,9 @@ def do_as_user(self, username, fn, *args, **kwargs):
finally:
self.setuser(prev_user)
-
def do_as_superuser(self, fn, *args, **kwargs):
return self.do_as_user(self.superuser, fn, *args, **kwargs)
-
def do_recursively(self, fn, path, *args, **kwargs):
for stat in self.listdir_stats(path):
try:
@@ -1021,7 +985,7 @@ def safe_octal(octal_value):
This correctly handles octal values specified as a string or as a numeric.
"""
try:
- return oct(octal_value).replace('o', '') # fix futurized octal value with 0o prefix
+ return oct(octal_value).replace('o', '') # fix futurized octal value with 0o prefix
except TypeError:
return str(octal_value).replace('o', '')
diff --git a/desktop/libs/hadoop/src/hadoop/mini_cluster.py b/desktop/libs/hadoop/src/hadoop/mini_cluster.py
index 4ca7e385512..7f0f723fce7 100644
--- a/desktop/libs/hadoop/src/hadoop/mini_cluster.py
+++ b/desktop/libs/hadoop/src/hadoop/mini_cluster.py
@@ -17,15 +17,15 @@
#######################################################
-## WARNING!!! ##
-## This file is stale. Hadoop 0.23 and CDH4 ##
-## do not support minicluster. This is replaced ##
-## by webhdfs.py, to set up a running cluster. ##
+# WARNING!!! ##
+# This file is stale. Hadoop 0.23 and CDH4 ##
+# do not support minicluster. This is replaced ##
+# by webhdfs.py, to set up a running cluster. ##
#######################################################
# A Python-side driver for MiniHadoopClusterManager
-#
+#
# See README.testing for hints on how to use this,
# and also look for other examples.
#
@@ -37,64 +37,51 @@
# echo "GET /" | nc -w 1 localhost $p
# done
-from __future__ import print_function
-from future import standard_library
-standard_library.install_aliases()
-from builtins import object
-import atexit
-import subprocess
import os
import pwd
-import logging
import sys
-import signal
-import shutil
+import json
import time
+import atexit
+import shutil
+import signal
+import logging
import tempfile
-import json
+import subprocess
+from urllib.error import URLError as lib_URLError
+from urllib.request import Request as lib_Request, urlopen as lib_urlopen
+
import lxml.etree
+import hadoop.cluster
from desktop.lib import python_util
from desktop.lib.test_utils import clear_sys_caches, restore_sys_caches
-import hadoop.cluster
-
-if sys.version_info[0] > 2:
- from urllib.request import Request as lib_Request
- from urllib.error import URLError as lib_URLError
- from urllib.request import urlopen as lib_urlopen
- open_file = open
-else:
- from urllib2 import Request as lib_Request
- from urllib2 import URLError as lib_URLError
- from urllib2 import urlopen as lib_urlopen
- open_file = file
-
# Starts mini cluster suspended until a debugger attaches to it.
-DEBUG_HADOOP=False
+DEBUG_HADOOP = False
# Redirects mini cluster stderr to stderr. (Default is to put it in a file.)
-USE_STDERR=os.environ.get("MINI_CLUSTER_USE_STDERR", False)
+USE_STDERR = os.environ.get("MINI_CLUSTER_USE_STDERR", False)
# Whether to clean up temp dir at exit
-CLEANUP_TMP_DIR=os.environ.get("MINI_CLUSTER_CLEANUP", True)
+CLEANUP_TMP_DIR = os.environ.get("MINI_CLUSTER_CLEANUP", True)
# How long to wait for cluster to start up. (seconds)
MAX_CLUSTER_STARTUP_TIME = 120.0
# List of classes to be used as plugins for the JT of the cluster.
CLUSTER_JT_PLUGINS = 'org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin'
# MR Task Scheduler. By default use the FIFO scheduler
-CLUSTER_TASK_SCHEDULER='org.apache.hadoop.mapred.JobQueueTaskScheduler'
+CLUSTER_TASK_SCHEDULER = 'org.apache.hadoop.mapred.JobQueueTaskScheduler'
# MR queue names
-CLUSTER_QUEUE_NAMES='default'
+CLUSTER_QUEUE_NAMES = 'default'
-STARTUP_CONFIGS={}
+STARTUP_CONFIGS = {}
# users and their groups which are used in Hue tests.
TEST_USER_GROUP_MAPPING = {
- 'test': ['test','users','supergroup'], 'chown_test': ['chown_test'],
+ 'test': ['test', 'users', 'supergroup'], 'chown_test': ['chown_test'],
'notsuperuser': ['notsuperuser'], 'gamma': ['gamma'],
'webui': ['webui'], 'hue': ['supergroup']
}
-LOGGER=logging.getLogger()
+LOGGER = logging.getLogger()
class MiniHadoopCluster(object):
@@ -126,7 +113,7 @@ def tmppath(filename):
os.mkdir(in_conf_dir)
self.log_dir = tmppath("logs")
os.mkdir(self.log_dir)
- f = open_file(os.path.join(in_conf_dir, "hadoop-metrics.properties"), "w")
+ f = open(os.path.join(in_conf_dir, "hadoop-metrics.properties"), "w")
try:
f.write("""
dfs.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext
@@ -155,15 +142,26 @@ def tmppath(filename):
'mapred.queue.names': CLUSTER_QUEUE_NAMES},
tmppath('in-conf/mapred-site.xml'))
- hadoop_policy_keys = ['client', 'client.datanode', 'datanode', 'inter.datanode', 'namenode', 'inter.tracker', 'job.submission', 'task.umbilical', 'refresh.policy', 'admin.operations']
+ hadoop_policy_keys = [
+ 'client',
+ 'client.datanode',
+ 'datanode',
+ 'inter.datanode',
+ 'namenode',
+ 'inter.tracker',
+ 'job.submission',
+ 'task.umbilical',
+ 'refresh.policy',
+ 'admin.operations',
+ ]
hadoop_policy_config = {}
for policy in hadoop_policy_keys:
hadoop_policy_config['security.' + policy + '.protocol.acl'] = '*'
write_config(hadoop_policy_config, tmppath('in-conf/hadoop-policy.xml'))
- details_file = open_file(tmppath("details.json"), "w+")
+ details_file = open(tmppath("details.json"), "w+")
try:
- args = [ os.path.join(hadoop.conf.HADOOP_MR1_HOME.get(), 'bin', 'hadoop'),
+ args = [os.path.join(hadoop.conf.HADOOP_MR1_HOME.get(), 'bin', 'hadoop'),
"jar",
hadoop.conf.HADOOP_TEST_JAR.get(),
"minicluster",
@@ -193,7 +191,7 @@ def tmppath(filename):
"-D", "hadoop.policy.file=%s/hadoop-policy.xml" % in_conf_dir,
]
- for key,value in extra_configs.items():
+ for key, value in extra_configs.items():
args.append("-D")
args.append(key + "=" + value)
@@ -229,13 +227,13 @@ def tmppath(filename):
env["HADOOP_OPTS"] = env.get("HADOOP_OPTS", "") + " -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=9999"
if USE_STDERR:
- stderr=sys.stderr
+ stderr = sys.stderr
else:
- stderr=open_file(tmppath("stderr"), "w")
+ stderr = open(tmppath("stderr"), "w")
LOGGER.debug("Starting minicluster: %s env: %s" % (repr(args), repr(env)))
self.clusterproc = subprocess.Popen(
args=args,
- stdout=open_file(tmppath("stdout"), "w"),
+ stdout=open(tmppath("stdout"), "w"),
stderr=stderr,
env=env)
@@ -251,9 +249,9 @@ def tmppath(filename):
except ValueError:
pass
if self.clusterproc.poll() is not None or (not DEBUG_HADOOP and (time.time() - start) > MAX_CLUSTER_STARTUP_TIME):
- LOGGER.debug("stdout:" + open_file(tmppath("stdout")).read())
+ LOGGER.debug("stdout:" + open(tmppath("stdout")).read())
if not USE_STDERR:
- LOGGER.debug("stderr:" + open_file(tmppath("stderr")).read())
+ LOGGER.debug("stderr:" + open(tmppath("stderr")).read())
self.stop()
raise Exception("Cluster process quit or is taking too long to start. Aborting.")
finally:
@@ -267,10 +265,10 @@ def tmppath(filename):
# Parse the configuration using XPath and place into self.config.
config = lxml.etree.parse(tmppath("config.xml"))
- self.config = dict( (property.find("./name").text, property.find("./value").text)
+ self.config = dict((property.find("./name").text, property.find("./value").text)
for property in config.xpath("/configuration/property"))
- # Write out Hadoop-style configuration directory,
+ # Write out Hadoop-style configuration directory,
# which can, in turn, be used for /bin/hadoop.
self.config_dir = tmppath("conf")
os.mkdir(self.config_dir)
@@ -280,11 +278,13 @@ def tmppath(filename):
write_config(self.config, tmppath("conf/core-site.xml"),
["fs.defaultFS", "jobclient.completion.poll.interval",
"dfs.namenode.checkpoint.period", "dfs.namenode.checkpoint.dir",
- 'hadoop.proxyuser.'+self.superuser+'.groups', 'hadoop.proxyuser.'+self.superuser+'.hosts'])
- write_config(self.config, tmppath("conf/hdfs-site.xml"), ["fs.defaultFS", "dfs.namenode.http-address", "dfs.namenode.secondary.http-address"])
+ 'hadoop.proxyuser.' + self.superuser + '.groups', 'hadoop.proxyuser.' + self.superuser + '.hosts'])
+ write_config(
+ self.config, tmppath("conf/hdfs-site.xml"), ["fs.defaultFS", "dfs.namenode.http-address", "dfs.namenode.secondary.http-address"]
+ )
# mapred.job.tracker isn't written out into self.config, so we fill
# that one out more manually.
- write_config({ 'mapred.job.tracker': 'localhost:%d' % self.jobtracker_port },
+ write_config({'mapred.job.tracker': 'localhost:%d' % self.jobtracker_port},
tmppath("conf/mapred-site.xml"))
write_config(hadoop_policy_config, tmppath('conf/hadoop-policy.xml'))
@@ -299,8 +299,8 @@ def tmppath(filename):
self.secondary_proc = subprocess.Popen(
args=args,
- stdout=open_file(tmppath("stdout.2nn"), "w"),
- stderr=open_file(tmppath("stderr.2nn"), "w"),
+ stdout=open(tmppath("stdout.2nn"), "w"),
+ stderr=open(tmppath("stderr.2nn"), "w"),
env=env)
while True:
@@ -310,9 +310,9 @@ def tmppath(filename):
except lib_URLError:
# If we should abort startup.
if self.secondary_proc.poll() is not None or (not DEBUG_HADOOP and (time.time() - start) > MAX_CLUSTER_STARTUP_TIME):
- LOGGER.debug("stdout:" + open_file(tmppath("stdout")).read())
+ LOGGER.debug("stdout:" + open(tmppath("stdout")).read())
if not USE_STDERR:
- LOGGER.debug("stderr:" + open_file(tmppath("stderr")).read())
+ LOGGER.debug("stderr:" + open(tmppath("stderr")).read())
self.stop()
raise Exception("2nn process quit or is taking too long to start. Aborting.")
break
@@ -326,7 +326,6 @@ def tmppath(filename):
LOGGER.debug("Successfully started 2NN")
-
def stop(self):
"""
Kills the cluster ungracefully.
@@ -356,8 +355,8 @@ def jt(self):
@property
def superuser(self):
"""
- Returns the "superuser" of this cluster.
-
+ Returns the "superuser" of this cluster.
+
This is essentially the user that the cluster was started
with.
"""
@@ -400,6 +399,7 @@ def dump_ini(self, fd=sys.stdout):
# Shared global cluster returned by shared_cluster context manager.
_shared_cluster = None
+
def shared_cluster(conf=False):
"""
Use a shared cluster that is initialized on demand,
@@ -412,7 +412,7 @@ def shared_cluster(conf=False):
done with the shared cluster.
"""
cluster = shared_cluster_internal()
- closers = [ ]
+ closers = []
if conf:
closers.extend([
hadoop.conf.HDFS_CLUSTERS["default"].NN_HOST.set_for_testing("localhost"),
@@ -433,18 +433,19 @@ def finish():
x()
# We don't run the cluster's real stop method,
- # because a shared cluster should be shutdown at
+ # because a shared cluster should be shutdown at
# exit.
cluster.shutdown = finish
return cluster
+
def write_config(config, path, variables=None):
"""
Minimal utility to write Hadoop-style configuration
from a configuration map (config), into a new file
called path.
"""
- f = open_file(path, "w")
+ f = open(path, "w")
try:
f.write("""
@@ -461,18 +462,20 @@ def write_config(config, path, variables=None):
finally:
f.close()
+
def _write_static_group_mapping(user_group_mapping, path):
"""
Create a Java-style .properties file to contain the static user -> group
mapping used by tests.
"""
- f = open_file(path, 'w')
+ f = open(path, 'w')
try:
for user, groups in user_group_mapping.items():
f.write('%s = %s\n' % (user, ','.join(groups)))
finally:
f.close()
+
def shared_cluster_internal():
"""
Manages _shared_cluster.
@@ -484,6 +487,7 @@ def shared_cluster_internal():
atexit.register(_shared_cluster.stop)
return _shared_cluster
+
if __name__ == '__main__':
"""
It's poor form to write tests for tests (the world-wide stack
diff --git a/desktop/libs/hadoop/src/hadoop/pseudo_hdfs4.py b/desktop/libs/hadoop/src/hadoop/pseudo_hdfs4.py
index c070f92d309..f40f74aa29e 100755
--- a/desktop/libs/hadoop/src/hadoop/pseudo_hdfs4.py
+++ b/desktop/libs/hadoop/src/hadoop/pseudo_hdfs4.py
@@ -15,36 +15,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import print_function
-from builtins import object
-import atexit
-import getpass
-import logging
import os
+import time
+import atexit
import shutil
import signal
-import subprocess
import socket
-import sys
+import getpass
+import logging
import tempfile
import textwrap
-import time
+import subprocess
+import hadoop
from desktop.lib.paths import get_run_root
from desktop.lib.python_util import find_unused_port
from desktop.lib.test_utils import clear_sys_caches, restore_sys_caches
-
-import hadoop
from hadoop import cluster
from hadoop.mini_cluster import write_config
-if sys.version_info[0] > 2:
- open_file = open
-else:
- open_file = file
-
_shared_cluster = None
+
LOG = logging.getLogger()
@@ -56,11 +48,13 @@
def is_live_cluster():
return os.environ.get('LIVE_CLUSTER', 'false').lower() == 'true'
+
def get_fs_prefix(fs):
prefix = '/tmp/hue_tests_%s' % str(time.time())
fs.mkdir(prefix, 0o777)
return prefix
+
def get_db_prefix(name='hive'):
if is_live_cluster():
return 'hue_test_%s_%s' % (name, str(time.time()).replace('.', ''))
@@ -72,7 +66,7 @@ class LiveHdfs(object):
def __init__(self):
self.fs = cluster.get_hdfs('default')
# Assumes /tmp exists and is 1777
- self.jt = None # Deprecated
+ self.jt = None # Deprecated
self.fs_prefix = get_fs_prefix(self.fs)
LOG.info('Using %s as FS root' % self.fs_prefix)
@@ -210,7 +204,6 @@ def _kill_proc(name, proc):
if self.shutdown_hook is not None:
self.shutdown_hook()
-
def _tmppath(self, filename):
return os.path.join(self._tmpdir, filename)
@@ -257,7 +250,7 @@ def start(self):
if "JAVA_HOME" in os.environ:
env['JAVA_HOME'] = os.environ['JAVA_HOME']
- LOG.debug("Hadoop Environment:\n" + "\n".join([ str(x) for x in sorted(env.items()) ]))
+ LOG.debug("Hadoop Environment:\n" + "\n".join([str(x) for x in sorted(env.items())]))
# Format HDFS
self._format(self.hadoop_conf_dir, env)
@@ -305,13 +298,12 @@ def start(self):
self.fs_prefix = get_fs_prefix(self.fs)
-
def _start_mr2(self, env):
LOG.info("Starting MR2")
self._mr2_env = env.copy()
- LOG.debug("MR2 Environment:\n" + "\n".join([ str(x) for x in sorted(self.mr2_env.items()) ]))
+ LOG.debug("MR2 Environment:\n" + "\n".join([str(x) for x in sorted(self.mr2_env.items())]))
# Run YARN
self._rm_proc = self._start_daemon('resourcemanager', self.hadoop_conf_dir, self.mr2_env, self._get_yarn_bin(self.mr2_env))
@@ -347,8 +339,8 @@ def _format(self, conf_dir, env):
def _log_exit(self, proc_name, exit_code):
LOG.info('%s exited with %s' % (proc_name, exit_code))
- LOG.debug('--------------------- STDOUT:\n' + open_file(self._logpath(proc_name + '.stdout')).read())
- LOG.debug('--------------------- STDERR:\n' + open_file(self._logpath(proc_name + '.stderr')).read())
+ LOG.debug('--------------------- STDOUT:\n' + open(self._logpath(proc_name + '.stdout')).read())
+ LOG.debug('--------------------- STDERR:\n' + open(self._logpath(proc_name + '.stderr')).read())
def _is_hdfs_ready(self, env):
if self._nn_proc.poll() is not None:
@@ -376,7 +368,6 @@ def _is_hdfs_ready(self, env):
LOG.debug('Waiting for DN to come up .................\n%s' % (report_out,))
return False
-
def _is_mr2_ready(self, env):
if self._rm_proc.poll() is not None:
self._log_exit('resourcemanager', self._rm_proc.poll())
@@ -388,7 +379,6 @@ def _is_mr2_ready(self, env):
self._log_exit('historyserver', self._hs_proc.poll())
return False
-
# Run a `hadoop job -list all'
list_all = subprocess.Popen(
(self._get_mapred_bin(env), 'job', '-list', 'all'),
@@ -403,7 +393,6 @@ def _is_mr2_ready(self, env):
LOG.debug('MR2 not ready yet.\n%s\n%s' % (list_all.stderr.read(), list_all.stderr.read()))
return False
-
def _start_daemon(self, proc_name, conf_dir, env, hadoop_bin=None):
if hadoop_bin is None:
hadoop_bin = self._get_hadoop_bin(env)
@@ -411,8 +400,8 @@ def _start_daemon(self, proc_name, conf_dir, env, hadoop_bin=None):
args = (hadoop_bin, '--config', conf_dir, proc_name)
LOG.info('Starting Hadoop cluster daemon: %s' % (args,))
- stdout = open_file(self._logpath(proc_name + ".stdout"), 'w')
- stderr = open_file(self._logpath(proc_name + ".stderr"), 'w')
+ stdout = open(self._logpath(proc_name + ".stdout"), 'w')
+ stderr = open(self._logpath(proc_name + ".stderr"), 'w')
return subprocess.Popen(args=args, stdout=stdout, stderr=stderr, env=env)
@@ -450,7 +439,7 @@ def _write_hdfs_site(self):
'dfs.namenode.safemode.extension': 1,
'dfs.namenode.safemode.threshold-pct': 0,
'dfs.datanode.address': '%s:0' % self._fqdn,
- 'dfs.datanode.http.address': '0.0.0.0:0', # Work around webhdfs redirect bug -- bind to all interfaces
+ 'dfs.datanode.http.address': '0.0.0.0:0', # Work around webhdfs redirect bug -- bind to all interfaces
'dfs.datanode.ipc.address': '%s:0' % self._fqdn,
'dfs.replication': 1,
'dfs.safemode.min.datanodes': 1,
@@ -503,7 +492,7 @@ def _write_yarn_site(self):
'yarn.nodemanager.local-dirs': self._local_dir,
'yarn.nodemanager.log-dirs': self._logpath('yarn-logs'),
'yarn.nodemanager.remote-app-log-dir': '/var/log/hadoop-yarn/apps',
- 'yarn.nodemanager.localizer.address' : '%s:%s' % (self._fqdn, self._nm_port,),
+ 'yarn.nodemanager.localizer.address': '%s:%s' % (self._fqdn, self._nm_port,),
'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
'yarn.nodemanager.aux-services.mapreduce.shuffle.class': 'org.apache.hadoop.mapred.ShuffleHandler',
'yarn.nodemanager.webapp.address': '%s:%s' % (self._fqdn, self._nm_webapp_port,),
@@ -520,7 +509,6 @@ def _write_yarn_site(self):
self._yarn_site = self._tmppath('conf/yarn-site.xml')
write_config(yarn_configs, self._tmppath('conf/yarn-site.xml'))
-
def _write_mapred_site(self):
self._jh_port = find_unused_port()
self._jh_web_port = find_unused_port()
@@ -538,7 +526,7 @@ def _write_mapred_site(self):
write_config(mapred_configs, self._tmppath('conf/mapred-site.xml'))
def _write_hadoop_metrics_conf(self, conf_dir):
- f = open_file(os.path.join(conf_dir, "hadoop-metrics.properties"), "w")
+ f = open(os.path.join(conf_dir, "hadoop-metrics.properties"), "w")
try:
f.write(textwrap.dedent("""
dfs.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext
@@ -568,13 +556,29 @@ def shared_cluster():
closers = [
hadoop.conf.HDFS_CLUSTERS['default'].FS_DEFAULTFS.set_for_testing(cluster.fs_default_name),
hadoop.conf.HDFS_CLUSTERS['default'].WEBHDFS_URL.set_for_testing(webhdfs_url),
-
hadoop.conf.YARN_CLUSTERS['default'].HOST.set_for_testing(fqdn),
hadoop.conf.YARN_CLUSTERS['default'].PORT.set_for_testing(cluster._rm_port),
-
- hadoop.conf.YARN_CLUSTERS['default'].RESOURCE_MANAGER_API_URL.set_for_testing('http://%s:%s' % (cluster._fqdn, cluster._rm_webapp_port,)),
- hadoop.conf.YARN_CLUSTERS['default'].PROXY_API_URL.set_for_testing('http://%s:%s' % (cluster._fqdn, cluster._rm_webapp_port,)),
- hadoop.conf.YARN_CLUSTERS['default'].HISTORY_SERVER_API_URL.set_for_testing('%s:%s' % (cluster._fqdn, cluster._jh_web_port,)),
+ hadoop.conf.YARN_CLUSTERS['default'].RESOURCE_MANAGER_API_URL.set_for_testing(
+ 'http://%s:%s'
+ % (
+ cluster._fqdn,
+ cluster._rm_webapp_port,
+ )
+ ),
+ hadoop.conf.YARN_CLUSTERS['default'].PROXY_API_URL.set_for_testing(
+ 'http://%s:%s'
+ % (
+ cluster._fqdn,
+ cluster._rm_webapp_port,
+ )
+ ),
+ hadoop.conf.YARN_CLUSTERS['default'].HISTORY_SERVER_API_URL.set_for_testing(
+ '%s:%s'
+ % (
+ cluster._fqdn,
+ cluster._jh_web_port,
+ )
+ ),
]
old_caches = clear_sys_caches()
@@ -591,7 +595,6 @@ def restore_config():
return _shared_cluster
-
"""
Manual start from the Hue shell.
@@ -604,6 +607,8 @@ def restore_config():
>
exit() # To shutdown cleanly
"""
+
+
def main():
logging.basicConfig(level=logging.DEBUG)
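
The pseudo_hdfs4.py hunks above drop the Python 2/3 `open_file` alias and call the built-in open() directly. For reference, a minimal sketch of the shim this patch removes across the Hadoop modules (file name illustrative):

import sys

# The compatibility alias deleted throughout this patch: Python 2 exposed the
# file() built-in, Python 3 only has open(), so the alias collapses to open().
if sys.version_info[0] > 2:
    open_file = open
else:
    open_file = file  # noqa: F821  (Python 2 built-in, absent in Python 3)

# With Python 3 as the only target, call sites simply use open():
with open_file('proc_name.stdout', 'w') as stdout_log:
    stdout_log.write('daemon output goes here\n')
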
diff --git a/desktop/libs/hadoop/src/hadoop/ssl_client_site.py b/desktop/libs/hadoop/src/hadoop/ssl_client_site.py
index 07703374dbf..d58a63ebf00 100644
--- a/desktop/libs/hadoop/src/hadoop/ssl_client_site.py
+++ b/desktop/libs/hadoop/src/hadoop/ssl_client_site.py
@@ -15,19 +15,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import
+import sys
import errno
import logging
import os.path
-import sys
-from hadoop import conf
-from hadoop import confparse
-
-if sys.version_info[0] > 2:
- open_file = open
-else:
- open_file = file
+from hadoop import conf, confparse
_SSL_SITE_PATH = None # Path to ssl-client.xml
_SSL_SITE_DICT = None # A dictionary of name/value config options
@@ -35,6 +28,7 @@
_CNF_TRUSTORE_LOCATION = 'ssl.client.truststore.location'
_CNF_TRUSTORE_PASSWORD = 'ssl.client.truststore.password'
+
LOG = logging.getLogger()
@@ -56,7 +50,7 @@ def _parse_ssl_client_site():
for indentifier in conf.HDFS_CLUSTERS.get():
try:
_SSL_SITE_PATH = os.path.join(conf.HDFS_CLUSTERS[indentifier].HADOOP_CONF_DIR.get(), 'ssl-client.xml')
- data = open_file(_SSL_SITE_PATH, 'r').read()
+ data = open(_SSL_SITE_PATH, 'r').read()
break
except KeyError:
data = ""
@@ -75,4 +69,4 @@ def get_trustore_location():
def get_trustore_password():
- return get_conf().get(_CNF_TRUSTORE_PASSWORD)
\ No newline at end of file
+ return get_conf().get(_CNF_TRUSTORE_PASSWORD)
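
ssl_client_site.py above reads ssl-client.xml and looks up the truststore keys through confparse. As a rough stand-in for what that lookup does, assuming the usual flat <property> layout of Hadoop *-site.xml files (this is not the confparse API):

import xml.etree.ElementTree as ET

SAMPLE = """
<configuration>
  <property>
    <name>ssl.client.truststore.location</name>
    <value>/etc/security/truststore.jks</value>
  </property>
</configuration>
"""

def parse_site_xml(data):
    # Hadoop *-site.xml files are flat name/value property lists.
    conf = {}
    for prop in ET.fromstring(data).iter('property'):
        conf[prop.findtext('name')] = prop.findtext('value')
    return conf

print(parse_site_xml(SAMPLE).get('ssl.client.truststore.location'))
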
diff --git a/desktop/libs/hadoop/src/hadoop/test_hdfs_site.py b/desktop/libs/hadoop/src/hadoop/test_hdfs_site.py
index 0d93c455b31..2b7ad65328a 100644
--- a/desktop/libs/hadoop/src/hadoop/test_hdfs_site.py
+++ b/desktop/libs/hadoop/src/hadoop/test_hdfs_site.py
@@ -15,19 +15,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import
-from hadoop import conf
-import logging
import os
-import sys
+import logging
import tempfile
-from hadoop import hdfs_site
-
-if sys.version_info[0] > 2:
- open_file = open
-else:
- open_file = file
+from hadoop import conf, hdfs_site
LOG = logging.getLogger()
@@ -51,7 +43,7 @@ def test_hdfs_site():
"""
- open_file(os.path.join(hadoop_home, 'hdfs-site.xml'), 'w').write(xml)
+ open(os.path.join(hadoop_home, 'hdfs-site.xml'), 'w').write(xml)
finish = conf.HDFS_CLUSTERS['default'].HADOOP_CONF_DIR.set_for_testing(hadoop_home)
hdfs_site.reset()
diff --git a/desktop/libs/hadoop/src/hadoop/test_ssl_client_site.py b/desktop/libs/hadoop/src/hadoop/test_ssl_client_site.py
index 20e48335bcc..408f22f31ad 100644
--- a/desktop/libs/hadoop/src/hadoop/test_ssl_client_site.py
+++ b/desktop/libs/hadoop/src/hadoop/test_ssl_client_site.py
@@ -15,19 +15,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import
-from hadoop import conf
-import logging
+
import os
-import sys
+import logging
import tempfile
-from hadoop import ssl_client_site
-
-if sys.version_info[0] > 2:
- open_file = open
-else:
- open_file = file
+from hadoop import conf, ssl_client_site
LOG = logging.getLogger()
@@ -59,7 +52,7 @@ def test_ssl_client_site():
"""
- open_file(os.path.join(hadoop_home, 'ssl-client.xml'), 'w').write(xml)
+ open(os.path.join(hadoop_home, 'ssl-client.xml'), 'w').write(xml)
finish = conf.HDFS_CLUSTERS['default'].HADOOP_CONF_DIR.set_for_testing(hadoop_home)
ssl_client_site.reset()
diff --git a/desktop/libs/hadoop/src/hadoop/tests.py b/desktop/libs/hadoop/src/hadoop/tests.py
index d83a2060aee..02195085bbc 100644
--- a/desktop/libs/hadoop/src/hadoop/tests.py
+++ b/desktop/libs/hadoop/src/hadoop/tests.py
@@ -15,27 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
import os
-import pytest
-import sys
+from io import BytesIO as string_io
+import pytest
import desktop.conf as desktop_conf
-
-from desktop.lib.test_utils import clear_sys_caches, restore_sys_caches
from desktop.lib.django_test_util import make_logged_in_client
+from desktop.lib.test_utils import clear_sys_caches, restore_sys_caches
+from hadoop import cluster, conf, confparse, pseudo_hdfs4
-from hadoop import cluster
-from hadoop import conf
-from hadoop import confparse
-from hadoop import pseudo_hdfs4
-
-if sys.version_info[0] > 2:
- from io import BytesIO as string_io
-else:
- from cStringIO import StringIO as string_io
def test_confparse():
data = """
@@ -69,9 +58,9 @@ def test_confparse():
assert cp['fs.default.name'] == 'hdfs://localhost:8020'
assert cp.get('with_description') == 'bar'
assert cp.get('not_in_xml', 'abc') == 'abc'
- assert cp.getbool('boolean_true') == True
- assert cp.getbool('boolean_false') == False
- assert cp.getbool('not_in_xml', True) == True
+ assert cp.getbool('boolean_true') is True
+ assert cp.getbool('boolean_false') is False
+ assert cp.getbool('not_in_xml', True) is True
try:
cp['bogus']
@@ -82,14 +71,13 @@ def test_confparse():
cp_empty = confparse.ConfParse("")
assert cp_empty.get('whatever', 'yes') == 'yes'
+
def test_tricky_confparse():
"""
We found (experimentally) that dealing with a file
sometimes triggered the wrong results here.
"""
- cp_data = confparse.ConfParse(open(os.path.join(os.path.dirname(__file__),
- "test_data",
- "sample_conf.xml"), 'rb'))
+ cp_data = confparse.ConfParse(open(os.path.join(os.path.dirname(__file__), "test_data", "sample_conf.xml"), 'rb'))
assert "org.apache.hadoop.examples.SleepJob" == cp_data["mapred.mapper.class"]
@@ -128,10 +116,10 @@ def test_config_validator_more():
try:
resp = cli.get('/debug/check_config')
- assert not 'Failed to access filesystem root' in resp.content
- assert not 'Failed to create' in resp.content
- assert not 'Failed to chown' in resp.content
- assert not 'Failed to delete' in resp.content
+ assert 'Failed to access filesystem root' not in resp.content
+ assert 'Failed to create' not in resp.content
+ assert 'Failed to chown' not in resp.content
+ assert 'Failed to delete' not in resp.content
finally:
restore_sys_caches(old_caches)
@@ -141,8 +129,8 @@ def test_non_default_cluster():
NON_DEFAULT_NAME = 'non_default'
old_caches = clear_sys_caches()
reset = (
- conf.HDFS_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
- conf.MR_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
+ conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
+ conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
)
try:
# This is indeed the only hdfs/mr cluster
@@ -160,25 +148,28 @@ def test_non_default_cluster():
def test_hdfs_ssl_validate():
for desktop_kwargs, conf_kwargs, expected in [
- ({'present': False}, {'present': False}, True),
- ({'present': False}, {'data': False}, False),
- ({'present': False}, {'data': True}, True),
-
- ({'data': False}, {'present': False}, False),
- ({'data': False}, {'data': False}, False),
- ({'data': False}, {'data': True}, True),
-
- ({'data': True}, {'present': False}, True),
- ({'data': True}, {'data': False}, False),
- ({'data': True}, {'data': True}, True),
- ]:
+ ({'present': False}, {'present': False}, True),
+ ({'present': False}, {'data': False}, False),
+ ({'present': False}, {'data': True}, True),
+ ({'data': False}, {'present': False}, False),
+ ({'data': False}, {'data': False}, False),
+ ({'data': False}, {'data': True}, True),
+ ({'data': True}, {'present': False}, True),
+ ({'data': True}, {'data': False}, False),
+ ({'data': True}, {'data': True}, True),
+ ]:
resets = [
desktop_conf.SSL_VALIDATE.set_for_testing(**desktop_kwargs),
conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.set_for_testing(**conf_kwargs),
]
try:
- assert conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % (desktop_kwargs, conf_kwargs, expected, conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get())
+ assert conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % (
+ desktop_kwargs,
+ conf_kwargs,
+ expected,
+ conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get(),
+ )
finally:
for reset in resets:
reset()
@@ -186,18 +177,16 @@ def test_hdfs_ssl_validate():
def test_yarn_ssl_validate():
for desktop_kwargs, conf_kwargs, expected in [
- ({'present': False}, {'present': False}, True),
- ({'present': False}, {'data': False}, False),
- ({'present': False}, {'data': True}, True),
-
- ({'data': False}, {'present': False}, False),
- ({'data': False}, {'data': False}, False),
- ({'data': False}, {'data': True}, True),
-
- ({'data': True}, {'present': False}, True),
- ({'data': True}, {'data': False}, False),
- ({'data': True}, {'data': True}, True),
- ]:
+ ({'present': False}, {'present': False}, True),
+ ({'present': False}, {'data': False}, False),
+ ({'present': False}, {'data': True}, True),
+ ({'data': False}, {'present': False}, False),
+ ({'data': False}, {'data': False}, False),
+ ({'data': False}, {'data': True}, True),
+ ({'data': True}, {'present': False}, True),
+ ({'data': True}, {'data': False}, False),
+ ({'data': True}, {'data': True}, True),
+ ]:
resets = [
conf.YARN_CLUSTERS.set_for_testing({'default': {}}),
desktop_conf.SSL_VALIDATE.set_for_testing(**desktop_kwargs),
@@ -205,7 +194,12 @@ def test_yarn_ssl_validate():
]
try:
- assert conf.YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % (desktop_kwargs, conf_kwargs, expected, conf.YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get())
+ assert conf.YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % (
+ desktop_kwargs,
+ conf_kwargs,
+ expected,
+ conf.YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get(),
+ )
finally:
for reset in resets:
reset()
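
The parameter tables in test_hdfs_ssl_validate() and test_yarn_ssl_validate() encode a simple precedence rule: the cluster-level ssl_cert_ca_verify wins when it is set, otherwise the desktop-wide ssl_validate applies, and the overall default is True. A sketch of that resolution logic with plain optional booleans instead of Hue Config objects:

def resolve_ssl_verify(desktop_ssl_validate=None, cluster_ca_verify=None):
    # Cluster-level setting wins; otherwise fall back to the desktop-wide
    # setting; if neither is configured, verification defaults to True.
    if cluster_ca_verify is not None:
        return cluster_ca_verify
    if desktop_ssl_validate is not None:
        return desktop_ssl_validate
    return True

CASES = [  # (desktop, cluster, expected), mirroring the test tables above
    (None, None, True), (None, False, False), (None, True, True),
    (False, None, False), (False, False, False), (False, True, True),
    (True, None, True), (True, False, False), (True, True, True),
]
for desktop, cluster, expected in CASES:
    assert resolve_ssl_verify(desktop, cluster) is expected
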
diff --git a/desktop/libs/hadoop/src/hadoop/yarn/clients.py b/desktop/libs/hadoop/src/hadoop/yarn/clients.py
index d70f4809f7f..4fab52c676e 100644
--- a/desktop/libs/hadoop/src/hadoop/yarn/clients.py
+++ b/desktop/libs/hadoop/src/hadoop/yarn/clients.py
@@ -15,25 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-from builtins import next
+import time
+import heapq
import logging
-import sys
import threading
-import time
import urllib.parse
-import heapq
+from urllib.parse import urlsplit as lib_urlsplit
from desktop.lib.rest.http_client import HttpClient
-
from hadoop import cluster
-if sys.version_info[0] > 2:
- from urllib.parse import urlsplit as lib_urlsplit
-else:
- from urlparse import urlsplit as lib_urlsplit
-
LOG = logging.getLogger()
MAX_HEAP_SIZE = 20
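
clients.py now imports heapq and time directly and keeps MAX_HEAP_SIZE = 20; the cache logic itself falls outside this hunk. Purely to illustrate the general idea of a small, time-ordered client cache (a generic sketch, not the actual clients.py implementation):

import heapq
import time

MAX_HEAP_SIZE = 20
_heap = []   # (timestamp, key) pairs, oldest first
_cache = {}  # key -> client

def cache_client(key, client):
    # Evict the oldest entry once the heap is full, then record the new one.
    if len(_heap) >= MAX_HEAP_SIZE:
        _, oldest_key = heapq.heappop(_heap)
        _cache.pop(oldest_key, None)
    heapq.heappush(_heap, (time.time(), key))
    _cache[key] = client

for i in range(25):
    cache_client('node-%d:8042' % i, object())
print(len(_cache))  # stays bounded at MAX_HEAP_SIZE
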
diff --git a/desktop/libs/hadoop/src/hadoop/yarn/resource_manager_api.py b/desktop/libs/hadoop/src/hadoop/yarn/resource_manager_api.py
index 15dc97020fe..442074402ec 100644
--- a/desktop/libs/hadoop/src/hadoop/yarn/resource_manager_api.py
+++ b/desktop/libs/hadoop/src/hadoop/yarn/resource_manager_api.py
@@ -15,27 +15,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
+import sys
import json
import logging
import posixpath
-import sys
import threading
+from builtins import object
+
+from django.utils.translation import gettext as _
from desktop.conf import DEFAULT_USER
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.i18n import smart_str
from desktop.lib.rest.http_client import HttpClient
from desktop.lib.rest.resource import Resource
-
from hadoop import cluster
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
_API_VERSION = 'v1'
@@ -55,11 +50,13 @@ def get_resource_manager(username=None):
yarn_cluster = cluster.get_cluster_conf_for_job_submission()
if yarn_cluster is None:
raise PopupException(_('No Resource Manager are available.'))
- API_CACHE = ResourceManagerApi(yarn_cluster.RESOURCE_MANAGER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
+ API_CACHE = ResourceManagerApi(
+ yarn_cluster.RESOURCE_MANAGER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get()
+ )
finally:
API_CACHE_LOCK.release()
- API_CACHE.setuser(username) # Set the correct user
+ API_CACHE.setuser(username) # Set the correct user
return API_CACHE
@@ -71,7 +68,7 @@ def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
self._client = HttpClient(self._url, logger=LOG)
self._root = Resource(self._client)
self._security_enabled = security_enabled
- self._thread_local = threading.local() # To store user info
+ self._thread_local = threading.local() # To store user info
self.from_failover = False
if self._security_enabled:
@@ -82,7 +79,7 @@ def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
def _get_params(self):
params = {}
- if self.username != DEFAULT_USER.get(): # We impersonate if needed
+ if self.username != DEFAULT_USER.get(): # We impersonate if needed
params['doAs'] = self.username
if not self.security_enabled:
params['user.name'] = DEFAULT_USER.get()
@@ -99,7 +96,7 @@ def setuser(self, user):
@property
def user(self):
- return self.username # Backward compatibility
+ return self.username # Backward compatibility
@property
def username(self):
@@ -127,11 +124,15 @@ def apps(self, **kwargs):
def app(self, app_id):
params = self._get_params()
- return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})
+ return self._execute(
+ self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE}
+ )
def appattempts(self, app_id):
params = self._get_params()
- return self._execute(self._root.get, 'cluster/apps/%(app_id)s/appattempts' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})
+ return self._execute(
+ self._root.get, 'cluster/apps/%(app_id)s/appattempts' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE}
+ )
def appattempts_attempt(self, app_id, attempt_id):
attempts = self.appattempts(app_id)
@@ -154,7 +155,13 @@ def kill(self, app_id):
try:
params = self._get_params()
- return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
+ return self._execute(
+ self._root.put,
+ 'cluster/apps/%(app_id)s/state' % {'app_id': app_id},
+ params=params,
+ data=json.dumps(data),
+ contenttype=_JSON_CONTENT_TYPE,
+ )
finally:
if token:
self.cancel_token(token)
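
The ResourceManagerApi hunks keep the existing pattern of storing the calling user in thread-local storage and adding a doAs parameter when impersonating, while reflowing the long calls. A self-contained sketch of that pattern (names simplified, DEFAULT_USER assumed to be 'hue'):

import threading

DEFAULT_USER = 'hue'

class ApiClient(object):
    def __init__(self, security_enabled=False):
        self.security_enabled = security_enabled
        self._thread_local = threading.local()  # per-thread user info

    def setuser(self, user):
        self._thread_local.user = user

    @property
    def username(self):
        return getattr(self._thread_local, 'user', DEFAULT_USER)

    def _get_params(self):
        params = {}
        if self.username != DEFAULT_USER:  # impersonate if needed
            params['doAs'] = self.username
        if not self.security_enabled:
            params['user.name'] = DEFAULT_USER
        return params

client = ApiClient()
client.setuser('alice')
print(client._get_params())  # {'doAs': 'alice', 'user.name': 'hue'}
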
diff --git a/desktop/libs/hadoop/src/hadoop/yarn/spark_history_server_api.py b/desktop/libs/hadoop/src/hadoop/yarn/spark_history_server_api.py
index 6b7fcb23be3..fdb4d6ab669 100644
--- a/desktop/libs/hadoop/src/hadoop/yarn/spark_history_server_api.py
+++ b/desktop/libs/hadoop/src/hadoop/yarn/spark_history_server_api.py
@@ -15,15 +15,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-from builtins import object
-import json
import logging
import posixpath
-import sys
import threading
import urllib.parse
+from urllib.parse import urlsplit as lib_urlsplit
+
+from django.utils.translation import gettext as _
+from lxml import html
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.rest.http_client import HttpClient
@@ -31,15 +30,6 @@
from hadoop import cluster
from hadoop.yarn.clients import get_log_client
-from lxml import html
-
-if sys.version_info[0] > 2:
- from urllib.parse import urlsplit as lib_urlsplit
- from django.utils.translation import gettext as _
-else:
- from urlparse import urlsplit as lib_urlsplit
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
_API_VERSION = 'v1'
@@ -60,7 +50,11 @@ def get_history_server_api():
yarn_cluster = cluster.get_cluster_conf_for_job_submission()
if yarn_cluster is None:
raise PopupException(_('No Spark History Server is available.'))
- API_CACHE = SparkHistoryServerApi(yarn_cluster.SPARK_HISTORY_SERVER_URL.get(), yarn_cluster.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get())
+ API_CACHE = SparkHistoryServerApi(
+ yarn_cluster.SPARK_HISTORY_SERVER_URL.get(),
+ yarn_cluster.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get(),
+ yarn_cluster.SSL_CERT_CA_VERIFY.get(),
+ )
finally:
API_CACHE_LOCK.release()
@@ -120,13 +114,25 @@ def stage_attempts(self, app_id, stage_id):
return self._root.get('applications/%(app_id)s/stages/%(stage_id)s' % {'app_id': app_id, 'stage_id': stage_id}, headers=self.headers)
def stage_attempt(self, app_id, stage_id, stage_attempt_id):
- return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)
+ return self._root.get(
+ 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s'
+ % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id},
+ headers=self.headers,
+ )
def task_summary(self, app_id, stage_id, stage_attempt_id):
- return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)
+ return self._root.get(
+ 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary'
+ % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id},
+ headers=self.headers,
+ )
def task_list(self, app_id, stage_id, stage_attempt_id):
- return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)
+ return self._root.get(
+ 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList'
+ % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id},
+ headers=self.headers,
+ )
def storages(self, app_id):
return self._root.get('applications/%(app_id)s/storage/rdd' % {'app_id': app_id}, headers=self.headers)
@@ -138,7 +144,9 @@ def download_logs(self, app_id):
return self._root.get('applications/%(app_id)s/logs' % {'app_id': app_id}, headers=self.headers)
def download_attempt_logs(self, app_id, attempt_id):
- return self._root.get('applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}, headers=self.headers)
+ return self._root.get(
+ 'applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}, headers=self.headers
+ )
def download_executors_logs(self, request, job, name, offset):
log_links = self.get_executors_loglinks(job)
diff --git a/desktop/libs/indexer/src/indexer/api.py b/desktop/libs/indexer/src/indexer/api.py
index f997d711c92..b060c705215 100644
--- a/desktop/libs/indexer/src/indexer/api.py
+++ b/desktop/libs/indexer/src/indexer/api.py
@@ -15,26 +15,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import next
-from builtins import zip
-import itertools
-import json
-import logging
import re
import sys
+import json
+import logging
+import itertools
+from builtins import next, zip
+
+from django.utils.translation import gettext as _
from desktop.lib.django_util import JsonResponse
from desktop.lib.exceptions_renderable import PopupException
-
from indexer.controller import CollectionManagerController
from indexer.solr_client import SolrClient
-from indexer.utils import fields_from_log, field_values_from_separated_file, get_type_from_morphline_type, get_field_types
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from indexer.utils import field_values_from_separated_file, fields_from_log, get_field_types, get_type_from_morphline_type
LOG = logging.getLogger()
@@ -70,7 +64,7 @@ def parse_fields(request):
result['data'] = []
for field_result in field_results:
- result['data'].append( (field_result[1], get_type_from_morphline_type(field_result[0])) )
+ result['data'].append((field_result[1], get_type_from_morphline_type(field_result[0])))
result['status'] = 0
else:
@@ -93,6 +87,7 @@ def parse_fields(request):
return JsonResponse(result)
+
def autocomplete(request):
searcher = CollectionManagerController(request.user)
autocomplete = searcher.get_autocomplete()
@@ -167,7 +162,7 @@ def collections_create(request):
table = request.POST.get('table')
columns = [field['name'] for field in collection.get('fields', [])]
- searcher.update_data_from_hive(db, collection.get('name'), database, table, columns) # Not up to date
+ searcher.update_data_from_hive(db, collection.get('name'), database, table, columns) # Not up to date
response['status'] = 0
response['message'] = _('Collection created!')
@@ -193,7 +188,9 @@ def collections_import(request):
unique_key, fields = searcher.get_fields(collection.get('name'))
# Create collection and metadata.
- hue_collection, created = Collection.objects.get_or_create(name=collection.get('name'), solr_properties='{}', is_enabled=True, user=request.user)
+ hue_collection, created = Collection.objects.get_or_create(
+ name=collection.get('name'), solr_properties='{}', is_enabled=True, user=request.user
+ )
properties_dict = hue_collection.properties_dict
properties_dict['data_type'] = 'separated'
properties_dict['field_order'] = [field_name for field_name in fields]
@@ -207,6 +204,7 @@ def collections_import(request):
return JsonResponse(response)
+
# Deprecated
def collections_remove(request):
if request.method != 'POST':
@@ -244,7 +242,9 @@ def collections_fields(request, collection):
unique_key, fields = searcher.get_fields(collection)
response['status'] = 0
- response['fields'] = [(field, fields[field]['type'], fields[field].get('indexed', None), fields[field].get('stored', None)) for field in fields]
+ response['fields'] = [
+ (field, fields[field]['type'], fields[field].get('indexed', None), fields[field].get('stored', None)) for field in fields
+ ]
response['unique_key'] = unique_key
return JsonResponse(response)
diff --git a/desktop/libs/indexer/src/indexer/api3.py b/desktop/libs/indexer/src/indexer/api3.py
index 8337336d1cb..05c35928993 100644
--- a/desktop/libs/indexer/src/indexer/api3.py
+++ b/desktop/libs/indexer/src/indexer/api3.py
@@ -15,68 +15,55 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-
-from builtins import zip
-from past.builtins import basestring
+import re
import csv
import json
+import uuid
import logging
-import urllib.error
-import openpyxl
-import re
-import sys
import tempfile
-import uuid
+import urllib.error
+from builtins import zip
+from io import StringIO as string_io
+from urllib.parse import unquote as urllib_unquote, urlparse
+import pandas as pd
+import openpyxl
from django.urls import reverse
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
-
-LOG = logging.getLogger()
-
-try:
- from simple_salesforce.api import Salesforce
- from simple_salesforce.exceptions import SalesforceRefusedRequest
-except ImportError:
- LOG.warning('simple_salesforce module not found')
+from past.builtins import basestring
from desktop.lib.django_util import JsonResponse
from desktop.lib.exceptions_renderable import PopupException
-from desktop.lib.i18n import smart_unicode
+from desktop.lib.i18n import smart_str
from desktop.lib.python_util import check_encoding
from desktop.models import Document2
from filebrowser.forms import UploadLocalFileForm
-from kafka.kafka_api import get_topics, get_topic_data
-from notebook.connectors.base import get_api, Notebook
-from notebook.decorators import api_error_handler
-from notebook.models import MockedDjangoRequest, escape_rows
-
from indexer.controller import CollectionManagerController
-from indexer.file_format import HiveFormat
from indexer.fields import Field, guess_field_type_from_samples
-from indexer.indexers.envelope import _envelope_job
+from indexer.file_format import HiveFormat
from indexer.indexers.base import get_api
+from indexer.indexers.envelope import _envelope_job
from indexer.indexers.flink_sql import FlinkIndexer
+from indexer.indexers.flume import FlumeIndexer
from indexer.indexers.morphline import MorphlineIndexer, _create_solr_collection
from indexer.indexers.phoenix_sql import PhoenixIndexer
-from indexer.indexers.rdbms import run_sqoop, _get_api
+from indexer.indexers.rdbms import _get_api, run_sqoop
from indexer.indexers.sql import _create_database, _create_table, _create_table_from_local
from indexer.models import _save_pipeline
-from indexer.solr_client import SolrClient, MAX_UPLOAD_SIZE
-from indexer.indexers.flume import FlumeIndexer
+from indexer.solr_client import MAX_UPLOAD_SIZE, SolrClient
+from kafka.kafka_api import get_topic_data, get_topics
+from notebook.connectors.base import Notebook, get_api
+from notebook.decorators import api_error_handler
+from notebook.models import MockedDjangoRequest, escape_rows
+LOG = logging.getLogger()
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
- from urllib.parse import urlparse, unquote as urllib_unquote
- from django.utils.translation import gettext as _
- import pandas as pd
-else:
- from StringIO import StringIO as string_io
- from urllib import unquote as urllib_unquote
- from urlparse import urlparse
- from django.utils.translation import ugettext as _
+try:
+ from simple_salesforce.api import Salesforce
+ from simple_salesforce.exceptions import SalesforceRefusedRequest
+except ImportError:
+ LOG.warning('simple_salesforce module not found')
try:
from beeswax.server import dbms
@@ -106,10 +93,7 @@ def _escape_white_space_characters(s, inverse=False):
from_ = 0 if inverse else 1
for pair in MAPPINGS.items():
- if sys.version_info[0] > 2:
- s = s.replace(pair[to], pair[from_])
- else:
- s = s.replace(pair[to], pair[from_]).encode('utf-8')
+ s = s.replace(pair[to], pair[from_])
return s
@@ -125,9 +109,6 @@ def guess_format(request):
file_format = json.loads(request.POST.get('fileFormat', '{}'))
file_type = file_format['file_type']
path = file_format["path"]
-
- if sys.version_info[0] < 3 and (file_type == 'excel' or path[-3:] == 'xls' or path[-4:] == 'xlsx'):
- return JsonResponse({'status': -1, 'message': 'Python2 based Hue does not support Excel file importer'})
if file_format['inputFormat'] == 'localfile':
if file_type == 'excel':
@@ -169,7 +150,7 @@ def guess_format(request):
})
_convert_format(format_)
- if file_format["path"][-3:] == 'xls' or file_format["path"][-4:] == 'xlsx':
+ if file_format["path"][-3:] == 'xls' or file_format["path"][-4:] == 'xlsx':
format_ = {
"quoteChar": "\"",
"recordSeparator": '\\n',
@@ -201,7 +182,7 @@ def guess_format(request):
"fieldSeparator": storage.get('field.delim', ',')
}
elif table_metadata.details['properties']['format'] == 'parquet':
- format_ = {"type": "parquet", "hasHeader": False,}
+ format_ = {"type": "parquet", "hasHeader": False, }
else:
raise PopupException('Hive table format %s is not supported.' % table_metadata.details['properties']['format'])
elif file_format['inputFormat'] == 'query':
@@ -255,9 +236,11 @@ def guess_format(request):
format_['status'] = 0
return JsonResponse(format_)
+
def decode_utf8(input_iterator):
- for l in input_iterator:
- yield l.decode('utf-8')
+ for line in input_iterator:
+ yield line.decode('utf-8')
+
def guess_field_types(request):
file_format = json.loads(request.POST.get('fileFormat', '{}'))
@@ -275,7 +258,7 @@ def guess_field_types(request):
column_row = [re.sub('[^0-9a-zA-Z]+', '_', col) for col in csv_data[0]]
else:
sample = csv_data[:4]
- column_row = ['field_' + str(count+1) for count, col in enumerate(sample[0])]
+ column_row = ['field_' + str(count + 1) for count, col in enumerate(sample[0])]
field_type_guesses = []
for count, col in enumerate(column_row):
@@ -317,7 +300,7 @@ def guess_field_types(request):
if 'sample' in format_ and format_['sample']:
format_['sample'] = escape_rows(format_['sample'], nulls_only=True, encoding=encoding)
for col in format_['columns']:
- col['name'] = smart_unicode(col['name'], errors='replace', encoding=encoding)
+ col['name'] = smart_str(col['name'], errors='replace', encoding=encoding)
elif file_format['inputFormat'] == 'table':
sample = get_api(
@@ -659,7 +642,7 @@ def _small_indexing(user, fs, client, source, destination, index_name):
)
# TODO if rows == MAX_ROWS truncation warning
elif source['inputFormat'] == 'manual':
- pass # No need to do anything
+ pass # No need to do anything
else:
response = client.index(name=index_name, data=data, **kwargs)
errors = [error.get('message', '') for error in response['responseHeader'].get('errors', [])]
@@ -691,7 +674,7 @@ def _large_indexing(request, file_format, collection_name, query=None, start_tim
client = SolrClient(user=request.user)
- if not client.exists(collection_name) and not request.POST.get('show_command'): # if destination['isTargetExisting']:
+ if not client.exists(collection_name) and not request.POST.get('show_command'): # if destination['isTargetExisting']:
client.create_index(
name=collection_name,
fields=request.POST.get('fields', schema_fields),
@@ -786,12 +769,12 @@ def upload_local_file(request):
read_file = pd.read_excel(upload_file)
else:
read_file = pd.read_excel(upload_file, engine='xlrd')
-
+
temp_file = tempfile.NamedTemporaryFile(mode='w', prefix=filename, suffix='.csv', delete=False)
read_file.to_csv(temp_file, index=False)
file_type = 'excel'
- else:
+ else:
temp_file = tempfile.NamedTemporaryFile(prefix=filename, suffix='.csv', delete=False)
temp_file.write(upload_file.read())
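
decode_utf8() above turns an iterator of bytes (such as an uploaded file) into text lines for csv.reader. A short usage sketch with an in-memory file:

import csv
import io

def decode_utf8(input_iterator):
    for line in input_iterator:
        yield line.decode('utf-8')

raw = io.BytesIO(b'name,age\nalice,30\nbob,25\n')
for row in csv.reader(decode_utf8(raw)):
    print(row)  # ['name', 'age'], ['alice', '30'], ['bob', '25']
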
diff --git a/desktop/libs/indexer/src/indexer/api3_tests.py b/desktop/libs/indexer/src/indexer/api3_tests.py
index 541dd4e2d76..c4978338fe8 100644
--- a/desktop/libs/indexer/src/indexer/api3_tests.py
+++ b/desktop/libs/indexer/src/indexer/api3_tests.py
@@ -15,20 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import json
import sys
-from django.utils.datastructures import MultiValueDict
+import json
+from unittest.mock import Mock, patch
+from urllib.parse import unquote as urllib_unquote
+
from django.core.files.uploadhandler import InMemoryUploadedFile
+from django.utils.datastructures import MultiValueDict
from desktop.settings import BASE_DIR
-from indexer.api3 import upload_local_file, guess_field_types, guess_format
-
-if sys.version_info[0] > 2:
- from urllib.parse import unquote as urllib_unquote
- from unittest.mock import patch, Mock, MagicMock
-else:
- from urllib import unquote as urllib_unquote
- from mock import patch, Mock, MagicMock
+from indexer.api3 import guess_field_types, guess_format, upload_local_file
def test_xlsx_local_file_upload():
diff --git a/desktop/libs/indexer/src/indexer/argument.py b/desktop/libs/indexer/src/indexer/argument.py
index 62a450e4559..3b563e5ddd1 100644
--- a/desktop/libs/indexer/src/indexer/argument.py
+++ b/desktop/libs/indexer/src/indexer/argument.py
@@ -15,13 +15,9 @@
# limitations under the License.import logging
import sys
-
from builtins import object
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from django.utils.translation import gettext as _
class Argument(object):
diff --git a/desktop/libs/indexer/src/indexer/conf.py b/desktop/libs/indexer/src/indexer/conf.py
index 8803343070c..1215e33ce31 100644
--- a/desktop/libs/indexer/src/indexer/conf.py
+++ b/desktop/libs/indexer/src/indexer/conf.py
@@ -15,23 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-import logging
import os
-import sys
+import logging
+from urllib.parse import urlparse
+
+from django.utils.translation import gettext_lazy as _t
from desktop.lib.conf import Config, coerce_bool
from libsolr import conf as libsolr_conf
from libzookeeper import conf as libzookeeper_conf
-if sys.version_info[0] > 2:
- from urllib.parse import urlparse
- from django.utils.translation import gettext_lazy as _t
-else:
- from urlparse import urlparse
- from django.utils.translation import ugettext_lazy as _t
-
LOG = logging.getLogger()
@@ -62,14 +55,14 @@ def zkensemble():
clusters = CLUSTERS.get()
if clusters['default'].HOST_PORTS.get() != 'localhost:2181':
return '%s/solr' % clusters['default'].HOST_PORTS.get()
- except:
+ except Exception:
LOG.warning('Failed to get Zookeeper ensemble')
try:
from search.conf import SOLR_URL
parsed = urlparse(SOLR_URL.get())
return "%s:2181/solr" % (parsed.hostname or 'localhost')
- except:
+ except Exception:
LOG.warning('Failed to get Solr url')
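
The conf.py hunks narrow two bare except: clauses to except Exception:, so KeyboardInterrupt and SystemExit are no longer swallowed while the fallback is still logged. A minimal sketch of the same guard around a flaky lookup (generic callables, not the Hue config objects):

import logging

logging.basicConfig(level=logging.WARNING)
LOG = logging.getLogger()

def zk_ensemble(get_host_ports):
    try:
        return '%s/solr' % get_host_ports()
    except Exception:  # narrower than a bare except:
        LOG.warning('Failed to get Zookeeper ensemble')
        return 'localhost:2181/solr'

def broken_lookup():
    raise RuntimeError('ZooKeeper unreachable')

print(zk_ensemble(lambda: 'zk1:2181,zk2:2181,zk3:2181'))  # -> zk1:2181,zk2:2181,zk3:2181/solr
print(zk_ensemble(broken_lookup))                         # -> localhost:2181/solr (after a warning)
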
diff --git a/desktop/libs/indexer/src/indexer/controller.py b/desktop/libs/indexer/src/indexer/controller.py
index cb8ef170a6d..5e55523c163 100644
--- a/desktop/libs/indexer/src/indexer/controller.py
+++ b/desktop/libs/indexer/src/indexer/controller.py
@@ -16,34 +16,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
+import os
+import sys
import json
+import shutil
import logging
import numbers
-import os
-import shutil
-import sys
+from builtins import object
import tablib
+from django.utils.translation import gettext as _
-from desktop.lib.exceptions_renderable import PopupException
from dashboard.models import Collection2
-from libsolr.api import SolrApi
-from libzookeeper.models import ZookeeperClient
-from search.conf import SOLR_URL, SECURITY_ENABLED
-
+from desktop.lib.exceptions_renderable import PopupException
from indexer.conf import CORE_INSTANCE_DIR
-from indexer.utils import copy_configs, field_values_from_log, field_values_from_separated_file
from indexer.solr_client import SolrClient
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from indexer.utils import copy_configs, field_values_from_log, field_values_from_separated_file
+from libsolr.api import SolrApi
+from libzookeeper.models import ZookeeperClient
+from search.conf import SECURITY_ENABLED, SOLR_URL
LOG = logging.getLogger()
-MAX_UPLOAD_SIZE = 100 * 1024 * 1024 # 100 MB
+MAX_UPLOAD_SIZE = 100 * 1024 * 1024 # 100 MB
ALLOWED_FIELD_ATTRIBUTES = set(['name', 'type', 'indexed', 'stored'])
FLAGS = [('I', 'indexed'), ('T', 'tokenized'), ('S', 'stored')]
ZK_SOLR_CONFIG_NAMESPACE = 'configs'
@@ -124,13 +118,13 @@ def get_fields(self, collection_or_core_name):
try:
fields = api.schema_fields(collection_or_core_name)
fields = Collection2._make_luke_from_schema_fields(fields)
- except:
+ except Exception:
LOG.exception(_('Could not fetch fields for collection %s.') % collection_or_core_name)
raise PopupException(_('Could not fetch fields for collection %s. See logs for more info.') % collection_or_core_name)
try:
uniquekey = api.uniquekey(collection_or_core_name)
- except:
+ except Exception:
LOG.exception(_('Could not fetch unique key for collection %s.') % collection_or_core_name)
raise PopupException(_('Could not fetch unique key for collection %s. See logs for more info.') % collection_or_core_name)
@@ -200,7 +194,6 @@ def _create_non_solr_cloud_collection(self, name, fields, unique_key_field, df):
shutil.rmtree(instancedir)
raise PopupException(_('Could not create collection. Check error logs for more info.'))
-
def delete_collection(self, name, core):
"""
Delete solr collection/core and instance dir
@@ -263,7 +256,13 @@ def update_data_from_hdfs(self, fs, collection_or_core_name, fields, path, data_
data = json.dumps([value for value in field_values_from_log(fh, fields)])
content_type = 'json'
elif data_type == 'separated':
- data = json.dumps([value for value in field_values_from_separated_file(fh, kwargs.get('separator', ','), kwargs.get('quote_character', '"'), fields)], indent=2)
+ data = json.dumps(
+ [
+ value
+ for value in field_values_from_separated_file(fh, kwargs.get('separator', ','), kwargs.get('quote_character', '"'), fields)
+ ],
+ indent=2,
+ )
content_type = 'json'
else:
raise PopupException(_('Could not update index. Unknown type %s') % data_type)
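
The 'separated' branch above serialises delimited rows to JSON before handing them to the index update. A minimal stand-in using only the standard library (field names invented for the example):

import csv
import io
import json

fields = ['id', 'name']
fh = io.StringIO('1,alice\n2,bob\n')
data = json.dumps([dict(zip(fields, row)) for row in csv.reader(fh)], indent=2)
print(data)  # [{"id": "1", "name": "alice"}, {"id": "2", "name": "bob"}], pretty-printed
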
diff --git a/desktop/libs/indexer/src/indexer/file_format.py b/desktop/libs/indexer/src/indexer/file_format.py
index f985eb9ffd8..16ba26fa92b 100644
--- a/desktop/libs/indexer/src/indexer/file_format.py
+++ b/desktop/libs/indexer/src/indexer/file_format.py
@@ -13,33 +13,24 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.import logging
-from future import standard_library
-standard_library.install_aliases()
-from builtins import range
-from past.builtins import basestring
-from builtins import object
+
import csv
import gzip
+import logging
import operator
import itertools
-import logging
-import sys
+from builtins import object, range
+from io import StringIO as string_io
-from desktop.lib import i18n
+from django.utils.translation import gettext as _
+from past.builtins import basestring, long
+from desktop.lib import i18n
from indexer.argument import CheckboxArgument, TextDelimiterArgument
from indexer.conf import ENABLE_SCALABLE_INDEXER
from indexer.fields import Field, guess_field_type_from_samples
from indexer.indexers.morphline_operations import get_operator
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
- from past.builtins import long
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
- from StringIO import StringIO as string_io
-
LOG = logging.getLogger()
@@ -59,8 +50,8 @@ def get_format_types():
ApacheCombinedFormat,
SyslogFormat,
HueLogFormat,
- #RubyLogFormat,
- #ParquetFormat
+ # RubyLogFormat,
+ # ParquetFormat
])
return formats
@@ -69,13 +60,16 @@ def get_format_types():
def get_file_indexable_format_types():
return [format_ for format_ in get_format_types() if format_.is_file_indexable]
+
def _get_format_mapping():
return dict([(format_.get_name(), format_) for format_ in get_format_types()])
+
def get_file_format_class(type_):
mapping = _get_format_mapping()
return mapping[type_] if type_ in mapping else None
+
def get_file_format_instance(file, format_=None):
file_stream = file['stream']
file_extension = file['name'].split('.')[-1] if '.' in file['name'] else ''
@@ -368,7 +362,8 @@ def _hasHeader(self, sniffer, sample, dialect):
columns = len(header)
columnTypes = {}
- for i in range(columns): columnTypes[i] = None
+ for i in range(columns):
+ columnTypes[i] = None
checked = 0
for row in rdr:
@@ -408,7 +403,7 @@ def _hasHeader(self, sniffer, sample, dialect):
# on whether it's a header
hasHeader = 0
for col, colType in list(columnTypes.items()):
- if type(colType) == type(0): # it's a length
+ if type(colType) is type(0): # it's a length
if len(header[col]) != colType:
hasHeader += 1
else:
@@ -678,7 +673,7 @@ class HiveFormat(CSVFormat):
"string": "string",
"timestamp": "date",
"binary": "string",
- "decimal": "double", # Won't match decimal(16,6)
+ "decimal": "double", # Won't match decimal(16,6)
"date": "date",
}
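
The _hasHeader() heuristic touched above (compare the first row's cell types and lengths against later rows) closely follows csv.Sniffer().has_header() from the standard library, which can be used directly on simple samples:

import csv

sample = 'name,age\nalice,30\nbob,25\n'
sniffer = csv.Sniffer()
print(sniffer.has_header(sample))       # True: the first row does not look like the data rows
print(sniffer.sniff(sample).delimiter)  # ','
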
diff --git a/desktop/libs/indexer/src/indexer/indexers/base.py b/desktop/libs/indexer/src/indexer/indexers/base.py
index 0e7288648a6..c80223e98b2 100644
--- a/desktop/libs/indexer/src/indexer/indexers/base.py
+++ b/desktop/libs/indexer/src/indexer/indexers/base.py
@@ -17,15 +17,12 @@
import sys
+from django.utils.translation import gettext as _
+
from desktop.conf import has_connectors
from desktop.lib.connectors.models import _get_installed_connectors
from desktop.lib.exceptions_renderable import PopupException
-from desktop.lib.i18n import smart_unicode
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from desktop.lib.i18n import smart_str
def get_api(user, connector_id):
@@ -52,7 +49,6 @@ def __init__(self, user, connector_id):
def index(self, source, destination, options=None): pass
-
class IndexerApiException(Exception):
def __init__(self, message=None):
self.message = message or _('No error message, please check the logs.')
@@ -61,4 +57,4 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
diff --git a/desktop/libs/indexer/src/indexer/indexers/envelope.py b/desktop/libs/indexer/src/indexer/indexers/envelope.py
index 485407c2029..240141b48f6 100644
--- a/desktop/libs/indexer/src/indexer/indexers/envelope.py
+++ b/desktop/libs/indexer/src/indexer/indexers/envelope.py
@@ -14,27 +14,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.import logging
-from builtins import object
-import logging
import os
import sys
+import logging
+from builtins import object
from django.urls import reverse
+from django.utils.translation import gettext as _
+from desktop.lib.exceptions_renderable import PopupException
from hadoop.fs.hadoopfs import Hdfs
from indexer.conf import CONFIG_JARS_LIBS_PATH, config_morphline_path
from libzookeeper.conf import zkensemble
from notebook.models import make_notebook
from useradmin.models import User
-from desktop.lib.exceptions_renderable import PopupException
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -45,7 +39,6 @@ def __init__(self, username, fs=None, jt=None, solr_client=None):
self.jt = jt
self.username = username
-
def _upload_workspace(self, configs):
from oozie.models2 import Job
@@ -60,7 +53,6 @@ def _upload_workspace(self, configs):
return hdfs_workspace_path
-
def run(self, request, collection_name, configs, input_path, start_time=None, lib_path=None):
workspace_path = self._upload_workspace(configs)
@@ -70,8 +62,8 @@ def run(self, request, collection_name, configs, input_path, start_time=None, li
task = make_notebook(
name=_('Indexing into %s') % collection_name,
editor_type='notebook',
- #on_success_url=reverse('search:browse', kwargs={'name': collection_name}),
- #pub_sub_url='assist.collections.refresh',
+ # on_success_url=reverse('search:browse', kwargs={'name': collection_name}),
+ # pub_sub_url='assist.collections.refresh',
is_task=True,
is_notebook=True,
last_executed=start_time
@@ -98,7 +90,6 @@ def run(self, request, collection_name, configs, input_path, start_time=None, li
return task.execute(request, batch=True)
-
def generate_config(self, properties):
configs = {
}
@@ -168,7 +159,6 @@ def generate_config(self, properties):
else:
raise PopupException(_('Input format not recognized: %(inputFormat)s') % properties)
-
extra_step = ''
properties['output_deriver'] = """
deriver {
@@ -176,7 +166,7 @@ def generate_config(self, properties):
query.literal = \"\"\"SELECT * from inputdata\"\"\"
}"""
- if properties['inputFormat'] == 'stream' and properties['topics'] == 'NavigatorAuditEvents': # Kudu does not support upper case names
+ if properties['inputFormat'] == 'stream' and properties['topics'] == 'NavigatorAuditEvents': # Kudu does not support upper case names
properties['output_deriver'] = """
deriver {
type = sql
@@ -205,7 +195,6 @@ def generate_config(self, properties):
\"\"\"
}"""
-
if properties['ouputFormat'] == 'file':
output = """
%(output_deriver)s
@@ -245,7 +234,7 @@ def generate_config(self, properties):
table.name = "%(output_table)s"
}""" % properties
elif properties['ouputFormat'] == 'index':
- if True: # Workaround until envelope Solr output is official
+ if True: # Workaround until envelope Solr output is official
morphline_config = open(os.path.join(config_morphline_path(), 'navigator_topic.morphline.conf')).read()
configs['navigator_topic.morphline.conf'] = morphline_config.replace(
'${SOLR_COLLECTION}', properties['collectionName']
@@ -355,7 +344,7 @@ def _envelope_job(request, file_format, destination, start_time=None, lib_path=N
collection_name = destination['name']
indexer = EnvelopeIndexer(request.user, request.fs)
- lib_path = None # Todo optional input field
+ lib_path = None # Todo optional input field
input_path = None
if file_format['inputFormat'] == 'table':
@@ -394,7 +383,7 @@ def _envelope_job(request, file_format, destination, start_time=None, lib_path=N
if True:
properties['window'] = ''
- else: # For "KafkaSQL"
+ else: # For "KafkaSQL"
properties['window'] = '''
window {
enabled = true
@@ -420,12 +409,12 @@ def _envelope_job(request, file_format, destination, start_time=None, lib_path=N
}
if destination['outputFormat'] == 'table':
- if destination['isTargetExisting']: # Todo: check if format matches
+ if destination['isTargetExisting']: # Todo: check if format matches
pass
else:
- destination['importData'] = False # Avoid LOAD DATA
+ destination['importData'] = False # Avoid LOAD DATA
if destination['tableFormat'] == 'kudu':
- properties['kafkaFieldNames'] = properties['kafkaFieldNames'].lower() # Kudu names should be all lowercase
+ properties['kafkaFieldNames'] = properties['kafkaFieldNames'].lower() # Kudu names should be all lowercase
# Create table
if not request.POST.get('show_command'):
SQLIndexer(
@@ -452,12 +441,11 @@ def _envelope_job(request, file_format, destination, start_time=None, lib_path=N
if file_format['inputFormat'] == 'stream':
properties['format'] = 'csv'
else:
- properties['format'] = file_format['tableFormat'] # or csv
+ properties['format'] = file_format['tableFormat'] # or csv
elif destination['outputFormat'] == 'index':
properties['collectionName'] = collection_name
properties['connection'] = SOLR_URL.get()
-
properties["app_name"] = 'Data Ingest'
properties["inputFormat"] = file_format['inputFormat']
properties["ouputFormat"] = destination['ouputFormat']
diff --git a/desktop/libs/indexer/src/indexer/indexers/flink_sql.py b/desktop/libs/indexer/src/indexer/indexers/flink_sql.py
index 1d40c932651..faf14ca5b74 100644
--- a/desktop/libs/indexer/src/indexer/indexers/flink_sql.py
+++ b/desktop/libs/indexer/src/indexer/indexers/flink_sql.py
@@ -14,19 +14,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.import logging
-import logging
import sys
+import logging
from django.urls import reverse
+from django.utils.translation import gettext as _
from notebook.models import make_notebook
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -46,7 +41,7 @@ def create_table_from_kafka(self, source, destination, start_time=-1, dry_run=Fa
source_type = source['sourceType']
- from desktop.api_public import _get_interpreter_from_dialect # due to a circular import
+ from desktop.api_public import _get_interpreter_from_dialect # due to a circular import
interpreter = _get_interpreter_from_dialect('flink', self.user)
editor_type = interpreter['type'] # destination['sourceType']
diff --git a/desktop/libs/indexer/src/indexer/indexers/flume.py b/desktop/libs/indexer/src/indexer/indexers/flume.py
index c1f40125761..d33c515d4ab 100644
--- a/desktop/libs/indexer/src/indexer/indexers/flume.py
+++ b/desktop/libs/indexer/src/indexer/indexers/flume.py
@@ -14,26 +14,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.import logging
-from builtins import object
-import logging
import os
import sys
+import logging
+from builtins import object
from django.urls import reverse
+from django.utils.translation import gettext as _
-from libzookeeper.conf import zkensemble
+from desktop.lib.exceptions_renderable import PopupException
from indexer.conf import config_morphline_path
+from libzookeeper.conf import zkensemble
from metadata.manager_client import ManagerApi
from useradmin.models import User
-from desktop.lib.exceptions_renderable import PopupException
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -42,7 +36,6 @@ class FlumeIndexer(object):
def __init__(self, user):
self.user = user
-
def start(self, destination_name, file_format, destination):
responses = {'status': 0}
@@ -59,7 +52,6 @@ def start(self, destination_name, file_format, destination):
return responses
-
def generate_config(self, source, destination):
configs = []
@@ -160,7 +152,6 @@ def generate_config(self, source, destination):
return configs
-
def generate_morphline_config(self, destination):
# TODO manage generic config, cf. MorphlineIndexer
morphline_config = open(os.path.join(config_morphline_path(), 'hue_accesslogs_no_geo.morphline.conf')).read()
diff --git a/desktop/libs/indexer/src/indexer/indexers/morphline.py b/desktop/libs/indexer/src/indexer/indexers/morphline.py
index ec1c5b7551f..4e67aff1b5f 100644
--- a/desktop/libs/indexer/src/indexer/indexers/morphline.py
+++ b/desktop/libs/indexer/src/indexer/indexers/morphline.py
@@ -14,32 +14,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.import logging
-from builtins import object
-import logging
import os
import sys
-
+import logging
+from builtins import object
from collections import deque
from django.urls import reverse
+from django.utils.translation import gettext as _
from mako.lookup import TemplateLookup
from desktop.models import Document2
-from notebook.connectors.base import get_api
-from notebook.models import Notebook, make_notebook
-from useradmin.models import User
-
-from indexer.conf import CONFIG_INDEXING_TEMPLATES_PATH, CONFIG_INDEXER_LIBS_PATH
+from indexer.conf import CONFIG_INDEXER_LIBS_PATH, CONFIG_INDEXING_TEMPLATES_PATH
from indexer.fields import get_field_type
-from indexer.file_format import get_file_format_instance, get_file_format_class
+from indexer.file_format import get_file_format_class, get_file_format_instance
from indexer.indexers.morphline_operations import get_checked_args
from indexer.solr_client import SolrClient
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from notebook.connectors.base import get_api
+from notebook.models import Notebook, make_notebook
+from useradmin.models import User
LOG = logging.getLogger()
@@ -201,7 +194,7 @@ def generate_morphline_config(self, collection_name, data, uuid_name=None, lib_p
"get_kept_args": get_checked_args,
"grok_dictionaries_location": grok_dicts_loc if self.fs and self.fs.exists(grok_dicts_loc) else None,
"geolite_db_location": geolite_loc if self.fs and self.fs.exists(geolite_loc) else None,
- "zk_host": self.solr_client.get_zookeeper_host() ## offline test?
+ "zk_host": self.solr_client.get_zookeeper_host() # offline test?
}
oozie_workspace = CONFIG_INDEXING_TEMPLATES_PATH.get()
@@ -224,7 +217,7 @@ def _create_solr_collection(user, fs, client, destination, index_name, kwargs):
for field in fields:
for operation in field['operations']:
if operation['type'] == 'split':
- field['multiValued'] = True # Solr requires multiValued to be set when splitting
+ field['multiValued'] = True # Solr requires multiValued to be set when splitting
kwargs['f.%(name)s.split' % field] = 'true'
kwargs['f.%(name)s.separator' % field] = operation['settings']['splitChar'] or ','
diff --git a/desktop/libs/indexer/src/indexer/indexers/morphline_operations.py b/desktop/libs/indexer/src/indexer/indexers/morphline_operations.py
index 6b238582942..9d4b4492449 100644
--- a/desktop/libs/indexer/src/indexer/indexers/morphline_operations.py
+++ b/desktop/libs/indexer/src/indexer/indexers/morphline_operations.py
@@ -15,15 +15,11 @@
# limitations under the License.import logging
import sys
-
from builtins import object
-from indexer.argument import TextArgument, CheckboxArgument, MappingArgument
+from django.utils.translation import gettext as _
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from indexer.argument import CheckboxArgument, MappingArgument, TextArgument
class Operator(object):
@@ -58,6 +54,7 @@ def get_default_operation(self):
"fields": self._get_default_output_fields()
}
+
OPERATORS = [
Operator(
name="split",
@@ -127,9 +124,11 @@ def get_default_operation(self):
),
]
+
def get_operator(operation_name):
return [operation for operation in OPERATORS if operation.name == operation_name][0]
+
def get_checked_args(operation):
operation_args = get_operator(operation["type"]).args
diff --git a/desktop/libs/indexer/src/indexer/indexers/morphline_tests.py b/desktop/libs/indexer/src/indexer/indexers/morphline_tests.py
index 8680e52fdd7..7be69435737 100644
--- a/desktop/libs/indexer/src/indexer/indexers/morphline_tests.py
+++ b/desktop/libs/indexer/src/indexer/indexers/morphline_tests.py
@@ -14,34 +14,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-from builtins import zip
-from past.builtins import basestring
-from builtins import object
+import sys
+import logging
+from builtins import object, zip
from copy import deepcopy
+from io import StringIO as string_io
-import logging
import pytest
-import sys
+from future import standard_library
+from past.builtins import basestring
from desktop.lib.django_test_util import make_logged_in_client
-from desktop.lib.test_utils import grant_access, add_to_group
+from desktop.lib.test_utils import add_to_group, grant_access
from hadoop.pseudo_hdfs4 import is_live_cluster, shared_cluster
-from useradmin.models import User
-
from indexer.conf import ENABLE_SCALABLE_INDEXER
from indexer.controller import CollectionManagerController
-from indexer.file_format import ApacheCombinedFormat, RubyLogFormat, HueLogFormat
from indexer.fields import Field
-from indexer.indexers.morphline_operations import get_operator
+from indexer.file_format import ApacheCombinedFormat, HueLogFormat, RubyLogFormat
from indexer.indexers.morphline import MorphlineIndexer
+from indexer.indexers.morphline_operations import get_operator
from indexer.solr_client import SolrClient
from indexer.solr_client_tests import MockSolrCdhCloudHdfsApi
-
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
-else:
- from StringIO import StringIO as string_io
+from useradmin.models import User
standard_library.install_aliases()
diff --git a/desktop/libs/indexer/src/indexer/indexers/phoenix_sql.py b/desktop/libs/indexer/src/indexer/indexers/phoenix_sql.py
index eef290a91ee..cb2c31a1865 100644
--- a/desktop/libs/indexer/src/indexer/indexers/phoenix_sql.py
+++ b/desktop/libs/indexer/src/indexer/indexers/phoenix_sql.py
@@ -16,24 +16,15 @@
import csv
import logging
-import sys
-import uuid
+from io import StringIO as string_io
+from urllib.parse import unquote as urllib_unquote, urlparse
+
from django.urls import reverse
+from django.utils.translation import gettext as _
from notebook.conf import get_ordered_interpreters
from notebook.models import make_notebook
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
- from urllib.parse import urlparse, unquote as urllib_unquote
- from django.utils.translation import gettext as _
-else:
- from cStringIO import StringIO as string_io
- from django.utils.translation import ugettext as _
- from urllib import unquote as urllib_unquote
- from urlparse import urlparse
-
-
LOG = logging.getLogger()
@@ -87,7 +78,7 @@ def create_table_from_file(self, request, source, destination, start_time=-1, dr
if (source['format']['hasHeader'] and count == 0) or not csv_row:
continue
else:
- _sql = ', '.join([ "'{0}'".format(col_val) if columns[count]['type'] in ('varchar', 'timestamp') \
+ _sql = ', '.join(["'{0}'".format(col_val) if columns[count]['type'] in ('varchar', 'timestamp')
else '{0}'.format(col_val) for count, col_val in enumerate(csv_row)])
sql += '''\nUPSERT INTO %(table_name)s VALUES (%(csv_row)s);\n''' % {
@@ -95,7 +86,7 @@ def create_table_from_file(self, request, source, destination, start_time=-1, dr
'table_name': table_name,
'csv_row': _sql
}
-        
+
if dry_run:
return sql
else:
diff --git a/desktop/libs/indexer/src/indexer/indexers/phoenix_sql_tests.py b/desktop/libs/indexer/src/indexer/indexers/phoenix_sql_tests.py
index 820ffaa08e2..94bcf0dbf81 100644
--- a/desktop/libs/indexer/src/indexer/indexers/phoenix_sql_tests.py
+++ b/desktop/libs/indexer/src/indexer/indexers/phoenix_sql_tests.py
@@ -17,15 +17,11 @@
# limitations under the License.from indexer.indexers.phoenix_sql import PhoenixIndexer
import sys
+from unittest.mock import MagicMock, Mock, patch
from desktop.settings import BASE_DIR
from indexer.indexers.phoenix_sql import PhoenixIndexer
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock, MagicMock
-else:
- from mock import patch, Mock, MagicMock
-
def test_create_table_phoenix():
with patch('indexer.indexers.phoenix_sql.get_ordered_interpreters') as get_ordered_interpreters:
diff --git a/desktop/libs/indexer/src/indexer/indexers/rdbms.py b/desktop/libs/indexer/src/indexer/indexers/rdbms.py
index 8033ada2b5b..07ca029992e 100644
--- a/desktop/libs/indexer/src/indexer/indexers/rdbms.py
+++ b/desktop/libs/indexer/src/indexer/indexers/rdbms.py
@@ -15,14 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import json
-import logging
import sys
+import json
import uuid
+import logging
from django.urls import reverse
+from django.utils.translation import gettext as _
-from librdbms.conf import DATABASES, get_database_password, get_server_choices, get_connector_name
+from desktop.lib.django_util import JsonResponse
+from desktop.lib.i18n import smart_str
+from librdbms.conf import DATABASES, get_connector_name, get_database_password, get_server_choices
from librdbms.jdbc import Jdbc
from librdbms.server import dbms as rdbms
from notebook.conf import get_ordered_interpreters
@@ -30,15 +33,6 @@
from notebook.models import make_notebook
from useradmin.models import User
-from desktop.lib.django_util import JsonResponse
-from desktop.lib.i18n import smart_str
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -65,6 +59,7 @@ def get_db_component(request):
return JsonResponse(format_)
+
def _get_api(request):
file_format = json.loads(request.POST.get('source', request.POST.get('fileFormat', '{}')))
options = None
@@ -72,7 +67,7 @@ def _get_api(request):
if file_format['rdbmsMode'] == 'customRdbms':
type = 'custom'
if file_format['rdbmsType'] == 'jdbc':
- name = file_format['rdbmsHostname'] # We make sure it's unique as name is the cache key
+ name = file_format['rdbmsHostname'] # We make sure it's unique as name is the cache key
interface = file_format['rdbmsType']
options = {'driver': file_format['rdbmsJdbcDriver'],
'url': file_format['rdbmsHostname'],
@@ -90,7 +85,7 @@ def _get_api(request):
'options': {},
'alias': file_format['rdbmsType']
}
- name = 'rdbms:%(server_name)s://%(server_host)s:%(server_port)s' % query_server # We make sure it's unique as name is the cache key
+ name = 'rdbms:%(server_name)s://%(server_host)s:%(server_port)s' % query_server # We make sure it's unique as name is the cache key
else:
if file_format['rdbmsType'] == 'jdbc':
type = file_format['rdbmsJdbcDriverName'] and file_format['rdbmsJdbcDriverName'].lower()
@@ -102,6 +97,7 @@ def _get_api(request):
return get_api(request, {'type': type, 'interface': interface, 'options': options, 'query_server': query_server, 'name': name})
+
def jdbc_db_list(request):
format_ = {'data': [], 'status': 1}
interpreters = get_ordered_interpreters(request.user)
@@ -110,6 +106,7 @@ def jdbc_db_list(request):
return JsonResponse(format_)
+
def get_drivers(request):
format_ = {'data': [], 'status': 1}
servers_dict = dict(get_server_choices())
@@ -120,6 +117,7 @@ def get_drivers(request):
return JsonResponse(format_)
+
def run_sqoop(request, source, destination, start_time):
rdbms_mode = source['rdbmsMode']
rdbms_name = source['rdbmsJdbcDriverName'] if source['rdbmsType'] == 'jdbc' else source['rdbmsType']
@@ -184,7 +182,7 @@ def run_sqoop(request, source, destination, start_time):
'url': url,
'rdbmsPort': rdbms_port
}
-        
+
password_file_path = request.fs.join(request.fs.get_home_dir() + '/sqoop/', uuid.uuid4().hex + '.password')
request.fs.do_as_user(
request.user,
diff --git a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py
index 9776fddebe1..bfa1b684bf9 100644
--- a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py
+++ b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py
@@ -36,23 +36,21 @@ def mock_uuid():
@pytest.mark.django_db
class TestSQLIndexer(object):
-
def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="empty", recreate=True, is_superuser=False)
self.user = User.objects.get(username="test")
def test_create_table_from_a_file_to_csv(self):
- fs = Mock(
- stats=Mock(return_value={'mode': 0o0777})
- )
+ fs = Mock(stats=Mock(return_value={'mode': 0o0777}))
def source_dict(key):
return {
'path': 'hdfs:///path/data.csv',
'format': {'quoteChar': '"', 'fieldSeparator': ','},
- 'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}],
- 'sourceType': 'hive'
+ 'sampleCols': [{'operations': [], 'comment': '', 'name': 'customers.id'}],
+ 'sourceType': 'hive',
}.get(key, Mock())
+
source = MagicMock()
source.__getitem__.side_effect = source_dict
@@ -66,16 +64,18 @@ def destination_dict(key):
'columns': [{'name': 'id', 'type': 'int'}],
'partitionColumns': [{'name': 'day', 'type': 'date', 'partitionValue': '20200101'}],
'description': 'No comment!',
- 'sourceType': 'hive-1'
+ 'sourceType': 'hive-1',
}.get(key, Mock())
+
destination = MagicMock()
destination.__getitem__.side_effect = destination_dict
with patch('notebook.models.get_interpreter') as get_interpreter:
notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination)
- assert (
- [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
+ assert [
+ statement.strip()
+ for statement in '''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table`
(
@@ -98,8 +98,8 @@ def destination_dict(key):
AS SELECT *
FROM `default`.`hue__tmp_export_table`;
-DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')] ==
- [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')])
+DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')
+ ] == [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]
@patch('uuid.uuid4', mock_uuid)
def test_create_table_from_a_file_to_csv_for_kms_encryption(self):
@@ -119,9 +119,10 @@ def enc_source_dict(key):
return {
'path': '/enc_zn/upload_dir/data.csv',
'format': {'quoteChar': '"', 'fieldSeparator': ','},
- 'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}],
- 'sourceType': 'hive'
+ 'sampleCols': [{'operations': [], 'comment': '', 'name': 'customers.id'}],
+ 'sourceType': 'hive',
}.get(key, Mock())
+
source = MagicMock()
source.__getitem__.side_effect = enc_source_dict
@@ -135,24 +136,24 @@ def destination_dict(key):
'columns': [{'name': 'id', 'type': 'int'}],
'partitionColumns': [{'name': 'day', 'type': 'date', 'partitionValue': '20200101'}],
'description': 'No comment!',
- 'sourceType': 'hive-1'
+ 'sourceType': 'hive-1',
}.get(key, Mock())
+
destination = MagicMock()
destination.__getitem__.side_effect = destination_dict
fs = Mock(
- stats=Mock(
- return_value=MockStat()
- ),
- parent_path=mock_parent_path,
- get_home_dir=Mock(return_value='/user/test'),
+ stats=Mock(return_value=MockStat()),
+ parent_path=mock_parent_path,
+ get_home_dir=Mock(return_value='/user/test'),
)
notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination)
# source dir is in encryption zone, so the scratch dir is in the same dir
- assert (
- [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
+ assert [
+ statement.strip()
+ for statement in '''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table`
(
`id` int ) COMMENT "No comment!"
@@ -172,32 +173,32 @@ def destination_dict(key):
TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only')
AS SELECT *
FROM `default`.`hue__tmp_export_table`;
-DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')] ==
- [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')])
+DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';') # noqa: E501
+ ] == [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]
fs = Mock(
- stats=Mock(
- return_value=MockStat(encBit=False)
- ),
- parent_path=mock_parent_path,
- get_home_dir=Mock(return_value='/user/test'),
+ stats=Mock(return_value=MockStat(encBit=False)),
+ parent_path=mock_parent_path,
+ get_home_dir=Mock(return_value='/user/test'),
)
def source_dict(key):
return {
'path': '/user/test/data.csv',
'format': {'quoteChar': '"', 'fieldSeparator': ','},
- 'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}],
- 'sourceType': 'hive'
+ 'sampleCols': [{'operations': [], 'comment': '', 'name': 'customers.id'}],
+ 'sourceType': 'hive',
}.get(key, Mock())
+
source = MagicMock()
source.__getitem__.side_effect = source_dict
notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination)
# source dir is not in encryption zone, so the scratch dir is in user's home dir
- assert (
- [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
+ assert [
+ statement.strip()
+ for statement in '''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;
CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table`
(
`id` int ) COMMENT "No comment!"
@@ -217,8 +218,8 @@ def source_dict(key):
TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only')
AS SELECT *
FROM `default`.`hue__tmp_export_table`;
-DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')] ==
- [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')])
+DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';') # noqa: E501
+ ] == [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]
class MockRequest(object):
@@ -254,66 +255,292 @@ def stats(self, path):
@pytest.mark.django_db
def test_generate_create_text_table_with_data_partition():
source = {
- u'sourceType': 'hive', u'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id', u'level': 0,
- u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'',
- u'multiValued': False, u'unique': False, u'type': u'bigint', u'showProperties': False, u'keep': True},
- {u'operations': [], u'comment': u'', u'name': u'customers.name', u'level': 0, u'keyType': u'string', u'required': False,
- u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False,
- u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'',
- u'name': u'customers.email_preferences', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False, u'type':
- u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'customers.addresses',
- u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100,
- u'partitionValue': u'', u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True},
- {u'operations': [], u'comment': u'', u'name': u'customers.orders', u'level': 0, u'keyType': u'string', u'required': False,
- u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False,
- u'type': u'string', u'showProperties': False, u'keep': True}], u'name': u'', u'inputFormat': u'file',
- u'format': {u'status': 0, u'fieldSeparator': u',', u'hasHeader': True, u'quoteChar': u'"',
- u'recordSeparator': u'\\n', u'type': u'csv'}, u'defaultName': u'default.customer_stats', u'show': True,
- u'tableName': u'', u'sample': [], u'apiHelperType': u'hive', u'inputFormatsAll': [{u'name': u'File', u'value': u'file'},
- {u'name': u'Manually', u'value': u'manual'}, {u'name': u'SQL Query', u'value': u'query'},
- {u'name': u'Table', u'value': u'table'}], u'query': u'', u'databaseName': u'default', u'table': u'',
- u'inputFormats': [{u'name': u'File', u'value': u'file'}, {u'name': u'Manually', u'value': u'manual'},
- {u'name': u'SQL Query', u'value': u'query'}, {u'name': u'Table', u'value': u'table'}],
- u'path': u'/user/romain/customer_stats.csv', u'draggedQuery': u'',
- u'inputFormatsManual': [{u'name': u'Manually', u'value': u'manual'}], u'isObjectStore': False
+ 'sourceType': 'hive',
+ 'sampleCols': [
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.id',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'bigint',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.name',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.email_preferences',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.addresses',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.orders',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ ],
+ 'name': '',
+ 'inputFormat': 'file',
+ 'format': {'status': 0, 'fieldSeparator': ',', 'hasHeader': True, 'quoteChar': '"', 'recordSeparator': '\\n', 'type': 'csv'},
+ 'defaultName': 'default.customer_stats',
+ 'show': True,
+ 'tableName': '',
+ 'sample': [],
+ 'apiHelperType': 'hive',
+ 'inputFormatsAll': [
+ {'name': 'File', 'value': 'file'},
+ {'name': 'Manually', 'value': 'manual'},
+ {'name': 'SQL Query', 'value': 'query'},
+ {'name': 'Table', 'value': 'table'},
+ ],
+ 'query': '',
+ 'databaseName': 'default',
+ 'table': '',
+ 'inputFormats': [
+ {'name': 'File', 'value': 'file'},
+ {'name': 'Manually', 'value': 'manual'},
+ {'name': 'SQL Query', 'value': 'query'},
+ {'name': 'Table', 'value': 'table'},
+ ],
+ 'path': '/user/romain/customer_stats.csv',
+ 'draggedQuery': '',
+ 'inputFormatsManual': [{'name': 'Manually', 'value': 'manual'}],
+ 'isObjectStore': False,
}
destination = {
- u'isTransactional': False, u'isInsertOnly': False, u'sourceType': 'hive',
- u'KUDU_DEFAULT_PARTITION_COLUMN': {u'int_val': 16, u'name': u'HASH', u'columns': [],
- u'range_partitions': [{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=',
- u'lower_val': 0, u'values': [{u'value': u''}]}]}, u'isTargetChecking': False, u'tableName': u'customer_stats',
- u'outputFormatsList': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr index', u'value': u'index'},
- {u'name': u'File', u'value': u'file'}, {u'name': u'Database', u'value': u'database'}], u'customRegexp': u'',
- u'isTargetExisting': False, u'partitionColumns': [{u'operations': [], u'comment': u'', u'name': u'new_field_1',
- u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': True, u'length': 100,
- u'partitionValue': u'AAA', u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}],
- u'useCustomDelimiters': False, u'apiHelperType': u'hive', u'kuduPartitionColumns': [],
- u'outputFormats': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr index', u'value': u'index'}],
- u'customMapDelimiter': u'\\003', u'showProperties': False, u'useDefaultLocation': True, u'description': u'',
- u'primaryKeyObjects': [], u'customFieldDelimiter': u',', u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False,
- u'useCopy': False, u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1,
- u'name': u'VALUES', u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [],
- u'outputFormat': u'table', u'nonDefaultLocation': u'/user/romain/customer_stats.csv', u'name': u'default.customer_stats',
- u'tableFormat': u'text', 'ouputFormat': u'table',
- u'bulkColumnNames': u'customers.id,customers.name,customers.email_preferences,customers.addresses,customers.orders',
- u'columns': [{u'operations': [], u'comment': u'', u'name': u'customers.id', u'level': 0, u'keyType': u'string',
- u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False,
- u'unique': False, u'type': u'bigint', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'',
- u'name': u'customers.name', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False,
- u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False,
- u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'customers.email_preferences', u'level': 0, u'keyType': u'string',
- u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False,
- u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'',
- u'name': u'customers.addresses', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False,
- u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False,
- u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'customers.orders', u'level': 0, u'keyType': u'string',
- u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False,
- u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}], u'hasHeader': True,
- u'tableFormats': [{u'name': u'Text', u'value': u'text'}, {u'name': u'Parquet', u'value': u'parquet'},
- {u'name': u'Kudu', u'value': u'kudu'}, {u'name': u'Csv', u'value': u'csv'}, {u'name': u'Avro', u'value': u'avro'},
- {u'name': u'Json', u'value': u'json'}, {u'name': u'Regexp', u'value': u'regexp'}, {u'name': u'ORC', u'value': u'orc'}],
- u'customCollectionDelimiter': u'\\002'
+ 'isTransactional': False,
+ 'isInsertOnly': False,
+ 'sourceType': 'hive',
+ 'KUDU_DEFAULT_PARTITION_COLUMN': {
+ 'int_val': 16,
+ 'name': 'HASH',
+ 'columns': [],
+ 'range_partitions': [
+ {'include_upper_val': '<=', 'upper_val': 1, 'name': 'VALUES', 'include_lower_val': '<=', 'lower_val': 0, 'values': [{'value': ''}]}
+ ],
+ },
+ 'isTargetChecking': False,
+ 'tableName': 'customer_stats',
+ 'outputFormatsList': [
+ {'name': 'Table', 'value': 'table'},
+ {'name': 'Solr index', 'value': 'index'},
+ {'name': 'File', 'value': 'file'},
+ {'name': 'Database', 'value': 'database'},
+ ],
+ 'customRegexp': '',
+ 'isTargetExisting': False,
+ 'partitionColumns': [
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'new_field_1',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': True,
+ 'length': 100,
+ 'partitionValue': 'AAA',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ }
+ ],
+ 'useCustomDelimiters': False,
+ 'apiHelperType': 'hive',
+ 'kuduPartitionColumns': [],
+ 'outputFormats': [{'name': 'Table', 'value': 'table'}, {'name': 'Solr index', 'value': 'index'}],
+ 'customMapDelimiter': '\\003',
+ 'showProperties': False,
+ 'useDefaultLocation': True,
+ 'description': '',
+ 'primaryKeyObjects': [],
+ 'customFieldDelimiter': ',',
+ 'existingTargetUrl': '',
+ 'importData': True,
+ 'isIceberg': False,
+ 'useCopy': False,
+ 'databaseName': 'default',
+ 'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {
+ 'include_upper_val': '<=',
+ 'upper_val': 1,
+ 'name': 'VALUES',
+ 'include_lower_val': '<=',
+ 'lower_val': 0,
+ 'values': [{'value': ''}],
+ },
+ 'primaryKeys': [],
+ 'outputFormat': 'table',
+ 'nonDefaultLocation': '/user/romain/customer_stats.csv',
+ 'name': 'default.customer_stats',
+ 'tableFormat': 'text',
+ 'ouputFormat': 'table',
+ 'bulkColumnNames': 'customers.id,customers.name,customers.email_preferences,customers.addresses,customers.orders',
+ 'columns': [
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.id',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'bigint',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.name',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.email_preferences',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.addresses',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'customers.orders',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ ],
+ 'hasHeader': True,
+ 'tableFormats': [
+ {'name': 'Text', 'value': 'text'},
+ {'name': 'Parquet', 'value': 'parquet'},
+ {'name': 'Kudu', 'value': 'kudu'},
+ {'name': 'Csv', 'value': 'csv'},
+ {'name': 'Avro', 'value': 'avro'},
+ {'name': 'Json', 'value': 'json'},
+ {'name': 'Regexp', 'value': 'regexp'},
+ {'name': 'ORC', 'value': 'orc'},
+ ],
+ 'customCollectionDelimiter': '\\002',
}
request = MockRequest(fs=MockFs())
@@ -337,86 +564,422 @@ def test_generate_create_text_table_with_data_partition():
;'''
assert statement in sql, sql
- assert ('''LOAD DATA INPATH '/user/romain/customer_stats.csv' '''
- '''INTO TABLE `default`.`customer_stats` PARTITION (new_field_1='AAA');''' in sql), sql
+ assert (
+ '''LOAD DATA INPATH '/user/romain/customer_stats.csv' '''
+ '''INTO TABLE `default`.`customer_stats` PARTITION (new_field_1='AAA');''' in sql
+ ), sql
@pytest.mark.django_db
def test_generate_create_kudu_table_with_data():
source = {
- u'sourceType': 'impala', u'apiHelperType': 'hive', u'sampleCols': [], u'name': u'', u'inputFormat': u'file',
- u'format': {u'quoteChar': u'"', u'recordSeparator': u'\\n', u'type': u'csv', u'hasHeader': True, u'fieldSeparator': u','},
- u'show': True, u'tableName': u'', u'sample': [], u'defaultName': u'index_data', u'query': u'', u'databaseName': u'default',
- u'table': u'', u'inputFormats': [{u'name': u'File', u'value': u'file'}, {u'name': u'Manually', u'value': u'manual'}],
- u'path': u'/user/admin/index_data.csv', u'draggedQuery': u'', u'isObjectStore': False
+ 'sourceType': 'impala',
+ 'apiHelperType': 'hive',
+ 'sampleCols': [],
+ 'name': '',
+ 'inputFormat': 'file',
+ 'format': {'quoteChar': '"', 'recordSeparator': '\\n', 'type': 'csv', 'hasHeader': True, 'fieldSeparator': ','},
+ 'show': True,
+ 'tableName': '',
+ 'sample': [],
+ 'defaultName': 'index_data',
+ 'query': '',
+ 'databaseName': 'default',
+ 'table': '',
+ 'inputFormats': [{'name': 'File', 'value': 'file'}, {'name': 'Manually', 'value': 'manual'}],
+ 'path': '/user/admin/index_data.csv',
+ 'draggedQuery': '',
+ 'isObjectStore': False,
}
destination = {
- u'isTransactional': False, u'isInsertOnly': False, u'sourceType': 'impala',
- u'KUDU_DEFAULT_PARTITION_COLUMN': {u'int_val': 16, u'name': u'HASH', u'columns': [],
- u'range_partitions': [{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=',
- u'lower_val': 0, u'values': [{u'value': u''}]}]}, u'tableName': u'index_data',
- u'outputFormatsList': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr+index', u'value': u'index'},
- {u'name': u'File', u'value': u'file'}, {u'name': u'Database', u'value': u'database'}], u'customRegexp': u'',
- u'isTargetExisting': False, u'partitionColumns': [], u'useCustomDelimiters': True,
- u'kuduPartitionColumns': [{u'int_val': 16, u'name': u'HASH', u'columns': [u'id'],
- u'range_partitions': [{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=',
- u'lower_val': 0, u'values': [{u'value': u''}]}]}], u'outputFormats': [{u'name': u'Table', u'value': u'table'},
- {u'name': u'Solr+index', u'value': u'index'}], u'customMapDelimiter': None, u'showProperties': False, u'useDefaultLocation': True,
- u'description': u'Big Data', u'primaryKeyObjects': [{u'operations': [], u'comment': u'', u'name': u'id', u'level': 0,
- u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False,
- u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}], u'customFieldDelimiter': u',',
- u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, u'useCopy': False, u'databaseName': u'default',
- u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES',
- u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [u'id'],
- u'outputFormat': u'table', u'nonDefaultLocation': u'/user/admin/index_data.csv', u'name': u'index_data',
- u'tableFormat': u'kudu',
- u'bulkColumnNames': u'business_id,cool,date,funny,id,stars,text,type,useful,user_id,name,full_address,latitude,'
- 'longitude,neighborhoods,open,review_count,state', u'columns': [{u'operations': [], u'comment': u'', u'name': u'business_id',
- u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100,
- u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True},
- {u'operations': [], u'comment': u'', u'name': u'cool', u'level': 0, u'keyType': u'string', u'required': False,
- u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'bigint',
- u'showProperties': False, u'keep': False}, {u'operations': [], u'comment': u'', u'name': u'date', u'level': 0,
- u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False,
- u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'',
- u'name': u'funny', u'level': 0, u'scale': 4, u'precision': 10, u'keyType': u'string', u'required': False, u'nested': [],
- u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'decimal', u'showProperties': False,
- u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'id', u'level': 0, u'keyType': u'string', u'required': False,
- u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string',
- u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'stars', u'level': 0,
- u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False,
- u'unique': False, u'type': u'bigint', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'',
- u'name': u'text', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100,
- u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True},
- {u'operations': [], u'comment': u'', u'name': u'type', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [],
- u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False,
- u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'useful', u'level': 0, u'keyType': u'string', u'required': False,
- u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'bigint',
- u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'user_id', u'level': 0,
- u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False,
- u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'',
- u'name': u'name', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False,
- u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True},
- {u'operations': [], u'comment': u'', u'name': u'full_address', u'level': 0, u'keyType': u'string', u'required': False,
- u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string',
- u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'latitude', u'level': 0,
- u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False,
- u'unique': False, u'type': u'double', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'',
- u'name': u'longitude', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False,
- u'length': 100, u'multiValued': False, u'unique': False, u'type': u'double', u'showProperties': False, u'keep': True},
- {u'operations': [], u'comment': u'', u'name': u'neighborhoods', u'level': 0, u'keyType': u'string', u'required': False,
- u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string',
- u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'open', u'level': 0,
- u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False,
- u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'',
- u'name': u'review_count', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False,
- u'length': 100, u'multiValued': False, u'unique': False, u'type': u'bigint', u'showProperties': False, u'keep': True},
- {u'operations': [], u'comment': u'', u'name': u'state', u'level': 0, u'keyType': u'string', u'required': False,
- u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string',
- u'showProperties': False, u'keep': True}], u'hasHeader': True, u'tableFormats': [{u'name': u'Text', u'value': u'text'},
- {u'name': u'Parquet', u'value': u'parquet'}, {u'name': u'Json', u'value': u'json'}, {u'name': u'Kudu', u'value': u'kudu'},
- {u'name': u'Avro', u'value': u'avro'}, {u'name': u'Regexp', u'value': u'regexp'}, {u'name': u'RCFile', u'value': u'rcfile'},
- {u'name': u'ORC', u'value': u'orc'}, {u'name': u'SequenceFile', u'value': u'sequencefile'}], u'customCollectionDelimiter': None
+ 'isTransactional': False,
+ 'isInsertOnly': False,
+ 'sourceType': 'impala',
+ 'KUDU_DEFAULT_PARTITION_COLUMN': {
+ 'int_val': 16,
+ 'name': 'HASH',
+ 'columns': [],
+ 'range_partitions': [
+ {'include_upper_val': '<=', 'upper_val': 1, 'name': 'VALUES', 'include_lower_val': '<=', 'lower_val': 0, 'values': [{'value': ''}]}
+ ],
+ },
+ 'tableName': 'index_data',
+ 'outputFormatsList': [
+ {'name': 'Table', 'value': 'table'},
+ {'name': 'Solr+index', 'value': 'index'},
+ {'name': 'File', 'value': 'file'},
+ {'name': 'Database', 'value': 'database'},
+ ],
+ 'customRegexp': '',
+ 'isTargetExisting': False,
+ 'partitionColumns': [],
+ 'useCustomDelimiters': True,
+ 'kuduPartitionColumns': [
+ {
+ 'int_val': 16,
+ 'name': 'HASH',
+ 'columns': ['id'],
+ 'range_partitions': [
+ {
+ 'include_upper_val': '<=',
+ 'upper_val': 1,
+ 'name': 'VALUES',
+ 'include_lower_val': '<=',
+ 'lower_val': 0,
+ 'values': [{'value': ''}],
+ }
+ ],
+ }
+ ],
+ 'outputFormats': [{'name': 'Table', 'value': 'table'}, {'name': 'Solr+index', 'value': 'index'}],
+ 'customMapDelimiter': None,
+ 'showProperties': False,
+ 'useDefaultLocation': True,
+ 'description': 'Big Data',
+ 'primaryKeyObjects': [
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'id',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ }
+ ],
+ 'customFieldDelimiter': ',',
+ 'existingTargetUrl': '',
+ 'importData': True,
+ 'isIceberg': False,
+ 'useCopy': False,
+ 'databaseName': 'default',
+ 'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {
+ 'include_upper_val': '<=',
+ 'upper_val': 1,
+ 'name': 'VALUES',
+ 'include_lower_val': '<=',
+ 'lower_val': 0,
+ 'values': [{'value': ''}],
+ },
+ 'primaryKeys': ['id'],
+ 'outputFormat': 'table',
+ 'nonDefaultLocation': '/user/admin/index_data.csv',
+ 'name': 'index_data',
+ 'tableFormat': 'kudu',
+ 'bulkColumnNames': 'business_id,cool,date,funny,id,stars,text,type,useful,user_id,name,full_address,latitude,'
+ 'longitude,neighborhoods,open,review_count,state',
+ 'columns': [
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'business_id',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'cool',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'bigint',
+ 'showProperties': False,
+ 'keep': False,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'date',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'funny',
+ 'level': 0,
+ 'scale': 4,
+ 'precision': 10,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'decimal',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'id',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'stars',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'bigint',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'text',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'type',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'useful',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'bigint',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'user_id',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'name',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'full_address',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'latitude',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'double',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'longitude',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'double',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'neighborhoods',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'open',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'review_count',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'bigint',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'name': 'state',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'multiValued': False,
+ 'unique': False,
+ 'type': 'string',
+ 'showProperties': False,
+ 'keep': True,
+ },
+ ],
+ 'hasHeader': True,
+ 'tableFormats': [
+ {'name': 'Text', 'value': 'text'},
+ {'name': 'Parquet', 'value': 'parquet'},
+ {'name': 'Json', 'value': 'json'},
+ {'name': 'Kudu', 'value': 'kudu'},
+ {'name': 'Avro', 'value': 'avro'},
+ {'name': 'Regexp', 'value': 'regexp'},
+ {'name': 'RCFile', 'value': 'rcfile'},
+ {'name': 'ORC', 'value': 'orc'},
+ {'name': 'SequenceFile', 'value': 'sequencefile'},
+ ],
+ 'customCollectionDelimiter': None,
}
request = MockRequest(fs=MockFs())
@@ -452,7 +1015,8 @@ def test_generate_create_kudu_table_with_data():
TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false')'''
assert statement in sql, sql
- assert ('''CREATE TABLE `default`.`index_data` COMMENT "Big Data"
+ assert (
+ '''CREATE TABLE `default`.`index_data` COMMENT "Big Data"
PRIMARY KEY (id)
PARTITION BY HASH PARTITIONS 16
STORED AS kudu
@@ -460,13 +1024,15 @@ def test_generate_create_kudu_table_with_data():
'kudu.num_tablet_replicas'='1'
)
AS SELECT `id`, `business_id`, `date`, `funny`, `stars`, `text`, `type`, `useful`, `user_id`, `name`, '''
- '''`full_address`, `latitude`, `longitude`, `neighborhoods`, `open`, `review_count`, `state`
- FROM `default`.`hue__tmp_index_data`''' in sql), sql
+ '''`full_address`, `latitude`, `longitude`, `neighborhoods`, `open`, `review_count`, `state`
+ FROM `default`.`hue__tmp_index_data`''' in sql
+ ), sql
@pytest.mark.django_db
def test_generate_create_parquet_table():
- source = json.loads('''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",'''
+ source = json.loads(
+ '''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",'''
'''"-121.92150116"],["Citi Bank","2800000.0","US","Richmond","37.5242004395","-77.4932022095"],["Deutsche Bank","2600000.0","US",'''
'''"Corpus Christi","40.7807998657","-73.9772033691"],["Thomson Reuters","2400000.0","US","Albany","35.7976989746",'''
'''"-78.6252975464"],'''
@@ -494,7 +1060,8 @@ def test_generate_create_parquet_table():
'''"fieldSeparator":",","recordSeparator":"\\n","quoteChar":"\\"","hasHeader":true,"status":0},"show":true,"defaultName":'''
'''"default.query-hive-360"}'''
)
- destination = json.loads('''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.parquet_table"'''
+ destination = json.loads(
+ '''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.parquet_table"'''
''',"apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":[{"name":"Table","value":"table"},'''
'''{"name":"Solr index","value":"index"},{"name":"File","value":"file"},{"name":"Database","value":"database"}],'''
'''"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],"columns":[{"operations":[],'''
@@ -548,11 +1115,14 @@ def test_generate_create_parquet_table():
;'''
assert statement in sql, sql
- assert '''CREATE TABLE `default`.`parquet_table`
+ assert (
+ '''CREATE TABLE `default`.`parquet_table`
STORED AS parquet
AS SELECT *
FROM `default`.`hue__tmp_parquet_table`;
-''' in sql, sql
+'''
+ in sql
+ ), sql
assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`;''' in sql, sql
@@ -710,7 +1280,8 @@ def test_generate_create_avro_table():
@pytest.mark.django_db
def test_generate_create_iceberg_table():
- source = json.loads('''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",'''
+ source = json.loads(
+ '''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",'''
'''"-121.92150116"],["Citi Bank","2800000.0","US","Richmond","37.5242004395","-77.4932022095"],["Deutsche Bank","2600000.0","US",'''
'''"Corpus Christi","40.7807998657","-73.9772033691"],["Thomson Reuters","2400000.0","US","Albany","35.7976989746",'''
'''"-78.6252975464"],'''
@@ -738,7 +1309,8 @@ def test_generate_create_iceberg_table():
'''"fieldSeparator":",","recordSeparator":"\\n","quoteChar":"\\"","hasHeader":true,"status":0},"show":true,"defaultName":'''
'''"default.query-hive-360"}'''
)
- destination = json.loads('''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.parquet_table"'''
+ destination = json.loads(
+ '''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.parquet_table"'''
''',"apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":[{"name":"Table","value":"table"},'''
'''{"name":"Solr index","value":"index"},{"name":"File","value":"file"},{"name":"Database","value":"database"}],'''
'''"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],"columns":[{"operations":[],'''
@@ -793,19 +1365,23 @@ def test_generate_create_iceberg_table():
;'''
assert statement in sql, sql
- assert '''CREATE TABLE `default`.`parquet_table`
+ assert (
+ '''CREATE TABLE `default`.`parquet_table`
STORED BY ICEBERG
STORED AS parquet
AS SELECT *
FROM `default`.`hue__tmp_parquet_table`;
-''' in sql, sql
+'''
+ in sql
+ ), sql
assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`;''' in sql, sql
@pytest.mark.django_db
def test_generate_create_orc_table_transactional():
- source = json.loads('''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",'''
+ source = json.loads(
+ '''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",'''
'''"-121.92150116"],["Citi Bank","2800000.0","US","Richmond","37.5242004395","-77.4932022095"],["Deutsche Bank","2600000.0","US",'''
'''"Corpus Christi","40.7807998657","-73.9772033691"],["Thomson Reuters","2400000.0","US","Albany","35.7976989746",'''
'''"-78.6252975464"],'''
@@ -832,36 +1408,37 @@ def test_generate_create_orc_table_transactional():
'''"apiHelperType":"hive","query":"","draggedQuery":"","format":{"type":"csv","fieldSeparator":",","recordSeparator":"\\n",'''
'''"quoteChar":"\\"","hasHeader":true,"status":0},"show":true,"defaultName":"default.query-hive-360"}'''
)
- destination = json.loads('''{"isTransactional": true, "isInsertOnly": true, "sourceType": "hive", "name":'''
- '''"default.parquet_table","apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":'''
- '''[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"},{"name":"File","value":"file"},'''
- '''{"name":"Database","value":"database"}],"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],'''
- '''"columns":[{"operations":[],"comment":"","nested":[],"name":"acct_client","level":0,"keyType":"string","required":false,'''
- '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,'''
- '''"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"tran_amount",'''
- '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,'''
- '''"partitionValue":"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0},'''
- '''{"operations":[],"comment":"","nested":[],"name":"tran_country_cd","level":0,"keyType":"string","required":false,'''
- '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,'''
- '''"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city",'''
- '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,'''
- '''"partitionValue":"","multiValued":false,"unique":false,"type":"string","showProperties":false,"scale":0},'''
- '''{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lat","level":0,"keyType":"string","required":false,'''
- '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,'''
- '''"type":"double","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lon",'''
- '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":'''
- '''"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0}],"bulkColumnNames":"acct_client,'''
- '''tran_amount,tran_country_cd,vrfcn_city,vrfcn_city_lat,vrfcn_city_lon","showProperties":false,"isTargetExisting":false,'''
- '''"isTargetChecking":false,"existingTargetUrl":"","tableName":"parquet_table","databaseName":"default","tableFormat":"orc",'''
- '''"KUDU_DEFAULT_RANGE_PARTITION_COLUMN":{"values":[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=",'''
- '''"upper_val":1,"include_upper_val":"<="},"KUDU_DEFAULT_PARTITION_COLUMN":{"columns":[],"range_partitions":[{"values":'''
- '''[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=","upper_val":1,"include_upper_val":"<="}],"name":"HASH",'''
- '''"int_val":16},"tableFormats":[{"value":"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},'''
- '''{"value":"csv","name":"Csv"},{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},'''
- '''{"value":"orc","name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],'''
- '''"importData":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,'''
- '''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",'''
- '''"customRegexp":"","isIceberg":false,"useCopy":false}'''
+ destination = json.loads(
+ '''{"isTransactional": true, "isInsertOnly": true, "sourceType": "hive", "name":'''
+ '''"default.parquet_table","apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":'''
+ '''[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"},{"name":"File","value":"file"},'''
+ '''{"name":"Database","value":"database"}],"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],'''
+ '''"columns":[{"operations":[],"comment":"","nested":[],"name":"acct_client","level":0,"keyType":"string","required":false,'''
+ '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,'''
+ '''"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"tran_amount",'''
+ '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,'''
+ '''"partitionValue":"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0},'''
+ '''{"operations":[],"comment":"","nested":[],"name":"tran_country_cd","level":0,"keyType":"string","required":false,'''
+ '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,'''
+ '''"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city",'''
+ '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,'''
+ '''"partitionValue":"","multiValued":false,"unique":false,"type":"string","showProperties":false,"scale":0},'''
+ '''{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lat","level":0,"keyType":"string","required":false,'''
+ '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,'''
+ '''"type":"double","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lon",'''
+ '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":'''
+ '''"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0}],"bulkColumnNames":"acct_client,'''
+ '''tran_amount,tran_country_cd,vrfcn_city,vrfcn_city_lat,vrfcn_city_lon","showProperties":false,"isTargetExisting":false,'''
+ '''"isTargetChecking":false,"existingTargetUrl":"","tableName":"parquet_table","databaseName":"default","tableFormat":"orc",'''
+ '''"KUDU_DEFAULT_RANGE_PARTITION_COLUMN":{"values":[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=",'''
+ '''"upper_val":1,"include_upper_val":"<="},"KUDU_DEFAULT_PARTITION_COLUMN":{"columns":[],"range_partitions":[{"values":'''
+ '''[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=","upper_val":1,"include_upper_val":"<="}],"name":"HASH",'''
+ '''"int_val":16},"tableFormats":[{"value":"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},'''
+ '''{"value":"csv","name":"Csv"},{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},'''
+ '''{"value":"orc","name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],'''
+ '''"importData":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,'''
+ '''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",'''
+ '''"customRegexp":"","isIceberg":false,"useCopy":false}'''
)
path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
@@ -887,21 +1464,28 @@ def test_generate_create_orc_table_transactional():
;'''
assert statement in sql, sql
- assert '''CREATE TABLE `default`.`parquet_table`
+ assert (
+ '''CREATE TABLE `default`.`parquet_table`
STORED AS orc
TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only')
AS SELECT *
FROM `default`.`hue__tmp_parquet_table`;
-''' in sql, sql
+'''
+ in sql
+ ), sql
- assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`;
-''' in sql, sql
+ assert (
+ '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`;
+'''
+ in sql
+ ), sql
@pytest.mark.django_db
def test_generate_create_empty_kudu_table():
source = json.loads('''{"sourceType": "impala", "apiHelperType": "impala", "path": "", "inputFormat": "manual"}''')
- destination = json.loads('''{"isTransactional": false, "isInsertOnly": false, "sourceType": "impala", '''
+ destination = json.loads(
+ '''{"isTransactional": false, "isInsertOnly": false, "sourceType": "impala", '''
'''"name":"default.manual_empty_kudu","apiHelperType":"impala","description":"","outputFormat":"table",'''
'''"columns":[{"operations":[],"comment":"","nested":[],"name":"acct_client","level":0,"keyType":"string","required":false,'''
'''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,'''
@@ -936,7 +1520,8 @@ def test_generate_create_empty_kudu_table():
sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(source, destination).get_str()
- assert '''CREATE TABLE `default`.`manual_empty_kudu`
+ assert (
+ '''CREATE TABLE `default`.`manual_empty_kudu`
(
`acct_client` string ,
`tran_amount` double ,
@@ -945,155 +1530,421 @@ def test_generate_create_empty_kudu_table():
`vrfcn_city_lat` double ,
`vrfcn_city_lon` double , PRIMARY KEY (acct_client)
) STORED AS kudu TBLPROPERTIES('transactional'='false')
-;''' in sql, sql
+;'''
+ in sql
+ ), sql
@pytest.mark.django_db
def test_create_ddl_with_nonascii():
- source = {u'kafkaFieldType': u'delimited', u'rdbmsUsername': u'', u'kafkaFieldTypes': u'',
- u'selectedTableIndex': 0, u'rdbmsJdbcDriverNames': [], u'tableName': u'',
- u'sample': [[u'Weihaiwei', u'\u5a01\u6d77\u536b\u5e02', u'Weihai', u'\u5a01\u6d77\u5e02', u'1949-11-01'],
- [u'Xingshan', u'\u5174\u5c71\u5e02', u'Hegang', u'\u9e64\u5c97\u5e02', u'1950-03-23'],
- [u"Xi'an", u'\u897f\u5b89\u5e02', u'Liaoyuan', u'\u8fbd\u6e90\u5e02', u'1952-04-03'],
- [u'Nanzheng', u'\u5357\u90d1\u5e02', u'Hanzhong', u'\u6c49\u4e2d\u5e02', u'1953-10-24'],
- [u'Dihua', u'\u8fea\u5316\u5e02', u'?r\xfcmqi', u'\u4e4c\u9c81\u6728\u9f50\u5e02', u'1953-11-20']],
- u'rdbmsTypes': [], u'isFetchingDatabaseNames': False, u'rdbmsDbIsValid': False, u'query': u'',
- u'channelSourceSelectedHosts': [], u'table': u'', u'rdbmsAllTablesSelected': False,
- u'inputFormatsManual': [{u'name': u'Manually', u'value': u'manual'}], u'rdbmsPassword': u'',
- u'isObjectStore': False, u'tables': [{u'name': u''}], u'streamUsername': u'',
- u'kafkaSchemaManual': u'detect', u'connectorSelection': u'sfdc', u'namespace':
- {u'status': u'CREATED', u'computes':
- [{u'credentials': {}, u'type': u'direct', u'id': u'default', u'name': u'default'}],
- u'id': u'default', u'name': u'default'}, u'rdbmsIsAllTables': False, u'rdbmsDatabaseNames': [],
- u'hasStreamSelected': False, u'channelSourcePath': u'/var/log/hue-httpd/access_log',
- u'channelSourceHosts': [], u'show': True, u'streamObjects': [], u'streamPassword': u'',
- u'tablesNames': [], u'sampleCols': [{u'operations': [], u'comment': u'', u'unique': False,
- u'name': u'Before', u'level': 0, u'keyType': u'string',
- u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'',
- u'multiValued': False, u'keep': True, u'type': u'string',
- u'showProperties': False, u'scale': 0},
- {u'operations': [], u'comment': u'', u'unique': False,
- u'name': u'old_Chinese_name', u'level': 0, u'keyType':
- u'string', u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'',
- u'multiValued': False, u'keep': True, u'type': u'string',
- u'showProperties': False, u'scale': 0},
- {u'operations': [], u'comment': u'', u'unique': False,
- u'name': u'After', u'level': 0, u'keyType': u'string',
- u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'',
- u'multiValued': False, u'keep': True, u'type': u'string',
- u'showProperties': False, u'scale': 0},
- {u'operations': [], u'comment': u'', u'unique': False,
- u'name': u'new_Chinese_name', u'level': 0, u'keyType':
- u'string', u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'',
- u'multiValued': False, u'keep': True, u'type': u'string',
- u'showProperties': False, u'scale': 0},
- {u'operations': [], u'comment': u'', u'unique': False,
- u'name': u'Renamed_date', u'level': 0, u'keyType': u'string',
- u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'',
- u'multiValued': False, u'keep': True, u'type': u'string',
- u'showProperties': False, u'scale': 0}], u'rdbmsDatabaseName': u'',
- u'sourceType': u'hive', u'inputFormat': u'file', u'format': {u'status': 0, u'fieldSeparator': u',',
- u'hasHeader': True, u'quoteChar': u'"',
- u'recordSeparator': u'\\n', u'type': u'csv'},
- u'connectorList': [{u'name': u'Salesforce', u'value': u'sfdc'}], u'kafkaFieldDelimiter': u',',
- u'rdbmsPort': u'', u'rdbmsTablesExclude': [], u'isFetchingDriverNames': False, u'publicStreams':
- [{u'name': u'Kafka Topics', u'value': u'kafka'}, {u'name': u'Flume Agent', u'value': u'flume'}],
- u'channelSourceTypes': [{u'name': u'Directory or File', u'value': u'directory'},
- {u'name': u'Program', u'value': u'exec'},
- {u'name': u'Syslogs', u'value': u'syslogs'},
- {u'name': u'HTTP', u'value': u'http'}],
- u'databaseName': u'default', u'inputFormats': [{u'name': u'File', u'value': u'file'},
- {u'name': u'External Database', u'value': u'rdbms'},
- {u'name': u'Manually', u'value': u'manual'}],
- u'path': u'/user/admin/renamed_chinese_cities_gb2312.csv', u'streamToken': u'', u'kafkaFieldNames': u'',
- u'streamSelection': u'kafka', u'compute': {u'credentials': {}, u'type': u'direct',
- u'id': u'default', u'name': u'default'},
- u'name': u'', u'kafkaFieldSchemaPath': u'', u'kafkaTopics': [], u'rdbmsJdbcDriver': u'',
- u'rdbmsHostname': u'', u'isFetchingTableNames': False, u'rdbmsType': None, u'inputFormatsAll':
- [{u'name': u'File', u'value': u'file'}, {u'name': u'External Database', u'value': u'rdbms'},
- {u'name': u'Manually', u'value': u'manual'}], u'rdbmsTableNames': [],
- u'streamEndpointUrl': u'https://login.salesforce.com/services/Soap/u/42.0', u'kafkaSelectedTopics': u''}
- destination = {u'isTransactionalVisible': True, u'KUDU_DEFAULT_PARTITION_COLUMN':
- {u'int_val': 16, u'name': u'HASH', u'columns': [], u'range_partitions':
- [{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=',
- u'lower_val': 0, u'values': [{u'value': u''}]}]}, u'namespaces':
- [{u'status': u'CREATED', u'computes': [{u'credentials': {}, u'type': u'direct', u'id': u'default', u'name': u'default'}],
- u'id': u'default', u'name': u'default'}], u'isTargetChecking': False, 'ouputFormat': u'table',
- u'tableName': u'renamed_chinese_cities_gb2312', u'outputFormatsList':
- [{u'name': u'Table', u'value': u'table'}, {u'name': u'Search index', u'value': u'index'},
- {u'name': u'Database', u'value': u'database'}, {u'name': u'Folder', u'value': u'file'},
- {u'name': u'HBase Table', u'value': u'hbase'}],
- u'fieldEditorPlaceHolder': u'Example: SELECT * FROM [object Promise]', u'indexerDefaultField': [],
- u'fieldEditorValue':
- u'SELECT Before,\n old_Chinese_name,\n After,\n new_Chinese_name,\n Renamed_date\n FROM [object Promise];',
- u'customRegexp': u'', u'customLineDelimiter': u'\\n', u'isTargetExisting': False,
- u'customEnclosedByDelimiter': u"'", u'indexerConfigSets': [], u'sourceType': u'hive',
- u'useCustomDelimiters': False, u'apiHelperType': u'hive', u'numMappers': 1,
- u'fieldEditorDatabase': u'default', u'namespace': {u'status': u'CREATED', u'computes':
- [{u'credentials': {}, u'type': u'direct', u'id': u'default', u'name': u'default'}], u'id': u'default', u'name': u'default'},
- u'indexerPrimaryKeyObject': [], u'kuduPartitionColumns': [], u'rdbmsFileOutputFormats':
- [{u'name': u'text', u'value': u'text'}, {u'name': u'sequence', u'value': u'sequence'},
- {u'name': u'avro', u'value': u'avro'}], u'outputFormats': [{u'name': u'Table', u'value': u'table'},
- {u'name': u'Search index', u'value': u'index'}],
- u'fieldEditorEnabled': False, u'indexerDefaultFieldObject': [],
- u'customMapDelimiter': u'', u'partitionColumns': [], u'rdbmsFileOutputFormat': u'text',
- u'showProperties': False, u'isTransactional': True, u'useDefaultLocation': True, u'description': u'',
- u'customFieldsDelimiter': u',', u'primaryKeyObjects': [], u'customFieldDelimiter': u',',
- u'rdbmsSplitByColumn': [], u'existingTargetUrl': u'', u'channelSinkTypes':
- [{u'name': u'This topic', u'value': u'kafka'}, {u'name': u'Solr', u'value': u'solr'},
- {u'name': u'HDFS', u'value': u'hdfs'}], u'defaultName': u'default.renamed_chinese_cities_gb2312',
- u'isTransactionalUpdateEnabled': False, u'importData': True, u'isIceberg': False, u'useCopy': False, u'databaseName':
- u'default', u'indexerRunJob': False, u'indexerReplicationFactor': 1, u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN':
- {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=',
- u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [], u'indexerConfigSet': u'',
- u'sqoopJobLibPaths': [{u'path': u''}], u'outputFormat': u'table',
- u'nonDefaultLocation': u'/user/admin/renamed_chinese_cities_gb2312.csv',
- u'compute': {u'credentials': {}, u'type': u'direct', u'id': u'default', u'name': u'default'},
- u'name': u'default.renamed_chinese_cities_gb2312', u'tableFormat': u'text', u'isInsertOnly': True,
- u'targetNamespaceId': u'default', u'bulkColumnNames': u'Before,old_Chinese_name,After,new_Chinese_name,Renamed_date',
- u'columns': [{u'operations': [], u'comment': u'', u'unique': False, u'name': u'Before', u'level': 0,
- u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False,
- u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0},
- {u'operations': [], u'comment': u'', u'unique': False, u'name': u'old_Chinese_name',
- u'level': 0, u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False,
- u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0},
- {u'operations': [], u'comment': u'', u'unique': False, u'name': u'After', u'level': 0,
- u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False,
- u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0},
- {u'operations': [], u'comment': u'', u'unique': False, u'name': u'new_Chinese_name',
- u'level': 0, u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False,
- u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0},
- {u'operations': [], u'comment': u'', u'unique': False, u'name': u'Renamed_date',
- u'level': 0, u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [],
- u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False,
- u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0}],
- u'hasHeader': True, u'indexerPrimaryKey': [], u'tableFormats':
- [{u'name': u'Text', u'value': u'text'}, {u'name': u'Parquet', u'value': u'parquet'},
- {u'name': u'Csv', u'value': u'csv'}, {u'name': u'Avro', u'value': u'avro'},
- {u'name': u'Json', u'value': u'json'}, {u'name': u'Regexp', u'value': u'regexp'},
- {u'name': u'ORC', u'value': u'orc'}], u'customCollectionDelimiter': u'', u'indexerNumShards': 1,
- u'useFieldEditor': False, u'indexerJobLibPath': u'/tmp/smart_indexer_lib'}
-
- file_encoding = u'gb2312'
+ source = {
+ 'kafkaFieldType': 'delimited',
+ 'rdbmsUsername': '',
+ 'kafkaFieldTypes': '',
+ 'selectedTableIndex': 0,
+ 'rdbmsJdbcDriverNames': [],
+ 'tableName': '',
+ 'sample': [
+ ['Weihaiwei', '\u5a01\u6d77\u536b\u5e02', 'Weihai', '\u5a01\u6d77\u5e02', '1949-11-01'],
+ ['Xingshan', '\u5174\u5c71\u5e02', 'Hegang', '\u9e64\u5c97\u5e02', '1950-03-23'],
+ ["Xi'an", '\u897f\u5b89\u5e02', 'Liaoyuan', '\u8fbd\u6e90\u5e02', '1952-04-03'],
+ ['Nanzheng', '\u5357\u90d1\u5e02', 'Hanzhong', '\u6c49\u4e2d\u5e02', '1953-10-24'],
+ ['Dihua', '\u8fea\u5316\u5e02', '?r\xfcmqi', '\u4e4c\u9c81\u6728\u9f50\u5e02', '1953-11-20'],
+ ],
+ 'rdbmsTypes': [],
+ 'isFetchingDatabaseNames': False,
+ 'rdbmsDbIsValid': False,
+ 'query': '',
+ 'channelSourceSelectedHosts': [],
+ 'table': '',
+ 'rdbmsAllTablesSelected': False,
+ 'inputFormatsManual': [{'name': 'Manually', 'value': 'manual'}],
+ 'rdbmsPassword': '',
+ 'isObjectStore': False,
+ 'tables': [{'name': ''}],
+ 'streamUsername': '',
+ 'kafkaSchemaManual': 'detect',
+ 'connectorSelection': 'sfdc',
+ 'namespace': {
+ 'status': 'CREATED',
+ 'computes': [{'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'}],
+ 'id': 'default',
+ 'name': 'default',
+ },
+ 'rdbmsIsAllTables': False,
+ 'rdbmsDatabaseNames': [],
+ 'hasStreamSelected': False,
+ 'channelSourcePath': '/var/log/hue-httpd/access_log',
+ 'channelSourceHosts': [],
+ 'show': True,
+ 'streamObjects': [],
+ 'streamPassword': '',
+ 'tablesNames': [],
+ 'sampleCols': [
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'Before',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'old_Chinese_name',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'After',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'new_Chinese_name',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'Renamed_date',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ ],
+ 'rdbmsDatabaseName': '',
+ 'sourceType': 'hive',
+ 'inputFormat': 'file',
+ 'format': {'status': 0, 'fieldSeparator': ',', 'hasHeader': True, 'quoteChar': '"', 'recordSeparator': '\\n', 'type': 'csv'},
+ 'connectorList': [{'name': 'Salesforce', 'value': 'sfdc'}],
+ 'kafkaFieldDelimiter': ',',
+ 'rdbmsPort': '',
+ 'rdbmsTablesExclude': [],
+ 'isFetchingDriverNames': False,
+ 'publicStreams': [{'name': 'Kafka Topics', 'value': 'kafka'}, {'name': 'Flume Agent', 'value': 'flume'}],
+ 'channelSourceTypes': [
+ {'name': 'Directory or File', 'value': 'directory'},
+ {'name': 'Program', 'value': 'exec'},
+ {'name': 'Syslogs', 'value': 'syslogs'},
+ {'name': 'HTTP', 'value': 'http'},
+ ],
+ 'databaseName': 'default',
+ 'inputFormats': [
+ {'name': 'File', 'value': 'file'},
+ {'name': 'External Database', 'value': 'rdbms'},
+ {'name': 'Manually', 'value': 'manual'},
+ ],
+ 'path': '/user/admin/renamed_chinese_cities_gb2312.csv',
+ 'streamToken': '',
+ 'kafkaFieldNames': '',
+ 'streamSelection': 'kafka',
+ 'compute': {'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'},
+ 'name': '',
+ 'kafkaFieldSchemaPath': '',
+ 'kafkaTopics': [],
+ 'rdbmsJdbcDriver': '',
+ 'rdbmsHostname': '',
+ 'isFetchingTableNames': False,
+ 'rdbmsType': None,
+ 'inputFormatsAll': [
+ {'name': 'File', 'value': 'file'},
+ {'name': 'External Database', 'value': 'rdbms'},
+ {'name': 'Manually', 'value': 'manual'},
+ ],
+ 'rdbmsTableNames': [],
+ 'streamEndpointUrl': 'https://login.salesforce.com/services/Soap/u/42.0',
+ 'kafkaSelectedTopics': '',
+ }
+ destination = {
+ 'isTransactionalVisible': True,
+ 'KUDU_DEFAULT_PARTITION_COLUMN': {
+ 'int_val': 16,
+ 'name': 'HASH',
+ 'columns': [],
+ 'range_partitions': [
+ {'include_upper_val': '<=', 'upper_val': 1, 'name': 'VALUES', 'include_lower_val': '<=', 'lower_val': 0, 'values': [{'value': ''}]}
+ ],
+ },
+ 'namespaces': [
+ {
+ 'status': 'CREATED',
+ 'computes': [{'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'}],
+ 'id': 'default',
+ 'name': 'default',
+ }
+ ],
+ 'isTargetChecking': False,
+ 'ouputFormat': 'table',
+ 'tableName': 'renamed_chinese_cities_gb2312',
+ 'outputFormatsList': [
+ {'name': 'Table', 'value': 'table'},
+ {'name': 'Search index', 'value': 'index'},
+ {'name': 'Database', 'value': 'database'},
+ {'name': 'Folder', 'value': 'file'},
+ {'name': 'HBase Table', 'value': 'hbase'},
+ ],
+ 'fieldEditorPlaceHolder': 'Example: SELECT * FROM [object Promise]',
+ 'indexerDefaultField': [],
+ 'fieldEditorValue': 'SELECT Before,\n old_Chinese_name,\n After,\n new_Chinese_name,\n Renamed_date\n FROM [object Promise];', # noqa: E501
+ 'customRegexp': '',
+ 'customLineDelimiter': '\\n',
+ 'isTargetExisting': False,
+ 'customEnclosedByDelimiter': "'",
+ 'indexerConfigSets': [],
+ 'sourceType': 'hive',
+ 'useCustomDelimiters': False,
+ 'apiHelperType': 'hive',
+ 'numMappers': 1,
+ 'fieldEditorDatabase': 'default',
+ 'namespace': {
+ 'status': 'CREATED',
+ 'computes': [{'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'}],
+ 'id': 'default',
+ 'name': 'default',
+ },
+ 'indexerPrimaryKeyObject': [],
+ 'kuduPartitionColumns': [],
+ 'rdbmsFileOutputFormats': [
+ {'name': 'text', 'value': 'text'},
+ {'name': 'sequence', 'value': 'sequence'},
+ {'name': 'avro', 'value': 'avro'},
+ ],
+ 'outputFormats': [{'name': 'Table', 'value': 'table'}, {'name': 'Search index', 'value': 'index'}],
+ 'fieldEditorEnabled': False,
+ 'indexerDefaultFieldObject': [],
+ 'customMapDelimiter': '',
+ 'partitionColumns': [],
+ 'rdbmsFileOutputFormat': 'text',
+ 'showProperties': False,
+ 'isTransactional': True,
+ 'useDefaultLocation': True,
+ 'description': '',
+ 'customFieldsDelimiter': ',',
+ 'primaryKeyObjects': [],
+ 'customFieldDelimiter': ',',
+ 'rdbmsSplitByColumn': [],
+ 'existingTargetUrl': '',
+ 'channelSinkTypes': [{'name': 'This topic', 'value': 'kafka'}, {'name': 'Solr', 'value': 'solr'}, {'name': 'HDFS', 'value': 'hdfs'}],
+ 'defaultName': 'default.renamed_chinese_cities_gb2312',
+ 'isTransactionalUpdateEnabled': False,
+ 'importData': True,
+ 'isIceberg': False,
+ 'useCopy': False,
+ 'databaseName': 'default',
+ 'indexerRunJob': False,
+ 'indexerReplicationFactor': 1,
+ 'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {
+ 'include_upper_val': '<=',
+ 'upper_val': 1,
+ 'name': 'VALUES',
+ 'include_lower_val': '<=',
+ 'lower_val': 0,
+ 'values': [{'value': ''}],
+ },
+ 'primaryKeys': [],
+ 'indexerConfigSet': '',
+ 'sqoopJobLibPaths': [{'path': ''}],
+ 'outputFormat': 'table',
+ 'nonDefaultLocation': '/user/admin/renamed_chinese_cities_gb2312.csv',
+ 'compute': {'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'},
+ 'name': 'default.renamed_chinese_cities_gb2312',
+ 'tableFormat': 'text',
+ 'isInsertOnly': True,
+ 'targetNamespaceId': 'default',
+ 'bulkColumnNames': 'Before,old_Chinese_name,After,new_Chinese_name,Renamed_date',
+ 'columns': [
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'Before',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'old_Chinese_name',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'After',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'new_Chinese_name',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ {
+ 'operations': [],
+ 'comment': '',
+ 'unique': False,
+ 'name': 'Renamed_date',
+ 'level': 0,
+ 'keyType': 'string',
+ 'required': False,
+ 'precision': 10,
+ 'nested': [],
+ 'isPartition': False,
+ 'length': 100,
+ 'partitionValue': '',
+ 'multiValued': False,
+ 'keep': True,
+ 'type': 'string',
+ 'showProperties': False,
+ 'scale': 0,
+ },
+ ],
+ 'hasHeader': True,
+ 'indexerPrimaryKey': [],
+ 'tableFormats': [
+ {'name': 'Text', 'value': 'text'},
+ {'name': 'Parquet', 'value': 'parquet'},
+ {'name': 'Csv', 'value': 'csv'},
+ {'name': 'Avro', 'value': 'avro'},
+ {'name': 'Json', 'value': 'json'},
+ {'name': 'Regexp', 'value': 'regexp'},
+ {'name': 'ORC', 'value': 'orc'},
+ ],
+ 'customCollectionDelimiter': '',
+ 'indexerNumShards': 1,
+ 'useFieldEditor': False,
+ 'indexerJobLibPath': '/tmp/smart_indexer_lib',
+ }
+
+ file_encoding = 'gb2312'
path = {
'isDir': False,
'split': ('/user/admin', 'renamed_chinese_cities_gb2312.csv'),
'listdir': ['/user/admin/data'],
- 'parent_path': '/user/admin/.scratchdir/03d184ad-dd11-4ae1-aace-378daaa094e5/renamed_chinese_cities_gb2312.csv/..'
+ 'parent_path': '/user/admin/.scratchdir/03d184ad-dd11-4ae1-aace-378daaa094e5/renamed_chinese_cities_gb2312.csv/..',
}
request = MockRequest(fs=MockFs(path=path))
- sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(source, destination, start_time=-1,
- file_encoding=file_encoding).get_str()
+ sql = (
+ SQLIndexer(user=request.user, fs=request.fs)
+ .create_table_from_a_file(source, destination, start_time=-1, file_encoding=file_encoding)
+ .get_str()
+ )
assert '''USE default;''' in sql, sql
@@ -1111,8 +1962,9 @@ def test_create_ddl_with_nonascii():
;'''
assert statement in sql, sql
- statement = "LOAD DATA INPATH '/user/admin/renamed_chinese_cities_gb2312.csv' " + \
- "INTO TABLE `default`.`hue__tmp_renamed_chinese_cities_gb2312`;"
+ statement = (
+ "LOAD DATA INPATH '/user/admin/renamed_chinese_cities_gb2312.csv' " + "INTO TABLE `default`.`hue__tmp_renamed_chinese_cities_gb2312`;"
+ )
assert statement in sql, sql
statement = '''CREATE TABLE `default`.`renamed_chinese_cities_gb2312`
@@ -1125,8 +1977,7 @@ def test_create_ddl_with_nonascii():
statement = '''DROP TABLE IF EXISTS `default`.`hue__tmp_renamed_chinese_cities_gb2312`;'''
assert statement in sql, sql
- statement = '''ALTER TABLE `default`.`renamed_chinese_cities_gb2312` ''' + \
- '''SET serdeproperties ("serialization.encoding"="gb2312");'''
+ statement = '''ALTER TABLE `default`.`renamed_chinese_cities_gb2312` ''' + '''SET serdeproperties ("serialization.encoding"="gb2312");'''
assert statement in sql, sql
@@ -1136,12 +1987,12 @@ def test_create_ddl_with_abfs():
{
'default': {
'fs_defaultfs': 'abfs://my-data@yingstorage.dfs.core.windows.net',
- 'webhdfs_url': 'https://yingstorage.dfs.core.windows.net'
+ 'webhdfs_url': 'https://yingstorage.dfs.core.windows.net',
}
}
)
- form_data = {'path': u'abfs://my-data/test_data/cars.csv', 'partition_columns': [], 'overwrite': False}
+ form_data = {'path': 'abfs://my-data/test_data/cars.csv', 'partition_columns': [], 'overwrite': False}
sql = ''
request = MockRequest(fs=MockFs())
query_server_config = dbms.get_query_server_config(name='impala')
@@ -1150,17 +2001,14 @@ def test_create_ddl_with_abfs():
sql = "\n\n%s;" % db.load_data('default', 'cars', form_data, None, generate_ddl_only=True)
finally:
finish()
- assert u"\'abfs://my-data@yingstorage.dfs.core.windows.net/test_data/cars.csv\'" in sql
+ assert "'abfs://my-data@yingstorage.dfs.core.windows.net/test_data/cars.csv'" in sql
@pytest.mark.django_db
def test_create_table_from_local():
with patch('indexer.indexers.sql.get_interpreter') as get_interpreter:
get_interpreter.return_value = {'Name': 'Hive', 'dialect': 'hive'}
- source = {
- 'path': '',
- 'sourceType': 'hive'
- }
+ source = {'path': '', 'sourceType': 'hive'}
destination = {
'name': 'default.test1',
'columns': [
@@ -1180,7 +2028,7 @@ def test_create_table_from_local():
{'name': 'dist', 'type': 'bigint', 'keep': True},
],
'indexerPrimaryKey': [],
- 'sourceType': 'hive'
+ 'sourceType': 'hive',
}
sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str()
@@ -1208,11 +2056,7 @@ def test_create_table_from_local():
def test_create_table_from_local_mysql():
with patch('indexer.indexers.sql.get_interpreter') as get_interpreter:
get_interpreter.return_value = {'Name': 'MySQL', 'dialect': 'mysql'}
- source = {
- 'path': BASE_DIR + '/apps/beeswax/data/tables/us_population.csv',
- 'sourceType': 'mysql',
- 'format': {'hasHeader': False}
- }
+ source = {'path': BASE_DIR + '/apps/beeswax/data/tables/us_population.csv', 'sourceType': 'mysql', 'format': {'hasHeader': False}}
destination = {
'name': 'default.test1',
'columns': [
@@ -1220,7 +2064,7 @@ def test_create_table_from_local_mysql():
{'name': 'field_2', 'type': 'string', 'keep': True},
{'name': 'field_3', 'type': 'bigint', 'keep': True},
],
- 'sourceType': 'mysql'
+ 'sourceType': 'mysql',
}
sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str()
@@ -1243,11 +2087,7 @@ def test_create_table_from_local_mysql():
def test_create_table_from_local_impala():
with patch('indexer.indexers.sql.get_interpreter') as get_interpreter:
get_interpreter.return_value = {'Name': 'Impala', 'dialect': 'impala'}
- source = {
- 'path': BASE_DIR + '/apps/beeswax/data/tables/flights.csv',
- 'sourceType': 'impala',
- 'format': {'hasHeader': True}
- }
+ source = {'path': BASE_DIR + '/apps/beeswax/data/tables/flights.csv', 'sourceType': 'impala', 'format': {'hasHeader': True}}
destination = {
'name': 'default.test1',
'columns': [
@@ -1266,7 +2106,7 @@ def test_create_table_from_local_impala():
{'name': 'time', 'type': 'bigint', 'keep': True},
{'name': 'dist', 'type': 'bigint', 'keep': True},
],
- 'sourceType': 'impala'
+ 'sourceType': 'impala',
}
sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str()
@@ -1325,11 +2165,7 @@ def test_create_table_from_local_impala():
def test_create_table_only_header_file_local_impala():
with patch('indexer.indexers.sql.get_interpreter') as get_interpreter:
get_interpreter.return_value = {'Name': 'Impala', 'dialect': 'impala'}
- source = {
- 'path': BASE_DIR + '/apps/beeswax/data/tables/onlyheader.csv',
- 'sourceType': 'impala',
- 'format': {'hasHeader': True}
- }
+ source = {'path': BASE_DIR + '/apps/beeswax/data/tables/onlyheader.csv', 'sourceType': 'impala', 'format': {'hasHeader': True}}
destination = {
'name': 'default.test1',
'columns': [
@@ -1348,7 +2184,7 @@ def test_create_table_only_header_file_local_impala():
{'name': 'time', 'type': 'bigint', 'keep': True},
{'name': 'dist', 'type': 'bigint', 'keep': True},
],
- 'sourceType': 'impala'
+ 'sourceType': 'impala',
}
sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str()
@@ -1397,10 +2233,7 @@ def test_create_table_only_header_file_local_impala():
def test_create_table_with_drop_column_from_local():
with patch('indexer.indexers.sql.get_interpreter') as get_interpreter:
get_interpreter.return_value = {'Name': 'Hive', 'dialect': 'hive'}
- source = {
- 'path': '',
- 'sourceType': 'hive'
- }
+ source = {'path': '', 'sourceType': 'hive'}
destination = {
'name': 'default.test1',
'columns': [
@@ -1411,7 +2244,7 @@ def test_create_table_with_drop_column_from_local():
{'name': 'arr', 'type': 'bigint', 'keep': False},
],
'indexerPrimaryKey': [],
- 'sourceType': 'hive'
+ 'sourceType': 'hive',
}
sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str()
diff --git a/desktop/libs/indexer/src/indexer/management/commands/indexer_setup.py b/desktop/libs/indexer/src/indexer/management/commands/indexer_setup.py
index f2f47b97ad5..3447e39e4de 100644
--- a/desktop/libs/indexer/src/indexer/management/commands/indexer_setup.py
+++ b/desktop/libs/indexer/src/indexer/management/commands/indexer_setup.py
@@ -15,25 +15,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import next
-from builtins import zip
-import itertools
-import logging
import os
import sys
+import logging
+import itertools
+from builtins import next, zip
from django.core.management.base import BaseCommand
-
-from useradmin.models import install_sample_user
+from django.utils.translation import gettext as _
from indexer import utils
from indexer.solr_client import SolrClient
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from useradmin.models import install_sample_user
LOG = logging.getLogger()
@@ -42,6 +35,7 @@ class Command(BaseCommand):
"""
Install examples but do not overwrite them.
"""
+
def handle(self, *args, **options):
self.user = install_sample_user()
self.client = SolrClient(self.user)
@@ -50,70 +44,84 @@ def handle(self, *args, **options):
if collection == 'twitter_demo':
LOG.info("Installing twitter collection")
- path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_twitter_demo/index_data.csv'))
- self._setup_collection_from_csv({
+ path = os.path.abspath(
+ os.path.join(
+ os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_twitter_demo/index_data.csv'
+ )
+ )
+ self._setup_collection_from_csv(
+ {
'name': 'twitter_demo',
- 'fields': self._parse_fields(path, fieldtypes={
- 'source': 'string',
- 'username': 'string',
- }),
+ 'fields': self._parse_fields(
+ path,
+ fieldtypes={
+ 'source': 'string',
+ 'username': 'string',
+ },
+ ),
'uniqueKeyField': 'id',
- 'df': 'text'
+ 'df': 'text',
},
- path
+ path,
)
LOG.info("Twitter collection successfully installed")
if collection == 'yelp_demo':
LOG.info("Installing yelp collection")
- path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_yelp_demo/index_data.csv'))
- self._setup_collection_from_csv({
+ path = os.path.abspath(
+ os.path.join(
+ os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_yelp_demo/index_data.csv'
+ )
+ )
+ self._setup_collection_from_csv(
+ {
'name': 'yelp_demo',
- 'fields': self._parse_fields(path, fieldtypes={
- 'name': 'string',
- }),
+ 'fields': self._parse_fields(
+ path,
+ fieldtypes={
+ 'name': 'string',
+ },
+ ),
'uniqueKeyField': 'id',
- 'df': 'text'
+ 'df': 'text',
},
- path
+ path,
)
LOG.info("Yelp collection successfully installed")
if collection == 'log_analytics_demo':
LOG.info("Installing logs collection")
- path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_log_analytics_demo/index_data.csv'))
- self._setup_collection_from_csv({
+ path = os.path.abspath(
+ os.path.join(
+ os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_log_analytics_demo/index_data.csv'
+ )
+ )
+ self._setup_collection_from_csv(
+ {
'name': 'log_analytics_demo',
- 'fields': self._parse_fields(path, fieldtypes={
- 'region_code': 'string',
- 'referer': 'string',
- 'user_agent': 'string'
- }),
+ 'fields': self._parse_fields(path, fieldtypes={'region_code': 'string', 'referer': 'string', 'user_agent': 'string'}),
'uniqueKeyField': 'id',
- 'df': 'record'
+ 'df': 'record',
},
- path
+ path,
)
LOG.info("Logs collection successfully installed")
-
def _setup_collection_from_csv(self, collection, path):
if not self.client.exists(collection['name']):
self.client.create_index(
- name=collection['name'],
- fields=collection['fields'],
- unique_key_field=collection['uniqueKeyField'],
- df=collection['df']
+ name=collection['name'], fields=collection['fields'], unique_key_field=collection['uniqueKeyField'], df=collection['df']
)
with open(path) as fh:
self.client.index(collection['name'], fh.read())
-
def _parse_fields(self, path, separator=',', quote_character='"', fieldtypes={}):
with open(path) as fh:
field_generator = utils.field_values_from_separated_file(fh, separator, quote_character)
row = next(field_generator)
field_names = list(row.keys())
field_types = utils.get_field_types((list(row.values()) for row in itertools.chain([row], field_generator)), iterations=51)
- return [{'name': field[0], 'type': field[0] in fieldtypes and fieldtypes[field[0]] or field[1]} for field in zip(field_names, field_types)]
+ return [
+ {'name': field[0], 'type': field[0] in fieldtypes and fieldtypes[field[0]] or field[1]} for field in zip(field_names, field_types)
+ ]
diff --git a/desktop/libs/indexer/src/indexer/solr_api.py b/desktop/libs/indexer/src/indexer/solr_api.py
index c7109ccf009..4069524d4b7 100644
--- a/desktop/libs/indexer/src/indexer/solr_api.py
+++ b/desktop/libs/indexer/src/indexer/solr_api.py
@@ -15,23 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import sys
import json
import logging
-import sys
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_GET, require_POST
from desktop.lib.django_util import JsonResponse
-from desktop.lib.i18n import smart_unicode
-from libsolr.api import SolrApi
-
+from desktop.lib.i18n import smart_str
from indexer.solr_client import SolrClient
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from libsolr.api import SolrApi
LOG = logging.getLogger()
@@ -45,7 +39,7 @@ def decorator(*args, **kwargs):
except Exception as e:
LOG.exception('Error running %s' % func.__name__)
response['status'] = -1
- response['message'] = smart_unicode(e)
+ response['message'] = smart_str(e)
finally:
if response:
return JsonResponse(response)
@@ -147,6 +141,7 @@ def delete_indexes(request):
return JsonResponse(response)
+
@require_POST
@api_error_handler
def index(request):
@@ -161,6 +156,7 @@ def index(request):
return JsonResponse(response)
+
@require_POST
@api_error_handler
def create_alias(request):
diff --git a/desktop/libs/indexer/src/indexer/solr_client.py b/desktop/libs/indexer/src/indexer/solr_client.py
index 4a1324ec244..1cb982460eb 100644
--- a/desktop/libs/indexer/src/indexer/solr_client.py
+++ b/desktop/libs/indexer/src/indexer/solr_client.py
@@ -16,31 +16,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
-import json
import os
-import shutil
import sys
+import json
+import shutil
+import logging
+from builtins import object
+
+from django.utils.translation import gettext as _
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.i18n import smart_str
-from libsolr.api import SolrApi
-from libzookeeper.models import ZookeeperClient
-
from indexer.conf import CORE_INSTANCE_DIR, get_solr_ensemble
from indexer.utils import copy_configs
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from libsolr.api import SolrApi
+from libzookeeper.models import ZookeeperClient
LOG = logging.getLogger()
-MAX_UPLOAD_SIZE = 100 * 1024 * 1024 # 100 MB
+MAX_UPLOAD_SIZE = 100 * 1024 * 1024 # 100 MB
ALLOWED_FIELD_ATTRIBUTES = set(['name', 'type', 'indexed', 'stored'])
FLAGS = [('I', 'indexed'), ('T', 'tokenized'), ('S', 'stored'), ('M', 'multivalued')]
ZK_SOLR_CONFIG_NAMESPACE = 'configs'
@@ -62,7 +57,6 @@ def __init__(self, user, api=None):
self.user = user
self.api = api if api is not None else SolrApi(user=self.user)
-
def get_indexes(self, include_cores=False):
indexes = []
@@ -96,13 +90,14 @@ def get_indexes(self, include_cores=False):
return sorted(indexes, key=lambda index: index['name'])
-
def create_index(self, name, fields, config_name=None, unique_key_field=None, df=None, shards=1, replication=1):
if self.is_solr_cloud_mode():
if self.is_solr_six_or_more():
config_sets = self.list_configs()
if not config_sets:
- raise PopupException(_('Solr does not have any predefined (secure: %s) configSets: %s') % (self.is_sentry_protected(), self.list_configs()))
+ raise PopupException(
+ _('Solr does not have any predefined (secure: %s) configSets: %s') % (self.is_sentry_protected(), self.list_configs())
+ )
if not config_name or config_name not in config_sets:
config_name_target = 'managedTemplate'
@@ -139,7 +134,7 @@ def create_index(self, name, fields, config_name=None, unique_key_field=None, df
if self.is_solr_six_or_more():
self.api.update_config(name, {
'add-updateprocessor': {
- "name" : "tolerant",
+ "name": "tolerant",
"class": "solr.TolerantUpdateProcessorFactory",
"maxErrors": "100"
}
@@ -150,19 +145,16 @@ def create_index(self, name, fields, config_name=None, unique_key_field=None, df
else:
self._create_non_solr_cloud_index(name, fields, unique_key_field, df)
-
def create_alias(self, name, collections):
return self.api.create_alias(name, collections)
-
def index(self, name, data, content_type='csv', version=None, **kwargs):
- """
+ r"""
e.g. Parameters: separator = ',', fieldnames = 'a,b,c', header=true, skip 'a,b', encapsulator="
escape=\, map, split, overwrite=true, rowid=id
"""
return self.api.update(name, data, content_type=content_type, version=version, **kwargs)
-
def exists(self, name):
try:
self.api.get_schema(name)
@@ -171,7 +163,6 @@ def exists(self, name):
LOG.info('Check if index %s existed failed: %s' % (name, e))
return False
-
def delete_index(self, name, keep_config=True):
if not self.is_solr_cloud_mode():
raise PopupException(_('Cannot remove non-Solr cloud cores.'))
@@ -193,34 +184,27 @@ def delete_index(self, name, keep_config=True):
self.api.add_collection(name)
raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
else:
- if not 'Cannot unload non-existent core' in json.dumps(result):
+ if 'Cannot unload non-existent core' not in json.dumps(result):
raise PopupException(_('Could not remove collection: %(message)s') % result)
-
def sample_index(self, collection, rows=100):
return self.api.select(collection, rows=min(rows, 1000))
-
def get_config(self, collection):
return self.api.config(collection)
-
def list_configs(self):
return self.api.configs()
-
def list_schema(self, index_name):
return self.api.get_schema(index_name)
-
def delete_alias(self, name):
return self.api.delete_alias(name)
-
def update_config(self, name, properties):
return self.api.update_config(name, properties)
-
def is_solr_cloud_mode(self):
global _IS_SOLR_CLOUD
@@ -229,7 +213,6 @@ def is_solr_cloud_mode(self):
return _IS_SOLR_CLOUD
-
def is_solr_six_or_more(self):
global _IS_SOLR_6_OR_MORE
@@ -238,7 +221,6 @@ def is_solr_six_or_more(self):
return _IS_SOLR_6_OR_MORE
-
def is_solr_with_hdfs(self):
global _IS_SOLR_WITH_HDFS
@@ -247,7 +229,6 @@ def is_solr_with_hdfs(self):
return _IS_SOLR_WITH_HDFS
-
def is_sentry_protected(self):
global _IS_SENTRY_PROTECTED
@@ -256,7 +237,6 @@ def is_sentry_protected(self):
return _IS_SENTRY_PROTECTED
-
def get_zookeeper_host(self):
global _ZOOKEEPER_HOST
@@ -265,7 +245,6 @@ def get_zookeeper_host(self):
return _ZOOKEEPER_HOST
-
# Deprecated
def _create_cloud_config(self, name, fields, unique_key_field, df):
with ZookeeperClient(hosts=self.get_zookeeper_host(), read_only=False) as zc:
@@ -293,7 +272,6 @@ def _create_cloud_config(self, name, fields, unique_key_field, df):
finally:
shutil.rmtree(tmp_path)
-
# Deprecated
def _create_non_solr_cloud_index(self, name, fields, unique_key_field, df):
# Create instance directory locally.
@@ -316,7 +294,6 @@ def _create_non_solr_cloud_index(self, name, fields, unique_key_field, df):
finally:
shutil.rmtree(instancedir)
-
def _fillup_properties(self):
global _IS_SOLR_CLOUD
global _IS_SOLR_6_OR_MORE
@@ -340,14 +317,12 @@ def _fillup_properties(self):
if '-Dsolr.authorization.sentry.site' in command_line_arg:
_IS_SENTRY_PROTECTED = True
-
@staticmethod
def _port_field_types(field):
- if not field['type'].startswith('p'): # Check for automatically converting to new default Solr types
+ if not field['type'].startswith('p'): # Check for automatically converting to new default Solr types
field['type'] = field['type'].replace('long', 'plong').replace('double', 'pdouble').replace('date', 'pdate')
return field
-
@staticmethod
def _reset_properties():
global _IS_SOLR_CLOUD
@@ -358,7 +333,6 @@ def _reset_properties():
_IS_SOLR_CLOUD = _IS_SOLR_6_OR_MORE = _IS_SOLR_6_OR_MORE = _IS_SOLR_WITH_HDFS = _ZOOKEEPER_HOST = _IS_SENTRY_PROTECTED = None
-
# Used by morphline indexer
def get_index_schema(self, index_name):
try:
diff --git a/desktop/libs/indexer/src/indexer/test_utils.py b/desktop/libs/indexer/src/indexer/test_utils.py
index 32d7c711870..5176f0c1dab 100644
--- a/desktop/libs/indexer/src/indexer/test_utils.py
+++ b/desktop/libs/indexer/src/indexer/test_utils.py
@@ -16,21 +16,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-import sys
+from io import StringIO as string_io
from desktop.lib.i18n import force_unicode
-
from indexer.utils import field_values_from_separated_file
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
-else:
- from StringIO import StringIO as string_io
-
-
def test_get_ensemble():
# Non ascii
data = string_io('fieldA\nrel=""nofollow"">Twitter for Péché')
@@ -43,9 +34,7 @@ def test_get_ensemble():
# Bad binary
test_str = b'fieldA\naaa\x80\x02\x03'
- if sys.version_info[0] > 2:
- data = string_io(force_unicode(test_str, errors='ignore'))
- else:
- data = string_io(test_str)
+ data = string_io(force_unicode(test_str, errors='ignore'))
+
result = list(field_values_from_separated_file(data, delimiter='\t', quote_character='"'))
assert u'aaa\x02\x03' == result[0]['fieldA']
diff --git a/desktop/libs/indexer/src/indexer/tests.py b/desktop/libs/indexer/src/indexer/tests.py
index 579a1b158c2..c0831793c7a 100644
--- a/desktop/libs/indexer/src/indexer/tests.py
+++ b/desktop/libs/indexer/src/indexer/tests.py
@@ -15,28 +15,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import json
-import pytest
import sys
+import json
+from builtins import object
+from unittest.mock import Mock, patch
+import pytest
from django.urls import reverse
-from hadoop.pseudo_hdfs4 import is_live_cluster, get_db_prefix
-from libsolr import conf as libsolr_conf
-from libzookeeper import conf as libzookeeper_conf
+from desktop.lib.django_test_util import make_logged_in_client
+from desktop.lib.test_utils import add_to_group, grant_access
+from hadoop.pseudo_hdfs4 import get_db_prefix, is_live_cluster
from indexer.conf import get_solr_ensemble
from indexer.controller import CollectionManagerController
+from libsolr import conf as libsolr_conf
+from libzookeeper import conf as libzookeeper_conf
from useradmin.models import User
-from desktop.lib.django_test_util import make_logged_in_client
-from desktop.lib.test_utils import add_to_group, grant_access
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock
-else:
- from mock import patch, Mock
-
def test_get_ensemble():
clears = []
@@ -75,7 +70,7 @@ def test_input_formats_no_fs(self):
get_filesystem.return_value = None
resp = self.client.get(reverse('indexer:importer'))
- assert not b"{'value': 'file', 'name': 'Remote File'}" in resp.content
+ assert b"{'value': 'file', 'name': 'Remote File'}" not in resp.content
class TestIndexerWithSolr(object):
diff --git a/desktop/libs/indexer/src/indexer/urls.py b/desktop/libs/indexer/src/indexer/urls.py
index ac3ce529357..76a4c6bf46c 100644
--- a/desktop/libs/indexer/src/indexer/urls.py
+++ b/desktop/libs/indexer/src/indexer/urls.py
@@ -17,18 +17,11 @@
import sys
-from indexer import views as indexer_views
-from indexer import solr_api as indexer_solr_api
-from indexer import api3 as indexer_api3
-from indexer.indexers import rdbms as indexer_indexers_rdbms
-from indexer import api as indexer_api
+from django.urls import re_path
+from indexer import api as indexer_api, api3 as indexer_api3, solr_api as indexer_solr_api, views as indexer_views
from indexer.conf import ENABLE_NEW_INDEXER
-
-if sys.version_info[0] > 2:
- from django.urls import re_path
-else:
- from django.conf.urls import url as re_path
+from indexer.indexers import rdbms as indexer_indexers_rdbms
urlpatterns = [
re_path(r'^install_examples$', indexer_views.install_examples, name='install_examples'),
@@ -46,7 +39,7 @@
re_path(r'^$', indexer_views.indexes, name='indexes'),
re_path(r'^indexes/?$', indexer_views.indexes, name='indexes'),
re_path(r'^indexes/(?P<index>[^/]+)/?$', indexer_views.indexes, name='indexes'),
- re_path(r'^collections$', indexer_views.collections, name='collections'), # Old page
+ re_path(r'^collections$', indexer_views.collections, name='collections'), # Old page
]
else:
urlpatterns += [
@@ -78,12 +71,12 @@
re_path(r'^api/indexer/guess_format/?$', indexer_api3.guess_format, name='guess_format'),
re_path(r'^api/indexer/guess_field_types/?$', indexer_api3.guess_field_types, name='guess_field_types'),
re_path(r'^api/indexer/index/?$', indexer_api3.index, name='index'),
-
re_path(r'^api/importer/submit', indexer_api3.importer_submit, name='importer_submit'),
re_path(r'^api/importer/save/?$', indexer_api3.save_pipeline, name='save_pipeline'),
-
re_path(r'^api/indexer/upload_local_file/?$', indexer_api3.upload_local_file, name='upload_local_file'),
- re_path(r'^api/indexer/upload_local_file_drag_and_drop/?$', indexer_api3.upload_local_file_drag_and_drop, name='upload_local_file_drag_and_drop'),
+ re_path(
+ r'^api/indexer/upload_local_file_drag_and_drop/?$', indexer_api3.upload_local_file_drag_and_drop, name='upload_local_file_drag_and_drop'
+ ),
]
urlpatterns += [
diff --git a/desktop/libs/indexer/src/indexer/utils.py b/desktop/libs/indexer/src/indexer/utils.py
index 537d1ae6517..9aeab202eea 100644
--- a/desktop/libs/indexer/src/indexer/utils.py
+++ b/desktop/libs/indexer/src/indexer/utils.py
@@ -16,40 +16,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-from builtins import next
-from builtins import range
-from builtins import object
-import csv
-import logging
import os
-import pytz
import re
+import csv
+import uuid
import shutil
-import sys
+import logging
import tempfile
-import uuid
+from io import StringIO as string_io
+import pytz
from dateutil.parser import parse
-
from django.conf import settings
+from django.utils.translation import gettext as _
from desktop.lib.i18n import force_unicode, smart_str
-
from indexer import conf
-from indexer.models import DATE_FIELD_TYPES, TEXT_FIELD_TYPES, INTEGER_FIELD_TYPES, DECIMAL_FIELD_TYPES, BOOLEAN_FIELD_TYPES
-
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
- from StringIO import StringIO as string_io
+from indexer.models import BOOLEAN_FIELD_TYPES, DATE_FIELD_TYPES, DECIMAL_FIELD_TYPES, INTEGER_FIELD_TYPES, TEXT_FIELD_TYPES
LOG = logging.getLogger()
-TIMESTAMP_PATTERN = '\[([\w\d\s\-\/\:\+]*?)\]'
-FIELD_XML_TEMPLATE = ''
+TIMESTAMP_PATTERN = r'\[([\w\d\s\-\/\:\+]*?)\]'
+FIELD_XML_TEMPLATE = '' # noqa: E501
DEFAULT_FIELD = {
'name': None,
'type': 'text',
@@ -94,10 +81,14 @@ def __init__(self, xml):
self.xml = xml
def defaultField(self, df=None):
- self.xml = force_unicode(force_unicode(self.xml).replace(u'text', u'%s' % force_unicode(df) if df is not None else ''))
+ self.xml = force_unicode(
+ force_unicode(self.xml).replace('text', '%s' % force_unicode(df) if df is not None else '')
+ )
-def copy_configs(fields, unique_key_field, df, solr_cloud_mode=True, is_solr_six_or_more=False, is_solr_hdfs_mode=True, is_sentry_protected=False):
+def copy_configs(
+ fields, unique_key_field, df, solr_cloud_mode=True, is_solr_six_or_more=False, is_solr_hdfs_mode=True, is_sentry_protected=False
+):
# Create temporary copy of solr configs
tmp_path = tempfile.mkdtemp()
@@ -286,7 +277,8 @@ def field_values_from_separated_file(fh, delimiter, quote_character, fields=None
remove_keys = None
for row in reader:
- row = dict([(force_unicode(k), force_unicode(v, errors='ignore')) for k, v in row.items()]) # Get rid of invalid binary chars and convert to unicode from DictReader
+ # Get rid of invalid binary chars and convert to unicode from DictReader
+ row = dict([(force_unicode(k), force_unicode(v, errors='ignore')) for k, v in row.items()])
# Remove keys that aren't in collection
if remove_keys is None:
@@ -333,7 +325,7 @@ def field_values_from_separated_file(fh, delimiter, quote_character, fields=None
yield row
-def field_values_from_log(fh, fields=[ {'name': 'message', 'type': 'text_general'}, {'name': 'tdate', 'type': 'timestamp'} ]):
+def field_values_from_log(fh, fields=[{'name': 'message', 'type': 'text_general'}, {'name': 'tdate', 'type': 'timestamp'}]):
"""
Only timestamp and message
"""
@@ -345,12 +337,12 @@ def field_values_from_log(fh, fields=[ {'name': 'message', 'type': 'text_general
else:
try:
timestamp_key = next(iter([field for field in fields if field['type'] in DATE_FIELD_TYPES]))['name']
- except:
+ except Exception:
LOG.exception('failed to get timestamp key')
timestamp_key = None
try:
message_key = next(iter([field for field in fields if field['type'] in TEXT_FIELD_TYPES]))['name']
- except:
+ except Exception:
LOG.exception('failed to get message key')
message_key = None
@@ -370,7 +362,7 @@ def value_generator(buf):
last_newline = content.rfind('\n')
if last_newline > -1:
buf = content[:last_newline]
- content = content[last_newline+1:]
+ content = content[last_newline + 1:]
for row in value_generator(buf):
yield row
prev = fh.read()
diff --git a/desktop/libs/indexer/src/indexer/views.py b/desktop/libs/indexer/src/indexer/views.py
index 32ffd42dbd6..ec63dfb2630 100644
--- a/desktop/libs/indexer/src/indexer/views.py
+++ b/desktop/libs/indexer/src/indexer/views.py
@@ -15,25 +15,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-import json
import sys
+import json
+import logging
+
+from django.utils.translation import gettext as _
from desktop.lib.django_util import JsonResponse, render
from desktop.lib.exceptions_renderable import PopupException
from desktop.models import get_cluster_config
-
-from indexer.solr_client import SolrClient
from indexer.fields import FIELD_TYPES, Field
from indexer.file_format import get_file_indexable_format_types
-from indexer.management.commands import indexer_setup
from indexer.indexers.morphline_operations import OPERATORS
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from indexer.management.commands import indexer_setup
+from indexer.solr_client import SolrClient
LOG = logging.getLogger()
diff --git a/desktop/libs/kafka/src/kafka/conf.py b/desktop/libs/kafka/src/kafka/conf.py
index 339dce07872..9d5683c8efe 100644
--- a/desktop/libs/kafka/src/kafka/conf.py
+++ b/desktop/libs/kafka/src/kafka/conf.py
@@ -15,16 +15,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import sys
+import logging
-from desktop.lib.conf import Config, ConfigSection, coerce_bool
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t
-else:
- from django.utils.translation import ugettext_lazy as _t
+from django.utils.translation import gettext_lazy as _t
+from desktop.lib.conf import Config, ConfigSection, coerce_bool
LOG = logging.getLogger()
@@ -32,6 +28,7 @@
def has_kafka():
return KAFKA.IS_ENABLED.get()
+
def has_kafka_api():
return bool(KAFKA.API_URL.get())
diff --git a/desktop/libs/kafka/src/kafka/kafka_api.py b/desktop/libs/kafka/src/kafka/kafka_api.py
index 80b3122e411..4b63577f86f 100644
--- a/desktop/libs/kafka/src/kafka/kafka_api.py
+++ b/desktop/libs/kafka/src/kafka/kafka_api.py
@@ -16,23 +16,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import sys
import json
import logging
-import sys
+
+from django.utils.translation import gettext as _
from desktop.lib.django_util import JsonResponse
from desktop.lib.i18n import force_unicode
-from metadata.manager_client import ManagerApi
-from notebook.models import _get_notebook_api
-
from kafka.conf import has_kafka_api
from kafka.kafka_client import KafkaApi, KafkaApiException, SchemaRegistryApi
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from metadata.manager_client import ManagerApi
+from notebook.models import _get_notebook_api
LOG = logging.getLogger()
diff --git a/desktop/libs/kafka/src/kafka/kafka_client.py b/desktop/libs/kafka/src/kafka/kafka_client.py
index e06f3d2ea4c..80c12011c73 100644
--- a/desktop/libs/kafka/src/kafka/kafka_client.py
+++ b/desktop/libs/kafka/src/kafka/kafka_client.py
@@ -16,26 +16,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
-import json
import sys
-
+import json
+import logging
+from builtins import object
from subprocess import call
-from desktop.lib.rest.http_client import RestException, HttpClient
-from desktop.lib.rest.resource import Resource
-from desktop.lib.i18n import smart_unicode
+from django.utils.translation import gettext as _
+from desktop.lib.i18n import smart_str
+from desktop.lib.rest.http_client import HttpClient, RestException
+from desktop.lib.rest.resource import Resource
from kafka.conf import KAFKA
from libzookeeper.conf import zkensemble
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -47,7 +41,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
class KafkaApi(object):
@@ -62,7 +56,6 @@ def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
self._client = HttpClient(self._api_url, logger=LOG)
self._root = Resource(self._client)
-
def topics(self):
try:
response = self._root.get('topics')
@@ -70,7 +63,6 @@ def topics(self):
except RestException as e:
raise KafkaApiException(e)
-
def create_topic(self, name, partitions=1, replication_factor=1):
# Create/delete topics are not available in the REST API.
# Here only works with hack if command is available on the Hue host.
@@ -99,7 +91,6 @@ def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
self._client = HttpClient(self._api_url, logger=LOG)
self._root = Resource(self._client)
-
def subjects(self):
try:
response = self._root.get('subjects')
diff --git a/desktop/libs/kafka/src/kafka/ksql_client.py b/desktop/libs/kafka/src/kafka/ksql_client.py
index c29ae25eb3c..b8571ff3a38 100644
--- a/desktop/libs/kafka/src/kafka/ksql_client.py
+++ b/desktop/libs/kafka/src/kafka/ksql_client.py
@@ -16,21 +16,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
-import json
import sys
+import json
+import logging
+from builtins import object
from django.core.cache import cache
+from django.utils.translation import gettext as _
-from desktop.lib.i18n import smart_unicode
-from desktop.lib.rest.http_client import RestException
from desktop.conf import has_channels
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from desktop.lib.i18n import smart_str
+from desktop.lib.rest.http_client import RestException
if has_channels():
from notebook.consumer import _send_to_channel
@@ -47,7 +43,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
class KSqlApi(object):
@@ -72,7 +68,6 @@ def __init__(self, user=None, url=None, security_enabled=False, ssl_cert_ca_veri
self.client = client = KSQLAPI(self._api_url)
-
def show_tables(self):
try:
response = self.client.ksql('SHOW TABLES')
@@ -80,7 +75,6 @@ def show_tables(self):
except Exception as e:
raise KSqlApiException(e)
-
def show_topics(self):
try:
response = self.client.ksql('SHOW TOPICS')
@@ -88,7 +82,6 @@ def show_topics(self):
except Exception as e:
raise KSqlApiException(e)
-
def show_streams(self):
try:
response = self.client.ksql('SHOW STREAMS')
@@ -96,7 +89,6 @@ def show_streams(self):
except Exception as e:
raise KSqlApiException(e)
-
def get_columns(self, table):
try:
response = self.client.ksql('DESCRIBE %s' % table)
@@ -104,13 +96,11 @@ def get_columns(self, table):
except Exception as e:
raise KSqlApiException(e)
-
def ksql(self, statement):
response = self.client.ksql(statement)
LOG.debug('ksqlDB response: %s' % response)
return response[0] if response else {'@type': 'queries', 'queries': []} # INSERTs return empty currently
-
def query(self, statement, channel_name=None):
data = []
metadata = []
@@ -188,11 +178,9 @@ def query(self, statement, channel_name=None):
return data, metadata
-
def cancel(self, notebook, snippet):
return {'status': -1}
-
def _decode_result(self, result):
columns = []
data = []
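For reference, a minimal sketch of the exception pattern both Kafka client modules converge on after this change: Python 3-only imports, gettext taken directly from django.utils.translation, and smart_str (which returns str on Python 3) coercing the message. The class name and fallback message below are placeholders for illustration, not code from the patch.

from django.utils.translation import gettext as _

from desktop.lib.i18n import smart_str


class ExampleClientException(Exception):  # placeholder name, for illustration only
  def __init__(self, message):
    super().__init__(message)
    self.message = message or _('Unknown error')  # fallback message is an assumption

  def __str__(self):
    return str(self.message)

  def __unicode__(self):  # legacy hook kept by the patch; smart_str returns str on Python 3
    return smart_str(self.message)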
diff --git a/desktop/libs/kafka/src/kafka/urls.py b/desktop/libs/kafka/src/kafka/urls.py
index 1a50146e52f..4551a86ed67 100644
--- a/desktop/libs/kafka/src/kafka/urls.py
+++ b/desktop/libs/kafka/src/kafka/urls.py
@@ -17,12 +17,9 @@
import sys
-from kafka import kafka_api as kafka_kafka_api
+from django.urls import re_path
-if sys.version_info[0] > 2:
- from django.urls import re_path
-else:
- from django.conf.urls import url as re_path
+from kafka import kafka_api as kafka_kafka_api
urlpatterns = [
re_path(r'^api/topics/list/$', kafka_kafka_api.list_topics, name='list_topics'),
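The URL module gets the same Python 3-only cleanup: re_path has been available from django.urls since Django 2.0, so the conditional fallback to django.conf.urls.url is no longer needed. A minimal sketch of the resulting module shape, reusing the route and view shown in the hunk above:

from django.urls import re_path

from kafka import kafka_api as kafka_kafka_api

urlpatterns = [
  re_path(r'^api/topics/list/$', kafka_kafka_api.list_topics, name='list_topics'),
]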
diff --git a/desktop/libs/libanalyze/src/libanalyze/analyze_test.py b/desktop/libs/libanalyze/src/libanalyze/analyze_test.py
index 27e974becd5..4eff4338172 100644
--- a/desktop/libs/libanalyze/src/libanalyze/analyze_test.py
+++ b/desktop/libs/libanalyze/src/libanalyze/analyze_test.py
@@ -14,20 +14,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
+import os
+import time
+import pstats
+import logging
+import cProfile
from builtins import object
-import cProfile, logging, os, pstats, sys, time
-from libanalyze import analyze as a
-from libanalyze import rules
+from io import StringIO as string_io
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
-else:
- from cStringIO import StringIO as string_io
+from libanalyze import analyze as a, rules
LOG = logging.getLogger()
+
def ordered(obj):
if isinstance(obj, dict):
return sorted((k, ordered(v)) for k, v in list(obj.items()))
@@ -36,6 +35,7 @@ def ordered(obj):
else:
return obj
+
class AnalyzeTest(object):
def setup_method(self):
self.profile = a.analyze(
@@ -51,16 +51,16 @@ def test_get_top_reasons_json(self):
self.analyze.pre_process(self.profile)
result = self.analyze.run(self.profile)
assert len(result[0]['result']) == 67
- test = [{"result": [{"reason": [{"impact": 16798499570, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1841684634.666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "30", "wall_clock_time": 20683095270, "contribution_factor_str": "SQLOperator 30:AGGREGATION_NODE"}, {"reason": [{"impact": 16137425107, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1249201121.2222214, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "7", "wall_clock_time": 20022020807, "contribution_factor_str": "SQLOperator 07:AGGREGATION_NODE"}, {"reason": [{"impact": 15991669185, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1062368963.2222214, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "20", "wall_clock_time": 19681122971, "contribution_factor_str": "SQLOperator 20:AGGREGATION_NODE"}, {"reason": [{"impact": 538561025.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "17", "wall_clock_time": 6966953012, "contribution_factor_str": "SQLOperator 17:HASH_JOIN_NODE"}, {"reason": [{"impact": 874553885.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "29", "wall_clock_time": 6705756207, "contribution_factor_str": "SQLOperator 29:HASH_JOIN_NODE"}, {"reason": [{"impact": 496170372, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "27", "wall_clock_time": 6663793736, "contribution_factor_str": "SQLOperator 27:HASH_JOIN_NODE"}, {"reason": [{"impact": 467446848.55555534, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "4", "wall_clock_time": 6641201075, "contribution_factor_str": "SQLOperator 04:HASH_JOIN_NODE"}, {"reason": [{"impact": 503890745.8888893, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "6", "wall_clock_time": 6611505627, "contribution_factor_str": "SQLOperator 06:HASH_JOIN_NODE"}, {"reason": [{"impact": 634909229.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "19", "wall_clock_time": 6401734479, "contribution_factor_str": "SQLOperator 19:HASH_JOIN_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 2612825457, "contribution_factor_str": "RemoteFragmentsStarted -1:N/A"}, {"reason": [{"impact": 3672332795.524691, "name": "Slow HDFS Scan", "fix": 
{"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1271091421, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 929179291.4444444, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "14", "wall_clock_time": 2320876241, "contribution_factor_str": "SQLOperator 14:HDFS_SCAN_NODE"}, {"reason": [{"impact": 165377262.44444442, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "5", "wall_clock_time": 2258327578, "contribution_factor_str": "SQLOperator 05:HASH_JOIN_NODE"}, {"reason": [{"impact": 174711179.44444442, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "28", "wall_clock_time": 2231494483, "contribution_factor_str": "SQLOperator 28:HASH_JOIN_NODE"}, {"reason": [{"impact": 4598206116.796875, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1261948355, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 836163684.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 49606693.93939389, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "1", "wall_clock_time": 2201407589, "contribution_factor_str": "SQLOperator 01:HDFS_SCAN_NODE"}, {"reason": [{"impact": 4407935855.252918, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1767671213, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 722860231, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "24", "wall_clock_time": 2193866884, "contribution_factor_str": "SQLOperator 24:HDFS_SCAN_NODE"}, {"reason": 
[{"impact": 96606459.11111116, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "18", "wall_clock_time": 2180207014, "contribution_factor_str": "SQLOperator 18:HASH_JOIN_NODE"}, {"reason": [{"impact": 1111759224.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 0", "wall_clock_time": 1250729128, "contribution_factor_str": "SQLOperator F04 0:CodeGen"}, {"reason": [{"impact": 193415667.33333337, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 1", "wall_clock_time": 1201795461, "contribution_factor_str": "SQLOperator F04 1:CodeGen"}, {"reason": [{"impact": 92531774.55555558, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F00 0", "wall_clock_time": 1062080747, "contribution_factor_str": "SQLOperator F00 0:CodeGen"}, {"reason": [{"impact": 118700210.11111116, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F05 0", "wall_clock_time": 1009980856, "contribution_factor_str": "SQLOperator F05 0:CodeGen"}, {"reason": [{"impact": 132909682.88888884, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F09 0", "wall_clock_time": 950194410, "contribution_factor_str": "SQLOperator F09 0:CodeGen"}, {"reason": [{"impact": 95305427.33333337, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F10 0", "wall_clock_time": 878960263, "contribution_factor_str": "SQLOperator F10 0:CodeGen"}, {"reason": [{"impact": 46199805, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F14 0", "wall_clock_time": 769058113, "contribution_factor_str": "SQLOperator F14 0:CodeGen"}, {"reason": [], "result_id": -1, "wall_clock_time": 613452579, "contribution_factor_str": "PlanningTime -1:N/A"}, {"reason": [{"impact": 306772810, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 42519756.55555558, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "45", "wall_clock_time": 319264610, "contribution_factor_str": "SQLOperator 45:AGGREGATION_NODE"}, {"reason": [{"impact": 297637309, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 29017600.555555582, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "51", "wall_clock_time": 309567409, "contribution_factor_str": "SQLOperator 51:AGGREGATION_NODE"}, {"reason": [], 
"result_id": -1, "wall_clock_time": 107247619, "contribution_factor_str": "ClientFetchWaitTimer -1:N/A"}, {"reason": [{"impact": 97484030, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 36347752, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "31", "wall_clock_time": 98861130, "contribution_factor_str": "SQLOperator 31:SORT_NODE"}, {"reason": [{"impact": 67982884, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 7664156.555555552, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "40", "wall_clock_time": 80474684, "contribution_factor_str": "SQLOperator 40:AGGREGATION_NODE"}, {"reason": [{"impact": 32130961.111111112, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "12", "wall_clock_time": 71088072, "contribution_factor_str": "SQLOperator 12:SELECT_NODE"}, {"reason": [{"impact": 58733676, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 5766554.333333336, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "8", "wall_clock_time": 60080276, "contribution_factor_str": "SQLOperator 08:SORT_NODE"}, {"reason": [{"impact": 57966057, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 4243951.444444448, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "21", "wall_clock_time": 59294857, "contribution_factor_str": "SQLOperator 21:SORT_NODE"}, {"reason": [{"impact": 47950535, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 37688100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "15", "wall_clock_time": 47950535, "contribution_factor_str": "SQLOperator 15:HDFS_SCAN_NODE"}, {"reason": [{"impact": 17818123.666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "52", "wall_clock_time": 44603227, "contribution_factor_str": "SQLOperator 52:EXCHANGE_NODE"}, {"reason": [{"impact": 9621600, "name": "Wrong join strategy", "fix": {"fixable": False}, "message": "RHS 121390; LHS 105174", "unit": 5}, {"impact": 4113826, "name": "Slow Hash Join", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the hash join", "unit": 5}, {"impact": 2924865.666666664, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew 
(max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "34", "wall_clock_time": 43779812, "contribution_factor_str": "SQLOperator 34:HASH_JOIN_NODE"}, {"reason": [{"impact": 14784147, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "32", "wall_clock_time": 42111797, "contribution_factor_str": "SQLOperator 32:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 39518015, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 29689100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "2", "wall_clock_time": 39518015, "contribution_factor_str": "SQLOperator 02:HDFS_SCAN_NODE"}, {"reason": [{"impact": 20851584.222222224, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "46", "wall_clock_time": 38647270, "contribution_factor_str": "SQLOperator 46:EXCHANGE_NODE"}, {"reason": [{"impact": 8035800, "name": "Wrong join strategy", "fix": {"fixable": False}, "message": "RHS 105576; LHS 121383", "unit": 5}, {"impact": 3816722, "name": "Slow Hash Join", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the hash join", "unit": 5}, {"impact": 1904130.4444444478, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "33", "wall_clock_time": 37364443, "contribution_factor_str": "SQLOperator 33:HASH_JOIN_NODE"}, {"reason": [{"impact": 31174821, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 1894590, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "10", "wall_clock_time": 32551921, "contribution_factor_str": "SQLOperator 10:SORT_NODE"}, {"reason": [{"impact": 26659473.75, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 20690100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "25", "wall_clock_time": 30467970, "contribution_factor_str": "SQLOperator 25:HDFS_SCAN_NODE"}, {"reason": [{"impact": 7084883.444444444, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "11", "wall_clock_time": 28336314, "contribution_factor_str": "SQLOperator 11:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2135688.222222224, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "9", "wall_clock_time": 22614443, "contribution_factor_str": "SQLOperator 09:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 1150084.666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) 
contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "22", "wall_clock_time": 22144125, "contribution_factor_str": "SQLOperator 22:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2047632, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "39", "wall_clock_time": 11957699, "contribution_factor_str": "SQLOperator 39:EXCHANGE_NODE"}, {"reason": [{"impact": 1332451, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "44", "wall_clock_time": 11506235, "contribution_factor_str": "SQLOperator 44:EXCHANGE_NODE"}, {"reason": [{"impact": 728588, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "50", "wall_clock_time": 10172630, "contribution_factor_str": "SQLOperator 50:EXCHANGE_NODE"}, {"reason": [{"impact": 3334413, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 1199000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "0", "wall_clock_time": 3334413, "contribution_factor_str": "SQLOperator 00:HDFS_SCAN_NODE"}, {"reason": [], "result_id": "53", "wall_clock_time": 3082111, "contribution_factor_str": "SQLOperator 53:EXCHANGE_NODE"}, {"reason": [{"impact": 2594847, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 1199000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "23", "wall_clock_time": 2594847, "contribution_factor_str": "SQLOperator 23:HDFS_SCAN_NODE"}, {"reason": [{"impact": 2452312, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 2198000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "13", "wall_clock_time": 2452312, "contribution_factor_str": "SQLOperator 13:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1706125, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 287883, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "16", "wall_clock_time": 1706125, "contribution_factor_str": "SQLOperator 16:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1619889, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 601555, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "3", "wall_clock_time": 1619889, "contribution_factor_str": "SQLOperator 03:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1385497, "name": "Lack of scanner thread 
parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 181359, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "26", "wall_clock_time": 1385497, "contribution_factor_str": "SQLOperator 26:HDFS_SCAN_NODE"}, {"reason": [{"impact": 559177.1111111111, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "43", "wall_clock_time": 1378341, "contribution_factor_str": "SQLOperator 43:EXCHANGE_NODE"}, {"reason": [{"impact": 362490.3333333334, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "38", "wall_clock_time": 1291643, "contribution_factor_str": "SQLOperator 38:EXCHANGE_NODE"}, {"reason": [{"impact": 265681, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "49", "wall_clock_time": 1177394, "contribution_factor_str": "SQLOperator 49:EXCHANGE_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 775849, "contribution_factor_str": "RowMaterializationTimer -1:N/A"}, {"reason": [{"impact": 235417.66666666666, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "48", "wall_clock_time": 279531, "contribution_factor_str": "SQLOperator 48:EXCHANGE_NODE"}, {"reason": [{"impact": 10539.11111111111, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "36", "wall_clock_time": 31603, "contribution_factor_str": "SQLOperator 36:EXCHANGE_NODE"}, {"reason": [{"impact": 8916.666666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "47", "wall_clock_time": 29729, "contribution_factor_str": "SQLOperator 47:EXCHANGE_NODE"}, {"reason": [{"impact": 8002.1111111111095, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "41", "wall_clock_time": 29716, "contribution_factor_str": "SQLOperator 41:EXCHANGE_NODE"}, {"reason": [{"impact": 1725.1111111111113, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "42", "wall_clock_time": 16432, "contribution_factor_str": "SQLOperator 42:EXCHANGE_NODE"}, {"reason": [{"impact": 791.1111111111113, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "37", "wall_clock_time": 14808, "contribution_factor_str": "SQLOperator 37:EXCHANGE_NODE"}, {"reason": [], "result_id": "35", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator 35:SORT_NODE"}, {"reason": [{"impact": 1111759224.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 0", "wall_clock_time": 0, 
"contribution_factor_str": "SQLOperator F04 0:BlockMgr"}, {"reason": [], "result_id": "F15 0", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator F15 0:BlockMgr"}], "rule": {"message": "Top contributing factors and its reasons", "prio": 1, "label": "Top Down Analysis"}, "template": "alan-tpl"}]
+ test = [{"result": [{"reason": [{"impact": 16798499570, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1841684634.666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "30", "wall_clock_time": 20683095270, "contribution_factor_str": "SQLOperator 30:AGGREGATION_NODE"}, {"reason": [{"impact": 16137425107, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1249201121.2222214, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "7", "wall_clock_time": 20022020807, "contribution_factor_str": "SQLOperator 07:AGGREGATION_NODE"}, {"reason": [{"impact": 15991669185, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1062368963.2222214, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "20", "wall_clock_time": 19681122971, "contribution_factor_str": "SQLOperator 20:AGGREGATION_NODE"}, {"reason": [{"impact": 538561025.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "17", "wall_clock_time": 6966953012, "contribution_factor_str": "SQLOperator 17:HASH_JOIN_NODE"}, {"reason": [{"impact": 874553885.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "29", "wall_clock_time": 6705756207, "contribution_factor_str": "SQLOperator 29:HASH_JOIN_NODE"}, {"reason": [{"impact": 496170372, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "27", "wall_clock_time": 6663793736, "contribution_factor_str": "SQLOperator 27:HASH_JOIN_NODE"}, {"reason": [{"impact": 467446848.55555534, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "4", "wall_clock_time": 6641201075, "contribution_factor_str": "SQLOperator 04:HASH_JOIN_NODE"}, {"reason": [{"impact": 503890745.8888893, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "6", "wall_clock_time": 6611505627, "contribution_factor_str": "SQLOperator 06:HASH_JOIN_NODE"}, {"reason": [{"impact": 634909229.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "19", "wall_clock_time": 6401734479, "contribution_factor_str": "SQLOperator 19:HASH_JOIN_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 2612825457, "contribution_factor_str": "RemoteFragmentsStarted -1:N/A"}, {"reason": [{"impact": 3672332795.524691, "name": "Slow HDFS Scan", "fix": 
{"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1271091421, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 929179291.4444444, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "14", "wall_clock_time": 2320876241, "contribution_factor_str": "SQLOperator 14:HDFS_SCAN_NODE"}, {"reason": [{"impact": 165377262.44444442, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "5", "wall_clock_time": 2258327578, "contribution_factor_str": "SQLOperator 05:HASH_JOIN_NODE"}, {"reason": [{"impact": 174711179.44444442, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "28", "wall_clock_time": 2231494483, "contribution_factor_str": "SQLOperator 28:HASH_JOIN_NODE"}, {"reason": [{"impact": 4598206116.796875, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1261948355, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 836163684.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 49606693.93939389, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "1", "wall_clock_time": 2201407589, "contribution_factor_str": "SQLOperator 01:HDFS_SCAN_NODE"}, {"reason": [{"impact": 4407935855.252918, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1767671213, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 722860231, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "24", "wall_clock_time": 2193866884, "contribution_factor_str": "SQLOperator 24:HDFS_SCAN_NODE"}, {"reason": 
[{"impact": 96606459.11111116, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "18", "wall_clock_time": 2180207014, "contribution_factor_str": "SQLOperator 18:HASH_JOIN_NODE"}, {"reason": [{"impact": 1111759224.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 0", "wall_clock_time": 1250729128, "contribution_factor_str": "SQLOperator F04 0:CodeGen"}, {"reason": [{"impact": 193415667.33333337, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 1", "wall_clock_time": 1201795461, "contribution_factor_str": "SQLOperator F04 1:CodeGen"}, {"reason": [{"impact": 92531774.55555558, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F00 0", "wall_clock_time": 1062080747, "contribution_factor_str": "SQLOperator F00 0:CodeGen"}, {"reason": [{"impact": 118700210.11111116, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F05 0", "wall_clock_time": 1009980856, "contribution_factor_str": "SQLOperator F05 0:CodeGen"}, {"reason": [{"impact": 132909682.88888884, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F09 0", "wall_clock_time": 950194410, "contribution_factor_str": "SQLOperator F09 0:CodeGen"}, {"reason": [{"impact": 95305427.33333337, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F10 0", "wall_clock_time": 878960263, "contribution_factor_str": "SQLOperator F10 0:CodeGen"}, {"reason": [{"impact": 46199805, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F14 0", "wall_clock_time": 769058113, "contribution_factor_str": "SQLOperator F14 0:CodeGen"}, {"reason": [], "result_id": -1, "wall_clock_time": 613452579, "contribution_factor_str": "PlanningTime -1:N/A"}, {"reason": [{"impact": 306772810, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 42519756.55555558, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "45", "wall_clock_time": 319264610, "contribution_factor_str": "SQLOperator 45:AGGREGATION_NODE"}, {"reason": [{"impact": 297637309, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 29017600.555555582, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "51", "wall_clock_time": 309567409, "contribution_factor_str": "SQLOperator 51:AGGREGATION_NODE"}, {"reason": [], 
"result_id": -1, "wall_clock_time": 107247619, "contribution_factor_str": "ClientFetchWaitTimer -1:N/A"}, {"reason": [{"impact": 97484030, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 36347752, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "31", "wall_clock_time": 98861130, "contribution_factor_str": "SQLOperator 31:SORT_NODE"}, {"reason": [{"impact": 67982884, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 7664156.555555552, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "40", "wall_clock_time": 80474684, "contribution_factor_str": "SQLOperator 40:AGGREGATION_NODE"}, {"reason": [{"impact": 32130961.111111112, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "12", "wall_clock_time": 71088072, "contribution_factor_str": "SQLOperator 12:SELECT_NODE"}, {"reason": [{"impact": 58733676, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 5766554.333333336, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "8", "wall_clock_time": 60080276, "contribution_factor_str": "SQLOperator 08:SORT_NODE"}, {"reason": [{"impact": 57966057, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 4243951.444444448, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "21", "wall_clock_time": 59294857, "contribution_factor_str": "SQLOperator 21:SORT_NODE"}, {"reason": [{"impact": 47950535, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 37688100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "15", "wall_clock_time": 47950535, "contribution_factor_str": "SQLOperator 15:HDFS_SCAN_NODE"}, {"reason": [{"impact": 17818123.666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "52", "wall_clock_time": 44603227, "contribution_factor_str": "SQLOperator 52:EXCHANGE_NODE"}, {"reason": [{"impact": 9621600, "name": "Wrong join strategy", "fix": {"fixable": False}, "message": "RHS 121390; LHS 105174", "unit": 5}, {"impact": 4113826, "name": "Slow Hash Join", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the hash join", "unit": 5}, {"impact": 2924865.666666664, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew 
(max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "34", "wall_clock_time": 43779812, "contribution_factor_str": "SQLOperator 34:HASH_JOIN_NODE"}, {"reason": [{"impact": 14784147, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "32", "wall_clock_time": 42111797, "contribution_factor_str": "SQLOperator 32:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 39518015, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 29689100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "2", "wall_clock_time": 39518015, "contribution_factor_str": "SQLOperator 02:HDFS_SCAN_NODE"}, {"reason": [{"impact": 20851584.222222224, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "46", "wall_clock_time": 38647270, "contribution_factor_str": "SQLOperator 46:EXCHANGE_NODE"}, {"reason": [{"impact": 8035800, "name": "Wrong join strategy", "fix": {"fixable": False}, "message": "RHS 105576; LHS 121383", "unit": 5}, {"impact": 3816722, "name": "Slow Hash Join", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the hash join", "unit": 5}, {"impact": 1904130.4444444478, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "33", "wall_clock_time": 37364443, "contribution_factor_str": "SQLOperator 33:HASH_JOIN_NODE"}, {"reason": [{"impact": 31174821, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 1894590, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "10", "wall_clock_time": 32551921, "contribution_factor_str": "SQLOperator 10:SORT_NODE"}, {"reason": [{"impact": 26659473.75, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 20690100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "25", "wall_clock_time": 30467970, "contribution_factor_str": "SQLOperator 25:HDFS_SCAN_NODE"}, {"reason": [{"impact": 7084883.444444444, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "11", "wall_clock_time": 28336314, "contribution_factor_str": "SQLOperator 11:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2135688.222222224, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "9", "wall_clock_time": 22614443, "contribution_factor_str": "SQLOperator 09:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 1150084.666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) 
contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "22", "wall_clock_time": 22144125, "contribution_factor_str": "SQLOperator 22:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2047632, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "39", "wall_clock_time": 11957699, "contribution_factor_str": "SQLOperator 39:EXCHANGE_NODE"}, {"reason": [{"impact": 1332451, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "44", "wall_clock_time": 11506235, "contribution_factor_str": "SQLOperator 44:EXCHANGE_NODE"}, {"reason": [{"impact": 728588, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "50", "wall_clock_time": 10172630, "contribution_factor_str": "SQLOperator 50:EXCHANGE_NODE"}, {"reason": [{"impact": 3334413, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 1199000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "0", "wall_clock_time": 3334413, "contribution_factor_str": "SQLOperator 00:HDFS_SCAN_NODE"}, {"reason": [], "result_id": "53", "wall_clock_time": 3082111, "contribution_factor_str": "SQLOperator 53:EXCHANGE_NODE"}, {"reason": [{"impact": 2594847, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 1199000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "23", "wall_clock_time": 2594847, "contribution_factor_str": "SQLOperator 23:HDFS_SCAN_NODE"}, {"reason": [{"impact": 2452312, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 2198000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "13", "wall_clock_time": 2452312, "contribution_factor_str": "SQLOperator 13:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1706125, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 287883, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "16", "wall_clock_time": 1706125, "contribution_factor_str": "SQLOperator 16:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1619889, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 601555, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "3", "wall_clock_time": 1619889, "contribution_factor_str": "SQLOperator 03:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1385497, "name": "Lack of scanner thread 
parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 181359, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "26", "wall_clock_time": 1385497, "contribution_factor_str": "SQLOperator 26:HDFS_SCAN_NODE"}, {"reason": [{"impact": 559177.1111111111, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "43", "wall_clock_time": 1378341, "contribution_factor_str": "SQLOperator 43:EXCHANGE_NODE"}, {"reason": [{"impact": 362490.3333333334, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "38", "wall_clock_time": 1291643, "contribution_factor_str": "SQLOperator 38:EXCHANGE_NODE"}, {"reason": [{"impact": 265681, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "49", "wall_clock_time": 1177394, "contribution_factor_str": "SQLOperator 49:EXCHANGE_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 775849, "contribution_factor_str": "RowMaterializationTimer -1:N/A"}, {"reason": [{"impact": 235417.66666666666, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "48", "wall_clock_time": 279531, "contribution_factor_str": "SQLOperator 48:EXCHANGE_NODE"}, {"reason": [{"impact": 10539.11111111111, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "36", "wall_clock_time": 31603, "contribution_factor_str": "SQLOperator 36:EXCHANGE_NODE"}, {"reason": [{"impact": 8916.666666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "47", "wall_clock_time": 29729, "contribution_factor_str": "SQLOperator 47:EXCHANGE_NODE"}, {"reason": [{"impact": 8002.1111111111095, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "41", "wall_clock_time": 29716, "contribution_factor_str": "SQLOperator 41:EXCHANGE_NODE"}, {"reason": [{"impact": 1725.1111111111113, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "42", "wall_clock_time": 16432, "contribution_factor_str": "SQLOperator 42:EXCHANGE_NODE"}, {"reason": [{"impact": 791.1111111111113, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "37", "wall_clock_time": 14808, "contribution_factor_str": "SQLOperator 37:EXCHANGE_NODE"}, {"reason": [], "result_id": "35", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator 35:SORT_NODE"}, {"reason": [{"impact": 1111759224.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 0", "wall_clock_time": 0, 
"contribution_factor_str": "SQLOperator F04 0:BlockMgr"}, {"reason": [], "result_id": "F15 0", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator F15 0:BlockMgr"}], "rule": {"message": "Top contributing factors and its reasons", "prio": 1, "label": "Top Down Analysis"}, "template": "alan-tpl"}] # noqa: E501
assert ordered(result) == ordered(test)
def test_performance(self):
pr = cProfile.Profile()
pr.enable()
- ts1 = time.time()*1000.0
+ ts1 = time.time() * 1000.0
self.analyze.pre_process(self.profile)
result = self.analyze.run(self.profile)
- ts2 = time.time()*1000.0
+ ts2 = time.time() * 1000.0
dts = ts2 - ts1
pr.disable()
s = string_io()
@@ -68,4 +68,4 @@ def test_performance(self):
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
LOG.info(s.getvalue())
- assert dts <= 1000
\ No newline at end of file
+ assert dts <= 1000
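As a side note, a minimal standalone sketch of the profiling pattern this test relies on, with io.StringIO standing in for the removed cStringIO. The helper name and the 'cumulative' sort key are illustrative assumptions, not part of the patch.

import time
import pstats
import cProfile
from io import StringIO as string_io


def profile_call(fn, *args, **kwargs):  # illustrative helper, not from the patch
  # Profile the call and measure wall-clock time in milliseconds, as the test does.
  pr = cProfile.Profile()
  pr.enable()
  ts1 = time.time() * 1000.0
  result = fn(*args, **kwargs)
  ts2 = time.time() * 1000.0
  pr.disable()

  # Capture the pstats report in memory instead of printing it to stdout.
  s = string_io()
  ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')  # sort key assumed
  ps.print_stats()
  return result, ts2 - ts1, s.getvalue()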
diff --git a/desktop/libs/libanalyze/src/libanalyze/rules.py b/desktop/libs/libanalyze/src/libanalyze/rules.py
index cbb8c3d47b8..189fdc77af5 100644
--- a/desktop/libs/libanalyze/src/libanalyze/rules.py
+++ b/desktop/libs/libanalyze/src/libanalyze/rules.py
@@ -14,42 +14,30 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import division
-from builtins import zip
-from builtins import range
-from builtins import object
-from functools import reduce
+
+import os
+import re
import copy
import glob
import json
-import logging
import math
-import os
-import re
-import types
-import sys
import struct
+import logging
+from functools import reduce
+from itertools import groupby
from dateutil.parser import parse as dtparse
-from itertools import groupby
-from libanalyze import models
-from libanalyze import exprs
-from libanalyze import utils
+from libanalyze import exprs, models, utils
from libanalyze.utils import Timer
-if sys.version_info[0] > 2:
- string_types = str
-else:
- string_types = types.StringTypes
-
-
LOG = logging.getLogger()
def to_double(metric_value):
return struct.unpack('d', struct.pack('q', metric_value))[0]
+
class ProfileContext(object):
"""This is the main wrapper around the runtime profile tree. Main accessor
methods are implemented here."""
@@ -70,7 +58,7 @@ class SQLOperatorReason(object):
def __init__(self, node_name, metric_names,
rule, exprs=[], to_json=True, **kwargs):
self.node_name = node_name
- if isinstance(metric_names, string_types):
+ if isinstance(metric_names, str):
self.metric_names = [metric_names]
else:
self.metric_names = metric_names
@@ -117,8 +105,8 @@ def getNumInputRows(self, node):
if nodeType == 'HdfsTableSink':
return node.find_metric_by_name('RowsInserted')[0]['value']
- metrics = reduce(lambda x,y: x + y.find_metric_by_name('RowsReturned'), node.children, [])
- return reduce(lambda x,y: x + y['value'], metrics, 0)
+ metrics = reduce(lambda x, y: x + y.find_metric_by_name('RowsReturned'), node.children, [])
+ return reduce(lambda x, y: x + y['value'], metrics, 0)
def evaluate(self, profile, plan_node_id):
"""
@@ -198,6 +186,7 @@ def check_exprs(self, group):
[g.value for g in group]))
return result
+
class SummaryReason(SQLOperatorReason):
def evaluate(self, profile, plan_node_id):
@@ -257,9 +246,10 @@ def evaluate(self, profile, plan_node_id):
"label": self.rule["label"]
}
+
class JoinOrderStrategyCheck(SQLOperatorReason):
def __init__(self):
- self.kwargs = {'fix': { 'fixable': False }, 'unit': 5}
+ self.kwargs = {'fix': {'fixable': False}, 'unit': 5}
def evaluate(self, profile, plan_node_id):
"""
@@ -308,9 +298,10 @@ def evaluate(self, profile, plan_node_id):
"label": "Wrong join strategy"
}
+
class ExplodingJoinCheck(SQLOperatorReason):
def __init__(self):
- self.kwargs = {'fix': { 'fixable': False }, 'unit': 5}
+ self.kwargs = {'fix': {'fixable': False}, 'unit': 5}
def evaluate(self, profile, plan_node_id):
"""
@@ -338,9 +329,10 @@ def evaluate(self, profile, plan_node_id):
"label": "Exploding join"
}
+
class NNRpcCheck(SQLOperatorReason):
def __init__(self):
- self.kwargs = {'fix': { 'fixable': False }, 'unit': 5}
+ self.kwargs = {'fix': {'fixable': False}, 'unit': 5}
def evaluate(self, profile, plan_node_id):
"""
@@ -363,6 +355,7 @@ def evaluate(self, profile, plan_node_id):
"label": "HDFS NN RPC"
}
+
class TopDownAnalysis(object):
def __init__(self):
@@ -381,10 +374,10 @@ def __init__(self):
nodes = [node_names]
if type == 'SQLOperator':
for node in nodes:
- self.sqlOperatorReasons.setdefault(node,[])\
+ self.sqlOperatorReasons.setdefault(node, [])\
.append(SQLOperatorReason(**json_object))
else:
- self.sqlOperatorReasons.setdefault(type,[])\
+ self.sqlOperatorReasons.setdefault(type, [])\
.append(SummaryReason(**json_object))
  # Manually append specially coded reason
@@ -428,7 +421,6 @@ def getTopReasons(self, contributor):
"""
return sorted(contributor.reason, key=lambda x: x.impact, reverse=True) if contributor.reason else contributor.reason
-
def createContributors(self, profile):
""" Return the models.Contributor objects. Contributor can be planning time,
admission control wait time, query fragment distribution time, SQL operator, DML
@@ -438,10 +430,10 @@ def createContributors(self, profile):
persisted in the database.
"""
execution_profile = profile.find_by_name('Execution Profile')
- #summary = _profile.find_by_name("Summary")
+ # summary = _profile.find_by_name("Summary")
counter_map = profile.find_by_name('Summary').counter_map()
counter_map.update(profile.find_by_name("ImpalaServer").counter_map())
- #counter_map = summary.counter_map()
+ # counter_map = summary.counter_map()
# list of non-SQL operator contributor
# TODO: add admission control, DML Metastore update; profile does not have it yet.
@@ -453,14 +445,14 @@ def createContributors(self, profile):
contributor = models.Contributor(type=metric,
wall_clock_time=counter_map[metric].value,
plan_node_id=-1, plan_node_name="N/A")
- #models.db.session.add(contributor)
+ # models.db.session.add(contributor)
contributors += [contributor]
if self.isDebugBuilt(profile):
contributor = models.Contributor(type="Debug Built",
wall_clock_time=9999999999999999,
plan_node_id=-1, plan_node_name="N/A")
- #models.db.session.add(contributor)
+ # models.db.session.add(contributor)
contributors += [contributor]
# Get the top N contributor from query execution
@@ -468,9 +460,9 @@ def createContributors(self, profile):
# Get the plan node execution time
# Note: ignore DataStreamSender because its metrics is useless
nodes = execution_profile.find_all_non_fragment_nodes()
- nodes = [x for x in nodes if x.fragment and x.fragment.is_averaged() == False]
+ nodes = [x for x in nodes if x.fragment and x.fragment.is_averaged() is False]
nodes = [x for x in nodes if x.name() != 'DataStreamSender']
- metrics = reduce(lambda x,y: x + y.find_metric_by_name('LocalTime'), nodes, [])
+ metrics = reduce(lambda x, y: x + y.find_metric_by_name('LocalTime'), nodes, [])
metrics = sorted(metrics, key=lambda x: (x['node'].id(), x['node'].name()))
for k, g in groupby(metrics, lambda x: (x['node'].id(), x['node'].name())):
grouped = list(g)
@@ -481,7 +473,6 @@ def createContributors(self, profile):
plan_node_id=grouped[0]['node'].id(), plan_node_name=grouped[0]['node'].name())
contributors += [contributor]
-
# Sort execTime based on wall_clock_time and cut it off at limit
contributors = sorted(contributors, key=lambda x: x.wall_clock_time, reverse=True)
return contributors
@@ -493,7 +484,7 @@ def createExecSqlNodeReason(self, contributor, profile):
The result will be in the form of
"""
reasons = []
- self.sqlOperatorReasons.setdefault(contributor.plan_node_name,[])
+ self.sqlOperatorReasons.setdefault(contributor.plan_node_name, [])
for cause in self.sqlOperatorReasons[contributor.plan_node_name] + self.sqlOperatorReasons["ANY"]:
evaluation = cause.evaluate(profile, contributor.plan_node_id)
impact = evaluation["impact"]
@@ -515,7 +506,7 @@ def createExecNodeReason(self, contributor, profile):
The result will be in the form of
"""
reasons = []
- self.sqlOperatorReasons.setdefault(contributor.type,[])
+ self.sqlOperatorReasons.setdefault(contributor.type, [])
for cause in self.sqlOperatorReasons[contributor.type]:
evaluation = cause.evaluate(profile, contributor.plan_node_id)
impact = evaluation["impact"]
@@ -606,7 +597,7 @@ def add_host(node, exec_summary_json=exec_summary_json):
is_plan_node = node.is_plan_node()
node_id = node.id()
nid = int(node_id) if node_id and node.is_regular() else -1
- # Setup Hosts & Broadcast
+ # Setup Hosts & Broadcast
if node_id and node.is_regular() and nid in exec_summary_json:
exec_summary_node = exec_summary_json.get(nid, {})
node.val.counters.append(models.TCounter(name='Hosts', value=exec_summary_node.get('hosts', ''), unit=0))
@@ -665,7 +656,7 @@ def add_host(node, exec_summary_json=exec_summary_json):
grouping_aggregator = node.find_by_name('GroupingAggregator')
if grouping_aggregator and grouping_aggregator.counter_map().get('SpilledPartitions', models.TCounter(value=0)).value > 0:
has_spilled = True
- elif is_plan_node and node_name == 'HASH_JOIN_NODE': # For Hash Join, if the "LocalTime" metrics
+ elif is_plan_node and node_name == 'HASH_JOIN_NODE': # For Hash Join, if the "LocalTime" metrics
hash_join_builder = node.find_by_name('Hash Join Builder')
if hash_join_builder and hash_join_builder.counter_map().get('SpilledPartitions', models.TCounter(value=0)).value > 0:
has_spilled = True
@@ -688,6 +679,7 @@ def add_host(node, exec_summary_json=exec_summary_json):
node.val.counters.append(models.TCounter(name='ChildTime', value=child_time, unit=5))
nodes = {}
+
def create_map(node, nodes=nodes):
nid = node.id()
if nid:
@@ -708,21 +700,21 @@ def run(self, profile):
if self.isDebugBuilt(profile):
topContributions += [{
- "result_id" : result_id,
- "contribution_factor_str" : "Using Debug Built",
- "wall_clock_time" : 9999,
- "reason" : []
+ "result_id": result_id,
+ "contribution_factor_str": "Using Debug Built",
+ "wall_clock_time": 9999,
+ "reason": []
}]
for contributor in topContributors:
reasons = self.getTopReasons(contributor)
topContributions += [{
- "result_id" : contributor.plan_node_id if contributor.plan_node_id != -1 else -1,
- "contribution_factor_str" : contributor.type + " " +
+ "result_id": contributor.plan_node_id if contributor.plan_node_id != -1 else -1,
+ "contribution_factor_str": contributor.type + " " +
str(contributor.plan_node_id).zfill(2) +
":" + contributor.plan_node_name,
- "wall_clock_time" : contributor.wall_clock_time,
- "reason" : [reason.__dict__ for reason in reasons]
+ "wall_clock_time": contributor.wall_clock_time,
+ "reason": [reason.__dict__ for reason in reasons]
}]
result = []
diff --git a/desktop/libs/liboauth/src/liboauth/backend.py b/desktop/libs/liboauth/src/liboauth/backend.py
index fdc599ba02a..5b01644d912 100644
--- a/desktop/libs/liboauth/src/liboauth/backend.py
+++ b/desktop/libs/liboauth/src/liboauth/backend.py
@@ -18,39 +18,33 @@
See desktop/auth/backend.py
"""
-from future import standard_library
-standard_library.install_aliases()
-import json
import cgi
-import logging
import sys
+import json
+import logging
+from urllib.parse import urlencode as lib_urlencode
-LOG = logging.getLogger()
-
-from desktop.auth.backend import force_username_case, DesktopBackendBase
-
-from useradmin.models import get_profile, get_default_user_group, UserProfile, User
+from django.utils.translation import gettext as _
import liboauth.conf
import liboauth.metrics
+from desktop.auth.backend import DesktopBackendBase, force_username_case
+from useradmin.models import User, UserProfile, get_default_user_group, get_profile
+
+LOG = logging.getLogger()
try:
import httplib2
except ImportError:
LOG.warning('httplib2 module not found')
+
+
try:
import oauth2 as oauth
except ImportError:
LOG.warning('oauth2 module not found')
oauth = None
-if sys.version_info[0] > 2:
- from urllib.parse import urlencode as lib_urlencode
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
- from urllib import urlencode as lib_urlencode
-
class OAuthBackend(DesktopBackendBase):
@@ -89,7 +83,6 @@ def authenticate(self, access_token):
return user
-
@classmethod
def manages_passwords_externally(cls):
return True
@@ -99,21 +92,20 @@ def is_first_login_ever(cls):
""" Return true if no external user has ever logged in to Desktop yet. """
return not UserProfile.objects.filter(creation_method=UserProfile.CreationMethod.EXTERNAL.name).exists()
-
@classmethod
def handleAuthenticationRequest(cls, request):
assert oauth is not None
if 'oauth_verifier' in request.GET:
social = 'twitter'
- consumer_key=liboauth.conf.CONSUMER_KEY_TWITTER.get()
- consumer_secret=liboauth.conf.CONSUMER_SECRET_TWITTER.get()
- access_token_uri=liboauth.conf.ACCESS_TOKEN_URL_TWITTER.get()
+ consumer_key = liboauth.conf.CONSUMER_KEY_TWITTER.get()
+ consumer_secret = liboauth.conf.CONSUMER_SECRET_TWITTER.get()
+ access_token_uri = liboauth.conf.ACCESS_TOKEN_URL_TWITTER.get()
consumer = oauth.Consumer(consumer_key, consumer_secret)
token = oauth.Token(request.session['request_token']['oauth_token'], request.session['request_token']['oauth_token_secret'])
client = oauth.Client(consumer, token)
- oauth_verifier=request.GET['oauth_verifier']
+ oauth_verifier = request.GET['oauth_verifier']
resp, content = client.request(access_token_uri + oauth_verifier, "GET")
if resp['status'] != '200':
raise Exception(_("Invalid response from OAuth provider: %s") % resp)
@@ -135,49 +127,49 @@ def handleAuthenticationRequest(cls, request):
social = state_split[0]
if social == 'google':
- consumer_key=liboauth.conf.CONSUMER_KEY_GOOGLE.get()
- consumer_secret=liboauth.conf.CONSUMER_SECRET_GOOGLE.get()
- access_token_uri=liboauth.conf.ACCESS_TOKEN_URL_GOOGLE.get()
- authentication_token_uri=liboauth.conf.AUTHORIZE_URL_GOOGLE.get()
+ consumer_key = liboauth.conf.CONSUMER_KEY_GOOGLE.get()
+ consumer_secret = liboauth.conf.CONSUMER_SECRET_GOOGLE.get()
+ access_token_uri = liboauth.conf.ACCESS_TOKEN_URL_GOOGLE.get()
+ authentication_token_uri = liboauth.conf.AUTHORIZE_URL_GOOGLE.get()
elif social == 'facebook':
- consumer_key=liboauth.conf.CONSUMER_KEY_FACEBOOK.get()
- consumer_secret=liboauth.conf.CONSUMER_SECRET_FACEBOOK.get()
- access_token_uri=liboauth.conf.ACCESS_TOKEN_URL_FACEBOOK.get()
- authentication_token_uri=liboauth.conf.AUTHORIZE_URL_FACEBOOK.get()
+ consumer_key = liboauth.conf.CONSUMER_KEY_FACEBOOK.get()
+ consumer_secret = liboauth.conf.CONSUMER_SECRET_FACEBOOK.get()
+ access_token_uri = liboauth.conf.ACCESS_TOKEN_URL_FACEBOOK.get()
+ authentication_token_uri = liboauth.conf.AUTHORIZE_URL_FACEBOOK.get()
elif social == 'linkedin':
- consumer_key=liboauth.conf.CONSUMER_KEY_LINKEDIN.get()
- consumer_secret=liboauth.conf.CONSUMER_SECRET_LINKEDIN.get()
- access_token_uri=liboauth.conf.ACCESS_TOKEN_URL_LINKEDIN.get()
- authentication_token_uri=liboauth.conf.AUTHORIZE_URL_LINKEDIN.get()
+ consumer_key = liboauth.conf.CONSUMER_KEY_LINKEDIN.get()
+ consumer_secret = liboauth.conf.CONSUMER_SECRET_LINKEDIN.get()
+ access_token_uri = liboauth.conf.ACCESS_TOKEN_URL_LINKEDIN.get()
+ authentication_token_uri = liboauth.conf.AUTHORIZE_URL_LINKEDIN.get()
params = lib_urlencode({
- 'code':code,
- 'redirect_uri':redirect_uri,
+ 'code': code,
+ 'redirect_uri': redirect_uri,
'client_id': consumer_key,
'client_secret': consumer_secret,
- 'grant_type':grant_type
+ 'grant_type': grant_type
})
- headers={'content-type':'application/x-www-form-urlencoded'}
- resp, cont = parser.request(access_token_uri, method = 'POST', body = params, headers = headers)
+ headers = {'content-type': 'application/x-www-form-urlencoded'}
+ resp, cont = parser.request(access_token_uri, method='POST', body=params, headers=headers)
if resp['status'] != '200':
raise Exception(_("Invalid response from OAuth provider: %s") % resp)
- #google
+ # google
if social == 'google':
access_tok = (json.loads(cont))['access_token']
auth_token_uri = authentication_token_uri + access_tok
resp, content = parser.request(auth_token_uri, "GET")
if resp['status'] != '200':
raise Exception(_("Invalid response from OAuth provider: %s") % resp)
- username=(json.loads(content))["email"]
+ username = (json.loads(content))["email"]
access_token = dict(screen_name=map_username(username), oauth_token_secret=access_tok)
whitelisted_domains = liboauth.conf.WHITELISTED_DOMAINS_GOOGLE.get()
if whitelisted_domains:
if username.split('@')[1] not in whitelisted_domains:
access_token = ""
- #facebook
+ # facebook
elif social == 'facebook':
access_tok = (dict(cgi.parse_qsl(cont)))['access_token']
auth_token_uri = authentication_token_uri + access_tok
@@ -186,7 +178,7 @@ def handleAuthenticationRequest(cls, request):
raise Exception(_("Invalid response from OAuth provider: %s") % resp)
username = (json.loads(content))["email"]
access_token = dict(screen_name=map_username(username), oauth_token_secret=access_tok)
- #linkedin
+ # linkedin
elif social == 'linkedin':
access_tok = (json.loads(cont))['access_token']
auth_token_uri = authentication_token_uri + access_tok
@@ -196,10 +188,8 @@ def handleAuthenticationRequest(cls, request):
username = (json.loads(content))['emailAddress']
access_token = dict(screen_name=map_username(username), oauth_token_secret=access_tok)
-
return access_token, nexturl
-
@classmethod
def handleLoginRequest(cls, request):
assert oauth is not None
@@ -211,30 +201,30 @@ def handleLoginRequest(cls, request):
state = social + "," + request.GET.get('next', '/')
if social == 'google':
- consumer_key=liboauth.conf.CONSUMER_KEY_GOOGLE.get()
+ consumer_key = liboauth.conf.CONSUMER_KEY_GOOGLE.get()
token_request_uri = liboauth.conf.REQUEST_TOKEN_URL_GOOGLE.get()
scope = "https://www.googleapis.com/auth/userinfo.email"
- access_type="offline"
- approval_prompt="force"
-
- url = "{token_request_uri}?response_type={response_type}&client_id={client_id}&redirect_uri={redirect_uri}&scope={scope}&state={state}&access_type={access_type}&approval_prompt={approval_prompt}".format(
- token_request_uri = token_request_uri,
- response_type = response_type,
- client_id = consumer_key,
- redirect_uri = redirect_uri,
- scope = scope,
- state = state,
- access_type = access_type,
- approval_prompt = approval_prompt)
-
- #facebook
+ access_type = "offline"
+ approval_prompt = "force"
+
+ url = "{token_request_uri}?response_type={response_type}&client_id={client_id}&redirect_uri={redirect_uri}&scope={scope}&state={state}&access_type={access_type}&approval_prompt={approval_prompt}".format( # noqa: E501
+ token_request_uri=token_request_uri,
+ response_type=response_type,
+ client_id=consumer_key,
+ redirect_uri=redirect_uri,
+ scope=scope,
+ state=state,
+ access_type=access_type,
+ approval_prompt=approval_prompt)
+
+ # facebook
elif social == 'facebook':
- consumer_key=liboauth.conf.CONSUMER_KEY_FACEBOOK.get()
+ consumer_key = liboauth.conf.CONSUMER_KEY_FACEBOOK.get()
token_request_uri = liboauth.conf.REQUEST_TOKEN_URL_FACEBOOK.get()
scope = "email"
grant_type = "client_credentials"
- url = "{token_request_uri}?client_id={client_id}&redirect_uri={redirect_uri}&grant_type={grant_type}&scope={scope}&state={state}".format(
+ url = "{token_request_uri}?client_id={client_id}&redirect_uri={redirect_uri}&grant_type={grant_type}&scope={scope}&state={state}".format( # noqa: E501
token_request_uri=token_request_uri,
client_id=consumer_key,
redirect_uri=redirect_uri,
@@ -242,23 +232,23 @@ def handleLoginRequest(cls, request):
scope=scope,
state=state)
- #linkedin
+ # linkedin
elif social == 'linkedin':
- consumer_key=liboauth.conf.CONSUMER_KEY_LINKEDIN.get()
+ consumer_key = liboauth.conf.CONSUMER_KEY_LINKEDIN.get()
token_request_uri = liboauth.conf.REQUEST_TOKEN_URL_LINKEDIN.get()
- scope= "r_emailaddress"
+ scope = "r_emailaddress"
- url = "{token_request_uri}?response_type={response_type}&client_id={client_id}&scope={scope}&state={state}&redirect_uri={redirect_uri}".format(
+ url = "{token_request_uri}?response_type={response_type}&client_id={client_id}&scope={scope}&state={state}&redirect_uri={redirect_uri}".format( # noqa: E501
token_request_uri=token_request_uri,
response_type=response_type,
client_id=consumer_key,
scope=scope,
state=state,
redirect_uri=redirect_uri)
- #twitter
+ # twitter
else:
- consumer_key=liboauth.conf.CONSUMER_KEY_TWITTER.get()
- consumer_secret=liboauth.conf.CONSUMER_SECRET_TWITTER.get()
+ consumer_key = liboauth.conf.CONSUMER_KEY_TWITTER.get()
+ consumer_secret = liboauth.conf.CONSUMER_SECRET_TWITTER.get()
token_request_uri = liboauth.conf.REQUEST_TOKEN_URL_TWITTER.get()
token_authentication_uri = liboauth.conf.AUTHORIZE_URL_TWITTER.get()
@@ -274,6 +264,7 @@ def handleLoginRequest(cls, request):
)
return url
+
def map_username(username):
username_map = liboauth.conf.USERNAME_MAP.get()
if username_map:
@@ -281,6 +272,7 @@ def map_username(username):
username = username.replace(key, value)
return ''.join([x for x in username if x.isalnum()])
+
def find_or_create_user(username, password=None):
try:
user = User.objects.get(username=username)
@@ -296,6 +288,7 @@ def find_or_create_user(username, password=None):
user.save()
return user
+
def get_redirect_uri(request):
# Either use the proxy-specified protocol or the one from the request itself.
# This is useful if the server is behind some kind of proxy
diff --git a/desktop/libs/liboauth/src/liboauth/conf.py b/desktop/libs/liboauth/src/liboauth/conf.py
index ccdf8df1a3e..8e8340799e0 100644
--- a/desktop/libs/liboauth/src/liboauth/conf.py
+++ b/desktop/libs/liboauth/src/liboauth/conf.py
@@ -18,13 +18,9 @@
import os
import sys
-from desktop.lib.conf import Config, coerce_bool, coerce_csv, coerce_json_dict
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext as _, gettext_lazy as _t
+from desktop.lib.conf import Config, coerce_bool, coerce_csv, coerce_json_dict
CONSUMER_KEY_TWITTER = Config(
key="consumer_key_twitter",
@@ -168,4 +164,3 @@
type=coerce_json_dict,
default='{}'
)
-
diff --git a/desktop/libs/liboauth/src/liboauth/urls.py b/desktop/libs/liboauth/src/liboauth/urls.py
index 700b8d85408..fb69885fcc1 100644
--- a/desktop/libs/liboauth/src/liboauth/urls.py
+++ b/desktop/libs/liboauth/src/liboauth/urls.py
@@ -17,12 +17,9 @@
import sys
-from liboauth import views as liboauth_views
+from django.urls import re_path
-if sys.version_info[0] > 2:
- from django.urls import re_path
-else:
- from django.conf.urls import url as re_path
+from liboauth import views as liboauth_views
urlpatterns = [
re_path(r'^accounts/login/$', liboauth_views.show_login_page, name='show_oauth_login'),
diff --git a/desktop/libs/liboauth/src/liboauth/views.py b/desktop/libs/liboauth/src/liboauth/views.py
index 61bd11310ca..110c7e139b0 100644
--- a/desktop/libs/liboauth/src/liboauth/views.py
+++ b/desktop/libs/liboauth/src/liboauth/views.py
@@ -15,43 +15,35 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-
import logging
-import sys
-
-LOG = logging.getLogger()
-
-import urllib.request, urllib.parse, urllib.error
-try:
- import httplib2
-except ImportError:
- LOG.warning('httplib2 module not found')
+import urllib.error
+import urllib.parse
+import urllib.request
import django.contrib.auth.views
-from django.core.exceptions import SuspiciousOperation
-from django.contrib.auth import login, get_backends, authenticate
+from django.contrib.auth import authenticate, get_backends, login
from django.contrib.sessions.models import Session
+from django.core.exceptions import SuspiciousOperation
from django.http import HttpResponseRedirect
from django.urls import reverse
-from hadoop.fs.exceptions import WebHdfsException
-from useradmin.models import User
-from useradmin.views import ensure_home_directory
+from django.utils.translation import gettext as _
+import liboauth.conf
from desktop.auth.backend import AllowFirstUserDjangoBackend
-from desktop.auth.forms import UserCreationForm, AuthenticationForm
-from desktop.lib.django_util import render
-from desktop.lib.django_util import login_notrequired
+from desktop.auth.forms import AuthenticationForm, UserCreationForm
+from desktop.lib.django_util import login_notrequired, render
from desktop.log.access import access_warn, last_access_map
-
-import liboauth.conf
+from hadoop.fs.exceptions import WebHdfsException
from liboauth.backend import OAuthBackend
+from useradmin.models import User
+from useradmin.views import ensure_home_directory
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+LOG = logging.getLogger()
+
+try:
+ import httplib2
+except ImportError:
+ LOG.warning('httplib2 module not found')
@login_notrequired
@@ -73,7 +65,6 @@ def show_login_page(request, login_errors=False):
})
-
@login_notrequired
def oauth_login(request):
if 'social' not in request.GET:
diff --git a/desktop/libs/liboozie/src/liboozie/conf.py b/desktop/libs/liboozie/src/liboozie/conf.py
index 55b6f0c0a6a..5ab6d9e7da8 100644
--- a/desktop/libs/liboozie/src/liboozie/conf.py
+++ b/desktop/libs/liboozie/src/liboozie/conf.py
@@ -15,20 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import oct, object
-import logging
import sys
+import logging
+from builtins import object, oct
+
+from django.utils.translation import gettext as _, gettext_lazy as _t
from desktop import appmanager
from desktop.conf import default_ssl_validate
from desktop.lib.conf import Config, coerce_bool, validate_path
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _, gettext_lazy as _t
-else:
- from django.utils.translation import ugettext as _, ugettext_lazy as _t
-
-
LOG = logging.getLogger()
@@ -55,7 +51,7 @@
)
)
-SSL_CERT_CA_VERIFY=Config(
+SSL_CERT_CA_VERIFY = Config(
key="ssl_cert_ca_verify",
help="In secure mode (HTTPS), if SSL certificates from Oozie Rest APIs have to be verified against certificate authority",
dynamic_default=default_ssl_validate,
@@ -79,9 +75,9 @@ def get_oozie_status(user):
status = 'down'
try:
- if not 'test' in sys.argv: # Avoid tests hanging
+ if 'test' not in sys.argv: # Avoid tests hanging
status = str(get_oozie(user).get_oozie_status())
- except:
+ except Exception:
LOG.exception('failed to get oozie status')
return status
@@ -131,9 +127,16 @@ def config_validator(user):
api = get_oozie(user, api_version="v2")
configuration = api.get_configuration()
- if 'org.apache.oozie.service.MetricsInstrumentationService' in [c.strip() for c in configuration.get('oozie.services.ext', '').split(',')]:
+ if 'org.apache.oozie.service.MetricsInstrumentationService' in [
+ c.strip() for c in configuration.get('oozie.services.ext', '').split(',')
+ ]:
metrics = api.get_metrics()
- sharelib_url = 'gauges' in metrics and 'libs.sharelib.system.libpath' in metrics['gauges'] and [metrics['gauges']['libs.sharelib.system.libpath']['value']] or []
+ sharelib_url = (
+ 'gauges' in metrics
+ and 'libs.sharelib.system.libpath' in metrics['gauges']
+ and [metrics['gauges']['libs.sharelib.system.libpath']['value']]
+ or []
+ )
else:
intrumentation = api.get_instrumentation()
sharelib_url = [
diff --git a/desktop/libs/liboozie/src/liboozie/conf_tests.py b/desktop/libs/liboozie/src/liboozie/conf_tests.py
index c45db57cd09..0429d8061cb 100644
--- a/desktop/libs/liboozie/src/liboozie/conf_tests.py
+++ b/desktop/libs/liboozie/src/liboozie/conf_tests.py
@@ -15,19 +15,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import pytest
import sys
+from unittest.mock import Mock, patch
-from useradmin.models import User
-from desktop.lib.django_test_util import make_logged_in_client
+import pytest
+from desktop.lib.django_test_util import make_logged_in_client
from liboozie.conf import config_validator
-
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock
-else:
- from mock import patch, Mock
+from useradmin.models import User
@pytest.mark.django_db
diff --git a/desktop/libs/liboozie/src/liboozie/credentials.py b/desktop/libs/liboozie/src/liboozie/credentials.py
index 811d61f804a..c22915c0bbf 100644
--- a/desktop/libs/liboozie/src/liboozie/credentials.py
+++ b/desktop/libs/liboozie/src/liboozie/credentials.py
@@ -15,15 +15,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
import sys
+import logging
+from builtins import object
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from django.utils.translation import gettext as _
LOG = logging.getLogger()
@@ -46,15 +42,16 @@ def fetch(self, oozie_api):
self.credentials = self._parse_oozie(configuration)
def _parse_oozie(self, configuration_dic):
- return dict([cred.strip().split('=') for cred in configuration_dic.get('oozie.credentials.credentialclasses', '').strip().split(',') if cred])
+ return dict(
+ [cred.strip().split('=') for cred in configuration_dic.get('oozie.credentials.credentialclasses', '').strip().split(',') if cred])
@property
def class_to_name_credentials(self):
- return dict((v,k) for k, v in self.credentials.items())
+ return dict((v, k) for k, v in self.credentials.items())
def get_properties(self, hive_properties=None):
credentials = {}
- from beeswax import hive_site, conf
+ from beeswax import conf, hive_site
if not hasattr(conf.HIVE_SERVER_HOST, 'get') or not conf.HIVE_SERVER_HOST.get():
LOG.warning('Could not get all the Oozie credentials: beeswax app is blacklisted.')
diff --git a/desktop/libs/liboozie/src/liboozie/oozie_api.py b/desktop/libs/liboozie/src/liboozie/oozie_api.py
index 348db58665a..7709763739d 100644
--- a/desktop/libs/liboozie/src/liboozie/oozie_api.py
+++ b/desktop/libs/liboozie/src/liboozie/oozie_api.py
@@ -14,25 +14,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
+import sys
import logging
import posixpath
-import sys
+from builtins import object
-from desktop.conf import TIME_ZONE
-from desktop.conf import DEFAULT_USER
+from desktop.conf import DEFAULT_USER, TIME_ZONE
from desktop.lib.rest.http_client import HttpClient
from desktop.lib.rest.resource import Resource
-
-from liboozie.conf import SECURITY_ENABLED, OOZIE_URL, SSL_CERT_CA_VERIFY
-from liboozie.types import WorkflowList, CoordinatorList, Coordinator, Workflow,\
- CoordinatorAction, WorkflowAction, BundleList, Bundle, BundleAction
+from liboozie.conf import OOZIE_URL, SECURITY_ENABLED, SSL_CERT_CA_VERIFY
+from liboozie.types import (
+ Bundle,
+ BundleAction,
+ BundleList,
+ Coordinator,
+ CoordinatorAction,
+ CoordinatorList,
+ Workflow,
+ WorkflowAction,
+ WorkflowList,
+)
from liboozie.utils import config_gen
-
LOG = logging.getLogger()
DEFAULT_USER = DEFAULT_USER.get()
-API_VERSION = 'v1' # Overridden to v2 for SLA
+API_VERSION = 'v1' # Overridden to v2 for SLA
_XML_CONTENT_TYPE = 'application/xml;charset=UTF-8'
@@ -178,10 +184,9 @@ def get_job_definition(self, jobid):
params = self._get_params()
params['show'] = 'definition'
job_def = self._root.get('job/%s' % (jobid,), params)
- if sys.version_info[0] > 2:
- job_def = job_def.decode()
- return job_def
+ job_def = job_def.decode()
+ return job_def
def get_job_log(self, jobid, logfilter=None):
"""
@@ -199,10 +204,9 @@ def get_job_log(self, jobid, logfilter=None):
filter_list.append('%s=%s' % (key, val))
params['logfilter'] = ';'.join(filter_list)
log = self._root.get('job/%s' % (jobid,), params)
- if sys.version_info[0] > 2:
- log = log.decode()
- return log
+ log = log.decode()
+ return log
def get_job_graph(self, jobid, format='svg'):
params = self._get_params()
@@ -212,7 +216,6 @@ def get_job_graph(self, jobid, format='svg'):
svg_data = self._root.get('job/%s' % (jobid,), params)
return svg_data
-
def get_job_status(self, jobid):
params = self._get_params()
params['show'] = 'status'
@@ -247,8 +250,7 @@ def job_control(self, jobid, action, properties=None, parameters=None):
params.update(parameters)
resp = self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
- if sys.version_info[0] > 2:
- resp = resp.decode()
+ resp = resp.decode()
return resp
diff --git a/desktop/libs/liboozie/src/liboozie/submission2.py b/desktop/libs/liboozie/src/liboozie/submission2.py
index 06ce7a01eec..da3a09519c1 100644
--- a/desktop/libs/liboozie/src/liboozie/submission2.py
+++ b/desktop/libs/liboozie/src/liboozie/submission2.py
@@ -15,16 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import errno
-import logging
import os
import sys
import time
-
+import errno
+import logging
+from builtins import object
from string import Template
from django.utils.functional import wraps
+from django.utils.translation import gettext as _
from beeswax.hive_site import get_hive_site_content
from desktop.lib.exceptions_renderable import PopupException
@@ -32,21 +32,14 @@
from desktop.lib.parameterization import find_variables
from desktop.lib.paths import get_desktop_root
from desktop.models import Document2
-from indexer.conf import CONFIG_JDBC_LIBS_PATH
-from metadata.conf import ALTUS
-from oozie.utils import convert_to_server_timezone
-
from hadoop import cluster
from hadoop.fs.hadoopfs import Hdfs
-
+from indexer.conf import CONFIG_JDBC_LIBS_PATH
from liboozie.conf import REMOTE_DEPLOYMENT_DIR, USE_LIBPATH_FOR_JARS
from liboozie.credentials import Credentials
from liboozie.oozie_api import get_oozie
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from metadata.conf import ALTUS
+from oozie.utils import convert_to_server_timezone
LOG = logging.getLogger()
@@ -96,7 +89,7 @@ def __init__(self, user, job=None, fs=None, jt=None, properties=None, oozie_id=N
local_tz = self.job.data.get('properties')['timezone']
# Modify start_date & end_date only when it's a coordinator
- from oozie.models2 import Coordinator, Bundle
+ from oozie.models2 import Bundle, Coordinator
if type(self.job) is Coordinator:
if 'start_date' in self.properties:
properties['start_date'] = convert_to_server_timezone(self.properties['start_date'], local_tz)
@@ -292,8 +285,8 @@ def deploy(self, deployment_dir=None):
self.fs.do_as_user(self.user, self.fs.copyFromLocal, os.path.join(source_path, name), destination_path)
elif action.data['type'] == 'impala' or action.data['type'] == 'impala-document':
- from oozie.models2 import _get_impala_url
from impala.impala_flags import get_ssl_server_certificate
+ from oozie.models2 import _get_impala_url
if action.data['type'] == 'impala-document':
from notebook.models import Notebook
@@ -620,7 +613,7 @@ def _create_file(self, deployment_dir, file_name, data, do_as=False):
# In Py3 because of i18n, the xml data is not properly utf-8 encoded for some languages.
# This can later throw UnicodeEncodeError exception for request body in HDFS or other FS API calls. To tackle this,
# We are converting the data into bytes by utf-8 encoding instead of str type.
- data = smart_str(data).encode('utf-8') if sys.version_info[0] > 2 else smart_str(data)
+ data = smart_str(data).encode('utf-8')
if do_as:
self.fs.do_as_user(self.user, self.fs.create, file_path, overwrite=True, permission=0o644, data=data)
@@ -679,7 +672,7 @@ def _generate_altus_job_action_script(self, service, cluster, jobs, auth_key_id,
else:
hostname = ALTUS.HOSTNAME.get()
- if type(cluster) == dict:
+ if type(cluster) is dict:
command = 'createAWSCluster'
arguments = cluster
else:
diff --git a/desktop/libs/liboozie/src/liboozie/submittion.py b/desktop/libs/liboozie/src/liboozie/submittion.py
index ec299d12daf..2d00b8a0c7f 100644
--- a/desktop/libs/liboozie/src/liboozie/submittion.py
+++ b/desktop/libs/liboozie/src/liboozie/submittion.py
@@ -15,30 +15,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import errno
-import logging
import os
import re
import sys
import time
+import errno
+import logging
+from builtins import object
from django.utils.functional import wraps
+from django.utils.translation import gettext as _
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.i18n import smart_str
from desktop.lib.parameterization import find_variables
from hadoop import cluster
from hadoop.fs.hadoopfs import Hdfs
-
-from liboozie.oozie_api import get_oozie
from liboozie.conf import REMOTE_DEPLOYMENT_DIR
from liboozie.credentials import Credentials
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from liboozie.oozie_api import get_oozie
LOG = logging.getLogger()
@@ -51,7 +46,7 @@ def decorate(self, deployment_dir=None):
jt_address = cluster.get_cluster_addr_for_job_submission()
if deployment_dir is None:
- self._update_properties(jt_address) # Needed as we need to set some properties like Credentials before
+ self._update_properties(jt_address) # Needed as we need to set some properties like Credentials before
deployment_dir = self.deploy()
self._update_properties(jt_address, deployment_dir)
@@ -73,7 +68,7 @@ def __init__(self, user, job=None, fs=None, jt=None, properties=None, oozie_id=N
self.job = job
self.user = user
self.fs = fs
- self.jt = jt # Deprecated with YARN, we now use logical names only for RM
+ self.jt = jt # Deprecated with YARN, we now use logical names only for RM
self.oozie_id = oozie_id
self.api = get_oozie(self.user)
@@ -116,7 +111,7 @@ def rerun(self, deployment_dir, fail_nodes=None, skip_nodes=None):
if fail_nodes:
self.properties.update({'oozie.wf.rerun.failnodes': fail_nodes})
elif not skip_nodes:
- self.properties.update({'oozie.wf.rerun.failnodes': 'false'}) # Case empty 'skip_nodes' list
+ self.properties.update({'oozie.wf.rerun.failnodes': 'false'}) # Case empty 'skip_nodes' list
else:
self.properties.update({'oozie.wf.rerun.skip.nodes': skip_nodes})
@@ -126,7 +121,6 @@ def rerun(self, deployment_dir, fail_nodes=None, skip_nodes=None):
return self.oozie_id
-
def rerun_coord(self, deployment_dir, params):
jt_address = cluster.get_cluster_addr_for_job_submission()
@@ -138,7 +132,6 @@ def rerun_coord(self, deployment_dir, params):
return self.oozie_id
-
def rerun_bundle(self, deployment_dir, params):
jt_address = cluster.get_cluster_addr_for_job_submission()
@@ -149,7 +142,6 @@ def rerun_bundle(self, deployment_dir, params):
return self.oozie_id
-
def deploy(self):
try:
deployment_dir = self._create_deployment_dir()
@@ -160,10 +152,10 @@ def deploy(self):
if self.api.security_enabled:
jt_address = cluster.get_cluster_addr_for_job_submission()
- self._update_properties(jt_address) # Needed for coordinator deploying workflows
+ self._update_properties(jt_address) # Needed for coordinator deploying workflows
oozie_xml = self.job.to_xml(self.properties)
- self._do_as(self.user.username , self._copy_files, deployment_dir, oozie_xml)
+ self._do_as(self.user.username, self._copy_files, deployment_dir, oozie_xml)
if hasattr(self.job, 'actions'):
for action in self.job.actions:
@@ -176,7 +168,6 @@ def deploy(self):
return deployment_dir
-
def get_external_parameters(self, application_path):
"""From XML and job.properties HDFS files"""
deployment_dir = os.path.dirname(application_path)
@@ -192,11 +183,16 @@ def get_external_parameters(self, application_path):
def _get_external_parameters(self, xml, properties=None):
from oozie.models import DATASET_FREQUENCY
- parameters = dict([(var, '') for var in find_variables(xml, include_named=False) if not self._is_coordinator() or var not in DATASET_FREQUENCY])
+ parameters = dict(
+ [(var, '') for var in find_variables(xml, include_named=False) if not self._is_coordinator() or var not in DATASET_FREQUENCY])
if properties:
-        parameters.update(dict([re.split(r'(?<!\\)=', prop) for prop in properties.split('\n') if prop]))
diff --git a/desktop/libs/liboozie/src/liboozie/types.py b/desktop/libs/liboozie/src/liboozie/types.py
--- a/desktop/libs/liboozie/src/liboozie/types.py
+++ b/desktop/libs/liboozie/src/liboozie/types.py
-if sys.version_info[0] > 2:
- from io import BytesIO as string_io
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
- from cStringIO import StringIO as string_io
+from desktop.lib import i18n
+from desktop.lib.exceptions_renderable import PopupException
+from desktop.log.access import access_warn
+from liboozie.utils import catch_unicode_time, format_time, parse_timestamp
LOG = logging.getLogger()
@@ -256,7 +243,7 @@ def _fixup(self):
else:
self.conf_dict = {}
- self.title = ' %s-%s'% (self.actionNumber, format_time(self.nominalTime))
+ self.title = ' %s-%s' % (self.actionNumber, format_time(self.nominalTime))
class BundleAction(Action):
@@ -311,7 +298,7 @@ def get_progress(self):
"""How much more time before the next action."""
if self.lastAction is None:
return 0
-
+
next = mktime(parse_timestamp(self.lastAction))
start = mktime(parse_timestamp(self.startTime))
end = mktime(parse_timestamp(self.endTime))
@@ -325,7 +312,7 @@ def get_progress(self):
class Job(object):
- MAX_LOG_SIZE = 3500 * 20 # 20 pages
+ MAX_LOG_SIZE = 3500 * 20 # 20 pages
"""
Accessing log and definition will trigger Oozie API calls.
@@ -501,10 +488,10 @@ def get_absolute_url(self, format='html'):
def get_progress(self, full_node_list=None):
if self.status in ('SUCCEEDED', 'KILLED', 'FAILED'):
- return 100 # Case of decision nodes
+ return 100 # Case of decision nodes
else:
if full_node_list is not None: # Should remove the un-reached branches if decision node
- total_actions = len(full_node_list) - 1 # -1 because of Kill node
+ total_actions = len(full_node_list) - 1 # -1 because of Kill node
else:
total_actions = len(self.actions)
return int(sum([action.is_finished() for action in self.actions]) / float(max(total_actions, 1)) * 100)
@@ -560,7 +547,6 @@ def _fixup(self):
if self.pauseTime:
self.pauseTime = parse_timestamp(self.pauseTime)
-
# For when listing/mixing all the jobs together
self.id = self.coordJobId
self.appName = self.coordJobName
@@ -723,4 +709,3 @@ def __init__(self, api, json_dict, filters=None):
class BundleList(JobList):
def __init__(self, api, json_dict, filters=None):
super(BundleList, self).__init__(Bundle, 'bundlejobs', api, json_dict, filters)
-
diff --git a/desktop/libs/liboozie/src/liboozie/utils.py b/desktop/libs/liboozie/src/liboozie/utils.py
index fc53d641dce..e21240b0cbb 100644
--- a/desktop/libs/liboozie/src/liboozie/utils.py
+++ b/desktop/libs/liboozie/src/liboozie/utils.py
@@ -18,34 +18,20 @@
"""
Misc helper functions
"""
-from __future__ import print_function
-from future import standard_library
-standard_library.install_aliases()
-from past.builtins import basestring
-
-import logging
import re
-import sys
import time
-
+import logging
from datetime import datetime
-from dateutil.parser import parse
+from io import StringIO as string_io
from time import strftime
from xml.sax.saxutils import escape
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
- new_str = str
-else:
- try:
- from cStringIO import StringIO as string_io
- except:
- from StringIO import StringIO as string_io
- new_str = unicode
+from dateutil.parser import parse
+from past.builtins import basestring
LOG = logging.getLogger()
-_NAME_REGEX = re.compile('^[a-zA-Z][\-_a-zA-Z0-0]*$')
+_NAME_REGEX = re.compile(r'^[a-zA-Z][\-_a-zA-Z0-0]*$')
def catch_unicode_time(u_time):
@@ -67,7 +53,7 @@ def parse_timestamp(timestamp, time_format=None):
return time.strptime(timestamp, time_format)
except ValueError:
try:
- return time.strptime(re.sub(' \w+$', '', timestamp), time_format.replace(' %Z', ''))
+ return time.strptime(re.sub(r' \w+$', '', timestamp), time_format.replace(' %Z', ''))
except ValueError:
LOG.error("Failed to convert Oozie timestamp: %s" % time_format)
except Exception:
@@ -84,7 +70,7 @@ def config_gen(dic):
print("", file=sio)
# if dic's key contains <,>,& then it will be escaped and if dic's value contains ']]>' then ']]>' will be stripped
for k, v in sorted(dic.items()):
- print("\n %s\n \n\n" \
+ print("\n %s\n \n\n"
% (escape(k), v.replace(']]>', '') if isinstance(v, basestring) else v), file=sio)
print("", file=sio)
sio.flush()
@@ -95,23 +81,24 @@ def config_gen(dic):
def is_valid_node_name(name):
return _NAME_REGEX.match(name) is not None
+
def format_time(time, format='%d %b %Y %H:%M:%S'):
if time is None:
return ''
fmt_time = None
- if type(time) == new_str:
+ if type(time) is str:
return time
else:
try:
fmt_time = strftime(format, time)
- except:
+ except Exception:
fmt_time = None
if fmt_time is None:
try:
- fmt_time = strftime(format+" %f", time)
- except:
+ fmt_time = strftime(format + " %f", time)
+ except Exception:
fmt_time = None
return fmt_time
diff --git a/desktop/libs/librdbms/src/librdbms/conf.py b/desktop/libs/librdbms/src/librdbms/conf.py
index 9419d7a096e..f47ed42df89 100644
--- a/desktop/libs/librdbms/src/librdbms/conf.py
+++ b/desktop/libs/librdbms/src/librdbms/conf.py
@@ -17,16 +17,10 @@
import sys
-from desktop.lib.conf import Config, UnspecifiedConfigSection,\
- ConfigSection, coerce_json_dict,\
- coerce_password_from_script
-from desktop.conf import coerce_database
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext as _, gettext_lazy as _t
+from desktop.conf import coerce_database
+from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_json_dict, coerce_password_from_script
DATABASES = UnspecifiedConfigSection(
key="databases",
@@ -102,7 +96,8 @@ def config_validator(user):
if engine in ('sqlite', 'sqlite3'):
res.append((DATABASES[server].NAME, _("Database name should not be empty for the SQLite backend.")))
if engine == 'oracle':
- res.append((DATABASES[server].NAME, _("Database name should not be empty for the Oracle backend. It should be the SID of your database.")))
+ res.append((
+ DATABASES[server].NAME, _("Database name should not be empty for the Oracle backend. It should be the SID of your database.")))
if engine in ('postgresql_psycopg2', 'postgresql'):
res.append((DATABASES[server].NAME, _("Database name should not be empty for the PostgreSQL backend.")))
diff --git a/desktop/libs/librdbms/src/librdbms/design.py b/desktop/libs/librdbms/src/librdbms/design.py
index e4f75c9df6b..852c8e7f6b4 100644
--- a/desktop/libs/librdbms/src/librdbms/design.py
+++ b/desktop/libs/librdbms/src/librdbms/design.py
@@ -19,21 +19,17 @@
The HQLdesign class can (de)serialize a design to/from a QueryDict.
"""
-from builtins import object
+import sys
import json
import logging
-import sys
+from builtins import object
import django.http
+from django.utils.translation import gettext as _
-from beeswax.design import normalize_form_dict, denormalize_form_dict, split_statements
+from beeswax.design import denormalize_form_dict, normalize_form_dict, split_statements
from notebook.sql_utils import strip_trailing_semicolon
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
SERIALIZATION_VERSION = "0.0.1"
@@ -43,12 +39,12 @@ class SQLdesign(object):
"""
Represents an SQL design, with methods to perform (de)serialization.
"""
- _QUERY_ATTRS = [ 'query', 'type', 'database', 'server' ]
+ _QUERY_ATTRS = ['query', 'type', 'database', 'server']
def __init__(self, form=None, query_type=None):
"""Initialize the design from a valid form data."""
if form is not None:
- self._data_dict = dict(query = normalize_form_dict(form, SQLdesign._QUERY_ATTRS))
+ self._data_dict = dict(query=normalize_form_dict(form, SQLdesign._QUERY_ATTRS))
if query_type is not None:
self._data_dict['query']['type'] = query_type
@@ -98,7 +94,8 @@ def get_query_statement(self, n=0):
@property
def statements(self):
sql_query = strip_trailing_semicolon(self.sql_query)
- return [strip_trailing_semicolon(statement.strip()) for (start_row, start_col), (end_row, end_col), statement in split_statements(sql_query)]
+ return [
+ strip_trailing_semicolon(statement.strip()) for (start_row, start_col), (end_row, end_col), statement in split_statements(sql_query)]
@staticmethod
def loads(data):
@@ -119,4 +116,4 @@ def loads(data):
design = SQLdesign()
design._data_dict = dic
- return design
\ No newline at end of file
+ return design
diff --git a/desktop/libs/librdbms/src/librdbms/server/mysql_lib.py b/desktop/libs/librdbms/src/librdbms/server/mysql_lib.py
index 18b2e748485..9dd23d785cf 100644
--- a/desktop/libs/librdbms/src/librdbms/server/mysql_lib.py
+++ b/desktop/libs/librdbms/src/librdbms/server/mysql_lib.py
@@ -15,8 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import sys
+import logging
try:
import MySQLdb as Database
@@ -33,23 +33,20 @@
from django.core.exceptions import ImproperlyConfigured
raise ImproperlyConfigured("MySQLdb-1.2.1p2 or newer is required; you have %s" % Database.__version__)
+from django.utils.translation import gettext as _
from MySQLdb.converters import FIELD_TYPE
from librdbms.server.rdbms_base_lib import BaseRDBMSDataTable, BaseRDBMSResult, BaseRDMSClient
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
-class DataTable(BaseRDBMSDataTable): pass
+class DataTable(BaseRDBMSDataTable):
+ pass
-class Result(BaseRDBMSResult): pass
+class Result(BaseRDBMSResult):
+ pass
def _convert_types(t):
@@ -109,7 +106,6 @@ def __init__(self, *args, **kwargs):
super(MySQLClient, self).__init__(*args, **kwargs)
self.connection = Database.connect(**self._conn_params)
-
@property
def _conn_params(self):
params = {
@@ -128,7 +124,6 @@ def _conn_params(self):
return params
-
def use(self, database):
if 'db' in self._conn_params and self._conn_params['db'] != database:
raise RuntimeError(_("Database '%s' is not allowed. Please use database '%s'.") % (database, self._conn_params['db']))
@@ -137,7 +132,6 @@ def use(self, database):
cursor.execute("USE `%s`" % database)
self.connection.commit()
-
def execute_statement(self, statement):
cursor = self.connection.cursor()
cursor.execute(statement)
@@ -149,7 +143,6 @@ def execute_statement(self, statement):
columns = []
return self.data_table_cls(cursor, columns)
-
def get_databases(self):
cursor = self.connection.cursor()
cursor.execute("SHOW DATABASES")
@@ -163,7 +156,6 @@ def get_databases(self):
else:
return databases
-
def get_tables(self, database, table_names=[]):
cursor = self.connection.cursor()
query = 'SHOW TABLES'
@@ -175,7 +167,6 @@ def get_tables(self, database, table_names=[]):
self.connection.commit()
return [row[0] for row in cursor.fetchall()]
-
def get_columns(self, database, table, names_only=True):
cursor = self.connection.cursor()
cursor.execute("SHOW COLUMNS FROM %s.%s" % (database, table))
@@ -186,7 +177,6 @@ def get_columns(self, database, table, names_only=True):
columns = [dict(name=row[0], type=row[1], comment='') for row in cursor.fetchall()]
return columns
-
def get_sample_data(self, database, table, column=None, limit=100):
column = '`%s`' % column if column else '*'
statement = "SELECT %s FROM `%s`.`%s` LIMIT %d" % (column, database, table, limit)
diff --git a/desktop/libs/libsaml/src/libsaml/conf.py b/desktop/libs/libsaml/src/libsaml/conf.py
index 7b36c869db3..546ef3c1222 100644
--- a/desktop/libs/libsaml/src/libsaml/conf.py
+++ b/desktop/libs/libsaml/src/libsaml/conf.py
@@ -15,18 +15,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import os
+import sys
import json
import logging
-import os
import subprocess
-import sys
-from desktop.lib.conf import Config, coerce_bool, coerce_csv, coerce_password_from_script
+from django.utils.translation import gettext as _, gettext_lazy as _t
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from desktop.lib.conf import Config, coerce_bool, coerce_csv, coerce_password_from_script
LOG = logging.getLogger()
@@ -212,6 +209,7 @@ def dict_list_map(value):
default="",
help=_t("To log users out of magic-sso, CDP control panel use Logout URL"))
+
def get_key_file_password():
password = os.environ.get('HUE_SAML_KEY_FILE_PASSWORD')
if password is not None:
@@ -230,6 +228,7 @@ def config_validator(user):
res.append(("libsaml.username_source", _("username_source not configured properly. SAML integration may not work.")))
return res
+
def get_logout_redirect_url():
# This logic was derived from KNOX.
prod_url = "consoleauth.altus.cloudera.com"
diff --git a/desktop/libs/libsaml/src/libsaml/tests.py b/desktop/libs/libsaml/src/libsaml/tests.py
index e9ab55ea0b5..ecb727887bb 100644
--- a/desktop/libs/libsaml/src/libsaml/tests.py
+++ b/desktop/libs/libsaml/src/libsaml/tests.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-## -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
# Licensed to Cloudera, Inc. under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -17,19 +17,16 @@
# limitations under the License.
import sys
+from unittest.mock import Mock, patch
from libsaml.conf import xmlsec
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock
-else:
- from mock import patch, Mock
def test_xmlsec_dynamic_default_no_which():
with patch('libsaml.conf.subprocess') as subprocess:
subprocess.Popen = Mock(
- side_effect = OSError('No such file or directory. `which` command is not present')
+ side_effect=OSError('No such file or directory. `which` command is not present')
)
assert '/usr/local/bin/xmlsec1' == xmlsec()
diff --git a/desktop/libs/libsaml/src/libsaml/urls.py b/desktop/libs/libsaml/src/libsaml/urls.py
index d1eafe16fda..bdb1d16fbf3 100644
--- a/desktop/libs/libsaml/src/libsaml/urls.py
+++ b/desktop/libs/libsaml/src/libsaml/urls.py
@@ -15,18 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import sys
+import logging
-if sys.version_info[0] > 2:
- from django.urls import re_path
-else:
- from django.conf.urls import url as re_path
+from django.urls import re_path
LOG = logging.getLogger()
try:
from djangosaml2 import views as djangosaml2_views
+
from libsaml import views as libsaml_views
except ImportError:
LOG.warning('djangosaml2 module not found')
diff --git a/desktop/libs/libsentry/src/libsentry/api.py b/desktop/libs/libsentry/src/libsentry/api.py
index 62e96d53f0f..e22a3bade64 100644
--- a/desktop/libs/libsentry/src/libsentry/api.py
+++ b/desktop/libs/libsentry/src/libsentry/api.py
@@ -15,24 +15,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
import sys
+import logging
import threading
+from builtins import object
+
+from django.utils.translation import gettext as _
from desktop.lib.exceptions import StructuredThriftTransportException
from desktop.lib.exceptions_renderable import PopupException
-
from libsentry.client import SentryClient
-from libsentry.sentry_ha import get_next_available_server, create_client
+from libsentry.sentry_ha import create_client, get_next_available_server
from libsentry.sentry_site import get_sentry_server, is_ha_enabled
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
API_CACHE = None
@@ -233,7 +228,6 @@ def rename_sentry_privileges(self, oldAuthorizable, newAuthorizable):
else:
raise SentryException(response)
-
def _massage_privilege(self, privilege):
return {
'scope': privilege.privilegeScope,
@@ -247,7 +241,6 @@ def _massage_privilege(self, privilege):
'column': privilege.columnName,
}
-
def _massage_authorizable(self, authorizable):
return {
'server': authorizable.server,
diff --git a/desktop/libs/libsentry/src/libsentry/api2.py b/desktop/libs/libsentry/src/libsentry/api2.py
index 82be39f88e8..0f1cab6cd1c 100644
--- a/desktop/libs/libsentry/src/libsentry/api2.py
+++ b/desktop/libs/libsentry/src/libsentry/api2.py
@@ -15,24 +15,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
import sys
+import logging
import threading
+from builtins import object
+
+from django.utils.translation import gettext as _
from desktop.lib.exceptions import StructuredThriftTransportException
from desktop.lib.exceptions_renderable import PopupException
-
from libsentry.client2 import SentryClient
-from libsentry.sentry_ha import get_next_available_server, create_client
+from libsentry.sentry_ha import create_client, get_next_available_server
from libsentry.sentry_site import get_sentry_server, is_ha_enabled
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
API_CACHE = None
@@ -49,7 +44,8 @@ def decorator(*args, **kwargs):
raise PopupException(_('Failed to connect to Sentry server %s, and Sentry HA is not enabled.') % args[0].client.host, detail=e)
else:
LOG.warning("Failed to connect to Sentry server %s, will attempt to find next available host." % args[0].client.host)
- server, attempts = get_next_available_server(client_class=SentryClient, username=args[0].client.username, failed_host=args[0].client.host, component=args[0].client.component)
+ server, attempts = get_next_available_server(
+ client_class=SentryClient, username=args[0].client.username, failed_host=args[0].client.host, component=args[0].client.component)
if server is not None:
args[0].client = create_client(SentryClient, args[0].client.username, server, args[0].client.component)
set_api_cache(server)
@@ -236,7 +232,6 @@ def rename_sentry_privileges(self, oldAuthorizable, newAuthorizable):
else:
raise SentryException(response)
-
def _massage_privilege(self, privilege):
return {
'component': privilege.component,
@@ -248,11 +243,9 @@ def _massage_privilege(self, privilege):
'grantOption': privilege.grantOption == 1,
}
-
def _massage_authorizable(self, authorizables):
return [{'type': auth.type, 'name': auth.name} for auth in authorizables]
-
def _massage_string_authorizable(self, authorizables):
return [{'type': auth.split('=')[0], 'name': auth.split('=')[1]} for auth in authorizables.split('->')]
diff --git a/desktop/libs/libsentry/src/libsentry/conf.py b/desktop/libs/libsentry/src/libsentry/conf.py
index 0ea4bab762a..b8a22c08974 100644
--- a/desktop/libs/libsentry/src/libsentry/conf.py
+++ b/desktop/libs/libsentry/src/libsentry/conf.py
@@ -15,29 +15,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import os
-import sys
-
-from desktop.lib.conf import Config
+import logging
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t
-else:
- from django.utils.translation import ugettext_lazy as _t
+from django.utils.translation import gettext_lazy as _t
+from desktop.lib.conf import Config
LOG = logging.getLogger()
-HOSTNAME=Config(
+HOSTNAME = Config(
key='hostname',
help=_t('Hostname or IP of server.'),
type=str,
default='localhost',
)
-PORT=Config(
+PORT = Config(
key='port',
help=_t('Port the sentry service is running on.'),
type=int,
@@ -50,7 +45,7 @@
default=os.environ.get("SENTRY_CONF_DIR", '/etc/sentry/conf')
)
-PRIVILEGE_CHECKER_CACHING=Config(
+PRIVILEGE_CHECKER_CACHING = Config(
key='privilege_checker_caching',
help=_t('Number of seconds when the privilege list of a user is cached.'),
type=int,
@@ -59,7 +54,7 @@
def is_enabled():
- from hadoop import cluster # Avoid dependencies conflicts
+ from hadoop import cluster # Avoid dependencies conflicts
cluster = cluster.get_cluster_conf_for_job_submission()
return HOSTNAME.get() != 'localhost' and cluster.SECURITY_ENABLED.get()
diff --git a/desktop/libs/libsentry/src/libsentry/sentry_ha.py b/desktop/libs/libsentry/src/libsentry/sentry_ha.py
index fb5726fce9a..f20e5cdfa4b 100644
--- a/desktop/libs/libsentry/src/libsentry/sentry_ha.py
+++ b/desktop/libs/libsentry/src/libsentry/sentry_ha.py
@@ -15,22 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import sys
import time
+import logging
+
+from django.utils.translation import gettext as _
from desktop.lib.exceptions import StructuredThriftTransportException
from desktop.lib.exceptions_renderable import PopupException
-
from libsentry.client2 import SentryClient
from libsentry.sentry_site import get_sentry_server
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -67,7 +62,8 @@ def get_next_available_server(client_class, username, failed_host=None, componen
client = create_client_fn(client_class, username, next_server, component)
client.list_sentry_roles_by_group(groupName='*')
# If above operation succeeds, return client
- LOG.info('Successfully connected to Sentry server %s, after attempting [%s], returning client.' % (client.host, ', '.join(attempted_hosts)))
+ LOG.info(
+ 'Successfully connected to Sentry server %s, after attempting [%s], returning client.' % (client.host, ', '.join(attempted_hosts)))
return next_server, attempted_hosts
except StructuredThriftTransportException as e:
# If we have come back around to the original failed client, exit
diff --git a/desktop/libs/libsentry/src/libsentry/sentry_site.py b/desktop/libs/libsentry/src/libsentry/sentry_site.py
index 3ec02f83d70..817667db306 100644
--- a/desktop/libs/libsentry/src/libsentry/sentry_site.py
+++ b/desktop/libs/libsentry/src/libsentry/sentry_site.py
@@ -15,24 +15,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import sys
import errno
+import random
import logging
import os.path
-import random
-import sys
-from hadoop import confparse
+from django.utils.translation import gettext as _
from desktop.lib import security_util
from desktop.lib.exceptions_renderable import PopupException
-
-from libsentry.conf import SENTRY_CONF_DIR, HOSTNAME, PORT
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from hadoop import confparse
+from libsentry.conf import HOSTNAME, PORT, SENTRY_CONF_DIR
LOG = logging.getLogger()
@@ -114,7 +108,7 @@ def get_sentry_server(current_host=None):
servers = get_sentry_servers()
hosts = [s['hostname'] for s in servers]
- next_idx = random.randint(0, len(servers)-1)
+ next_idx = random.randint(0, len(servers) - 1)
if current_host is not None and hosts:
try:
current_idx = hosts.index(current_host)
@@ -161,7 +155,7 @@ def get_sentry_servers():
def _parse_sites():
global _SITE_DICT
- _SITE_DICT ={}
+ _SITE_DICT = {}
paths = [
('sentry', os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')),
diff --git a/desktop/libs/libsentry/src/libsentry/test_client.py b/desktop/libs/libsentry/src/libsentry/test_client.py
index 0985d9bd1b3..ee79a503e20 100644
--- a/desktop/libs/libsentry/src/libsentry/test_client.py
+++ b/desktop/libs/libsentry/src/libsentry/test_client.py
@@ -17,19 +17,12 @@
import os
import shutil
-import sys
import tempfile
from libsentry import sentry_site
-from libsentry.conf import SENTRY_CONF_DIR
-from libsentry.sentry_site import get_sentry_server_principal,\
- get_sentry_server_admin_groups
from libsentry.client import SentryClient
-
-if sys.version_info[0] > 2:
- open_file = open
-else:
- open_file = file
+from libsentry.conf import SENTRY_CONF_DIR
+from libsentry.sentry_site import get_sentry_server_admin_groups, get_sentry_server_principal
def test_security_plain():
@@ -38,7 +31,7 @@ def test_security_plain():
try:
xml = sentry_site_xml(provider='default')
- open_file(os.path.join(tmpdir, 'sentry-site.xml'), 'w').write(xml)
+ open(os.path.join(tmpdir, 'sentry-site.xml'), 'w').write(xml)
sentry_site.reset()
assert 'test/test.com@TEST.COM' == get_sentry_server_principal()
@@ -47,7 +40,7 @@ def test_security_plain():
security = SentryClient('test.com', 11111, 'test')._get_security()
assert 'test' == security['kerberos_principal_short_name']
- assert False == security['use_sasl']
+ assert False is security['use_sasl']
assert 'NOSASL' == security['mechanism']
finally:
sentry_site.reset()
@@ -61,12 +54,12 @@ def test_security_kerberos():
try:
xml = sentry_site_xml(provider='default', authentication='kerberos')
- open_file(os.path.join(tmpdir, 'sentry-site.xml'), 'w').write(xml)
+ open(os.path.join(tmpdir, 'sentry-site.xml'), 'w').write(xml)
sentry_site.reset()
security = SentryClient('test.com', 11111, 'test')._get_security()
- assert True == security['use_sasl']
+ assert True is security['use_sasl']
assert 'GSSAPI' == security['mechanism']
finally:
sentry_site.reset()
diff --git a/desktop/libs/libsolr/src/libsolr/api.py b/desktop/libs/libsolr/src/libsolr/api.py
index d8cf488f277..b2ed89337de 100644
--- a/desktop/libs/libsolr/src/libsolr/api.py
+++ b/desktop/libs/libsolr/src/libsolr/api.py
@@ -16,52 +16,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-from builtins import next
-from builtins import str
-from builtins import zip
-from builtins import object
+import re
import json
import logging
-import re
-import sys
-
+import urllib.error
+import urllib.parse
+import urllib.request
from itertools import groupby
+from urllib.parse import quote as urllib_quote, unquote as urllib_unquote
+
+from django.utils.translation import gettext as _
from dashboard.facet_builder import _compute_range_facet
from dashboard.models import Collection2
-from desktop.lib.exceptions_renderable import PopupException
from desktop.conf import SERVER_USER
+from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.i18n import force_unicode
-from desktop.lib.rest.http_client import HttpClient, RestException
from desktop.lib.rest import resource
-
+from desktop.lib.rest.http_client import HttpClient, RestException
from libsolr.conf import SSL_CERT_CA_VERIFY
-if sys.version_info[0] > 2:
- import urllib.request, urllib.parse, urllib.error
- from urllib.parse import quote as urllib_quote
- from urllib.parse import unquote as urllib_unquote
- new_str = str
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
- from urllib import quote as urllib_quote
- from urllib import unquote as urllib_unquote
- new_str = unicode
-
LOG = logging.getLogger()
try:
- from search.conf import EMPTY_QUERY, SECURITY_ENABLED, SOLR_URL, DOWNLOAD_LIMIT
+ from search.conf import DOWNLOAD_LIMIT, EMPTY_QUERY, SECURITY_ENABLED, SOLR_URL
except ImportError as e:
LOG.warning('Solr Search is not enabled')
def utf_quoter(what):
- return urllib_quote(new_str(what).encode('utf-8'), safe='~@#$&()*!+=;,.?/\'')
+ return urllib_quote(str(what).encode('utf-8'), safe='~@#$&()*!+=;,.?/\'')
class SolrApi(object):
@@ -92,7 +77,6 @@ def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca
if self.security_enabled:
self._root.invoke('HEAD', '/')
-
def query(self, collection, query):
solr_query = {}
json_facets = {}
@@ -192,7 +176,7 @@ def query(self, collection, query):
'numBuckets': True,
'allBuckets': True,
'sort': sort
- #'prefix': '' # Forbidden on numeric fields
+ # 'prefix': '' # Forbidden on numeric fields
})
json_facets[facet['id']] = _f['facet'][dim_key]
elif facet['type'] == 'function':
@@ -200,7 +184,7 @@ def query(self, collection, query):
json_facets[facet['id']] = self._get_aggregate_function(facet['properties']['facets'][0])
if facet['properties']['compare']['is_enabled']:
# TODO: global compare override
- unit = re.split('\d+', facet['properties']['compare']['gap'])[1]
+ unit = re.split(r'\d+', facet['properties']['compare']['gap'])[1]
json_facets[facet['id']] = {
'type': 'range',
'field': collection['timeFilter'].get('field'),
@@ -246,7 +230,7 @@ def query(self, collection, query):
if nested_fields:
fl += urllib_unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields)))
- if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents
+ if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents
id_field = collection.get('idField', 'id')
params += (
('mlt', 'true'),
@@ -256,8 +240,8 @@ def query(self, collection, query):
('mlt.maxdf', 50),
('mlt.maxntp', 1000),
('mlt.count', 10),
- #('mlt.minwl', 1),
- #('mlt.maxwl', 1),
+ # ('mlt.minwl', 1),
+ # ('mlt.maxwl', 1),
)
fl = '*'
@@ -270,7 +254,7 @@ def query(self, collection, query):
('hl.fragsize', 1000),
)
- #if query.get('timezone'):
+ # if query.get('timezone'):
# params += (('TZ', query.get('timezone')),)
if collection['template']['fieldsSelected']:
@@ -296,7 +280,6 @@ def query(self, collection, query):
return self._get_json(response)
-
def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, can_range=None):
facet = facets[0]
f_name = 'dim_%02d:%s' % (dim, facet['field'])
@@ -324,10 +307,10 @@ def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, ca
'allBuckets': True,
'sort': sort,
'missing': facet.get('missing', False)
- #'prefix': '' # Forbidden on numeric fields
+ # 'prefix': '' # Forbidden on numeric fields
}
if int(facet['mincount']):
- _f[f_name]['mincount'] = int(facet['mincount']) # Forbidden on n > 0 field if mincount = 0
+ _f[f_name]['mincount'] = int(facet['mincount']) # Forbidden on n > 0 field if mincount = 0
if 'start' in facet and not facet.get('type') == 'field':
_f[f_name].update({
@@ -339,14 +322,14 @@ def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, ca
# Only on dim 1 currently
if can_range or (timeFilter and timeFilter['time_field'] == facet['field']
- and (widget['id'] not in timeFilter['time_filter_overrides'])): # or facet['widgetType'] != 'bucket-widget'):
+ and (widget['id'] not in timeFilter['time_filter_overrides'])): # or facet['widgetType'] != 'bucket-widget'):
facet['widgetType'] = widget['widgetType']
_f[f_name].update(self._get_time_filter_query(timeFilter, facet, collection))
if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count':
_f['subcount'] = self._get_aggregate_function(facets[-1])
- if len(facets) > 1: # Get n+1 dimension
+ if len(facets) > 1: # Get n+1 dimension
if facets[1]['aggregate']['function'] == 'count':
self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1, timeFilter, collection)
else:
@@ -361,10 +344,9 @@ def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, ca
agg_function = self._get_aggregate_function(_f_agg)
_f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function
else:
- self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter, collection) # Get n+1 dimension
+ self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter, collection) # Get n+1 dimension
break
-
def select(self, collection, query=None, rows=100, start=0):
if query is None:
query = EMPTY_QUERY.get()
@@ -379,7 +361,6 @@ def select(self, collection, query=None, rows=100, start=0):
response = self._root.get('%s/select' % collection, params)
return self._get_json(response)
-
def suggest(self, collection, query):
try:
params = self._get_params() + (
@@ -397,8 +378,7 @@ def suggest(self, collection, query):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
- def collections(self): # To drop, used in indexer v1
+ def collections(self): # To drop, used in indexer v1
try:
params = self._get_params() + (
('detail', 'true'),
@@ -409,7 +389,6 @@ def collections(self): # To drop, used in indexer v1
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def collections2(self):
try:
params = self._get_params() + (
@@ -420,7 +399,6 @@ def collections2(self):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def config(self, name):
try:
params = self._get_params() + (
@@ -431,7 +409,6 @@ def config(self, name):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def configs(self):
try:
params = self._get_params() + (
@@ -442,7 +419,6 @@ def configs(self):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def create_config(self, name, base_config, immutable=False):
try:
params = self._get_params() + (
@@ -456,7 +432,6 @@ def create_config(self, name, base_config, immutable=False):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def delete_config(self, name):
response = {'status': -1, 'message': ''}
@@ -476,7 +451,6 @@ def delete_config(self, name):
raise PopupException(e, title=_('Error while accessing Solr'))
return response
-
def list_aliases(self):
try:
params = self._get_params() + (
@@ -487,14 +461,12 @@ def list_aliases(self):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def collection_or_core(self, hue_collection):
if hue_collection.is_core_only:
return self.core(hue_collection.name)
else:
return self.collection(hue_collection.name)
-
def collection(self, name):
try:
collections = self.collections()
@@ -502,7 +474,6 @@ def collection(self, name):
except Exception as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def create_collection2(self, name, config_name=None, shards=1, replication=1, **kwargs):
try:
params = self._get_params() + (
@@ -528,7 +499,6 @@ def create_collection2(self, name, config_name=None, shards=1, replication=1, **
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def update_config(self, name, properties):
try:
params = self._get_params() + (
@@ -541,7 +511,6 @@ def update_config(self, name, properties):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def add_fields(self, name, fields):
try:
params = self._get_params() + (
@@ -556,7 +525,6 @@ def add_fields(self, name, fields):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def create_core(self, name, instance_dir, shards=1, replication=1):
try:
params = self._get_params() + (
@@ -579,7 +547,6 @@ def create_core(self, name, instance_dir, shards=1, replication=1):
else:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def create_alias(self, name, collections):
try:
params = self._get_params() + (
@@ -597,7 +564,6 @@ def create_alias(self, name, collections):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def delete_alias(self, name):
try:
params = self._get_params() + (
@@ -614,7 +580,6 @@ def delete_alias(self, name):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def delete_collection(self, name):
response = {'status': -1, 'message': ''}
@@ -634,7 +599,6 @@ def delete_collection(self, name):
raise PopupException(e, title=_('Error while accessing Solr'))
return response
-
def remove_core(self, name):
try:
params = self._get_params() + (
@@ -653,7 +617,6 @@ def remove_core(self, name):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def cores(self):
try:
params = self._get_params() + (
@@ -673,7 +636,6 @@ def core(self, core):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def get_schema(self, collection):
try:
params = self._get_params() + (
@@ -775,7 +737,6 @@ def terms(self, core, field, properties=None):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def info_system(self):
try:
params = self._get_params() + (
@@ -787,10 +748,9 @@ def info_system(self):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def sql(self, collection, statement):
try:
- if 'limit' not in statement.lower(): # rows is not supported
+ if 'limit' not in statement.lower(): # rows is not supported
statement = statement + ' LIMIT 100'
params = self._get_params() + (
@@ -818,7 +778,6 @@ def get(self, core, doc_id):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def export(self, name, query, fl, sort, rows=100):
try:
params = self._get_params() + (
@@ -833,7 +792,6 @@ def export(self, name, query, fl, sort, rows=100):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def update(self, collection_or_core_name, data, content_type='csv', version=None, **kwargs):
if content_type == 'csv':
content_type = 'application/csv'
@@ -859,11 +817,10 @@ def update(self, collection_or_core_name, data, content_type='csv', version=None
response = self._root.post('%s/update' % collection_or_core_name, contenttype=content_type, params=params, data=data)
return self._get_json(response)
-
# Deprecated
def aliases(self):
try:
- params = self._get_params() + ( # Waiting for SOLR-4968
+ params = self._get_params() + ( # Waiting for SOLR-4968
('detail', 'true'),
('path', '/aliases.json'),
)
@@ -872,7 +829,6 @@ def aliases(self):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
# Deprecated
def create_collection(self, name, shards=1, replication=1):
try:
@@ -894,7 +850,6 @@ def create_collection(self, name, shards=1, replication=1):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
# Deprecated
def remove_collection(self, name):
try:
@@ -913,7 +868,6 @@ def remove_collection(self, name):
except RestException as e:
raise PopupException(e, title=_('Error while accessing Solr'))
-
def _get_params(self):
if self.security_enabled:
return (('doAs', self._user),)
@@ -975,7 +929,7 @@ def _get_time_filter_query(self, timeFilter, facet, collection):
stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
properties['start'] = None
properties['end'] = None
- else: # The user has zoomed in. Only show that section.
+ else: # The user has zoomed in. Only show that section.
stat_facet = {'min': properties['min'], 'max': properties['max']}
_compute_range_facet(facet['widgetType'], stat_facet, props, properties['start'], properties['end'],
SLOTS=properties['slot'])
@@ -996,7 +950,7 @@ def _get_time_filter_query(self, timeFilter, facet, collection):
stat_facet = stats_json['stats']['stats_fields'][facet['field']]
properties['start'] = None
properties['end'] = None
- else: # the user has zoomed in. Only show that section.
+ else: # the user has zoomed in. Only show that section.
stat_facet = {'min': properties['min'], 'max': properties['max']}
_compute_range_facet(facet['widgetType'], stat_facet, props, properties['start'], properties['end'], SLOTS=properties['slot'])
return {
@@ -1028,12 +982,12 @@ def _get_fq(self, collection, query):
for fq in merged_fqs:
if fq['type'] == 'field':
- fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
+ fields = fq['field'] if type(fq['field']) is list else [fq['field']] # 2D facets support
for field in fields:
f = []
for _filter in fq['filter']:
- values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
- if fields.index(field) < len(values): # Lowest common field denominator
+ values = _filter['value'] if type(_filter['value']) is list else [_filter['value']] # 2D facets support
+ if fields.index(field) < len(values): # Lowest common field denominator
value = values[fields.index(field)]
if value or value is False:
exclude = '-' if _filter['exclude'] else ''
@@ -1042,7 +996,7 @@ def _get_fq(self, collection, query):
f.append('%s%s:"%s"' % (exclude, field, value))
else:
f.append('%s{!field f=%s}%s' % (exclude, field, value))
- else: # Handle empty value selection that are returned using solr facet.missing
+ else: # Handle empty value selection that are returned using solr facet.missing
value = "*"
exclude = '-'
f.append('%s%s:%s' % (exclude, field, value))
@@ -1070,7 +1024,6 @@ def _get_fq(self, collection, query):
return params
-
def _get_dimension_aggregates(self, facets):
aggregates = []
for agg in facets:
@@ -1080,14 +1033,12 @@ def _get_dimension_aggregates(self, facets):
return aggregates
return aggregates
-
def _get_nested_fields(self, collection):
if collection and collection.get('nested') and collection['nested']['enabled']:
return [field['filter'] for field in self._flatten_schema(collection['nested']['schema']) if field['selected']]
else:
return []
-
def _flatten_schema(self, level):
fields = []
for field in level:
@@ -1096,20 +1047,18 @@ def _flatten_schema(self, level):
fields.extend(self._flatten_schema(field['values']))
return fields
-
@classmethod
def _get_json(cls, response):
- if type(response) != dict:
+ if type(response) is not dict:
# Got 'plain/text' mimetype instead of 'application/json'
try:
response = json.loads(response)
except ValueError as e:
# Got some null bytes in the response
- LOG.error('%s: %s' % (new_str(e), repr(response)))
+ LOG.error('%s: %s' % (str(e), repr(response)))
response = json.loads(response.replace('\x00', ''))
return response
-
def uniquekey(self, collection):
try:
params = self._get_params() + (
@@ -1123,12 +1072,12 @@ def uniquekey(self, collection):
GAPS = {
'5MINUTES': {
- 'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
- 'timeline-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
- 'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
- 'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
- 'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots
- 'pie-widget': {'coeff': '+1', 'unit': 'MINUTES'} # ~10 slots
+ 'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
+ 'timeline-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
+ 'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
+ 'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
+ 'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots
+ 'pie-widget': {'coeff': '+1', 'unit': 'MINUTES'} # ~10 slots
},
'30MINUTES': {
'histogram-widget': {'coeff': '+20', 'unit': 'SECONDS'},
diff --git a/desktop/libs/libsolr/src/libsolr/conf.py b/desktop/libs/libsolr/src/libsolr/conf.py
index dafd60e978b..fa3fc3ef8ec 100644
--- a/desktop/libs/libsolr/src/libsolr/conf.py
+++ b/desktop/libs/libsolr/src/libsolr/conf.py
@@ -15,23 +15,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
import logging
-import sys
+from urllib.parse import urlparse
+
+from django.utils.translation import gettext_lazy as _t
-from desktop.lib.conf import Config, coerce_bool
from desktop.conf import default_ssl_validate
+from desktop.lib.conf import Config, coerce_bool
from libzookeeper.conf import ENSEMBLE
-if sys.version_info[0] > 2:
- from urllib.parse import urlparse
- new_str = str
- from django.utils.translation import gettext_lazy as _t
-else:
- from django.utils.translation import ugettext_lazy as _t
- from urlparse import urlparse
-
LOG = logging.getLogger()
@@ -49,9 +41,9 @@ def zkensemble_path():
"""
try:
parsed = urlparse(ENSEMBLE.get())
- if parsed.port == 9983: # Standalone Solr cloud
+ if parsed.port == 9983: # Standalone Solr cloud
return ''
- except:
+ except Exception:
LOG.warning('Failed to get Zookeeper ensemble path')
return '/solr'
diff --git a/desktop/libs/libzookeeper/src/libzookeeper/conf.py b/desktop/libs/libzookeeper/src/libzookeeper/conf.py
index 173e89d4667..dc902258513 100644
--- a/desktop/libs/libzookeeper/src/libzookeeper/conf.py
+++ b/desktop/libs/libzookeeper/src/libzookeeper/conf.py
@@ -15,19 +15,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
import logging
-import sys
+from urllib.parse import urlparse
from desktop.lib.conf import Config, coerce_string
-if sys.version_info[0] > 2:
- from urllib.parse import urlparse
- new_str = str
-else:
- from urlparse import urlparse
-
LOG = logging.getLogger()
@@ -45,7 +37,7 @@ def zkensemble():
clusters = CLUSTERS.get()
if clusters['default'].HOST_PORTS.get() != 'localhost:2181':
return '%s' % clusters['default'].HOST_PORTS.get()
- except:
+ except Exception:
LOG.warning('Could not get zookeeper ensemble from the zookeeper app')
if 'search' in settings.INSTALLED_APPS:
@@ -53,20 +45,20 @@ def zkensemble():
from search.conf import SOLR_URL
parsed = urlparse(SOLR_URL.get())
return "%s:2181" % (parsed.hostname or 'localhost')
- except:
+ except Exception:
LOG.warning('Could not get zookeeper ensemble from the search app')
return "localhost:2181"
-ENSEMBLE=Config(
+ENSEMBLE = Config(
"ensemble",
help="ZooKeeper ensemble. Comma separated list of Host/Port, e.g. localhost:2181,localhost:2182,localhost:2183",
dynamic_default=zkensemble,
type=coerce_string,
)
-PRINCIPAL_NAME=Config(
+PRINCIPAL_NAME = Config(
"principal_name",
help="Name of Kerberos principal when using security",
default="zookeeper",
diff --git a/desktop/libs/metadata/src/metadata/analytic_db_api.py b/desktop/libs/metadata/src/metadata/analytic_db_api.py
index 916bbf0f6ac..f55a2b98d8c 100644
--- a/desktop/libs/metadata/src/metadata/analytic_db_api.py
+++ b/desktop/libs/metadata/src/metadata/analytic_db_api.py
@@ -15,21 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import sys
+import logging
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
from desktop.lib.django_util import JsonResponse
from desktop.lib.i18n import force_unicode
from notebook.connectors.altus import AnalyticDbApi, DataWarehouse2Api
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
diff --git a/desktop/libs/metadata/src/metadata/assistant/queries_utils.py b/desktop/libs/metadata/src/metadata/assistant/queries_utils.py
index a3e751b1c01..a5baa84077a 100644
--- a/desktop/libs/metadata/src/metadata/assistant/queries_utils.py
+++ b/desktop/libs/metadata/src/metadata/assistant/queries_utils.py
@@ -15,18 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-import json
-import sys
import os
+import sys
+import json
+import logging
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from django.utils.translation import gettext as _
LOG = logging.getLogger()
+
def get_all_queries():
with open(os.path.join(os.path.dirname(__file__), 'data/queries.json')) as file:
queries = json.load(file)
diff --git a/desktop/libs/metadata/src/metadata/catalog/atlas_client.py b/desktop/libs/metadata/src/metadata/catalog/atlas_client.py
index b0e814a7275..a5b4ea6e747 100644
--- a/desktop/libs/metadata/src/metadata/catalog/atlas_client.py
+++ b/desktop/libs/metadata/src/metadata/catalog/atlas_client.py
@@ -16,23 +16,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import json
-import logging
import re
import sys
+import json
+import logging
+
import requests
+from django.utils.translation import gettext as _
from desktop.lib.exceptions_renderable import raise_popup_exception
from desktop.lib.rest import resource
from desktop.lib.rest.http_client import HttpClient, RestException
-
+from metadata.catalog.base import Api, CatalogApiException, CatalogAuthException, CatalogEntityDoesNotExistException
from metadata.conf import CATALOG, get_catalog_search_cluster
-from metadata.catalog.base import CatalogAuthException, CatalogApiException, CatalogEntityDoesNotExistException, Api
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
LOG = logging.getLogger()
@@ -59,9 +55,9 @@ class AtlasApi(Api):
'hive_column': 'FIELD'
}
- CLASSIFICATION_RE = re.compile('(?:tag|tags|classification)\s*\:\s*(?:(?:\"([^"]+)\")|([^ ]+))\s*', re.IGNORECASE)
- TYPE_RE = re.compile('type\s*\:\s*([^ ]+)\s*', re.IGNORECASE)
- OWNER_RE = re.compile('owner\s*\:\s*([^ ]+)\s*', re.IGNORECASE)
+ CLASSIFICATION_RE = re.compile('(?:tag|tags|classification)\\s*\\:\\s*(?:(?:\"([^"]+)\")|([^ ]+))\\s*', re.IGNORECASE)
+ TYPE_RE = re.compile(r'type\s*\:\s*([^ ]+)\s*', re.IGNORECASE)
+ OWNER_RE = re.compile(r'owner\s*\:\s*([^ ]+)\s*', re.IGNORECASE)
def __init__(self, user=None):
super(AtlasApi, self).__init__(user)
@@ -94,13 +90,12 @@ def __init__(self, user=None):
elif self._password:
self._client.set_basic_auth(self._username, self._password)
- self._root = resource.Resource(self._client, urlencode=False) # For search_entities_interactive
+ self._root = resource.Resource(self._client, urlencode=False) # For search_entities_interactive
self.__headers = {}
self.__params = ()
- #self._fillup_properties() # Disabled currently
-
+ # self._fillup_properties() # Disabled currently
def _get_types_from_sources(self, sources):
default_entity_types = entity_types = ('DATABASE', 'TABLE', 'PARTITION', 'FIELD', 'FILE', 'VIEW', 'S3BUCKET', 'OPERATION', 'DIRECTORY')
@@ -124,16 +119,16 @@ def adapt_atlas_entity_to_navigator(self, atlas_entity):
"description": atlas_entity['attributes'].get('description'),
"identity": atlas_entity['guid'],
"internalType": atlas_entity['typeName'],
- "meaningNames": atlas_entity['meaningNames'], # Atlas specific
- "meanings": atlas_entity['meanings'], # Atlas specific
+ "meaningNames": atlas_entity['meaningNames'], # Atlas specific
+ "meanings": atlas_entity['meanings'], # Atlas specific
"name": atlas_entity['attributes'].get('name'),
"original_name": atlas_entity['attributes'].get('name'),
"originalDescription": None,
"originalName": atlas_entity['attributes'].get('name'),
"owner": atlas_entity['attributes'].get('owner'),
- "parentPath": '', # Set below
- "properties": {}, # Set below
- "sourceType": '', # Set below
+ "parentPath": '', # Set below
+ "properties": {}, # Set below
+ "sourceType": '', # Set below
"classifications": [],
"tags": atlas_entity['classificationNames'],
"type": self.ATLAS_TO_NAV_TYPE.get(atlas_entity['typeName'].lower()) or atlas_entity['typeName']
@@ -198,7 +193,7 @@ def fetch_single_entity(self, dsl_query):
atlas_response = self._root.get('/v2/search/dsl?query=%s' % dsl_query, headers=self.__headers,
params=self.__params)
- if not 'entities' in atlas_response or len(atlas_response['entities']) < 1:
+ if 'entities' not in atlas_response or len(atlas_response['entities']) < 1:
raise CatalogEntityDoesNotExistException('Could not find entity with query: %s' % dsl_query)
for atlas_entity in atlas_response['entities']:
@@ -413,7 +408,6 @@ def get_entity(self, entity_id):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def update_entity(self, entity, **metadata):
"""
PUT /api/v3/entities/:id
@@ -438,7 +432,6 @@ def update_entity(self, entity, **metadata):
else:
raise raise_popup_exception('Failed to update entity', detail=e)
-
def get_cluster_source_ids(self):
return []
# params = (
@@ -449,14 +442,12 @@ def get_cluster_source_ids(self):
# LOG.info(params)
# return self._root.get('entities', headers=self.__headers, params=params)
-
def add_tags(self, entity_id, tags):
entity = self.get_entity(entity_id)
new_tags = entity['tags'] or []
new_tags.extend(tags)
return self.update_entity(entity, tags=new_tags)
-
def delete_tags(self, entity_id, tags):
entity = self.get_entity(entity_id)
new_tags = entity['tags'] or []
@@ -465,7 +456,6 @@ def delete_tags(self, entity_id, tags):
new_tags.remove(tag)
return self.update_entity(entity, tags=new_tags)
-
def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None):
entity = self.get_entity(entity_id)
@@ -479,7 +469,6 @@ def update_properties(self, entity_id, properties, modified_custom_metadata=None
del properties['properties'][key]
return self.update_entity(entity, **properties)
-
def delete_metadata_properties(self, entity_id, property_keys):
entity = self.get_entity(entity_id)
new_props = entity['properties'] or {}
@@ -488,7 +477,6 @@ def delete_metadata_properties(self, entity_id, property_keys):
del new_props[key]
return self.update_entity(entity, properties=new_props)
-
def get_lineage(self, entity_id):
"""
GET /api/v3/lineage/entityIds=:id
@@ -508,7 +496,6 @@ def get_lineage(self, entity_id):
else:
raise raise_popup_exception('Failed to get lineage', detail=e)
-
def create_namespace(self, namespace, description=None):
try:
data = json.dumps({'name': namespace, 'description': description})
@@ -519,7 +506,6 @@ def create_namespace(self, namespace, description=None):
else:
raise raise_popup_exception('Failed to create namespace', detail=e)
-
def get_namespace(self, namespace):
try:
return self._root.get('models/namespaces/%(namespace)s' % {'namespace': namespace})
@@ -529,7 +515,6 @@ def get_namespace(self, namespace):
else:
raise raise_popup_exception('Failed to get namespace', detail=e)
-
def create_namespace_property(self, namespace, properties):
try:
data = json.dumps(properties)
@@ -541,7 +526,6 @@ def create_namespace_property(self, namespace, properties):
else:
raise raise_popup_exception('Failed to create namespace', detail=e)
-
def get_namespace_properties(self, namespace):
try:
return self._root.get('models/namespaces/%(namespace)s/properties' % {'namespace': namespace})
@@ -551,7 +535,6 @@ def get_namespace_properties(self, namespace):
else:
raise raise_popup_exception('Failed to create namespace', detail=e)
-
def map_namespace_property(self, clazz, properties):
try:
data = json.dumps(properties)
@@ -563,7 +546,6 @@ def map_namespace_property(self, clazz, properties):
else:
raise raise_popup_exception('Failed to map class', detail=e)
-
def get_model_properties_mapping(self):
try:
return self._root.get('models/properties/mappings')
@@ -573,7 +555,6 @@ def get_model_properties_mapping(self):
else:
raise raise_popup_exception('Failed to get models properties mappings', detail=e)
-
def _fillup_properties(self):
global _HAS_CATALOG_NAMESPACE
@@ -591,7 +572,7 @@ def _fillup_properties(self):
"description": "List of Hue document UUIDs related to this entity",
"multiValued": True,
"maxLength": 36,
- "pattern": ".*", # UUID
+ "pattern": ".*", # UUID
"enumValues": None,
"type": "TEXT"
})
@@ -605,7 +586,6 @@ def _fillup_properties(self):
_HAS_CATALOG_NAMESPACE = True
-
def _get_boosted_term(self, term):
return 'AND'.join([
'(%s)' % 'OR'.join(['(%s:%s*^%s)' % (field, term, weight)
@@ -619,6 +599,5 @@ def _get_boosted_term(self, term):
def _clean_path(self, path):
return path.rstrip('/').split('/')[-1], self._escape_slashes(path.rstrip('/'))
-
def _escape_slashes(self, s):
- return s.replace('/', '\/')
+ return s.replace('/', r'\/')
diff --git a/desktop/libs/metadata/src/metadata/catalog/base.py b/desktop/libs/metadata/src/metadata/catalog/base.py
index 89d75b210c0..2b9c1e78955 100644
--- a/desktop/libs/metadata/src/metadata/catalog/base.py
+++ b/desktop/libs/metadata/src/metadata/catalog/base.py
@@ -18,13 +18,10 @@
import sys
from builtins import object
-from desktop.lib.exceptions_renderable import PopupException
-from desktop.lib.i18n import smart_unicode
+from django.utils.translation import gettext as _
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from desktop.lib.exceptions_renderable import PopupException
+from desktop.lib.i18n import smart_str
def get_api(request, interface):
@@ -50,7 +47,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
class CatalogEntityDoesNotExistException(Exception):
@@ -61,7 +58,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
class CatalogAuthException(Exception):
@@ -72,7 +69,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
# Base API
@@ -88,28 +85,22 @@ def search_entities_interactive(self, query_s=None, limit=100, **filters):
"""For the top search"""
return {}
-
def find_entity(self, source_type, type, name, **filters):
"""e.g. From a database and table name, retrieve the enity id"""
return {}
-
def get_entity(self, entity_id):
return {}
-
def update_entity(self, entity, **metadata):
return {}
-
def add_tags(self, entity_id, tags):
return {}
-
def delete_tags(self, entity_id, tags):
return {}
-
def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None):
"""For updating entity comments or other attributes"""
return {}
@@ -119,26 +110,21 @@ def update_properties(self, entity_id, properties, modified_custom_metadata=None
def get_database(self, name):
return self.find_entity(source_type='HIVE', type='DATABASE', name=name)
-
def get_table(self, database_name, table_name, is_view=False):
- parent_path = '\/%s' % database_name
+ parent_path = r'\/%s' % database_name
return self.find_entity(source_type='HIVE', type='VIEW' if is_view else 'TABLE', name=table_name, parentPath=parent_path)
-
def get_field(self, database_name, table_name, field_name):
- parent_path = '\/%s\/%s' % (database_name, table_name)
+ parent_path = r'\/%s\/%s' % (database_name, table_name)
return self.find_entity(source_type='HIVE', type='FIELD', name=field_name, parentPath=parent_path)
-
def get_partition(self, database_name, table_name, partition_spec):
raise NotImplementedError
-
def get_directory(self, path):
dir_name, dir_path = self._clean_path(path)
return self.find_entity(source_type='HDFS', type='DIRECTORY', name=dir_name, fileSystemPath=dir_path)
-
def get_file(self, path):
file_name, file_path = self._clean_path(path)
return self.find_entity(source_type='HDFS', type='FILE', name=file_name, fileSystemPath=file_path)
diff --git a/desktop/libs/metadata/src/metadata/catalog/dummy_client.py b/desktop/libs/metadata/src/metadata/catalog/dummy_client.py
index 1447fc9c53c..2ae64738414 100644
--- a/desktop/libs/metadata/src/metadata/catalog/dummy_client.py
+++ b/desktop/libs/metadata/src/metadata/catalog/dummy_client.py
@@ -16,16 +16,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import sys
+import logging
-from metadata.catalog.base import Api
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from django.utils.translation import gettext as _
+from metadata.catalog.base import Api
LOG = logging.getLogger()
@@ -35,32 +31,25 @@ class DummyClient(Api):
def __init__(self, user=None):
self.user = user
-
def search_entities_interactive(self, query_s=None, limit=100, **filters):
return {u'highlighting': {u'27': {u'sourceType': [u'HIVE'], u'originalName': [u'sample_08'], u'owner': [u'admin'], u'type': [u'TABLE'], u'fileSystemPath': [u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_08'], u'internalType': [u'hv_table']}, u'1144700': {u'sourceType': [u'HIVE'], u'originalName': [u'sample_07_parquet'], u'owner': [u'admin'], u'type': [u'TABLE'], u'fileSystemPath': [u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07_parquet'], u'internalType': [u'hv_table']}, u'22': {u'sourceType': [u'HIVE'], u'description': [u'Job data'], u'originalName': [u'sample_07'], u'owner': [u'admin'], u'type': [u'TABLE'], u'fileSystemPath': [u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07'], u'internalType': [u'hv_table']}}, u'facets': {}, u'qtime': 1339, u'facetRanges': [], u'results': [{u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_08', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': None, u'inputFormat': u'org.apache.hadoop.mapred.TextInputFormat', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_08', u'dd': u'xx'}, u'identity': u'27', u'outputFormat': u'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##503', u'created': u'2018-03-30T17:14:44.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_08', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}, {u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07_parquet', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': None, u'inputFormat': u'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07_parquet'}, u'identity': u'1144700', u'outputFormat': u'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##718', u'created': u'2018-04-17T06:16:17.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_07_parquet', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}, {u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': u'Job data', u'inputFormat': u'org.apache.hadoop.mapred.TextInputFormat', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07'}, u'identity': u'22', u'outputFormat': u'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##503', u'created': u'2018-03-30T17:14:42.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_07', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}], u'totalMatched': 3, u'limit': 45, u'offset': 0}
-
def find_entity(self, source_type, type, name, **filters):
return [{u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': u'Job data', u'inputFormat': u'org.apache.hadoop.mapred.TextInputFormat', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07'}, u'identity': u'22', u'outputFormat': u'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##503', u'created': u'2018-03-30T17:14:42.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_07', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}]
-
def get_entity(self, entity_id):
return {u'customProperties': None, u'deleteTime': None, u'description': None, u'dataType': u'int', u'type': u'FIELD', u'internalType': u'hv_column', u'sourceType': u'HIVE', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'originalDescription': None, u'metaClassName': u'hv_column', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07'}, u'identity': u'26', u'firstClassParentId': u'22', u'name': None, u'extractorRunId': u'8##1', u'sourceId': u'8', u'packageName': u'nav', u'parentPath': u'/default/sample_07', u'originalName': u'total_emp'}
-
def update_entity(self, entity, **metadata):
return {}
-
def add_tags(self, entity_id, tags):
# Return entity but not used currently
return {u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': u'Job data', u'inputFormat': u'org.apache.hadoop.mapred.TextInputFormat', u'tags': [u'usage'], u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07'}, u'identity': u'22', u'outputFormat': u'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##503', u'created': u'2018-03-30T17:14:42.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_07', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}
-
def delete_tags(self, entity_id, tags):
return {}
-
def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None):
# For updating comments of table or columns
# Returning the entity but not used currently
diff --git a/desktop/libs/metadata/src/metadata/catalog/navigator_client.py b/desktop/libs/metadata/src/metadata/catalog/navigator_client.py
index 437f7407fdd..82ce67f6d57 100644
--- a/desktop/libs/metadata/src/metadata/catalog/navigator_client.py
+++ b/desktop/libs/metadata/src/metadata/catalog/navigator_client.py
@@ -16,32 +16,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import json
-import logging
import re
import sys
-
+import json
+import logging
from itertools import islice
from django.core.cache import cache
+from django.utils.translation import gettext as _
from desktop.lib.rest import resource
-from desktop.lib.rest.unsecure_http_client import UnsecureHttpClient
from desktop.lib.rest.http_client import RestException
-
+from desktop.lib.rest.unsecure_http_client import UnsecureHttpClient
from hadoop.conf import HDFS_CLUSTERS
from libsentry.privilege_checker import get_checker
from libsentry.sentry_site import get_hive_sentry_provider
-
+from metadata.catalog.base import Api, CatalogApiException, CatalogAuthException, CatalogEntityDoesNotExistException
from metadata.conf import NAVIGATOR, get_navigator_auth_password, get_navigator_auth_username
-from metadata.catalog.base import CatalogAuthException, CatalogApiException, CatalogEntityDoesNotExistException, Api
from metadata.metadata_sites import get_navigator_hue_server_name
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
VERSION = 'v9'
_JSON_CONTENT_TYPE = 'application/json'
@@ -68,7 +61,7 @@ def get_cluster_source_ids(api):
else:
# 0 means always false
cluster_source_ids = 'sourceId:0 AND'
- cache.set(CLUSTER_SOURCE_IDS_CACHE_KEY, cluster_source_ids, 60 * 60 * 12) # 1/2 Day
+ cache.set(CLUSTER_SOURCE_IDS_CACHE_KEY, cluster_source_ids, 60 * 60 * 12) # 1/2 Day
return cluster_source_ids
@@ -99,13 +92,12 @@ def __init__(self, user=None):
# Navigator does not support Kerberos authentication while other components usually requires it
self._client = UnsecureHttpClient(self._api_url, logger=LOG)
self._client.set_basic_auth(self._username, self._password)
- self._root = resource.Resource(self._client, urlencode=False) # For search_entities_interactive
+ self._root = resource.Resource(self._client, urlencode=False) # For search_entities_interactive
self.__headers = {}
self.__params = {}
- #self._fillup_properties() # Disabled currently
-
+ # self._fillup_properties() # Disabled currently
def _get_types_from_sources(self, sources):
default_entity_types = entity_types = ('DATABASE', 'TABLE', 'PARTITION', 'FIELD', 'FILE', 'VIEW', 'S3BUCKET', 'OPERATION', 'DIRECTORY')
@@ -122,8 +114,7 @@ def _get_types_from_sources(self, sources):
return default_entity_types, entity_types
-
- def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFields=None, facetPrefix=None,
+ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFields=None, facetPrefix=None,
facetRanges=None, filterQueries=None, firstClassEntitiesOnly=None, sources=None):
try:
pagination = {
@@ -234,7 +225,7 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi
fq_type = default_entity_types
filterQueries.append('sourceType:s3')
- if query_s.strip().endswith('type:*'): # To list all available types
+ if query_s.strip().endswith('type:*'): # To list all available types
fq_type = entity_types
search_terms = [term for term in query_s.strip().split()] if query_s else []
@@ -244,8 +235,8 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi
query.append(self._get_boosted_term(term))
else:
name, val = term.split(':')
- if val: # Allow to type non default types, e.g for SQL: type:FIEL*
- if name == 'type': # Make sure type value still makes sense for the source
+ if val: # Allow to type non default types, e.g for SQL: type:FIEL*
+ if name == 'type': # Make sure type value still makes sense for the source
term = '%s:%s' % (name, val.upper())
fq_type = entity_types
if name.lower() not in ['type', 'tags', 'owner', 'originalname', 'originaldescription', 'lastmodifiedby']:
@@ -264,7 +255,7 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi
if source_ids:
body['query'] = source_ids + '(' + body['query'] + ')'
- body['facetFields'] = facetFields or [] # Currently mandatory in API
+ body['facetFields'] = facetFields or [] # Currently mandatory in API
if facetPrefix:
body['facetPrefix'] = facetPrefix
if facetRanges:
@@ -283,7 +274,7 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi
clear_cookies=True
)
- response['results'] = list(islice(self._secure_results(response['results']), limit)) # Apply Sentry perms
+ response['results'] = list(islice(self._secure_results(response['results']), limit)) # Apply Sentry perms
return response
except RestException as e:
@@ -293,8 +284,6 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi
else:
raise CatalogApiException(e.message)
-
-
def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filters):
"""
Solr edismax query parser syntax.
@@ -330,8 +319,8 @@ def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filte
if val:
if name == 'type':
term = '%s:%s' % (name, val.upper().strip('*'))
- default_entity_types = entity_types # Make sure type value still makes sense for the source
- user_filters.append(term + '*') # Manual filter allowed e.g. type:VIE* ca
+ default_entity_types = entity_types # Make sure type value still makes sense for the source
+ user_filters.append(term + '*') # Manual filter allowed e.g. type:VIE* ca
filter_query = '*'
@@ -365,7 +354,7 @@ def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filte
LOG.info(params)
response = self._root.get('entities', headers=self.__headers, params=params)
- response = list(islice(self._secure_results(response), limit)) # Apply Sentry perms
+ response = list(islice(self._secure_results(response), limit)) # Apply Sentry perms
return response
except RestException as e:
@@ -375,7 +364,6 @@ def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filte
else:
raise CatalogApiException(e)
-
def _secure_results(self, results, checker=None):
# TODO: to move directly to Catalog API
if NAVIGATOR.APPLY_SENTRY_PERMISSIONS.get():
@@ -402,7 +390,6 @@ def getkey(result):
else:
return results
-
def suggest(self, prefix=None):
try:
return self._root.get('interactive/suggestions?query=%s' % (prefix or '*'))
@@ -411,7 +398,6 @@ def suggest(self, prefix=None):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def find_entity(self, source_type, type, name, **filters):
"""
GET /api/v3/entities?query=((sourceType:)AND(type:)AND(originalName:))
@@ -431,7 +417,7 @@ def find_entity(self, source_type, type, name, **filters):
filter_query = 'AND'.join('(%s:%s)' % (key, value) for key, value in list(query_filters.items()))
filter_query = '%(type)s AND %(filter_query)s' % {
- 'type': '(type:%s)' % 'TABLE OR type:VIEW' if type == 'TABLE' else type, # Impala don't always say that a table is actually a view
+ 'type': '(type:%s)' % 'TABLE OR type:VIEW' if type == 'TABLE' else type, # Impala don't always say that a table is actually a view
'filter_query': filter_query
}
@@ -458,7 +444,6 @@ def find_entity(self, source_type, type, name, **filters):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def get_entity(self, entity_id):
"""
GET /api/v3/entities/:id
@@ -471,7 +456,6 @@ def get_entity(self, entity_id):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def update_entity(self, entity, **metadata):
"""
PUT /api/v3/entities/:id
@@ -502,7 +486,6 @@ def update_entity(self, entity, **metadata):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def get_cluster_source_ids(self):
params = {
'query': 'clusterName:"%s"' % get_navigator_hue_server_name(),
@@ -512,14 +495,12 @@ def get_cluster_source_ids(self):
LOG.info(params)
return self._root.get('entities', headers=self.__headers, params=params)
-
def add_tags(self, entity_id, tags):
entity = self.get_entity(entity_id)
new_tags = entity['tags'] or []
new_tags.extend(tags)
return self.update_entity(entity, tags=new_tags)
-
def delete_tags(self, entity_id, tags):
entity = self.get_entity(entity_id)
new_tags = entity['tags'] or []
@@ -528,7 +509,6 @@ def delete_tags(self, entity_id, tags):
new_tags.remove(tag)
return self.update_entity(entity, tags=new_tags)
-
def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None):
entity = self.get_entity(entity_id)
@@ -542,7 +522,6 @@ def update_properties(self, entity_id, properties, modified_custom_metadata=None
del properties['properties'][key]
return self.update_entity(entity, **properties)
-
def delete_metadata_properties(self, entity_id, property_keys):
entity = self.get_entity(entity_id)
new_props = entity['properties'] or {}
@@ -551,7 +530,6 @@ def delete_metadata_properties(self, entity_id, property_keys):
del new_props[key]
return self.update_entity(entity, properties=new_props)
-
def get_lineage(self, entity_id):
"""
GET /api/v3/lineage/entityIds=:id
@@ -570,7 +548,6 @@ def get_lineage(self, entity_id):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def create_namespace(self, namespace, description=None):
try:
data = json.dumps({'name': namespace, 'description': description})
@@ -580,7 +557,6 @@ def create_namespace(self, namespace, description=None):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def get_namespace(self, namespace):
try:
return self._root.get('models/namespaces/%(namespace)s' % {'namespace': namespace})
@@ -589,7 +565,6 @@ def get_namespace(self, namespace):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def create_namespace_property(self, namespace, properties):
try:
data = json.dumps(properties)
@@ -605,7 +580,6 @@ def create_namespace_property(self, namespace, properties):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def get_namespace_properties(self, namespace):
try:
return self._root.get('models/namespaces/%(namespace)s/properties' % {'namespace': namespace})
@@ -614,14 +588,13 @@ def get_namespace_properties(self, namespace):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def map_namespace_property(self, clazz, properties):
try:
data = json.dumps(properties)
return self._root.post(
'models/packages/nav/classes/%(class)s/properties' % {'class': clazz},
- data=data,
- contenttype=_JSON_CONTENT_TYPE,
+ data=data,
+ contenttype=_JSON_CONTENT_TYPE,
clear_cookies=True
)
@@ -630,7 +603,6 @@ def map_namespace_property(self, clazz, properties):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def get_model_properties_mapping(self):
try:
return self._root.get('models/properties/mappings')
@@ -639,7 +611,6 @@ def get_model_properties_mapping(self):
LOG.error(msg)
raise CatalogApiException(e.message)
-
def _fillup_properties(self):
global _HAS_CATALOG_NAMESPACE
@@ -657,7 +628,7 @@ def _fillup_properties(self):
"description": "List of Hue document UUIDs related to this entity",
"multiValued": True,
"maxLength": 36,
- "pattern": ".*", # UUID
+ "pattern": ".*", # UUID
"enumValues": None,
"type": "TEXT"
})
@@ -671,7 +642,6 @@ def _fillup_properties(self):
_HAS_CATALOG_NAMESPACE = True
-
def _get_boosted_term(self, term):
return 'AND'.join([
# Matching fields
@@ -683,10 +653,8 @@ def _get_boosted_term(self, term):
# Could add certain customProperties and properties
])
-
def _clean_path(self, path):
return path.rstrip('/').split('/')[-1], self._escape_slashes(path.rstrip('/'))
-
def _escape_slashes(self, s):
- return s.replace('/', '\/')
+ return s.replace('/', r'\/')
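Note on the last hunk: switching to a raw string in _escape_slashes is behavior-preserving; it only silences Python 3's invalid-escape-sequence warning. A standalone sketch (not part of the patch) of the equivalence:

    def escape_slashes(s):
        # r'\/' and '\/' produce the same two characters; only the non-raw form
        # is an invalid escape sequence that CPython flags at compile time.
        return s.replace('/', r'\/')

    assert escape_slashes('db/table') == 'db\\/table'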
diff --git a/desktop/libs/metadata/src/metadata/catalog_api.py b/desktop/libs/metadata/src/metadata/catalog_api.py
index 9293c2a3d72..27b5636a8ef 100644
--- a/desktop/libs/metadata/src/metadata/catalog_api.py
+++ b/desktop/libs/metadata/src/metadata/catalog_api.py
@@ -16,30 +16,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import next
-import json
-import logging
import re
import sys
-
+import json
+import logging
+from builtins import next
from collections import OrderedDict
from django.http import Http404
from django.utils.html import escape
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
from desktop.lib.django_util import JsonResponse
from desktop.lib.i18n import force_unicode, smart_str
-
from metadata.catalog.base import get_api
-from metadata.catalog.navigator_client import CatalogApiException, CatalogEntityDoesNotExistException, CatalogAuthException
-from metadata.conf import has_catalog, CATALOG, has_catalog_file_search, NAVIGATOR
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from metadata.catalog.navigator_client import CatalogApiException, CatalogAuthException, CatalogEntityDoesNotExistException
+from metadata.conf import CATALOG, NAVIGATOR, has_catalog, has_catalog_file_search
LOG = logging.getLogger()
@@ -111,7 +104,7 @@ def search_entities_interactive(request):
sources=sources
)
- if response.get('facets'): # Remove empty facets
+ if response.get('facets'): # Remove empty facets
for fname, fvalues in list(response['facets'].items()):
# Should be a CATALOG option at some point for hidding table with no access / asking for access.
if interface == 'navigator' and NAVIGATOR.APPLY_SENTRY_PERMISSIONS.get():
@@ -122,7 +115,6 @@ def search_entities_interactive(request):
if ':' in query_s and not response['facets'][fname]:
del response['facets'][fname]
-
_augment_highlighting(query_s, response.get('results'))
response['status'] = 0
@@ -189,14 +181,14 @@ def _augment_highlighting(query_s, records):
if record['hue_name'] and record.get('sourceType', '') != 'S3':
record['hue_name'] = (record['hue_name'].replace('/', '.') + '.').lstrip('.')
- record['originalName'] = record['hue_name'] + name # Inserted when selected in autocomplete, full path
- record['selectionName'] = name # Use when hovering / selecting a search result
+ record['originalName'] = record['hue_name'] + name # Inserted when selected in autocomplete, full path
+ record['selectionName'] = name # Use when hovering / selecting a search result
for term in ts:
name = _highlight(term, name)
if record.get('tags'):
_highlight_tags(record, term)
- for fname, fval in fs.items(): # e.g. owner:hue
+ for fname, fval in fs.items(): # e.g. owner:hue
if record.get(fname, ''):
if fname == 'tags':
_highlight_tags(record, fval)
@@ -386,8 +378,8 @@ def delete_tags(request):
def update_properties(request):
interface = request.POST.get('interface', CATALOG.INTERFACE.get())
entity_id = json.loads(request.POST.get('id', '""'))
- properties = json.loads(request.POST.get('properties', '{}')) # Entity properties
- modified_custom_metadata = json.loads(request.POST.get('modifiedCustomMetadata', '{}')) # Aka "Custom Metadata"
+ properties = json.loads(request.POST.get('properties', '{}')) # Entity properties
+ modified_custom_metadata = json.loads(request.POST.get('modifiedCustomMetadata', '{}')) # Aka "Custom Metadata"
deleted_custom_metadata_keys = json.loads(request.POST.get('deletedCustomMetadataKeys', '[]'))
api = get_api(request=request, interface=interface)
@@ -397,7 +389,8 @@ def update_properties(request):
request.audit = {
'allowed': is_allowed,
'operation': '%s_UPDATE_PROPERTIES' % interface.upper(),
- 'operationText': 'Updating custom metadata %s, deleted custom metadata keys %s and properties %s of entity %s' % (modified_custom_metadata, deleted_custom_metadata_keys, properties, entity_id)
+ 'operationText': 'Updating custom metadata %s, deleted custom metadata keys %s and properties %s of entity %s' % (
+ modified_custom_metadata, deleted_custom_metadata_keys, properties, entity_id)
}
if not entity_id:
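The import reshuffle in catalog_api.py follows the usual stdlib / third-party / first-party grouping. A compressed sketch of the resulting layout, using module names taken from the hunk above (it assumes a Hue environment where the desktop and metadata packages are importable):

    # Standard library
    import re
    import sys
    import json
    import logging
    from collections import OrderedDict

    # Third-party (Django)
    from django.http import Http404
    from django.utils.translation import gettext as _

    # First-party (Hue)
    from desktop.lib.django_util import JsonResponse
    from metadata.catalog.base import get_api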
diff --git a/desktop/libs/metadata/src/metadata/conf.py b/desktop/libs/metadata/src/metadata/conf.py
index f37635f0a35..f455c3e070b 100644
--- a/desktop/libs/metadata/src/metadata/conf.py
+++ b/desktop/libs/metadata/src/metadata/conf.py
@@ -15,24 +15,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import os
import sys
-
+import logging
from subprocess import CalledProcessError
+from django.utils.translation import gettext_lazy as _t
+
from desktop.conf import AUTH_USERNAME as DEFAULT_AUTH_USERNAME, CLUSTER_ID as DEFAULT_CLUSTER_ID
from desktop.lib.conf import Config, ConfigSection, coerce_bool, coerce_password_from_script
from desktop.lib.paths import get_config_root, get_desktop_root
-
-from metadata.settings import DJANGO_APPS
from metadata.catalog import atlas_flags
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t
-else:
- from django.utils.translation import ugettext_lazy as _t
-
+from metadata.settings import DJANGO_APPS
OPTIMIZER_AUTH_PASSWORD = None
NAVIGATOR_AUTH_PASSWORD = None
@@ -44,14 +38,17 @@ def get_auth_username():
"""Get from top level default from desktop"""
return DEFAULT_AUTH_USERNAME.get()
+
def default_catalog_url():
"""Get from main Hue config directory if present"""
return atlas_flags.get_api_url() if atlas_flags.get_api_url() else None
+
def default_catalog_config_dir():
"""Get from usual main Hue config directory"""
return os.environ.get("HUE_CONF_DIR", get_desktop_root("conf")) + '/hive-conf'
+
def default_catalog_interface():
"""Detect if the configured catalog is Navigator or default to Atlas"""
from metadata.metadata_sites import get_navigator_server_url
@@ -62,10 +59,12 @@ def default_catalog_interface():
catalog_interface = 'navigator'
return catalog_interface
+
def default_navigator_config_dir():
"""Get from usual main Hue config directory"""
return get_config_root()
+
def default_navigator_url():
"""Get from usual main Hue config directory"""
from metadata.metadata_sites import get_navigator_server_url
@@ -75,12 +74,15 @@ def default_navigator_url():
def get_optimizer_url():
return OPTIMIZER.HOSTNAME.get() and OPTIMIZER.HOSTNAME.get().strip('/')
+
def has_optimizer():
return OPTIMIZER.INTERFACE.get() != 'navopt' or bool(OPTIMIZER.AUTH_KEY_ID.get())
+
def get_optimizer_mode():
return has_optimizer() and OPTIMIZER.MODE.get() or 'off'
+
def has_workload_analytics():
# Note: unused
return bool(ALTUS.AUTH_KEY_ID.get()) and ALTUS.HAS_WA.get()
@@ -265,9 +267,11 @@ def get_optimizer_password_script():
# Data Catalog
+
def get_catalog_url():
return (CATALOG.API_URL.get() and CATALOG.API_URL.get().strip('/')) or (CATALOG.INTERFACE.get() == 'navigator' and get_navigator_url())
+
def has_catalog(user):
from desktop.auth.backend import is_admin
return (
@@ -276,16 +280,20 @@ def has_catalog(user):
is_admin(user) or user.has_hue_permission(action="access", app=DJANGO_APPS[0])
)
+
def has_readonly_catalog(user):
return has_catalog(user) and not has_navigator(user)
+
def get_catalog_search_cluster():
return CATALOG.SEARCH_CLUSTER.get()
+
def get_kerberos_enabled_default():
'''Use atlas.authentication.method.kerberos if catalog interface is atlas else False '''
return atlas_flags.is_kerberos_enabled() if CATALOG.INTERFACE.get() == 'atlas' else False
+
def get_catalog_server_password_script():
'''Execute script at path'''
return CATALOG.SERVER_PASSWORD_SCRIPT.get()
@@ -356,17 +364,21 @@ def get_catalog_server_password_script():
# Navigator is deprecated over generic Catalog above
+
def get_navigator_url():
return NAVIGATOR.API_URL.get() and NAVIGATOR.API_URL.get().strip('/')[:-3]
+
def has_navigator(user):
from desktop.auth.backend import is_admin
return bool(get_navigator_url() and get_navigator_auth_password()) \
and (is_admin(user) or user.has_hue_permission(action="access", app=DJANGO_APPS[0]))
+
def get_navigator_auth_type():
return NAVIGATOR.AUTH_TYPE.get().lower()
+
def get_navigator_auth_username():
'''Get the username to authenticate with.'''
@@ -377,6 +389,7 @@ def get_navigator_auth_username():
else:
return NAVIGATOR.AUTH_CM_USERNAME.get()
+
def get_navigator_auth_password():
'''Get the password to authenticate with.'''
global NAVIGATOR_AUTH_PASSWORD
@@ -394,18 +407,22 @@ def get_navigator_auth_password():
return NAVIGATOR_AUTH_PASSWORD
+
def get_navigator_cm_password():
'''Get default password from secured file'''
return NAVIGATOR.AUTH_CM_PASSWORD_SCRIPT.get()
+
def get_navigator_ldap_password():
'''Get default password from secured file'''
return NAVIGATOR.AUTH_LDAP_PASSWORD_SCRIPT.get()
+
def get_navigator_saml_password():
'''Get default password from secured file'''
return NAVIGATOR.AUTH_SAML_PASSWORD_SCRIPT.get()
+
def has_catalog_file_search(user):
return has_catalog(user) and NAVIGATOR.ENABLE_FILE_SEARCH.get()
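The blank lines added throughout conf.py enforce PEP 8's rule of two blank lines between top-level definitions (flake8 E302). Schematically, with illustrative bodies only:

    def default_catalog_config_dir():
        return '/etc/hue/conf/hive-conf'  # illustrative value only


    def default_catalog_interface():  # exactly two blank lines above satisfy E302
        return 'atlas'  # illustrative value only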
diff --git a/desktop/libs/metadata/src/metadata/dataeng_api.py b/desktop/libs/metadata/src/metadata/dataeng_api.py
index e5cb22b7321..11cc0457196 100644
--- a/desktop/libs/metadata/src/metadata/dataeng_api.py
+++ b/desktop/libs/metadata/src/metadata/dataeng_api.py
@@ -15,23 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-import json
import sys
+import json
+import logging
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
from desktop.lib.django_util import JsonResponse
from desktop.lib.i18n import force_unicode
-
from notebook.connectors.altus import DataEngApi
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
diff --git a/desktop/libs/metadata/src/metadata/manager_api.py b/desktop/libs/metadata/src/metadata/manager_api.py
index a89efeb54f6..5cc1f8dbb04 100644
--- a/desktop/libs/metadata/src/metadata/manager_api.py
+++ b/desktop/libs/metadata/src/metadata/manager_api.py
@@ -16,31 +16,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import json
-import logging
import os
import sys
+import json
+import logging
from django.http import Http404
from django.utils.html import escape
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
from desktop.auth.backend import is_admin
from desktop.lib.django_util import JsonResponse
from desktop.lib.i18n import force_unicode
-from libzookeeper.conf import zkensemble
from indexer.conf import config_morphline_path
-
+from libzookeeper.conf import zkensemble
from metadata.catalog.navigator_client import CatalogApiException
from metadata.conf import has_catalog
from metadata.manager_client import ManagerApi
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -52,7 +46,7 @@ def decorator(*args, **kwargs):
}
try:
- if has_catalog(args[0].user): # TODO
+ if has_catalog(args[0].user): # TODO
return view_fn(*args, **kwargs)
else:
raise CatalogApiException('Navigator API is not configured.')
@@ -125,7 +119,6 @@ def update_flume_config(request):
tier1.sinks.sink1.morphlineId = hue_accesslogs_no_geo
tier1.sinks.sink1.channel = channel1'''
-
morphline_config = open(os.path.join(config_morphline_path(), 'hue_accesslogs_no_geo.morphline.conf')).read()
morphline_config = morphline_config.replace(
'${SOLR_COLLECTION}', 'log_analytics_demo'
@@ -135,8 +128,10 @@ def update_flume_config(request):
responses = {}
- responses['agent_config_file'] = api.update_flume_config(cluster_name=None, config_name='agent_config_file', config_value=flume_agent_config)
- responses['agent_morphlines_conf_file'] = api.update_flume_config(cluster_name=None, config_name='agent_morphlines_conf_file', config_value=morphline_config)
+ responses['agent_config_file'] = api.update_flume_config(
+ cluster_name=None, config_name='agent_config_file', config_value=flume_agent_config)
+ responses['agent_morphlines_conf_file'] = api.update_flume_config(
+ cluster_name=None, config_name='agent_morphlines_conf_file', config_value=morphline_config)
responses['refresh_flume'] = api.refresh_flume(cluster_name=None, restart=True)
diff --git a/desktop/libs/metadata/src/metadata/manager_client.py b/desktop/libs/metadata/src/metadata/manager_client.py
index b40078990fa..411971a419a 100644
--- a/desktop/libs/metadata/src/metadata/manager_client.py
+++ b/desktop/libs/metadata/src/metadata/manager_client.py
@@ -16,29 +16,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-from builtins import object
-import base64
+import sys
import json
+import base64
import logging
-import sys
+from builtins import object
+from urllib.parse import quote as urllib_quote
from django.core.cache import cache
+from django.utils.translation import gettext as _
-from desktop.lib.rest.http_client import RestException, HttpClient
+from desktop.lib.i18n import smart_str
+from desktop.lib.rest.http_client import HttpClient, RestException
from desktop.lib.rest.resource import Resource
-from desktop.lib.i18n import smart_unicode
-
-from metadata.conf import MANAGER, get_navigator_auth_username, get_navigator_auth_password
-
-
-if sys.version_info[0] > 2:
- from urllib.parse import quote as urllib_quote
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
- from urllib import quote as urllib_quote
+from metadata.conf import MANAGER, get_navigator_auth_password, get_navigator_auth_username
LOG = logging.getLogger()
VERSION = 'v19'
@@ -52,7 +43,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
class ManagerApi(object):
@@ -76,7 +67,6 @@ def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
self._client.set_verify(ssl_cert_ca_verify)
self._root = Resource(self._client)
-
def has_service(self, service_name, cluster_name=None):
cluster = self._get_cluster(cluster_name)
try:
@@ -89,7 +79,6 @@ def has_service(self, service_name, cluster_name=None):
except RestException as e:
raise ManagerApiException(e)
-
def get_spark_history_server_configs(self, cluster_name=None):
service_name = "SPARK_ON_YARN"
shs_role_type = "SPARK_YARN_HISTORY_SERVER"
@@ -103,7 +92,6 @@ def get_spark_history_server_configs(self, cluster_name=None):
service_display_names = [service['displayName'] for service in services if service['type'] == service_name]
-
if service_display_names:
spark_service_display_name = service_display_names[0]
@@ -118,7 +106,8 @@ def get_spark_history_server_configs(self, cluster_name=None):
shs_server_hostId = shs_server_hostRef[0]['hostId'] if shs_server_hostRef else None
if shs_server_name and shs_server_hostId:
- shs_server_configs = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
+ shs_server_configs = self._root.get(
+ 'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
'cluster_name': cluster['name'],
'spark_service_display_name': spark_service_display_name,
'shs_server_name': shs_server_name
@@ -187,7 +176,6 @@ def tools_echo(self):
except RestException as e:
raise ManagerApiException(e)
-
def get_kafka_brokers(self, cluster_name=None):
try:
@@ -199,7 +187,6 @@ def get_kafka_brokers(self, cluster_name=None):
except RestException as e:
raise ManagerApiException(e)
-
def get_kudu_master(self, cluster_name=None):
try:
cluster = self._get_cluster(cluster_name)
@@ -214,7 +201,6 @@ def get_kudu_master(self, cluster_name=None):
except RestException as e:
raise ManagerApiException(e)
-
def get_kafka_topics(self, broker_host):
try:
client = HttpClient('http://%s:24042' % broker_host, logger=LOG)
@@ -224,14 +210,13 @@ def get_kafka_topics(self, broker_host):
except RestException as e:
raise ManagerApiException(e)
-
def update_flume_config(self, cluster_name, config_name, config_value):
service = 'FLUME-1'
cluster = self._get_cluster(cluster_name)
roleConfigGroup = [role['roleConfigGroupRef']['roleConfigGroupName'] for role in self._get_roles(cluster['name'], service, 'AGENT')]
data = {
u'items': [{
- u'url': u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'.replace('%(cluster_name)s', urllib_quote(cluster['name'])).replace('%(service)s', service).replace('%(roleConfigGroups)s', roleConfigGroup[0]),
+ u'url': u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'.replace('%(cluster_name)s', urllib_quote(cluster['name'])).replace('%(service)s', service).replace('%(roleConfigGroups)s', roleConfigGroup[0]), # noqa: E501
u'body': {
u'items': [
{u'name': config_name, u'value': config_value}
@@ -246,11 +231,9 @@ def update_flume_config(self, cluster_name, config_name, config_value):
items=data
)
-
def get_flume_agents(self, cluster_name=None):
return [host['hostname'] for host in self._get_hosts('FLUME', 'AGENT', cluster_name=cluster_name)]
-
def _get_hosts(self, service_name, role_name, cluster_name=None):
try:
cluster = self._get_cluster(cluster_name)
@@ -265,7 +248,6 @@ def _get_hosts(self, service_name, role_name, cluster_name=None):
except RestException as e:
raise ManagerApiException(e)
-
def refresh_flume(self, cluster_name, restart=False):
service = 'FLUME-1'
cluster = self._get_cluster(cluster_name)
@@ -276,13 +258,15 @@ def refresh_flume(self, cluster_name, restart=False):
else:
return self.refresh_configs(cluster['name'], service, roles)
-
def refresh_configs(self, cluster_name, service=None, roles=None):
try:
if service is None:
- return self._root.post('clusters/%(cluster_name)s/commands/refresh' % {'cluster_name': cluster_name}, contenttype="application/json")
+ return self._root.post(
+ 'clusters/%(cluster_name)s/commands/refresh' % {'cluster_name': cluster_name}, contenttype="application/json")
elif roles is None:
- return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {'cluster_name': cluster_name, 'service': service}, contenttype="application/json")
+ return self._root.post(
+ 'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {'cluster_name': cluster_name, 'service': service},
+ contenttype="application/json")
else:
return self._root.post(
'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {'cluster_name': cluster_name, 'service': service},
@@ -292,13 +276,15 @@ def refresh_configs(self, cluster_name, service=None, roles=None):
except RestException as e:
raise ManagerApiException(e)
-
def restart_services(self, cluster_name, service=None, roles=None):
try:
if service is None:
- return self._root.post('clusters/%(cluster_name)s/commands/restart' % {'cluster_name': cluster_name}, contenttype="application/json")
+ return self._root.post(
+ 'clusters/%(cluster_name)s/commands/restart' % {'cluster_name': cluster_name}, contenttype="application/json")
elif roles is None:
- return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {'cluster_name': cluster_name, 'service': service}, contenttype="application/json")
+ return self._root.post(
+ 'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {'cluster_name': cluster_name, 'service': service},
+ contenttype="application/json")
else:
return self._root.post(
'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {'cluster_name': cluster_name, 'service': service},
@@ -308,14 +294,12 @@ def restart_services(self, cluster_name, service=None, roles=None):
except RestException as e:
raise ManagerApiException(e)
-
def batch(self, items):
try:
return self._root.post('batch', data=json.dumps(items), contenttype='application/json')
except RestException as e:
raise ManagerApiException(e)
-
def _get_cluster(self, cluster_name=None):
clusters = self._root.get('clusters/')['items']
@@ -326,12 +310,11 @@ def _get_cluster(self, cluster_name=None):
return cluster
-
def _get_roles(self, cluster_name, service_name, role_type):
- roles = self._root.get('clusters/%(cluster_name)s/services/%(service_name)s/roles' % {'cluster_name': cluster_name, 'service_name': service_name})['items']
+ roles = self._root.get(
+ 'clusters/%(cluster_name)s/services/%(service_name)s/roles' % {'cluster_name': cluster_name, 'service_name': service_name})['items']
return [role for role in roles if role['type'] == role_type]
-
def get_impalad_config(self, key=None, impalad_host=None, cluster_name=None):
if not key or not impalad_host:
return None
@@ -360,11 +343,13 @@ def get_impalad_config(self, key=None, impalad_host=None, cluster_name=None):
'spark_service_display_name': impala_service_display_name
})['items']
- impalad_server_names = [server['name'] for server in servers if server['type'] == role_type and server['hostRef']['hostId'] == impalad_hostId]
+ impalad_server_names = [
+ server['name'] for server in servers if server['type'] == role_type and server['hostRef']['hostId'] == impalad_hostId]
impalad_server_name = impalad_server_names[0] if impalad_server_names else None
if impalad_server_name:
- server_configs = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
+ server_configs = self._root.get(
+ 'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
'cluster_name': cluster['name'],
'spark_service_display_name': impala_service_display_name,
'shs_server_name': impalad_server_name
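The smart_unicode-to-smart_str swap in the exception classes is a Python 3 rename; only __str__ matters there, since Python 3 never calls __unicode__. Assuming desktop.lib.i18n.smart_str behaves like Django's smart_str (returns str), the pattern reduces to this sketch with a hypothetical class name:

    from django.utils.encoding import smart_str  # stand-in for desktop.lib.i18n.smart_str

    class ExampleApiException(Exception):  # hypothetical name, for illustration only
        def __init__(self, message):
            super().__init__(message)
            self.message = message

        def __str__(self):
            return smart_str(self.message)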
diff --git a/desktop/libs/metadata/src/metadata/metadata_sites_tests.py b/desktop/libs/metadata/src/metadata/metadata_sites_tests.py
index 98f3fa19fad..aca62503a00 100644
--- a/desktop/libs/metadata/src/metadata/metadata_sites_tests.py
+++ b/desktop/libs/metadata/src/metadata/metadata_sites_tests.py
@@ -15,22 +15,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import
-from builtins import object
-import logging
import os
import shutil
-import sys
+import logging
import tempfile
-from . import metadata_sites
from metadata.conf import NAVIGATOR
from metadata.metadata_sites import get_navigator_server_url
-if sys.version_info[0] > 2:
- open_file = open
-else:
- open_file = file
+from . import metadata_sites
LOG = logging.getLogger()
@@ -44,7 +37,7 @@ def test_navigator_site(self):
]
try:
- open_file(os.path.join(tmpdir, 'navigator.lineage.client.properties'), 'w').write("""
+ open(os.path.join(tmpdir, 'navigator.lineage.client.properties'), 'w').write("""
navigator.client.serviceType=HUE
navigator.server.url=http://hue-rocks.com:7187
navigator.client.roleName=HUE-1-HUE_SERVER-50cf99601c4bf64e9ccded4c8cd96d12
@@ -62,7 +55,6 @@ def test_navigator_site(self):
reset()
shutil.rmtree(tmpdir)
-
def test_missing_navigator_site(self):
tmpdir = tempfile.mkdtemp()
shutil.rmtree(tmpdir)
@@ -74,7 +66,7 @@ def test_missing_navigator_site(self):
try:
metadata_sites.reset()
- assert get_navigator_server_url() == None
+ assert get_navigator_server_url() is None
finally:
metadata_sites.reset()
for reset in resets:
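The == None to is None change in test_missing_navigator_site fixes flake8 E711; identity cannot be overridden by a class, while equality can:

    class AlwaysEqual(object):
        def __eq__(self, other):
            return True

    value = AlwaysEqual()
    assert (value == None) is True   # noqa: E711 - equality can be overridden
    assert (value is None) is False  # identity cannot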
diff --git a/desktop/libs/metadata/src/metadata/optimizer/base.py b/desktop/libs/metadata/src/metadata/optimizer/base.py
index 612fac7e26c..0c51e0a178b 100644
--- a/desktop/libs/metadata/src/metadata/optimizer/base.py
+++ b/desktop/libs/metadata/src/metadata/optimizer/base.py
@@ -18,13 +18,10 @@
import sys
from builtins import object
-from desktop.lib.exceptions_renderable import PopupException
-from desktop.lib.i18n import smart_unicode
+from django.utils.translation import gettext as _
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from desktop.lib.exceptions_renderable import PopupException
+from desktop.lib.i18n import smart_str
def get_api(user, interface):
@@ -49,7 +46,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
def check_privileges(view_func):
diff --git a/desktop/libs/metadata/src/metadata/optimizer/dummy_client.py b/desktop/libs/metadata/src/metadata/optimizer/dummy_client.py
index 9c855b04683..2dc12938d1c 100644
--- a/desktop/libs/metadata/src/metadata/optimizer/dummy_client.py
+++ b/desktop/libs/metadata/src/metadata/optimizer/dummy_client.py
@@ -16,19 +16,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import sys
+import logging
-from desktop.lib.exceptions_renderable import PopupException
+from django.utils.translation import gettext as _
+from desktop.lib.exceptions_renderable import PopupException
from metadata.optimizer.base import Api
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -37,19 +32,15 @@ class DummyClient(Api):
def __init__(self, user, api_url=None, auth_key=None, auth_key_secret=None, tenant_id=None):
self.user = user
-
def get_tenant(self, cluster_id='default'):
pass
-
def upload(self, data, data_type='queries', source_platform='generic', workload_id=None):
pass
-
def upload_status(self, workload_id):
pass
-
def top_tables(self, workfloadId=None, database_name='default', page_size=1000, startingToken=None, connector=None):
data = {
'results': [{
@@ -82,15 +73,12 @@ def top_tables(self, workfloadId=None, database_name='default', page_size=1000,
return data
-
def table_details(self, database_name, table_name, page_size=100, startingToken=None, connector=None):
return {}
-
def query_compatibility(self, source_platform, target_platform, query, page_size=100, startingToken=None, connector=None):
return {}
-
def query_risk(self, query, source_platform, db_name, page_size=100, startingToken=None, connector=None):
hints = []
response = {}
@@ -101,7 +89,6 @@ def query_risk(self, query, source_platform, db_name, page_size=100, startingTok
'noDDL': response.get('noDDL', []),
}
-
def predict(self, query, source_platform, connector):
hints = []
response = {}
@@ -110,23 +97,19 @@ def predict(self, query, source_platform, connector):
'hints': hints,
}
-
def similar_queries(self, source_platform, query, page_size=100, startingToken=None, connector=None):
raise PopupException(_('Call not supported'))
-
def top_filters(self, db_tables=None, page_size=100, startingToken=None, connector=None):
results = {'results': []}
return results
-
def top_aggs(self, db_tables=None, page_size=100, startingToken=None, connector=None):
results = {'results': []}
return results
-
def top_columns(self, db_tables=None, page_size=100, startingToken=None, connector=None):
results = {
'selectColumns': [{
@@ -145,13 +128,11 @@ def top_columns(self, db_tables=None, page_size=100, startingToken=None, connect
return results
-
def top_joins(self, db_tables=None, page_size=100, startingToken=None, connector=None):
results = {'results': []}
return results
-
def top_databases(self, page_size=100, startingToken=None, connector=None):
results = {'results': []}
diff --git a/desktop/libs/metadata/src/metadata/optimizer/optimizer_client.py b/desktop/libs/metadata/src/metadata/optimizer/optimizer_client.py
index e1cba476e54..f0717692543 100644
--- a/desktop/libs/metadata/src/metadata/optimizer/optimizer_client.py
+++ b/desktop/libs/metadata/src/metadata/optimizer/optimizer_client.py
@@ -16,35 +16,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import json
-import logging
import os
import sys
+import json
import time
import uuid
-
+import logging
+from builtins import object
from tempfile import NamedTemporaryFile
from django.core.cache import cache
from django.utils.functional import wraps
+from django.utils.translation import gettext as _
from desktop.auth.backend import is_admin
-from desktop.lib.exceptions_renderable import PopupException
from desktop.lib import export_csvxls
-from desktop.lib.i18n import smart_unicode
+from desktop.lib.exceptions_renderable import PopupException
+from desktop.lib.i18n import smart_str
from desktop.lib.rest.http_client import RestException
+from libsentry.privilege_checker import MissingSentryPrivilegeException, get_checker
from libsentry.sentry_site import get_hive_sentry_provider
-from libsentry.privilege_checker import get_checker, MissingSentryPrivilegeException
-
from metadata.conf import OPTIMIZER, get_optimizer_url
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
_JSON_CONTENT_TYPE = 'application/json'
@@ -64,7 +57,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
def check_privileges(view_func):
@@ -111,8 +104,7 @@ def __init__(self, user, api_url=None, auth_key=None, auth_key_secret=None, tena
self._api = ApiLib("navopt", self._api_url, self._auth_key, self._auth_key_secret)
- self._tenant_id = tenant_id if tenant_id else _get_tenant_id(self) # Aka "workload"
-
+ self._tenant_id = tenant_id if tenant_id else _get_tenant_id(self) # Aka "workload"
def _call(self, *kwargs):
start_time = time.time()
@@ -130,11 +122,9 @@ def _call(self, *kwargs):
else:
return data
-
def get_tenant(self, cluster_id='default'):
return self._call('getTenant', {'clusterId': cluster_id})
-
def upload(self, data, data_type='queries', source_platform='generic', workload_id=None):
if data_type in ('table_stats', 'cols_stats'):
data_suffix = '.json'
@@ -157,7 +147,7 @@ def upload(self, data, data_type='queries', source_platform='generic', workload_
}
f_queries_path = NamedTemporaryFile(suffix=data_suffix)
- f_queries_path.close() # Reopened as real file below to work well with the command
+ f_queries_path.close() # Reopened as real file below to work well with the command
try:
f_queries = open(f_queries_path.name, 'w+')
@@ -186,7 +176,7 @@ def upload(self, data, data_type='queries', source_platform='generic', workload_
}
parameters.update(extra_parameters)
response = self._api.call_api('upload', parameters)
- status = json.loads(response) # Workaround getting back a string
+ status = json.loads(response) # Workaround getting back a string
status['count'] = len(data)
return status
@@ -196,7 +186,6 @@ def upload(self, data, data_type='queries', source_platform='generic', workload_
finally:
os.remove(f_queries_path.name)
-
def upload_status(self, workload_id):
return self._call('uploadStatus', {'tenant': self._tenant_id, 'workloadId': workload_id})
@@ -213,7 +202,6 @@ def top_tables(self, workfloadId=None, database_name='default', page_size=1000,
}
)
-
@check_privileges
def table_details(self, database_name, table_name, page_size=100, startingToken=None, connector=None):
return self._call(
@@ -227,7 +215,6 @@ def table_details(self, database_name, table_name, page_size=100, startingToken=
}
)
-
def query_compatibility(self, source_platform, target_platform, query, page_size=100, startingToken=None, connector=None):
return self._call(
'getQueryCompatible', {
@@ -240,7 +227,6 @@ def query_compatibility(self, source_platform, target_platform, query, page_size
}
)
-
def query_risk(self, query, source_platform, db_name, page_size=100, startingToken=None, connector=None):
response = self._call(
'getQueryRisk', {
@@ -265,7 +251,6 @@ def query_risk(self, query, source_platform, db_name, page_size=100, startingTok
'noDDL': response.get('noDDL', []),
}
-
def predict(self, before_cursor, after_cursor, connector):
response = self._call(
'predict', {
@@ -282,7 +267,6 @@ def predict(self, before_cursor, after_cursor, connector):
'statement': predictions and predictions[0]['statement']
}
-
def similar_queries(self, source_platform, query, page_size=100, startingToken=None, connector=None):
if is_admin(self.user):
return self._call(
@@ -298,7 +282,6 @@ def similar_queries(self, source_platform, query, page_size=100, startingToken=N
else:
raise PopupException(_('Call not supported'))
-
@check_privileges
def top_filters(self, db_tables=None, page_size=100, startingToken=None, connector=None):
args = {
@@ -312,7 +295,6 @@ def top_filters(self, db_tables=None, page_size=100, startingToken=None, connect
return self._call('getTopFilters', args)
-
@check_privileges
def top_aggs(self, db_tables=None, page_size=100, startingToken=None, connector=None):
args = {
@@ -339,7 +321,6 @@ def getkey(table):
return results
-
@check_privileges
def top_columns(self, db_tables=None, page_size=100, startingToken=None, connector=None):
args = {
@@ -358,7 +339,6 @@ def top_columns(self, db_tables=None, page_size=100, startingToken=None, connect
results[section] = list(_secure_results(results[section], self.user))
return results
-
@check_privileges
def top_joins(self, db_tables=None, page_size=100, startingToken=None, connector=None):
args = {
@@ -381,7 +361,6 @@ def top_joins(self, db_tables=None, page_size=100, startingToken=None, connector
results['results'] = filtered_joins
return results
-
def top_databases(self, page_size=100, startingToken=None, connector=None):
args = {
'tenant': self._tenant_id,
diff --git a/desktop/libs/metadata/src/metadata/optimizer/optimizer_rest_client.py b/desktop/libs/metadata/src/metadata/optimizer/optimizer_rest_client.py
index 871db111352..2bfc352d474 100644
--- a/desktop/libs/metadata/src/metadata/optimizer/optimizer_rest_client.py
+++ b/desktop/libs/metadata/src/metadata/optimizer/optimizer_rest_client.py
@@ -16,22 +16,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import sys
import json
import logging
-import sys
+
+from django.utils.translation import gettext as _
from desktop.lib.rest.http_client import HttpClient
from desktop.lib.rest.resource import Resource
-
from metadata.conf import OPTIMIZER, get_optimizer_url
from metadata.optimizer.optimizer_client import OptimizerClient
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
_JSON_CONTENT_TYPE = 'application/json'
@@ -48,11 +43,10 @@ def __init__(self, user, api_url=None, auth_key=None, auth_key_secret=None, tena
self._api = MockApiLib()
-
def _call(self, path, data):
try:
return self._root.post(path, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
- except:
+ except Exception:
LOG.exception('Error calling Optimize service')
return {}
diff --git a/desktop/libs/metadata/src/metadata/optimizer_api.py b/desktop/libs/metadata/src/metadata/optimizer_api.py
index 5b84bf51d54..ddc9ef3bca9 100644
--- a/desktop/libs/metadata/src/metadata/optimizer_api.py
+++ b/desktop/libs/metadata/src/metadata/optimizer_api.py
@@ -15,14 +15,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import base64
import json
-import logging
import struct
-import sys
+import logging
+from base64 import decodebytes
from django.http import Http404
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
from desktop.auth.backend import is_admin
@@ -30,28 +29,19 @@
from desktop.lib.i18n import force_unicode
from desktop.models import Document2
from libsentry.privilege_checker import MissingSentryPrivilegeException
+from metadata.conf import OPTIMIZER
+from metadata.optimizer.base import get_api
+from metadata.optimizer.optimizer_client import NavOptException, _clean_query, _get_table_name
from notebook.api import _get_statement
from notebook.models import Notebook
from notebook.sql_utils import get_current_statement
-from metadata.optimizer.base import get_api
-from metadata.optimizer.optimizer_client import NavOptException, _get_table_name, _clean_query
-from metadata.conf import OPTIMIZER
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
- from base64 import decodebytes
-else:
- from django.utils.translation import ugettext as _
- from base64 import decodestring as decodebytes
-
LOG = logging.getLogger()
try:
from beeswax.api import get_table_stats
from beeswax.design import hql_query
-
from metastore.views import _get_db
except ImportError as e:
LOG.warning("Hive lib not enabled")
@@ -280,7 +270,7 @@ def top_filters(request):
interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
connector = json.loads(request.POST.get('connector', '{}'))
db_tables = json.loads(request.POST.get('dbTables', '[]'))
- column_name = request.POST.get('columnName') # Unused
+ column_name = request.POST.get('columnName') # Unused
api = get_api(request.user, interface)
@@ -394,7 +384,7 @@ def _convert_queries(queries_data):
for query_data in queries_data:
try:
snippet = query_data['snippets'][0]
- if 'guid' in snippet['result']['handle']: # Not failed query
+ if 'guid' in snippet['result']['handle']: # Not failed query
guid = snippet['result']['handle']['guid']
if isinstance(guid, str):
guid = guid.encode('utf-8')
@@ -499,7 +489,6 @@ def upload_table_stats(request):
if not OPTIMIZER.AUTO_UPLOAD_STATS.get():
with_table_stats = with_columns_stats = False
-
for db_table in db_tables:
path = _get_table_name(db_table)
@@ -520,7 +509,7 @@ def upload_table_stats(request):
stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats'])
table_stats.append({
- 'table_name': '%(database)s.%(table)s' % path, # DB Prefix
+ 'table_name': '%(database)s.%(table)s' % path, # DB Prefix
'num_rows': stats.get('numRows', -1),
'last_modified_time': stats.get('transient_lastDdlTime', -1),
'total_size': stats.get('totalSize', -1),
@@ -554,7 +543,7 @@ def upload_table_stats(request):
for col_stats in raw_column_stats:
column_stats.append({
- 'table_name': '%(database)s.%(table)s' % path, # DB Prefix
+ 'table_name': '%(database)s.%(table)s' % path, # DB Prefix
'column_name': col_stats['col_name'],
'data_type': col_stats['data_type'],
"num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1,
diff --git a/desktop/libs/metadata/src/metadata/optimizer_api_tests.py b/desktop/libs/metadata/src/metadata/optimizer_api_tests.py
index f06e164bd24..19a97cd58a9 100644
--- a/desktop/libs/metadata/src/metadata/optimizer_api_tests.py
+++ b/desktop/libs/metadata/src/metadata/optimizer_api_tests.py
@@ -15,33 +15,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import zip
+import sys
import json
import logging
-import pytest
-import sys
+from builtins import zip
+from unittest.mock import Mock, patch
-from django.urls import reverse
+import pytest
from django.test import TestCase
+from django.urls import reverse
from desktop.auth.backend import rewrite_user
from desktop.conf import ENABLE_ORGANIZATIONS
from desktop.lib.django_test_util import make_logged_in_client
from desktop.lib.test_utils import add_to_group, grant_access
-from useradmin.models import User
-
from metadata.optimizer_api import _convert_queries
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock
-else:
- from mock import patch, Mock
-
+from useradmin.models import User
LOG = logging.getLogger()
-
@pytest.mark.django_db
class TestApi():
@@ -54,7 +47,6 @@ def setup_method(self):
add_to_group('test')
grant_access("test", "test", "metadata")
-
def test_risk_ui_api(self):
snippet = {
"id": "2b7d1f46-17a0-30af-efeb-33d4c29b1055",
@@ -112,13 +104,11 @@ def setup_class(cls):
grant_access("test", "test", "metadata")
grant_access("test", "test", "optimizer")
-
@classmethod
def teardown_class(cls):
cls.user.is_superuser = False
cls.user.save()
-
# Should run first
def test_upload(self):
query_docs = [
diff --git a/desktop/libs/metadata/src/metadata/prometheus_api.py b/desktop/libs/metadata/src/metadata/prometheus_api.py
index 809749ac1ab..838a463c0d7 100644
--- a/desktop/libs/metadata/src/metadata/prometheus_api.py
+++ b/desktop/libs/metadata/src/metadata/prometheus_api.py
@@ -16,24 +16,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import sys
import json
import logging
-import sys
from django.utils.html import escape
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
from desktop.lib.django_util import JsonResponse
from desktop.lib.i18n import force_unicode
-
from metadata.prometheus_client import PrometheusApi
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
diff --git a/desktop/libs/metadata/src/metadata/prometheus_client.py b/desktop/libs/metadata/src/metadata/prometheus_client.py
index 93643c1dd04..9b7bbe1b729 100644
--- a/desktop/libs/metadata/src/metadata/prometheus_client.py
+++ b/desktop/libs/metadata/src/metadata/prometheus_client.py
@@ -16,24 +16,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
import sys
+import logging
+from builtins import object
from django.core.cache import cache
+from django.utils.translation import gettext as _
-from desktop.lib.rest.http_client import RestException, HttpClient
+from desktop.lib.i18n import smart_str
+from desktop.lib.rest.http_client import HttpClient, RestException
from desktop.lib.rest.resource import Resource
-from desktop.lib.i18n import smart_unicode
-
from metadata.conf import PROMETHEUS
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
VERSION = 'v1'
@@ -46,7 +40,7 @@ def __str__(self):
return str(self.message)
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
class PrometheusApi(object):
@@ -60,7 +54,6 @@ def __init__(self, user=None, ssl_cert_ca_verify=False):
self._client.set_verify(ssl_cert_ca_verify)
self._root = Resource(self._client)
-
def query(self, query):
try:
return self._root.get('query', {
diff --git a/desktop/libs/metadata/src/metadata/urls.py b/desktop/libs/metadata/src/metadata/urls.py
index 1353de30086..4a521164906 100644
--- a/desktop/libs/metadata/src/metadata/urls.py
+++ b/desktop/libs/metadata/src/metadata/urls.py
@@ -17,16 +17,17 @@
import sys
-from metadata import catalog_api as metadata_catalog_api, analytic_db_api, dataeng_api, prometheus_api
-from metadata import optimizer_api as metadata_optimizer_api
-from metadata import workload_analytics_api as metadata_workload_analytics_api
-from metadata import manager_api as metadata_manager_api
-
-if sys.version_info[0] > 2:
- from django.urls import re_path
-else:
- from django.conf.urls import url as re_path
-
+from django.urls import re_path
+
+from metadata import (
+ analytic_db_api,
+ catalog_api as metadata_catalog_api,
+ dataeng_api,
+ manager_api as metadata_manager_api,
+ optimizer_api as metadata_optimizer_api,
+ prometheus_api,
+ workload_analytics_api as metadata_workload_analytics_api,
+)
# Catalog
urlpatterns = [
@@ -78,7 +79,7 @@
re_path(r'^api/optimizer/upload/table_stats/?$', metadata_optimizer_api.upload_table_stats, name='upload_table_stats'),
re_path(r'^api/optimizer/upload/status/?$', metadata_optimizer_api.upload_status, name='upload_status'),
- #v2
+ # v2
re_path(r'^api/optimizer/get_tenant/?$', metadata_optimizer_api.get_tenant, name='get_tenant'),
re_path(r'^api/optimizer/top_databases/?$', metadata_optimizer_api.top_databases, name='top_databases'),
diff --git a/desktop/libs/metadata/src/metadata/workload_analytics_api.py b/desktop/libs/metadata/src/metadata/workload_analytics_api.py
index 0d37689d35d..8f3a295a87b 100644
--- a/desktop/libs/metadata/src/metadata/workload_analytics_api.py
+++ b/desktop/libs/metadata/src/metadata/workload_analytics_api.py
@@ -15,23 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-import json
import sys
+import json
+import logging
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
from desktop.lib.django_util import JsonResponse
from desktop.lib.i18n import force_unicode
-
from metadata.workload_analytics_client import WorkfloadAnalyticsClient
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
diff --git a/desktop/libs/metadata/src/metadata/workload_analytics_client.py b/desktop/libs/metadata/src/metadata/workload_analytics_client.py
index 731381c87b8..032a0eaefe8 100644
--- a/desktop/libs/metadata/src/metadata/workload_analytics_client.py
+++ b/desktop/libs/metadata/src/metadata/workload_analytics_client.py
@@ -15,17 +15,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
import sys
+import logging
+from builtins import object
-from notebook.connectors.altus import _exec
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from django.utils.translation import gettext as _
+from notebook.connectors.altus import _exec
LOG = logging.getLogger()
@@ -51,7 +47,6 @@ def get_mr_task_attempt_log(self, operation_execution_id, attempt_id):
return WorkloadAnalytics(self.user).get_mr_task_attempt_log(operation_execution_id=operation_execution_id, attempt_id=attempt_id)
-
class WorkloadAnalytics(object):
def __init__(self, user): pass
@@ -61,15 +56,12 @@ def get_impala_query(self, cluster, query_id):
return _exec('wa', 'getImpalaQuery', parameters=parameters)
-
def list_uploads(self):
return _exec('wa', 'listUploads')
-
def list_environments(self):
return _exec('wa', 'listEnvironments')
-
def get_operation_execution_details(self, operation_id, include_tree=False):
parameters = {'id': operation_id}
@@ -78,7 +70,6 @@ def get_operation_execution_details(self, operation_id, include_tree=False):
return _exec('wa', 'getOperationExecutionDetails', parameters=parameters)
-
def get_mr_task_attempt_log(self, operation_execution_id, attempt_id):
parameters = {'operationExecutionId': operation_execution_id, 'attemptId': attempt_id}
diff --git a/desktop/libs/notebook/src/notebook/api.py b/desktop/libs/notebook/src/notebook/api.py
index 2c4739c8a5e..4f68b2c3b52 100644
--- a/desktop/libs/notebook/src/notebook/api.py
+++ b/desktop/libs/notebook/src/notebook/api.py
@@ -15,14 +15,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import sys
import json
import logging
+from urllib.parse import unquote as urllib_unquote
import sqlparse
import opentracing.tracer
from django.db.models import Q
from django.urls import reverse
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_GET, require_POST
from azure.abfs.__init__ import abfspath
@@ -36,20 +37,9 @@
from metadata.conf import OPTIMIZER
from notebook.conf import EXAMPLES
from notebook.connectors.base import Notebook, QueryError, QueryExpired, SessionExpired, _get_snippet_name, patch_snippet_for_connector
-from notebook.connectors.hiveserver2 import HS2Api
from notebook.decorators import api_error_handler, check_document_access_permission, check_document_modify_permission
from notebook.models import _get_dialect_example, escape_rows, get_api, make_notebook, upgrade_session_properties
-if sys.version_info[0] > 2:
- from urllib.parse import unquote as urllib_unquote
-
- from django.utils.translation import gettext as _
-else:
- from urllib import unquote as urllib_unquote
-
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
DEFAULT_HISTORY_NAME = ''
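notebook/api.py likewise keeps a single Python 3 import for URL unquoting; the former urllib/urllib.parse branch collapses to:

    from urllib.parse import unquote as urllib_unquote

    assert urllib_unquote('a%20b') == 'a b'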
diff --git a/desktop/libs/notebook/src/notebook/api_tests.py b/desktop/libs/notebook/src/notebook/api_tests.py
index 839e24e36c0..84da9a15147 100644
--- a/desktop/libs/notebook/src/notebook/api_tests.py
+++ b/desktop/libs/notebook/src/notebook/api_tests.py
@@ -815,6 +815,7 @@ def test_get_interpreters_to_show():
resets.append(INTERPRETERS_SHOWN_ON_WHEEL.set_for_testing('java,pig'))
+ # 'get_interpreters_to_show did not return interpreters in the correct order expected'
assert (
list(expected_interpreters.values()) == get_ordered_interpreters()
), 'get_interpreters_to_show did not return interpreters in the correct order expected'
diff --git a/desktop/libs/notebook/src/notebook/conf.py b/desktop/libs/notebook/src/notebook/conf.py
index 38b64516903..11f9fc72d6b 100644
--- a/desktop/libs/notebook/src/notebook/conf.py
+++ b/desktop/libs/notebook/src/notebook/conf.py
@@ -158,6 +158,7 @@ def computes_for_dialect(dialect, user):
# cf. admin wizard too
+
INTERPRETERS = UnspecifiedConfigSection(
"interpreters",
help="One entry for each type of snippet.",
diff --git a/desktop/libs/notebook/src/notebook/conf_tests.py b/desktop/libs/notebook/src/notebook/conf_tests.py
index 77ebb655ab5..a28630c5c2e 100644
--- a/desktop/libs/notebook/src/notebook/conf_tests.py
+++ b/desktop/libs/notebook/src/notebook/conf_tests.py
@@ -15,25 +15,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import sys
import json
-import pytest
import unittest
-import sys
+from unittest.mock import Mock, patch
+import pytest
from django.test import TestCase
+
from desktop.auth.backend import rewrite_user
from desktop.conf import ENABLE_CONNECTORS
from desktop.lib.connectors.api import _get_installed_connectors
from desktop.lib.django_test_util import make_logged_in_client
-from useradmin.models import User, update_app_permissions, get_default_user_group
-
-from notebook.conf import config_validator, get_ordered_interpreters, _excute_test_query
-
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock
-else:
- from mock import patch, Mock
+from notebook.conf import _excute_test_query, config_validator, get_ordered_interpreters
+from useradmin.models import User, get_default_user_group, update_app_permissions
class TestInterpreterConfig(TestCase):
@@ -59,7 +54,6 @@ def teardown_class(cls):
for reset in cls._class_resets:
reset()
-
def test_get_ordered_interpreters(self):
with patch('desktop.lib.connectors.api._get_installed_connectors') as _get_installed_connectors:
_get_installed_connectors.return_value = [{
@@ -128,7 +122,6 @@ def test_config_validator(self, has_connectors):
assert not warnings, warnings
-
_excute_test_query.side_effect = Exception('')
connectors = _get_installed_connectors(user=self.user)
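The test modules now take Mock and patch from the standard library; the external mock backport was only ever needed on Python 2. A minimal stdlib-only usage example:

    import json
    from unittest.mock import patch

    with patch('json.loads', return_value={'status': 0}) as mocked:
        assert json.loads('ignored') == {'status': 0}
        assert mocked.called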
diff --git a/desktop/libs/notebook/src/notebook/connectors/altus.py b/desktop/libs/notebook/src/notebook/connectors/altus.py
index 4f25a59f3f5..22183c94121 100644
--- a/desktop/libs/notebook/src/notebook/connectors/altus.py
+++ b/desktop/libs/notebook/src/notebook/connectors/altus.py
@@ -15,25 +15,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
-import json
import sys
-
+import json
+import logging
+from builtins import object
from datetime import datetime, timedelta
from django.urls import reverse
+from django.utils.translation import gettext as _
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.rest.http_client import HttpClient
from desktop.lib.rest.resource import Resource
from metadata.conf import ALTUS, K8S
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
DATE_FORMAT = "%Y-%m-%d"
@@ -72,7 +67,8 @@ def _exec(service, command, parameters=None):
raise PopupException(e, title=_('Error accessing'))
-class IAMApi(object): pass
+class IAMApi(object):
+ pass
# altus iam list-user-assigned-roles --user=crn:altus:ia
@@ -142,9 +138,9 @@ def submit_hive_job(self, cluster_name, script, params=None, job_xml=None):
job = {'script': script}
if params:
- job['params'] = params
+ job['params'] = params
if job_xml:
- job['jobXml'] = job_xml
+ job['jobXml'] = job_xml
return self.submit_jobs(cluster_name, [{'hiveJob': job}])
@@ -152,7 +148,7 @@ def submit_spark_job(self, cluster_name, jars=None, main_class=None, arguments=N
job = {
"jars": jars if jars else [],
"applicationArguments": arguments if arguments else [],
- #"propertiesFile": "string"
+ # "propertiesFile": "string"
}
if spark_arguments:
job['sparkArguments'] = ' '.join(spark_arguments)
@@ -186,14 +182,14 @@ def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key,
# [--cloudera-manager-username ]
# [--cloudera-manager-password ]
- params = { # cloud_provider: AWS, Azure...
+ params = { # cloud_provider: AWS, Azure...
'clusterName': cluster_name,
'cdhVersion': cdh_version,
'publicKey': public_key,
'instanceType': instance_type,
'environmentName': environment_name,
'workersGroupSize': workers_group_size,
- #'automaticTerminationCondition': "EMPTY_JOB_QUEUE"
+ # 'automaticTerminationCondition': "EMPTY_JOB_QUEUE"
}
if namespace_name:
@@ -294,7 +290,6 @@ def __init__(self, user=None):
self._client.set_verify(False)
self._root = Resource(self._client)
-
def list_k8_clusters(self):
clusters = self._root.post('listClusters', contenttype="application/json")
for cluster in clusters['clusters']:
@@ -305,7 +300,6 @@ def list_k8_clusters(self):
cluster['creationDate'] = str(datetime.now())
return clusters
-
def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key, instance_type, environment_name, workers_group_size=3, namespace_name=None,
cloudera_manager_username='hue', cloudera_manager_password='hue'):
data = {
@@ -319,25 +313,22 @@ def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key,
return self._root.post('createCluster', data=json.dumps(data), contenttype="application/json")
-
def list_clusters(self):
clusters = self._root.post('listClusters', contenttype="application/json")
for cluster in clusters['clusters']:
cluster['clusterName'] = cluster['name']
cluster['workersGroupSize'] = cluster['workerReplicas']
- cluster['instanceType'] = 'Data Warehouse'# '%(workerCpuCores)s CPU %(workerMemoryInGib)s Memory' % cluster
+ cluster['instanceType'] = 'Data Warehouse' # '%(workerCpuCores)s CPU %(workerMemoryInGib)s Memory' % cluster
cluster['progress'] = '%(workerReplicasOnline)s / %(workerReplicas)s' % cluster
cluster['creationDate'] = str(datetime.now())
return clusters
-
def delete_cluster(self, cluster_id):
data = json.dumps({'clusterName': cluster_id})
return {
'result': self._root.post('deleteCluster', data=data, contenttype="application/json")
}
-
def describe_cluster(self, cluster_id):
data = json.dumps({'clusterName': cluster_id})
data = self._root.post('describeCluster', data=data, contenttype="application/json")
@@ -345,6 +336,5 @@ def describe_cluster(self, cluster_id):
data['cluster']['cdhVersion'] = 'Data Warehouse'
return data
-
def update_cluster(self, **params):
return self._root.post('updateCluster', data=json.dumps(params), contenttype="application/json")
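
The recurring change across these connector modules is dropping the sys.version_info guard around the translation import: Python 3 is assumed, so gettext is imported unconditionally at the top. A minimal sketch of the before/after shape, using stdlib names instead of the Django import so it runs without a configured Django project:

import sys

# Old pattern (removed throughout this patch): version-gated import.
if sys.version_info[0] > 2:
    from html import escape
else:
    from cgi import escape  # Python 2 branch, now dead code

# New pattern: import once at module level, exactly like
# "from django.utils.translation import gettext as _" above.
from html import escape

print(escape('<b>done</b>'))  # &lt;b&gt;done&lt;/b&gt;
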
diff --git a/desktop/libs/notebook/src/notebook/connectors/altus_adb.py b/desktop/libs/notebook/src/notebook/connectors/altus_adb.py
index 0926102b598..f0621ff85a7 100644
--- a/desktop/libs/notebook/src/notebook/connectors/altus_adb.py
+++ b/desktop/libs/notebook/src/notebook/connectors/altus_adb.py
@@ -15,26 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-from builtins import object
-import logging
import json
-import sys
+import logging
+import urllib.error
+import urllib.request
+from urllib.parse import quote as urllib_quote, quote_plus as urllib_quote_plus
-from django.urls import reverse
+from django.utils.translation import gettext as _
from notebook.connectors.altus import AnalyticDbApi
from notebook.connectors.base import Api, QueryError
-if sys.version_info[0] > 2:
- import urllib.request, urllib.error
- from urllib.parse import quote as urllib_quote, quote_plus as urllib_quote_plus
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
- from urllib import quote as urllib_quote, quote_plus as urllib_quote_plus
-
LOG = logging.getLogger()
@@ -47,41 +38,33 @@ def __init__(self, user, cluster_name, interpreter=None, request=None):
Api.__init__(self, user, interpreter=interpreter, request=request)
self.cluster_name = cluster_name
-
def execute(self, notebook, snippet):
statement = snippet['statement']
return HueQuery(self.user, cluster_crn=self.cluster_name).do_execute(statement)
-
def check_status(self, notebook, snippet):
handle = snippet['result']['handle']
return HueQuery(self.user, cluster_crn=self.cluster_name).do_check_status(handle)
-
def fetch_result(self, notebook, snippet, rows, start_over):
handle = snippet['result']['handle']
return HueQuery(self.user, cluster_crn=self.cluster_name).do_fetch_result(handle)
-
def close_statement(self, notebook, snippet):
return {'status': -1}
-
def cancel(self, notebook, snippet):
return {'status': -1, 'message': _('Could not cancel.')}
-
def get_log(self, notebook, snippet, startFrom=0, size=None):
return '...'
-
def get_jobs(self, notebook, snippet, logs):
return []
-
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
url_path = '/notebook/api/autocomplete'
@@ -104,7 +87,7 @@ def __init__(self, user, cluster_crn):
self.api = AnalyticDbApi(self.user)
def do_post(self, url_path):
- payload = '''{"method":"POST","url":"https://localhost:8888''' + url_path +'''","httpVersion":"HTTP/1.1","headers":[{"name":"Accept-Encoding","value":"gzip, deflate, br"},{"name":"Content-Type","value":"application/x-www-form-urlencoded; charset=UTF-8"},{"name":"Accept","value":"*/*"},{"name":"X-Requested-With","value":"XMLHttpRequest"},{"name":"Connection","value":"keep-alive"}],"queryString":[],"postData": {
+ payload = '''{"method":"POST","url":"https://localhost:8888''' + url_path + '''","httpVersion":"HTTP/1.1","headers":[{"name":"Accept-Encoding","value":"gzip, deflate, br"},{"name":"Content-Type","value":"application/x-www-form-urlencoded; charset=UTF-8"},{"name":"Accept","value":"*/*"},{"name":"X-Requested-With","value":"XMLHttpRequest"},{"name":"Connection","value":"keep-alive"}],"queryString":[],"postData": {
"mimeType": "application/x-www-form-urlencoded; charset=UTF-8",
"text": "snippet=%7B%22type%22%3A%22impala%22%2C%22source%22%3A%22data%22%7D",
"params": [
@@ -178,10 +161,9 @@ def do_execute(self, query):
else:
raise QueryError(resp.get('message'))
-
def do_check_status(self, handle):
- notebook = {"type":"impala", "name": "query", "isSaved": False, "sessions": [], "snippets": [{"id": "1234", "type":"impala","statement_raw": "SHOW DATABASES", "result": {"handle": {} }}]}
- snippet = {"id": "1234", "type": "impala", "statement":"SHOW DATABASES", "status": "running", "result": {'handle': {"log_context":None,"statements_count":1,"end":{"column":13,"row":0},"statement_id":0,"has_more_statements":False,"start":{"column":0,"row":0},"secret":"3h9WBnLbTUYAAAAAPQjxlQ==\n","has_result_set":True,"session_guid":"qcrpEBmCTGacxfhM+CxbkQ==\n","statement":"SHOW DATABASES","operation_type":0,"modified_row_count":None,"guid":"3h9WBnLbTUYAAAAAPQjxlQ==\n","previous_statement_hash":"5b1f14102d749be7b41da376bcdbb64f993ce00bc46e3aab0b8008c4"}}, "properties": {}}
+ notebook = {"type": "impala", "name": "query", "isSaved": False, "sessions": [], "snippets": [{"id": "1234", "type": "impala", "statement_raw": "SHOW DATABASES", "result": {"handle": {}}}]}
+ snippet = {"id": "1234", "type": "impala", "statement": "SHOW DATABASES", "status": "running", "result": {'handle': {"log_context": None, "statements_count": 1, "end": {"column": 13, "row": 0}, "statement_id": 0, "has_more_statements": False, "start": {"column": 0, "row": 0}, "secret": "3h9WBnLbTUYAAAAAPQjxlQ==\n", "has_result_set": True, "session_guid": "qcrpEBmCTGacxfhM+CxbkQ==\n", "statement": "SHOW DATABASES", "operation_type": 0, "modified_row_count": None, "guid": "3h9WBnLbTUYAAAAAPQjxlQ==\n", "previous_statement_hash": "5b1f14102d749be7b41da376bcdbb64f993ce00bc46e3aab0b8008c4"}}, "properties": {}}
snippet['result']['handle'] = handle
@@ -242,10 +224,9 @@ def do_check_status(self, handle):
else:
return resp_payload
-
def do_fetch_result(self, handle):
- notebook = {"type":"impala", "name": "query", "isSaved": False, "sessions": [], "snippets": [{"id": "1234", "type":"impala","statement_raw": "SHOW DATABASES", "result": {"handle": {} }}]}
- snippet = {"id": "1234", "type": "impala", "statement":"SHOW DATABASES", "status": "running", "result": {'handle': {"log_context":None,"statements_count":1,"end":{"column":13,"row":0},"statement_id":0,"has_more_statements":False,"start":{"column":0,"row":0},"secret":"3h9WBnLbTUYAAAAAPQjxlQ==\n","has_result_set":True,"session_guid":"qcrpEBmCTGacxfhM+CxbkQ==\n","statement":"SHOW DATABASES","operation_type":0,"modified_row_count":None,"guid":"3h9WBnLbTUYAAAAAPQjxlQ==\n","previous_statement_hash":"5b1f14102d749be7b41da376bcdbb64f993ce00bc46e3aab0b8008c4"}}, "properties": {}}
+ notebook = {"type": "impala", "name": "query", "isSaved": False, "sessions": [], "snippets": [{"id": "1234", "type": "impala", "statement_raw": "SHOW DATABASES", "result": {"handle": {}}}]}
+ snippet = {"id": "1234", "type": "impala", "statement": "SHOW DATABASES", "status": "running", "result": {'handle': {"log_context": None, "statements_count": 1, "end": {"column": 13, "row": 0}, "statement_id": 0, "has_more_statements": False, "start": {"column": 0, "row": 0}, "secret": "3h9WBnLbTUYAAAAAPQjxlQ==\n", "has_result_set": True, "session_guid": "qcrpEBmCTGacxfhM+CxbkQ==\n", "statement": "SHOW DATABASES", "operation_type": 0, "modified_row_count": None, "guid": "3h9WBnLbTUYAAAAAPQjxlQ==\n", "previous_statement_hash": "5b1f14102d749be7b41da376bcdbb64f993ce00bc46e3aab0b8008c4"}}, "properties": {}}
rows = 100
start_over = True
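
The urllib imports above get the same treatment: the Python 2 fallback (from urllib import quote, ...) disappears and only the urllib.parse names remain under their historical aliases. A quick runnable sketch of the two helpers kept here:

from urllib.parse import quote as urllib_quote, quote_plus as urllib_quote_plus

print(urllib_quote('a b/c'))       # a%20b/c   (slashes are preserved)
print(urllib_quote_plus('a b/c'))  # a+b%2Fc   (spaces become '+', slashes are escaped)
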
diff --git a/desktop/libs/notebook/src/notebook/connectors/base.py b/desktop/libs/notebook/src/notebook/connectors/base.py
index 03491cf169e..22c770868c0 100644
--- a/desktop/libs/notebook/src/notebook/connectors/base.py
+++ b/desktop/libs/notebook/src/notebook/connectors/base.py
@@ -16,7 +16,6 @@
# limitations under the License.
import re
-import sys
import json
import time
import uuid
@@ -24,23 +23,18 @@
from builtins import object
from django.utils.encoding import smart_str
+from django.utils.translation import gettext as _
from beeswax.common import find_compute, is_compute
from desktop.auth.backend import is_admin
from desktop.conf import TASK_SERVER, has_connectors, is_cdw_compute_enabled
from desktop.lib import export_csvxls
from desktop.lib.exceptions_renderable import PopupException
-from desktop.lib.i18n import smart_unicode
+from desktop.lib.i18n import smart_str
from metadata.optimizer.base import get_api as get_optimizer_api
from notebook.conf import get_ordered_interpreters
from notebook.sql_utils import get_current_statement
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -80,7 +74,7 @@ def __init__(self, message, handle=None):
self.extra = {}
def __unicode__(self):
- return smart_unicode(self.message)
+ return smart_str(self.message)
class Notebook(object):
diff --git a/desktop/libs/notebook/src/notebook/connectors/base_tests.py b/desktop/libs/notebook/src/notebook/connectors/base_tests.py
index 33d9298c90c..0fe73918aa9 100644
--- a/desktop/libs/notebook/src/notebook/connectors/base_tests.py
+++ b/desktop/libs/notebook/src/notebook/connectors/base_tests.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-## -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
# Licensed to Cloudera, Inc. under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,22 +16,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import json
-import pytest
import sys
+import json
+from builtins import object
+from unittest.mock import MagicMock, Mock, patch
+import pytest
from django.urls import reverse
from desktop.lib.django_test_util import make_logged_in_client
-from useradmin.models import User
-
from notebook.connectors.base import Notebook, get_api
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock, MagicMock
-else:
- from mock import patch, Mock, MagicMock
+from useradmin.models import User
@pytest.mark.django_db
@@ -41,7 +36,6 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="empty", recreate=True, is_superuser=False)
self.user = User.objects.get(username="test")
-
def test_get_api(self):
request = Mock()
snippet = {
@@ -51,7 +45,6 @@ def test_get_api(self):
get_api(request=request, snippet=snippet)
-
def test_execute_and_wait(self):
query = Notebook()
@@ -72,7 +65,6 @@ def test_execute_and_wait(self):
assert 2 == query.check_status.call_count
-
def test_check_status(self):
query = Notebook()
@@ -90,7 +82,6 @@ def test_check_status(self):
assert 0 == resp['status']
assert 0 == resp['query_status']['status']
-
def test_statement_with_variables(self):
snippet = {
'statement_raw': "SELECT * FROM table WHERE city='${city}'",
@@ -112,6 +103,8 @@ def test_statement_with_variables(self):
iteration = 0
+
+
def check_status_side_effect(request, operation_id):
"""First time query is still running, second time the execution is finished."""
global iteration
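
The two blank lines added above make check_status_side_effect a properly separated module-level helper. A self-contained sketch of the pattern it implements with unittest.mock, where the first call reports the query as running and the second as finished; the payload shape and names below are illustrative, not copied from the test:

from unittest.mock import MagicMock

state = {'calls': 0}

def check_status_side_effect(*args, **kwargs):
    """First time the query is still running, second time the execution is finished."""
    state['calls'] += 1
    status = 'running' if state['calls'] == 1 else 'available'
    return {'status': 0, 'query_status': {'status': status}}

api = MagicMock()
api.check_status.side_effect = check_status_side_effect

assert api.check_status()['query_status']['status'] == 'running'
assert api.check_status()['query_status']['status'] == 'available'
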
diff --git a/desktop/libs/notebook/src/notebook/connectors/dataeng.py b/desktop/libs/notebook/src/notebook/connectors/dataeng.py
index 204441262f5..0048ed8e3b1 100644
--- a/desktop/libs/notebook/src/notebook/connectors/dataeng.py
+++ b/desktop/libs/notebook/src/notebook/connectors/dataeng.py
@@ -15,23 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import re
import sys
+import logging
from django.urls import reverse
+from django.utils.translation import gettext as _
+from jobbrowser.apis.data_eng_api import RUNNING_STATES
from metadata.workload_analytics_client import WorkfloadAnalyticsClient
-
from notebook.connectors.altus import DataEngApi as AltusDataEngApi
from notebook.connectors.base import Api, QueryError
-from jobbrowser.apis.data_eng_api import RUNNING_STATES
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
LOG = logging.getLogger()
@@ -42,7 +36,6 @@ def __init__(self, user, cluster_name, interpreter=None, request=None):
Api.__init__(self, user, interpreter=interpreter, request=request)
self.cluster_name = cluster_name
-
def execute(self, notebook, snippet):
if snippet['type'] == 'spark2':
@@ -72,7 +65,6 @@ def execute(self, notebook, snippet):
'has_result_set': False,
}
-
def check_status(self, notebook, snippet):
response = {'status': 'running'}
@@ -90,7 +82,6 @@ def check_status(self, notebook, snippet):
return response
-
def fetch_result(self, notebook, snippet, rows, start_over):
return {
'data': [[_('Job successfully completed.')]],
@@ -99,7 +90,6 @@ def fetch_result(self, notebook, snippet, rows, start_over):
'has_more': False,
}
-
def cancel(self, notebook, snippet):
if snippet['result']['handle'].get('id'):
job_id = snippet['result']['handle']['id']
@@ -110,7 +100,6 @@ def cancel(self, notebook, snippet):
return response
-
def get_log(self, notebook, snippet, startFrom=0, size=None):
# Currently no way to get the logs properly easily
@@ -122,22 +111,19 @@ def get_log(self, notebook, snippet, startFrom=0, size=None):
# (.*?)(?=<<< Invocation of Beeline command completed <<<)', logs['stdout'], re.DOTALL))
return ''
-
def get_jobs(self, notebook, snippet, logs):
- ## 50cf0e00-746b-4d86-b8e3-f2722296df71
+ # 50cf0e00-746b-4d86-b8e3-f2722296df71
job_id = snippet['result']['handle']['id']
return [{
'name': job_id,
'url': reverse('jobbrowser:jobbrowser.views.apps') + '#!' + job_id,
'started': True,
- 'finished': False # Would need call to check_status
+      'finished': False  # Would need a call to check_status

}
]
-
def close_statement(self, notebook, snippet):
pass
-
def close_session(self, session):
pass
diff --git a/desktop/libs/notebook/src/notebook/connectors/flink_sql.py b/desktop/libs/notebook/src/notebook/connectors/flink_sql.py
index cf14d509343..2eeb72f3217 100644
--- a/desktop/libs/notebook/src/notebook/connectors/flink_sql.py
+++ b/desktop/libs/notebook/src/notebook/connectors/flink_sql.py
@@ -17,23 +17,18 @@
from __future__ import absolute_import
-import logging
+import sys
import json
+import logging
import posixpath
-import sys
+
+from django.utils.translation import gettext as _
from desktop.lib.i18n import force_unicode
from desktop.lib.rest.http_client import HttpClient, RestException
from desktop.lib.rest.resource import Resource
-
from notebook.connectors.base import Api, QueryError
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
_JSON_CONTENT_TYPE = 'application/json'
_API_VERSION = 'v1'
@@ -50,7 +45,7 @@ def decorator(*args, **kwargs):
except RestException as e:
try:
message = force_unicode(json.loads(e.message)['errors'])
- except:
+ except Exception:
message = e.message
message = force_unicode(message)
raise QueryError(message)
@@ -60,7 +55,6 @@ def decorator(*args, **kwargs):
return decorator
-
class FlinkSqlApi(Api):
def __init__(self, user, interpreter=None):
@@ -71,7 +65,6 @@ def __init__(self, user, interpreter=None):
self.db = FlinkSqlClient(user=user, api_url=api_url)
-
@query_error_handler
def create_session(self, lang=None, properties=None):
session = self._get_session()
@@ -105,7 +98,6 @@ def _get_session(self):
return SESSIONS[session_key]
-
@query_error_handler
def execute(self, notebook, snippet):
global n
@@ -146,7 +138,6 @@ def execute(self, notebook, snippet):
}
}
-
@query_error_handler
def check_status(self, notebook, snippet):
global n
@@ -182,13 +173,12 @@ def check_status(self, notebook, snippet):
return response
-
@query_error_handler
def fetch_result(self, notebook, snippet, rows, start_over):
global n
session = self._get_session()
statement_id = snippet['result']['handle']['guid']
- token = n #rows
+ token = n # rows
resp = self.db.fetch_results(session['id'], job_id=statement_id, token=token)
@@ -209,7 +199,6 @@ def fetch_result(self, notebook, snippet, rows, start_over):
'type': 'table'
}
-
@query_error_handler
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
response = {}
@@ -231,7 +220,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N
return response
-
@query_error_handler
def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None):
if operation == 'hello':
@@ -250,7 +238,6 @@ def get_sample_data(self, snippet, database=None, table=None, column=None, is_as
return response
-
def cancel(self, notebook, snippet):
session = self._get_session()
statement_id = snippet['result']['handle']['guid']
@@ -259,7 +246,7 @@ def cancel(self, notebook, snippet):
if session and statement_id:
self.db.close_statement(session_id=session['id'], job_id=statement_id)
else:
- return {'status': -1} # missing operation ids
+ return {'status': -1} # missing operation ids
except Exception as e:
if 'does not exist in current session:' in str(e):
return {'status': -1} # skipped
@@ -268,14 +255,12 @@ def cancel(self, notebook, snippet):
return {'status': 0}
-
def close_session(self, session):
# Avoid closing session on page refresh or editor close for now
pass
# session = self._get_session()
# self.db.close_session(session['id'])
-
def _show_databases(self):
session = self._get_session()
session_id = session['id']
@@ -284,7 +269,6 @@ def _show_databases(self):
return [db[0] for db in resp['results'][0]['data']]
-
def _show_tables(self, database):
session = self._get_session()
session_id = session['id']
@@ -294,7 +278,6 @@ def _show_tables(self, database):
return [table[0] for table in resp['results'][0]['data']]
-
def _get_columns(self, database, table):
session = self._get_session()
session_id = session['id']
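
Besides the import shuffle, the flink_sql hunk tightens a bare except to except Exception in the query error handler. A small sketch of the difference; the JSON error payload here is made up:

import json

def extract_errors(message):
    try:
        return json.loads(message)['errors']
    except Exception:  # was a bare "except:", which would also swallow SystemExit and KeyboardInterrupt
        return message

assert extract_errors('{"errors": ["table not found"]}') == ['table not found']
assert extract_errors('not json at all') == 'not json at all'
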
diff --git a/desktop/libs/notebook/src/notebook/connectors/hbase.py b/desktop/libs/notebook/src/notebook/connectors/hbase.py
index e2d1b28de8d..a1b0aab8ec8 100644
--- a/desktop/libs/notebook/src/notebook/connectors/hbase.py
+++ b/desktop/libs/notebook/src/notebook/connectors/hbase.py
@@ -17,22 +17,16 @@
from __future__ import absolute_import
-import logging
import sys
+import logging
from django.urls import reverse
+from django.utils.translation import gettext as _
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.i18n import force_unicode
-
from notebook.connectors.base import Api, QueryError
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
diff --git a/desktop/libs/notebook/src/notebook/connectors/hive_metastore.py b/desktop/libs/notebook/src/notebook/connectors/hive_metastore.py
index 373211d94dd..efe62313637 100644
--- a/desktop/libs/notebook/src/notebook/connectors/hive_metastore.py
+++ b/desktop/libs/notebook/src/notebook/connectors/hive_metastore.py
@@ -15,23 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import sys
+import logging
from django.urls import reverse
+from django.utils.translation import gettext as _
from desktop.lib.exceptions import StructuredException
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.i18n import force_unicode, smart_str
from desktop.lib.rest.http_client import RestException
-
-from notebook.connectors.base import Api, QueryError, QueryExpired, OperationTimeout, OperationNotSupported
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from notebook.connectors.base import Api, OperationNotSupported, OperationTimeout, QueryError, QueryExpired
LOG = logging.getLogger()
@@ -39,7 +33,7 @@
try:
from beeswax.api import _autocomplete
from beeswax.server import dbms
- from beeswax.server.dbms import get_query_server_config, QueryServerException
+ from beeswax.server.dbms import QueryServerException, get_query_server_config
except ImportError as e:
LOG.warning('Hive and HiveMetastoreServer interfaces are not enabled: %s' % e)
hive_settings = None
@@ -72,11 +66,9 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N
return _autocomplete(db, database, table, column, nested, query=None, cluster=self.cluster)
-
@query_error_handler
def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None):
return []
-
def _get_db(self, snippet, is_async=False, cluster=None):
return dbms.get(self.user, query_server=get_query_server_config(name='hms', cluster=cluster))
diff --git a/desktop/libs/notebook/src/notebook/connectors/hiveserver2.py b/desktop/libs/notebook/src/notebook/connectors/hiveserver2.py
index 4a8eda35834..21a24d6b225 100644
--- a/desktop/libs/notebook/src/notebook/connectors/hiveserver2.py
+++ b/desktop/libs/notebook/src/notebook/connectors/hiveserver2.py
@@ -340,9 +340,9 @@ def execute(self, notebook, snippet):
# All good
server_id, server_guid = handle.get()
- if sys.version_info[0] > 2:
- server_id = server_id.decode('utf-8')
- server_guid = server_guid.decode('utf-8')
+
+ server_id = server_id.decode('utf-8')
+ server_guid = server_guid.decode('utf-8')
response = {
'secret': server_id,
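
The hiveserver2.py change drops the version check around decoding the operation handle, since under Python 3 the Thrift secret and guid always come back as bytes. A hedged sketch of the effect; the handle values and field names are taken from the sample handles in the neighbouring tests:

server_id = b'3h9WBnLbTUYAAAAAPQjxlQ==\n'
server_guid = b'qcrpEBmCTGacxfhM+CxbkQ==\n'

server_id = server_id.decode('utf-8')
server_guid = server_guid.decode('utf-8')

response = {'secret': server_id, 'guid': server_guid}
assert all(isinstance(v, str) for v in response.values())
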
diff --git a/desktop/libs/notebook/src/notebook/connectors/hiveserver2_tests.py b/desktop/libs/notebook/src/notebook/connectors/hiveserver2_tests.py
index 94ac44bab5b..5b5fda81480 100644
--- a/desktop/libs/notebook/src/notebook/connectors/hiveserver2_tests.py
+++ b/desktop/libs/notebook/src/notebook/connectors/hiveserver2_tests.py
@@ -16,38 +16,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import next, object
-import json
-import logging
-import pytest
import re
import sys
+import json
import time
+import logging
+from builtins import next, object
+from unittest.mock import Mock, patch
+import pytest
from django.urls import reverse
-from TCLIService.ttypes import TStatusCode, TProtocolVersion, TOperationType
+from TCLIService.ttypes import TOperationType, TProtocolVersion, TStatusCode
+from beeswax.server import dbms
+from beeswax.server.dbms import QueryServerException
+from beeswax.test_base import BeeswaxSampleProvider, get_query_server_config, is_hive_on_spark
from desktop.auth.backend import rewrite_user
from desktop.conf import has_connectors
-from desktop.lib.i18n import smart_str
from desktop.lib.django_test_util import make_logged_in_client
+from desktop.lib.i18n import smart_str
from desktop.lib.test_utils import add_to_group, grant_access
-from beeswax.server import dbms
-from beeswax.server.dbms import QueryServerException
-from beeswax.test_base import BeeswaxSampleProvider, get_query_server_config, is_hive_on_spark
from hadoop.pseudo_hdfs4 import is_live_cluster
-from useradmin.models import User
-
from notebook.api import _save_notebook
from notebook.connectors.base import QueryError, QueryExpired
from notebook.connectors.hiveserver2 import HS2Api
-from notebook.models import make_notebook, Notebook
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock
-else:
- from mock import patch, Mock
-
+from notebook.models import Notebook, make_notebook
+from useradmin.models import User
LOG = logging.getLogger()
@@ -108,7 +102,6 @@ def setup_method(self):
self.user = rewrite_user(User.objects.get(username="test"))
grant_access("test", "default", "notebook")
-
def test_execute_impala(self):
with patch('desktop.lib.connectors.api.CONNECTOR_INSTANCES', TestApi.CONNECTOR):
@@ -155,7 +148,6 @@ def test_execute_impala(self):
get_client.return_value = tclient
tclient.get_coordinator_host = Mock(return_value={})
-
response = self.client.post(reverse('notebook:execute'), {
'notebook': TestApi.NOTEBOOK_JSON,
'snippet': json.dumps(json.loads(TestApi.NOTEBOOK_JSON)['snippets'][0]),
@@ -167,7 +159,6 @@ def test_execute_impala(self):
data = json.loads(response.content)
assert data['status'] == 0
-
def test_autocomplete_database_impala(self):
with patch('desktop.lib.connectors.api.CONNECTOR_INSTANCES', TestApi.CONNECTOR):
@@ -190,7 +181,6 @@ def test_autocomplete_database_impala(self):
assert data['status'] == 0
assert data['databases'] == [{u'comment': u'', u'hdfs_link': u'hdfs://table'}]
-
def test_sample_data_table_sync_impala(self):
with patch('desktop.lib.connectors.api.CONNECTOR_INSTANCES', TestApi.CONNECTOR):
@@ -224,7 +214,6 @@ def test_sample_data_table_sync_impala(self):
assert data['full_headers'] == [{'name': 'name'}]
assert data['rows'] == [[1], [2]]
-
def test_sample_data_table_async_impala(self):
with patch('desktop.lib.connectors.api.CONNECTOR_INSTANCES', TestApi.CONNECTOR):
@@ -278,7 +267,6 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False)
self.user = rewrite_user(User.objects.get(username="test"))
-
@patch('notebook.connectors.hiveserver2.has_jobbrowser', True)
def test_get_jobs_with_jobbrowser(self):
notebook = Mock()
@@ -297,7 +285,6 @@ def test_get_jobs_with_jobbrowser(self):
assert jobs[0]['name'] == 'job_id_00001'
assert jobs[0]['url'] == '/jobbrowser/jobs/job_id_00001'
-
@patch('notebook.connectors.hiveserver2.has_jobbrowser', False)
def test_get_jobs_without_jobbrowser(self):
notebook = Mock()
@@ -316,7 +303,6 @@ def test_get_jobs_without_jobbrowser(self):
assert jobs[0]['name'] == 'job_id_00001'
assert jobs[0]['url'] == '' # Is empty
-
def test_close_statement(self):
with patch('notebook.connectors.hiveserver2.HS2Api._get_db') as _get_db:
_get_db.return_value = Mock(
@@ -371,11 +357,10 @@ def test_close_statement(self):
'interface': 'impala', 'type': 'direct', 'options': {}}, 'wasBatchExecuted': False, 'dialect': 'impala'
}
api = HS2Api(self.user)
-
+
response = api.close_statement(notebook, snippet)
assert response['status'] == -1 # snippet['result']['handel'] ['guid'] and ['secret'] are missing
-
def test_get_error_message_from_query(self):
with patch('notebook.connectors.hiveserver2.HS2Api._get_db') as _get_db:
with patch('notebook.connectors.hiveserver2.HS2Api._get_current_statement') as _get_current_statement:
@@ -408,7 +393,6 @@ def test_get_error_message_from_query(self):
e.message ==
'Error while compiling statement: FAILED: HiveAccessControlException Permission denied')
-
def test_autocomplete_time_out(self):
snippet = {'type': 'hive', 'properties': {}}
@@ -427,7 +411,6 @@ def test_autocomplete_time_out(self):
except QueryExpired as e:
assert e.message == "HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)"
-
def test_autocomplete_functions_hive(self):
snippet = {'type': 'hive', 'properties': {}}
@@ -460,7 +443,6 @@ def setup_method(self):
self.db = dbms.get(self.user, get_query_server_config())
self.api = HS2Api(self.user)
-
def test_prepare_hql_query(self):
statement = "SELECT myUpper(description) FROM sample_07 LIMIT 10"
snippet_json = """
@@ -564,7 +546,6 @@ def test_prepare_hql_query(self):
assert pattern.search(config_statements), config_statements
assert "CREATE TEMPORARY FUNCTION myUpper AS 'org.hue.udf.MyUpper'" in config_statements, config_statements
-
def test_upgrade_properties(self):
properties = None
# Verify that upgrade will return defaults if current properties not formatted as settings
@@ -628,7 +609,6 @@ def test_upgrade_properties(self):
upgraded_props = self.api.upgrade_properties(lang='hive', properties=properties)
assert upgraded_props == properties
-
def test_progress(self):
snippet = json.loads("""
{
@@ -684,7 +664,7 @@ def test_progress(self):
INFO : number of splits:1
INFO : Submitting tokens for job: job_1466104358744_0003
INFO : The url to track the job: http://jennykim-1.vpc.cloudera.com:8088/proxy/application_1466104358744_0003/
- """
+ """ # noqa: E501
assert self.api.progress({}, snippet, logs=logs) == 5
@@ -741,7 +721,6 @@ def test_progress(self):
assert self.api.progress({}, snippet, logs=logs) == 50
-
def test_get_jobs(self):
notebook = json.loads("""
@@ -812,14 +791,14 @@ def test_get_jobs(self):
INFO : The url to track the job: http://jennykim-1.vpc.cloudera.com:8088/proxy/application_1466630204796_0059/
INFO : Starting Job = job_1466630204796_0059, Tracking URL = http://jennykim-1.vpc.cloudera.com:8088/proxy/application_1466630204796_0059/
INFO : Kill Command = /usr/lib/hadoop/bin/hadoop job -kill job_1466630204796_0059
- """
+ """ # noqa: E501
jobs = self.api.get_jobs(notebook, snippet, logs)
assert isinstance(jobs, list)
assert len(jobs), 1
assert jobs[0]['name'] == 'job_1466630204796_0059'
- assert jobs[0]['started'] == True
- assert jobs[0]['finished'] == False
+ assert jobs[0]['started'] is True
+ assert jobs[0]['finished'] is False
assert 'url' in jobs[0]
logs += """INFO : Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
@@ -831,13 +810,11 @@ def test_get_jobs(self):
INFO : Launching Job 2 out of 2
"""
-
jobs = self.api.get_jobs(notebook, snippet, logs)
assert len(jobs), 1
assert jobs[0]['name'] == 'job_1466630204796_0059'
- assert jobs[0]['started'] == True
- assert jobs[0]['finished'] == True
-
+ assert jobs[0]['started'] is True
+ assert jobs[0]['finished'] is True
def test_get_current_statement(self):
snippet = json.loads("""
@@ -869,7 +846,6 @@ def test_get_current_statement(self):
assert '086ecec9a8b89b1b47cce358bdbb343be23b1f8b54ca76bc81927e27' == statement['previous_statement_hash']
-
def test_plan_extraction_from_profile(self):
query_plan = self.api._get_impala_profile_plan(
query_id='e147228183f1f0b3:6f086cc600000000', profile=IMPALA_CUSTOMER_QUERY_SAMPLE_PROFILE
@@ -893,7 +869,6 @@ def setup_class(cls):
super(TestHiveserver2ApiWithHadoop, cls).setup_class(load_data=False)
-
def setup_method(self):
self.client.post('/beeswax/install_examples')
@@ -908,7 +883,6 @@ def setup_method(self):
self.statement = 'SELECT description, salary FROM sample_07 WHERE (sample_07.salary > 100000) ORDER BY salary DESC LIMIT 1000'
-
def create_query_document(self, owner, query_type='hive', database='default',
name='Test Query', description='Test Query', statement='',
files=None, functions=None, settings=None):
@@ -933,7 +907,6 @@ def create_query_document(self, owner, query_type='hive', database='default',
notebook_doc, save_as = _save_notebook(notebook.get_data(), owner)
return notebook_doc
-
def get_snippet(self, notebook, snippet_idx=0):
data = notebook.get_data()
snippet = data['snippets'][snippet_idx]
@@ -945,7 +918,6 @@ def get_snippet(self, notebook, snippet_idx=0):
return snippet
-
def execute_and_wait(self, query_doc, snippet_idx=0, timeout=30.0, wait=1.0):
notebook = Notebook(document=query_doc)
snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)
@@ -975,7 +947,6 @@ def execute_and_wait(self, query_doc, snippet_idx=0, timeout=30.0, wait=1.0):
return snippet
-
def test_query_with_unicode(self):
statement = "SELECT * FROM sample_07 WHERE code='한';"
@@ -996,7 +967,6 @@ def test_query_with_unicode(self):
assert 0 == data['status'], data
assert "SELECT * FROM sample_07 WHERE code='한'" in smart_str(data['logs'])
-
def test_get_current_statement(self):
multi_statement = "SELECT description, salary FROM sample_07 LIMIT 20;\r\nSELECT AVG(salary) FROM sample_07;"
@@ -1011,7 +981,7 @@ def test_get_current_statement(self):
assert 0 == data['status'], data
assert 0 == data['handle']['statement_id'], data
assert 2 == data['handle']['statements_count'], data
- assert True == data['handle']['has_more_statements'], data
+ assert True is data['handle']['has_more_statements'], data
assert {'row': 0, 'column': 0} == data['handle']['start'], data
assert {'row': 0, 'column': 51} == data['handle']['end'], data
@@ -1024,11 +994,10 @@ def test_get_current_statement(self):
assert 0 == data['status'], data
assert 1 == data['handle']['statement_id'], data
assert 2 == data['handle']['statements_count'], data
- assert False == data['handle']['has_more_statements'], data
+ assert False is data['handle']['has_more_statements'], data
assert {'row': 1, 'column': 0} == data['handle']['start'], data
assert {'row': 1, 'column': 33} == data['handle']['end'], data
-
def test_explain(self):
# Hive 2 with Tez set hive.explain.user to true by default, but this test is expecting output when this setting
# is set to false.
@@ -1046,7 +1015,6 @@ def test_explain(self):
assert 'STAGE DEPENDENCIES' in data['explanation'], data
assert self.statement == data['statement'], data
-
def test_download(self):
statement = "SELECT 'hello world';"
@@ -1060,7 +1028,6 @@ def test_download(self):
assert 200 == response.status_code
assert ('Content-Disposition', 'attachment; filename="Test Query.csv"') == response._headers['content-disposition']
-
def test_get_sample(self):
doc = self.create_query_document(owner=self.user, statement=self.statement)
notebook = Notebook(document=doc)
@@ -1087,7 +1054,6 @@ def test_get_sample(self):
assert 'rows' in data
assert len(data['rows']) > 0
-
def test_fetch_result_size_mr(self):
if not is_live_cluster(): # Mini-cluster does not have JHS
pytest.skip("Skipping Test")
@@ -1113,8 +1079,8 @@ def test_fetch_result_size_mr(self):
assert 'result' in data
assert 'rows' in data['result']
assert 'size' in data['result']
- assert None == data['result']['rows']
- assert None == data['result']['size']
+ assert None is data['result']['rows']
+ assert None is data['result']['size']
# Assert that a query with map & reduce task returns rows
statement = "SELECT DISTINCT code FROM sample_07;"
@@ -1149,7 +1115,6 @@ def test_fetch_result_size_mr(self):
assert 23 == data['result']['rows']
assert data['result']['size'] > 0, data['result']
-
def test_fetch_result_size_spark(self):
if not is_live_cluster() or not is_hive_on_spark():
pytest.skip("Skipping Test")
@@ -1177,8 +1142,8 @@ def test_fetch_result_size_spark(self):
assert 'result' in data
assert 'rows' in data['result']
assert 'size' in data['result']
- assert None == data['result']['rows']
- assert None == data['result']['size']
+ assert None is data['result']['rows']
+ assert None is data['result']['size']
# Assert that a query that runs a job will return rows and size
statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
@@ -1197,7 +1162,6 @@ def test_fetch_result_size_spark(self):
assert 23 == data['result']['rows']
assert data['result']['size'] > 0
-
def test_fetch_result_size_impala(self):
if not is_live_cluster():
pytest.skip("Skipping Test")
@@ -1224,7 +1188,7 @@ def test_fetch_result_size_impala(self):
assert 'rows' in data['result']
assert 'size' in data['result']
assert 23 == data['result']['rows']
- assert None == data['result']['size']
+ assert None is data['result']['size']
# Assert that selecting all from partitioned table works
statement = "SELECT * FROM web_logs;"
@@ -1246,7 +1210,6 @@ def test_fetch_result_size_impala(self):
finally:
self.api.close_session(session)
-
def test_fetch_result_abbreviated(self):
if not is_live_cluster():
pytest.skip("Skipping Test")
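
Most of the assertion edits in these tests replace == with is when comparing against None, True, or False (flake8 E711/E712), since those are singletons and identity is the intended check. The one-line version of the rule:

rows, finished = None, False

assert rows is None        # preferred over: rows == None   (E711)
assert finished is False   # preferred over: finished == False  (E712)
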
diff --git a/desktop/libs/notebook/src/notebook/connectors/kafka.py b/desktop/libs/notebook/src/notebook/connectors/kafka.py
index 182545af9fe..e80b1584a2a 100644
--- a/desktop/libs/notebook/src/notebook/connectors/kafka.py
+++ b/desktop/libs/notebook/src/notebook/connectors/kafka.py
@@ -17,20 +17,15 @@
from __future__ import absolute_import
-import logging
import sys
+import logging
+
+from django.utils.translation import gettext as _
from desktop.lib.i18n import force_unicode
from kafka.kafka_api import get_topics
-
from notebook.connectors.base import Api, QueryError
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
diff --git a/desktop/libs/notebook/src/notebook/connectors/ksql.py b/desktop/libs/notebook/src/notebook/connectors/ksql.py
index ff626435489..63f1b94d0ce 100644
--- a/desktop/libs/notebook/src/notebook/connectors/ksql.py
+++ b/desktop/libs/notebook/src/notebook/connectors/ksql.py
@@ -18,22 +18,17 @@
from __future__ import absolute_import
-import logging
-import json
import sys
+import json
+import logging
+
+from django.utils.translation import gettext as _
-from desktop.lib.i18n import force_unicode
from desktop.conf import has_channels
+from desktop.lib.i18n import force_unicode
from kafka.ksql_client import KSqlApi as KSqlClientApi
-
from notebook.connectors.base import Api, QueryError
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -60,11 +55,9 @@ def __init__(self, user, interpreter=None):
self.url = self.options['url']
-
def _get_db(self):
return KSqlClientApi(user=self.user, url=self.url)
-
@query_error_handler
def execute(self, notebook, snippet):
channel_name = notebook.get('editorWsChannel')
@@ -94,12 +87,10 @@ def execute(self, notebook, snippet):
}
}
-
@query_error_handler
def check_status(self, notebook, snippet):
return {'status': 'available'}
-
@query_error_handler
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
response = {}
diff --git a/desktop/libs/notebook/src/notebook/connectors/oozie_batch.py b/desktop/libs/notebook/src/notebook/connectors/oozie_batch.py
index d7d2604bfb6..c5af220e5d9 100644
--- a/desktop/libs/notebook/src/notebook/connectors/oozie_batch.py
+++ b/desktop/libs/notebook/src/notebook/connectors/oozie_batch.py
@@ -15,25 +15,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import re
import sys
import time
+import logging
-from django.urls import reverse
from django.http import QueryDict
+from django.urls import reverse
+from django.utils.translation import gettext as _
from desktop.lib.exceptions_renderable import PopupException
from desktop.models import Document2
-
from notebook.connectors.base import Api, QueryError
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -64,7 +58,6 @@ def __init__(self, *args, **kwargs):
self.fs = self.request.fs
self.jt = self.request.jt
-
def execute(self, notebook, snippet):
# Get document from notebook
if not notebook.get('uuid', ''):
@@ -90,7 +83,6 @@ def execute(self, notebook, snippet):
'has_result_set': True,
}
-
def check_status(self, notebook, snippet):
response = {'status': 'running'}
@@ -115,7 +107,6 @@ def check_status(self, notebook, snippet):
return response
-
def fetch_result(self, notebook, snippet, rows, start_over):
log_output = self.get_log(notebook, snippet)
results = self._get_results(log_output, snippet['type'])
@@ -127,7 +118,6 @@ def fetch_result(self, notebook, snippet, rows, start_over):
'has_more': False,
}
-
def cancel(self, notebook, snippet):
job_id = snippet['result']['handle']['id']
@@ -138,7 +128,6 @@ def cancel(self, notebook, snippet):
return {'status': 0}
-
def get_log(self, notebook, snippet, startFrom=0, size=None):
job_id = snippet['result']['handle']['id']
@@ -147,14 +136,12 @@ def get_log(self, notebook, snippet, startFrom=0, size=None):
return logs if logs else oozie_job.log
-
def progress(self, notebook, snippet, logs=None):
job_id = snippet['result']['handle']['id']
oozie_job = check_job_access_permission(self.request, job_id)
return oozie_job.get_progress()
-
def get_jobs(self, notebook, snippet, logs):
jobs = []
job_id = snippet['result']['handle']['id']
@@ -171,15 +158,12 @@ def get_jobs(self, notebook, snippet, logs):
})
return jobs
-
def close_statement(self, notebook, snippet):
pass
-
def close_session(self, session):
pass
-
def _get_log_output(self, oozie_workflow):
log_output = ''
q = self.request.GET.copy()
@@ -204,7 +188,6 @@ def _get_log_output(self, oozie_workflow):
attempts += 1
return log_output
-
def _get_results(self, log_output, action_type):
results = ''
diff --git a/desktop/libs/notebook/src/notebook/connectors/rdbms.py b/desktop/libs/notebook/src/notebook/connectors/rdbms.py
index c68865d512d..9d052ab907b 100644
--- a/desktop/libs/notebook/src/notebook/connectors/rdbms.py
+++ b/desktop/libs/notebook/src/notebook/connectors/rdbms.py
@@ -15,19 +15,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import next
-from builtins import object
-import logging
import sys
-
-from desktop.lib.i18n import force_unicode
+import logging
+from builtins import next, object
from beeswax import data_export
+from desktop.lib.i18n import force_unicode
from librdbms.server import dbms
-
from notebook.connectors.base import Api, QueryError, QueryExpired, _get_snippet_name
-
LOG = logging.getLogger()
@@ -40,10 +36,7 @@ def decorator(*args, **kwargs):
if 'Invalid query handle' in message or 'Invalid OperationHandle' in message:
raise QueryExpired(e)
else:
- if sys.version_info[0] > 2:
- raise QueryError(message).with_traceback(sys.exc_info()[2])
- else:
- raise QueryError, message, sys.exc_info()[2]
+ raise QueryError(message).with_traceback(sys.exc_info()[2])
return decorator
@@ -80,12 +73,10 @@ def execute(self, notebook, snippet):
}
}
-
@query_error_handler
def check_status(self, notebook, snippet):
return {'status': 'expired'}
-
@query_error_handler
def fetch_result(self, notebook, snippet, rows, start_over):
return {
@@ -95,27 +86,22 @@ def fetch_result(self, notebook, snippet, rows, start_over):
'type': 'table'
}
-
@query_error_handler
def fetch_result_metadata(self):
pass
-
@query_error_handler
def cancel(self, notebook, snippet):
return {'status': 0}
-
@query_error_handler
def get_log(self, notebook, snippet, startFrom=None, size=None):
return 'No logs'
-
@query_error_handler
def close_statement(self, notebook, snippet):
return {'status': -1}
-
@query_error_handler
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
query_server = self._get_query_server()
@@ -143,7 +129,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N
response['status'] = 0
return response
-
@query_error_handler
def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None):
query_server = self._get_query_server()
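
The rdbms.py handler keeps only the Python 3 re-raise form, chaining the original traceback onto the wrapping error. A self-contained sketch of that decorator shape; QueryError here is a local stand-in for notebook.connectors.base.QueryError and the query-expiry branch is omitted:

import sys

class QueryError(Exception):
    pass

def query_error_handler(func):
    def decorator(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Python 3 spelling of the removed `raise QueryError, message, sys.exc_info()[2]`
            raise QueryError(str(e)).with_traceback(sys.exc_info()[2])
    return decorator

@query_error_handler
def run_query():
    raise ValueError('Invalid query handle')

try:
    run_query()
except QueryError as e:
    assert 'Invalid query handle' in str(e)
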
diff --git a/desktop/libs/notebook/src/notebook/connectors/solr.py b/desktop/libs/notebook/src/notebook/connectors/solr.py
index 53f350e05bd..fb5b14173ff 100644
--- a/desktop/libs/notebook/src/notebook/connectors/solr.py
+++ b/desktop/libs/notebook/src/notebook/connectors/solr.py
@@ -15,23 +15,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
import sys
+import logging
+from builtins import object
+
+from django.utils.translation import gettext as _
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.i18n import force_unicode
from indexer.solr_client import SolrClient
-
from notebook.connectors.base import Api, QueryError
from notebook.models import escape_rows
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -71,7 +66,7 @@ def execute(self, notebook, snippet):
response = api.sql(collection, snippet['statement'])
- info = response['result-set']['docs'].pop(-1) # EOF, RESPONSE_TIME, EXCEPTION
+ info = response['result-set']['docs'].pop(-1) # EOF, RESPONSE_TIME, EXCEPTION
if info.get('EXCEPTION'):
raise QueryError(info['EXCEPTION'])
@@ -103,12 +98,10 @@ def execute(self, notebook, snippet):
'statements_count': 1
}
-
@query_error_handler
def check_status(self, notebook, snippet):
return {'status': 'available'}
-
@query_error_handler
def fetch_result(self, notebook, snippet, rows, start_over):
return {
@@ -118,17 +111,14 @@ def fetch_result(self, notebook, snippet, rows, start_over):
'type': 'table'
}
-
@query_error_handler
def fetch_result_metadata(self):
pass
-
@query_error_handler
def cancel(self, notebook, snippet):
return {'status': 0}
-
@query_error_handler
def get_log(self, notebook, snippet, startFrom=None, size=None):
return 'No logs'
@@ -137,7 +127,6 @@ def get_log(self, notebook, snippet, startFrom=None, size=None):
def close_statement(self, notebook, snippet):
return {'status': -1}
-
@query_error_handler
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
from search.conf import SOLR_URL
@@ -157,7 +146,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N
response['status'] = 0
return response
-
@query_error_handler
def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None):
from search.conf import SOLR_URL
@@ -203,7 +191,13 @@ def get_tables(self, database, table_names=[]):
]
def get_columns(self, database, table):
- return [{'name': field['name'], 'type': field['type'], 'comment': '', 'primary_key': field.get('primary_key')} for field in self.db.schema_fields(table)['fields']]
+ return [{
+ 'name': field['name'],
+ 'type': field['type'],
+ 'comment': '',
+ 'primary_key': field.get('primary_key')
+ } for field in self.db.schema_fields(table)['fields']
+ ]
def get_sample_data(self, database, table, column=None):
# Note: currently ignores dynamic fields
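
In the Solr execute() path touched above, the respaced line pops the trailing control record off the SQL response before the rows are returned. A hedged sketch with a made-up response shape; only the EOF, RESPONSE_TIME and EXCEPTION keys come from the comment in the code:

response = {'result-set': {'docs': [
    {'id': '1', 'title': 'first'},
    {'id': '2', 'title': 'second'},
    {'EOF': True, 'RESPONSE_TIME': 12},  # control record, not a data row
]}}

info = response['result-set']['docs'].pop(-1)  # EOF, RESPONSE_TIME, EXCEPTION
if info.get('EXCEPTION'):
    raise RuntimeError(info['EXCEPTION'])

rows = response['result-set']['docs']
assert len(rows) == 2
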
diff --git a/desktop/libs/notebook/src/notebook/connectors/spark_shell.py b/desktop/libs/notebook/src/notebook/connectors/spark_shell.py
index 89d9993ee68..292c47f3edd 100644
--- a/desktop/libs/notebook/src/notebook/connectors/spark_shell.py
+++ b/desktop/libs/notebook/src/notebook/connectors/spark_shell.py
@@ -15,31 +15,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import range, object
-import logging
import re
import sys
+import json
import time
+import logging
import textwrap
-import json
+from builtins import object, range
-from beeswax.server.dbms import Table
+from django.utils.translation import gettext as _
+from beeswax.server.dbms import Table
+from desktop.auth.backend import rewrite_user
from desktop.conf import USE_DEFAULT_CONFIGURATION
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.i18n import force_unicode
from desktop.lib.rest.http_client import RestException
from desktop.models import DefaultConfiguration
-from desktop.auth.backend import rewrite_user
-
-from notebook.data_export import download as spark_download
from notebook.connectors.base import Api, QueryError, SessionExpired, _get_snippet_session
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
+from notebook.data_export import download as spark_download
LOG = logging.getLogger()
@@ -52,21 +46,19 @@
SESSION_KEY = '%(username)s-%(interpreter_name)s'
+
class SparkApi(Api):
- SPARK_UI_RE = re.compile("Started SparkUI at (http[s]?://([0-9a-zA-Z-_\.]+):(\d+))")
+ SPARK_UI_RE = re.compile(r"Started SparkUI at (http[s]?://([0-9a-zA-Z-_\.]+):(\d+))")
YARN_JOB_RE = re.compile("tracking URL: (http[s]?://.+/)")
- STANDALONE_JOB_RE = re.compile("Got job (\d+)")
-
+ STANDALONE_JOB_RE = re.compile(r"Got job (\d+)")
def __init__(self, user, interpreter):
super(SparkApi, self).__init__(user=user, interpreter=interpreter)
-
def get_api(self):
return get_spark_api(self.user, self.interpreter)
-
@staticmethod
def get_livy_props(lang, properties=None):
props = dict([(p['name'], p['value']) for p in SparkConfiguration.PROPERTIES])
@@ -100,7 +92,6 @@ def get_livy_props(lang, properties=None):
return props
-
@staticmethod
def to_properties(props=None):
properties = list()
@@ -114,14 +105,12 @@ def to_properties(props=None):
return properties
-
def _get_session_key(self):
return SESSION_KEY % {
'username': self.user.username if hasattr(self.user, 'username') else self.user,
'interpreter_name': self.interpreter['name']
}
-
def _check_session(self, session):
'''
Check if the session is actually present and its state is healthy.
@@ -135,7 +124,6 @@ def _check_session(self, session):
if session_present and session_present['state'] not in ('dead', 'shutting_down', 'error', 'killed'):
return session_present
-
def create_session(self, lang='scala', properties=None):
api = self.get_api()
stored_session_info = self._get_session_info_from_user()
@@ -174,7 +162,6 @@ def create_session(self, lang='scala', properties=None):
self._set_session_info_to_user(new_session_info)
return new_session_info
-
def execute(self, notebook, snippet):
api = self.get_api()
@@ -183,7 +170,6 @@ def execute(self, notebook, snippet):
response = self._execute(api, session, snippet.get('type'), snippet['statement'])
return response
-
def _execute(self, api, session, snippet_type, statement):
if not session or not self._check_session(session):
stored_session_info = self._get_session_info_from_user()
@@ -201,12 +187,11 @@ def _execute(self, api, session, snippet_type, statement):
}
except Exception as e:
message = force_unicode(str(e)).lower()
- if re.search("session ('\d+' )?not found", message) or 'connection refused' in message or 'session is in state busy' in message:
+ if re.search(r"session ('\d+' )?not found", message) or 'connection refused' in message or 'session is in state busy' in message:
raise SessionExpired(e)
else:
raise e
-
def check_status(self, notebook, snippet):
api = self.get_api()
session = _get_snippet_session(notebook, snippet)
@@ -221,12 +206,11 @@ def check_status(self, notebook, snippet):
}
except Exception as e:
message = force_unicode(str(e)).lower()
- if re.search("session ('\d+' )?not found", message):
+ if re.search(r"session ('\d+' )?not found", message):
raise SessionExpired(e)
else:
raise e
-
def fetch_result(self, notebook, snippet, rows, start_over=False):
api = self.get_api()
session = _get_snippet_session(notebook, snippet)
@@ -243,13 +227,12 @@ def fetch_result(self, notebook, snippet, rows, start_over=False):
return response
-
def _fetch_result(self, api, session, cell):
try:
response = api.fetch_data(session['id'], cell)
except Exception as e:
message = force_unicode(str(e))
- if re.search("session ('\d+' )?not found", message):
+ if re.search(r"session ('\d+' )?not found", message):
raise SessionExpired(e)
else:
raise PopupException(_(message))
@@ -301,13 +284,12 @@ def _fetch_result(self, api, session, cell):
raise QueryError(msg)
-
def _handle_result_data(self, result, is_complex_type=False):
"""
Parse the data from the 'result' dict based on whether it has complex datatypes or not.
- If the 'is_complex_type' flag is True, it parses the result dict, checking for 'schema' and 'values'
- and if found, formatting them into a appropriate result data dictionary representing that result column.
+ If the 'is_complex_type' flag is True, it parses the result dict, checking for 'schema' and 'values'
+    and if found, formatting them into an appropriate result data dictionary representing that result column.
If the flag is False, it simply returns the 'data' as is.
Args:
@@ -337,9 +319,8 @@ def _handle_result_data(self, result, is_complex_type=False):
else:
# If the query result is not having complex datatype, return the 'data' as it is.
data = result['data']
-
- return data
+ return data
def _handle_result_meta(self, result):
meta = []
@@ -357,9 +338,8 @@ def _handle_result_meta(self, result):
meta.append({'name': f['name'], 'type': complex_type, 'comment': ''})
else:
meta.append({'name': f['name'], 'type': f['type'], 'comment': ''})
-
- return meta, is_complex_type
+ return meta, is_complex_type
def cancel(self, notebook, snippet):
api = self.get_api()
@@ -375,7 +355,6 @@ def cancel(self, notebook, snippet):
return {'status': 0}
-
def get_log(self, notebook, snippet, startFrom=0, size=None):
response = {'status': 0}
api = self.get_api()
@@ -389,7 +368,6 @@ def get_log(self, notebook, snippet, startFrom=0, size=None):
LOG.debug(message)
return response
-
def _handle_session_health_check(self, session):
if not session or not self._check_session(session):
@@ -398,14 +376,12 @@ def _handle_session_health_check(self, session):
session = stored_session_info
else:
raise PopupException(_("Session error. Please create new session and try again."))
-
- return session
+ return session
- def close_statement(self, notebook, snippet): # Individual statements cannot be closed
+ def close_statement(self, notebook, snippet): # Individual statements cannot be closed
pass
-
def close_session(self, session):
api = self.get_api()
@@ -417,7 +393,7 @@ def close_session(self, session):
'status': 0
}
except RestException as e:
- if e.code == 404 or e.code == 500: # TODO remove the 500
+ if e.code == 404 or e.code == 500: # TODO remove the 500
raise SessionExpired(e)
finally:
stored_session_info = self._get_session_info_from_user()
@@ -426,7 +402,6 @@ def close_session(self, session):
else:
return {'status': -1}
-
def get_jobs(self, notebook, snippet, logs):
if self._is_yarn_mode():
# Tracking URL is found at the start of the logs
@@ -435,7 +410,6 @@ def get_jobs(self, notebook, snippet, logs):
else:
return self._get_standalone_jobs(logs)
-
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
response = {}
# As booting a new SQL session is slow and we don't send the id of the current one in /autocomplete
@@ -446,7 +420,7 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N
# Calling the method here since this /autocomplete call can be frequent enough and we dont need dedicated one.
if self._get_session_info_from_user():
self._close_unused_sessions(snippet.get('type'))
-
+
stored_session_info = self._get_session_info_from_user()
if stored_session_info and self._check_session(stored_session_info):
session = stored_session_info
@@ -470,7 +444,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N
return response
-
def _close_unused_sessions(self, session_type):
'''
Closes all unused Livy sessions for a particular user to free up session resources.
@@ -493,7 +466,6 @@ def _close_unused_sessions(self, session_type):
session['kind'] == session_type and session['state'] in ('idle', 'shutting_down', 'error', 'dead', 'killed'):
self.close_session(session)
-
def _check_status_and_fetch_result(self, api, session, execute_resp):
check_status = api.fetch_data(session['id'], execute_resp['id'])
@@ -506,7 +478,6 @@ def _check_status_and_fetch_result(self, api, session, execute_resp):
if check_status['state'] == 'available':
return self._fetch_result(api, session, execute_resp['id'])
-
def _show_databases(self, api, session, snippet_type):
show_db_execute = self._execute(api, session, snippet_type, 'SHOW DATABASES')
db_list = self._check_status_and_fetch_result(api, session, show_db_execute)
@@ -514,7 +485,6 @@ def _show_databases(self, api, session, snippet_type):
if db_list:
return [db[0] for db in db_list['data']]
-
def _show_tables(self, api, session, snippet_type, database):
use_db_execute = self._execute(api, session, snippet_type, 'USE %(database)s' % {'database': database})
use_db_resp = self._check_status_and_fetch_result(api, session, use_db_execute)
@@ -525,7 +495,6 @@ def _show_tables(self, api, session, snippet_type, database):
if tables_list:
return [table[1] for table in tables_list['data']]
-
def _get_columns(self, api, session, snippet_type, database, table):
use_db_execute = self._execute(api, session, snippet_type, 'USE %(database)s' % {'database': database})
use_db_resp = self._check_status_and_fetch_result(api, session, use_db_execute)
@@ -550,7 +519,6 @@ def _get_columns(self, api, session, snippet_type, database, table):
return cols
-
def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None):
api = self.get_api()
response = {
@@ -578,7 +546,6 @@ def get_sample_data(self, snippet, database=None, table=None, column=None, is_as
if stat.get('data_type') and stat['data_type'] == 'transactional' and stat.get('col_name'):
return response
-
statement = self._get_select_query(database, table, column, operation)
sample_execute = self._execute(api, session, snippet.get('type'), statement)
@@ -589,11 +556,9 @@ def get_sample_data(self, snippet, database=None, table=None, column=None, is_as
return response
-
def get_browse_query(self, snippet, database, table, partition_spec=None):
return self._get_select_query(database, table)
-
def _get_select_query(self, database, table, column=None, operation=None, limit=100):
if operation == 'hello':
statement = "SELECT 'Hello World!'"
@@ -612,7 +577,6 @@ def _get_select_query(self, database, table, column=None, operation=None, limit=
return statement
-
def describe_table(self, notebook, snippet, database=None, table=None):
api = self.get_api()
@@ -644,7 +608,6 @@ def describe_table(self, notebook, snippet, database=None, table=None):
'stats': tb.stats
}
-
def describe_database(self, notebook, snippet, database=None):
response = {'status': 0}
api = self.get_api()
@@ -675,7 +638,6 @@ def describe_database(self, notebook, snippet, database=None):
return response
-
def _get_standalone_jobs(self, logs):
job_ids = set([])
@@ -700,7 +662,6 @@ def _get_standalone_jobs(self, logs):
return jobs
-
def _get_yarn_jobs(self, logs):
tracking_urls = set([])
@@ -716,11 +677,9 @@ def _get_yarn_jobs(self, logs):
return jobs
-
def _is_yarn_mode(self):
return LIVY_SERVER_SESSION_KIND.get() == "yarn"
-
def _get_session_info_from_user(self):
self.user = rewrite_user(self.user)
session_key = self._get_session_key()
@@ -728,7 +687,6 @@ def _get_session_info_from_user(self):
if self.user.profile.data.get(session_key):
return self.user.profile.data[session_key]
-
def _set_session_info_to_user(self, session_info):
self.user = rewrite_user(self.user)
session_key = self._get_session_key()
@@ -736,7 +694,6 @@ def _set_session_info_to_user(self, session_info):
self.user.profile.update_data({session_key: session_info})
self.user.profile.save()
-
def _remove_session_info_from_user(self):
self.user = rewrite_user(self.user)
session_key = self._get_session_key()
@@ -745,7 +702,7 @@ def _remove_session_info_from_user(self):
json_data = self.user.profile.data
json_data.pop(session_key)
self.user.profile.json_data = json.dumps(json_data)
-
+
self.user.profile.save()
@@ -767,7 +724,7 @@ def __init__(self, desc_results):
self.stats = []
self.cols = []
self.partition_keys = []
- self.primary_keys = [] # Not implemented
+ self.primary_keys = [] # Not implemented
self.is_view = False
self._details = None
@@ -808,7 +765,7 @@ def handle_describe_format(self):
})
if d[0] == 'Table':
- self.name = d[1]
+ self.name = d[1]
elif d[0] == 'Type':
if 'view' in d[1].lower():
self.is_view = True
diff --git a/desktop/libs/notebook/src/notebook/connectors/spark_shell_tests.py b/desktop/libs/notebook/src/notebook/connectors/spark_shell_tests.py
index 1d96e328b72..d172385a227 100644
--- a/desktop/libs/notebook/src/notebook/connectors/spark_shell_tests.py
+++ b/desktop/libs/notebook/src/notebook/connectors/spark_shell_tests.py
@@ -15,20 +15,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import pytest
import sys
-
from builtins import object
+from unittest.mock import Mock, patch
-from desktop.lib.django_test_util import make_logged_in_client
-from useradmin.models import User
+import pytest
+from desktop.lib.django_test_util import make_logged_in_client
from notebook.connectors.spark_shell import SparkApi
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock
-else:
- from mock import patch, Mock
+from useradmin.models import User
@pytest.mark.django_db
@@ -46,7 +41,6 @@ def setup_method(self):
}
self.api = SparkApi(self.user, self.interpreter)
-
def test_get_api(self):
lang = 'pyspark'
properties = None
@@ -55,7 +49,6 @@ def test_get_api(self):
spark_api = self.api.get_api()
assert spark_api.__class__.__name__ == 'LivyClient'
-
def test_get_livy_props_method(self):
test_properties = [{
"name": "files",
@@ -64,7 +57,6 @@ def test_get_livy_props_method(self):
props = self.api.get_livy_props('scala', test_properties)
assert props['files'] == ['file_a', 'file_b', 'file_c']
-
def test_create_session_with_config(self):
lang = 'pyspark'
properties = None
@@ -128,7 +120,6 @@ def test_create_session_with_config(self):
cores = p['value']
assert cores == 1
-
def test_create_session_plain(self):
lang = 'pyspark'
properties = None
@@ -153,7 +144,6 @@ def test_create_session_plain(self):
assert files_properties, session['properties']
assert files_properties[0]['value'] == [], session['properties']
-
def test_execute(self):
with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session:
with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
@@ -177,7 +167,6 @@ def test_execute(self):
with pytest.raises(Exception):
self.api.execute(notebook, snippet)
-
def test_handle_result_data(self):
# When result data has no complex type.
data = {
@@ -216,7 +205,6 @@ def test_handle_result_data(self):
processed_data = self.api._handle_result_data(data, is_complex_type=True)
assert processed_data == [['0', 535.0, {'site_id': 'BEB'}, {'c_id': 'EF'}, '2023-06-16T23:53:31Z']]
-
def test_check_status(self):
with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session:
with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
@@ -245,7 +233,6 @@ def test_check_status(self):
)
with pytest.raises(Exception):
self.api.check_status(notebook, snippet)
-
def test_get_sample_data(self):
snippet = Mock()
@@ -282,14 +269,13 @@ def test_get_sample_data(self):
# When table is not transactional
self.api.describe_table = Mock(
return_value={
- 'stats': [] # No details regarding transactionality is present in describe response
+ 'stats': [] # No details regarding transactionality is present in describe response
}
)
response = self.api.get_sample_data(snippet, 'test_db', 'test_table', 'test_column')
assert response['rows'] == 'test_data'
assert response['full_headers'] == 'test_meta'
-
def test_get_select_query(self):
# With operation as 'hello'
@@ -304,7 +290,6 @@ def test_get_select_query(self):
response = self.api._get_select_query('test_db', 'test_table', 'test_column')
assert response == 'SELECT test_column\nFROM test_db.test_table\nLIMIT 100\n'
-
def test_describe_database(self):
notebook = Mock()
snippet = Mock()
@@ -340,7 +325,6 @@ def test_describe_database(self):
'parameters': '{Create-by=Kevin, Create-date=09/01/2019}',
'status': 0}
-
def test_describe_table(self):
notebook = Mock()
snippet = Mock()
@@ -501,7 +485,6 @@ def test_describe_table(self):
'data_type': 'transient_lastDdlTime'}],
'status': 0}
-
def test_get_jobs(self):
local_jobs = [
{'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
@@ -516,7 +499,6 @@ def test_get_jobs(self):
assert jobs == yarn_jobs, jobs
-
LIVY_STANDALONE_LOG = """
Starting livy-repl on http://172.21.1.246:58449
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
@@ -592,7 +574,7 @@ def test_get_jobs(self):
15/10/05 14:02:37 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
15/10/05 14:02:37 INFO DAGScheduler: ShuffleMapStage 0 (reduceByKey at <console>:1) finished in 0.973 s
15/10/05 14:02:37 INFO DAGScheduler: looking for newly runnable stages
-"""
+""" # noqa: E501
LIVY_YARN_LOG = """
15/10/05 13:51:21 INFO client.RMProxy: Connecting to ResourceManager at huetest-1.test.com/175.18.213.12:8032
15/10/05 13:51:21 INFO yarn.Client: Requesting a new application from cluster with 3 NodeManagers
@@ -645,4 +627,4 @@ def test_get_jobs(self):
tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/
user: huetest
15/10/05 13:52:24 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING)
-"""
+""" # noqa: E501
diff --git a/desktop/libs/notebook/src/notebook/connectors/sql_alchemy.py b/desktop/libs/notebook/src/notebook/connectors/sql_alchemy.py
index 67b8bb9c942..dc5dccbd23a 100644
--- a/desktop/libs/notebook/src/notebook/connectors/sql_alchemy.py
+++ b/desktop/libs/notebook/src/notebook/connectors/sql_alchemy.py
@@ -49,47 +49,36 @@
Each query statement grabs a connection from the engine and will return it after its close().
Disposing the engine closes all its connections.
'''
-from future import standard_library
-standard_library.install_aliases()
-from builtins import next, object
-import datetime
-import json
-import logging
-import uuid
import re
import sys
+import json
+import uuid
+import logging
+import datetime
import textwrap
-
from string import Template
+from urllib.parse import parse_qs as urllib_parse_qs, quote_plus as urllib_quote_plus, urlparse as urllib_urlparse
from django.core.cache import caches
-from sqlalchemy import create_engine, inspect, Table, MetaData
-from sqlalchemy.exc import OperationalError, UnsupportedCompilationError, CompileError, ProgrammingError, NoSuchTableError
+from django.utils.translation import gettext as _
+from past.builtins import long
+from sqlalchemy import MetaData, Table, create_engine, inspect
+from sqlalchemy.exc import CompileError, NoSuchTableError, OperationalError, ProgrammingError, UnsupportedCompilationError
+from beeswax import data_export
from desktop.lib import export_csvxls
from desktop.lib.i18n import force_unicode
-from beeswax import data_export
from librdbms.server import dbms
-
-from notebook.connectors.base import Api, QueryError, QueryExpired, _get_snippet_name, AuthenticationRequired
+from notebook.connectors.base import Api, AuthenticationRequired, QueryError, QueryExpired, _get_snippet_name
from notebook.models import escape_rows
-if sys.version_info[0] > 2:
- from urllib.parse import quote_plus as urllib_quote_plus, urlparse as urllib_urlparse, parse_qs as urllib_parse_qs
- from past.builtins import long
- from io import StringIO
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
- from urllib import quote_plus as urllib_quote_plus, urlparse as urllib_urlparse, parse_qs as urllib_parse_qs
- from cStringIO import StringIO
-
ENGINES = {}
CONNECTIONS = {}
ENGINE_KEY = '%(username)s-%(connector_name)s'
URL_PATTERN = '(?P<driver_name>.+?://)(?P<host>[^:/ ]+):(?P<port>[0-9]*).*'
+
LOG = logging.getLogger()
@@ -99,7 +88,7 @@ def decorator(*args, **kwargs):
return func(*args, **kwargs)
except OperationalError as e:
message = str(e)
- if '1045' in message: # 'Access denied' # MySQL
+ if '1045' in message: # 'Access denied' # MySQL
raise AuthenticationRequired(message=message)
else:
raise e
@@ -215,7 +204,6 @@ def _create_engine(self):
return create_engine(url, **options)
-
def _get_session(self, notebook, snippet):
for session in notebook['sessions']:
if session['type'] == snippet['type']:
@@ -223,7 +211,6 @@ def _get_session(self, notebook, snippet):
return None
-
def _create_connection(self, engine):
connection = None
try:
@@ -236,7 +223,6 @@ def _create_connection(self, engine):
return connection
-
@query_error_handler
def execute(self, notebook, snippet):
guid = uuid.uuid4().hex
@@ -278,11 +264,11 @@ def execute(self, notebook, snippet):
response = {
'sync': False,
- 'has_result_set': result.cursor != None,
+ 'has_result_set': result.cursor is not None,
'modified_row_count': 0,
'guid': guid,
'result': {
- 'has_more': result.cursor != None,
+ 'has_more': result.cursor is not None,
'data': [],
'meta': cache['meta'],
'type': 'table'
@@ -292,7 +278,6 @@ def execute(self, notebook, snippet):
return response
-
@query_error_handler
def explain(self, notebook, snippet):
session = self._get_session(notebook, snippet)
@@ -310,7 +295,7 @@ def explain(self, notebook, snippet):
explanation = ''
else:
try:
- result = connection.execute('EXPLAIN '+ statement)
+ result = connection.execute('EXPLAIN ' + statement)
explanation = "\n".join("{}: {},".format(k, v) for row in result for k, v in row.items())
except ProgrammingError:
pass
@@ -323,7 +308,6 @@ def explain(self, notebook, snippet):
'statement': statement
}
-
@query_error_handler
def check_status(self, notebook, snippet):
guid = snippet.get('result', {}).get('handle', {}).get('guid')
@@ -344,7 +328,6 @@ def check_status(self, notebook, snippet):
return response
-
@query_error_handler
def progress(self, notebook, snippet, logs=''):
progress = 50
@@ -363,7 +346,6 @@ def progress(self, notebook, snippet, logs=''):
progress = stats.get('completedSplits', 0) * 100 // stats.get('totalSplits', 1)
return progress
-
@query_error_handler
def fetch_result(self, notebook, snippet, rows, start_over):
guid = snippet['result']['handle']['guid']
@@ -383,7 +365,6 @@ def fetch_result(self, notebook, snippet, rows, start_over):
'type': 'table'
}
-
def _assign_types(self, results, meta):
result = results and results[0]
if result:
@@ -401,17 +382,14 @@ def _assign_types(self, results, meta):
else:
meta[index]['type'] = 'STRING_TYPE'
-
@query_error_handler
def fetch_result_metadata(self):
pass
-
@query_error_handler
def cancel(self, notebook, snippet):
return self.close_statement(notebook, snippet)
-
@query_error_handler
def get_log(self, notebook, snippet, startFrom=None, size=None):
guid = snippet['result']['handle']['guid']
@@ -433,12 +411,10 @@ def close_statement(self, notebook, snippet):
finally:
return result
-
def close_session(self, session):
engine = self._get_engine()
engine.dispose() # ENGINE_KEY currently includes the current user
-
@query_error_handler
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
engine = self._get_engine()
@@ -486,7 +462,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N
response['status'] = 0
return response
-
@query_error_handler
def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None):
engine = self._get_engine()
@@ -530,7 +505,6 @@ def get_browse_query(self, snippet, database, table, partition_spec=None):
'backticks': self.backticks
})
-
def _get_column_type_name(self, col):
try:
name = str(col.get('type'))
@@ -539,7 +513,6 @@ def _get_column_type_name(self, col):
return name
-
def _fix_bigquery_db_prefixes(self, table_or_column):
if self.options['url'].startswith('bigquery://'):
table_or_column = table_or_column.rsplit('.', 1)[-1]
diff --git a/desktop/libs/notebook/src/notebook/connectors/sql_alchemy_tests.py b/desktop/libs/notebook/src/notebook/connectors/sql_alchemy_tests.py
index fe8f785d1fd..20cab695a22 100644
--- a/desktop/libs/notebook/src/notebook/connectors/sql_alchemy_tests.py
+++ b/desktop/libs/notebook/src/notebook/connectors/sql_alchemy_tests.py
@@ -16,27 +16,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
-import pytest
import sys
+import logging
+from builtins import object
+from unittest.mock import MagicMock, Mock, patch
+import pytest
from sqlalchemy.exc import UnsupportedCompilationError
-from sqlalchemy.types import NullType, ARRAY, JSON, VARCHAR
+from sqlalchemy.types import ARRAY, JSON, VARCHAR, NullType
from desktop.auth.backend import rewrite_user
from desktop.lib.django_test_util import make_logged_in_client
-from useradmin.models import User
-
from notebook.connectors.base import AuthenticationRequired
-from notebook.connectors.sql_alchemy import SqlAlchemyApi, Assist
-
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock, MagicMock
-else:
- from mock import patch, Mock, MagicMock
-
+from notebook.connectors.sql_alchemy import Assist, SqlAlchemyApi
+from useradmin.models import User
LOG = logging.getLogger()
@@ -55,7 +48,6 @@ def setup_method(self):
},
}
-
def test_column_backticks_escaping(self):
interpreter = {
'name': 'hive',
@@ -73,7 +65,6 @@ def test_column_backticks_escaping(self):
}
assert SqlAlchemyApi(self.user, interpreter).backticks == '"'
-
def test_create_athena_engine(self):
interpreter = {
'name': 'hive',
@@ -86,7 +77,6 @@ def test_create_athena_engine(self):
with patch('notebook.connectors.sql_alchemy.create_engine') as create_engine:
SqlAlchemyApi(self.user, interpreter)._create_engine()
-
def test_fetch_result_empty(self):
notebook = Mock()
snippet = {'result': {'handle': {'guid': 'guid-1'}}}
@@ -97,7 +87,7 @@ def test_fetch_result_empty(self):
CONNECTIONS.get = Mock(
return_value={
'result': Mock(
- fetchmany=Mock(return_value=[]) # We have 0 rows
+ fetchmany=Mock(return_value=[]) # We have 0 rows
),
'meta': MagicMock(
__getitem__=Mock(return_value={'type': 'BIGINT_TYPE'}),
@@ -110,12 +100,11 @@ def test_fetch_result_empty(self):
assert not data['has_more']
assert data['has_more'] != []
- assert data['has_more'] == False
+ assert data['has_more'] is False
assert data['data'] == []
assert data['meta']() == [{'type': 'BIGINT_TYPE'}]
-
def test_fetch_result_rows(self):
notebook = Mock()
snippet = {'result': {'handle': {'guid': 'guid-1'}}}
@@ -126,7 +115,7 @@ def test_fetch_result_rows(self):
CONNECTIONS.get = Mock(
return_value={
'result': Mock(
- fetchmany=Mock(return_value=[['row1'], ['row2']]) # We have 2 rows
+ fetchmany=Mock(return_value=[['row1'], ['row2']]) # We have 2 rows
),
'meta': MagicMock(
__getitem__=Mock(return_value={'type': 'BIGINT_TYPE'}),
@@ -139,12 +128,11 @@ def test_fetch_result_rows(self):
assert not data['has_more']
assert data['has_more'] != []
- assert data['has_more'] == False
+ assert data['has_more'] is False
assert data['data'] == [['row1'], ['row2']]
assert data['meta']() == [{'type': 'BIGINT_TYPE'}]
-
def test_create_engine_auth_error(self):
interpreter = {
'name': 'hive',
@@ -157,7 +145,6 @@ def test_create_engine_auth_error(self):
with pytest.raises(AuthenticationRequired):
SqlAlchemyApi(self.user, interpreter)._create_engine()
-
def test_create_engine_auth(self):
interpreter = {
'name': 'hive',
@@ -181,7 +168,6 @@ def test_create_engine_auth(self):
with patch('notebook.connectors.sql_alchemy.create_engine') as create_engine:
SqlAlchemyApi(self.user, interpreter)._create_engine()
-
def test_create_connection_error(self):
interpreter = {
'name': 'hive',
@@ -195,7 +181,6 @@ def test_create_connection_error(self):
engine = SqlAlchemyApi(self.user, interpreter)._create_engine()
SqlAlchemyApi(self.user, interpreter)._create_connection(engine)
-
def test_create_connection(self):
interpreter = {
'name': 'hive',
@@ -220,7 +205,6 @@ def test_create_connection(self):
engine = SqlAlchemyApi(self.user, interpreter)._create_engine()
SqlAlchemyApi(self.user, interpreter)._create_connection(engine)
-
def test_create_engine_with_impersonation(self):
interpreter = {
'name': 'hive',
@@ -236,7 +220,6 @@ def test_create_engine_with_impersonation(self):
create_engine.assert_called_with('presto://hue:8080/hue', pool_pre_ping=True)
-
interpreter['options']['has_impersonation'] = True # On
with patch('notebook.connectors.sql_alchemy.create_engine') as create_engine:
@@ -261,7 +244,6 @@ def test_create_engine_with_impersonation_phoenix(self):
create_engine.assert_called_with('phoenix://hue:8080/hue', pool_pre_ping=False)
-
interpreter['options']['has_impersonation'] = True # On
with patch('notebook.connectors.sql_alchemy.create_engine') as create_engine:
@@ -269,7 +251,6 @@ def test_create_engine_with_impersonation_phoenix(self):
create_engine.assert_called_with('phoenix://test@hue:8080/hue', pool_pre_ping=False)
-
def test_explain(self):
with patch('notebook.connectors.sql_alchemy.SqlAlchemyApi._create_connection') as _create_connection:
@@ -291,7 +272,6 @@ def test_explain(self):
assert explanation == response['explanation']
-
def test_check_status(self):
notebook = Mock()
@@ -305,7 +285,6 @@ def test_check_status(self):
response = SqlAlchemyApi(self.user, self.interpreter).check_status(notebook, snippet)
assert response['status'] == 'available'
-
def test_get_sample_data(self):
snippet = Mock()
@@ -321,7 +300,6 @@ def test_get_sample_data(self):
response['full_headers'] ==
[{'name': 'col1', 'type': 'STRING_TYPE', 'comment': ''}])
-
def test_get_tables(self):
snippet = MagicMock()
@@ -338,7 +316,6 @@ def test_get_tables(self):
assert response['tables_meta'][0]['type'] == 'Table'
assert response['tables_meta'][1]['type'] == 'View'
-
def test_get_sample_data_table(self):
snippet = Mock()
@@ -351,7 +328,6 @@ def test_get_sample_data_table(self):
assert response['rows'] == [[1], [2]]
-
def test_dialect_trim_statement_semicolon(self):
interpreter = {
'name': 'presto',
@@ -385,7 +361,6 @@ def test_dialect_trim_statement_semicolon(self):
execute.assert_called_with('SELECT 1')
-
def test_get_log(self):
notebook = Mock()
snippet = MagicMock()
@@ -411,27 +386,23 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False)
self.user = rewrite_user(User.objects.get(username="test"))
-
def test_backticks_with_connectors(self):
interpreter = {'name': 'hive', 'options': {'url': 'dialect://'}, 'dialect_properties': {'sql_identifier_quote': '`'}}
data = SqlAlchemyApi(self.user, interpreter).get_browse_query(snippet=Mock(), database='db1', table='table1')
assert data == 'SELECT *\nFROM `db1`.`table1`\nLIMIT 1000\n'
-
interpreter = {'options': {'url': 'dialect://'}, 'dialect_properties': {'sql_identifier_quote': '"'}}
data = SqlAlchemyApi(self.user, interpreter).get_browse_query(snippet=Mock(), database='db1', table='table1')
assert data == 'SELECT *\nFROM "db1"."table1"\nLIMIT 1000\n'
-
def test_backticks_without_connectors(self):
interpreter = {'name': 'hive', 'options': {'url': 'hive://'}}
data = SqlAlchemyApi(self.user, interpreter).get_browse_query(snippet=Mock(), database='db1', table='table1')
assert data == 'SELECT *\nFROM `db1`.`table1`\nLIMIT 1000\n'
-
interpreter = {'name': 'postgresql', 'options': {'url': 'postgresql://'}}
data = SqlAlchemyApi(self.user, interpreter).get_browse_query(snippet=Mock(), database='db1', table='table1')
@@ -445,7 +416,6 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False)
self.user = rewrite_user(User.objects.get(username="test"))
-
def test_empty_database_names(self):
interpreter = {
'name': 'hive',
@@ -480,6 +450,7 @@ def col1_dict(key):
col1 = MagicMock()
col1.__getitem__.side_effect = col1_dict
col1.get = col1_dict
+
def col2_dict(key):
return {
'name': 'col2',
@@ -543,7 +514,6 @@ def test_get_column_type_name_complex(self):
assert api._get_column_type_name({'type': ARRAY}) == 'array'
assert api._get_column_type_name({'type': JSON}) == 'json'
-
def test_fix_bigquery_db_prefixes(self):
interpreter = {
'name': 'bigquery',
diff --git a/desktop/libs/notebook/src/notebook/connectors/sqlflow.py b/desktop/libs/notebook/src/notebook/connectors/sqlflow.py
index 481c9011706..f1e6c73abc4 100644
--- a/desktop/libs/notebook/src/notebook/connectors/sqlflow.py
+++ b/desktop/libs/notebook/src/notebook/connectors/sqlflow.py
@@ -18,26 +18,20 @@
from __future__ import absolute_import
-import logging
-import json
import os
import sys
+import json
+import logging
import sqlflow
+from django.utils.translation import gettext as _
from sqlflow.rows import Rows
from desktop.lib.i18n import force_unicode
-
from notebook.connectors.base import Api, QueryError
-from notebook.decorators import ssh_error_handler, rewrite_ssh_api_url
+from notebook.decorators import rewrite_ssh_api_url, ssh_error_handler
from notebook.models import escape_rows
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -62,12 +56,10 @@ def __init__(self, user, interpreter=None):
if self.options.get('has_ssh'):
self.url = rewrite_ssh_api_url(self.url)['url']
-
def _get_db(self):
os.environ['SQLFLOW_DATASOURCE'] = self.interpreter['options']['datasource']
return sqlflow.Client(server_url='172.18.1.3:50051') # TODO Send as param instead of ENV
-
@query_error_handler
@ssh_error_handler
def execute(self, notebook, snippet):
@@ -98,7 +90,6 @@ def execute(self, notebook, snippet):
}
}
-
def _execute(self, statement):
db = self._get_db()
@@ -124,12 +115,10 @@ def _execute(self, statement):
'description': description,
}
-
@query_error_handler
def check_status(self, notebook, snippet):
return {'status': 'available'}
-
@query_error_handler
@ssh_error_handler
def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None):
@@ -175,7 +164,6 @@ def get_sample_data(self, snippet, database=None, table=None, column=None, is_as
return response
-
def fetch_result(self, notebook, snippet, rows, start_over):
"""Only called at the end of a live query."""
return {
diff --git a/desktop/libs/notebook/src/notebook/decorators.py b/desktop/libs/notebook/src/notebook/decorators.py
index 78d16abe2e8..7d59b46a61c 100644
--- a/desktop/libs/notebook/src/notebook/decorators.py
+++ b/desktop/libs/notebook/src/notebook/decorators.py
@@ -15,36 +15,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from past.builtins import basestring
-import json
-import logging
-import math
import re
import sys
+import json
+import math
+import logging
from django.forms import ValidationError
from django.http import Http404
from django.utils.functional import wraps
+from django.utils.translation import gettext as _
+from past.builtins import basestring
from dashboard.models import extract_solr_exception_message
from desktop.conf import ENABLE_HUE_5
from desktop.lib.django_util import JsonResponse
from desktop.lib.exceptions_renderable import PopupException
-from desktop.lib.i18n import smart_unicode
+from desktop.lib.i18n import smart_str
from desktop.lib.rest.http_client import RestException
-from desktop.models import Document2, Document, FilesystemException
-
+from desktop.models import Document, Document2, FilesystemException
from notebook.conf import check_has_missing_permission
-from notebook.connectors.base import QueryExpired, QueryError, SessionExpired, AuthenticationRequired, OperationTimeout, \
- OperationNotSupported
+from notebook.connectors.base import (
+ AuthenticationRequired,
+ OperationNotSupported,
+ OperationTimeout,
+ QueryError,
+ QueryExpired,
+ SessionExpired,
+)
from notebook.models import _get_editor_type
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -55,7 +55,7 @@ def decorate(request, *args, **kwargs):
editor_type = request.GET.get('type', 'hive')
gist_id = request.POST.get('gist')
- if editor_type == 'gist' or gist_id: # Gist don't have permissions currently
+ if editor_type == 'gist' or gist_id: # Gist don't have permissions currently
pass
else:
if editor_id: # Open existing saved editor document
@@ -144,7 +144,7 @@ def wrapper(*args, **kwargs):
except QueryError as e:
LOG.exception('Error running %s' % f.__name__)
response['status'] = 1
- response['message'] = smart_unicode(e)
+ response['message'] = smart_str(e)
if response['message'].index("max_row_size"):
size = re.search(r"(\d+.?\d*) (.B)", response['message'])
if size and size.group(1):
@@ -168,16 +168,18 @@ def wrapper(*args, **kwargs):
except Exception as e:
LOG.exception('Error running %s' % f.__name__)
response['status'] = -1
- response['message'] = smart_unicode(e)
+ response['message'] = smart_str(e)
finally:
if response:
return JsonResponse(response)
return wrapper
+
def _closest_power_of_2(number):
return math.pow(2, math.ceil(math.log(number, 2)))
+
def _to_size_in_bytes(size, unit):
unit_size = 1
unit = unit.upper()
@@ -192,6 +194,7 @@ def _to_size_in_bytes(size, unit):
return float(size) * unit_size
+
def json_error_handler(view_fn):
def decorator(*args, **kwargs):
try:
diff --git a/desktop/libs/notebook/src/notebook/management/commands/notebook_setup.py b/desktop/libs/notebook/src/notebook/management/commands/notebook_setup.py
index bb89dee16c0..12cf9b0527d 100644
--- a/desktop/libs/notebook/src/notebook/management/commands/notebook_setup.py
+++ b/desktop/libs/notebook/src/notebook/management/commands/notebook_setup.py
@@ -15,18 +15,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import os
import pwd
import sys
+import logging
from django.core import management
from django.core.management.base import BaseCommand
from django.db import transaction
-from desktop.models import Directory, Document, Document2, Document2Permission, SAMPLE_USER_OWNERS
-from useradmin.models import get_default_user_group, install_sample_user, User
-
+from desktop.models import SAMPLE_USER_OWNERS, Directory, Document, Document2, Document2Permission
+from useradmin.models import User, get_default_user_group, install_sample_user
LOG = logging.getLogger()
@@ -47,10 +46,8 @@ def handle(self, *args, **options):
sample_user = install_sample_user()
with transaction.atomic():
- if sys.version_info[0] > 2:
- management.call_command('loaddata', 'initial_notebook_examples.json', verbosity=2)
- else:
- management.call_command('loaddata', 'initial_notebook_examples.json', verbosity=2, commit=False)
+ management.call_command('loaddata', 'initial_notebook_examples.json', verbosity=2)
+
Document.objects.sync()
# Get or create sample user directories
diff --git a/desktop/libs/notebook/src/notebook/models.py b/desktop/libs/notebook/src/notebook/models.py
index 5005a8be268..4ef3923261c 100644
--- a/desktop/libs/notebook/src/notebook/models.py
+++ b/desktop/libs/notebook/src/notebook/models.py
@@ -15,7 +15,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import sys
import json
import math
import uuid
@@ -24,31 +23,23 @@
import datetime
from builtins import object, str
from datetime import timedelta
+from urllib.parse import quote as urllib_quote
from django.contrib.sessions.models import Session
from django.db.models import Count
from django.db.models.functions import Trunc
from django.utils.html import escape
+from django.utils.translation import gettext as _
from desktop.conf import has_connectors
from desktop.lib.connectors.models import _get_installed_connectors
-from desktop.lib.i18n import smart_unicode
+from desktop.lib.i18n import smart_str
from desktop.lib.paths import SAFE_CHARACTERS_URI
from desktop.models import Directory, Document2
from notebook.conf import EXAMPLES, get_ordered_interpreters
from notebook.connectors.base import Notebook, get_api as _get_api, get_interpreter
from useradmin.models import User, install_sample_user
-if sys.version_info[0] > 2:
- from urllib.parse import quote as urllib_quote
-
- from django.utils.translation import gettext as _
-else:
- from urllib import quote as urllib_quote
-
- from django.utils.translation import ugettext as _
-
-
LOG = logging.getLogger()
@@ -70,7 +61,7 @@ def escape_rows(rows, nulls_only=False, encoding=None):
escaped_field = 'NULL'
else:
# Prevent error when getting back non utf8 like charset=iso-8859-1
- escaped_field = smart_unicode(field, errors='replace', encoding=encoding)
+ escaped_field = smart_str(field, errors='replace', encoding=encoding)
if not nulls_only:
escaped_field = escape(escaped_field).replace(' ', '&nbsp;')
escaped_row.append(escaped_field)
diff --git a/desktop/libs/notebook/src/notebook/models_tests.py b/desktop/libs/notebook/src/notebook/models_tests.py
index 8384b687007..4232562162d 100644
--- a/desktop/libs/notebook/src/notebook/models_tests.py
+++ b/desktop/libs/notebook/src/notebook/models_tests.py
@@ -16,23 +16,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
+import sys
import json
+import logging
+from unittest.mock import MagicMock, Mock, patch
+
import pytest
-import sys
from desktop.lib.django_test_util import make_logged_in_client
from desktop.models import Document2
-from useradmin.models import User
-
from notebook.conf import EXAMPLES
-from notebook.models import install_custom_examples, Analytics
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock, MagicMock
-else:
- from mock import patch, Mock, MagicMock
-
+from notebook.models import Analytics, install_custom_examples
+from useradmin.models import User
LOG = logging.getLogger()
@@ -62,7 +57,6 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=True, is_admin=True)
self.user = User.objects.get(username="test")
-
def test_install_only_hive_queries(self):
finish = [
EXAMPLES.AUTO_LOAD.set_for_testing(True),
@@ -100,7 +94,6 @@ def test_install_only_hive_queries(self):
for f in finish:
f()
-
def test_install_auto_load_disabled(self):
f = EXAMPLES.AUTO_LOAD.set_for_testing(False)
try:
diff --git a/desktop/libs/notebook/src/notebook/routing.py b/desktop/libs/notebook/src/notebook/routing.py
index ff2e97c0558..b5681213716 100644
--- a/desktop/libs/notebook/src/notebook/routing.py
+++ b/desktop/libs/notebook/src/notebook/routing.py
@@ -17,13 +17,9 @@
import sys
-from desktop.conf import has_channels
-
-if sys.version_info[0] > 2:
- from django.urls import re_path
-else:
- from django.conf.urls import url as re_path
+from django.urls import re_path
+from desktop.conf import has_channels
if has_channels():
from notebook import consumer
diff --git a/desktop/libs/notebook/src/notebook/sql_utils.py b/desktop/libs/notebook/src/notebook/sql_utils.py
index 6a5fdf21e01..d121a9924dc 100644
--- a/desktop/libs/notebook/src/notebook/sql_utils.py
+++ b/desktop/libs/notebook/src/notebook/sql_utils.py
@@ -15,19 +15,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from future import standard_library
-standard_library.install_aliases()
-import hashlib
import os
import re
-import sys
-
-from desktop.lib.i18n import smart_str
-
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
-else:
- from StringIO import StringIO as string_io
+import hashlib
+from io import StringIO as string_io
# Note: Might be replaceable by sqlparse.split
@@ -50,6 +41,7 @@ def get_statements(hql_query, dialect=None):
})
return statements
+
def get_current_statement(snippet):
# Multiquery, if not first statement or arrived to the last query
should_close = False
@@ -59,7 +51,7 @@ def get_current_statement(snippet):
statements = get_statements(snippet['statement'], snippet['dialect'] if 'dialect' in snippet else None)
- statement_id = min(statement_id, len(statements) - 1) # In case of removal of statements
+ statement_id = min(statement_id, len(statements) - 1) # In case of removal of statements
previous_statement_hash = compute_statement_hash(statements[statement_id]['statement'])
non_edited_statement = previous_statement_hash == handle.get('previous_statement_hash') or not handle.get('previous_statement_hash')
@@ -86,10 +78,8 @@ def get_current_statement(snippet):
def compute_statement_hash(statement):
- if sys.version_info[0] > 2:
- return hashlib.sha224(statement.encode()).hexdigest()
- else:
- return hashlib.sha224(smart_str(statement)).hexdigest()
+ return hashlib.sha224(statement.encode()).hexdigest()
+
def split_statements(hql, dialect=None):
"""
@@ -154,11 +144,13 @@ def split_statements(hql, dialect=None):
if current and current != ';':
current = current.strip()
- statements.append(((start_row, start_col), (end_row, end_col+1), current))
+ statements.append(((start_row, start_col), (end_row, end_col + 1), current))
return statements
-_SEMICOLON_WHITESPACE = re.compile(";\s*$")
+
+_SEMICOLON_WHITESPACE = re.compile(r";\s*$")
+
def strip_trailing_semicolon(query):
"""As a convenience, we remove trailing semicolons from queries."""
diff --git a/desktop/libs/notebook/src/notebook/tasks.py b/desktop/libs/notebook/src/notebook/tasks.py
index da54badb695..96fb34f2802 100644
--- a/desktop/libs/notebook/src/notebook/tasks.py
+++ b/desktop/libs/notebook/src/notebook/tasks.py
@@ -25,6 +25,7 @@
import logging
import datetime
from builtins import next, object
+from io import StringIO as string_io
from celery import states
from celery.utils.log import get_task_logger
@@ -46,12 +47,6 @@
from notebook.sql_utils import get_current_statement
from useradmin.models import User
-if sys.version_info[0] > 2:
- from io import StringIO as string_io
-else:
- from StringIO import StringIO as string_io
-
-
LOG_TASK = get_task_logger(__name__)
LOG = logging.getLogger()
STATE_MAP = {
@@ -133,7 +128,7 @@ def download_to_file(notebook, snippet, file_format='csv', max_rows=-1, **kwargs
if TASK_SERVER.RESULT_CACHE.get():
with storage.open(result_key, 'rb') as store:
with codecs.getreader('utf-8')(store) as text_file:
- delimiter = ',' if sys.version_info[0] > 2 else ','.encode('utf-8')
+ delimiter = ','
csv_reader = csv.reader(text_file, delimiter=delimiter)
caches[CACHES_CELERY_QUERY_RESULT_KEY].set(result_key, [row for row in csv_reader], 60 * 5)
LOG.info('Caching results %s.' % result_key)
@@ -403,7 +398,7 @@ def _get_data(task_id):
csv_reader = csv_reader[1:] if csv_reader else []
else:
f = storage.open(result_key, 'rb')
- delimiter = ',' if sys.version_info[0] > 2 else ','.encode('utf-8')
+ delimiter = ','
csv_reader = csv.reader(f, delimiter=delimiter)
headers = next(csv_reader, [])
diff --git a/desktop/libs/notebook/src/notebook/tasks_tests.py b/desktop/libs/notebook/src/notebook/tasks_tests.py
index 4651f2e5c66..cf2d5d5f4e6 100644
--- a/desktop/libs/notebook/src/notebook/tasks_tests.py
+++ b/desktop/libs/notebook/src/notebook/tasks_tests.py
@@ -16,28 +16,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-import pytest
import sys
+import logging
+from unittest.mock import MagicMock, Mock, patch
+import pytest
from celery import states
from desktop.lib.django_test_util import make_logged_in_client
-from useradmin.models import User
-
from notebook.connectors.sql_alchemy import SqlAlchemyApi
-from notebook.tasks import run_sync_query, download_to_file, close_statement, get_log
-
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock, MagicMock
-else:
- from mock import patch, Mock, MagicMock
-
+from notebook.tasks import close_statement, download_to_file, get_log, run_sync_query
+from useradmin.models import User
LOG = logging.getLogger()
-
@pytest.mark.django_db
class TestRunAsyncQueryTask():
@@ -45,7 +38,6 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False)
self.user = User.objects.get(username="test")
-
def test_run_query_only(self):
with patch('notebook.tasks._get_request') as _get_request:
with patch('notebook.tasks.get_api') as get_api:
@@ -73,7 +65,6 @@ def notebook_dict(key):
assert meta['row_counter'] == 2, meta
-
def test_close_statement(self):
with patch('notebook.tasks._get_request') as _get_request:
with patch('notebook.tasks.download_to_file') as download_to_file:
@@ -96,7 +87,6 @@ def notebook_dict(key):
assert response == {'status': 0}
-
def test_get_log(self):
with patch('notebook.tasks._get_request') as _get_request:
with patch('notebook.tasks.download_to_file') as download_to_file:
@@ -118,7 +108,6 @@ def notebook_dict(key):
assert response == ''
-
@pytest.mark.django_db
class TestRunSyncQueryTask():
@@ -126,7 +115,6 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False)
self.user = User.objects.get(username="test")
-
def test_run_query(self):
snippet = {'type': 'mysql', 'statement_raw': 'SHOW TABLES', 'variables': []}
diff --git a/desktop/libs/notebook/src/notebook/templates/editor_components.mako b/desktop/libs/notebook/src/notebook/templates/editor_components.mako
index 24ad6624757..836f6180e8b 100644
--- a/desktop/libs/notebook/src/notebook/templates/editor_components.mako
+++ b/desktop/libs/notebook/src/notebook/templates/editor_components.mako
@@ -21,7 +21,6 @@ from webpack_loader.templatetags.webpack_loader import render_bundle
from desktop import conf
from desktop.auth.backend import is_admin
-from desktop.lib.i18n import smart_unicode
from desktop.views import _ko, antixss
from desktop.webpack_utils import get_hue_bundles
diff --git a/desktop/libs/notebook/src/notebook/urls.py b/desktop/libs/notebook/src/notebook/urls.py
index 2c38a1644e7..928e33e6081 100644
--- a/desktop/libs/notebook/src/notebook/urls.py
+++ b/desktop/libs/notebook/src/notebook/urls.py
@@ -15,8 +15,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import sys
-
from django.urls import re_path
from notebook import api as notebook_api, views as notebook_views
diff --git a/desktop/libs/notebook/src/notebook/views.py b/desktop/libs/notebook/src/notebook/views.py
index c2a20e9689b..e22a02afbd1 100644
--- a/desktop/libs/notebook/src/notebook/views.py
+++ b/desktop/libs/notebook/src/notebook/views.py
@@ -15,12 +15,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import sys
import json
import logging
-import sys
-from django.urls import reverse
from django.shortcuts import redirect
+from django.urls import reverse
+from django.utils.translation import gettext as _
from django.views.decorators.http import require_POST
from beeswax.data_export import DOWNLOAD_COOKIE_AGE
@@ -29,26 +30,21 @@
from desktop.conf import ENABLE_DOWNLOAD, ENABLE_HUE_5, USE_NEW_EDITOR
from desktop.lib import export_csvxls
from desktop.lib.connectors.models import Connector
-from desktop.lib.django_util import render, JsonResponse
+from desktop.lib.django_util import JsonResponse, render
from desktop.lib.exceptions_renderable import PopupException
-from desktop.models import Document2, Document, FilesystemException, _get_gist_document
+from desktop.models import Document, Document2, FilesystemException, _get_gist_document
from desktop.views import serve_403_error
-from metadata.conf import has_optimizer, has_catalog, has_workload_analytics
-
-from notebook.conf import get_ordered_interpreters, SHOW_NOTEBOOKS, EXAMPLES
+from metadata.conf import has_catalog, has_optimizer, has_workload_analytics
+from notebook.conf import EXAMPLES, SHOW_NOTEBOOKS, get_ordered_interpreters
from notebook.connectors.base import Notebook, _get_snippet_name, get_interpreter
from notebook.connectors.spark_shell import SparkApi
-from notebook.decorators import check_editor_access_permission, check_document_access_permission, check_document_modify_permission
+from notebook.decorators import check_document_access_permission, check_document_modify_permission, check_editor_access_permission
from notebook.management.commands.notebook_setup import Command
-from notebook.models import make_notebook, _get_editor_type, get_api, _get_dialect_example
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from notebook.models import _get_dialect_example, _get_editor_type, get_api, make_notebook
LOG = logging.getLogger()
+
@check_document_access_permission
def notebook(request, is_embeddable=False):
if not SHOW_NOTEBOOKS.get() or not request.user.has_hue_permission(action="access", app='notebook'):
@@ -60,7 +56,7 @@ def notebook(request, is_embeddable=False):
try:
from spark.conf import LIVY_SERVER_SESSION_KIND
is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
- except:
+ except Exception:
LOG.exception('Spark is not enabled')
return render('notebook.mako', request, {
@@ -225,9 +221,9 @@ def execute_and_watch(request):
sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True)
- from indexer.api3 import _index # Will ve moved to the lib
- from indexer.file_format import HiveFormat
+  from indexer.api3 import _index  # Will be moved to the lib
from indexer.fields import Field
+ from indexer.file_format import HiveFormat
file_format = {
'name': 'col',
diff --git a/desktop/libs/notebook/src/notebook/views_tests.py b/desktop/libs/notebook/src/notebook/views_tests.py
index 66a0648612b..9e0227a01c6 100644
--- a/desktop/libs/notebook/src/notebook/views_tests.py
+++ b/desktop/libs/notebook/src/notebook/views_tests.py
@@ -16,23 +16,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import json
-import pytest
-import sys
+import logging
+from unittest.mock import MagicMock, Mock, patch
+import pytest
from django.urls import reverse
-from desktop.lib.django_test_util import make_logged_in_client
from desktop.lib.connectors.models import Connector
+from desktop.lib.django_test_util import make_logged_in_client
from useradmin.models import User
-if sys.version_info[0] > 2:
- from unittest.mock import patch, Mock, MagicMock
-else:
- from mock import patch, Mock, MagicMock
-
-
LOG = logging.getLogger()
@@ -43,7 +37,6 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=True, is_admin=True)
self.user = User.objects.get(username="test")
-
def test_install_via_insert_mysql(self):
with patch('notebook.views.Connector.objects') as ConnectorObjects:
with patch('notebook.views.get_interpreter') as get_interpreter:
@@ -80,7 +73,6 @@ def test_install_via_insert_mysql(self):
make_notebook.assert_called()
-
def test_install_via_load_hive(self):
with patch('notebook.views.Connector.objects') as ConnectorObjects:
with patch('notebook.views.get_interpreter') as get_interpreter:
@@ -132,7 +124,6 @@ def test_install_via_load_hive(self):
fs.do_as_user.assert_called()
-
def test_install_via_insert_hive(self):
with patch('notebook.views.Connector.objects') as ConnectorObjects:
with patch('notebook.views.get_interpreter') as get_interpreter:
diff --git a/pyproject.toml b/pyproject.toml
index 4947ea90d21..332923fbeda 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,8 +39,21 @@ ignore = [
"E114",
"E117",
"W191",
+ "E731"
]
+[tool.ruff.lint.per-file-ignores]
+"desktop/libs/notebook/src/notebook/connectors/altus.py" = ["E501"]
+"desktop/libs/notebook/src/notebook/connectors/altus_adb.py" = ["E501"]
+"desktop/libs/metadata/src/metadata/catalog/dummy_client.py" = ["E501"]
+"desktop/libs/liboozie/src/liboozie/submission2.py" = ["E501"]
+"desktop/libs/libanalyze/src/libanalyze/rules.py" = ["E501"]
+"apps/hbase/src/hbase/api.py" = ["E501"]
+"desktop/libs/librdbms/src/librdbms/server/mysql_lib.py" = ["E402"]
+"desktop/core/src/desktop/urls.py" = ["E402"]
+"desktop/core/src/desktop/lib/thrift_util_test.py" = ["E402"]
+"desktop/core/src/desktop/auth/backend.py" = ["E402"]
+
[tool.ruff.format]
docstring-code-format = true
docstring-code-line-length = 140
diff --git a/tools/app_reg/pth.py b/tools/app_reg/pth.py
index ae421099ebd..5579d2659f7 100644
--- a/tools/app_reg/pth.py
+++ b/tools/app_reg/pth.py
@@ -19,21 +19,12 @@
Tools to manipulate the .pth file in the virtualenv.
"""
+import os
import glob
import logging
-import os
-import sys
import common
-py2or3 = "2"
-if sys.version_info[0] > 2:
- from builtins import object
- open_file = open
- py2or3 = "3"
-else:
- open_file = file
-
LOG = logging.getLogger(__name__)
PTH_FILE = 'hue.pth'
@@ -63,7 +54,7 @@ def _relpath(self, path):
def _read(self):
if os.path.exists(self._path):
- self._entries = set(open_file(self._path).read().split('\n'))
+ self._entries = set(open(self._path).read().split('\n'))
def add(self, app):
"""
@@ -139,11 +130,7 @@ def save(self):
with open(self._path, 'w') as _file:
# We want the Hue libraries to come before system libraries in
# case there is a name collision.
- if py2or3 == "2":
- _file.write("import sys; sys.__plen = len(sys.path)\n")
_file.write('\n'.join(sorted(self._entries)))
- if py2or3 == "2":
- _file.write("\nimport sys; new=sys.path[sys.__plen:]; del sys.path[sys.__plen:]; sys.path[0:0]=new\n")
LOG.info('=== Saved %s' % self._path)
def sync(self, apps):
diff --git a/tools/app_reg/registry.py b/tools/app_reg/registry.py
index e72dae0e5f2..47d96efbac8 100644
--- a/tools/app_reg/registry.py
+++ b/tools/app_reg/registry.py
@@ -19,19 +19,15 @@
Registry for the applications
"""
-import glob
-import logging
import os
import sys
+import glob
import json
+import logging
import common
from common import cmp
-if sys.version_info[0] > 2:
- from builtins import object
-
-
LOG = logging.getLogger(__name__)
@@ -43,16 +39,14 @@ def __init__(self):
"""Open the existing registry"""
self._reg_path = os.path.join(common.HUE_APP_REG_DIR, 'app.reg')
self._initialized = False
- self._apps = { } # Map of name -> HueApp
+ self._apps = {} # Map of name -> HueApp
self._open()
def _open(self):
"""Open the registry file. May raise OSError"""
if os.path.exists(self._reg_path):
- if sys.version_info[0] > 2:
- reg_file = open(self._reg_path)
- else:
- reg_file = file(self._reg_path)
+ reg_file = open(self._reg_path)
+
app_list = json.load(reg_file)
reg_file.close()
@@ -65,10 +59,8 @@ def _open(self):
def _write(self, path):
"""Write out the registry to the given path"""
- if sys.version_info[0] > 2:
- outfile = open(path, 'w')
- else:
- outfile = file(path, 'w')
+ outfile = open(path, 'w')
+
json.dump(list(self._apps.values()), outfile, cls=AppJsonEncoder, indent=2)
outfile.close()
@@ -178,14 +170,13 @@ def get_conffiles(self):
"""get_conffiles() -> A list of config (.ini) files"""
return glob.glob(os.path.join(self.abs_path, 'conf', '*.ini'))
-
def install_conf(self):
"""
install_conf() -> True/False
Symlink the app's conf/*.ini files into the conf directory.
"""
- installed = [ ]
+ installed = []
for target in self.get_conffiles():
link_name = os.path.join(common.HUE_CONF_DIR, os.path.basename(target))
@@ -220,7 +211,6 @@ def install_conf(self):
return False
return True
-
def uninstall_conf(self):
"""uninstall_conf() -> True/False"""
app_conf_dir = os.path.abspath(os.path.join(self.abs_path, 'conf'))
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/backend_test_curl.py b/tools/ops/script_runner/lib/custom_commands/management/commands/backend_test_curl.py
index 85e9acad293..b0cef4ad627 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/backend_test_curl.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/backend_test_curl.py
@@ -35,10 +35,7 @@
from hue_curl import Curl
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
DEFAULT_LOG_DIR = 'logs'
log_dir = os.getenv("DESKTOP_LOG_DIR", DEFAULT_LOG_DIR)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/c6_test_command.py b/tools/ops/script_runner/lib/custom_commands/management/commands/c6_test_command.py
index 65015669677..f3e800a337c 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/c6_test_command.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/c6_test_command.py
@@ -29,10 +29,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/change_owner_of_docs.py b/tools/ops/script_runner/lib/custom_commands/management/commands/change_owner_of_docs.py
index 595019418a3..638e789d70d 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/change_owner_of_docs.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/change_owner_of_docs.py
@@ -29,10 +29,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/db_query_test.py b/tools/ops/script_runner/lib/custom_commands/management/commands/db_query_test.py
index 7d7db90c01a..49115f9b6d2 100755
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/db_query_test.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/db_query_test.py
@@ -29,10 +29,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/delete_user.py b/tools/ops/script_runner/lib/custom_commands/management/commands/delete_user.py
index d3f961b453b..3c692d592d5 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/delete_user.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/delete_user.py
@@ -28,10 +28,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/estimate_concurrent_users.py b/tools/ops/script_runner/lib/custom_commands/management/commands/estimate_concurrent_users.py
index 0e428a494d6..8e500edf4c0 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/estimate_concurrent_users.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/estimate_concurrent_users.py
@@ -35,10 +35,7 @@
from hadoop import conf as hdfs_conf
from hadoop import cluster
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
DEFAULT_LOG_DIR = 'logs'
log_dir = os.getenv("DESKTOP_LOG_DIR", DEFAULT_LOG_DIR)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/hue_desktop_document_cleanup.py b/tools/ops/script_runner/lib/custom_commands/management/commands/hue_desktop_document_cleanup.py
index 8bc5b285eb2..027d32cceed 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/hue_desktop_document_cleanup.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/hue_desktop_document_cleanup.py
@@ -36,10 +36,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
logging.basicConfig()
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/list_groups.py b/tools/ops/script_runner/lib/custom_commands/management/commands/list_groups.py
index 4939a66ea69..04c4a96bde9 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/list_groups.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/list_groups.py
@@ -28,10 +28,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/remove_doc2_without_content_object.py b/tools/ops/script_runner/lib/custom_commands/management/commands/remove_doc2_without_content_object.py
index b6203222b85..769db2061da 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/remove_doc2_without_content_object.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/remove_doc2_without_content_object.py
@@ -30,10 +30,7 @@
import logging
import logging.handlers
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/remove_orphaned_docs.py b/tools/ops/script_runner/lib/custom_commands/management/commands/remove_orphaned_docs.py
index 3c89d205e04..f62f67a38ae 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/remove_orphaned_docs.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/remove_orphaned_docs.py
@@ -40,10 +40,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/rename_duplicate_users.py b/tools/ops/script_runner/lib/custom_commands/management/commands/rename_duplicate_users.py
index cecf51659de..4aad0a8bd1d 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/rename_duplicate_users.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/rename_duplicate_users.py
@@ -28,10 +28,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/run_hive_impala_query.py b/tools/ops/script_runner/lib/custom_commands/management/commands/run_hive_impala_query.py
index ed9f4a7d237..e33045f78df 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/run_hive_impala_query.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/run_hive_impala_query.py
@@ -27,10 +27,7 @@
import desktop.conf
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
logging.basicConfig()
LOG = logging.getLogger(__name__)
diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/share_all_workflows.py b/tools/ops/script_runner/lib/custom_commands/management/commands/share_all_workflows.py
index f7a4eaab0ca..d8d2954a3b9 100644
--- a/tools/ops/script_runner/lib/custom_commands/management/commands/share_all_workflows.py
+++ b/tools/ops/script_runner/lib/custom_commands/management/commands/share_all_workflows.py
@@ -27,10 +27,7 @@
import logging
import logging.handlers
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext_lazy as _t, gettext as _
-else:
- from django.utils.translation import ugettext_lazy as _t, ugettext as _
+from django.utils.translation import gettext_lazy as _t, gettext as _
LOG = logging.getLogger(__name__)