From bc6618c345c6c63a0526e38811768342ef5ad55c Mon Sep 17 00:00:00 2001 From: Sambhasan Biswas Date: Tue, 5 Oct 2021 22:08:47 +0530 Subject: [PATCH 1/8] Added timeout for connection --- pyhive/hive.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyhive/hive.py b/pyhive/hive.py index 3f71df33..d0701481 100644 --- a/pyhive/hive.py +++ b/pyhive/hive.py @@ -120,7 +120,8 @@ def __init__( password=None, check_hostname=None, ssl_cert=None, - thrift_transport=None + thrift_transport=None, + timeout=None ): """Connect to HiveServer2 @@ -152,6 +153,7 @@ def __init__( ), ssl_context=ssl_context, ) + thrift_transport.setTimeout(timeout) if auth in ("BASIC", "NOSASL", "NONE", None): # Always needs the Authorization header @@ -195,6 +197,7 @@ def __init__( if auth is None: auth = 'NONE' socket = thrift.transport.TSocket.TSocket(host, port) + socket.setTimeout(timeout) if auth == 'NOSASL': # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml self._transport = thrift.transport.TTransport.TBufferedTransport(socket) From a8b9b92b186f2aa059ed553a05b8a879202df5b2 Mon Sep 17 00:00:00 2001 From: George Liaw Date: Fri, 31 Dec 2021 16:15:56 -0800 Subject: [PATCH 2/8] leverage thrift keepalive --- TCLIService/TCLIService-remote | 11 ++++++++--- dev_requirements.txt | 2 +- pyhive/hive.py | 5 +++-- pyhive/tests/test_hive.py | 2 ++ setup.py | 2 +- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/TCLIService/TCLIService-remote b/TCLIService/TCLIService-remote index 8d875fa7..69723a45 100755 --- a/TCLIService/TCLIService-remote +++ b/TCLIService/TCLIService-remote @@ -21,7 +21,7 @@ from TCLIService.ttypes import * if len(sys.argv) <= 1 or sys.argv[1] == '--help': print('') - print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') + print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-keepalive] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') print('') print('Functions:') print(' TOpenSessionResp OpenSession(TOpenSessionReq req)') @@ -56,6 +56,7 @@ uri = '' framed = False ssl = False validate = True +keepalive = False ca_certs = None keyfile = None certfile = None @@ -95,6 +96,10 @@ if sys.argv[argi] == '-novalidate': validate = False argi += 1 +if sys.argv[argi] == '-keepalive': + keepalive = True + argi += 1 + if sys.argv[argi] == '-ca_certs': ca_certs = sys.argv[argi+1] argi += 2 @@ -114,9 +119,9 @@ if http: transport = THttpClient.THttpClient(host, port, uri) else: if ssl: - socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) + socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile, thrift_keepalive=keepalive) else: - socket = TSocket.TSocket(host, port) + socket = TSocket.TSocket(host, port, thrift_keepalive=keepalive) if framed: transport = TTransport.TFramedTransport(socket) else: diff --git a/dev_requirements.txt b/dev_requirements.txt index 0bf6d8a7..5400d7da 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -12,6 +12,6 @@ pytest-timeout==1.2.0 requests>=1.0.0 requests_kerberos>=0.12.0 sasl>=0.2.1 -thrift>=0.10.0 +thrift>=0.13.0 #thrift_sasl>=0.1.0 git+https://github.com/cloudera/thrift_sasl # Using master branch in order to get Python 3 SASL patches diff --git a/pyhive/hive.py b/pyhive/hive.py index 3f71df33..0fd42df1 100644 --- a/pyhive/hive.py +++ b/pyhive/hive.py @@ -120,7 +120,8 @@ def __init__( password=None, check_hostname=None, ssl_cert=None, - thrift_transport=None + thrift_transport=None, + thrift_keepalive=False ): """Connect to HiveServer2 @@ -194,7 +195,7 @@ def __init__( port = 10000 if auth is None: auth = 'NONE' - socket = thrift.transport.TSocket.TSocket(host, port) + socket = thrift.transport.TSocket.TSocket(host, port, thrift_keepalive) if auth == 'NOSASL': # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml self._transport = thrift.transport.TTransport.TBufferedTransport(socket) diff --git a/pyhive/tests/test_hive.py b/pyhive/tests/test_hive.py index c70ed962..390b8dcc 100644 --- a/pyhive/tests/test_hive.py +++ b/pyhive/tests/test_hive.py @@ -200,6 +200,8 @@ def test_invalid_transport(self): lambda: hive.connect(_HOST, thrift_transport=transport) ) + # TODO test keepalive + def test_custom_transport(self): socket = thrift.transport.TSocket.TSocket('localhost', 10000) sasl_auth = 'PLAIN' diff --git a/setup.py b/setup.py index ad34a38b..5675caa7 100755 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ def run_tests(self): 'requests_kerberos>=0.12.0', 'sasl>=0.2.1', 'sqlalchemy>=1.3.0', - 'thrift>=0.10.0', + 'thrift>=0.13.0', ], cmdclass={'test': PyTest}, package_data={ From f15f0e0a453b056eb4ddc1466ef8ff21992eda81 Mon Sep 17 00:00:00 2001 From: George Liaw Date: Fri, 31 Dec 2021 16:49:33 -0800 Subject: [PATCH 3/8] update extras_requires --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5675caa7..db4a79f8 100755 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ def run_tests(self): extras_require={ 'presto': ['requests>=1.0.0'], 'trino': ['requests>=1.0.0'], - 'hive': ['sasl>=0.2.1', 'thrift>=0.10.0', 'thrift_sasl>=0.1.0'], + 'hive': ['sasl>=0.2.1', 'thrift>=0.13.0', 'thrift_sasl>=0.1.0'], 'sqlalchemy': ['sqlalchemy>=1.3.0'], 'kerberos': ['requests_kerberos>=0.12.0'], }, From fd84578747c41e16eb15bf3450222d67c7d634a3 Mon Sep 17 00:00:00 2001 From: George Liaw Date: Fri, 31 Dec 2021 16:55:34 -0800 Subject: [PATCH 4/8] fix typo --- TCLIService/TCLIService-remote | 4 ++-- pyhive/hive.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/TCLIService/TCLIService-remote b/TCLIService/TCLIService-remote index 69723a45..482d25bb 100755 --- a/TCLIService/TCLIService-remote +++ b/TCLIService/TCLIService-remote @@ -119,9 +119,9 @@ if http: transport = THttpClient.THttpClient(host, port, uri) else: if ssl: - socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile, thrift_keepalive=keepalive) + socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile, socket_keepalive=keepalive) else: - socket = TSocket.TSocket(host, port, thrift_keepalive=keepalive) + socket = TSocket.TSocket(host, port, socket_keepalive=keepalive) if framed: transport = TTransport.TFramedTransport(socket) else: diff --git a/pyhive/hive.py b/pyhive/hive.py index 0fd42df1..76e5d938 100644 --- a/pyhive/hive.py +++ b/pyhive/hive.py @@ -195,7 +195,7 @@ def __init__( port = 10000 if auth is None: auth = 'NONE' - socket = thrift.transport.TSocket.TSocket(host, port, thrift_keepalive) + socket = thrift.transport.TSocket.TSocket(host, port, socket_keepalive=thrift_keepalive) if auth == 'NOSASL': # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml self._transport = thrift.transport.TTransport.TBufferedTransport(socket) From 6e92972a09146a0c7cb6ab14fb31aef514d0054a Mon Sep 17 00:00:00 2001 From: abhishek6s <94046069+abhishek6s@users.noreply.github.com> Date: Mon, 24 Jul 2023 23:18:01 +0530 Subject: [PATCH 5/8] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2ba823c2..7d1026e7 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ cover/ .cache/ *.iml /scripts/.thrift_gen +build/ \ No newline at end of file From e0be949181090fee4c85a73ca66723084337918b Mon Sep 17 00:00:00 2001 From: abhishek6s <94046069+abhishek6s@users.noreply.github.com> Date: Mon, 21 Aug 2023 21:13:48 +0530 Subject: [PATCH 6/8] [BIG-1835] added query timeout to override connection timeout --- pyhive/hive.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyhive/hive.py b/pyhive/hive.py index 8777acc7..e50ec3b1 100644 --- a/pyhive/hive.py +++ b/pyhive/hive.py @@ -161,6 +161,7 @@ def __init__( ssl_cert=None, thrift_transport=None, timeout=None, + query_timeout=None, thrift_keepalive=False ): """Connect to HiveServer2 @@ -282,6 +283,7 @@ def __init__( self._sessionHandle = response.sessionHandle assert response.serverProtocolVersion == protocol_version, \ "Unable to handle protocol version {}".format(response.serverProtocolVersion) + socket.setTimeout(query_timeout) with contextlib.closing(self.cursor()) as cursor: cursor.execute('USE `{}`'.format(database)) except: From 101f0e67e9c4feea10478295d9b5f4fa70600fac Mon Sep 17 00:00:00 2001 From: George Liaw Date: Wed, 23 Aug 2023 01:07:04 -0700 Subject: [PATCH 7/8] bump version --- pyhive/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyhive/__init__.py b/pyhive/__init__.py index 0a6bb1f6..aa1b9a92 100644 --- a/pyhive/__init__.py +++ b/pyhive/__init__.py @@ -1,3 +1,3 @@ from __future__ import absolute_import from __future__ import unicode_literals -__version__ = '0.7.0' +__version__ = '0.7.1' From 5a68ed36464fb968289a68be10ae2859a95eb442 Mon Sep 17 00:00:00 2001 From: akashshelke-6sense Date: Tue, 6 Jan 2026 22:21:54 +0530 Subject: [PATCH 8/8] [BIG-6817] pyhive client for hive hs2 TLS --- pyhive/hive.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pyhive/hive.py b/pyhive/hive.py index e50ec3b1..e3cbd739 100644 --- a/pyhive/hive.py +++ b/pyhive/hive.py @@ -32,6 +32,7 @@ import thrift.protocol.TBinaryProtocol import thrift.transport.TSocket import thrift.transport.TTransport +import thrift.transport.TSSLSocket # PEP 249 module globals apilevel = '2.0' @@ -237,7 +238,19 @@ def __init__( port = 10000 if auth is None: auth = 'NONE' - socket = thrift.transport.TSocket.TSocket(host, port, socket_keepalive=thrift_keepalive) + if configuration.get('use_ssl', False): + _logger.info("Using SSL for Hive connection") + hive_ssl_context = create_default_context() + hive_ssl_context.load_verify_locations(capath=configuration.get('ca_certs_dir', '/etc/ssl/certs/')) + hive_ssl_context.check_hostname = check_hostname == configuration.get('ssl_check_hostname', "true") + socket = thrift.transport.TSSLSocket.TSSLSocket(host, port, ssl_context=hive_ssl_context, + socket_keepalive=thrift_keepalive) + configuration.pop("use_ssl", None) + configuration.pop("ca_certs_dir", None) + configuration.pop("ssl_check_hostname", None) + else: + _logger.info("Using Non-SSL for Hive connection") + socket = thrift.transport.TSocket.TSocket(host, port, socket_keepalive=thrift_keepalive) socket.setTimeout(timeout) if auth == 'NOSASL': # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml