diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/https_trasparent.py b/https_trasparent.py index f5150b7..48bd689 100644 --- a/https_trasparent.py +++ b/https_trasparent.py @@ -32,7 +32,7 @@ def test(HandlerClass=ProxyRequestHandler, ServerClass=ThreadingHTTPSServer, pro httpd = ServerClass(server_address, HandlerClass) sa = httpd.socket.getsockname() - print "Serving HTTPS Proxy on", sa[0], "port", sa[1], "..." + print("Serving HTTPS Proxy on", sa[0], "port", sa[1], "...") httpd.serve_forever() diff --git a/proxy2.py b/proxy2.py index f502e8c..3b850c2 100644 --- a/proxy2.py +++ b/proxy2.py @@ -10,22 +10,22 @@ import time import json import re -from subprocess import Popen, PIPE - +from ssl_wrapper import * +from string import Template +from OpenSSL import crypto +import html try: import http.client as httplib import urllib.parse as urlparse from http.server import HTTPServer, BaseHTTPRequestHandler from socketserver import ThreadingMixIn - from io import StringIO - from html.parser import HTMLParser + from io import StringIO, BytesIO except ImportError: import httplib import urlparse from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler from SocketServer import ThreadingMixIn - from cStringIO import StringIO - from HTMLParser import HTMLParser + from cStringIO import StringIO, BytesIO def print_color(c, s): @@ -42,7 +42,13 @@ class ThreadingHTTPServer(ThreadingMixIn, HTTPServer): def handle_error(self, request, client_address): # surpress socket/ssl related errors cls, e = sys.exc_info()[:2] - if cls is socket.error or cls is ssl.SSLError: + if cls is socket.error or cls is BrokenPipeError or cls is ssl.SSLError: + # BrokenPipeError is socket.error in Python2 and standalone error in Python3. + # This is most frequently raised error here. + # I don't understand why it raises here + # looks like it is caused by some errors in the proxy logic: for some + # reasons a client closes connection + # I thinks the keep-alive logic should be checked. pass else: return HTTPServer.handle_error(self, request, client_address) @@ -53,13 +59,13 @@ class ProxyRequestHandler(BaseHTTPRequestHandler): cacert = join_with_script_dir('ca.crt') certkey = join_with_script_dir('cert.key') certdir = join_with_script_dir('certs/') - timeout = 5 + timeout = 10 + chain_proxy = "" lock = threading.Lock() def __init__(self, *args, **kwargs): self.tls = threading.local() self.tls.conns = {} - BaseHTTPRequestHandler.__init__(self, *args, **kwargs) def log_error(self, format, *args): @@ -70,26 +76,43 @@ def log_error(self, format, *args): self.log_message(format, *args) def do_CONNECT(self): - if os.path.isfile(self.cakey) and os.path.isfile(self.cacert) and os.path.isfile(self.certkey) and os.path.isdir(self.certdir): + if ca_files_exist(): self.connect_intercept() else: + print("can't encode ssl traffic, just relay it") self.connect_relay() def connect_intercept(self): hostname = self.path.split(':')[0] - certpath = "{}/{}.crt".format(self.certdir.rstrip('/'), hostname) + ippat = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$") + cert_category = "DNS" + if ippat.match(hostname): + cert_category = "IP" + + certpath = "%s/%s.crt" % (cert_dir.rstrip('/'), hostname) with self.lock: if not os.path.isfile(certpath): - epoch = str(int(time.time() * 1000)) - p1 = Popen(["openssl", "req", "-new", "-key", self.certkey, "-subj", "/CN={}".format(hostname)], stdout=PIPE) - p2 = Popen(["openssl", "x509", "-req", "-days", "3650", "-CA", self.cacert, "-CAkey", self.cakey, "-set_serial", epoch, "-out", certpath], stdin=p1.stdout, stderr=PIPE) - p2.communicate() - - self.wfile.write("{} {} {}\r\n".format(self.protocol_version, 200, 'Connection Established').encode('latin_1')) + x509_serial = int("%d" % (time.time() * 1000)) + valid_time_interval = (0, 60 * 60 * 24 * 365) + cert_request = create_cert_request(cert_key_obj, CN=hostname) + cert = create_certificate( + cert_request, (ca_crt_obj, ca_key_obj), x509_serial, + valid_time_interval, + subject_alt_names=[ + Template("${category}:${hostname}").substitute(hostname=hostname, category=cert_category) + ] + ) + with open(certpath, 'wb+') as f: + f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert)) + + self.wfile.write("{} {} {}\r\n".format(self.protocol_version, 200, 'Connection Established').encode('latin-1')) self.wfile.write(b'\r\n') - self.connection = ssl.wrap_socket(self.connection, keyfile=self.certkey, certfile=certpath, server_side=True) + self.connection = ssl.wrap_socket(self.connection, + keyfile=cert_key, + certfile=certpath, + server_side=True) self.rfile = self.connection.makefile("rb", self.rbufsize) self.wfile = self.connection.makefile("wb", self.wbufsize) @@ -121,6 +144,7 @@ def connect_relay(self): data = r.recv(8192) if not data: self.close_connection = 1 + break other.sendall(data) @@ -145,22 +169,30 @@ def do_GET(self): return elif req_body_modified is not None: req_body = req_body_modified - req.headers['Content-length'] = str(len(req_body)) + if 'Content-Length' in req.headers: + del req.headers['Content-Length'] + req.headers['Content-Length'] = str(len(req_body)) u = urlparse.urlsplit(req.path) scheme, netloc, path = u.scheme, u.netloc, (u.path + '?' + u.query if u.query else u.path) assert scheme in ('http', 'https') if netloc: + if 'Host' in req.headers: + del req.headers['Host'] req.headers['Host'] = netloc setattr(req, 'headers', self.filter_headers(req.headers)) try: origin = (scheme, netloc) - if not origin in self.tls.conns: + if origin not in self.tls.conns: + connection_host = self.chain_proxy if len(self.chain_proxy) else netloc if scheme == 'https': - self.tls.conns[origin] = httplib.HTTPSConnection(netloc, timeout=self.timeout) + self.tls.conns[origin] = httplib.HTTPSConnection(connection_host, timeout=self.timeout) else: - self.tls.conns[origin] = httplib.HTTPConnection(netloc, timeout=self.timeout) + self.tls.conns[origin] = httplib.HTTPConnection(connection_host, timeout=self.timeout) + if len(self.chain_proxy): + self.tls.conns[origin].set_tunnel(netloc) + conn = self.tls.conns[origin] conn.request(self.command, path, req_body, dict(req.headers)) res = conn.getresponse() @@ -170,15 +202,14 @@ def do_GET(self): setattr(res, 'response_version', version_table[res.version]) # support streaming - if not 'Content-Length' in res.headers and 'no-store' in res.headers.get('Cache-Control', ''): + if 'Content-Length' not in res.headers and 'no-store' in res.headers.get('Cache-Control', ''): self.response_handler(req, req_body, res, '') setattr(res, 'headers', self.filter_headers(res.headers)) self.relay_streaming(res) - with self.lock: - self.save_handler(req, req_body, res, '') + #with self.lock: + # self.save_handler(req, req_body, res, '') return - - res_body = res.read().decode('latin_1') + res_body = res.read().decode('latin-1') except Exception as e: if origin in self.tls.conns: del self.tls.conns[origin] @@ -186,7 +217,7 @@ def do_GET(self): return content_encoding = res.headers.get('Content-Encoding', 'identity') - res_body_plain = self.decode_content_body(res_body, content_encoding) + res_body_plain = self.decode_content_body(res_body.encode('latin-1'), content_encoding) res_body_modified = self.response_handler(req, req_body, res, res_body_plain) if res_body_modified is False: @@ -195,24 +226,32 @@ def do_GET(self): elif res_body_modified is not None: res_body_plain = res_body_modified res_body = self.encode_content_body(res_body_plain, content_encoding) + if 'Content-Length' in res.headers: + del res.headers['Content-Length'] + res.headers['Content-Length'] = str(len(res_body)) + + if 'Content-Length' not in res.headers: res.headers['Content-Length'] = str(len(res_body)) setattr(res, 'headers', self.filter_headers(res.headers)) - self.wfile.write("{} {} {}\r\n".format(self.protocol_version, res.status, res.reason).encode('latin_1')) + + self.wfile.write("{} {} {}\r\n".format(self.protocol_version, res.status, res.reason).encode('latin-1')) for k, v in res.headers.items(): self.send_header(k, v) self.end_headers() - self.wfile.write(res_body.encode('latin_1')) + if res_body: + self.wfile.write(res_body.encode('latin-1')) self.wfile.flush() with self.lock: self.save_handler(req, req_body, res, res_body_plain) def relay_streaming(self, res): - self.wfile.write("%s %d %s\r\n" % (self.protocol_version, res.status, res.reason)) - for line in res.headers.headers: - self.wfile.write(line) + self.wfile.write("{} {} {}\r\n".format(self.protocol_version, res.status, res.reason) + .encode('latin-1', 'strinct')) + for k, v in res.headers.items(): + self.send_header(k, v) self.end_headers() try: while True: @@ -233,7 +272,16 @@ def relay_streaming(self, res): def filter_headers(self, headers): # http://tools.ietf.org/html/rfc2616#section-13.5.1 - hop_by_hop = ('connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade') + hop_by_hop = ( + 'connection', + 'keep-alive', + 'proxy-authenticate', + 'proxy-authorization', + 'te', + 'trailers', + 'transfer-encoding', + 'upgrade' + ) for k in hop_by_hop: del headers[k] @@ -241,6 +289,7 @@ def filter_headers(self, headers): if 'Accept-Encoding' in headers: ae = headers['Accept-Encoding'] filtered_encodings = [x for x in re.split(r',\s*', ae) if x in ('identity', 'gzip', 'x-gzip', 'deflate')] + del headers['Accept-Encoding'] headers['Accept-Encoding'] = ', '.join(filtered_encodings) return headers @@ -249,7 +298,7 @@ def encode_content_body(self, text, encoding): if encoding == 'identity': data = text elif encoding in ('gzip', 'x-gzip'): - io = StringIO() + io = BytesIO() with gzip.GzipFile(fileobj=io, mode='wb') as f: f.write(text) data = io.getvalue() @@ -263,7 +312,7 @@ def decode_content_body(self, data, encoding): if encoding == 'identity': text = data elif encoding in ('gzip', 'x-gzip'): - io = StringIO(data) + io = BytesIO(data) with gzip.GzipFile(fileobj=io) as f: text = f.read() elif encoding == 'deflate': @@ -271,6 +320,8 @@ def decode_content_body(self, data, encoding): text = zlib.decompress(data) except zlib.error: text = zlib.decompress(data, -zlib.MAX_WBITS) + elif encoding == 'br': #Brotli + return data else: raise Exception("Unknown Content-Encoding: {}".format(encoding)) return text @@ -279,7 +330,7 @@ def send_cacert(self): with open(self.cacert, 'rb') as f: data = f.read() - self.wfile.write("{} {} {}\r\n".format(self.protocol_version, 200, 'OK').encode('latin_1')) + self.wfile.write("{} {} {}\r\n".format(self.protocol_version, 200, 'OK').encode('latin-1')) self.send_header('Content-Type', 'application/x-x509-ca-cert') self.send_header('Content-Length', len(data)) self.send_header('Connection', 'close') @@ -315,7 +366,7 @@ def parse_qsl(s): content_type = req.headers.get('Content-Type', '') if content_type.startswith('application/x-www-form-urlencoded'): - req_body_text = parse_qsl(req_body) + req_body_text = parse_qsl(req_body.decode('latin-1')) elif content_type.startswith('application/json'): try: json_obj = json.loads(req_body) @@ -359,10 +410,9 @@ def parse_qsl(s): except ValueError: res_body_text = res_body elif content_type.startswith('text/html'): - m = re.search(r']*>\s*([^<]+?)\s*', res_body, re.I) + m = re.search(r']*>\s*([^<]+?)\s*', res_body.decode('latin-1'), re.I) if m: - h = HTMLParser() - print_color(32, "==== HTML TITLE ====\n{}\n".format(h.unescape(m.group(1)))) + print_color(32, "==== HTML TITLE ====\n{}\n".format(html.unescape(m.group(1)))) elif content_type.startswith('text/') and len(res_body) < 1024: res_body_text = res_body @@ -384,9 +434,10 @@ def test(HandlerClass=ProxyRequestHandler, ServerClass=ThreadingHTTPServer, prot port = int(sys.argv[1]) else: port = 8080 - server_address = ('::1', port) + server_address = ("localhost", port) HandlerClass.protocol_version = protocol + #HandlerClass.chain_proxy = "localhost:9182" httpd = ServerClass(server_address, HandlerClass) sa = httpd.socket.getsockname() diff --git a/ssl_wrapper.py b/ssl_wrapper.py new file mode 100644 index 0000000..37a2036 --- /dev/null +++ b/ssl_wrapper.py @@ -0,0 +1,194 @@ +""" +Provides x509 certificates and paths. +""" + +from os import mkdir +from os.path import abspath, dirname, isdir, isfile, join +import OpenSSL.crypto as crypto + + +proxy_CN = 'proxy2 CA' + +# TODO: do this on package-install-time after move to pyopenssl +dir_name = join(dirname(abspath(__file__)), 'ssl-data') + +ca_key = join(dir_name, 'ca.key') +ca_crt = join(dir_name, 'ca.crt') +cert_key = join(dir_name, 'cert.key') +cert_dir = join(dir_name, 'certs') + +def generate_key_pair(key_type, bits): + """ + Creates key pair + :param key_type: one of crypto.TYPE_RSA or crypto.TYPE_DSA + :param bits: key length + :return: key pair in a PKey object + :return type: instance of crypto.PKey + """ + pkey = crypto.PKey() + pkey.generate_key(key_type, bits) + return pkey + + +def create_cert_request(p_key, digest="sha256", **subject_kwargs): + """ + Creates certificate request + :param p_key: key to associate with the request + :param digest: signing method + :param subject_kwargs: subject of request + valuable args are: (took from RFC 5280) + C: country + ST: state or province name + L: Locality name + O: organization + OU: organizational unit + CN: common name (e.g., "Susan Housley") + emailAddress: e-mail + :return: certificate request + """ + req = crypto.X509Req() + subj = req.get_subject() + + for key, value in subject_kwargs.items(): + setattr(subj, key, value) + + req.set_pubkey(p_key) + req.sign(p_key, digest) + return req + + +def create_certificate( + req, cert_key_pair, serial, begin_end_validity, digest="sha256", + self_signed_x509v3=False, subject_alt_names=[]): + """ + Create certificate by certificate request. + :param req: certificate request + :param cert_key_pair: tuple with issuer certificate and private key + :param serial: serial number + :param begin_end_validity: tuple with seconds certificate validity. + 0 means now. Example for set one year valid certificate from now: + begin_end_validity=(0, 60*60*24*365) + :param digest: signing method + :param self_signed_x509v3: generate self signed x509v3 CA certificate, add + extensions similar to these: + X509v3 extensions: + X509v3 Subject Key Identifier: + 88:31:6A:B7:8C:B3:F0:1D:5F:CD:9F:F8:70:F7:D4:7C:E5:5E:D2:A1 + X509v3 Authority Key Identifier: + keyid:88:31:6A:B7:8C:B3:F0:1D:5F:CD:9F:F8:70:F7:D4:7C:E5:5E:D2:A1 + X509v3 Basic Constraints: + CA:TRUE + :param subject_alt_names: subject alt names e.g. IP:192.168.7.1 or DNS:my.domain + :return: signed certificate + :return type: crypto.X509 + """ + i_cert, i_key = cert_key_pair + not_before, not_after = begin_end_validity + ret_x509_obj = crypto.X509() + ret_x509_obj.set_serial_number(serial) + + ret_x509_obj.gmtime_adj_notAfter(not_after) + ret_x509_obj.gmtime_adj_notBefore(not_before) + if i_cert == '__self_signed': + i_cert = ret_x509_obj + ret_x509_obj.set_issuer(i_cert.get_subject()) + ret_x509_obj.set_subject(req.get_subject()) + ret_x509_obj.set_pubkey(req.get_pubkey()) + if self_signed_x509v3: + ret_x509_obj.set_version(2) + ret_x509_obj.add_extensions([ + crypto.X509Extension(b'subjectKeyIdentifier', False, b'hash', + subject=ret_x509_obj), + crypto.X509Extension(b'basicConstraints', False, b'CA:TRUE'), + ]) + ret_x509_obj.add_extensions([ + crypto.X509Extension(b'authorityKeyIdentifier', False, + b'keyid:always', issuer=ret_x509_obj), + ]) + if len(subject_alt_names) != 0: + ret_x509_obj.set_version(2) # 0x3 + ret_x509_obj.add_extensions([ + crypto.X509Extension( + type_name=b'subjectAltName', + critical=False, + value=", ".join(subject_alt_names).encode()) + ]) + + ret_x509_obj.sign(i_key, digest) + return ret_x509_obj + + +def ca_files_exist(): + return all( + list(map(isfile, [ca_key, ca_crt, cert_key])) + [isdir(dir_name)]) + +if not ca_files_exist(): + # TODO: move this code to pyopenssl library + try: + if not isdir(dir_name): + mkdir(dir_name) + ca_key_o = generate_key_pair(crypto.TYPE_RSA, 2048) + cert_key_o = generate_key_pair(crypto.TYPE_RSA, 2048) + cert_req_temp = create_cert_request(ca_key_o, CN=proxy_CN) + ca_crt_o = create_certificate( + cert_req_temp, ('__self_signed', ca_key_o), 1509982490957715, + (0, 60 * 60 * 24 * 30), self_signed_x509v3=True + ) + with open(ca_key, 'wb+') as f: + f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, ca_key_o)) + with open(cert_key, 'wb+') as f: + f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, cert_key_o)) + with open(ca_crt, 'wb+') as f: + f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, ca_crt_o)) + + if not isdir(cert_dir): + mkdir(cert_dir) + + except StandardError as e: + logger.exception(e) + + +def _load_crypto_obj(path, crypto_method): + with open(path, 'r') as key_fp: + return crypto_method(crypto.FILETYPE_PEM, key_fp.read()) + +cert_key_obj = _load_crypto_obj(cert_key, crypto.load_privatekey) +ca_key_obj = _load_crypto_obj(ca_key, crypto.load_privatekey) +ca_crt_obj = _load_crypto_obj(ca_crt, crypto.load_certificate) + + +__all__ = [ + 'proxy_CN', + 'dir_name', + 'ca_key', + 'ca_crt', + 'cert_key', + 'cert_dir', + 'ca_files_exist', + 'cert_key_obj', + 'ca_key_obj', + 'ca_crt_obj', + 'generate_key_pair', + 'create_cert_request', + 'create_certificate', +] + + +if __name__ == '__main__': + def _load_crypto_obj(path, crypto_method): + with open(path, 'r') as key_fp: + return crypto_method(crypto.FILETYPE_PEM, key_fp.read()) + + cert_key_obj = _load_crypto_obj(cert_key, crypto.load_privatekey) + ca_key_obj = _load_crypto_obj(ca_key, crypto.load_privatekey) + ca_crt_obj = _load_crypto_obj(ca_crt, crypto.load_certificate) + cert_req = create_cert_request(ca_key_obj, CN=proxy_CN) + signed_req = create_certificate( + cert_req, (ca_crt_obj, ca_key_obj), 1509982490957715, + (0, 60 * 60 * 24 * 30), self_signed_x509v3=True + ) + + + print(crypto.dump_certificate(crypto.FILETYPE_PEM, signed_req)) + 'openssl x509 -req -days 3650 -CA ca.crt -CAkey ca.key -set_serial 1509982490957715' + 'https://github.com/pyca/pyopenssl/blob/master/examples/certgen.py'