# safeurl.py -- 736 lines (565 loc), 20.1 KB
# Copied from a GitHub repository page (repository forked from nahamsec/JSParser).
# coding: utf8
"""
.. module:: safeurl
synopsis:: An SSRF protection library
.. moduleauthor:: Nicolas Rodriguez <http://github.com/nicolasrod>
"""
from __future__ import unicode_literals
from __future__ import print_function
from numbers import Number
from socket import gethostbyname_ex
import re
import netaddr
import pycurl
#import StringIO
# Python 2.7/3 urlparse
try:
# Python 2.7
from urlparse import urlparse
from urllib import quote
except:
# Python 3
from urllib.parse import urlparse
from urllib.parse import quote
class InvalidOptionException(Exception):
    """Raised when an option value, list name or list type is not acceptable."""


class InvalidURLException(Exception):
    """Raised when a URL is empty, has no hostname or carries refused credentials."""


class InvalidDomainException(Exception):
    """Raised when a hostname fails the domain lists or cannot be resolved."""


class InvalidIPException(Exception):
    """Raised when the resolved IP addresses fail the IP whitelist/blacklist."""


class InvalidPortException(Exception):
    """Raised when a port fails the port whitelist/blacklist."""


class InvalidSchemeException(Exception):
    """Raised when a URL scheme fails the scheme whitelist/blacklist."""
class Empty(object):
pass
# TODO: Remove this ugly hack!
def _mutable(obj):
newobj = Empty()
for i in dir(obj):
if not i.startswith("_"):
setattr(newobj, i, getattr(obj, i))
return newobj
def _check_allowed_keys(val):
if val not in ["ip", "port", "domain", "scheme"]:
raise InvalidOptionException(
"Provided type 'type' must be 'ip', 'port', 'domain' or 'scheme'")
def _check_allowed_lists(val):
if val not in ["whitelist", "blacklist"]:
raise InvalidOptionException(
"Provided list 'list' must be 'whitelist' or 'blacklist'")
class Options(object):
    """
    This object contains configuration options for safeurl.

    It stores the redirect, credential and DNS-pinning switches together
    with a whitelist and a blacklist, each keyed by "ip", "port", "domain"
    and "scheme". Mutating methods return the instance so that calls can
    be chained.
    """

    def __init__(self):
        self._follow_location = False
        self._follow_location_limit = 0
        self._send_credentials = False
        self._pin_dns = False
        self._lists = {
            "whitelist": {
                "ip": [],
                "port": ["80", "443", "8080"],
                "domain": [],
                "scheme": ["http", "https"]},
            "blacklist": {
                "ip": ["0.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10",
                       "127.0.0.0/8", "169.254.0.0/16",
                       "172.16.0.0/12", "192.0.0.0/29", "192.0.2.0/24",
                       "192.88.99.0/24", "192.168.0.0/16",
                       "198.18.0.0/15", "198.51.100.0/24",
                       "203.0.113.0/24", "224.0.0.0/4", "240.0.0.0/4"],
                "port": [],
                "domain": [],
                "scheme": []}
        }

    @staticmethod
    def _toValueList(values):
        """
        Normalise a scalar or a collection of values into a list

        A scalar (for example the string "8080") becomes a one-element
        list instead of being split into characters.

        :param arg1: value(s)
        :type arg1: string/list (string)
        :rtype: list
        :raises InvalidOptionException: if nothing was provided
        """
        if isinstance(values, (list, tuple, set, frozenset)):
            items = list(values)
            is_empty = not items
        else:
            items = [values]
            is_empty = values is None or values == ""
        if is_empty:
            raise InvalidOptionException("Provided values cannot be empty")
        return items

    def getFollowLocation(self):
        """
        Get followLocation

        :rtype: bool
        """
        return self._follow_location

    def enableFollowLocation(self):
        """
        Enables following redirects

        :rtype: :class:`Options`
        """
        self._follow_location = True
        return self

    def disableFollowLocation(self):
        """
        Disable following redirects

        :rtype: :class:`Options`
        """
        self._follow_location = False
        return self

    def getFollowLocationLimit(self):
        """
        Gets the follow location limit
        0 is no limit (infinite)

        :rtype: int
        """
        return self._follow_location_limit

    def setFollowLocationLimit(self, limit):
        """
        Set follow location limit
        0 is no limit (unlimited)

        :param arg1: limit, a number >= 0
        :type arg1: int
        :rtype: :class:`Options`
        :raises InvalidOptionException: if limit is not a number or is negative
        """
        if not isinstance(limit, Number) or limit < 0:
            raise InvalidOptionException(
                "Provided limit 'limit' must be an integer >= 0")
        self._follow_location_limit = limit
        return self

    def getSendCredentials(self):
        """
        Get send credentials option

        :rtype: bool
        """
        return self._send_credentials

    def enableSendCredentials(self):
        """
        Enable sending of credentials

        :rtype: :class:`Options`
        """
        self._send_credentials = True
        return self

    def disableSendCredentials(self):
        """
        Disable sending of credentials

        :rtype: :class:`Options`
        """
        self._send_credentials = False
        return self

    def getPinDns(self):
        """
        Get pin DNS option

        :rtype: bool
        """
        return self._pin_dns

    def enablePinDns(self):
        """
        Enable pin DNS option

        :rtype: :class:`Options`
        """
        self._pin_dns = True
        return self

    def disablePinDns(self):
        """
        Disable pin DNS option

        :rtype: :class:`Options`
        """
        self._pin_dns = False
        return self

    def isInList(self, lst, type_, value):
        """
        Checks if a specific value is in a list

        An empty whitelist matches every value and an empty blacklist
        matches none. Domain entries are regular expressions that must
        match the whole value, case-insensitively. Ports are compared by
        their string form so that 80 and "80" are the same port.

        :param arg1: Options: "whitelist" or "blacklist"
        :type arg1: string
        :param arg2: Options: "ip", "port", "domain", or "scheme"
        :type arg2: string
        :param arg3: Value to check for
        :type arg3: string
        :rtype: bool
        """
        _check_allowed_lists(lst)
        _check_allowed_keys(type_)
        dst = self._lists[lst][type_]
        if len(dst) == 0:
            return lst == "whitelist"
        if type_ == "domain":
            # Anchor both ends: a prefix-only match would let
            # "example.com" also accept "example.com.evil.net".
            return any(
                re.match(r"(?:%s)\Z" % domain, value, re.IGNORECASE)
                is not None
                for domain in dst)
        if type_ == "port":
            # urlparse yields integer ports while the defaults are strings
            return str(value) in [str(entry) for entry in dst]
        return value in dst

    def getList(self, lst, type_=None):
        """
        Returns a specific list

        Without a type the whole dictionary of lists is returned.

        :param arg1: Options: "blacklist" or "whitelist"
        :type arg1: string
        :param arg2: Type (Optional) - Options: "ip", "port", "domain", or "scheme"
        :type arg2: string
        :rtype: list (dict when no type is given)
        """
        _check_allowed_lists(lst)
        dst = self._lists[lst]
        if type_ is not None:
            _check_allowed_keys(type_)
            return dst[type_]
        return dst

    def setList(self, lst, values, type_=None):
        """
        Sets a list to be passed in as dictionary

        With a type, ``values`` must be a list and replaces that single
        list. Without a type, ``values`` must be a dictionary keyed by
        type and each entry replaces the corresponding list.

        :param arg1: Options: "blacklist" or "whitelist"
        :type arg1: string
        :param arg2: dictionary to be passed in (list when a type is given)
        :type arg2: dict
        :param arg3: Type (Optional) - Options: "ip", "port", "domain", or "scheme"
        :type arg3: string
        :rtype: :class:`Options`
        :raises InvalidOptionException: if values has the wrong container type
        """
        _check_allowed_lists(lst)
        if type_ is not None:
            if not isinstance(values, list):
                raise InvalidOptionException("Provided values must be a list")
            _check_allowed_keys(type_)
            self._lists[lst][type_] = values
            return self
        if not isinstance(values, dict):
            raise InvalidOptionException(
                "Provided values must be a dictionary")
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only
        for k, v in values.items():
            _check_allowed_keys(k)
            self._lists[lst][k] = v
        return self

    def clearList(self, lst):
        """
        Clears specified list

        :param arg1: Options: "blacklist" or "whitelist"
        :type arg1: string
        :rtype: :class:`Options`
        """
        _check_allowed_lists(lst)
        self._lists[lst] = {"ip": [], "domain": [], "port": [], "scheme": []}
        return self

    def addToList(self, lst, type_, values):
        """
        Add value(s) to a specific list

        Values already present are not added again.

        :param arg1: Options: "blacklist" or "whitelist"
        :type arg1: string
        :param arg2: Options: "ip", "domain", "port", or "scheme"
        :type arg2: string
        :param arg3: values to add
        :type arg3: string/list (string)
        :rtype: :class:`Options`
        :raises InvalidOptionException: if no value is provided
        """
        _check_allowed_lists(lst)
        _check_allowed_keys(type_)
        dst = self._lists[lst][type_]
        for v in self._toValueList(values):
            if v not in dst:
                dst.append(v)
        return self

    def removeFromList(self, lst, type_, values):
        """
        Remove value(s) from a specific list

        :param arg1: Option: "blacklist" or "whitelist"
        :type arg1: string
        :param arg2: Options: "ip", "domain", "port", or "scheme"
        :type arg2: string
        :param arg3: values to remove
        :type arg3: string/list(string)
        :rtype: :class:`Options`
        :raises InvalidOptionException: if no value is provided
        """
        _check_allowed_lists(lst)
        _check_allowed_keys(type_)
        unwanted = self._toValueList(values)
        dst = self._lists[lst][type_]
        self._lists[lst][type_] = [x for x in dst if x not in unwanted]
        return self
class Url(object):
    """
    Class for handling URLs

    All members are static helpers that parse a URL, validate its scheme,
    port, hostname and resolved IP addresses against an :class:`Options`
    object, and rebuild an encoded URL from the validated parts.
    """

    @staticmethod
    def validateUrl(url, options):
        """
        Validates the whole URL

        The returned dictionary has the keys "originalUrl", "cleanUrl"
        and "parts". When DNS pinning is enabled, the hostname in
        "parts" and in "cleanUrl" is replaced by the first resolved IP.

        :param arg1: The URL
        :type arg1: string
        :param arg2: Options object
        :type arg2: :class:`Options`
        :rtype: dict
        :raises InvalidURLException: if the URL is empty, cannot be parsed
            or has no hostname
        """
        if not url:
            raise InvalidURLException("Provided URL 'url' cannot be empty")

        # Split URL into parts first. urlparse (and reading the port of
        # its result) raises ValueError on malformed input.
        try:
            parts = _mutable(urlparse(url))
        except ValueError:
            raise InvalidURLException("Error parsing URL 'url'")

        if parts.hostname is None:
            raise InvalidURLException(
                "Provided URL 'url' doesn't contain a hostname")

        # First, validate the scheme
        if len(parts.scheme) != 0:
            parts.scheme = Url.validateScheme(parts.scheme, options)
        else:
            # Default to http
            parts.scheme = "http"

        # Validate the port
        if parts.port is not None:
            parts.port = Url.validatePort(parts.port, options)

        # Resolve host to ip(s)
        parts.ips = Url.resolveHostname(parts.hostname)

        # Validate the host
        parts.hostname = Url.validateHostname(
            parts.hostname, parts.ips, options)

        if options.getPinDns():
            # Since we're pinning DNS, we replace the host in the URL
            # with an IP, then get cURL to send the Host header
            parts.hostname = parts.ips[0]

        # Rebuild the URL
        cleanUrl = Url.buildUrl(parts)

        return {"originalUrl": str(url),
                "cleanUrl": str(cleanUrl), "parts": parts}

    @staticmethod
    def validateScheme(scheme, options):
        """
        Validates a URL scheme

        :param arg1: scheme
        :type arg1: string
        :param arg2: Options object
        :type arg2: :class:`Options`
        :rtype: string
        :raises InvalidSchemeException: if the scheme is not whitelisted
            or is blacklisted
        """
        # Whitelist always takes precedence over a blacklist
        if not options.isInList("whitelist", "scheme", scheme):
            raise InvalidSchemeException(
                "Provided scheme 'scheme' doesn't "
                "match whitelisted values: %s" % (
                    ", ".join(options.getList("whitelist", "scheme"))))

        if options.isInList("blacklist", "scheme", scheme):
            raise InvalidSchemeException(
                "Provided scheme 'scheme' matches a blacklisted value")

        # Existing value is fine
        return scheme

    @staticmethod
    def _portInList(options, lst, port):
        """
        Check a port against a list both as given and in string form

        urlparse yields integer ports while the default port whitelist
        holds strings, so either representation counts as a match.

        :param arg1: Options object
        :type arg1: :class:`Options`
        :param arg2: Options: "whitelist" or "blacklist"
        :type arg2: string
        :param arg3: port
        :type arg3: int
        :rtype: bool
        """
        return (options.isInList(lst, "port", port) or
                options.isInList(lst, "port", str(port)))

    @staticmethod
    def validatePort(port, options):
        """
        Validates a port

        :param arg1: port
        :type arg1: int
        :param arg2: Options object
        :type arg2: :class:`Options`
        :rtype: int
        :raises InvalidPortException: if the port is not whitelisted
            or is blacklisted
        """
        if not Url._portInList(options, "whitelist", port):
            raise InvalidPortException(
                "Provided port 'port' doesn't match "
                "whitelisted values: %s" % (
                    ", ".join(str(p) for p in
                              options.getList("whitelist", "port"))))

        if Url._portInList(options, "blacklist", port):
            raise InvalidPortException(
                "Provided port 'port' matches a blacklisted value")

        # Existing value is fine
        return port

    @staticmethod
    def validateHostname(hostname, ips, options):
        """
        Validates a URL hostname

        The hostname is checked against the domain lists. The resolved
        addresses are then checked against the IP lists: at least one
        must fall inside a whitelisted CIDR (when an IP whitelist is
        set) and none may fall inside a blacklisted CIDR.

        :param arg1: hostname
        :type arg1: string
        :param arg2: IP addresses to validate
        :type arg2: list (string)
        :param arg3: Options object
        :type arg3: :class:`Options`
        :rtype: string
        :raises InvalidDomainException: if the hostname fails the domain lists
        :raises InvalidIPException: if the addresses fail the IP lists
        """
        # Check the host against the domain lists
        if not options.isInList("whitelist", "domain", hostname):
            raise InvalidDomainException(
                "Provided hostname 'hostname' doesn't match "
                "whitelisted values: %s" % (
                    ", ".join(options.getList("whitelist", "domain"))))

        if options.isInList("blacklist", "domain", hostname):
            raise InvalidDomainException(
                "Provided hostname 'hostname' matches a blacklisted value")

        whitelistedIps = options.getList("whitelist", "ip")
        if len(whitelistedIps) != 0:
            has_match = any(Url.cidrMatch(ip, wlip)
                            for ip in ips for wlip in whitelistedIps)
            if not has_match:
                raise InvalidIPException(
                    "Provided hostname 'hostname' "
                    "resolves to '%s', which doesn't match whitelisted "
                    "values: %s" % (", ".join(ips),
                                    ", ".join(whitelistedIps)))

        blacklistedIps = options.getList("blacklist", "ip")
        if len(blacklistedIps) != 0:
            has_match = any(Url.cidrMatch(ip, blip)
                            for ip in ips for blip in blacklistedIps)
            if has_match:
                raise InvalidIPException(
                    "Provided hostname 'hostname' "
                    "resolves to '%s', which matches a blacklisted "
                    "value: %s" % (", ".join(ips), blacklistedIps))

        return hostname

    @staticmethod
    def buildUrl(parts):
        """
        Rebuild a URL based on a :func:`_mutable()` object of parts

        User name, password, path, query and fragment are
        percent-encoded with ``quote``.

        :param arg1: object of parts
        :type arg1: :func:`_mutable()` object
        :rtype: string
        """
        url = []

        if len(parts.scheme) != 0:
            url.append("%s://" % parts.scheme)

        if parts.username is not None:
            url.append(quote(parts.username))

        if parts.password is not None:
            url.append(":%s" % quote(parts.password))

        # If we have a user or pass, make sure to add an "@"
        if parts.username is not None or parts.password is not None:
            url.append("@")

        if parts.hostname is not None:
            url.append(parts.hostname)

        if parts.port is not None:
            url.append(":%d" % int(parts.port))

        if len(parts.path) != 0:
            # Drop the leading "/" before quoting, then put it back
            url.append("/%s" % quote(parts.path[1:]))

        # The query string is difficult to encode properly
        # We need to ensure no special characters can be
        # used to mangle the URL, but URL encoding all of it
        # prevents the query string from being parsed properly
        if len(parts.query) != 0:
            query = quote(parts.query)
            # Replace encoded &, =, ;, [ and ] to originals
            for encoded, plain in (("%26", "&"), ("%3D", "="), ("%3B", ";"),
                                   ("%5B", "["), ("%5D", "]")):
                query = query.replace(encoded, plain)
            url.append("?")
            url.append(query)

        if len(parts.fragment) != 0:
            url.append("#%s" % quote(parts.fragment))

        return "".join(url)

    @staticmethod
    def resolveHostname(hostname):
        """
        Resolve a hostname to its IP(s)

        :param arg1: hostname
        :type arg1: string
        :rtype: list (string)
        :raises InvalidDomainException: if the lookup fails or yields
            no address
        """
        try:
            ips = gethostbyname_ex(hostname)[2]
        except Exception:
            # Lookup failures surface as socket errors; a bare except
            # would also swallow KeyboardInterrupt/SystemExit.
            ips = []

        if not ips:
            raise InvalidDomainException(
                "Provided hostname 'hostname' doesn't "
                "resolve to an IP address")
        return ips

    @staticmethod
    def cidrMatch(ip, cidr):
        """
        Checks a passed in IP against a CIDR

        :param arg1: IP address
        :type arg1: string
        :param arg2: CIDR
        :type arg2: string
        :rtype: bool
        """
        return netaddr.IPAddress(ip) in netaddr.IPNetwork(cidr)
class SafeURL(object):
    """
    Core interface of module

    Wraps a cURL handle and an :class:`Options` object. Every URL that
    is requested, including each redirect target, is validated through
    :meth:`Url.validateUrl` before the request is performed.
    """

    def __init__(self, handle=None, options=None):
        """
        :param arg1: cURL handle (Optional), a new one is created if omitted
        :type arg1: :class:`pycurl.Curl` object
        :param arg2: Options (Optional), defaults to a fresh :class:`Options`
        :type arg2: :class:`Options`
        """
        self.setCurlHandle(handle)

        if options is None:
            options = Options()
        self.setOptions(options)

        # To start with, disable FOLLOWLOCATION since we'll handle it
        self._handle.setopt(pycurl.FOLLOWLOCATION, False)

        # Force IPv4, since this class isn't yet compatible with IPv6
        self._handle.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)

    def getCurlHandle(self):
        """
        Returns cURL Handle

        :rtype: :class:`pycurl.Curl` object
        """
        return self._handle

    def setCurlHandle(self, handle):
        """
        Sets cURL handle

        :param arg1: handle
        :type arg1: (Optional) :class:`pycurl.Curl` object
        :raises Exception: if the handle does not look like a cURL object
        """
        if handle is None:
            handle = pycurl.Curl()

        # TODO: Fix this hack!
        if repr(handle).find("pycurl.Curl") == -1:
            raise Exception("SafeURL expects a valid cURL object!")

        self._handle = handle

    def getOptions(self):
        """
        Gets Options

        :rtype: :class:`Options`
        """
        return self._options

    def setOptions(self, options):
        """
        Sets options

        :param arg1: Options object
        :type arg1: :class:`Options` object
        """
        self._options = options

    def execute(self, url):
        """
        Executes a cURL request, whilst checking that the URL abides by our whitelists/blacklists

        When following redirects is enabled, each redirect target is
        validated and requested in turn. The redirect counter is compared
        with ``<`` against the limit, so a non-zero limit of N allows at
        most N - 1 redirects to be followed before an exception is raised.

        :param arg1: URL
        :type arg1: string
        :rtype: string
        :raises InvalidURLException: if the URL carries credentials while
            sending credentials is disabled
        :raises Exception: if the redirect limit is hit
        """
        # io.BytesIO is available on both Python 2.6+ and Python 3
        from io import BytesIO

        redirectCount = 0
        redirectLimit = self._options.getFollowLocationLimit()
        followLocation = self._options.getFollowLocation()

        while True:
            # Validate the URL
            validated = Url.validateUrl(url, self._options)
            parts = validated["parts"]

            # Are there credentials, but we don't want to send them?
            hasCredentials = (parts.username is not None or
                              parts.password is not None)
            if hasCredentials and not self._options.getSendCredentials():
                raise InvalidURLException(
                    "Credentials passed in but 'sendCredentials' "
                    "is set to false")

            if self._options.getPinDns():
                # parts.hostname already holds the pinned IP, so take the
                # name for the Host header from the URL as it was given.
                hostHeader = urlparse(validated["originalUrl"]).hostname
                if parts.port is not None:
                    hostHeader = "%s:%d" % (hostHeader, int(parts.port))
                self._handle.setopt(pycurl.HTTPHEADER,
                                    ["Host: %s" % hostHeader])

                # We also have to disable SSL cert verification, which is
                # not great. Might be possible to manually check the
                # certificate ourselves?
                self._handle.setopt(pycurl.SSL_VERIFYPEER, False)

            # With DNS pinning this is the "fake" URL pointing at the IP
            self._handle.setopt(pycurl.URL, validated["cleanUrl"])

            # Execute the cURL request
            response = BytesIO()
            self._handle.setopt(pycurl.WRITEFUNCTION, response.write)
            self._handle.perform()

            # Check for an HTTP redirect
            if not followLocation:
                break

            statuscode = self._handle.getinfo(pycurl.HTTP_CODE)
            if statuscode not in (301, 302, 303, 307, 308):
                break

            redirectCount += 1
            if redirectLimit != 0 and redirectCount >= redirectLimit:
                raise Exception("Redirect limit 'redirectLimit' hit")

            # Redirect received, so rinse and repeat
            nextUrl = self._handle.getinfo(pycurl.REDIRECT_URL)
            if not nextUrl:
                # Redirect status without a usable target: nothing to
                # follow, so return what was received.
                break
            url = nextUrl

        return response.getvalue().decode('UTF-8')