diff --git a/conf_thief.py b/conf_thief.py index 64b1be7..cbf05e7 100644 --- a/conf_thief.py +++ b/conf_thief.py @@ -1,4 +1,4 @@ -import requests, json, sys, getopt, time +import requests, json, sys, getopt, time, os # Set that holds all of the pages found in the keyword search contentSet = set() @@ -93,30 +93,36 @@ def searchKeyWords(path, username, access_token, cURL): #print(contentSet) print("[*] Compiled set of %i unique pages to download from your search" % len(contentSet)) -def downloadContent(username, access_token, cURL): +def downloadContent(username, access_token, cURL, output_dir): headers = form_token_headers print('[*] Downloading files') count = 1 set_length = len(contentSet) for contentId in contentSet: - page = contentId.split(",", 1) - url = cURL + "/wiki/spaces/flyingpdf/pdfpageexport.action?pageId={pageId}".format(pageId=page[0]) - url = get_pdf_download_url_for_confluence_cloud(cURL, url, username, access_token) - url = cURL + "/wiki/" + url try: - response = requests.request("GET", - url, - auth=(username, access_token), - headers=headers - ) + page = contentId.split(",", 1) + url = cURL + "/wiki/spaces/flyingpdf/pdfpageexport.action?pageId={pageId}".format(pageId=page[0]) + url = get_pdf_download_url_for_confluence_cloud(cURL, url, username, access_token) + if url is not None: + url = cURL + "/wiki/" + url + + response = requests.request("GET", + url, + auth=(username, access_token), + headers=headers + ) - path = "loot/{file_name}-{pageId}.pdf".format(file_name=page[1], pageId=page[0]) - with open(path, 'wb') as f: - f.write(response.content) - print('[*] Downloaded %i of %i files: %s-%s.pdf' % (count, set_length, page[1], page[0])) - count += 1 + path = "{OUTDIR}/{file_name}-{pageId}.pdf".format(OUTDIR=output_dir, file_name=page[1], pageId=page[0]) + with open(path, 'wb') as f: + f.write(response.content) + print('[*] Downloaded %i of %i files: %s-%s.pdf' % (count, set_length, page[1], page[0])) + count += 1 + else: + print("[*] Error getting PDF url.") + count += 1 except Exception as err: print("Error : " + str(err)) + count += 1 def get_pdf_download_url_for_confluence_cloud(cURL, url, username, access_token): """ @@ -181,9 +187,10 @@ def main(): username = "" access_token = "" user_agent = "" + output_dir = "" # usage - usage = '\nusage: python3 conf_thief.py [-h] -c -u -p -d [-a] ""' + usage = '\nusage: python3 conf_thief.py [-h] -c -u -p -d [-a] "" [-o] ' #help help = '\nThis Module will connect to Confluence\'s API using an access token, ' @@ -205,11 +212,12 @@ def main(): help += '\n\t\tThe User-Agent string you wish to send in the http request.' help += '\n\t\tYou can use the latest chrome for MacOS for example: -a "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"' help += '\n\t\tDefault is "python-requests/2.25.1"' + help += '\n\n\t-o, --output-dir\n\t\t Output path to use instead of loot\n' help += '\n\n\t-h, --help\n\t\tshow this help message and exit\n' # try parsing options and arguments try : - opts, args = getopt.getopt(sys.argv[1:], "hc:u:p:d:a:", ["help", "url=", "user=", "accesstoken=", "dict=", "user-agent="]) + opts, args = getopt.getopt(sys.argv[1:], "hc:u:p:d:a:o:", ["help", "url=", "user=", "accesstoken=", "dict=", "user-agent=", "output-dir="]) except getopt.GetoptError as err: print(str(err)) print(usage) @@ -228,6 +236,10 @@ def main(): dict_path = arg if opt in ("-a", "--user-agent"): user_agent = arg + if opt in ("-o", "--output-dir"): + output_dir = arg + if not os.path.isdir(output_dir): + os.mkdir(output_dir) # check for mandatory arguments if not username: @@ -258,8 +270,12 @@ def main(): default_headers['User-Agent'] = user_agent form_token_headers['User-Agent'] = user_agent + # Set default loot path + if not output_dir: + output_dir = 'loot' + searchKeyWords(dict_path, username, access_token, cURL) - downloadContent(username, access_token, cURL) + downloadContent(username, access_token, cURL, output_dir) if __name__ == "__main__": main()