forked from x4nth055/pythoncode-tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
form_extractor.py
56 lines (49 loc) · 1.79 KB
/
form_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from bs4 import BeautifulSoup
from requests_html import HTMLSession
from pprint import pprint
# initialize an HTTP session
session = HTMLSession()
def get_all_forms(url):
"""Returns all form tags found on a web page's `url` """
# GET request
res = session.get(url)
# for javascript driven website
# res.html.render()
soup = BeautifulSoup(res.html.html, "html.parser")
return soup.find_all("form")
def get_form_details(form):
"""Returns the HTML details of a form,
including action, method and list of form controls (inputs, etc)"""
details = {}
# get the form action (requested URL)
action = form.attrs.get("action").lower()
# get the form method (POST, GET, DELETE, etc)
# if not specified, GET is the default in HTML
method = form.attrs.get("method", "get").lower()
# get all form inputs
inputs = []
for input_tag in form.find_all("input"):
# get type of input form control
input_type = input_tag.attrs.get("type", "text")
# get name attribute
input_name = input_tag.attrs.get("name")
# get the default value of that input tag
input_value =input_tag.attrs.get("value", "")
# add everything to that list
inputs.append({"type": input_type, "name": input_name, "value": input_value})
# put everything to the resulting dictionary
details["action"] = action
details["method"] = method
details["inputs"] = inputs
return details
if __name__ == "__main__":
import sys
# get URL from the command line
url = sys.argv[1]
# get all form tags
forms = get_all_forms(url)
# iteratte over forms
for i, form in enumerate(forms, start=1):
form_details = get_form_details(form)
print("="*50, f"form #{i}", "="*50)
pprint(form_details)