-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtim_table.py
30 lines (23 loc) · 919 Bytes
/
tim_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import json
import sys
from io import StringIO

import pandas as pd
import requests
from lxml import etree
# Scrape the table with id "permissionsTable" from the page at `url`,
# print its rows to stdout as JSON records, and save the same data as a CSV.
url = "https://timdietrich.me/blog/netsuite-suiteql-tables-permissions-reference/"

# Bound the request so a stalled server cannot hang the script, and fail
# loudly on an HTTP error instead of trying to parse an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Pass raw bytes (not decoded text) so the HTML parser handles the encoding.
html_content = response.content

parser = etree.HTMLParser()
tree = etree.fromstring(html_content, parser)

# Extracting the table using XPath
xpath_query = '//*[@id="permissionsTable"]'
matching_elements = tree.xpath(xpath_query)
if not matching_elements:
    # A missing table would otherwise surface as an unexplained IndexError.
    raise SystemExit(f"No element matching {xpath_query} found at {url}")
table_element = matching_elements[0]

# Converting the HTML table element to a string
table_html = etree.tostring(table_element, method="html").decode()

# Parsing the table HTML to a DataFrame. The markup is wrapped in StringIO
# because newer pandas releases deprecate passing literal HTML to read_html.
df = pd.read_html(StringIO(table_html), flavor="bs4")[0]

# Convert the DataFrame to a JSON structure and print it
json_struct = json.loads(df.to_json(orient="records"))
print(json.dumps(json_struct, indent=2))

# Save the DataFrame to a CSV file
csv_filename = "./permissions_table.csv"
df.to_csv(csv_filename, index=False)

# Report the path of the saved CSV file. This goes to stderr so that stdout
# contains only the JSON document and stays safe to pipe into other tools.
print(f"Saved CSV to {csv_filename}", file=sys.stderr)