Skip to content

Commit b3db6cd

Browse files
committed
[BUGFIX]: Webtool optional headers
MINOR: - Use a different URL for get_meta_need_auth, as the previous URL (https://docs.google.com/presentation/d/1lIYEuzzhZZ9PJaG_u3XgrFXX5Y6xd0zHV-aB2F8bXXU/edit) has no meta tags when loaded without JavaScript
1 parent 279fe14 commit b3db6cd

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

jaseci_core/jaseci/extens/act_lib/tests/fixtures/webtool.jac

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ walker get_meta_valid {
55
}
66

77
walker get_meta_need_auth {
8-
has url = "https://docs.google.com/presentation/d/1lIYEuzzhZZ9PJaG_u3XgrFXX5Y6xd0zHV-aB2F8bXXU/edit";
8+
has url = "https://github.com/settings/profile";
99
can webtool.get_page_meta;
1010
report webtool.get_page_meta(url);
1111
}
@@ -26,5 +26,7 @@ walker get_meta_timeout {
2626
walker get_meta_need_header {
2727
has url = "https://www.invaluable.com/blog/what-is-a-mandala/";
2828
can webtool.get_page_meta;
29-
report webtool.get_page_meta(url);
29+
report webtool.get_page_meta(url, headers = {
30+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0"
31+
});
3032
}

jaseci_core/jaseci/extens/act_lib/webtool.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
@jaseci_action()
8-
def get_page_meta(url: str, timeout: int = 3, parser: str = "lxml"):
8+
def get_page_meta(url: str, timeout: int = 3, parser: str = "lxml", headers: dict = {}):
99
"""
1010
Util to parse metadata out of urls and html documents
1111
Parser option: lxml (default), html5lib, html.parser
@@ -15,9 +15,7 @@ def get_page_meta(url: str, timeout: int = 3, parser: str = "lxml"):
1515
webpage = requests.get(
1616
url,
1717
timeout=timeout,
18-
headers={
19-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0"
20-
},
18+
headers=headers,
2119
)
2220
soup = BeautifulSoup(webpage.content, features=parser)
2321
meta = soup.find_all("meta")

0 commit comments

Comments
 (0)