-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrainly_api.py
66 lines (48 loc) · 2.53 KB
/
brainly_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from io import BytesIO
import requests
import re
from html_text import extract_text
# HTTP headers sent with every request to the Brainly GraphQL endpoint.
header = {
    'host': 'brainly.co.id',
    'content-type': 'application/json; charset=utf-8',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    ),
}
class Attachment:
    """Reference to a file attached to a question or an answer.

    Only the remote address is recorded; the file itself is never fetched.
    """

    def __init__(self, url):
        """Store the address found under the ``"url"`` key of *url*.

        Despite its name, *url* is a mapping (one attachment entry), not a
        bare string.
        """
        address = url["url"]
        self.url = address
        # Nothing is downloaded, so the size of the content is unknown.
        self.size = None

    def __repr__(self):
        return "<[ type: attachment ]>"
class Answers:
    """A single answer: its cleaned-up text and its attachments."""

    def __init__(self, json):
        """Build the answer from one answer mapping.

        *json* must provide ``"content"`` (run through ``extract_text`` and
        then ``clean_text``) and ``"attachments"`` (an iterable of attachment
        mappings, each wrapped in an ``Attachment``).
        """
        extracted = extract_text(json["content"])
        self.content = clean_text(extracted)
        self.attachments = list(map(Attachment, json["attachments"]))

    def __repr__(self):
        # The marker is only shown when at least one attachment is present.
        marker = '& ATTACHMENT' if self.attachments else ''
        return f"<[ type Text {marker}]>"
class Question:
    """The question part of a search result: cleaned text plus attachments."""

    def __init__(self, node):
        """Build the question from one search-result edge.

        The data is read from the mapping under ``node["node"]``: its
        ``"content"`` is run through ``extract_text`` and ``clean_text``, and
        every entry of its ``"attachments"`` becomes an ``Attachment``.
        """
        inner = node["node"]
        self.content = clean_text(extract_text(inner["content"]))
        self.attachments = [Attachment(entry) for entry in inner["attachments"]]

    def __repr__(self):
        attachment_count = len(self.attachments)
        return f"<( QUESTION:1 ATTACHMENT: {attachment_count})>"
class Content:
    """One search result: a question together with its answers."""

    def __init__(self, json):
        """Build the result from one search-result edge.

        The whole edge is handed to ``Question``; each entry found under
        ``json["node"]["answers"]["nodes"]`` becomes an ``Answers`` object.
        """
        self.question = Question(json)
        answer_nodes = json["node"]["answers"]["nodes"]
        self.answers = [Answers(entry) for entry in answer_nodes]

    def __repr__(self):
        answer_count = len(self.answers)
        return f"<( QUESTION: 1 ANSWER:{answer_count} )>"
# Opening and closing LaTeX tags; a bare "/tex" fragment is dropped as well,
# exactly as the previous pattern did.
_TEX_TAG_RE = re.compile(r'\[/?tex\]|/tex')
# A backslash together with the command letters that directly follow it
# (possibly none), e.g. "\times", "\bf", or the backslash of "\%".
_LATEX_COMMAND_RE = re.compile(r'\\[A-Za-z]*')
_BRACE_RE = re.compile(r'[{}]')


def clean_text(text):
    r"""Strip LaTeX markup from *text* and return the remaining plain text.

    Three things are removed, in this order:

    1. ``[tex]`` / ``[/tex]`` tags (and bare ``/tex`` fragments);
    2. backslash commands such as ``\times`` or ``\LARGE`` -- only the
       backslash and the letters of the command name, never the text that
       follows it;
    3. curly braces.

    Command arguments are kept and simply concatenated, so
    ``\frac{1}{2}`` becomes ``12``.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; an empty string stays empty.
    """
    text = _TEX_TAG_RE.sub('', text)
    # The command pattern stops at the first non-letter. A pattern that keeps
    # going until the next brace would swallow the ordinary text after it.
    text = _LATEX_COMMAND_RE.sub('', text)
    return _BRACE_RE.sub('', text)
def brainly(query: str, first: int, after=None, *, timeout: float = 10.0) -> list[Content]:
    """Search questions on brainly.co.id through its GraphQL endpoint.

    Args:
        query: Search text, sent as the ``query`` GraphQL variable.
        first: Sent as the ``first`` GraphQL variable (declared ``Int!`` in
            the query).
        after: Sent as the ``after`` GraphQL variable (declared ``ID`` in the
            query); ``None`` is sent as null.
        timeout: Passed to ``requests.post`` as its ``timeout`` so that an
            unresponsive server cannot block the caller indefinitely.

    Returns:
        One ``Content`` per entry of ``data.questionSearch.edges`` in the
        JSON response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout*.
        KeyError: If the JSON response lacks the expected keys.
    """
    body = {
        'operationName': 'SearchQuery',
        'variables': {'query': query, 'after': after, 'first': first},
        'query': 'query SearchQuery($query: String!, $first: Int!, $after: ID) {\n\tquestionSearch(query: $query, first: $first, after: $after) {\n\tedges {\n\t node {\ncontent\n\t\tattachments{\nurl\n}\n\t\tanswers {\n\t\t\tnodes {\ncontent\n\t\t\t\tattachments{\nurl\n}\n}\n}\n}\n}\n}\n}\n',
    }
    response = requests.post(
        "https://brainly.co.id/graphql/id",
        headers=header,
        json=body,
        timeout=timeout,
    )
    payload = response.json()
    return [Content(edge) for edge in payload["data"]["questionSearch"]["edges"]]