-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrss-to-bluesky.py
154 lines (127 loc) · 4.22 KB
/
rss-to-bluesky.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from dotenv import load_dotenv
import requests
import datetime
import re
import feedparser
import os
import time
from uniseg.graphemecluster import grapheme_clusters
# Load settings via python-dotenv before reading them below; the getenv calls
# must come after this so values from a .env file are visible.
load_dotenv()
# Base URL of the AT Protocol server; the "/xrpc/..." endpoint paths are
# appended to it. None when the variable is unset.
ATP_HOST = os.getenv('ATP_HOST')
# Account identifier sent as "identifier" when creating a session.
ATP_USERNAME = os.getenv('ATP_USERNAME')
# Password sent as "password" when creating a session.
ATP_PASSWORD = os.getenv('ATP_PASSWORD')
# Feed location handed to feedparser.parse().
RSS_FEED_URL = os.getenv('RSS_FEED_URL')
def fetch_latest_rss_entry(rss_url):
    """Return the first entry of the feed at *rss_url*.

    Args:
        rss_url: Feed location passed straight to ``feedparser.parse``.

    Returns:
        The first item of the parsed feed's ``entries`` list (feeds
        conventionally list the newest item first).

    Raises:
        IndexError: If the parsed feed has no entries, e.g. because the
            URL is wrong or the feed could not be fetched or parsed.
    """
    feed = feedparser.parse(rss_url)
    if not feed.entries:
        # Keep the exception type a bare ``entries[0]`` would raise, but say
        # which feed was empty instead of "list index out of range".
        raise IndexError(f"No entries found in RSS feed: {rss_url!r}")
    return feed.entries[0]
def fetch_external_embed(uri, timeout=10):
    """Fetch *uri* and extract its page title and meta description.

    This is a best-effort helper: any failure is printed and reported as
    ``None`` rather than raised.

    Args:
        uri: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled server.

    Returns:
        A dict with the keys ``"uri"``, ``"title"`` and ``"description"``
        (title/description are ``""`` when not found in the HTML), or
        ``None`` if the request failed or did not answer with status 200.
    """
    try:
        response = requests.get(uri, timeout=timeout)
        if response.status_code != 200:
            print("Error fetching the website")
            return None

        html_content = response.text
        # DOTALL lets a title that spans several lines still match.
        title_match = re.search(
            r'<title>(.+?)</title>',
            html_content,
            re.IGNORECASE | re.DOTALL,
        )
        # Only matches <meta> tags where name="description" precedes content=.
        description_match = re.search(
            r'<meta[^>]+name=["\']description["\'][^>]+content=["\'](.*?)["\']',
            html_content,
            re.IGNORECASE,
        )
        return {
            "uri": uri,
            "title": title_match.group(1) if title_match else "",
            "description": description_match.group(1) if description_match else "",
        }
    except Exception as e:
        # Deliberately broad: metadata lookup must never abort the caller.
        print(f"Error: {e}")
        return None
def trim_text(text, max_length=250):
    """Cut *text* down to at most *max_length* grapheme clusters.

    Length is counted in grapheme clusters rather than code points, so the
    cut never lands inside a single cluster.

    Args:
        text: The string to shorten.
        max_length: Maximum number of grapheme clusters to keep.

    Returns:
        *text* unchanged when it is short enough, otherwise its first
        *max_length* grapheme clusters joined back into a string.
    """
    graphemes = list(grapheme_clusters(text))
    fits = len(graphemes) <= max_length
    return text if fits else "".join(graphemes[:max_length])
def find_uri_position(text):
    """Locate the first http(s) URI in *text* and its UTF-8 byte span.

    Args:
        text: The string to search.

    Returns:
        ``(uri, start, end)`` where *start* is the byte offset of the URI's
        first byte and *end* the offset of its last byte (inclusive), both
        measured in the UTF-8 encoding of *text*; ``None`` when *text*
        contains no URI.
    """
    found = re.search(r'(https?://\S+)', text)
    if found is None:
        return None

    link = found.group(1)
    # Offsets are in bytes, not characters, so measure the encoded prefix.
    first_byte = len(text[:found.start()].encode('utf-8'))
    last_byte = first_byte + len(link.encode('utf-8')) - 1
    return (link, first_byte, last_byte)
def login(username, password):
    """Create a session on the ATP host and return its credentials.

    Args:
        username: Account identifier, sent as ``"identifier"``.
        password: Account password, sent as ``"password"``.

    Returns:
        A ``(access_token, did)`` tuple taken from the ``"accessJwt"`` and
        ``"did"`` fields of the server's JSON response.

    Raises:
        ValueError: If the response carries no ``"accessJwt"`` field.
    """
    data = {"identifier": username, "password": password}
    resp = requests.post(
        ATP_HOST + "/xrpc/com.atproto.server.createSession",
        json=data,
        timeout=30,  # never hang forever on an unresponsive server
    )
    # Decode the body once and reuse it for both fields.
    session = resp.json()
    atp_auth_token = session.get('accessJwt')
    if atp_auth_token is None:
        raise ValueError("No access token, is your password wrong?")
    return atp_auth_token, session.get("did")
def post_text(text, atp_auth_token, did, timestamp=None):
    """Create an ``app.bsky.feed.post`` record containing *text*.

    If *text* contains an http(s) URI, the first one is marked up as a
    clickable link facet. Text without any URI is posted as plain text.

    Args:
        text: Body of the post.
        atp_auth_token: Access token used as the Bearer credential.
        did: Identifier of the repository (account) to post to.
        timestamp: ``datetime`` to use as the creation time; defaults to
            the current UTC time.

    Returns:
        The ``requests`` response of the ``createRecord`` call.
    """
    if not timestamp:
        timestamp = datetime.datetime.now(datetime.timezone.utc)
    # Render a UTC offset as the "Z" suffix.
    timestamp = timestamp.isoformat().replace('+00:00', 'Z')

    headers = {"Authorization": "Bearer " + atp_auth_token}

    record = {
        "$type": "app.bsky.feed.post",
        "createdAt": timestamp,
        "text": text,
    }

    # Attach the link facet only when a URI exists, so text without a link
    # no longer fails on an unbound ``facets`` variable.
    found_uri = find_uri_position(text)
    if found_uri:
        uri, start_position, end_position = found_uri
        record["facets"] = [
            {
                "index": {
                    "byteStart": start_position,
                    # find_uri_position returns an inclusive end offset;
                    # byteEnd is exclusive, hence the + 1.
                    "byteEnd": end_position + 1,
                },
                "features": [
                    {
                        "$type": "app.bsky.richtext.facet#link",
                        "uri": uri,
                    }
                ],
            },
        ]
    # NOTE: no external embed (link card) is attached, only the link facet.

    data = {
        "collection": "app.bsky.feed.post",
        "$type": "app.bsky.feed.post",
        "repo": "{}".format(did),
        "record": record,
    }

    resp = requests.post(
        ATP_HOST + "/xrpc/com.atproto.repo.createRecord",
        json=data,
        headers=headers,
        timeout=30,  # never hang forever on an unresponsive server
    )
    return resp
def _post_latest_entry(last_posted_link=None):
    """Post the newest feed entry unless its link equals *last_posted_link*.

    Args:
        last_posted_link: Link of the entry already posted by this
            process, or ``None`` if nothing has been posted yet.

    Returns:
        The newest entry's link when it was already posted or has just
        been posted successfully; otherwise *last_posted_link* unchanged,
        so the caller retries on its next attempt.
    """
    latest_entry = fetch_latest_rss_entry(RSS_FEED_URL)
    title = latest_entry.title
    link = latest_entry.link
    if link == last_posted_link:
        return last_posted_link

    post_content = f"{title} {link}"
    atp_auth_token, did = login(ATP_USERNAME, ATP_PASSWORD)
    post_resp = post_text(post_content, atp_auth_token, did)
    print(post_resp.json())
    return link if post_resp.ok else last_posted_link


def main():
    """Poll the RSS feed forever, posting each new latest entry once.

    The feed is checked every 15 minutes. The link of the last
    successfully posted entry is remembered in memory, so an unchanged
    feed is not posted again on every cycle. This function never returns.
    """
    last_posted_link = None
    while True:
        last_posted_link = _post_latest_entry(last_posted_link)
        time.sleep(15 * 60)  # wait for 15 minutes


def lambda_handler(event, context):
    """Post the newest feed entry once and report success.

    Performs a single check-and-post instead of entering ``main``'s
    endless polling loop, so the invocation returns.

    Args:
        event: Invocation payload (unused).
        context: Invocation context (unused).

    Returns:
        A dict with ``"statusCode"`` 200 and ``"body"`` ``"Success"``.
    """
    _post_latest_entry()
    return {
        "statusCode": 200,
        "body": "Success"
    }


if __name__ == "__main__":
    main()