-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathht6.py
More file actions
42 lines (29 loc) · 1.09 KB
/
ht6.py
File metadata and controls
42 lines (29 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Scrape the BBC Sport front page and save the first few promo links, each
# paired with its topic label, to a JSON file.
url = "https://www.bbc.com/sport"
OUTPUT_PATH = 'bbc_json.json'
MAX_ENTRIES = 5       # stop once this many link/topic pairs are collected
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever

try:
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page and then
    # reporting that no promos were found.
    response.raise_for_status()
except requests.RequestException as err:
    raise SystemExit(f"Failed to fetch {url}: {err}") from err

soup = BeautifulSoup(response.content, 'html.parser')

# NOTE(review): these class names look build-generated and may change when the
# site is redeployed -- confirm against the live page if nothing is matched.
promo_wrappers = soup.find_all(
    'div', {'class': 'ssrcss-1va2pun-UncontainedPromoWrapper eqfxz1e5'}
)

data = []
for promo_wrapper in promo_wrappers:
    promo_link = promo_wrapper.find(
        'a', {'class': 'ssrcss-vdnb7q-PromoLink exn3ah91'}
    )
    if not promo_link:
        continue

    # An anchor without an href cannot yield a usable link; skip it rather
    # than raising KeyError.
    href = promo_link.get('href')
    if not href:
        continue

    metadata_text = promo_wrapper.find(
        'span', {'class': 'ssrcss-1if1g9v-MetadataText e4wm5bw1'}
    )
    if not metadata_text:
        continue

    data.append({
        # urljoin resolves site-relative hrefs against the page URL and
        # leaves already-absolute hrefs intact, unlike plain concatenation.
        "Link": urljoin(url, href),
        "Topics": [metadata_text.text.strip()],
    })
    if len(data) == MAX_ENTRIES:
        break

count = len(data)
if count == 0:
    # Also reached when wrappers exist but none had both a link and a topic.
    print("No promo wrappers found on the page.")
else:
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=2)
    # Report the file that was actually written (the message previously named
    # a different file than the one opened above).
    print(f"JSON file '{OUTPUT_PATH}' created successfully with {count} entries.")