-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnews_feed.py
69 lines (50 loc) · 1.89 KB
/
news_feed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 1 15:48:39 2018
@author: Abinfinity
"""
import requests
import feedparser
import news_scraper as ns # scrape paragraphs
import media_scraper as ms # scrape images
import para_summary as ps # gives summary
import json
def produce():
    """Collect stories from the configured RSS feeds and print them as JSON.

    Each source in the feed table is parsed with ``feedparser``. For every
    entry the title and link are read from the feed,
    ``ms.scraper.scraper(key, link)`` supplies the value stored under
    ``"media_link"`` and ``ps.para.summ(link)`` supplies the value stored
    under ``"summary"``.

    An entry that raises while being processed is reported on stdout and
    skipped, so one bad article does not discard the rest of its feed.

    Returns:
        str: JSON text (indent of 4) mapping each source name to a list of
        objects with the keys ``title``, ``link``, ``media_link`` and
        ``summary``. The same text is printed to stdout.
    """
    # RSS sources to poll; the key is also handed to the media scraper.
    url_list = {
        'hindustan_times': 'https://www.hindustantimes.com/rss/topnews/rssfeed.xml',
        # Disabled sources -- uncomment a line to enable it:
        # 'ndtv_news': 'http://feeds.feedburner.com/ndtvnews-top-stories',
        # 'india_times': 'https://timesofindia.indiatimes.com/rssfeedstopstories.cms',
        # # 'the_hindu': 'https://www.thehindu.com/news/feeder/default.rss',
        # 'india_today': 'https://www.indiatoday.in/rss/1206584',
        # # 'reuters': 'http://feeds.reuters.com/reuters/INtopNews',
        # 'indian_express': 'http://www.newindianexpress.com/Nation/rssfeed/?id=170&getXmlFeed=true',
        # 'livemint': 'https://www.livemint.com/rss/homepage',
        # 'b_quint': 'https://www.bloombergquint.com/stories.rss',
        # 'ib_times': 'https://www.ibtimes.co.in/rss/feed',
        # 'b_today': 'https://www.businesstoday.in/rss/rssstory.jsp?sid=105',
    }

    news = {}
    for key, value in url_list.items():
        feed = feedparser.parse(value)
        print("source----->" + key)

        page = []
        # A parse result without entries simply yields an empty page.
        for entry in feed.get('entries', []):
            try:
                title = entry['title']
                link = entry['link']
                # ns.scraper.scraper(key, link) was the earlier source of the
                # paragraph text; the summary helper is used instead.
                media = ms.scraper.scraper(key, link)
                paragraph = ps.para.summ(link)
            except Exception as e:
                # Best effort: report why this entry failed and move on to
                # the next one instead of abandoning the whole feed.
                print("None to display: " + repr(e))
                continue
            page.append({
                "title": title,
                "link": link,
                "media_link": media,
                "summary": paragraph,
            })
        news[key] = page

    final = json.dumps(news, indent=4)
    print(final)
    return final
def main():
    """Script entry point: run the feed collection once."""
    produce()
# Run the collector only when executed as a script, not when imported.
if __name__ == '__main__':
    main()