# YoutubeCommentScrapper.py
import csv
import os
import warnings
from collections import Counter

import streamlit as st
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

from Senti import extract_video_id
# Suppress every warning category for the whole process.
warnings.filterwarnings('ignore')

# API key for the YouTube Data API. The YOUTUBE_API_KEY environment variable
# takes precedence so a deployment can supply its own credential without
# editing this file.
# SECURITY(review): the literal fallback below is a credential committed to
# source control. It is kept only so existing setups keep working; revoke it
# and delete the fallback once every environment sets YOUTUBE_API_KEY.
DEVELOPER_KEY = os.environ.get('YOUTUBE_API_KEY') or "AIzaSyCQ3ONrUytp2_fJbKhe7F3L7jNC3QSTvHw"
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

# Shared client object used by the helpers in this module to call the
# YouTube API.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
#video_id=extract_video_id(youtube_link)
def get_channel_id(video_id):
    """Return the ID of the channel that owns the given video.

    Queries ``videos().list`` on the module-level ``youtube`` client for the
    video's ``snippet`` part and reads ``channelId`` from the first item.

    Args:
        video_id: ID of the video to look up.

    Returns:
        The ``channelId`` value of the first returned item.

    Raises:
        IndexError: If the response's ``items`` list is empty.
        Errors raised by the API client are not caught here.
    """
    request = youtube.videos().list(part='snippet', id=video_id)
    first_item = request.execute()['items'][0]
    return first_item['snippet']['channelId']
#channel_id=get_channel_id(video_id)
def save_video_comments_to_csv(video_id):
    """Fetch all top-level comments of a video and write them to a CSV file.

    Pages through ``commentThreads().list`` on the module-level ``youtube``
    client, collecting the author display name and plain-text body of each
    thread's top-level comment, then writes them to ``<video_id>.csv`` with a
    ``Username,Comment`` header row.

    Args:
        video_id: ID of the video whose comments are fetched. It is used
            verbatim as the file-name stem, so callers should pass an
            already-validated ID.

    Returns:
        The name of the CSV file that was written (``video_id + '.csv'``,
        relative to the current working directory).

    Raises:
        Errors raised by the API client or by opening/writing the file are
        not caught here.
    """
    # Arguments shared by every page request; 'pageToken' is added for
    # follow-up pages. Asking for the largest page the API allows keeps the
    # number of (quota-billed) requests to a minimum.
    request_args = {
        'part': 'snippet',
        'videoId': video_id,
        'textFormat': 'plainText',
        'maxResults': 100,
    }

    rows = []
    while True:
        page = youtube.commentThreads().list(**request_args).execute()
        for item in page.get('items', []):
            top_comment = item['snippet']['topLevelComment']['snippet']
            rows.append([top_comment['authorDisplayName'], top_comment['textDisplay']])

        next_token = page.get('nextPageToken')
        if not next_token:
            # No token means this was the last page.
            break
        request_args['pageToken'] = next_token

    # Save the comments to a CSV file with the video ID as the filename.
    filename = video_id + '.csv'
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Username', 'Comment'])
        writer.writerows(rows)
    return filename
def get_video_stats(video_id):
    """Return the ``statistics`` part of a video, or None if it is unavailable.

    Uses the module-level ``youtube`` client.

    Args:
        video_id: ID of the video to look up.

    Returns:
        The ``statistics`` dict of the first returned item, or ``None`` when
        the API call raises ``HttpError`` or the response contains no items.
    """
    # Keep the try body limited to the call that can raise HttpError.
    try:
        response = youtube.videos().list(
            part='statistics',
            id=video_id
        ).execute()
    except HttpError as error:
        print(f'An error occurred: {error}')
        return None

    items = response.get('items')
    if not items:
        # An empty result is reported the same way as an API error instead of
        # surfacing as an IndexError.
        print(f'An error occurred: no video found for id {video_id}')
        return None
    return items[0]['statistics']
def get_channel_info(youtube, channel_id):
    """Return basic details about a channel, or None if they are unavailable.

    Args:
        youtube: API client to query; must provide ``channels().list(...)``
            whose result has ``execute()``. This parameter shadows the
            module-level client of the same name.
        channel_id: ID of the channel to look up.

    Returns:
        A dict with the keys ``channel_title``, ``video_count``,
        ``channel_logo_url``, ``channel_created_date``, ``subscriber_count``
        and ``channel_description``, taken from the first returned item.
        ``subscriber_count`` is ``None`` when the response carries no
        ``subscriberCount``. Returns ``None`` when the API call raises
        ``HttpError`` or the response contains no items.
    """
    # Keep the try body limited to the call that can raise HttpError.
    try:
        response = youtube.channels().list(
            part='snippet,statistics,brandingSettings',
            id=channel_id
        ).execute()
    except HttpError as error:
        print(f'An error occurred: {error}')
        return None

    items = response.get('items')
    if not items:
        # An empty result is reported the same way as an API error instead of
        # surfacing as an IndexError.
        print(f'An error occurred: no channel found for id {channel_id}')
        return None

    snippet = items[0]['snippet']
    statistics = items[0]['statistics']
    return {
        'channel_title': snippet['title'],
        'video_count': statistics['videoCount'],
        'channel_logo_url': snippet['thumbnails']['high']['url'],
        'channel_created_date': snippet['publishedAt'],
        # NOTE(review): subscriberCount appears to be omitted for channels
        # that hide their subscriber count — confirm against the API docs.
        # .get() keeps such channels from raising KeyError.
        'subscriber_count': statistics.get('subscriberCount'),
        'channel_description': snippet['description'],
    }