-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpart-1-slack-scraping.js
130 lines (109 loc) · 4.83 KB
/
part-1-slack-scraping.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
// Unique words of Slack users
// Part I. A JavaScript code snippet for scraping messages on Slack.
// Full description on https://github.com/dmitryplaunov/unique-words-on-slack
// Copyright (c) 2020 Dmitry Plaunov; Licensed MIT.
// This script depends on the front-end of the search page on Slack.
// If Slack re-organizes the page or renames element classes (identifiers), this script might stop working.
// Luckily, in 6 months of 2020, only 1 element name was changed, so the script required minimal adjustment.
// Accumulator for the text of every scraped message.
// It starts as an empty string (not 'undefined') because the text of each scraped page is appended to it.
var all_messages = '';
// Read the total number of search-result pages from the pager label (e.g. "Page 1 of 8").
// Your search results should not span more than 100 pages, as Slack breaks when navigating
// past the 100th page; if you have more than 100 pages, try to segment the search results by dates.
// 'var' keeps 'number_of_pages' a global that the rest of the script reads, without relying on
// implicit globals.
var number_of_pages;
try {
  // textContent is read instead of innerHTML so that markup inside the label can't end up in the number
  var page_counter_element = document.getElementsByClassName('c-search__pager__details')[0].textContent; // "Page 1 of 8"
  var last_page_str = page_counter_element.split('of ')[1]; // "8"
  number_of_pages = parseInt(last_page_str, 10); // 8
} catch (error) {
  throw new Error("The script needs adjustment. The number of pages element wasn't found.");
}
// parseInt() returns NaN instead of throwing when the label doesn't have the expected
// "Page X of Y" shape, so the result has to be validated explicitly; otherwise the timers
// further down would be set up with a NaN page count.
if (isNaN(number_of_pages) || number_of_pages < 1) {
  throw new Error("The script needs adjustment. The number of pages couldn't be read from the pager.");
}
// Alternatively, you can define the number of pages manually.
// It should not be greater than 100, as Slack breaks when navigating to pages after the 100th.
// number_of_pages = 50
// Scrapes the text of every message on the currently displayed search page and appends it
// to the global 'all_messages'.
//
// Steps:
//   1. clicks every 'show more' button so that truncated messages expose their full content;
//   2. for each search result, reads the innerText of the first child of its
//      'c-message__message_blocks' element;
//   3. appends the collected text (messages separated by a space) to 'all_messages'.
//
// Results without a text block (e.g. just a picture or emoji) are skipped.
// Throws an Error (with the original error attached as 'cause') if the page can't be processed.
function pageScraping() {
  try {
    // text of the messages from the current search page; appended to 'all_messages' at the end
    var page_messages = '';

    // expanding messages on the page
    var expand_selector = 'c-button-unstyled c-search__expand';
    var num_of_expand = document.getElementsByClassName(expand_selector).length;
    for (var k = 0; k < num_of_expand; k++) {
      // when the 'show more' button gets clicked, it disappears, so the next button has the 0 index again
      var expand_button = document.getElementsByClassName(expand_selector)[0];
      if (!expand_button) {
        // fewer buttons left than counted at the start: nothing more to expand
        break;
      }
      expand_button.click();
    }

    // scraping all messages on the page
    var num_of_messages = document.getElementsByClassName('c-search_message__content').length;
    for (var i = 0; i < num_of_messages; i++) {
      var result = document.getElementsByClassName('c-search_message__content')[i];
      var message = result.getElementsByClassName('c-message__message_blocks')[0];
      // a result may have no blocks container at all, or an empty one (just a picture or emoji);
      // neither carries text, so skip it instead of failing the whole page
      if (!message || !message.children[0]) {
        continue;
      }
      page_messages += message.children[0].innerText + ' ';
    }

    // appending 'all_messages' with the messages from the current search page
    all_messages += page_messages + ' ';
  } catch (error) {
    var failure = new Error("The script needs adjustment. Some message elements weren't found.");
    // keep the underlying error reachable for debugging
    failure.cause = error;
    throw failure;
  }
}
// Moves the search results to the next page by clicking the pager's "forward" button.
// Throws an Error when the button can't be found on the page, which most likely means that
// the element classes used below no longer match the page.
function nextPage() {
  var forward_button = document.getElementsByClassName('c-link--button c-search__pager__button_forward')[0];
  if (!forward_button) {
    // an Error object (rather than a bare string) keeps the stack trace in the console
    throw new Error("The script needs adjustment. The next page button wasn't found.");
  }
  forward_button.click();
}
// Scrape the search page that is currently displayed right away,
// before any page switching is started further down.
pageScraping();
// NOTE: this message is printed after the scrape above has already finished.
console.log('Scraping...');
// Number of the search page currently displayed; incremented after every switch to the next page.
var t = 1;

// Switches to the next search page every 5 seconds until the last page is reached,
// giving each page time to load and be scraped before moving further.
// The page count is checked BEFORE a switch is scheduled, so nothing is clicked when there
// is only one page (or when 'number_of_pages' is not a valid number).
function pageLoop() {
  // written as a negated '<' so that a NaN page count also stops the loop
  if (!(t < number_of_pages)) {
    return;
  }
  setTimeout(function () {
    console.log('Moving to the next page');
    nextPage();
    t++;
    pageLoop();
  }, 5000);
}
// Start the page-switching timer chain: pageLoop() runs nextPage() from a 5-second timer
// and re-arms itself while there are pages left.
pageLoop();
// Number of search pages scraped so far; the first page is scraped directly at start-up,
// so the counter begins at 1 and is incremented after every timed scrape.
var z = 1;

// Scrapes the displayed search page every 5 seconds, the same interval at which the pages
// are switched, until every page has been scraped.
// The page count is checked BEFORE a scrape is scheduled, so a single-page result isn't
// scraped a second time (which would duplicate its messages in 'all_messages').
function scrapingLoop() {
  // written as a negated '<' so that a NaN page count also stops the loop
  if (!(z < number_of_pages)) {
    return;
  }
  setTimeout(function () {
    console.log('Scraping...');
    pageScraping();
    z++;
    scrapingLoop();
  }, 5000);
}
// Start the scraping timer chain 4 seconds from now. scrapingLoop() itself waits another
// 5 seconds before each scrape, so every timed scrape runs 4 seconds after the page switch
// scheduled by pageLoop() (switches at 5 s, 10 s, ...; scrapes at 9 s, 14 s, ...), which
// gives the new search page time to load.
// The offset needs to be between 0 and the delay that is defined for each search page,
// currently 5 seconds.
setTimeout(scrapingLoop, 4000);
// Announces that scraping is finished and copies the scraped messages to the clipboard.
// copy() is a utility provided by the browser's DevTools console, not a standard page API,
// so it may be unavailable in the context this function runs in (NOTE(review): presumably
// the case when called from a timer callback; confirm in the target browser). A failure of
// copy() is reported as a warning instead of aborting, so that the hint about copying
// manually and the closing separator are always printed.
function scrapingDone() {
  console.log('_______________');
  console.log('Done!');
  try {
    copy(all_messages);
  } catch (error) {
    console.warn('Automatic copy failed: ' + error);
  }
  console.log("If the messages weren't copied to your clipboard, then type 'copy(all_messages)'");
  console.log('_______________');
}
// Schedule the final step (copying the scraped messages to the clipboard) to run
// number_of_pages * 5 seconds + 1 second from now, i.e. 5 seconds are reserved per page
// plus a 1-second margin. With more than one page the last timed scrape fires at
// (number_of_pages * 5 - 1) seconds, so this runs about 2 seconds after it.
setTimeout(scrapingDone, number_of_pages*5000 + 1000);