-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsave-onetab-links.js
534 lines (398 loc) · 15.6 KB
/
save-onetab-links.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
/*
Program Name: Console: Scrape OneTab Links
File Name: ehCode_2018.03.05_JavaScriptES6_ScrapeOneTabLinks_01.js
Date Created: 02/27/18
Date Modified: 04/01/18
Version: 1.02
Programmer: Eric Hepperle
Purpose: Parses links and information from OneTab.
Displays categories in console. Uses vanilla JavaScript ES6.
NOTE: jQuery can no longer be injected because of the page's "Content Security Policy",
so this version uses vanilla JavaScript ES6.
Usage: Open OneTab page in a browser and copy-paste the code below
into the console.
To save/archive the links results use code inspector in browser to
grab the "body" tag and contents, then paste that into a new document
and save it.
Sample results: N/A
Requires:
* Browser console
*/
/* global $ */
/*jshint esversion: 6 */
// Start from an empty console so this run's debug output is easy to read.
console.clear();
// ********************** GLOBAL VARIABLES
// =========== Output Variables =========
// Groups array to store all group info (this is the root).
// getAllGroups() pushes one object per group onto it and returns it.
var objArrGroups = [];
// Output string holding the HTML source of the generated page
var strOut = '';
// =========== END Output Variables =====
// =========== Row Variables ============
// Scratch values describing the row currently being scraped.
// Row Link (the link's URL)
var rowLink = '';
// Row Text (the link's visible text)
var rowText = '';
// Icon URL (the row's favicon image)
var rowIconLink = '';
// Row domain (meant to be parsed from the icon url); starts out empty
var rowDomain = '';
// ========== END Row Variables =========
// =========== Counters =================
// Blank Title Count: number used to name the next untitled group
// ("blankGroup_1", "blankGroup_2", ...)
var blankTitleCount = 1;
// Group Counter, starts at 1
var groupCount = 1;
// Total number of groups counted
var groupsTotal = 0;
// Row Counter, starts at 1
var rowCount = 1;
//============ END Counters =============
// =========== Selector Constants =======
// Direct <div> children of #contentAreaDiv, skipping the first 3 children
// (nth-child(n+4)) and the last child (nth-last-child(-n+1)).
// Every "(" is closed explicitly so the selector does not depend on the
// parser closing ":not(" for us at the end of the string.
var selAllGroups = "#contentAreaDiv > div:nth-child(n+4):not(:nth-last-child(-n+1))";
// Element holding a group's title, looked up inside each group element
var selGroupTitle = "div.tabGroupTitleText";
// =========== END Selector Constants ===
// =========== CSS Style Constants ======
// Style strings for console.log's %c directive
var aliceblue_dashed = "background:aliceblue; border-bottom: dashed 3px cadetblue";
var lemonyellow_dashed = "background:#ffffb3; border-bottom: dashed 3px orange";
var lemyel = "background:#ffffb3";
var ltgrn = "background:lightgreen";
// =========== END CSS Style Constants ==
// ********************** HELPER FUNCTIONS
/*
pad()
Left-pads a value with a fill string until the result is at least
`width` characters long.

Usage:
pad(10, 4);       // "0010"
pad(9, 4);        // "0009"
pad(123, 4);      // "0123"
pad(10, 4, '-');  // "--10"
pad(10, 4, ' ');  // "  10"

Parameters:
n     - value to pad; turned into a string by concatenation
width - minimum length of the returned string
z     - optional fill string; any falsy value (including '') means '0'

Returns the string form of n, untouched when it is already `width`
characters or longer.
*/
function pad(n, width, z) {
  var filler = z || '0';
  var text = '' + n;
  // Long enough already: nothing to add.
  if (text.length >= width) {
    return text;
  }
  // Joining an array of (missing + 1) empty slots yields `missing` fillers.
  var missing = width - text.length;
  return new Array(missing + 1).join(filler) + text;
}
// ********************** MAIN
/*
getAllGroups()
Get all group and row info and store it in an array of objects.

For every element matched by `selAllGroups` this collects:
- groupTitle: the group's title, or a generated "blankGroup_N" name when
  the title is missing, empty or a lone non-breaking space
- date / time plus their parts (monthNum, dayNum, year, hourNum,
  minuteNum, secondNum, ampm), parsed from the "Created ..." line of the
  group's info block
- rows: one { rowText, rowLink, rowIconLink } object per link row

A group whose date line cannot be parsed is still returned: date and time
become 'unknown', monthNum becomes 'no-month' and the other parts are ''.
Rows without a link are skipped; a row without an icon gets rowIconLink ''.

Side effects: pushes onto the file-level `objArrGroups`, updates
`groupsTotal`, `blankTitleCount`, `rowCount` and the row scratch variables
(`rowLink`, `rowText`, `rowIconLink`), and writes debug output to the console.

Returns `objArrGroups`, which also gets a `blankTitleCount` property.
*/
function getAllGroups() {
  // Parses a line like: Created 6/21/2016, 1:41:28 PM
  // Built once here instead of once per group.
  const reCreated = /^Created\s+(\d{1,2})\/(\d{1,2})\/(\d{4}),\s(\d{1,2}):(\d{1,2}):(\d{1,2})\s([APM]{2})$/;
  // An "empty" title holds a single non-breaking space. Comparing against
  // a plain " " does not work, fromCharCode(160) does.
  const NBSP = String.fromCharCode(160);

  // Grab list of all link groups and remember how many there are
  const groups = [...document.querySelectorAll(selAllGroups)];
  groupsTotal = groups.length;

  groups.forEach(function (groupEl) {
    const group = {};

    // Index of the info-block line where the "Created ..." line is looked
    // up: 2 for a titled group, 1 for an untitled one.
    let groupNameStartLineIndex = 2;

    // Look the title element up once; treat a missing element like a blank title.
    const titleEl = groupEl.querySelector(selGroupTitle);
    const titleText = titleEl ? titleEl.innerText : '';
    let groupTitle;
    if (titleText && titleText !== NBSP) {
      groupTitle = titleText;
    } else {
      groupTitle = "blankGroup_" + blankTitleCount;
      ++blankTitleCount;
      groupNameStartLineIndex = 1;
    }
    group.groupTitle = groupTitle;

    // Grab group details block (empty string when the block is not there)
    const detailsEl = groupEl.querySelector("div > div > div");
    const thisGroupDetails = detailsEl ? detailsEl.innerText : '';
    console.log("%cGroup Info (incl. date): ", "background:orange");
    console.log(thisGroupDetails);

    // Find the date line. A starred group carries an extra "N tabs" line
    // in front of it, in which case the date is one line further down.
    const arrGroupDetails = thisGroupDetails.split('\n');
    let tempTimeDate = arrGroupDetails[groupNameStartLineIndex] || '';
    if (tempTimeDate.includes("tabs")) {
      tempTimeDate = arrGroupDetails[groupNameStartLineIndex + 1] || '';
    }
    console.log("%ctempTimeDate: %s ", "background: lavender; border: solid gold 2px;", tempTimeDate);

    // Surrounding whitespace must not defeat the ^...$ anchors.
    const matches = reCreated.exec(tempTimeDate.trim());
    // debugging ...
    console.log("%c************ MATCHES ************** ", "background:yellow");
    console.log(matches);

    // Placeholders used when the date line does not have the expected shape.
    let monthNum = 'no-month';
    let dayNum = '';
    let year4 = '';
    let hourNum = '';
    let minuteNum = '';
    let secondNum = '';
    let ampm = '';
    let date = 'unknown';
    let time = 'unknown';
    if (matches) {
      monthNum = matches[1];
      dayNum = matches[2];
      year4 = matches[3];
      hourNum = matches[4];
      minuteNum = matches[5];
      secondNum = matches[6];
      ampm = matches[7];
      date = monthNum + "/" + dayNum + "/" + year4;
      time = hourNum + ":" + minuteNum + ":" + secondNum + " " + ampm;
    } else {
      // Keep going: one odd group must not abort the whole scrape.
      console.warn("Could not parse creation date for group '" + groupTitle + "': " + tempTimeDate);
    }

    console.log("%cDate: ", "background:bisque");
    console.log(date);
    console.log("%cTime: ", "background:bisque");
    console.log(time);

    // Add date and time info to group object. This will help with sorting
    group.year = year4;
    group.date = date;
    group.time = time;
    group.monthNum = monthNum;
    group.dayNum = dayNum;
    group.hourNum = hourNum;
    group.minuteNum = minuteNum;
    group.secondNum = secondNum;
    group.ampm = ampm;

    // The rows live in the group's second child element.
    const rowsContainer = groupEl.children[1];
    const rows = rowsContainer ? Array.from(rowsContainer.children) : [];
    // debugging ... child rows
    console.log("%cChild Rows: ", lemonyellow_dashed);
    console.log(rows);

    // Declared locally so it does not leak as an implicit global.
    const arrGroupRows = [];
    // Reset row counter to 1
    rowCount = 1;

    rows.forEach(function (rowEl) {
      // The link and its icon sit in the row's second child element.
      const cell = rowEl.children[1];
      const anchor = cell ? cell.querySelector('a') : null;
      if (!anchor) {
        // Nothing to record for a row without a link.
        return;
      }
      const icon = cell.querySelector('img');

      rowLink = anchor.href;
      rowText = anchor.text;
      rowIconLink = icon ? icon.src : '';

      arrGroupRows.push({
        rowText: rowText,
        rowLink: rowLink,
        rowIconLink: rowIconLink
      });
      ++rowCount;
    });

    // Add group rows array onto this group and store the group
    group.rows = arrGroupRows;
    objArrGroups.push(group);
  });

  objArrGroups.blankTitleCount = blankTitleCount;
  return objArrGroups;
} // END function
// Scrape the page once; everything below renders this data.
var groupInfo = getAllGroups();
// uncomment to output object
// console.log("%c --- GROUP INFO --- ", "background:#ffffb3;");
// console.log(groupInfo);
// --------------------------------------------------------------------
// Create webpage by parsing the groups object and
// launch in new window.

/*
escapeHtml()
Makes a scraped value safe to place in HTML text or inside a quoted
attribute. Titles, link texts and URLs come from arbitrary web pages, so
they may contain markup or quotes that would otherwise break (or inject
into) the generated page.

value - anything; converted with String()
Returns the escaped string.
*/
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/*
getRowDomain()
Returns the host name of a row's link for use in the favicon alt text,
or '' when the link is not an absolute URL.
*/
function getRowDomain(url) {
  try {
    return new URL(url).hostname;
  } catch (err) {
    // Unparseable link: the alt text simply carries no domain.
    return '';
  }
}

// add doctype and header to html output string
strOut += "<!DOCTYPE html>\n";
strOut += "<html lang='en'>\n";
strOut += "\t<head>\n";
strOut += "\t<title>Scraped Links Output Page</title>\n";
strOut += "\t<meta charset='utf-8'>\n";
var testTemplateLiteralStyle = `
<style>
  .group-info {
    background-color: orange;
    border: solid black 2px;
    border-radius: 15px;
    padding: 10px;
    max-width: 1024px;
    display: inline-block;
  }
  .group-title {
    float: left;
    position: relative;
    top: -.6em;
  }
  .group-table {
    float: left;
    border: solid 3px gold;
    margin-left: 9em;
    background: #ffffb3;
    border-radius: .8em;
    padding: .6em;
    font-family: "courier new";
    font-size: .8em;
  }
  .clear:after {
    content: "";
    clear: both;
    display: table;
  }
  .row-icon {
    width: 16px;
    height: 16px;
  }
  /* Note: clear the div and the table */
</style>
`;
strOut += testTemplateLiteralStyle;
// close the head before the body starts
strOut += "\t</head>\n";
strOut += "<body>\n";

groupInfo.forEach(function (group, groupIndex) {
  // Build formatted group title:
  var htmGroupTitle = "<h2 class='group-title'>" + escapeHtml(group.groupTitle) + "</h2>\n";

  // Build formatted group table:
  var htmGroupTable = "<table class='clear group-table'>\n";
  htmGroupTable += "\t<tr>\n\t\t<td class='info-label'>Date &amp; Time:</td>\n\t\t<td>" +
    escapeHtml(group.date + ", " + group.time) + "</td>\n\t</tr>\n";
  htmGroupTable += "\t<tr>\n\t\t<td class='info-label'>Group #:</td>\n\t\t<td>" +
    (groupIndex + 1) + "</td>\n\t</tr>\n";
  htmGroupTable += "</table>\n";

  // Build formatted group header and info:
  var htmGroupInfo = "<div class='clear group-info'>\n" + htmGroupTitle + htmGroupTable + "</div>\n";

  // debugging ...
  console.log(group.groupTitle);

  // Begin current link list:
  var htmRowsList = "<ul style='list-style: none'>";

  // Loop through all rows in this group ...
  group.rows.forEach(function (row, rowIndex) {
    // Format row index to 3 padded digits
    var formattedRowNum = pad((rowIndex + 1), 3);
    // debugging ... formattedRowNum
    console.log("%cFormatted row number = %s", lemonyellow_dashed, formattedRowNum);

    // start row list item
    var htmRow = "<li class='row'>[Row #: " + formattedRowNum + "]: ";

    // add icon image to row (attributes separated by a space, tag closed with "/>")
    htmRow += "\t<img alt='favicon for " + escapeHtml(getRowDomain(row.rowLink)) + "'" +
      " class='row-icon' src='" + escapeHtml(row.rowIconLink) + "' />";

    // add hyperlink to row
    htmRow += "\t<a href='" + escapeHtml(row.rowLink) + "' target='_blank' >" +
      escapeHtml(row.rowText) + "</a></li>";

    // add row html to the rows string
    htmRowsList += htmRow;
  });

  // close current link list
  htmRowsList += "</ul><!-- END group -->\n";

  // assemble html parts for this group and add them to the page
  strOut += htmGroupInfo + htmRowsList + "<hr />";
});

// Add closing tags to html page string
strOut += "</body>\n</html>\n";

// Launch results in new window. window.open() returns null when the
// browser blocks the pop-up, so check before touching the document.
var win = window.open("", "APPLES");
if (win) {
  win.document.body.innerHTML = strOut;
} else {
  console.error("Could not open the results window. Allow pop-ups for this page and run the script again.");
}
/*
NOTES:
04/01/18 - Versioned to 1.02.
- Restricts favicon display size to 16x16 px.
03/05/18 - Versioned to 1.01.
- Previous version didn't work correctly. #GOTCHA It used to be
working code, but all of a sudden it wasn't working. Today,
I realized the issue was because of how starring a OneTab
group adds an element to the group info, so when I'm trying to parse a certain index, it fails as null. Instead of a date string, it is an element that says "7 tabs", "12 tabs", etc.
- #SOLVED: If tempTimeDate includes "tabs", then look for the date in
the next index (+1). WORKS now! :)
02/27/18 - Created file from 11/27/17 version.
- Versioned as #1.
- Improved global variable organization.
- Reorganized 'Notes' section to be descending by date
#GOTCHA: Realized that my comments are so long now,
it makes sense to have the latest ones on top so
I don't have to scroll! :)
- NOTE: This doesn't work for some reason. I KNOW that I fixed
this within the last few months, but I can't find the
working version. :(
-
11/27/17 - Parses date and time from group info block
- Versioned to 10.0
- Refactored time/date parts into better semantic variables
- Stores date and time info to groups.
- Correctly grabs rows and stores to group! :) WORKS!!!
- Generates new page correctly, except misses the last group.
- #GOTCHA Figure out this off-by-one error.
- Versioned to 11.0.
- Removed debugs
- Changed selAllGroups to (-n+1) from (-n+2). WORKS! :)
- Changed group heading to include some other info
- Adds template literal to inject styles into out page
- Replaced inline styles with classes including clearfix
- Formatted group info divs to be inline-block no float
- Adds number padding function from:
https://stackoverflow.com/questions/10073699/pad-a-number-with-leading-zeros-in-javascript
- Formatted row numbers to pad with leading spaces.
11/26/17 - Versioned to 9.0 - This version we will adapt to work with
OneTab.
- Refactored selectors as constants
- Adds color & CSS style constants.
- Adds groupsTotal to store total number of groups.
- Adds debugs in getAllGroups()
- SOLVED for How to exclude children from front and end with:
var selAllGroups = "#contentAreaDiv > div:nth-child(n+4):not(:nth-last-child(-n+2)";
- This link explains HOW TO TEST FOR &nbsp; (non-breaking space):
if (x == String.fromCharCode(160))
- #GOTCHA Those last two were gotchas.
- Adds blankTitleCount as property of objArrGroups.
- Grabs and stores group info
- We have an "off-by-one" error and are not grabbing the last group for some reason
11/25/17 - Versioned file to 5.0.
- Removed redundant function.
- Created algorithm on paper.
- Versioned to 6.0 based on my algorithm.
- Troubleshot code --> arrays and objects. This post helped:
https://www.sitepoint.com/get-url-parameters-with-javascript/
- 10:47 AM (CST) WORKS !!!! :) So far, this version builds a
groupInfo object which stores all group data as JSON.
- NOTE: This works on sample links list page, but has not been
converted for OneTab yet.
- Versioned to 7.0
- Started from scratch with algorithm and left out most debugging
and console logging. This is cleaner code.
- WORKS!!! Generates new page! with grouped links! :) Next, test
in OneTab
- Versioned to 8.0
- Added HTML header to results page --> sort of works. You will still
need to copy the inner html of the whole document and move
to a new file for archiving.
11/24/17 - Duplicated file to make edits. Versioned to 2.0.
- Added comments to document code and make it more understandable.
- Replaced MS Word apostrophes with single-quotes
- Changed arrTabGroups from const to a var and removed square
brackets.
- Changed selector for arrTabGroups to:
#contentAreaDiv div.row_text > a');
- Adds elCount var as blank and undefines/unsets arrTabGroups
at start of program.
- Removes co-existing 2nd version of top arrTabGroups code. This
was just for testing if we actually are getting all the links.
YES!!! It works! :) Now we will make that a function.
- Refactored code into getTabGroups() function. Still works! :)
- Added the ... with brackets back after learning this is the
ES6 way to auto convert a nodelist to an array. Still works! :)
- Now we are going to see if we can parse more than just url.
- Works! I'm getting img icon and link url. NEXT, let's get link text
and lets make sure to build objects that we can iterate over.
Here is a great link: https://www.sitepoint.com/dom-manipulation-vanilla-javascript-no-jquery/#modifyingthedom
11/01/17 - Created file, beginning from copy of scrapeLinksFromOneTab3.
- Converting from jQuery to vanilla javascript.
-----------------------------
*/
/*
IDEAS & FUTURE IMPROVEMENTS:
- Consider de-duplicating icon urls
- Get domain from icon url
- Grab group date and time
- Sort by date and time
- Can I create a global row object?
- What is the expense of creating a new rowobject variable each iteration?
- Create JavaScript plugin or library from this.
*/