-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathjupiterCrawl.js
147 lines (125 loc) · 3.63 KB
/
jupiterCrawl.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
require = require('esm')(module)
const fs = require('fs')
const phantom = require('phantom')
const cheerioAdv = require('cheerio-advanced-selectors')
const cheerio = cheerioAdv.wrap(require('cheerio'))
const geral = require('./src/tracks/geral').default
const teoria = require('./src/tracks/teoria').default
const escience = require('./src/tracks/escience').default
const sistemas = require('./src/tracks/sistemas').default
const ia = require('./src/tracks/ia').default
/**
 * Flattens the `classes` lists of a sequence of boxes into one array.
 *
 * @param {Array<{classes: Iterable}>} boxes - Boxes, each exposing a `classes` iterable.
 * @returns {Array} A new array holding every class entry, in box order
 *   (duplicates are kept).
 */
const grabClassesFromBoxes = boxes => {
  const collected = []
  boxes.forEach(box => {
    for (const entry of box.classes) {
      collected.push(entry)
    }
  })
  return collected
}
/**
 * Collects every class entry of a track: first those from the boxes under
 * `track.boxes.left`, then those from `track.boxes.right`.
 *
 * @param {{boxes: {left: Array, right: Array}}} track - Track definition.
 * @returns {Array} A new array with the left-side entries followed by the
 *   right-side entries.
 */
const grabClassesFromTrack = track => {
  const { left, right } = track.boxes
  const leftClasses = grabClassesFromBoxes(left)
  const rightClasses = grabClassesFromBoxes(right)
  return leftClasses.concat(rightClasses)
}
/**
 * Script entry point.
 *
 * Gathers the distinct class codes referenced by the five tracks (geral,
 * teoria, escience, sistemas, ia), opens the JupiterWeb page of each class in
 * a PhantomJS instance, scrapes name, credits, work credits, summary and
 * dependencies, and finally writes the collected records as pretty-printed
 * JSON to ./src/definitions/allclasses.json.
 *
 * Classes are processed one at a time. Any failure aborts the crawl: the
 * error is printed and the process exit code is set to 1, and the output file
 * is not written.
 */
; (async function () {
  // The Set drops codes that appear in more than one track.
  const allClasses = Array.from(
    new Set([
      ...grabClassesFromTrack(geral),
      ...grabClassesFromTrack(teoria),
      ...grabClassesFromTrack(escience),
      ...grabClassesFromTrack(sistemas),
      ...grabClassesFromTrack(ia),
    ])
  )
  let remaining = allClasses.length
  const fullClasses = []

  for (const code of allClasses) {
    console.log(`Buscando materia ${code}...`)
    const instance = await phantom.create()
    let classInfo
    try {
      const page = await instance.createPage()
      await page.open(
        `https://uspdigital.usp.br/jupiterweb/obterDisciplina?sgldis=${code}`
      )
      const content = await page.property('content')
      const $ = cheerio.load(content)

      const headerTable = $(
        '#layout_conteudo form[name=form1] table tr table:eq(2)'
      )
      const headerText = $('tr:eq(4)', headerTable).text()
      // The name is whatever follows the first '-' in the header row. Taking
      // the whole remainder (rather than only the piece up to the next '-')
      // keeps names that themselves contain a hyphen intact.
      const dashIndex = headerText.indexOf('-')
      if (dashIndex === -1) {
        throw new Error(
          `Could not find the class name for ${code}: header row has no '-' separator`
        )
      }
      const name = headerText.slice(dashIndex + 1).trim()

      const creditsTable = $(
        '#layout_conteudo form[name=form1] table tr table:eq(3)'
      )
      // Only the digits of each row are kept; both values stay strings.
      const credits = $('tr:eq(0)', creditsTable)
        .text()
        .replace(/\D/g, '')
      const wcredits = $('tr:eq(1)', creditsTable)
        .text()
        .replace(/\D/g, '')

      let summaryTable = $(
        '#layout_conteudo form[name=form1] table tr table:eq(5)'
      )
      // When the table at this position starts with a 'Docente' row, the
      // summary is looked up in the following sibling table instead.
      if (
        $('tr:eq(0)', summaryTable)
          .text()
          .includes('Docente')
      ) {
        summaryTable = summaryTable.next('table')
      }
      // The summary text is the row right after the one labelled 'Programa'.
      const summaryRow = $('tr', summaryTable).filter(
        (_, el) =>
          $(el)
            .text()
            .trim() === 'Programa'
      )
      const summary = summaryRow
        .next('tr')
        .text()
        .trim()

      console.log(`Buscando requisitos ${code}...`)
      const dependencies = await fetchDependencies(page, code)
      console.log(`Finalizando ${code}...`)
      classInfo = { code, name, credits, wcredits, summary, dependencies }
    } finally {
      // Always shut the PhantomJS instance down, even when scraping throws,
      // so a failed class does not leave a browser process behind.
      await instance.exit()
    }
    remaining -= 1
    console.log(`Faltam ${remaining}...`)
    fullClasses.push(classInfo)
  }

  fs.writeFileSync(
    './src/definitions/allclasses.json',
    JSON.stringify(fullClasses, null, 4)
  )
})().catch(err => {
  // Surface failures explicitly instead of leaving an unhandled rejection.
  console.error(err)
  process.exitCode = 1
})
/**
 * Opens the JupiterWeb requirements listing for a class and collects the
 * codes of the requirements listed for it.
 *
 * The page is searched for rows whose text contains '45052' (presumably a
 * course code; confirm with the maintainers). Starting from the row after
 * such a match, rows are read until one with blank text is reached. Rows
 * whose text contains 'fraco' are skipped; for every other row the text
 * before the first '-' is recorded.
 *
 * @param {object} page - Page object exposing `open(url)` and
 *   `property(name)`; it is navigated to the requirements URL.
 * @param {string} code - Class code interpolated into the `coddis` query
 *   parameter.
 * @returns {Promise<string[]>} The collected codes; empty when the page
 *   reports no requirements or no matching row is found.
 */
const fetchDependencies = async (page, code) => {
  const dependencies = []

  const requirementsUrl = `https://uspdigital.usp.br/jupiterweb/listarCursosRequisitos?coddis=${code}`
  await page.open(requirementsUrl)
  const html = await page.property('content')
  const $ = cheerio.load(html)

  // The site shows this message when the class has no requirements at all.
  const notice = $('#web_mensagem').text()
  if (notice.includes('Disciplina não tem requisitos')) {
    return dependencies
  }

  const requirementsTable = $(
    '#layout_conteudo form[name=form1] table table:eq(2)'
  )
  const anchorRows = $('tr', requirementsTable).filter((_, rowEl) => {
    const rowText = $(rowEl)
      .text()
      .trim()
    return rowText.includes('45052')
  })
  if (anchorRows.length === 0) {
    return dependencies
  }

  // Walk the rows that follow the match until a row with blank text shows up.
  for (
    let row = anchorRows.next('tr');
    row.text().trim() !== '';
    row = row.next('tr')
  ) {
    // NOTE(review): cheerio's .first()/.last() appear to take no selector, so
    // the 'td' argument is likely ignored and the whole row's text is
    // inspected here - confirm before relying on per-cell behaviour.
    const lastText = row.last('td').text()
    const isWeak = lastText.includes('fraco')
    if (!isWeak) {
      const firstText = row.first('td').text()
      dependencies.push(firstText.split('-')[0].trim())
    }
  }

  return dependencies
}