-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextratorcolombo.py
38 lines (36 loc) · 1.61 KB
/
extratorcolombo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from bs4 import BeautifulSoup
import csv
from json import load
import codecs
from bs4 import BeautifulSoup
from os.path import exists
# Column titles of the generated CSV, in output order.
_CSV_HEADER = ["Marca", "Tamanho", "Tecnologia", "Tela", "Entradas", "Dominio", "arquivo", "title"]

# Spec-table label found on a product page -> index of the row column it fills.
# Note the header names differ from the page labels: "Tela:" feeds "Tamanho",
# "Tipo de tela:" feeds "Tecnologia" and "Resolução:" feeds "Tela".
# No label is mapped to column 4 ("Entradas"); it keeps the "null" placeholder.
_LABEL_TO_COLUMN = {
    "Tela:": 1,
    "Tipo de tela:": 2,
    "Resolução:": 3,
}


def _extract_row(document, file_name):
    """Build one CSV row from a parsed product page.

    Args:
        document: The parsed page (a ``BeautifulSoup`` object).
        file_name: Name of the HTML file, stored in the "arquivo" column.

    Returns:
        An 8-item list ordered like ``_CSV_HEADER``. A field that could not be
        found on the page is left as the empty string ``""``.
    """
    # A page without a <title> tag yields "" instead of raising AttributeError,
    # so the caller skips it like any other incomplete page.
    title_tag = document.title
    title = title_tag.text if title_tag is not None else ""
    row = ["", "", "", "", "null", "Colombo", file_name, title]

    brand = document.find_all("div", {"class": "caracteristicas-fabricante-item"})
    if brand:
        row[0] = brand[0].text

    labels = document.find_all("div", {"class": "caracteristicas-label"})
    values = document.find_all("div", {"class": "caracteristicas-description"})
    # Labels and descriptions are paired by position. zip() stops at the
    # shorter list, so a page with fewer descriptions than labels no longer
    # raises IndexError.
    # NOTE(review): assumes the two lists line up one-to-one - confirm against
    # the page markup.
    for label, value in zip(labels, values):
        column = _LABEL_TO_COLUMN.get(label.text)
        if column is not None:
            row[column] = value.text
    return row


def main(*, source_dir="colombo", output_path="colombo.csv", page_count=2000):
    """Extract product specs from saved HTML pages into a CSV file.

    Looks for ``<source_dir>/<n>.html`` for every ``n`` in
    ``range(page_count)``, parses each file that exists and writes one CSV row
    per page. Pages for which any field is the empty string are skipped. Each
    page number is printed as it is visited.

    Args:
        source_dir: Directory holding the numbered ``.html`` files.
        output_path: Path of the CSV file to create (overwritten if present).
        page_count: Number of page numbers to probe, starting at 0.

    Returns:
        None.
    """
    with open(output_path, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(_CSV_HEADER)
        for page_number in range(page_count):
            print(page_number)
            file_name = "{}.html".format(page_number)
            page_path = "{}/{}".format(source_dir, file_name)
            if not exists(page_path):
                continue
            # The context manager closes each page after reading; newline=''
            # keeps the file's line endings untranslated.
            with open(page_path, 'r', encoding='utf-8', newline='') as page:
                document = BeautifulSoup(page.read(), 'html.parser')
            row = _extract_row(document, file_name)
            # Only complete rows are exported.
            if "" not in row:
                writer.writerow(row)
# Run the extraction only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()