-
Notifications
You must be signed in to change notification settings - Fork 0
/
catalog.go
executable file
·107 lines (90 loc) · 2.24 KB
/
catalog.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package main
import (
"github.com/PuerkitoBio/goquery"
"net/http"
"github.com/djimenez/iconv-go"
_"github.com/go-sql-driver/mysql"
"regexp"
"database/sql"
"log"
"strings"
"github.com/go-sql-driver/mysql"
"os"
"time"
)
// catalogPathRe matches any href containing the "/catalog/" path segment.
// It is compiled once at package scope instead of once per visited link.
var catalogPathRe = regexp.MustCompile(`(/catalog/)`)

// Scrape downloads the page at url, converts it from windows-1251 to UTF-8,
// collects the href of every ".Content a" link that contains "/catalog/",
// and passes the resulting absolute URLs to insertCatlinks.
//
// Any failure (proxy setup, request, non-200 status, charset conversion,
// HTML parsing) is logged and ends the scrape early; insertCatlinks is not
// called in that case.
func Scrape(url string) {
	// The default HTTP transport takes its proxy from the environment.
	if err := os.Setenv("HTTP_PROXY", "89.36.215.14:1189"); err != nil {
		log.Printf("Scrape: setting HTTP_PROXY: %v", err)
		return
	}

	netClient := &http.Client{
		Timeout: time.Second * 30,
	}
	res, err := netClient.Get(url)
	if err != nil {
		// res is nil here, so it must not be touched (not even to close it).
		log.Printf("Scrape: fetching %q: %v", url, err)
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		log.Printf("Scrape: fetching %q: unexpected status %s", url, res.Status)
		return
	}

	// Convert the windows-1251 encoded HTML to UTF-8 before parsing.
	utfBody, err := iconv.NewReader(res.Body, "windows-1251", "utf-8")
	if err != nil {
		log.Printf("Scrape: creating charset converter: %v", err)
		return
	}

	doc, err := goquery.NewDocumentFromReader(utfBody)
	if err != nil {
		log.Printf("Scrape: parsing %q: %v", url, err)
		return
	}

	var links []string
	doc.Find(".Content a").Each(func(_ int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok || !catalogPathRe.MatchString(href) {
			return
		}
		// hrefs are site-relative; prefix the host to make them absolute.
		links = append(links, "http://www.office-planet.ru"+strings.TrimSpace(href))
	})

	insertCatlinks(links)
}
// main is the program entry point: it runs a single Scrape pass over the
// catalog start page.
func main() {
	const startURL = "http://www.office-planet.ru/catalog/abc/"
	Scrape(startURL)
}
// insertCatlinks inserts every link in vals as a row of the catalog_links
// table in the "parser" MySQL database and logs the generated ID and the
// affected-row count for each insert.
//
// An insert rejected with MySQL error 1062 (duplicate entry) is skipped
// silently. Any other failure terminates the program via log.Fatal.
func insertCatlinks(vals []string) {
	db, err := sql.Open("mysql", "root:root@/parser")
	if err != nil {
		log.Fatal("Open -> ", err)
	}
	defer db.Close()

	stmt, err := db.Prepare("INSERT INTO catalog_links(link) VALUES (?)")
	if err != nil {
		log.Fatal("Prepare -> ", err)
	}
	defer stmt.Close()

	for _, link := range vals {
		res, err := stmt.Exec(link)
		if err != nil {
			// 1062 is ER_DUP_ENTRY: the link is already stored, move on.
			if me, ok := err.(*mysql.MySQLError); ok && me.Number == 1062 {
				continue
			}
			// res is nil on error, so every other failure must stop here
			// rather than fall through to the result accessors below.
			log.Fatal("Exec -> ", err)
		}

		lastID, err := res.LastInsertId()
		if err != nil {
			log.Fatal("LastInsertId -> ", err)
		}
		rowCnt, err := res.RowsAffected()
		if err != nil {
			log.Fatal("RowsAffected -> ", err)
		}
		log.Printf("ID = %d, affected = %d\n", lastID, rowCnt)
	}
}