This repository has been archived by the owner on Oct 8, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
127 lines (115 loc) · 2.99 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package main
import (
"fmt"
"io/ioutil"
"net/http"
"os"
"regexp"
"strconv"
"strings"
"github.com/axgle/mahonia"
)
// testQuestion is one question extracted from a downloaded exam page.
type testQuestion struct {
	// id is the run of digits that precedes the question text in the page
	// markup; question is the text that follows it inside the same heading.
	id, question string

	// testType and classType are both captured from the page <title>, so
	// every question parsed from one page shares the same pair of values.
	testType, classType string

	// options holds the label text of each answer choice, in page order.
	options []string

	// answer is the text captured from the answer <span> that follows the
	// question's option list.
	answer string
}
// Channels connecting the stages of the scraper. Both are unbuffered, so a
// send blocks until the next stage is ready to receive.
var (
	// exams carries the decoded HTML of each downloaded page from get to
	// parseTest.
	exams = make(chan string)

	// tests carries each parsed question from parseTest to main.
	tests = make(chan testQuestion)
)
// get downloads url and forwards the decoded page on the exams channel.
// Pages that could not be fetched (httpGet returned "") are dropped so the
// parser never receives an empty document.
func get(url string) {
	if page := httpGet(url); page != "" {
		exams <- page
	}
}

// httpGet fetches url and returns the response body converted from GBK to
// UTF-8. Any failure (request error, non-200 status, read error) is reported
// on stderr and yields the empty string.
func httpGet(url string) string {
	resp, err := http.Get(url)
	if err != nil {
		// resp may be nil here, so it must not be dereferenced or closed.
		fmt.Fprintln(os.Stderr, "Error:", err)
		return ""
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Fprintln(os.Stderr, "Error:", url, "returned", resp.Status)
		return ""
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		// A truncated page would silently lose questions; drop it instead.
		fmt.Fprintln(os.Stderr, "Error:", err)
		return ""
	}

	dec := mahonia.NewDecoder("gbk")
	return dec.ConvertString(string(body))
}

// main builds the list of page URLs for every configured question bank,
// downloads them concurrently, and prints each parsed question to stdout.
// Progress and errors go to stderr.
func main() {
	// Each entry is {question bank number (sent as the tikubh query
	// parameter), number of pages in that bank}.
	config := [][]int{
		{1490, 26},
		{1471, 55},
		{1492, 27},
		{1491, 11},
		{1489, 9},
		{1488, 10},
		{1436, 50},
		{1467, 26},
		// NOTE(review): bank 1471 is already listed above, so its 55 pages
		// are downloaded and printed twice; confirm whether a different
		// bank number was intended here.
		{1471, 55},
		{1484, 18},
		{1485, 9},
		{1486, 9},
		{1487, 4},
	}

	var pageURLs []string
	for _, bank := range config {
		for i := 1; i <= bank[1]; i++ {
			pageURLs = append(pageURLs,
				"http://safeexam.hdu.edu.cn/redir.php?catalog_id=6&cmd=learning&tikubh="+strconv.Itoa(bank[0])+"&page="+strconv.Itoa(i))
		}
	}

	go parseTest()

	// fetched receives one token per finished download, successful or not.
	fetched := make(chan struct{})
	for _, page := range pageURLs {
		fmt.Fprintln(os.Stderr, "get ", page)
		go func(u string) {
			get(u)
			fetched <- struct{}{}
		}(page)
	}

	// Close exams once every download has finished. parseTest only closes
	// tests after its range over exams ends, so without this the output loop
	// below would never terminate.
	go func() {
		for range pageURLs {
			<-fetched
		}
		close(exams)
	}()

	// Disabled alternative output that emits SQL INSERT statements instead
	// of plain text.
	// NOTE(review): it builds SQL by string concatenation, so a quote inside
	// a question or option would break the statement; escape the values
	// before re-enabling it.
	/*
		for t := range tests {
			if t.testType == "判断" {
				fmt.Println("INSERT INTO JExam (ID,problem,answer) VALUES (" + t.id + ",'" + t.question + "','" + t.answer + "');")
			} else {
				str := "INSERT INTO CExam (ID,problem,choice_A,choice_B,choice_C,choice_D,answer) VALUES (" + t.id + ",'" + t.question + "'"
				for _, op := range t.options {
					str += ", '" + op + "'"
				}
				if len(t.options) == 3 {
					str += ", '空'"
				}
				str += ", '" + t.answer + "');"
				fmt.Println(str)
			}
		}
	*/

	// Print every question as a small text record followed by a blank line.
	for t := range tests {
		fmt.Println(t.id, "(", t.testType, ")", t.question)
		fmt.Println(t.classType, t.answer)
		for _, op := range t.options {
			fmt.Println(op)
		}
		fmt.Println()
	}
}
// parseTest receives downloaded pages from the exams channel, extracts every
// question found on each page and sends it on the tests channel. When exams
// is closed and drained it closes tests so the consumer's range loop ends.
//
// Pages whose <title> does not match the expected pattern are reported on
// stderr and skipped.
func parseTest() {
	// titleRe captures the class (group 1) and the question type (group 2)
	// from the page title.
	titleRe := regexp.MustCompile("<title>(.+)安全题((.+)) - 高校实验室安全考试系统</title>")
	// testRe captures, per question: 1 = number, 2 = question text,
	// 3 = raw option list markup, 4 = contents of the answer span.
	testRe := regexp.MustCompile(`<div class="shiti"><h3>([0-9]*)、(.*)</h3><ul class="xuanxiang_[^"]*">(.*)</ul></div> <span [^>]*>(标准答案:\s*(.*)\s*)</span>`)
	// optionRe captures the label text of a single option; (?U) makes the
	// match non-greedy so each <li> is matched separately.
	optionRe := regexp.MustCompile(`(?U)<li><input[^>]*><label [^>]*>(.*)</label></li>`)

	for exam := range exams {
		title := titleRe.FindStringSubmatch(exam)
		if title == nil {
			// FindStringSubmatch returns nil when nothing matches; indexing
			// it would panic, so skip pages that are not exam pages.
			fmt.Fprintln(os.Stderr, "Error: page title not recognized, skipping page")
			continue
		}
		classType := strings.TrimSpace(title[1])
		testType := strings.TrimSpace(title[2])

		for _, m := range testRe.FindAllStringSubmatch(exam, -1) {
			q := testQuestion{
				id:        strings.TrimSpace(m[1]),
				question:  strings.TrimSpace(m[2]),
				testType:  testType,
				classType: classType,
				// NOTE(review): m[4] is the outer group of the answer span,
				// so as written it still includes the answer-label text that
				// precedes the value (the value alone is m[5]); confirm which
				// one is wanted.
				answer: strings.TrimSpace(m[4]),
			}
			for _, op := range optionRe.FindAllStringSubmatch(m[3], -1) {
				q.options = append(q.options, strings.TrimSpace(op[1]))
			}
			tests <- q
		}
	}
	close(tests)
}