-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpicture.go
126 lines (111 loc) · 2.96 KB
/
picture.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
package main
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"regexp"
"strings"
"time"
"github.com/antchfx/htmlquery"
)
// picture holds the metadata describing one picture entry. It is populated
// either by decoding a JSON document (makePictureFromAPI) or by scraping an
// HTML page (makePictureFromHTML).
type picture struct {
// Copyright is decoded from the "copyright" JSON key. The HTML scraper does
// not set it. trim replaces newlines in it with spaces.
Copyright string `json:"copyright"`
// Date is decoded from the "date" JSON key. The HTML scraper stores it
// formatted as YYYY-MM-DD.
Date string `json:"date"`
// Explanation is the descriptive text, decoded from "explanation" or taken
// from the page's first body paragraph by the HTML scraper.
Explanation string `json:"explanation"`
// Title is decoded from the "title" JSON key or scraped from the page.
Title string `json:"title"`
// MediaType is decoded from "media_type". The HTML scraper sets it to
// mediaTypeImage or mediaTypeVideo.
MediaType string `json:"media_type"`
// FullImageURL is decoded from "hdurl". The HTML scraper fills it only for
// images and leaves it empty for videos.
FullImageURL string `json:"hdurl"`
// URL is decoded from "url". The HTML scraper sets it to the image or
// iframe source.
URL string `json:"url"`
// Link carries no JSON tag and is not assigned by the constructors visible
// in this file; presumably it is filled in by callers — verify.
Link string
}
// makePictureFromHTML scrapes a picture page read from reader and stores the
// extracted title, explanation, media URLs, media type and date in p.
//
// The page is addressed with fixed XPath expressions. The media element is
// looked up first as a linked image; when no image is present an iframe
// (video) is tried instead. For images both URL and FullImageURL are prefixed
// with apodSiteURL; for videos the iframe source is used as is and
// FullImageURL stays empty. The date text is parsed with the layout
// "2006 January 2" and stored as YYYY-MM-DD.
//
// p is only modified after every field has been extracted, so it is left
// untouched when an error is returned. An error is returned when the document
// cannot be parsed, when any expected node is missing from the page, or when
// the date text does not match the expected layout.
func makePictureFromHTML(reader io.Reader, p *picture) error {
	doc, err := htmlquery.Parse(reader)
	if err != nil {
		return fmt.Errorf("parsing picture page: %w", err)
	}

	titleNode, err := htmlquery.Query(doc, "//html/body/center[2]/b[1]")
	if err != nil {
		return fmt.Errorf("querying picture title: %w", err)
	}
	// Query reports "no match" as a nil node with a nil error, so the node
	// must be checked before it is dereferenced.
	if titleNode == nil {
		return fmt.Errorf("picture title not found in page")
	}
	title := htmlquery.InnerText(titleNode)

	explanationNode, err := htmlquery.Query(doc, "//html/body/p[1]")
	if err != nil {
		return fmt.Errorf("querying picture explanation: %w", err)
	}
	if explanationNode == nil {
		return fmt.Errorf("picture explanation not found in page")
	}
	// Drop the leading "Explanation:" label that is part of the paragraph text.
	explanation := strings.Replace(htmlquery.InnerText(explanationNode), "Explanation:", "", 1)

	mediaType := mediaTypeImage
	imageURL := ""
	fullImageURL := ""

	imageNode, err := htmlquery.Query(doc, "//html/body/center[1]/p[2]/a/img")
	if err != nil {
		return fmt.Errorf("querying picture image: %w", err)
	}
	if imageNode != nil {
		imageURL = apodSiteURL + htmlquery.SelectAttr(imageNode, "src")

		linkNode, linkErr := htmlquery.Query(doc, "//html/body/center[1]/p[2]/a")
		if linkErr != nil {
			return fmt.Errorf("querying full-size image link: %w", linkErr)
		}
		if linkNode == nil {
			return fmt.Errorf("full-size image link not found in page")
		}
		fullImageURL = apodSiteURL + htmlquery.SelectAttr(linkNode, "href")
	} else {
		// No image on the page: fall back to an embedded video.
		videoNode, videoErr := htmlquery.Query(doc, "//html/body/center[1]/p[2]/iframe")
		if videoErr != nil {
			return fmt.Errorf("querying picture video: %w", videoErr)
		}
		if videoNode == nil {
			return fmt.Errorf("neither image nor video found in page")
		}
		mediaType = mediaTypeVideo
		imageURL = htmlquery.SelectAttr(videoNode, "src")
	}

	dateNode, err := htmlquery.Query(doc, "//html/body/center[1]/p[2]")
	if err != nil {
		return fmt.Errorf("querying picture date: %w", err)
	}
	if dateNode == nil {
		return fmt.Errorf("picture date not found in page")
	}
	dateText := trimSpaces(htmlquery.InnerText(dateNode))
	pictureTime, err := time.Parse("2006 January 2", dateText)
	if err != nil {
		return fmt.Errorf("parsing picture date %q: %w", dateText, err)
	}

	p.Title = title
	p.Explanation = explanation
	p.URL = imageURL
	p.FullImageURL = fullImageURL
	p.MediaType = mediaType
	p.Date = pictureTime.Format("2006-01-02")
	p.trim()
	return nil
}
// makePictureFromAPI reads the whole of reader, decodes it as JSON into p and
// then normalizes the result by calling p.removeAds and p.trim.
//
// A read failure or a JSON decoding failure is returned unchanged; in that
// case the clean-up steps are not run.
func makePictureFromAPI(reader io.Reader, p *picture) error {
	payload, readErr := ioutil.ReadAll(reader)
	if readErr != nil {
		return readErr
	}

	if decodeErr := json.Unmarshal(payload, p); decodeErr != nil {
		return decodeErr
	}

	p.removeAds()
	p.trim()
	return nil
}
// removeAds cuts p.Explanation at the first occurrence of the marker string,
// keeping only the text before it. The explanation is left unchanged when the
// marker does not occur.
//
// NOTE(review): as written the marker is a single space, which means
// everything after the first word is dropped. Confirm the intended marker
// (possibly a longer run of spaces) before relying on this.
func (p *picture) removeAds() {
	cut := strings.Index(p.Explanation, " ")
	if cut == -1 {
		return
	}
	p.Explanation = p.Explanation[:cut]
}
// trim normalizes the textual fields of p in place: Title and Explanation are
// passed through trimSpaces, and newlines in Copyright are replaced by spaces.
func (p *picture) trim() {
	// Copyright occasionally arrives with embedded line breaks; flatten them.
	p.Copyright = strings.ReplaceAll(p.Copyright, "\n", " ")

	p.Explanation = trimSpaces(p.Explanation)
	p.Title = trimSpaces(p.Title)
}
// checkResponseStatus returns nil when resp carries status 200 OK. For any
// other status it reads the response body and returns an error containing the
// status line followed by the body text.
func checkResponseStatus(resp *http.Response) error {
	if resp.StatusCode == http.StatusOK {
		return nil
	}

	// Best effort: the body only enriches the message, so a read failure is
	// deliberately ignored and whatever was read is reported.
	payload, _ := ioutil.ReadAll(resp.Body)
	return fmt.Errorf("Bad response status: %s %s", resp.Status, payload)
}
// whitespaceRunRE matches one or more consecutive whitespace characters. It is
// compiled once at package scope so trimSpaces does not recompile it per call.
var whitespaceRunRE = regexp.MustCompile(`\s+`)

// trimSpaces collapses every run of whitespace in s (spaces, tabs, newlines
// and the other characters matched by \s) into a single space and then strips
// leading and trailing whitespace from the result.
func trimSpaces(s string) string {
	oneline := whitespaceRunRE.ReplaceAllString(s, " ")
	return strings.TrimSpace(oneline)
}