scope/search/web/ddg.go

104 lines
2.1 KiB
Go
Raw Normal View History

2021-12-08 17:24:05 +00:00
package web
import (
	"fmt"
	"net/http"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
// ddgURL is the parsed base URL of the DuckDuckGo HTML search endpoint. Init
// copies it and adds the query parameters for each request.
var ddgURL = urlMustParse("https://html.duckduckgo.com/html")
// uddgPrefix is the prefix of result hrefs that point at DuckDuckGo's
// redirect endpoint; Link extracts the "uddg" query parameter from such hrefs.
const uddgPrefix = "//duckduckgo.com/l/?uddg="
// DDG is a search backend that queries the DuckDuckGo HTML endpoint and
// exposes the scraped results by index.
//
// Configure it with SetKeyword, SetPage and SetUserAgent, call Init to fetch
// and parse the results, then read them through Each, Title, Link and Desc.
type DDG struct {
	// keyword is sent as the "q" query parameter; userAgent is sent as the
	// User-Agent request header (Init fills in a default when it is empty).
	keyword, userAgent string

	// page is the value stored by SetPage (the requested page times 30). When
	// positive, Init sends it as the "s" parameter and page+1 as "dc".
	page int

	// doc is the HTML document parsed from the response by Init.
	doc *goquery.Document

	// initDone is set to true once Init has completed successfully.
	initDone bool

	// baseSel is the `#links > .result` selection taken from doc; the
	// per-result accessors index into it.
	baseSel *goquery.Selection
}
// SetKeyword stores the search term that Init sends as the "q" query
// parameter.
func (d *DDG) SetKeyword(keyword string) {
	d.keyword = keyword
}
// SetPage selects which page of results to request. The page number is
// multiplied by 30 and stored; Init sends the stored value as the "s"
// parameter (and the value plus one as "dc") when it is greater than zero.
func (d *DDG) SetPage(page int) {
	// Presumably the number of results DuckDuckGo serves per page — confirm.
	const offsetStep = 30
	d.page = offsetStep * page
}
// SetUserAgent sets the User-Agent header value that Init sends.
//
// NOTE(review): the ua argument is currently ignored and a fixed
// Opera/Presto User-Agent string is stored instead (the assignment of ua is
// commented out in the original). This looks deliberate — confirm before
// restoring the parameter.
func (d *DDG) SetUserAgent(ua string) {
	const fixedUserAgent = "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10"
	d.userAgent = fixedUserAgent // ua
}
// Init performs the search request and prepares the results for the
// accessor methods.
//
// It builds the request URL from a copy of ddgURL, setting "q" to the stored
// keyword and, when the stored page offset is positive, "s" to the offset and
// "dc" to the offset plus one. The request is sent with the configured
// User-Agent; if none is set, a desktop Chrome string is stored and used.
// The response body is parsed as HTML, and the document and its
// `#links > .result` selection are stored on d.
//
// It returns an error if the request cannot be built or sent, if the server
// replies with a non-2xx status, or if the body cannot be parsed. On error
// the previously stored document and selection are left untouched.
func (d *DDG) Init() error {
	initURL := copyURL(ddgURL)
	query := initURL.Query()
	query.Set("q", d.keyword)
	if d.page > 0 {
		query.Set("s", strconv.Itoa(d.page))
		query.Set("dc", strconv.Itoa(d.page+1))
	}
	initURL.RawQuery = query.Encode()

	req, err := http.NewRequest(http.MethodGet, initURL.String(), nil)
	if err != nil {
		return err
	}

	if d.userAgent == "" {
		d.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
	}
	req.Header.Set("User-Agent", d.userAgent)

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	// Client.Do does not treat HTTP error statuses as errors. Without this
	// check an error page would be parsed as if it were a result page and
	// silently yield zero results.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("ddg: unexpected response status %q", res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return err
	}

	d.doc = doc
	d.baseSel = doc.Find(`#links > .result`)
	d.initDone = true
	return nil
}
// Each calls eachCb once for every result index, in ascending order starting
// at 0. Iteration stops at the first non-nil error returned by eachCb, and
// that error is returned; otherwise Each returns nil.
//
// The selection length is re-read before every call, and baseSel must have
// been populated (by Init) beforehand.
func (d *DDG) Each(eachCb func(int) error) error {
	idx := 0
	for idx < d.baseSel.Length() {
		if err := eachCb(idx); err != nil {
			return err
		}
		idx++
	}
	return nil
}
// Title returns the title of the i-th result: the text of the <h2> elements
// that are direct children of the result's first child, with surrounding
// whitespace trimmed. The returned error is always nil.
func (d *DDG) Title(i int) (string, error) {
	firstChild := get(d.baseSel, i).Children().First()
	heading := firstChild.ChildrenFiltered("h2").Text()
	return strings.TrimSpace(heading), nil
}
// Link returns the target URL of the i-th result. It reads the href of the
// <a> elements that are direct children of the result's first child (empty
// string when absent). If the href starts with uddgPrefix, the value of its
// "uddg" query parameter is returned instead of the href itself. The
// returned error is always nil.
//
// NOTE(review): urlMustParse presumably panics on an unparsable URL, and the
// href comes from scraped HTML — consider returning a parse error instead.
func (d *DDG) Link(i int) (string, error) {
	anchor := get(d.baseSel, i).Children().First().ChildrenFiltered("a")
	href := anchor.AttrOr("href", "")
	if !strings.HasPrefix(href, uddgPrefix) {
		return href, nil
	}
	return urlMustParse(href).Query().Get("uddg"), nil
}
// Desc returns the description of the i-th result: the text of the <a>
// elements that are direct children of the result's first child. The text is
// returned untrimmed, and the returned error is always nil.
func (d *DDG) Desc(i int) (string, error) {
	anchors := get(d.baseSel, i).Children().First().ChildrenFiltered("a")
	return anchors.Text(), nil
}
// Name returns the identifier of this search backend, "ddg".
func (d *DDG) Name() string {
	const engineName = "ddg"
	return engineName
}