package web

import (
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// ddgURL is the DuckDuckGo HTML search endpoint that Init queries.
var ddgURL = urlMustParse("https://html.duckduckgo.com/html")

// errDDGNotInitialized is returned by the result accessors when they are
// called before Init has completed successfully.
var errDDGNotInitialized = errors.New("ddg: Init has not completed successfully")

// uddgPrefix marks result links that go through DuckDuckGo's redirector; the
// real target is carried in the "uddg" query parameter.
const uddgPrefix = "//duckduckgo.com/l/?uddg="

// ddgPageSize is the factor SetPage applies to turn a page index into the
// offset sent to the endpoint (presumably the number of results per page).
const ddgPageSize = 30

// ddgForcedUserAgent is the User-Agent that SetUserAgent always installs.
const ddgForcedUserAgent = "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10"

// ddgFallbackUserAgent is used by Init when no User-Agent has been set.
const ddgFallbackUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"

// DDG fetches a DuckDuckGo HTML result page and exposes the individual
// results by index. Configure it with the Set* methods, call Init to perform
// the request, then use Each, Title, Link and Desc to read the results.
type DDG struct {
	// keyword is the search query sent as the "q" parameter.
	keyword string

	// userAgent is the User-Agent header value sent by Init.
	userAgent string

	// page holds the result offset (page index * ddgPageSize), not the page index.
	page int

	// doc is the parsed response document from the last successful Init.
	doc *goquery.Document

	// initDone reports whether Init has completed successfully at least once.
	initDone bool

	// baseSel selects every result element of doc; it is nil until Init succeeds.
	baseSel *goquery.Selection
}

// SetKeyword sets the search query used by the next Init.
func (d *DDG) SetKeyword(keyword string) {
	d.keyword = keyword
}

// SetPage selects the result page fetched by the next Init. The page index is
// stored as an offset (page * ddgPageSize); values <= 0 request the first page.
func (d *DDG) SetPage(page int) {
	d.page = page * ddgPageSize
}

// SetUserAgent installs the User-Agent header used by Init.
//
// NOTE(review): the ua argument is ignored and a fixed Opera User-Agent is
// always used. The original code had the argument commented out, so this looks
// deliberate; the behavior is kept as-is. Confirm before honoring ua.
func (d *DDG) SetUserAgent(ua string) {
	d.userAgent = ddgForcedUserAgent
}

// Init performs the search request and parses the response so that the
// accessors can be used. It returns an error if the request cannot be built or
// sent, if the server answers with a non-2xx status, or if the body cannot be
// parsed as HTML.
func (d *DDG) Init() error {
	initURL := copyURL(ddgURL)
	query := initURL.Query()
	query.Set("q", d.keyword)
	if d.page > 0 {
		// Pages after the first are requested by offset: "s" carries the
		// offset and "dc" the offset plus one.
		query.Set("s", strconv.Itoa(d.page))
		query.Set("dc", strconv.Itoa(d.page+1))
	}
	initURL.RawQuery = query.Encode()

	req, err := http.NewRequest(http.MethodGet, initURL.String(), nil)
	if err != nil {
		return fmt.Errorf("ddg: building request: %w", err)
	}
	if d.userAgent == "" {
		d.userAgent = ddgFallbackUserAgent
	}
	req.Header.Set("User-Agent", d.userAgent)

	// NOTE(review): http.DefaultClient has no timeout unless the application
	// configured one; it is kept so existing client customization still applies.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("ddg: sending request: %w", err)
	}
	defer res.Body.Close()

	// Reject error responses instead of parsing them as an empty result page.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		return fmt.Errorf("ddg: unexpected HTTP status %s", res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return fmt.Errorf("ddg: parsing response: %w", err)
	}

	d.doc = doc
	d.baseSel = doc.Find(`#links > .result`)
	d.initDone = true
	return nil
}

// Each calls eachCb once per result, passing the result index, and stops at
// the first error, which it returns. It returns an error without invoking
// eachCb if Init has not completed successfully.
func (d *DDG) Each(eachCb func(int) error) error {
	if !d.initDone {
		return errDDGNotInitialized
	}
	for i := 0; i < d.baseSel.Length(); i++ {
		if err := eachCb(i); err != nil {
			return err
		}
	}
	return nil
}

// resultBody returns the first child element of the i-th result, which is the
// element Title, Link and Desc read from. It fails if Init has not completed
// successfully, since baseSel is nil until then.
func (d *DDG) resultBody(i int) (*goquery.Selection, error) {
	if !d.initDone {
		return nil, errDDGNotInitialized
	}
	return get(d.baseSel, i).Children().First(), nil
}

// Title returns the whitespace-trimmed text of the i-th result's h2 heading.
func (d *DDG) Title(i int) (string, error) {
	body, err := d.resultBody(i)
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(body.ChildrenFiltered("h2").Text()), nil
}

// Link returns the href of the i-th result's anchor, or "" when there is none.
// Links that go through DuckDuckGo's redirector are unwrapped to the target
// carried in their "uddg" query parameter.
func (d *DDG) Link(i int) (string, error) {
	body, err := d.resultBody(i)
	if err != nil {
		return "", err
	}

	link := body.ChildrenFiltered("a").AttrOr("href", "")
	if strings.HasPrefix(link, uddgPrefix) {
		// The href is scraped, untrusted input: report a malformed value as an
		// error rather than going through a Must-style helper.
		redirect, err := url.Parse(link)
		if err != nil {
			return "", fmt.Errorf("ddg: parsing redirect link %q: %w", link, err)
		}
		link = redirect.Query().Get("uddg")
	}
	return link, nil
}

// Desc returns the text of the anchor elements that are direct children of the
// i-th result's first child element.
func (d *DDG) Desc(i int) (string, error) {
	body, err := d.resultBody(i)
	if err != nil {
		return "", err
	}
	return body.ChildrenFiltered("a").Text(), nil
}

// Name returns the identifier of this search backend.
func (d *DDG) Name() string {
	return "ddg"
}