From f1aec25e0bbd1c7b5dc913f4338cf5b4d867e17f Mon Sep 17 00:00:00 2001
From: Arsen Musayelyan
Date: Wed, 8 Jun 2022 02:40:16 -0700
Subject: [PATCH] Add AOL engine

---
 .gitignore        |   4 +-
 search/web/aol.go | 166 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+), 1 deletion(-)
 create mode 100644 search/web/aol.go

diff --git a/.gitignore b/.gitignore
index f14b1bf..16df4e6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 /static/ext/
-/scope
\ No newline at end of file
+/scope
+/cmd/test/
+/test
\ No newline at end of file
diff --git a/search/web/aol.go b/search/web/aol.go
new file mode 100644
index 0000000..febc4b3
--- /dev/null
+++ b/search/web/aol.go
@@ -0,0 +1,166 @@
+package web
+
+import (
+	"fmt"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// aolURL is the base search URL; Init copies it and adds the query parameters
+var aolURL = urlMustParse("https://search.aol.com/aol/search?rp=&s_chn=prt_bon&s_it=comsearch")
+
+// AOL is a search engine that scrapes results from AOL's HTML results page
+type AOL struct {
+	keyword   string
+	userAgent string
+	page      int
+	doc       *goquery.Document
+	initDone  bool
+	baseSel   *goquery.Selection
+}
+
+// SetKeyword sets the keyword for searching
+func (a *AOL) SetKeyword(keyword string) {
+	a.keyword = keyword
+}
+
+// SetPage sets the page number for searching. It is stored as the offset
+// that Init sends in the "b" query parameter: page*10, plus one when that is
+// greater than zero.
+func (a *AOL) SetPage(page int) {
+	a.page = page * 10
+	if a.page > 0 {
+		a.page++
+	}
+}
+
+// SetUserAgent sets the user agent to use for the request
+func (a *AOL) SetUserAgent(ua string) {
+	a.userAgent = ua
+}
+
+// Init requests the AOL results page for the configured keyword and page,
+// parses it, and stores the result links for the accessor methods. It returns
+// an error if the request fails, the response status is not 200 OK, or the
+// body cannot be parsed.
+func (a *AOL) Init() error {
+	// Copy URL so it can be changed
+	initURL := copyURL(aolURL)
+	query := initURL.Query()
+	// Set query
+	query.Set("q", a.keyword)
+	if a.page > 0 {
+		query.Set("b", strconv.Itoa(a.page))
+	}
+	// Update URL query parameters
+	initURL.RawQuery = query.Encode()
+
+	// Create new request for modified URL
+	req, err := http.NewRequest(
+		http.MethodGet,
+		initURL.String(),
+		nil,
+	)
+	if err != nil {
+		return err
+	}
+	// If no user agent, use default
+	if a.userAgent == "" {
+		a.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
+	}
+	// Set request user agent
+	req.Header.Set("User-Agent", a.userAgent)
+
+	// Perform request
+	res, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer res.Body.Close()
+
+	// An error page would parse cleanly but contain no results, so report it
+	// instead of silently returning nothing.
+	if res.StatusCode != http.StatusOK {
+		return fmt.Errorf("aol: unexpected response status %s", res.Status)
+	}
+
+	// Create new goquery document
+	doc, err := goquery.NewDocumentFromReader(res.Body)
+	if err != nil {
+		return err
+	}
+	a.doc = doc
+	// Select each result's title link; Title, Link and Desc index into this
+	a.baseSel = doc.Find(`h3.title > a[href]`)
+	a.initDone = true
+	return nil
+}
+
+// Each runs eachCb with the index of each search result
+func (a *AOL) Each(eachCb func(int) error) error {
+	for i := 0; i < a.baseSel.Length(); i++ {
+		err := eachCb(i)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Title returns the title of the search result corresponding to i
+func (a *AOL) Title(i int) (string, error) {
+	return get(a.baseSel, i).Text(), nil
+}
+
+// Link returns the link to the search result corresponding to i. If the
+// href's path contains an "RU=" segment, the unescaped value of that segment
+// is returned (presumably the real destination behind AOL's redirect URL);
+// otherwise the href is returned unchanged.
+func (a *AOL) Link(i int) (string, error) {
+	href := get(a.baseSel, i).AttrOr("href", "")
+	hrefURL, err := url.Parse(href)
+	if err != nil {
+		return "", err
+	}
+
+	// Split the still-escaped path so that encoded slashes inside the RU value
+	// are not treated as segment separators
+	var ru string
+	splitPath := strings.Split(hrefURL.RawPath, "/")
+	for _, item := range splitPath {
+		if strings.HasPrefix(item, "RU=") {
+			ru = strings.TrimPrefix(item, "RU=")
+			break
+		}
+	}
+	if ru == "" {
+		return href, nil
+	}
+
+	return url.PathUnescape(ru)
+}
+
+// Desc returns the description of the search result corresponding to i
+func (a *AOL) Desc(i int) (string, error) {
+	// Start from the i-th title link so every result gets its own description:
+	// link -> h3.title -> its parent -> next sibling -> first child.
+	return get(a.baseSel, i).
+		Parent().
+		Parent().
+		Next().
+		Children().
+		First().
+		Text(), nil
+}
+
+// Name returns "aol"
+func (*AOL) Name() string {
+	return "aol"
+}
+
+// Example search URL:
+// https://search.aol.com/aol/search?q=site%3Alinkedin.com%2Fin%2F+%22Senior+Developer%22+%22Nvidia%22&rp=&s_chn=prt_bon&s_it=comsearch