scope/search/web/web.go

199 lines
4.5 KiB
Go
Raw Normal View History

/*
* Scope - A simple and minimal metasearch engine
* Copyright (C) 2021 Arsen Musayelyan
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
2021-12-08 17:24:05 +00:00
package web
import (
	"net/http"
	"sort"
	"sync"
	"time"
	"unicode/utf8"

	"golang.org/x/sync/errgroup"
)
// init sets a five-second timeout on http.DefaultClient, so it applies to
// every request made through that shared client.
func init() {
	const requestTimeout = 5 * time.Second
	http.DefaultClient.Timeout = requestTimeout
}
2021-12-08 21:18:14 +00:00
// Result represents a search result
2021-12-08 17:24:05 +00:00
type Result struct {
// Title is the result's title, as returned by the engine's Title method.
Title string
// Link is the result's link, as returned by the engine's Link method.
// Search treats results with an identical Link as the same result.
Link string
// Desc is the result's description, as returned by the engine's Desc
// method. Search cuts it to at most 500 bytes followed by "..." when the
// engine returns something longer.
Desc string
// Engines holds the Name() of every engine that returned this link.
Engines []string
// Rank orders results: Search sorts ascending, so lower values come first.
// Search computes it as (engine position * 100) + result index.
Rank int
}
2021-12-08 21:18:14 +00:00
// Engine represents a search engine for web results (not images, shopping, etc.)
2021-12-08 17:24:05 +00:00
type Engine interface {
// SetKeyword sets the search keyword for the engine.
SetKeyword(string)
// SetUserAgent sets the User-Agent to use. If the string is empty,
// an acceptable default should be used.
SetUserAgent(string)
// SetPage sets the page number to search.
SetPage(int)
// Init initializes the engine (makes requests, sets variables, etc.).
// Search calls it after the three setters and before Each.
Init() error
// Each runs the given function once for each search result,
// passing it the result's index. Search passes that index on to
// Link, Title and Desc.
Each(func(int) error) error
// Title returns the title of the result at the index given by Each.
Title(int) (string, error)
// Link returns the link of the result at the index given by Each.
Link(int) (string, error)
// Desc returns the description of the result at the index given by Each.
Desc(int) (string, error)
// Name returns the shortened name of the search engine.
// It should be lowercase (e.g. google, ddg, bing).
Name() string
}
2021-12-08 21:18:14 +00:00
// Options represents search options
2021-12-08 17:24:05 +00:00
type Options struct {
// Keyword is the search keyword; Search passes it to each engine's
// SetKeyword.
Keyword string
// UserAgent is passed to each engine's SetUserAgent. Per the Engine
// interface, an empty string asks the engine to pick its own.
UserAgent string
// Page is the page number to search; Search passes it to each engine's
// SetPage.
Page int
}
2021-12-08 21:18:14 +00:00
// Search searches the given engines concurrently and returns the results
2021-12-08 17:24:05 +00:00
func Search(opts Options, engines ...Engine) ([]*Result, error) {
var outMtx sync.Mutex
var out []*Result
2021-12-08 21:18:14 +00:00
// Create new error group
2021-12-08 17:24:05 +00:00
wg := errgroup.Group{}
2021-12-08 21:18:14 +00:00
// For every engine
2021-12-08 17:24:05 +00:00
for index, engine := range engines {
2021-12-08 21:18:14 +00:00
// Copy index and engine (for goroutine)
2021-12-08 17:24:05 +00:00
curIndex, curEngine := index, engine
wg.Go(func() error {
2021-12-08 21:18:14 +00:00
// Set options
2021-12-08 17:24:05 +00:00
curEngine.SetKeyword(opts.Keyword)
curEngine.SetUserAgent(opts.UserAgent)
curEngine.SetPage(opts.Page)
2021-12-08 21:18:14 +00:00
// Attempt to init engine
2021-12-08 17:24:05 +00:00
if err := curEngine.Init(); err != nil {
return err
}
2021-12-08 21:18:14 +00:00
// For each result
2021-12-08 17:24:05 +00:00
err := curEngine.Each(func(i int) error {
2021-12-08 21:18:14 +00:00
// Get result link
2021-12-08 17:24:05 +00:00
link, err := curEngine.Link(i)
if err != nil {
return err
}
2021-12-08 21:18:14 +00:00
// Calculate result rank
2021-12-08 17:24:05 +00:00
rank := (curIndex * 100) + i
2021-12-08 21:18:14 +00:00
// Check if result exists
2021-12-08 17:24:05 +00:00
index, exists := linkExists(out, link)
2021-12-08 21:18:14 +00:00
// If result already exists
2021-12-08 17:24:05 +00:00
if exists {
2021-12-08 21:18:14 +00:00
// Add engine to the existing result
2021-12-08 17:24:05 +00:00
out[index].Engines = append(out[index].Engines, curEngine.Name())
2021-12-08 21:18:14 +00:00
// If the rank is higher than the old one, update it
2021-12-08 17:24:05 +00:00
if rank < out[index].Rank {
out[index].Rank = rank
}
return nil
}
2021-12-08 21:18:14 +00:00
// Get result title
2021-12-08 17:24:05 +00:00
title, err := curEngine.Title(i)
if err != nil {
return err
}
2021-12-08 21:18:14 +00:00
// Get result description
2021-12-08 17:24:05 +00:00
desc, err := curEngine.Desc(i)
if err != nil {
return err
}
2021-12-08 21:18:14 +00:00
// If title, link, or description empty, ignore
2021-12-08 17:24:05 +00:00
if title == "" || link == "" || desc == "" {
return nil
}
2021-12-08 21:18:14 +00:00
// If length of description, truncate
2021-12-08 17:24:05 +00:00
if len(desc) > 500 {
desc = desc[:500] + "..."
}
2021-12-08 21:18:14 +00:00
// Create result struct
2021-12-08 17:24:05 +00:00
result := &Result{
2021-12-08 21:18:14 +00:00
Title: title,
Link: link,
Desc: desc,
Rank: rank,
Engines: []string{curEngine.Name()},
2021-12-08 17:24:05 +00:00
}
2021-12-08 21:18:14 +00:00
// Lock out mutex
2021-12-08 17:24:05 +00:00
outMtx.Lock()
2021-12-08 21:18:14 +00:00
// Add result to slice
2021-12-08 17:24:05 +00:00
out = append(out, result)
2021-12-08 21:18:14 +00:00
// Unlock out mutex
2021-12-08 17:24:05 +00:00
outMtx.Unlock()
return nil
})
if err != nil {
return err
}
2021-12-08 21:18:14 +00:00
// Sort slice by rank
2021-12-08 17:24:05 +00:00
sort.Slice(out, func(i, j int) bool {
return out[i].Rank < out[j].Rank
})
return nil
})
}
2021-12-08 21:18:14 +00:00
// Wait for error group
2021-12-08 17:24:05 +00:00
if err := wg.Wait(); err != nil {
return out, err
}
return out, nil
}
2021-12-08 21:18:14 +00:00
// linkExists checks if a link exists in the results
2021-12-08 17:24:05 +00:00
func linkExists(results []*Result, link string) (int, bool) {
2021-12-08 21:18:14 +00:00
// For every result
2021-12-08 17:24:05 +00:00
for index, result := range results {
2021-12-08 21:18:14 +00:00
// If link is the same as provided
2021-12-08 17:24:05 +00:00
if result.Link == link {
2021-12-08 21:18:14 +00:00
// Return index with true
2021-12-08 17:24:05 +00:00
return index, true
}
}
return -1, false
}