/* * Scope - A simple and minimal metasearch engine * Copyright (C) 2021 Arsen Musayelyan * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ package web import ( "net/http" "sort" "sync" "time" "golang.org/x/sync/errgroup" ) func init() { http.DefaultClient.Timeout = 5 * time.Second } // Result represents a search result type Result struct { Title string Link string Desc string Engines []string Rank int } // Engine represents a search engine for web results (not images, shopping, erc.) type Engine interface { // Set search keyword for engine SetKeyword(string) // Set User Agent. If string is empty, // an acceptable will should be used. SetUserAgent(string) // Set page number to search SetPage(int) // Initialize engine (make requests, set variables, etc.) Init() error // Run function for each search result, // inputting index Each(func(int) error) error // Get title from index given by Each() Title(int) (string, error) // Get link from index given by Each() Link(int) (string, error) // Get description from index given by Each() Desc(int) (string, error) // Return shortened name of search engine. // Should be lowercase (e.g. google, ddg, bing) Name() string } // Options represents search options type Options struct { Keyword string UserAgent string Page int } // Search searches the given engines concurrently and returns the results func Search(opts Options, engines ...Engine) ([]*Result, error) { var outMtx sync.Mutex var out []*Result // Create new error group wg := errgroup.Group{} // For every engine for index, engine := range engines { // Copy index and engine (for goroutine) curIndex, curEngine := index, engine wg.Go(func() error { // Set options curEngine.SetKeyword(opts.Keyword) curEngine.SetUserAgent(opts.UserAgent) curEngine.SetPage(opts.Page) // Attempt to init engine if err := curEngine.Init(); err != nil { return err } // For each result err := curEngine.Each(func(i int) error { // Get result link link, err := curEngine.Link(i) if err != nil { return err } // Calculate result rank rank := (curIndex * 100) + i // Check if result exists index, exists := linkExists(out, link) // If result already exists if exists { // Add engine to the existing result out[index].Engines = append(out[index].Engines, curEngine.Name()) // If the rank is higher than the old one, update it if rank < out[index].Rank { out[index].Rank = rank } return nil } // Get result title title, err := curEngine.Title(i) if err != nil { return err } // Get result description desc, err := curEngine.Desc(i) if err != nil { return err } // If title, link, or description empty, ignore if title == "" || link == "" || desc == "" { return nil } // If length of description, truncate if len(desc) > 500 { desc = desc[:500] + "..." } // Create result struct result := &Result{ Title: title, Link: link, Desc: desc, Rank: rank, Engines: []string{curEngine.Name()}, } // Lock out mutex outMtx.Lock() // Add result to slice out = append(out, result) // Unlock out mutex outMtx.Unlock() return nil }) if err != nil { return err } // Sort slice by rank sort.Slice(out, func(i, j int) bool { return out[i].Rank < out[j].Rank }) return nil }) } // Wait for error group if err := wg.Wait(); err != nil { return out, err } return out, nil } // linkExists checks if a link exists in the results func linkExists(results []*Result, link string) (int, bool) { // For every result for index, result := range results { // If link is the same as provided if result.Link == link { // Return index with true return index, true } } return -1, false }