From 0ece536810dd58cf97f98c944f22ac4d539d4744 Mon Sep 17 00:00:00 2001 From: Arsen Musayelyan Date: Sun, 29 Jan 2023 13:42:23 -0800 Subject: [PATCH] Normalize URL for caching --- go.mod | 1 + go.sum | 2 ++ internal/dl/dl.go | 28 +++++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index ddcf7e2..820bd49 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.18 require ( github.com/AlecAivazis/survey/v2 v2.3.6 + github.com/PuerkitoBio/purell v1.2.0 github.com/alecthomas/chroma/v2 v2.4.0 github.com/charmbracelet/bubbles v0.14.0 github.com/charmbracelet/bubbletea v0.23.1 diff --git a/go.sum b/go.sum index f3bdebf..45363a1 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,8 @@ github.com/ProtonMail/go-crypto v0.0.0-20210512092938-c05353c2d58c h1:bNpaLLv2Y4 github.com/ProtonMail/go-crypto v0.0.0-20210512092938-c05353c2d58c/go.mod h1:z4/9nQmJSSwwds7ejkxaJwO37dru3geImFUdJlaLzQo= github.com/ProtonMail/go-mime v0.0.0-20220302105931-303f85f7fe0f h1:CGq7OieOz3wyQJ1fO8S0eO9TCW1JyvLrf8fhzz1i8ko= github.com/ProtonMail/gopenpgp/v2 v2.2.2 h1:u2m7xt+CZWj88qK1UUNBoXeJCFJwJCZ/Ff4ymGoxEXs= +github.com/PuerkitoBio/purell v1.2.0 h1:/Jdm5QfyM8zdlqT6WVZU4cfP23sot6CEHA4CS49Ezig= +github.com/PuerkitoBio/purell v1.2.0/go.mod h1:OhLRTaaIzhvIyofkJfB24gokC7tM42Px5UhoT32THBk= github.com/acomagu/bufpipe v1.0.3 h1:fxAGrHZTgQ9w5QqVItgzwj235/uYZYgbXitB+dLupOk= github.com/acomagu/bufpipe v1.0.3/go.mod h1:mxdxdup/WdsKVreO5GpW4+M/1CE2sMG4jeGJ2sYmHc4= github.com/alecthomas/assert/v2 v2.2.0 h1:f6L/b7KE2bfA+9O4FL3CM/xJccDEwPVYd5fALBiuwvw= diff --git a/internal/dl/dl.go b/internal/dl/dl.go index 94f19f4..a84239e 100644 --- a/internal/dl/dl.go +++ b/internal/dl/dl.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" + "github.com/PuerkitoBio/purell" "github.com/vmihailenco/msgpack/v5" "go.arsenm.dev/logger/log" "go.arsenm.dev/lure/internal/dlcache" @@ -88,6 +89,12 @@ type UpdatingDownloader interface { // it downloads the source to a new cache directory and links 
it // to the destination. func Download(ctx context.Context, opts Options) (err error) { + normalized, err := normalizeURL(opts.URL) + if err != nil { + return err + } + opts.URL = normalized + d := getDownloader(opts.URL) if opts.CacheDisabled { @@ -232,7 +239,7 @@ func handleCache(cacheDir, dest string, t Type) (bool, error) { // hard links for each file from the src directory to the // dest directory. If it encounters a directory, it will // create a directory with the same name and permissions -// in the dest directory, because hard links cannot be +// in the dest directory, because hard links cannot be // created for directories. func linkDir(src, dest string) error { return filepath.Walk(src, func(path string, info os.FileInfo, err error) error { @@ -266,3 +273,22 @@ func getDownloader(u string) Downloader { } return nil } + +// normalizeURL normalizes a URL string, so that insignificant +// differences don't change the hash. +func normalizeURL(u string) (string, error) { + const normalizationFlags = purell.FlagRemoveTrailingSlash | + purell.FlagRemoveDefaultPort | + purell.FlagLowercaseHost | + purell.FlagLowercaseScheme | + purell.FlagRemoveDuplicateSlashes | + purell.FlagRemoveFragment | + purell.FlagRemoveUnnecessaryHostDots | + purell.FlagSortQuery | + purell.FlagDecodeHexHost | + purell.FlagDecodeOctalHost | + purell.FlagDecodeUnnecessaryEscapes | + purell.FlagRemoveEmptyPortSeparator + + return purell.NormalizeURLString(u, normalizationFlags) +}