commit 9821a219a316bc046a79568b5ef68413a0466826 Author: Knut Ahlers Date: Sun Jun 14 02:09:06 2020 +0200 Initial version diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8fce603 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +data/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a99abbb --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +FROM golang:alpine as builder + +COPY . /go/src/github.com/Luzifer/preserve +WORKDIR /go/src/github.com/Luzifer/preserve + +RUN set -ex \ + && apk add --update git \ + && go install \ + -ldflags "-X main.version=$(git describe --tags --always || echo dev)" \ + -mod=readonly + +FROM alpine:latest + +LABEL maintainer "Knut Ahlers " + +ENV STORAGE_DIR=/data + +RUN set -ex \ + && apk --no-cache add \ + ca-certificates + +COPY --from=builder /go/bin/preserve /usr/local/bin/preserve + +EXPOSE 3000 +VOLUME ["/data"] + +ENTRYPOINT ["/usr/local/bin/preserve"] +CMD ["--"] + +# vim: set ft=Dockerfile: diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..583cf8e --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2020- Knut Ahlers + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..a8bede8 --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +[![Go Report Card](https://goreportcard.com/badge/github.com/Luzifer/preserve)](https://goreportcard.com/report/github.com/Luzifer/preserve) +![](https://badges.fyi/github/license/Luzifer/preserve) +![](https://badges.fyi/github/downloads/Luzifer/preserve) +![](https://badges.fyi/github/latest-release/Luzifer/preserve) +![](https://knut.in/project-status/preserve) + +# Luzifer / preserve + +`preserve` is a little HTTP server to preserve the presence of URLs. + +Ever relied on an HTTP resource to be available and it vanished? Happened too often to me so I wrote a little tool to prevent URLs from vanishing: `preserve`. 
+
+## Usage
+
+After you've started `preserve` it will by default listen on port 3000 and you can start using it by prefixing the URL of the resource:
+
+Lets say you want to ensure the image `https://example.com/image.png` does not vanish:
+
+- `http://localhost:3000/https://example.com/image.png` will fetch the resource once and then deliver it from the local cache
+- `http://localhost:3000/latest/https://example.com/image.png` will fetch the resource with every request until it gets unavailable and then serve it from local cache
+
+This also works with parameters:
+
+`http://localhost:3000/https://pbs.twimg.com/media/somemediaid?format=jpg&name=4096x4096`
+
diff --git a/cache.go b/cache.go
new file mode 100644
index 0000000..5e0b3b9
--- /dev/null
+++ b/cache.go
@@ -0,0 +1,88 @@
+package main
+
+import (
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+	"path"
+	"time"
+
+	"github.com/pkg/errors"
+)
+
+// renewCache downloads url and stores the response body at the cache path
+// derived from it through urlToCachePath, followed by the entry metadata.
+//
+// The body is written to a temporary file next to the final location which
+// is renamed over the cache entry only after the download has completed, so
+// a failed or aborted transfer does not damage an already preserved copy.
+//
+// A nil *meta is returned when the download or the write of the cache file
+// fails. When only saving the metadata fails, the freshly built metadata is
+// returned together with that error as the cache file itself is in place.
+func renewCache(url string) (*meta, error) {
+	var cachePath = urlToCachePath(url)
+
+	resp, err := http.Get(url)
+	if err != nil {
+		return nil, errors.Wrap(err, "Unable to fetch source file")
+	}
+	// The body needs to be closed on every path to release the connection
+	defer resp.Body.Close()
+
+	if resp.StatusCode > 299 {
+		return nil, errors.Errorf("HTTP status signaled failure: %d", resp.StatusCode)
+	}
+
+	if err = os.MkdirAll(path.Dir(cachePath), 0700); err != nil {
+		return nil, errors.Wrap(err, "Unable to create cache dir")
+	}
+
+	if err = writeCacheFile(cachePath, resp.Body); err != nil {
+		return nil, err
+	}
+
+	// Use one timestamp for both fields: without a usable Last-Modified
+	// header the fetch time is the best known modification time
+	now := time.Now()
+	lm := now
+	if t, err := time.Parse(http.TimeFormat, resp.Header.Get("Last-Modified")); err == nil {
+		lm = t
+	}
+
+	metadata := &meta{
+		ContentType:  resp.Header.Get("Content-Type"),
+		LastCached:   now,
+		LastModified: lm,
+	}
+
+	return metadata, saveMeta(cachePath, *metadata)
+}
+
+// writeCacheFile streams src into a temporary file inside the directory of
+// cachePath and moves it to cachePath once it has been written and closed
+// without an error. On failure the temporary file is removed again.
+func writeCacheFile(cachePath string, src io.Reader) (err error) {
+	f, err := ioutil.TempFile(path.Dir(cachePath), path.Base(cachePath)+".tmp-")
+	if err != nil {
+		return errors.Wrap(err, "Unable to create cache file")
+	}
+	defer func() {
+		if err != nil {
+			// Best-effort cleanup, the error being returned is the relevant one
+			os.Remove(f.Name())
+		}
+	}()
+
+	if _, err = io.Copy(f, src); err != nil {
+		f.Close()
+		return errors.Wrap(err, "Unable to write cache file")
+	}
+
+	if err = f.Close(); err != nil {
+		return errors.Wrap(err, "Unable to write cache file")
+	}
+
+	return errors.Wrap(os.Rename(f.Name(), cachePath), "Unable to move cache file into place")
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..0827b25
--- /dev/null
+++
b/go.mod @@ -0,0 +1,11 @@ +module github.com/Luzifer/preserve + +go 1.14 + +require ( + github.com/Luzifer/rconfig/v2 v2.2.1 + github.com/gologme/log v1.2.0 + github.com/gorilla/mux v1.7.4 + github.com/pkg/errors v0.9.1 + github.com/sirupsen/logrus v1.6.0 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..6f9d242 --- /dev/null +++ b/go.sum @@ -0,0 +1,25 @@ +github.com/Luzifer/rconfig v1.2.0 h1:waD1sqasGVSQSrExpLrQ9Q1JmMaltrS391VdOjWXP/I= +github.com/Luzifer/rconfig/v2 v2.2.1 h1:zcDdLQlnlzwcBJ8E0WFzOkQE1pCMn3EbX0dFYkeTczg= +github.com/Luzifer/rconfig/v2 v2.2.1/go.mod h1:OKIX0/JRZrPJ/ZXXWklQEFXA6tBfWaljZbW37w+sqBw= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gologme/log v1.2.0 h1:Ya5Ip/KD6FX7uH0S31QO87nCCSucKtF44TLbTtO7V4c= +github.com/gologme/log v1.2.0/go.mod h1:gq31gQ8wEHkR+WekdWsqDuf8pXTUZA9BnnzTuPz1Y9U= +github.com/gorilla/mux v1.7.4 h1:VuZ8uybHlWmqV03+zRzdwKL4tUnIp1MAQtp1mIFE1bc= +github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/sys 
v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc=
+golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/validator.v2 v2.0.0-20180514200540-135c24b11c19 h1:WB265cn5OpO+hK3pikC9hpP1zI/KTwmyMFKloW9eOVc=
+gopkg.in/validator.v2 v2.0.0-20180514200540-135c24b11c19/go.mod h1:o4V0GXN9/CAmCsvJ0oXYZvrZOe7syiDZSN1GWGZTGzc=
+gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..6b0e4e4
--- /dev/null
+++ b/main.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+	"crypto/sha256"
+	"fmt"
+	"net/http"
+	"net/url"
+	"os"
+	"path"
+	"strings"
+
+	"github.com/gorilla/mux"
+	log "github.com/sirupsen/logrus"
+
+	"github.com/Luzifer/rconfig/v2"
+)
+
+var (
+	cfg = struct {
+		Listen         string `flag:"listen" default:":3000" description:"Port/IP to listen on"`
+		LogLevel       string `flag:"log-level" default:"info" description:"Log level (debug, info, warn, error, fatal)"`
+		StorageDir     string `flag:"storage-dir" default:"./data/" description:"Where to store cached files"`
+		VersionAndExit bool   `flag:"version" default:"false" description:"Prints current version and exits"`
+	}{}
+
+	// version is replaced at build time through -ldflags "-X main.version=..."
+	version = "dev"
+)
+
+// init fills cfg through rconfig, handles the version flag and applies the
+// configured log level. Invalid options or log levels terminate the process.
+func init() {
+	rconfig.AutoEnv(true)
+	if err := rconfig.ParseAndValidate(&cfg); err != nil {
+		log.Fatalf("Unable to parse commandline options: %s", err)
+	}
+
+	if cfg.VersionAndExit {
+		fmt.Printf("preserve %s\n", version)
+		os.Exit(0)
+	}
+
+	l, err := log.ParseLevel(cfg.LogLevel)
+	if err != nil {
+		log.WithError(err).Fatal("Unable to parse log level")
+	}
+	log.SetLevel(l)
+}
+
+// main registers the two cache handlers and serves them on cfg.Listen.
+func main() {
+	r := mux.NewRouter()
+	r.PathPrefix("/latest/").HandlerFunc(handleCacheLatest)
+	r.PathPrefix("/").HandlerFunc(handleCacheOnce)
+
+	// Request paths embed complete URLs ("/https://..."), so the router must
+	// not clean the path and collapse the double slash after the scheme
+	r.SkipClean(true)
+
+	// ListenAndServe only returns on failure, do not exit silently
+	if err := http.ListenAndServe(cfg.Listen, r); err != nil {
+		log.WithError(err).Fatal("HTTP server exited unexpectedly")
+	}
+}
+
+// handleCacheLatest serves the URL given after the "/latest/" prefix and
+// tries to refresh the cached copy from its origin on every request.
+func handleCacheLatest(w http.ResponseWriter, r *http.Request) {
+	handleCache(w, r, strings.TrimPrefix(r.RequestURI, "/latest/"), true)
+}
+
+// handleCacheOnce serves the URL given after the leading "/" and fetches it
+// from its origin only when no cached copy exists yet.
+func handleCacheOnce(w http.ResponseWriter, r *http.Request) {
+	handleCache(w, r, strings.TrimPrefix(r.RequestURI, "/"), false)
+}
+
+// handleCache serves the resource identified by uri from the local cache.
+//
+// The cache entry is fetched through renewCache when it does not exist yet
+// or when update is set. A failed refresh is logged and the previously
+// cached copy, if there is one, is served instead. The X-Cache response
+// header is "MISS" when the delivered copy was fetched during this request
+// and "HIT" when it was taken from the cache. Requests without a parsable
+// URL including a scheme are answered with 400, resources which are neither
+// cached nor fetchable with 404.
+func handleCache(w http.ResponseWriter, r *http.Request, uri string, update bool) {
+	var (
+		cachePath   = urlToCachePath(uri)
+		cacheHeader = "HIT"
+		logger      = log.WithFields(log.Fields{
+			"url":  uri,
+			"path": cachePath,
+		})
+	)
+
+	if u, err := url.Parse(uri); err != nil || u.Scheme == "" {
+		http.Error(w, "Unable to parse requested URL", http.StatusBadRequest)
+		return
+	}
+
+	logger.Debug("Received request")
+
+	metadata, err := loadMeta(cachePath)
+	if err != nil && !os.IsNotExist(err) {
+		logger.WithError(err).Error("Unable to load meta")
+		http.Error(w, "Unable to access entry metadata", http.StatusInternalServerError)
+		return
+	}
+
+	if update || os.IsNotExist(err) {
+		logger.Debug("Updating cache")
+
+		fresh, renewErr := renewCache(uri)
+		if renewErr != nil {
+			logger.WithError(renewErr).Warn("Unable to refresh file")
+		}
+
+		// Keep the metadata of the existing entry when the refresh did not
+		// yield a new one, so the preserved copy is served as a fallback
+		if fresh != nil {
+			metadata = fresh
+			cacheHeader = "MISS"
+		}
+	}
+
+	if metadata == nil {
+		http.NotFound(w, r)
+		return
+	}
+
+	w.Header().Set("Content-Type", metadata.ContentType)
+	w.Header().Set("X-Last-Cached", metadata.LastCached.UTC().Format(http.TimeFormat))
+	w.Header().Set("X-Cache", cacheHeader)
+
+	f, err := os.Open(cachePath)
+	if err != nil {
+		logger.WithError(err).Error("Unable to load cached file")
+		http.Error(w, "Unable to access cache entry", http.StatusInternalServerError)
+		return
+	}
+	defer f.Close()
+
+	http.ServeContent(w, r, "", metadata.LastModified, f)
+}
+
+// urlToCachePath maps url to its cache file below cfg.StorageDir. The file
+// is named after the hex encoded SHA-256 sum of url and placed in a
+// sub-directory named after the first two characters of that sum.
+func urlToCachePath(url string) string {
+	h := fmt.Sprintf("%x", sha256.Sum256([]byte(url)))
+	return path.Join(cfg.StorageDir, h[0:2], h)
+}
diff --git a/meta.go b/meta.go
new file mode 100644
index 0000000..ce4e31a
--- /dev/null
+++ b/meta.go
@@ -0,0 +1,48 @@
+package main
+
import (
	"encoding/json"
	"fmt"
	"os"
	"time"
)

// meta describes a cached resource. It is stored as JSON in a file next to
// the cached payload (payload path plus the ".meta" suffix).
type meta struct {
	ContentType  string    // Content-Type header sent when serving the entry
	LastCached   time.Time // Time the payload was fetched from its origin
	LastModified time.Time // Modification time handed to http.ServeContent
}

// metaFilePath returns the location of the metadata file belonging to the
// cache entry stored at p.
func metaFilePath(p string) string {
	return p + ".meta"
}

// loadMeta reads the metadata of the cache entry stored at p.
//
// When the metadata file does not exist the error of os.Open is returned
// unwrapped, so callers can detect that case through os.IsNotExist. Every
// other failure is wrapped with a description of the failed step. The
// returned *meta is nil whenever an error is returned.
func loadMeta(p string) (*meta, error) {
	// Open directly instead of Stat + Open: one syscall less and no window
	// in which the file can vanish between the two calls
	f, err := os.Open(metaFilePath(p))
	if err != nil {
		if os.IsNotExist(err) {
			return nil, err
		}
		return nil, fmt.Errorf("Unable to open metadata file: %w", err)
	}
	defer f.Close()

	out := new(meta)
	if err := json.NewDecoder(f).Decode(out); err != nil {
		return nil, fmt.Errorf("Unable to decode metadata file: %w", err)
	}

	return out, nil
}

// saveMeta writes m as JSON into the metadata file of the cache entry at p,
// replacing any previous content.
func saveMeta(p string, m meta) error {
	f, err := os.Create(metaFilePath(p))
	if err != nil {
		return fmt.Errorf("Unable to create cache meta file: %w", err)
	}

	if err := json.NewEncoder(f).Encode(m); err != nil {
		f.Close() // The encode error is the one worth reporting
		return fmt.Errorf("Unable to write cache meta file: %w", err)
	}

	// Close may report a write error which was not visible before
	if err := f.Close(); err != nil {
		return fmt.Errorf("Unable to write cache meta file: %w", err)
	}

	return nil
}