// go-latestver/internal/fetcher/html.go

package fetcher
import (
"context"
"regexp"
"time"
"github.com/antchfx/htmlquery"
"github.com/antchfx/xpath"
"github.com/pkg/errors"
"golang.org/x/net/html"
"github.com/Luzifer/go-latestver/internal/database"
"github.com/Luzifer/go_helpers/v2/fieldcollection"
)
/*
* @module html
* @module_desc Downloads website, selects text-node using XPath and optionally applies custom regular expression
*/
// htmlFetcherDefaultRegex is the pattern used when no "regex" attribute is
// configured. Its single capture group matches an optional leading "v"
// followed by two or more repetitions of "digits with an optional trailing
// dot". It is a variable rather than a constant because its address is passed
// as the default value when reading the "regex" attribute.
var htmlFetcherDefaultRegex = `(v?(?:[0-9]+\.?){2,})`
// HTMLFetcher implements the fetcher interface to monitor versions on websites by xpath queries.
// It carries no state; all configuration is passed to its methods as attributes.
type HTMLFetcher struct{}
// init hands a constructor for HTMLFetcher to registerFetcher under the
// key "html".
func init() {
	registerFetcher("html", func() Fetcher {
		return &HTMLFetcher{}
	})
}
// FetchVersion retrieves the latest version for the catalog entry
func (HTMLFetcher) FetchVersion(_ context.Context, attrs *fieldcollection.FieldCollection) (string, time.Time, error) {
2021-11-22 02:39:25 +00:00
doc, err := htmlquery.LoadURL(attrs.MustString("url", nil))
if err != nil {
return "", time.Time{}, errors.Wrap(err, "loading URL")
}
node, err := htmlquery.Query(doc, attrs.MustString("xpath", nil))
if err != nil {
return "", time.Time{}, errors.Wrap(err, "querying xpath")
}
if node == nil {
return "", time.Time{}, errors.New("xpath expression lead to nil-node")
}
if node.Type == html.ElementNode && node.FirstChild != nil && node.FirstChild.Type == html.TextNode {
2021-11-22 02:39:25 +00:00
node = node.FirstChild
}
if node.Type != html.TextNode {
return "", time.Time{}, errors.Errorf("xpath expression lead to unexpected node type: %d", node.Type)
}
match := regexp.MustCompile(attrs.MustString("regex", &htmlFetcherDefaultRegex)).FindStringSubmatch(node.Data)
Add PR testing, fix linter errors Squashed commit of the following: commit 2a83adf6c54d6abcf6762760fd38f2505511f545 Author: Knut Ahlers <knut@ahlers.me> Date: Wed Dec 1 03:34:49 2021 +0100 Lint: Fix copylocks errors Signed-off-by: Knut Ahlers <knut@ahlers.me> commit 418f85d504203a6968329e280ecd9cf7d2365373 Author: Knut Ahlers <knut@ahlers.me> Date: Wed Dec 1 03:31:38 2021 +0100 Lint: Fix gosec warnings Signed-off-by: Knut Ahlers <knut@ahlers.me> commit 1a977875740be3c40884aa0985578721ceb4ae37 Author: Knut Ahlers <knut@ahlers.me> Date: Wed Dec 1 03:28:02 2021 +0100 Lint: Disable gomnd for certain cases Signed-off-by: Knut Ahlers <knut@ahlers.me> commit 5e81cf79ba7256b321442530715a2b53de0a18e1 Author: Knut Ahlers <knut@ahlers.me> Date: Wed Dec 1 03:26:01 2021 +0100 Lint: fix ineffassign errors Signed-off-by: Knut Ahlers <knut@ahlers.me> commit cb14fae2dad985368e1f05d62f8e778817d01c6f Author: Knut Ahlers <knut@ahlers.me> Date: Wed Dec 1 03:23:42 2021 +0100 Lint: Fix revive linter errors Signed-off-by: Knut Ahlers <knut@ahlers.me> commit b3390b8dff9b939caa4e3821a48dd848af0bfba4 Author: Knut Ahlers <knut@ahlers.me> Date: Wed Dec 1 03:21:35 2021 +0100 Lint: Remove unrequired dereference Signed-off-by: Knut Ahlers <knut@ahlers.me> commit f9052e6aa530c5b5017249fc6c31bdbb94252760 Author: Knut Ahlers <knut@ahlers.me> Date: Wed Dec 1 03:20:43 2021 +0100 Lint: Remove deadcode Signed-off-by: Knut Ahlers <knut@ahlers.me> commit 72b88adaa25a3bb5a7af21da7ed12f08cae36573 Author: Knut Ahlers <knut@ahlers.me> Date: Wed Dec 1 02:52:27 2021 +0100 Add PR-testing Signed-off-by: Knut Ahlers <knut@ahlers.me> Signed-off-by: Knut Ahlers <knut@ahlers.me>
2021-12-01 02:38:52 +00:00
if len(match) < 2 { //nolint:gomnd // Simple count of fields, no need for constant
2021-11-22 02:39:25 +00:00
return "", time.Time{}, errors.New("regular expression did not yield version")
}
return match[1], time.Now(), nil
}
// Links retrieves a collection of links for the fetcher
func (HTMLFetcher) Links(attrs *fieldcollection.FieldCollection) []database.CatalogLink {
2021-11-22 02:39:25 +00:00
return []database.CatalogLink{
{
IconClass: "fas fa-globe",
Name: "Website",
URL: attrs.MustString("url", nil),
},
}
}
// Validate validates the configuration given to the fetcher
func (HTMLFetcher) Validate(attrs *fieldcollection.FieldCollection) error {
// @attr url required string "" URL to fetch the HTML from
2021-11-22 02:39:25 +00:00
if v, err := attrs.String("url"); err != nil || v == "" {
return errors.New("url is expected to be non-empty string")
}
// @attr xpath required string "" XPath expression leading to the text-node containing the version
2021-11-22 02:39:25 +00:00
if v, err := attrs.String("xpath"); err != nil || v == "" {
return errors.New("xpath is expected to be non-empty string")
}
if _, err := xpath.Compile(attrs.MustString("xpath", nil)); err != nil {
return errors.Wrap(err, "compiling xpath expression")
}
// @attr regex optional string "(v?(?:[0-9]+\.?){2,})" Regular expression to apply to the text from the XPath expression
2021-11-22 02:39:25 +00:00
if attrs.CanString("regex") {
if _, err := regexp.Compile(attrs.MustString("regex", nil)); err != nil {
return errors.Wrap(err, "invalid regex given")
}
}
return nil
}