From 187ae109d0a6f022a3a1e4aaae3ffe922b1f3272 Mon Sep 17 00:00:00 2001 From: Antoine GIRARD Date: Thu, 29 Aug 2019 05:18:42 +0200 Subject: Update to mod enabled version goquery (#8021) * update to mod enabled version goquery * manually set appengine new release --- vendor/github.com/PuerkitoBio/goquery/.travis.yml | 6 ++- vendor/github.com/PuerkitoBio/goquery/README.md | 61 ++++++++++++++++++++-- vendor/github.com/PuerkitoBio/goquery/array.go | 25 ++++++++- vendor/github.com/PuerkitoBio/goquery/expand.go | 24 +++++++++ vendor/github.com/PuerkitoBio/goquery/go.mod | 6 +++ vendor/github.com/PuerkitoBio/goquery/go.sum | 5 ++ .../github.com/PuerkitoBio/goquery/manipulation.go | 7 +-- vendor/github.com/PuerkitoBio/goquery/query.go | 6 +-- vendor/github.com/PuerkitoBio/goquery/type.go | 12 +++-- vendor/github.com/andybalholm/cascadia/go.mod | 3 ++ vendor/modules.txt | 6 +-- 11 files changed, 140 insertions(+), 21 deletions(-) create mode 100644 vendor/github.com/PuerkitoBio/goquery/go.mod create mode 100644 vendor/github.com/PuerkitoBio/goquery/go.sum create mode 100644 vendor/github.com/andybalholm/cascadia/go.mod (limited to 'vendor') diff --git a/vendor/github.com/PuerkitoBio/goquery/.travis.yml b/vendor/github.com/PuerkitoBio/goquery/.travis.yml index 148a1fb532..cc1402d5cf 100644 --- a/vendor/github.com/PuerkitoBio/goquery/.travis.yml +++ b/vendor/github.com/PuerkitoBio/goquery/.travis.yml @@ -1,7 +1,7 @@ language: go go: - - 1.1.x + - 1.1 - 1.2.x - 1.3.x - 1.4.x @@ -9,4 +9,8 @@ go: - 1.6.x - 1.7.x - 1.8.x + - 1.9.x + - "1.10.x" + - 1.11.x - tip + diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md index 7b01a27178..84f9af39e3 100644 --- a/vendor/github.com/PuerkitoBio/goquery/README.md +++ b/vendor/github.com/PuerkitoBio/goquery/README.md @@ -1,6 +1,5 @@ -# goquery - a little like that j-thing, only in Go -[![build 
status](https://secure.travis-ci.org/PuerkitoBio/goquery.png)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge) - +# goquery - a little like that j-thing, only in Go +[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge) goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off. @@ -8,6 +7,16 @@ Also, because the net/html parser requires UTF-8 encoding, so does goquery: it i Syntax-wise, it is as close as possible to jQuery, with the same function names when possible, and that warm and fuzzy chainable interface. jQuery being the ultra-popular library that it is, I felt that writing a similar HTML-manipulating library was better to follow its API than to start anew (in the same spirit as Go's `fmt` package), even though some of its methods are less than intuitive (looking at you, [index()][index]...). 
+## Table of Contents + +* [Installation](#installation) +* [Changelog](#changelog) +* [API](#api) +* [Examples](#examples) +* [Related Projects](#related-projects) +* [Support](#support) +* [License](#license) + ## Installation Please note that because of the net/html dependency, goquery requires Go1.1+. @@ -28,6 +37,11 @@ Please note that because of the net/html dependency, goquery requires Go1.1+. **Note that goquery's API is now stable, and will not break.** +* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505). +* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples. +* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`. +* **2018-01-28 (v1.3.0)** : Add `ToEnd` constant to `Slice` until the end of the selection (thanks to @davidjwilkins for raising the issue). +* **2018-01-11 (v1.2.0)** : Add `AddBack*` and deprecate `AndSelf` (thanks to @davidjwilkins). * **2017-02-12 (v1.1.0)** : Add `SetHtml` and `SetText` (thanks to @glebtv). * **2016-12-29 (v1.0.2)** : Optimize allocations for `Selection.Text` (thanks to @radovskyb). * **2016-08-28 (v1.0.1)** : Optimize performance for large documents. @@ -83,12 +97,24 @@ package main import ( "fmt" "log" + "net/http" "github.com/PuerkitoBio/goquery" ) func ExampleScrape() { - doc, err := goquery.NewDocument("http://metalsucks.net") + // Request the HTML page. + res, err := http.Get("http://metalsucks.net") + if err != nil { + log.Fatal(err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + log.Fatalf("status code error: %d %s", res.StatusCode, res.Status) + } + + // Load the HTML document + doc, err := goquery.NewDocumentFromReader(res.Body) if err != nil { log.Fatal(err) } @@ -107,6 +133,31 @@ func main() { } ``` +## Related Projects + +- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags. +- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery. 
+- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors. +- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework +- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets. +- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping. + +## Support + +There are a number of ways you can support the project: + +* Use it, star it, build something with it, spread the word! + - If you do build something open-source or otherwise publicly-visible, let me know so I can add it to the [Related Projects](#related-projects) section! +* Raise issues to improve the project (note: doc typos and clarifications are issues too!) + - Please search existing issues before opening a new one - it may have already been addressed. +* Pull requests: please discuss new code in an issue first, unless the fix is really trivial. + - Make sure new code is tested. + - Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue. + +If you desperately want to send money my way, I have a BuyMeACoffee.com page: + +Buy Me A Coffee + ## License The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia's license is [here][caslic]. @@ -114,6 +165,7 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia' [jquery]: http://jquery.com/ [go]: http://golang.org/ [cascadia]: https://github.com/andybalholm/cascadia +[cascadiacli]: https://github.com/suntong/cascadia [bsd]: http://opensource.org/licenses/BSD-3-Clause [golic]: http://golang.org/LICENSE [caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE @@ -124,3 +176,4 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic].
Cascadia' [wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks [thatguystone]: https://github.com/thatguystone [piotr]: https://github.com/piotrkowalczuk +[goq]: https://github.com/andrewstuart/goq diff --git a/vendor/github.com/PuerkitoBio/goquery/array.go b/vendor/github.com/PuerkitoBio/goquery/array.go index d7af5eee1c..1b1f6cbe66 100644 --- a/vendor/github.com/PuerkitoBio/goquery/array.go +++ b/vendor/github.com/PuerkitoBio/goquery/array.go @@ -4,6 +4,16 @@ import ( "golang.org/x/net/html" ) +const ( + maxUint = ^uint(0) + maxInt = int(maxUint >> 1) + + // ToEnd is a special index value that can be used as end index in a call + // to Slice so that all elements are selected until the end of the Selection. + // It is equivalent to passing (*Selection).Length(). + ToEnd = maxInt +) + // First reduces the set of matched elements to the first in the set. // It returns a new Selection object, and an empty Selection object if the // the selection is empty. @@ -35,12 +45,23 @@ func (s *Selection) Eq(index int) *Selection { } // Slice reduces the set of matched elements to a subset specified by a range -// of indices. +// of indices. The start index is 0-based and indicates the index of the first +// element to select. The end index is 0-based and indicates the index at which +// the elements stop being selected (the end index is not selected). +// +// The indices may be negative, in which case they represent an offset from the +// end of the selection. +// +// The special value ToEnd may be specified as end index, in which case all elements +// until the end are selected. This works both for a positive and negative start +// index. 
func (s *Selection) Slice(start, end int) *Selection { if start < 0 { start += len(s.Nodes) } - if end < 0 { + if end == ToEnd { + end = len(s.Nodes) + } else if end < 0 { end += len(s.Nodes) } return pushStack(s, s.Nodes[start:end]) diff --git a/vendor/github.com/PuerkitoBio/goquery/expand.go b/vendor/github.com/PuerkitoBio/goquery/expand.go index f0c6c86d8c..7caade531e 100644 --- a/vendor/github.com/PuerkitoBio/goquery/expand.go +++ b/vendor/github.com/PuerkitoBio/goquery/expand.go @@ -41,6 +41,30 @@ func (s *Selection) AddNodes(nodes ...*html.Node) *Selection { // AndSelf adds the previous set of elements on the stack to the current set. // It returns a new Selection object containing the current Selection combined // with the previous one. +// Deprecated: This function has been deprecated and is now an alias for AddBack(). func (s *Selection) AndSelf() *Selection { + return s.AddBack() +} + +// AddBack adds the previous set of elements on the stack to the current set. +// It returns a new Selection object containing the current Selection combined +// with the previous one. +func (s *Selection) AddBack() *Selection { return s.AddSelection(s.prevSel) } + +// AddBackFiltered reduces the previous set of elements on the stack to those that +// match the selector string, and adds them to the current set. +// It returns a new Selection object containing the current Selection combined +// with the filtered previous one +func (s *Selection) AddBackFiltered(selector string) *Selection { + return s.AddSelection(s.prevSel.Filter(selector)) +} + +// AddBackMatcher reduces the previous set of elements on the stack to those that match +// the matcher, and adds them to the current set.
+// It returns a new Selection object containing the current Selection combined +// with the filtered previous one +func (s *Selection) AddBackMatcher(m Matcher) *Selection { + return s.AddSelection(s.prevSel.FilterMatcher(m)) +} diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod new file mode 100644 index 0000000000..2fa1332a58 --- /dev/null +++ b/vendor/github.com/PuerkitoBio/goquery/go.mod @@ -0,0 +1,6 @@ +module github.com/PuerkitoBio/goquery + +require ( + github.com/andybalholm/cascadia v1.0.0 + golang.org/x/net v0.0.0-20181114220301-adae6a3d119a +) diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum new file mode 100644 index 0000000000..11c5757546 --- /dev/null +++ b/vendor/github.com/PuerkitoBio/goquery/go.sum @@ -0,0 +1,5 @@ +github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= +github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go index ebb4ffe128..34eb7570fb 100644 --- a/vendor/github.com/PuerkitoBio/goquery/manipulation.go +++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go @@ -270,13 +270,14 @@ func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection { return s.Remove() } -// Set the html content of each element in the selection to specified html string. +// SetHtml sets the html content of each element in the selection to +// specified html string. 
func (s *Selection) SetHtml(html string) *Selection { return setHtmlNodes(s, parseHtml(html)...) } -// Set the content of each element in the selection to specified content. The -// provided text string is escaped. +// SetText sets the content of each element in the selection to specified content. +// The provided text string is escaped. func (s *Selection) SetText(text string) *Selection { return s.SetHtml(html.EscapeString(text)) } diff --git a/vendor/github.com/PuerkitoBio/goquery/query.go b/vendor/github.com/PuerkitoBio/goquery/query.go index 1a7f8732e9..fe86bf0bf0 100644 --- a/vendor/github.com/PuerkitoBio/goquery/query.go +++ b/vendor/github.com/PuerkitoBio/goquery/query.go @@ -5,11 +5,7 @@ import "golang.org/x/net/html" // Is checks the current matched set of elements against a selector and // returns true if at least one of these elements matches. func (s *Selection) Is(selector string) bool { - if len(s.Nodes) > 0 { - return s.IsMatcher(compileMatcher(selector)) - } - - return false + return s.IsMatcher(compileMatcher(selector)) } // IsMatcher checks the current matched set of elements against a matcher and diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go index e2169fa30d..6ad51dbc53 100644 --- a/vendor/github.com/PuerkitoBio/goquery/type.go +++ b/vendor/github.com/PuerkitoBio/goquery/type.go @@ -31,6 +31,10 @@ func NewDocumentFromNode(root *html.Node) *Document { // NewDocument is a Document constructor that takes a string URL as argument. // It loads the specified document, parses it, and stores the root Document // node, ready to be manipulated. +// +// Deprecated: Use the net/http standard library package to make the request +// and validate the response before calling goquery.NewDocumentFromReader +// with the response's body. 
func NewDocument(url string) (*Document, error) { // Load the URL res, e := http.Get(url) @@ -40,10 +44,10 @@ func NewDocument(url string) (*Document, error) { return NewDocumentFromResponse(res) } -// NewDocumentFromReader returns a Document from a generic reader. +// NewDocumentFromReader returns a Document from an io.Reader. // It returns an error as second value if the reader's data cannot be parsed -// as html. It does *not* check if the reader is also an io.Closer, so the -// provided reader is never closed by this call, it is the responsibility +// as html. It does not check if the reader is also an io.Closer, the +// provided reader is never closed by this call. It is the responsibility // of the caller to close it if required. func NewDocumentFromReader(r io.Reader) (*Document, error) { root, e := html.Parse(r) @@ -56,6 +60,8 @@ func NewDocumentFromReader(r io.Reader) (*Document, error) { // NewDocumentFromResponse is another Document constructor that takes an http response as argument. // It loads the specified response's document, parses it, and stores the root Document // node, ready to be manipulated. The response's body is closed on return. +// +// Deprecated: Use goquery.NewDocumentFromReader with the response's body. 
func NewDocumentFromResponse(res *http.Response) (*Document, error) { if res == nil { return nil, errors.New("Response is nil") diff --git a/vendor/github.com/andybalholm/cascadia/go.mod b/vendor/github.com/andybalholm/cascadia/go.mod new file mode 100644 index 0000000000..e6febbbfed --- /dev/null +++ b/vendor/github.com/andybalholm/cascadia/go.mod @@ -0,0 +1,3 @@ +module "github.com/andybalholm/cascadia" + +require "golang.org/x/net" v0.0.0-20180218175443-cbe0f9307d01 diff --git a/vendor/modules.txt b/vendor/modules.txt index 8c4607181c..ade08e5b45 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -30,11 +30,11 @@ gitea.com/macaron/session/redis gitea.com/macaron/toolbox # github.com/BurntSushi/toml v0.3.1 github.com/BurntSushi/toml -# github.com/PuerkitoBio/goquery v0.0.0-20170324135448-ed7d758e9a34 +# github.com/PuerkitoBio/goquery v1.5.0 github.com/PuerkitoBio/goquery # github.com/RoaringBitmap/roaring v0.4.7 github.com/RoaringBitmap/roaring -# github.com/andybalholm/cascadia v0.0.0-20161224141413-349dd0209470 +# github.com/andybalholm/cascadia v1.0.0 github.com/andybalholm/cascadia # github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 github.com/anmitsu/go-shlex @@ -400,7 +400,7 @@ golang.org/x/text/internal/language/compact golang.org/x/text/internal/utf8internal golang.org/x/text/runes golang.org/x/text/internal/tag -# google.golang.org/appengine v1.6.1 +# google.golang.org/appengine v1.6.2 google.golang.org/appengine/cloudsql google.golang.org/appengine/urlfetch google.golang.org/appengine/internal -- cgit v1.2.3