You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

type.go 4.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. package goquery
  2. import (
  3. "errors"
  4. "io"
  5. "net/http"
  6. "net/url"
  7. "github.com/andybalholm/cascadia"
  8. "golang.org/x/net/html"
  9. )
  10. // Document represents an HTML document to be manipulated. Unlike jQuery, which
  11. // is loaded as part of a DOM document, and thus acts upon its containing
  12. // document, GoQuery doesn't know which HTML document to act upon. So it needs
  13. // to be told, and that's what the Document class is for. It holds the root
  14. // document node to manipulate, and can make selections on this document.
  15. type Document struct {
  16. *Selection
  17. Url *url.URL
  18. rootNode *html.Node
  19. }
  20. // NewDocumentFromNode is a Document constructor that takes a root html Node
  21. // as argument.
  22. func NewDocumentFromNode(root *html.Node) *Document {
  23. return newDocument(root, nil)
  24. }
  25. // NewDocument is a Document constructor that takes a string URL as argument.
  26. // It loads the specified document, parses it, and stores the root Document
  27. // node, ready to be manipulated.
  28. //
  29. // Deprecated: Use the net/http standard library package to make the request
  30. // and validate the response before calling goquery.NewDocumentFromReader
  31. // with the response's body.
  32. func NewDocument(url string) (*Document, error) {
  33. // Load the URL
  34. res, e := http.Get(url)
  35. if e != nil {
  36. return nil, e
  37. }
  38. return NewDocumentFromResponse(res)
  39. }
  40. // NewDocumentFromReader returns a Document from an io.Reader.
  41. // It returns an error as second value if the reader's data cannot be parsed
  42. // as html. It does not check if the reader is also an io.Closer, the
  43. // provided reader is never closed by this call. It is the responsibility
  44. // of the caller to close it if required.
  45. func NewDocumentFromReader(r io.Reader) (*Document, error) {
  46. root, e := html.Parse(r)
  47. if e != nil {
  48. return nil, e
  49. }
  50. return newDocument(root, nil), nil
  51. }
  52. // NewDocumentFromResponse is another Document constructor that takes an http response as argument.
  53. // It loads the specified response's document, parses it, and stores the root Document
  54. // node, ready to be manipulated. The response's body is closed on return.
  55. //
  56. // Deprecated: Use goquery.NewDocumentFromReader with the response's body.
  57. func NewDocumentFromResponse(res *http.Response) (*Document, error) {
  58. if res == nil {
  59. return nil, errors.New("Response is nil")
  60. }
  61. defer res.Body.Close()
  62. if res.Request == nil {
  63. return nil, errors.New("Response.Request is nil")
  64. }
  65. // Parse the HTML into nodes
  66. root, e := html.Parse(res.Body)
  67. if e != nil {
  68. return nil, e
  69. }
  70. // Create and fill the document
  71. return newDocument(root, res.Request.URL), nil
  72. }
  73. // CloneDocument creates a deep-clone of a document.
  74. func CloneDocument(doc *Document) *Document {
  75. return newDocument(cloneNode(doc.rootNode), doc.Url)
  76. }
  77. // Private constructor, make sure all fields are correctly filled.
  78. func newDocument(root *html.Node, url *url.URL) *Document {
  79. // Create and fill the document
  80. d := &Document{nil, url, root}
  81. d.Selection = newSingleSelection(root, d)
  82. return d
  83. }
  84. // Selection represents a collection of nodes matching some criteria. The
  85. // initial Selection can be created by using Document.Find, and then
  86. // manipulated using the jQuery-like chainable syntax and methods.
  87. type Selection struct {
  88. Nodes []*html.Node
  89. document *Document
  90. prevSel *Selection
  91. }
  92. // Helper constructor to create an empty selection
  93. func newEmptySelection(doc *Document) *Selection {
  94. return &Selection{nil, doc, nil}
  95. }
  96. // Helper constructor to create a selection of only one node
  97. func newSingleSelection(node *html.Node, doc *Document) *Selection {
  98. return &Selection{[]*html.Node{node}, doc, nil}
  99. }
  100. // Matcher is an interface that defines the methods to match
  101. // HTML nodes against a compiled selector string. Cascadia's
  102. // Selector implements this interface.
  103. type Matcher interface {
  104. Match(*html.Node) bool
  105. MatchAll(*html.Node) []*html.Node
  106. Filter([]*html.Node) []*html.Node
  107. }
  108. // compileMatcher compiles the selector string s and returns
  109. // the corresponding Matcher. If s is an invalid selector string,
  110. // it returns a Matcher that fails all matches.
  111. func compileMatcher(s string) Matcher {
  112. cs, err := cascadia.Compile(s)
  113. if err != nil {
  114. return invalidMatcher{}
  115. }
  116. return cs
  117. }
  118. // invalidMatcher is a Matcher that always fails to match.
  119. type invalidMatcher struct{}
  120. func (invalidMatcher) Match(n *html.Node) bool { return false }
  121. func (invalidMatcher) MatchAll(n *html.Node) []*html.Node { return nil }
  122. func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }