You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

htmlstream.go 5.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. // Copyright 2022 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package charset
  4. import (
  5. "fmt"
  6. "io"
  7. "golang.org/x/net/html"
  8. )
  9. // HTMLStreamer represents a SAX-like interface for HTML
  10. type HTMLStreamer interface {
  11. Error(err error) error
  12. Doctype(data string) error
  13. Comment(data string) error
  14. StartTag(data string, attrs ...html.Attribute) error
  15. SelfClosingTag(data string, attrs ...html.Attribute) error
  16. EndTag(data string) error
  17. Text(data string) error
  18. }
  19. // PassthroughHTMLStreamer is a passthrough streamer
  20. type PassthroughHTMLStreamer struct {
  21. next HTMLStreamer
  22. }
  23. func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
  24. return &PassthroughHTMLStreamer{next: next}
  25. }
  26. var _ (HTMLStreamer) = &PassthroughHTMLStreamer{}
  27. // Error tells the next streamer in line that there is an error
  28. func (p *PassthroughHTMLStreamer) Error(err error) error {
  29. return p.next.Error(err)
  30. }
  31. // Doctype tells the next streamer what the doctype is
  32. func (p *PassthroughHTMLStreamer) Doctype(data string) error {
  33. return p.next.Doctype(data)
  34. }
  35. // Comment tells the next streamer there is a comment
  36. func (p *PassthroughHTMLStreamer) Comment(data string) error {
  37. return p.next.Comment(data)
  38. }
  39. // StartTag tells the next streamer there is a starting tag
  40. func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
  41. return p.next.StartTag(data, attrs...)
  42. }
  43. // SelfClosingTag tells the next streamer there is a self-closing tag
  44. func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
  45. return p.next.SelfClosingTag(data, attrs...)
  46. }
  47. // EndTag tells the next streamer there is a end tag
  48. func (p *PassthroughHTMLStreamer) EndTag(data string) error {
  49. return p.next.EndTag(data)
  50. }
  51. // Text tells the next streamer there is a text
  52. func (p *PassthroughHTMLStreamer) Text(data string) error {
  53. return p.next.Text(data)
  54. }
  // HTMLStreamerWriter acts as a writing sink: it embeds the io.Writer that
  // receives the output and keeps an error value alongside it.
  type HTMLStreamerWriter struct {
  	// Writer is the destination for everything that is written.
  	io.Writer
  	// err holds the error recorded on this writer, or nil if there is none.
  	err error
  }
  60. // Write implements io.Writer
  61. func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
  62. if h.err != nil {
  63. return 0, h.err
  64. }
  65. return h.Writer.Write(data)
  66. }
  67. // Write implements io.StringWriter
  68. func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
  69. if h.err != nil {
  70. return 0, h.err
  71. }
  72. return h.Writer.Write([]byte(data))
  73. }
  74. // Error tells the next streamer in line that there is an error
  75. func (h *HTMLStreamerWriter) Error(err error) error {
  76. if h.err == nil {
  77. h.err = err
  78. }
  79. return h.err
  80. }
  81. // Doctype tells the next streamer what the doctype is
  82. func (h *HTMLStreamerWriter) Doctype(data string) error {
  83. _, h.err = h.WriteString("<!DOCTYPE " + data + ">")
  84. return h.err
  85. }
  86. // Comment tells the next streamer there is a comment
  87. func (h *HTMLStreamerWriter) Comment(data string) error {
  88. _, h.err = h.WriteString("<!--" + data + "-->")
  89. return h.err
  90. }
  91. // StartTag tells the next streamer there is a starting tag
  92. func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
  93. return h.startTag(data, attrs, false)
  94. }
  95. // SelfClosingTag tells the next streamer there is a self-closing tag
  96. func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
  97. return h.startTag(data, attrs, true)
  98. }
  99. func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
  100. if _, h.err = h.WriteString("<" + data); h.err != nil {
  101. return h.err
  102. }
  103. for _, attr := range attrs {
  104. if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil {
  105. return h.err
  106. }
  107. }
  108. if selfclosing {
  109. if _, h.err = h.WriteString("/>"); h.err != nil {
  110. return h.err
  111. }
  112. } else {
  113. if _, h.err = h.WriteString(">"); h.err != nil {
  114. return h.err
  115. }
  116. }
  117. return h.err
  118. }
  119. // EndTag tells the next streamer there is a end tag
  120. func (h *HTMLStreamerWriter) EndTag(data string) error {
  121. _, h.err = h.WriteString("</" + data + ">")
  122. return h.err
  123. }
  124. // Text tells the next streamer there is a text
  125. func (h *HTMLStreamerWriter) Text(data string) error {
  126. _, h.err = h.WriteString(html.EscapeString(data))
  127. return h.err
  128. }
  129. // StreamHTML streams an html to a provided streamer
  130. func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
  131. tokenizer := html.NewTokenizer(source)
  132. for {
  133. tt := tokenizer.Next()
  134. switch tt {
  135. case html.ErrorToken:
  136. if tokenizer.Err() != io.EOF {
  137. return tokenizer.Err()
  138. }
  139. return nil
  140. case html.DoctypeToken:
  141. token := tokenizer.Token()
  142. if err := streamer.Doctype(token.Data); err != nil {
  143. return err
  144. }
  145. case html.CommentToken:
  146. token := tokenizer.Token()
  147. if err := streamer.Comment(token.Data); err != nil {
  148. return err
  149. }
  150. case html.StartTagToken:
  151. token := tokenizer.Token()
  152. if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
  153. return err
  154. }
  155. case html.SelfClosingTagToken:
  156. token := tokenizer.Token()
  157. if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
  158. return err
  159. }
  160. case html.EndTagToken:
  161. token := tokenizer.Token()
  162. if err := streamer.EndTag(token.Data); err != nil {
  163. return err
  164. }
  165. case html.TextToken:
  166. token := tokenizer.Token()
  167. if err := streamer.Text(token.Data); err != nil {
  168. return err
  169. }
  170. default:
  171. return fmt.Errorf("unknown type of token: %d", tt)
  172. }
  173. }
  174. }