123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200 |
- // Copyright 2022 The Gitea Authors. All rights reserved.
- // SPDX-License-Identifier: MIT
-
- package charset
-
- import (
- "fmt"
- "io"
-
- "golang.org/x/net/html"
- )
-
- // HTMLStreamer represents a SAX-like interface for HTML
- type HTMLStreamer interface {
- Error(err error) error
- Doctype(data string) error
- Comment(data string) error
- StartTag(data string, attrs ...html.Attribute) error
- SelfClosingTag(data string, attrs ...html.Attribute) error
- EndTag(data string) error
- Text(data string) error
- }
-
- // PassthroughHTMLStreamer is a passthrough streamer
- type PassthroughHTMLStreamer struct {
- next HTMLStreamer
- }
-
- func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
- return &PassthroughHTMLStreamer{next: next}
- }
-
- var _ (HTMLStreamer) = &PassthroughHTMLStreamer{}
-
- // Error tells the next streamer in line that there is an error
- func (p *PassthroughHTMLStreamer) Error(err error) error {
- return p.next.Error(err)
- }
-
- // Doctype tells the next streamer what the doctype is
- func (p *PassthroughHTMLStreamer) Doctype(data string) error {
- return p.next.Doctype(data)
- }
-
- // Comment tells the next streamer there is a comment
- func (p *PassthroughHTMLStreamer) Comment(data string) error {
- return p.next.Comment(data)
- }
-
- // StartTag tells the next streamer there is a starting tag
- func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
- return p.next.StartTag(data, attrs...)
- }
-
- // SelfClosingTag tells the next streamer there is a self-closing tag
- func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
- return p.next.SelfClosingTag(data, attrs...)
- }
-
- // EndTag tells the next streamer there is a end tag
- func (p *PassthroughHTMLStreamer) EndTag(data string) error {
- return p.next.EndTag(data)
- }
-
- // Text tells the next streamer there is a text
- func (p *PassthroughHTMLStreamer) Text(data string) error {
- return p.next.Text(data)
- }
-
- // HTMLStreamWriter acts as a writing sink
- type HTMLStreamerWriter struct {
- io.Writer
- err error
- }
-
- // Write implements io.Writer
- func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
- if h.err != nil {
- return 0, h.err
- }
- return h.Writer.Write(data)
- }
-
- // Write implements io.StringWriter
- func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
- if h.err != nil {
- return 0, h.err
- }
- return h.Writer.Write([]byte(data))
- }
-
- // Error tells the next streamer in line that there is an error
- func (h *HTMLStreamerWriter) Error(err error) error {
- if h.err == nil {
- h.err = err
- }
- return h.err
- }
-
- // Doctype tells the next streamer what the doctype is
- func (h *HTMLStreamerWriter) Doctype(data string) error {
- _, h.err = h.WriteString("<!DOCTYPE " + data + ">")
- return h.err
- }
-
- // Comment tells the next streamer there is a comment
- func (h *HTMLStreamerWriter) Comment(data string) error {
- _, h.err = h.WriteString("<!--" + data + "-->")
- return h.err
- }
-
- // StartTag tells the next streamer there is a starting tag
- func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
- return h.startTag(data, attrs, false)
- }
-
- // SelfClosingTag tells the next streamer there is a self-closing tag
- func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
- return h.startTag(data, attrs, true)
- }
-
- func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
- if _, h.err = h.WriteString("<" + data); h.err != nil {
- return h.err
- }
- for _, attr := range attrs {
- if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil {
- return h.err
- }
- }
- if selfclosing {
- if _, h.err = h.WriteString("/>"); h.err != nil {
- return h.err
- }
- } else {
- if _, h.err = h.WriteString(">"); h.err != nil {
- return h.err
- }
- }
- return h.err
- }
-
- // EndTag tells the next streamer there is a end tag
- func (h *HTMLStreamerWriter) EndTag(data string) error {
- _, h.err = h.WriteString("</" + data + ">")
- return h.err
- }
-
- // Text tells the next streamer there is a text
- func (h *HTMLStreamerWriter) Text(data string) error {
- _, h.err = h.WriteString(html.EscapeString(data))
- return h.err
- }
-
- // StreamHTML streams an html to a provided streamer
- func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
- tokenizer := html.NewTokenizer(source)
- for {
- tt := tokenizer.Next()
- switch tt {
- case html.ErrorToken:
- if tokenizer.Err() != io.EOF {
- return tokenizer.Err()
- }
- return nil
- case html.DoctypeToken:
- token := tokenizer.Token()
- if err := streamer.Doctype(token.Data); err != nil {
- return err
- }
- case html.CommentToken:
- token := tokenizer.Token()
- if err := streamer.Comment(token.Data); err != nil {
- return err
- }
- case html.StartTagToken:
- token := tokenizer.Token()
- if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
- return err
- }
- case html.SelfClosingTagToken:
- token := tokenizer.Token()
- if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
- return err
- }
- case html.EndTagToken:
- token := tokenizer.Token()
- if err := streamer.EndTag(token.Data); err != nil {
- return err
- }
- case html.TextToken:
- token := tokenizer.Token()
- if err := streamer.Text(token.Data); err != nil {
- return err
- }
- default:
- return fmt.Errorf("unknown type of token: %d", tt)
- }
- }
- }
|