]> source.dussan.org Git - gitea.git/blob
fcf1ffbc1
[gitea.git] /
1 // Copyright 2022 The Gitea Authors. All rights reserved.
2 // SPDX-License-Identifier: MIT
3
4 package charset
5
6 import (
7         "fmt"
8         "regexp"
9         "sort"
10         "strings"
11         "unicode"
12         "unicode/utf8"
13
14         "code.gitea.io/gitea/modules/translation"
15
16         "golang.org/x/net/html"
17 )
18
// defaultWordRegexp mirrors VS Code's defaultWordRegexp. It matches either a
// number-like token (optional minus sign, optional digits, a dot, a digit and
// trailing word characters) or a run of one or more characters that are not
// whitespace, not in the range \x00-\x1f, and not one of the listed
// punctuation characters. Text uses it to split input into words.
var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s\x00-\x1f]+)`)
21
22 func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer {
23         sort.Slice(allowed, func(i, j int) bool {
24                 return allowed[i] < allowed[j]
25         })
26         return &escapeStreamer{
27                 escaped:                 &EscapeStatus{},
28                 PassthroughHTMLStreamer: *NewPassthroughStreamer(next),
29                 locale:                  locale,
30                 ambiguousTables:         AmbiguousTablesForLocale(locale),
31                 allowed:                 allowed,
32         }
33 }
34
// escapeStreamer is the HTMLStreamer created by NewEscapeStreamer. It
// inspects text for broken, invisible and ambiguous runes and emits them
// wrapped in marker <span> elements through the embedded streamer.
type escapeStreamer struct {
	// PassthroughHTMLStreamer receives all output (text and the generated span tags).
	PassthroughHTMLStreamer
	// escaped accumulates what kinds of runes were escaped; exposed via EscapeStatus.
	escaped *EscapeStatus
	// locale translates the tooltip text attached to ambiguous runes.
	locale translation.Locale
	// ambiguousTables are the tables consulted by isAmbiguous in runeTypes.
	ambiguousTables []*AmbiguousTable
	// allowed holds runes exempt from the invisible/ambiguous checks; it is
	// sorted ascending by NewEscapeStreamer because isAllowed binary-searches it.
	allowed []rune
}
42
// EscapeStatus returns the status object this streamer updates while escaping.
// The same pointer is returned on every call, so flags set by later Text calls
// are visible through it.
func (e *escapeStreamer) EscapeStatus() *EscapeStatus {
	return e.escaped
}
46
47 // Text tells the next streamer there is a text
48 func (e *escapeStreamer) Text(data string) error {
49         sb := &strings.Builder{}
50         pos, until, next := 0, 0, 0
51         if len(data) > len(UTF8BOM) && data[:len(UTF8BOM)] == string(UTF8BOM) {
52                 _, _ = sb.WriteString(data[:len(UTF8BOM)])
53                 pos = len(UTF8BOM)
54         }
55         dataBytes := []byte(data)
56         for pos < len(data) {
57                 nextIdxs := defaultWordRegexp.FindStringIndex(data[pos:])
58                 if nextIdxs == nil {
59                         until = len(data)
60                         next = until
61                 } else {
62                         until, next = nextIdxs[0]+pos, nextIdxs[1]+pos
63                 }
64
65                 // from pos until until we know that the runes are not \r\t\n or even ' '
66                 runes := make([]rune, 0, next-until)
67                 positions := make([]int, 0, next-until+1)
68
69                 for pos < until {
70                         r, sz := utf8.DecodeRune(dataBytes[pos:])
71                         positions = positions[:0]
72                         positions = append(positions, pos, pos+sz)
73                         types, confusables, _ := e.runeTypes(r)
74                         if err := e.handleRunes(dataBytes, []rune{r}, positions, types, confusables, sb); err != nil {
75                                 return err
76                         }
77                         pos += sz
78                 }
79
80                 for i := pos; i < next; {
81                         r, sz := utf8.DecodeRune(dataBytes[i:])
82                         runes = append(runes, r)
83                         positions = append(positions, i)
84                         i += sz
85                 }
86                 positions = append(positions, next)
87                 types, confusables, runeCounts := e.runeTypes(runes...)
88                 if runeCounts.needsEscape() {
89                         if err := e.handleRunes(dataBytes, runes, positions, types, confusables, sb); err != nil {
90                                 return err
91                         }
92                 } else {
93                         _, _ = sb.Write(dataBytes[pos:next])
94                 }
95                 pos = next
96         }
97         if sb.Len() > 0 {
98                 if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
99                         return err
100                 }
101         }
102         return nil
103 }
104
105 func (e *escapeStreamer) handleRunes(data []byte, runes []rune, positions []int, types []runeType, confusables []rune, sb *strings.Builder) error {
106         for i, r := range runes {
107                 switch types[i] {
108                 case brokenRuneType:
109                         if sb.Len() > 0 {
110                                 if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
111                                         return err
112                                 }
113                                 sb.Reset()
114                         }
115                         end := positions[i+1]
116                         start := positions[i]
117                         if err := e.brokenRune(data[start:end]); err != nil {
118                                 return err
119                         }
120                 case ambiguousRuneType:
121                         if sb.Len() > 0 {
122                                 if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
123                                         return err
124                                 }
125                                 sb.Reset()
126                         }
127                         if err := e.ambiguousRune(r, confusables[0]); err != nil {
128                                 return err
129                         }
130                         confusables = confusables[1:]
131                 case invisibleRuneType:
132                         if sb.Len() > 0 {
133                                 if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
134                                         return err
135                                 }
136                                 sb.Reset()
137                         }
138                         if err := e.invisibleRune(r); err != nil {
139                                 return err
140                         }
141                 default:
142                         _, _ = sb.WriteRune(r)
143                 }
144         }
145         return nil
146 }
147
148 func (e *escapeStreamer) brokenRune(bs []byte) error {
149         e.escaped.Escaped = true
150         e.escaped.HasBadRunes = true
151
152         if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
153                 Key: "class",
154                 Val: "broken-code-point",
155         }); err != nil {
156                 return err
157         }
158         if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("<%X>", bs)); err != nil {
159                 return err
160         }
161
162         return e.PassthroughHTMLStreamer.EndTag("span")
163 }
164
165 func (e *escapeStreamer) ambiguousRune(r, c rune) error {
166         e.escaped.Escaped = true
167         e.escaped.HasAmbiguous = true
168
169         if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
170                 Key: "class",
171                 Val: "ambiguous-code-point tooltip",
172         }, html.Attribute{
173                 Key: "data-content",
174                 Val: e.locale.Tr("repo.ambiguous_character", r, c),
175         }); err != nil {
176                 return err
177         }
178         if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
179                 Key: "class",
180                 Val: "char",
181         }); err != nil {
182                 return err
183         }
184         if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
185                 return err
186         }
187         if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
188                 return err
189         }
190
191         return e.PassthroughHTMLStreamer.EndTag("span")
192 }
193
194 func (e *escapeStreamer) invisibleRune(r rune) error {
195         e.escaped.Escaped = true
196         e.escaped.HasInvisible = true
197
198         if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
199                 Key: "class",
200                 Val: "escaped-code-point",
201         }, html.Attribute{
202                 Key: "data-escaped",
203                 Val: fmt.Sprintf("[U+%04X]", r),
204         }); err != nil {
205                 return err
206         }
207         if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
208                 Key: "class",
209                 Val: "char",
210         }); err != nil {
211                 return err
212         }
213         if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
214                 return err
215         }
216         if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
217                 return err
218         }
219
220         return e.PassthroughHTMLStreamer.EndTag("span")
221 }
222
// runeCountType tallies how many runes of each kind runeTypes saw in one call.
type runeCountType struct {
	numBasicRunes                int // space, tab, newline and runes in 0x20..0x7e
	numNonConfusingNonBasicRunes int // runes outside 0x20..0x7e that are not flagged
	numAmbiguousRunes            int // runes found in an ambiguous table
	numInvisibleRunes            int // invisible or control runes
	numBrokenRunes               int // utf8.RuneError
}

// needsEscape reports whether a run of runes with these counts has to be
// escaped. Any broken rune forces escaping. A run that has non-basic runes and
// no basic ones is left alone. Otherwise the run is escaped when it contains
// at least one ambiguous or invisible rune.
func (counts runeCountType) needsEscape() bool {
	switch {
	case counts.numBrokenRunes > 0:
		return true
	case counts.numBasicRunes == 0 && counts.numNonConfusingNonBasicRunes > 0:
		return false
	default:
		return counts.numAmbiguousRunes > 0 || counts.numInvisibleRunes > 0
	}
}
241
// runeType is the per-rune classification produced by runeTypes and consumed
// by handleRunes.
type runeType int

const (
	// basicASCIIRuneType is the zero value: runeTypes leaves an entry at this
	// value for every rune it counts as basic rather than assigning it.
	basicASCIIRuneType runeType = iota // <- This is technically dead code but it's self-documenting so it should stay
	// brokenRuneType marks a rune equal to utf8.RuneError.
	brokenRuneType
	// nonBasicASCIIRuneType marks an unflagged rune outside 0x20..0x7e.
	nonBasicASCIIRuneType
	// ambiguousRuneType marks a rune reported by isAmbiguous.
	ambiguousRuneType
	// invisibleRuneType marks a rune in InvisibleRanges or a control character.
	invisibleRuneType
)
251
252 func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables []rune, runeCounts runeCountType) {
253         types = make([]runeType, len(runes))
254         for i, r := range runes {
255                 var confusable rune
256                 switch {
257                 case r == utf8.RuneError:
258                         types[i] = brokenRuneType
259                         runeCounts.numBrokenRunes++
260                 case r == ' ' || r == '\t' || r == '\n':
261                         runeCounts.numBasicRunes++
262                 case e.isAllowed(r):
263                         if r > 0x7e || r < 0x20 {
264                                 types[i] = nonBasicASCIIRuneType
265                                 runeCounts.numNonConfusingNonBasicRunes++
266                         } else {
267                                 runeCounts.numBasicRunes++
268                         }
269                 case unicode.Is(InvisibleRanges, r):
270                         types[i] = invisibleRuneType
271                         runeCounts.numInvisibleRunes++
272                 case unicode.IsControl(r):
273                         types[i] = invisibleRuneType
274                         runeCounts.numInvisibleRunes++
275                 case isAmbiguous(r, &confusable, e.ambiguousTables...):
276                         confusables = append(confusables, confusable)
277                         types[i] = ambiguousRuneType
278                         runeCounts.numAmbiguousRunes++
279                 case r > 0x7e || r < 0x20:
280                         types[i] = nonBasicASCIIRuneType
281                         runeCounts.numNonConfusingNonBasicRunes++
282                 default:
283                         runeCounts.numBasicRunes++
284                 }
285         }
286         return types, confusables, runeCounts
287 }
288
289 func (e *escapeStreamer) isAllowed(r rune) bool {
290         i := sort.Search(len(e.allowed), func(i int) bool {
291                 return e.allowed[i] >= r
292         })
293         return i < len(e.allowed) && e.allowed[i] == r
294 }