diff options
author | zeripath <art27@cantab.net> | 2021-03-15 23:20:05 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-16 00:20:05 +0100 |
commit | ed31ddc29a1cae7af193fb0793d129b07da91ce2 (patch) | |
tree | 2a1ce0fd4085d4ded6c913f0e7763a7239fc8ffd /modules/emoji | |
parent | 044cd4d016196e8c7091eee90b7e6f230bba142f (diff) | |
download | gitea-ed31ddc29a1cae7af193fb0793d129b07da91ce2.tar.gz gitea-ed31ddc29a1cae7af193fb0793d129b07da91ce2.zip |
Fix several render issues (#14986)
* Fix an issue with panics related to attributes
* Wrap goldmark render in a recovery function
* Reduce memory use in render emoji
* Use a pipe for rendering goldmark - still needs more work and a limiter
Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lauris BH <lauris@nix.lv>
Diffstat (limited to 'modules/emoji')
-rw-r--r-- | modules/emoji/emoji.go | 75 | ||||
-rw-r--r-- | modules/emoji/emoji_test.go | 33 |
2 files changed, 81 insertions, 27 deletions
diff --git a/modules/emoji/emoji.go b/modules/emoji/emoji.go index 169ee0a182..01fb764ce3 100644 --- a/modules/emoji/emoji.go +++ b/modules/emoji/emoji.go @@ -30,6 +30,9 @@ var ( // aliasMap provides a map of the alias to its emoji data. aliasMap map[string]int + // emptyReplacer is the string replacer for emoji codes. + emptyReplacer *strings.Replacer + // codeReplacer is the string replacer for emoji codes. codeReplacer *strings.Replacer @@ -49,6 +52,7 @@ func loadMap() { // process emoji codes and aliases codePairs := make([]string, 0) + emptyPairs := make([]string, 0) aliasPairs := make([]string, 0) // sort from largest to small so we match combined emoji first @@ -64,6 +68,7 @@ func loadMap() { // setup codes codeMap[e.Emoji] = i codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":") + emptyPairs = append(emptyPairs, e.Emoji, e.Emoji) // setup aliases for _, a := range e.Aliases { @@ -77,6 +82,7 @@ func loadMap() { } // create replacers + emptyReplacer = strings.NewReplacer(emptyPairs...) codeReplacer = strings.NewReplacer(codePairs...) aliasReplacer = strings.NewReplacer(aliasPairs...) }) @@ -127,38 +133,53 @@ func ReplaceAliases(s string) string { return aliasReplacer.Replace(s) } -// FindEmojiSubmatchIndex returns index pair of longest emoji in a string -func FindEmojiSubmatchIndex(s string) []int { - loadMap() - found := make(map[int]int) - keys := make([]int, 0) +type rememberSecondWriteWriter struct { + pos int + idx int + end int + writecount int +} - //see if there are any emoji in string before looking for position of specific ones - //no performance difference when there is a match but 10x faster when there are not - if s == ReplaceCodes(s) { - return nil +func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) { + n.writecount++ + if n.writecount == 2 { + n.idx = n.pos + n.end = n.pos + len(p) } + n.pos += len(p) + return len(p), nil +} - // get index of first emoji occurrence while also checking for longest combination - for j := range GemojiData { - i := strings.Index(s, GemojiData[j].Emoji) - if i != -1 { - if _, ok := found[i]; !ok { - if len(keys) == 0 || i < keys[0] { - found[i] = j - keys = []int{i} - } - if i == 0 { - break - } - } - } +func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) { + n.writecount++ + if n.writecount == 2 { + n.idx = n.pos + n.end = n.pos + len(s) } + n.pos += len(s) + return len(s), nil +} - if len(keys) > 0 { - index := keys[0] - return []int{index, index + len(GemojiData[found[index]].Emoji)} +// FindEmojiSubmatchIndex returns index pair of longest emoji in a string +func FindEmojiSubmatchIndex(s string) []int { + loadMap() + secondWriteWriter := rememberSecondWriteWriter{} + + // A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but + // we can be lazy here. + // + // The implementation of strings.Replacer.WriteString is such that the first index of the emoji + // submatch is simply the second thing that is written to WriteString in the writer. + // + // Therefore we can simply take the index of the second write as our first emoji + // + // FIXME: just copy the trie implementation from strings.NewReplacer + _, _ = emptyReplacer.WriteString(&secondWriteWriter, s) + + // if we wrote less than twice then we never "replaced" + if secondWriteWriter.writecount < 2 { + return nil } - return nil + return []int{secondWriteWriter.idx, secondWriteWriter.end} } diff --git a/modules/emoji/emoji_test.go b/modules/emoji/emoji_test.go index 3eca3a8d8a..def252896f 100644 --- a/modules/emoji/emoji_test.go +++ b/modules/emoji/emoji_test.go @@ -8,6 +8,8 @@ package emoji import ( "reflect" "testing" + + "github.com/stretchr/testify/assert" ) func TestDumpInfo(t *testing.T) { @@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) { } } } + +func TestFindEmojiSubmatchIndex(t *testing.T) { + type testcase struct { + teststring string + expected []int + } + + testcases := []testcase{ + { + "\U0001f44d", + []int{0, len("\U0001f44d")}, + }, + { + "\U0001f44d +1 \U0001f44d \U0001f37a", + []int{0, 4}, + }, + { + " \U0001f44d", + []int{1, 1 + len("\U0001f44d")}, + }, + { + string([]byte{'\u0001'}) + "\U0001f44d", + []int{1, 1 + len("\U0001f44d")}, + }, + } + + for _, kase := range testcases { + actual := FindEmojiSubmatchIndex(kase.teststring) + assert.Equal(t, kase.expected, actual) + } +} |