aboutsummaryrefslogtreecommitdiffstats
path: root/modules/markup
diff options
context:
space:
mode:
authormrsdizzie <info@mrsdizzie.com>2020-04-28 14:05:39 -0400
committerGitHub <noreply@github.com>2020-04-28 15:05:39 -0300
commit4563eb873d3d46953aeb43a1cebf3389906b0b88 (patch)
tree3d11783240910c1d515d39c644b69c5477c812d5 /modules/markup
parent922a239079aefb78a05f1234f8883d8096f46c62 (diff)
downloadgitea-4563eb873d3d46953aeb43a1cebf3389906b0b88.tar.gz
gitea-4563eb873d3d46953aeb43a1cebf3389906b0b88.zip
Support unicode emojis and remove emojify.js (#11032)
* Support unicode emojis and remove emojify.js This PR replaces all use of emojify.js and adds unicode emoji support to various areas of gitea. This works in a few ways: First it adds emoji parsing support into gitea itself. This allows us to * Render emojis from valid alias (:smile:) * Detect unicode emojis and let us put them in their own class with proper aria-labels and styling * Easily allow for custom "emoji" * Support all emoji rendering and features without javascript * Uses plain unicode and lets the system render in appropriate emoji font * Doesn't leave us relying on external sources for updates/fixes/features That same list of emoji is also used to create a json file which replaces the part of emojify.js that populates the emoji search tribute. This file is about 35KB with GZIP turned on and I've set it to load after the page renders to not hinder page load time (and this removes loading emojify.js also) For custom "emoji" it uses a pretty simple scheme of just looking for /img/emoji/name.png where name is something a user has put in the "allowed reactions" setting we already have. The gitea reaction that was previously hard-coded into a forked copy of emojify.js is included and works as a custom reaction under this method. The emoji data sourced here is from https://github.com/github/gemoji which is the gem library GitHub uses for their emoji rendering (and a data source for other sites). So we should be able to easily render any emoji and :alias: that GitHub can, removing any errors from migrated content. They also update it as well, so we can sync when there are new unicode emoji lists released. I've included a slimmed down and slightly modified forked copy of https://github.com/knq/emoji to make up our own emoji module. The code is pretty straightforward and again allows us to have a lot of flexibility in what happens.
I had seen a few comments about performance in some of the other threads if we render this ourselves, but there doesn't seem to be any issue here. In a test it can parse, convert, and render 1,000 emojis inside of a large markdown table in about 100ms on my laptop (which is many more emojis than will ever be in any normal issue). This also prevents any flickering and other weirdness from using javascript to render some things while using go for others. Not included here are image fallback URLs. I don't really think they are necessary for anything new being written in 2020. However, managing the emoji ourselves would allow us to add these as a feature later on if it seems necessary. Fixes: https://github.com/go-gitea/gitea/issues/9182 Fixes: https://github.com/go-gitea/gitea/issues/8974 Fixes: https://github.com/go-gitea/gitea/issues/8953 Fixes: https://github.com/go-gitea/gitea/issues/6628 Fixes: https://github.com/go-gitea/gitea/issues/5130 * add new shared function emojiHTML * don't increase emoji size in issue title * Update templates/repo/issue/view_content/add_reaction.tmpl Co-Authored-By: 6543 <6543@obermui.de> * Support for emoji rendering in various templates * Render code and review comments as they should be * Better way to handle mail subjects * insert unicode from tribute selection * Add template helper for plain text when needed * Use existing replace function I forgot about * Don't include emoji greater than Unicode Version 12 Only include emoji and aliases in JSON * Update build/generate-emoji.go * Tweak regex slightly to really match everything including random invisible characters.
Run tests for every emoji we have * final updates * code review * code review * hard code gitea custom emoji to match previous behavior * Update .eslintrc Co-Authored-By: silverwind <me@silverwind.io> * disable preempt Co-authored-by: silverwind <me@silverwind.io> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: Lauris BH <lauris@nix.lv> Co-authored-by: guillep2k <18600385+guillep2k@users.noreply.github.com>
Diffstat (limited to 'modules/markup')
-rw-r--r--modules/markup/html.go123
-rw-r--r--modules/markup/html_test.go45
-rw-r--r--modules/markup/sanitizer.go4
3 files changed, 172 insertions, 0 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go
index 294b870d8c..c5bb4d847b 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -6,6 +6,7 @@ package markup
import (
"bytes"
+ "fmt"
"net/url"
"path"
"path/filepath"
@@ -13,6 +14,7 @@ import (
"strings"
"code.gitea.io/gitea/modules/base"
+ "code.gitea.io/gitea/modules/emoji"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup/common"
@@ -60,6 +62,13 @@ var (
// blackfriday extensions create IDs like fn:user-content-footnote
blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
+
+ // EmojiShortCodeRegex find emoji by alias like :smile:
+ EmojiShortCodeRegex = regexp.MustCompile(`\:[\w\+\-]+\:{1}`)
+
+ // find emoji literal: search all emoji hex range as many times as they appear as
+ // some emojis (skin color etc..) are just two or more chained together
+ emojiRegex = regexp.MustCompile(`[\x{1F000}-\x{1FFFF}|\x{2000}-\x{32ff}|\x{fe4e5}-\x{fe4ee}|\x{200D}|\x{FE0F}|\x{e0000}-\x{e007f}]+`)
)
// CSS class for action keywords (e.g. "closes: #1")
@@ -154,6 +163,8 @@ var defaultProcessors = []processor{
issueIndexPatternProcessor,
sha1CurrentPatternProcessor,
emailAddressProcessor,
+ emojiProcessor,
+ emojiShortCodeProcessor,
}
type postProcessCtx struct {
@@ -194,6 +205,8 @@ var commitMessageProcessors = []processor{
issueIndexPatternProcessor,
sha1CurrentPatternProcessor,
emailAddressProcessor,
+ emojiProcessor,
+ emojiShortCodeProcessor,
}
// RenderCommitMessage will use the same logic as PostProcess, but will disable
@@ -226,6 +239,13 @@ var commitMessageSubjectProcessors = []processor{
mentionProcessor,
issueIndexPatternProcessor,
sha1CurrentPatternProcessor,
+ emojiShortCodeProcessor,
+ emojiProcessor,
+}
+
+// emojiProcessors is the processor list used by RenderEmoji: the shortcode
+// processor (:smile:) runs first, followed by the literal unicode emoji
+// processor.
+var emojiProcessors = []processor{
+ emojiShortCodeProcessor,
+ emojiProcessor,
}
// RenderCommitMessageSubject will use the same logic as PostProcess and
@@ -269,6 +289,17 @@ func RenderDescriptionHTML(
return ctx.postProcess(rawHTML)
}
+// RenderEmoji post-processes rawHTML with only the emoji processors, i.e.
+// shortcodes such as :smile: and literal unicode emoji. It is intended for
+// places whose content is not already run through the normal markdown
+// processor.
+func RenderEmoji(rawHTML []byte) ([]byte, error) {
+	return (&postProcessCtx{procs: emojiProcessors}).postProcess(rawHTML)
+}
+
var byteBodyTag = []byte("<body>")
var byteBodyTagClosing = []byte("</body>")
@@ -319,7 +350,12 @@ func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
if attr.Key == "id" && !(strings.HasPrefix(attr.Val, "user-content-") || blackfridayExtRegex.MatchString(attr.Val)) {
node.Attr[idx].Val = "user-content-" + attr.Val
}
+
+ if attr.Key == "class" && attr.Val == "emoji" {
+ visitText = false
+ }
}
+
// We ignore code, pre and already generated links.
switch node.Type {
case html.TextNode:
@@ -406,6 +442,54 @@ func createKeyword(content string) *html.Node {
return span
}
+// createEmoji wraps the emoji text content in a <span> element. The class
+// attribute is added only when class is non-empty, and the aria-label
+// attribute only when name is non-empty.
+func createEmoji(content, class, name string) *html.Node {
+	attrs := make([]html.Attribute, 0, 2)
+	if class != "" {
+		attrs = append(attrs, html.Attribute{Key: "class", Val: class})
+	}
+	if name != "" {
+		attrs = append(attrs, html.Attribute{Key: "aria-label", Val: name})
+	}
+
+	wrapper := &html.Node{
+		Type: html.ElementNode,
+		Data: atom.Span.String(),
+		Attr: attrs,
+	}
+	wrapper.AppendChild(&html.Node{
+		Type: html.TextNode,
+		Data: content,
+	})
+	return wrapper
+}
+
+// createCustomEmoji builds the element for a custom (image based) emoji: a
+// <span> wrapping an <img> whose source is
+// <setting.StaticURLPrefix>/img/emoji/<alias>.png.
+//
+// The span always carries alias as its aria-label and the image always gets
+// its src attribute; only the class attribute depends on class being
+// non-empty. Earlier both were skipped for an empty class, which produced an
+// image element without any source.
+func createCustomEmoji(alias, class string) *html.Node {
+	spanAttrs := make([]html.Attribute, 0, 2)
+	if class != "" {
+		spanAttrs = append(spanAttrs, html.Attribute{Key: "class", Val: class})
+	}
+	spanAttrs = append(spanAttrs, html.Attribute{Key: "aria-label", Val: alias})
+
+	span := &html.Node{
+		Type: html.ElementNode,
+		Data: atom.Span.String(),
+		Attr: spanAttrs,
+	}
+
+	img := &html.Node{
+		Type:     html.ElementNode,
+		DataAtom: atom.Img,
+		Data:     "img",
+		Attr: []html.Attribute{
+			{Key: "src", Val: fmt.Sprintf(`%s/img/emoji/%s.png`, setting.StaticURLPrefix, alias)},
+		},
+	}
+
+	span.AppendChild(img)
+	return span
+}
+
func createLink(href, content, class string) *html.Node {
a := &html.Node{
Type: html.ElementNode,
@@ -810,6 +894,45 @@ func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) {
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
}
+// emojiShortCodeProcessor renders the first resolvable shortcode such as
+// :smile: found in node's text as an emoji element.
+//
+// Every :alias: candidate matched by EmojiShortCodeRegex is tried in order:
+//   - a known emoji alias is replaced by a unicode emoji span;
+//   - an alias that exactly equals a configured reaction (or the built-in
+//     "gitea") is replaced by a custom image emoji.
+//
+// Candidates that resolve to neither, e.g. the ":30:" in "12:30:45", are
+// skipped so that they do not hide a real shortcode later in the same text.
+// At most one replacement is made per call.
+func emojiShortCodeProcessor(ctx *postProcessCtx, node *html.Node) {
+	for _, m := range EmojiShortCodeRegex.FindAllStringIndex(node.Data, -1) {
+		// The regex only allows colons as the first and last character.
+		alias := strings.Trim(node.Data[m[0]:m[1]], ":")
+
+		if converted := emoji.FromAlias(alias); converted != nil {
+			replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
+			return
+		}
+
+		if isCustomEmojiAlias(alias) {
+			replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
+			return
+		}
+	}
+}
+
+// isCustomEmojiAlias reports whether alias names a custom emoji, i.e. it is
+// exactly "gitea" or exactly one of the entries in setting.UI.Reactions.
+// Exact comparison is required: a substring test would accept fragments such
+// as "hear" when "heart" is an allowed reaction.
+func isCustomEmojiAlias(alias string) bool {
+	if alias == "gitea" {
+		return true
+	}
+	for _, reaction := range setting.UI.Reactions {
+		if reaction == alias {
+			return true
+		}
+	}
+	return false
+}
+
+// emojiProcessor wraps the first literal unicode emoji found in node's text
+// in a span with the "emoji" class and the emoji description as aria-label.
+//
+// emojiRegex also matches text that is not an emoji (its character class
+// contains a literal '|' as well as the whole U+2000-U+32FF range), so every
+// match is verified with emoji.FromCode. Matches that are not known emoji
+// are skipped instead of ending the search, otherwise input like "a | b 😄"
+// would never get its emoji wrapped. At most one replacement is made per
+// call.
+func emojiProcessor(ctx *postProcessCtx, node *html.Node) {
+	for _, m := range emojiRegex.FindAllStringIndex(node.Data, -1) {
+		codepoint := node.Data[m[0]:m[1]]
+		if val := emoji.FromCode(codepoint); val != nil {
+			replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
+			return
+		}
+	}
+}
+
// sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
// are assumed to be in the same repository.
func sha1CurrentPatternProcessor(ctx *postProcessCtx, node *html.Node) {
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index 44f5926ac7..65d2d327d6 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -8,6 +8,7 @@ import (
"strings"
"testing"
+ "code.gitea.io/gitea/modules/emoji"
. "code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/markup/markdown"
"code.gitea.io/gitea/modules/setting"
@@ -228,6 +229,50 @@ func TestRender_email(t *testing.T) {
`<p>email@domain..com</p>`)
}
+// TestRender_emoji checks that literal unicode emoji and :alias: shortcodes
+// are rendered as emoji spans, that the custom gitea emoji becomes an image,
+// and that colon separated text which is not an emoji is left untouched.
+func TestRender_emoji(t *testing.T) {
+	setting.AppURL = AppURL
+	setting.AppSubURL = AppSubURL
+	setting.StaticURLPrefix = AppURL
+
+	// check renders input as markdown and compares it with the expected HTML.
+	check := func(input, expected string) {
+		want := strings.TrimSpace(strings.Replace(expected, "&", "&amp;", -1))
+		got := strings.TrimSpace(RenderString("a.md", input, setting.AppSubURL, nil))
+		assert.Equal(t, want, got)
+	}
+
+	// Make sure we can successfully match every emoji in our dataset, both
+	// as the literal character(s) and through its first alias.
+	for i := range emoji.GemojiData {
+		entry := emoji.GemojiData[i]
+		rendered := `<p><span class="emoji" aria-label="` + entry.Description + `">` + entry.Emoji + `</span></p>`
+		check(entry.Emoji, rendered)
+		check(":"+entry.Aliases[0]+":", rendered)
+	}
+
+	cases := []struct {
+		input    string
+		expected string
+	}{
+		// Text that should be turned into or recognized as emoji
+		{
+			":gitea:",
+			`<p><span class="emoji" aria-label="gitea"><img src="` + setting.StaticURLPrefix + `/img/emoji/gitea.png"/></span></p>`,
+		},
+		{
+			"Some text with 😄 in the middle",
+			`<p>Some text with <span class="emoji" aria-label="grinning face with smiling eyes">😄</span> in the middle</p>`,
+		},
+		{
+			"Some text with :smile: in the middle",
+			`<p>Some text with <span class="emoji" aria-label="grinning face with smiling eyes">😄</span> in the middle</p>`,
+		},
+		// should match nothing
+		{
+			"2001:0db8:85a3:0000:0000:8a2e:0370:7334",
+			`<p>2001:0db8:85a3:0000:0000:8a2e:0370:7334</p>`,
+		},
+		{
+			":not exist:",
+			`<p>:not exist:</p>`,
+		},
+	}
+	for _, tc := range cases {
+		check(tc.input, tc.expected)
+	}
+}
+
func TestRender_ShortLinks(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go
index ddb5584e80..faf4163109 100644
--- a/modules/markup/sanitizer.go
+++ b/modules/markup/sanitizer.go
@@ -63,6 +63,10 @@ func ReplaceSanitizer() {
// Allow unlabelled labels
sanitizer.policy.AllowNoAttrs().OnElements("label")
+ // Allow classes for emojis
+ sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("span")
+ sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
+
// Allow generally safe attributes
generalSafeAttrs := []string{"abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt",