Quellcode durchsuchen

Update emoji dataset with skin tone variants (#11678)

* Update emoji dataset with skin tone variants

Since the format of emoji that support skin tone modifiers is predictable, we can add the different variants to our dataset when generating it, so that we can match and properly style most skin tone variants of emoji. There is no real code change here other than to the code that generates the dataset and to the data itself.

* Use Unicode escape sequences in map

Co-authored-by: techknowlogick <techknowlogick@gitea.io>
tags/v1.13.0-rc1
mrsdizzie vor 3 Jahren
Ursprung
Commit
3af51f1ab7
Es ist kein Account mit der E-Mail-Adresse des Committers verbunden
4 geänderte Dateien mit 3155 neuen und 1742 gelöschten Zeilen
  1. 1
    1
      assets/emoji.json
  2. 51
    14
      build/generate-emoji.go
  3. 5
    4
      modules/emoji/emoji.go
  4. 3098
    1723
      modules/emoji/emoji_data.go

+ 1
- 1
assets/emoji.json
Datei-Diff unterdrückt, da er zu groß ist
Datei anzeigen


+ 51
- 14
build/generate-emoji.go Datei anzeigen

@@ -19,6 +19,7 @@ import (
"sort"
"strconv"
"strings"
"unicode/utf8"
)

const (
@@ -39,6 +40,7 @@ type Emoji struct {
Description string `json:"description,omitempty"`
Aliases []string `json:"aliases"`
UnicodeVersion string `json:"unicode_version,omitempty"`
SkinTones bool `json:"skin_tones,omitempty"`
}

// Don't include some fields in JSON
@@ -47,6 +49,7 @@ func (e Emoji) MarshalJSON() ([]byte, error) {
x := emoji(e)
x.UnicodeVersion = ""
x.Description = ""
x.SkinTones = false
return json.Marshal(x)
}

@@ -75,6 +78,7 @@ var replacer = strings.NewReplacer(
", Description:", ", ",
", Aliases:", ", ",
", UnicodeVersion:", ", ",
", SkinTones:", ", ",
)

var emojiRE = regexp.MustCompile(`\{Emoji:"([^"]*)"`)
@@ -102,18 +106,20 @@ func generate() ([]byte, error) {
return nil, err
}

var re = regexp.MustCompile(`keycap|registered|copyright`)
tmp := data[:0]
var skinTones = make(map[string]string)

// filter out emoji that require greater than max unicode version
skinTones["\U0001f3fb"] = "Light Skin Tone"
skinTones["\U0001f3fc"] = "Medium-Light Skin Tone"
skinTones["\U0001f3fd"] = "Medium Skin Tone"
skinTones["\U0001f3fe"] = "Medium-Dark Skin Tone"
skinTones["\U0001f3ff"] = "Dark Skin Tone"

var tmp Gemoji

//filter out emoji that require greater than max unicode version
for i := range data {
val, _ := strconv.ParseFloat(data[i].UnicodeVersion, 64)
if int(val) <= maxUnicodeVersion {
// remove these keycaps for now they really complicate matching since
// they include normal letters in them
if re.MatchString(data[i].Description) {
continue
}
tmp = append(tmp, data[i])
}
}
@@ -123,7 +129,6 @@ func generate() ([]byte, error) {
return data[i].Aliases[0] < data[j].Aliases[0]
})

aliasPairs := make([]string, 0)
aliasMap := make(map[string]int, len(data))

for i, e := range data {
@@ -135,7 +140,6 @@ func generate() ([]byte, error) {
continue
}
aliasMap[a] = i
aliasPairs = append(aliasPairs, ":"+a+":", e.Emoji)
}
}

@@ -149,6 +153,43 @@ func generate() ([]byte, error) {
data[i].Aliases = append(data[i].Aliases, "laugh")
}

// write a JSON file to use with tribute (write before adding skin tones since we can't support them there yet)
file, _ := json.Marshal(data)
_ = ioutil.WriteFile("assets/emoji.json", file, 0644)

// Add skin tones to emoji that support it
var (
s []string
newEmoji string
newDescription string
newData Emoji
)

for i := range data {
if data[i].SkinTones {
for k, v := range skinTones {
s = strings.Split(data[i].Emoji, "")

if utf8.RuneCountInString(data[i].Emoji) == 1 {
s = append(s, k)
} else {
// insert into slice after first element because all emoji that support skin tones
// have that modifier placed at this spot
s = append(s, "")
copy(s[2:], s[1:])
s[1] = k
}

newEmoji = strings.Join(s, "")
newDescription = data[i].Description + ": " + v
newAlias := data[i].Aliases[0] + "_" + strings.ReplaceAll(v, " ", "_")

newData = Emoji{newEmoji, newDescription, []string{newAlias}, "12.0", false}
data = append(data, newData)
}
}
}

// add header
str := replacer.Replace(fmt.Sprintf(hdr, gemojiURL, data))

@@ -162,10 +203,6 @@ func generate() ([]byte, error) {
return "{" + strconv.QuoteToASCII(s)
})

// write a JSON file to use with tribute
file, _ := json.Marshal(data)
_ = ioutil.WriteFile("assets/emoji.json", file, 0644)

// format
return format.Source([]byte(str))
}

+ 5
- 4
modules/emoji/emoji.go Datei anzeigen

@@ -9,7 +9,6 @@ import (
"sort"
"strings"
"sync"
"unicode/utf8"
)

// Gemoji is a set of emoji data.
@@ -21,6 +20,7 @@ type Emoji struct {
Description string
Aliases []string
UnicodeVersion string
SkinTones bool
}

var (
@@ -131,11 +131,12 @@ func ReplaceAliases(s string) string {
func FindEmojiSubmatchIndex(s string) []int {
loadMap()

// if rune and string length are the same then no emoji will be present
// similar performance when there is unicode present but almost 200% faster when not
if utf8.RuneCountInString(s) == len(s) {
//see if there are any emoji in string before looking for position of specific ones
//no performance difference when there is a match but 10x faster when there are not
if s == ReplaceCodes(s) {
return nil
}

for j := range GemojiData {
i := strings.Index(s, GemojiData[j].Emoji)
if i != -1 {

+ 3098
- 1723
modules/emoji/emoji_data.go
Datei-Diff unterdrückt, da er zu groß ist
Datei anzeigen


Laden…
Abbrechen
Speichern