You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

emoji.go 4.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // Copyright 2015 Kenneth Shaw
  3. // SPDX-License-Identifier: MIT
  4. package emoji
  5. import (
  6. "io"
  7. "sort"
  8. "strings"
  9. "sync"
  10. )
  11. // Gemoji is a set of emoji data, held as a slice of Emoji entries.
  12. type Gemoji []Emoji
  13. // Emoji represents a single emoji and associated data.
  14. type Emoji struct {
  15. Emoji string // the emoji as a unicode string; loadMap uses it as the codeMap key and skips entries where it is empty
  16. Description string // NOTE(review): presumably a human-readable description; not read anywhere in this file
  17. Aliases []string // alias names without the surrounding colons; Aliases[0] is the alias ReplaceCodes emits
  18. UnicodeVersion string // NOTE(review): presumably the Unicode version that introduced the emoji; confirm against the data source
  19. SkinTones bool // NOTE(review): presumably whether skin-tone modifiers apply; confirm against the data source
  20. }
  21. var (
  22. // codeMap maps an emoji's unicode string to the index of its entry in GemojiData.
  23. codeMap map[string]int
  24. // aliasMap maps an alias (without the surrounding colons) to the index of its entry in GemojiData.
  25. aliasMap map[string]int
  26. // emptyReplacer replaces every emoji code with itself; FindEmojiSubmatchIndex uses it to locate emoji without changing the text.
  27. emptyReplacer *strings.Replacer
  28. // codeReplacer replaces emoji codes with ":alias:", using the first alias of each emoji.
  29. codeReplacer *strings.Replacer
  30. // aliasReplacer replaces ":alias:" with the corresponding emoji code.
  31. aliasReplacer *strings.Replacer
  32. once sync.Once // makes loadMap build the maps and replacers above only once
  33. )
  34. func loadMap() {
  35. once.Do(func() {
  36. // initialize
  37. codeMap = make(map[string]int, len(GemojiData))
  38. aliasMap = make(map[string]int, len(GemojiData))
  39. // process emoji codes and aliases
  40. codePairs := make([]string, 0)
  41. emptyPairs := make([]string, 0)
  42. aliasPairs := make([]string, 0)
  43. // sort from largest to small so we match combined emoji first
  44. sort.Slice(GemojiData, func(i, j int) bool {
  45. return len(GemojiData[i].Emoji) > len(GemojiData[j].Emoji)
  46. })
  47. for i, e := range GemojiData {
  48. if e.Emoji == "" || len(e.Aliases) == 0 {
  49. continue
  50. }
  51. // setup codes
  52. codeMap[e.Emoji] = i
  53. codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
  54. emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
  55. // setup aliases
  56. for _, a := range e.Aliases {
  57. if a == "" {
  58. continue
  59. }
  60. aliasMap[a] = i
  61. aliasPairs = append(aliasPairs, ":"+a+":", e.Emoji)
  62. }
  63. }
  64. // create replacers
  65. emptyReplacer = strings.NewReplacer(emptyPairs...)
  66. codeReplacer = strings.NewReplacer(codePairs...)
  67. aliasReplacer = strings.NewReplacer(aliasPairs...)
  68. })
  69. }
  70. // FromCode retrieves the emoji data based on the provided unicode code (ie,
  71. // "\u2618" will return the Gemoji data for "shamrock").
  72. func FromCode(code string) *Emoji {
  73. loadMap()
  74. i, ok := codeMap[code]
  75. if !ok {
  76. return nil
  77. }
  78. return &GemojiData[i]
  79. }
  80. // FromAlias retrieves the emoji data based on the provided alias in the form
  81. // "alias" or ":alias:" (ie, "shamrock" or ":shamrock:" will return the Gemoji
  82. // data for "shamrock").
  83. func FromAlias(alias string) *Emoji {
  84. loadMap()
  85. if strings.HasPrefix(alias, ":") && strings.HasSuffix(alias, ":") {
  86. alias = alias[1 : len(alias)-1]
  87. }
  88. i, ok := aliasMap[alias]
  89. if !ok {
  90. return nil
  91. }
  92. return &GemojiData[i]
  93. }
  94. // ReplaceCodes replaces all emoji codes with the first corresponding emoji
  95. // alias (in the form of ":alias:") (ie, "\u2618" will be converted to
  96. // ":shamrock:").
  97. func ReplaceCodes(s string) string {
  98. loadMap()
  99. return codeReplacer.Replace(s)
  100. }
  101. // ReplaceAliases replaces all aliases of the form ":alias:" with its
  102. // corresponding unicode value.
  103. func ReplaceAliases(s string) string {
  104. loadMap()
  105. return aliasReplacer.Replace(s)
  106. }
  107. type rememberSecondWriteWriter struct {
  108. pos int
  109. idx int
  110. end int
  111. writecount int
  112. }
  113. func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
  114. n.writecount++
  115. if n.writecount == 2 {
  116. n.idx = n.pos
  117. n.end = n.pos + len(p)
  118. n.pos += len(p)
  119. return len(p), io.EOF
  120. }
  121. n.pos += len(p)
  122. return len(p), nil
  123. }
  124. func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
  125. n.writecount++
  126. if n.writecount == 2 {
  127. n.idx = n.pos
  128. n.end = n.pos + len(s)
  129. n.pos += len(s)
  130. return len(s), io.EOF
  131. }
  132. n.pos += len(s)
  133. return len(s), nil
  134. }
  135. // FindEmojiSubmatchIndex returns index pair of longest emoji in a string
  136. func FindEmojiSubmatchIndex(s string) []int {
  137. loadMap()
  138. secondWriteWriter := rememberSecondWriteWriter{}
  139. // A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
  140. // we can be lazy here.
  141. //
  142. // The implementation of strings.Replacer.WriteString is such that the first index of the emoji
  143. // submatch is simply the second thing that is written to WriteString in the writer.
  144. //
  145. // Therefore we can simply take the index of the second write as our first emoji
  146. //
  147. // FIXME: just copy the trie implementation from strings.NewReplacer
  148. _, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
  149. // if we wrote less than twice then we never "replaced"
  150. if secondWriteWriter.writecount < 2 {
  151. return nil
  152. }
  153. return []int{secondWriteWriter.idx, secondWriteWriter.end}
  154. }