You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

emoji.go 4.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. // Copyright 2020 The Gitea Authors. All rights reserved.
  2. // Copyright 2015 Kenneth Shaw
  3. // Use of this source code is governed by a MIT-style
  4. // license that can be found in the LICENSE file.
  5. package emoji
  6. import (
  7. "io"
  8. "sort"
  9. "strings"
  10. "sync"
  11. )
// Gemoji is a set of emoji data, represented as a slice of Emoji entries.
type Gemoji []Emoji
// Emoji represents a single emoji and associated data.
//
// NOTE(review): keep the field order stable; positional (unkeyed) composite
// literals elsewhere in the package would depend on it — confirm before
// reordering.
type Emoji struct {
	// Emoji is the emoji's unicode sequence. It is the key used for lookups
	// by code and the text that aliases are replaced with.
	Emoji string
	// Description is presumably a human-readable description of the emoji;
	// it is not read anywhere in this file — TODO confirm.
	Description string
	// Aliases holds the emoji's names, written as ":alias:" in text. Entries
	// without any alias are skipped when the lookup tables are built, and the
	// first alias is the one emitted when a code is replaced by an alias.
	Aliases []string
	// UnicodeVersion is presumably the Unicode version that introduced the
	// emoji; it is not read anywhere in this file — TODO confirm.
	UnicodeVersion string
	// SkinTones presumably reports whether the emoji supports skin tone
	// modifiers; it is not read anywhere in this file — TODO confirm.
	SkinTones bool
}
  22. var (
  23. // codeMap provides a map of the emoji unicode code to its emoji data.
  24. codeMap map[string]int
  25. // aliasMap provides a map of the alias to its emoji data.
  26. aliasMap map[string]int
  27. // emptyReplacer is the string replacer for emoji codes.
  28. emptyReplacer *strings.Replacer
  29. // codeReplacer is the string replacer for emoji codes.
  30. codeReplacer *strings.Replacer
  31. // aliasReplacer is the string replacer for emoji aliases.
  32. aliasReplacer *strings.Replacer
  33. once sync.Once
  34. )
  35. func loadMap() {
  36. once.Do(func() {
  37. // initialize
  38. codeMap = make(map[string]int, len(GemojiData))
  39. aliasMap = make(map[string]int, len(GemojiData))
  40. // process emoji codes and aliases
  41. codePairs := make([]string, 0)
  42. emptyPairs := make([]string, 0)
  43. aliasPairs := make([]string, 0)
  44. // sort from largest to small so we match combined emoji first
  45. sort.Slice(GemojiData, func(i, j int) bool {
  46. return len(GemojiData[i].Emoji) > len(GemojiData[j].Emoji)
  47. })
  48. for i, e := range GemojiData {
  49. if e.Emoji == "" || len(e.Aliases) == 0 {
  50. continue
  51. }
  52. // setup codes
  53. codeMap[e.Emoji] = i
  54. codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
  55. emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
  56. // setup aliases
  57. for _, a := range e.Aliases {
  58. if a == "" {
  59. continue
  60. }
  61. aliasMap[a] = i
  62. aliasPairs = append(aliasPairs, ":"+a+":", e.Emoji)
  63. }
  64. }
  65. // create replacers
  66. emptyReplacer = strings.NewReplacer(emptyPairs...)
  67. codeReplacer = strings.NewReplacer(codePairs...)
  68. aliasReplacer = strings.NewReplacer(aliasPairs...)
  69. })
  70. }
  71. // FromCode retrieves the emoji data based on the provided unicode code (ie,
  72. // "\u2618" will return the Gemoji data for "shamrock").
  73. func FromCode(code string) *Emoji {
  74. loadMap()
  75. i, ok := codeMap[code]
  76. if !ok {
  77. return nil
  78. }
  79. return &GemojiData[i]
  80. }
  81. // FromAlias retrieves the emoji data based on the provided alias in the form
  82. // "alias" or ":alias:" (ie, "shamrock" or ":shamrock:" will return the Gemoji
  83. // data for "shamrock").
  84. func FromAlias(alias string) *Emoji {
  85. loadMap()
  86. if strings.HasPrefix(alias, ":") && strings.HasSuffix(alias, ":") {
  87. alias = alias[1 : len(alias)-1]
  88. }
  89. i, ok := aliasMap[alias]
  90. if !ok {
  91. return nil
  92. }
  93. return &GemojiData[i]
  94. }
  95. // ReplaceCodes replaces all emoji codes with the first corresponding emoji
  96. // alias (in the form of ":alias:") (ie, "\u2618" will be converted to
  97. // ":shamrock:").
  98. func ReplaceCodes(s string) string {
  99. loadMap()
  100. return codeReplacer.Replace(s)
  101. }
  102. // ReplaceAliases replaces all aliases of the form ":alias:" with its
  103. // corresponding unicode value.
  104. func ReplaceAliases(s string) string {
  105. loadMap()
  106. return aliasReplacer.Replace(s)
  107. }
  108. type rememberSecondWriteWriter struct {
  109. pos int
  110. idx int
  111. end int
  112. writecount int
  113. }
  114. func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
  115. n.writecount++
  116. if n.writecount == 2 {
  117. n.idx = n.pos
  118. n.end = n.pos + len(p)
  119. n.pos += len(p)
  120. return len(p), io.EOF
  121. }
  122. n.pos += len(p)
  123. return len(p), nil
  124. }
  125. func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
  126. n.writecount++
  127. if n.writecount == 2 {
  128. n.idx = n.pos
  129. n.end = n.pos + len(s)
  130. n.pos += len(s)
  131. return len(s), io.EOF
  132. }
  133. n.pos += len(s)
  134. return len(s), nil
  135. }
  136. // FindEmojiSubmatchIndex returns index pair of longest emoji in a string
  137. func FindEmojiSubmatchIndex(s string) []int {
  138. loadMap()
  139. secondWriteWriter := rememberSecondWriteWriter{}
  140. // A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
  141. // we can be lazy here.
  142. //
  143. // The implementation of strings.Replacer.WriteString is such that the first index of the emoji
  144. // submatch is simply the second thing that is written to WriteString in the writer.
  145. //
  146. // Therefore we can simply take the index of the second write as our first emoji
  147. //
  148. // FIXME: just copy the trie implementation from strings.NewReplacer
  149. _, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
  150. // if we wrote less than twice then we never "replaced"
  151. if secondWriteWriter.writecount < 2 {
  152. return nil
  153. }
  154. return []int{secondWriteWriter.idx, secondWriteWriter.end}
  155. }