You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

manipulate.go 4.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. // Copyright 2015 Huan Du. All rights reserved.
  2. // Licensed under the MIT license that can be found in the LICENSE file.
  3. package xstrings
  4. import (
  5. "strings"
  6. "unicode/utf8"
  7. )
  8. // Reverse a utf8 encoded string.
  9. func Reverse(str string) string {
  10. var size int
  11. tail := len(str)
  12. buf := make([]byte, tail)
  13. s := buf
  14. for len(str) > 0 {
  15. _, size = utf8.DecodeRuneInString(str)
  16. tail -= size
  17. s = append(s[:tail], []byte(str[:size])...)
  18. str = str[size:]
  19. }
  20. return string(buf)
  21. }
  22. // Slice a string by rune.
  23. //
  24. // Start must satisfy 0 <= start <= rune length.
  25. //
  26. // End can be positive, zero or negative.
  27. // If end >= 0, start and end must satisfy start <= end <= rune length.
  28. // If end < 0, it means slice to the end of string.
  29. //
  30. // Otherwise, Slice will panic as out of range.
  31. func Slice(str string, start, end int) string {
  32. var size, startPos, endPos int
  33. origin := str
  34. if start < 0 || end > len(str) || (end >= 0 && start > end) {
  35. panic("out of range")
  36. }
  37. if end >= 0 {
  38. end -= start
  39. }
  40. for start > 0 && len(str) > 0 {
  41. _, size = utf8.DecodeRuneInString(str)
  42. start--
  43. startPos += size
  44. str = str[size:]
  45. }
  46. if end < 0 {
  47. return origin[startPos:]
  48. }
  49. endPos = startPos
  50. for end > 0 && len(str) > 0 {
  51. _, size = utf8.DecodeRuneInString(str)
  52. end--
  53. endPos += size
  54. str = str[size:]
  55. }
  56. if len(str) == 0 && (start > 0 || end > 0) {
  57. panic("out of range")
  58. }
  59. return origin[startPos:endPos]
  60. }
  61. // Partition splits a string by sep into three parts.
  62. // The return value is a slice of strings with head, match and tail.
  63. //
  64. // If str contains sep, for example "hello" and "l", Partition returns
  65. // "he", "l", "lo"
  66. //
  67. // If str doesn't contain sep, for example "hello" and "x", Partition returns
  68. // "hello", "", ""
  69. func Partition(str, sep string) (head, match, tail string) {
  70. index := strings.Index(str, sep)
  71. if index == -1 {
  72. head = str
  73. return
  74. }
  75. head = str[:index]
  76. match = str[index : index+len(sep)]
  77. tail = str[index+len(sep):]
  78. return
  79. }
  80. // LastPartition splits a string by last instance of sep into three parts.
  81. // The return value is a slice of strings with head, match and tail.
  82. //
  83. // If str contains sep, for example "hello" and "l", LastPartition returns
  84. // "hel", "l", "o"
  85. //
  86. // If str doesn't contain sep, for example "hello" and "x", LastPartition returns
  87. // "", "", "hello"
  88. func LastPartition(str, sep string) (head, match, tail string) {
  89. index := strings.LastIndex(str, sep)
  90. if index == -1 {
  91. tail = str
  92. return
  93. }
  94. head = str[:index]
  95. match = str[index : index+len(sep)]
  96. tail = str[index+len(sep):]
  97. return
  98. }
  99. // Insert src into dst at given rune index.
  100. // Index is counted by runes instead of bytes.
  101. //
  102. // If index is out of range of dst, panic with out of range.
  103. func Insert(dst, src string, index int) string {
  104. return Slice(dst, 0, index) + src + Slice(dst, index, -1)
  105. }
  106. // Scrub scrubs invalid utf8 bytes with repl string.
  107. // Adjacent invalid bytes are replaced only once.
  108. func Scrub(str, repl string) string {
  109. var buf *stringBuilder
  110. var r rune
  111. var size, pos int
  112. var hasError bool
  113. origin := str
  114. for len(str) > 0 {
  115. r, size = utf8.DecodeRuneInString(str)
  116. if r == utf8.RuneError {
  117. if !hasError {
  118. if buf == nil {
  119. buf = &stringBuilder{}
  120. }
  121. buf.WriteString(origin[:pos])
  122. hasError = true
  123. }
  124. } else if hasError {
  125. hasError = false
  126. buf.WriteString(repl)
  127. origin = origin[pos:]
  128. pos = 0
  129. }
  130. pos += size
  131. str = str[size:]
  132. }
  133. if buf != nil {
  134. buf.WriteString(origin)
  135. return buf.String()
  136. }
  137. // No invalid byte.
  138. return origin
  139. }
  140. // WordSplit splits a string into words. Returns a slice of words.
  141. // If there is no word in a string, return nil.
  142. //
  143. // Word is defined as a locale dependent string containing alphabetic characters,
  144. // which may also contain but not start with `'` and `-` characters.
  145. func WordSplit(str string) []string {
  146. var word string
  147. var words []string
  148. var r rune
  149. var size, pos int
  150. inWord := false
  151. for len(str) > 0 {
  152. r, size = utf8.DecodeRuneInString(str)
  153. switch {
  154. case isAlphabet(r):
  155. if !inWord {
  156. inWord = true
  157. word = str
  158. pos = 0
  159. }
  160. case inWord && (r == '\'' || r == '-'):
  161. // Still in word.
  162. default:
  163. if inWord {
  164. inWord = false
  165. words = append(words, word[:pos])
  166. }
  167. }
  168. pos += size
  169. str = str[size:]
  170. }
  171. if inWord {
  172. words = append(words, word[:pos])
  173. }
  174. return words
  175. }