aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/huandu/xstrings/count.go
diff options
context:
space:
mode:
authorJohn Olheiser <42128690+jolheiser@users.noreply.github.com>2020-01-28 07:57:15 -0600
committerzeripath <art27@cantab.net>2020-01-28 13:57:15 +0000
commit28216bde46a8cf415755ca41f3d58843eeb45e7c (patch)
treea07282f9c102d0c4605094eeeb497453aa0dc36f /vendor/github.com/huandu/xstrings/count.go
parent206a031b38a766d0ce89ae94a304f7d418ccdafb (diff)
downloadgitea-28216bde46a8cf415755ca41f3d58843eeb45e7c.tar.gz
gitea-28216bde46a8cf415755ca41f3d58843eeb45e7c.zip
More expansions in template repositories (#10021)
* Super expansion * Explain which features are in 1.11 vs 1.12 * Move imports Signed-off-by: jolheiser <john.olheiser@gmail.com>
Diffstat (limited to 'vendor/github.com/huandu/xstrings/count.go')
-rw-r--r--vendor/github.com/huandu/xstrings/count.go120
1 files changed, 120 insertions, 0 deletions
diff --git a/vendor/github.com/huandu/xstrings/count.go b/vendor/github.com/huandu/xstrings/count.go
new file mode 100644
index 0000000000..f96e38703a
--- /dev/null
+++ b/vendor/github.com/huandu/xstrings/count.go
@@ -0,0 +1,120 @@
+// Copyright 2015 Huan Du. All rights reserved.
+// Licensed under the MIT license that can be found in the LICENSE file.
+
+package xstrings
+
+import (
+ "unicode"
+ "unicode/utf8"
+)
+
+// Len returns str's utf8 rune length.
+func Len(str string) int {
+ return utf8.RuneCountInString(str)
+}
+
+// WordCount returns number of words in a string.
+//
+// Word is defined as a locale dependent string containing alphabetic characters,
+// which may also contain but not start with `'` and `-` characters.
+func WordCount(str string) int {
+ var r rune
+ var size, n int
+
+ inWord := false
+
+ for len(str) > 0 {
+ r, size = utf8.DecodeRuneInString(str)
+
+ switch {
+ case isAlphabet(r):
+ if !inWord {
+ inWord = true
+ n++
+ }
+
+ case inWord && (r == '\'' || r == '-'):
+ // Still in word.
+
+ default:
+ inWord = false
+ }
+
+ str = str[size:]
+ }
+
+ return n
+}
+
+const minCJKCharacter = '\u3400'
+
+// Checks r is a letter but not CJK character.
+func isAlphabet(r rune) bool {
+ if !unicode.IsLetter(r) {
+ return false
+ }
+
+ switch {
+ // Quick check for non-CJK character.
+ case r < minCJKCharacter:
+ return true
+
+ // Common CJK characters.
+ case r >= '\u4E00' && r <= '\u9FCC':
+ return false
+
+ // Rare CJK characters.
+ case r >= '\u3400' && r <= '\u4D85':
+ return false
+
+ // Rare and historic CJK characters.
+ case r >= '\U00020000' && r <= '\U0002B81D':
+ return false
+ }
+
+ return true
+}
+
+// Width returns string width in monotype font.
+// Multi-byte characters are usually twice the width of single byte characters.
+//
+// Algorithm comes from `mb_strwidth` in PHP.
+// http://php.net/manual/en/function.mb-strwidth.php
+func Width(str string) int {
+ var r rune
+ var size, n int
+
+ for len(str) > 0 {
+ r, size = utf8.DecodeRuneInString(str)
+ n += RuneWidth(r)
+ str = str[size:]
+ }
+
+ return n
+}
+
+// RuneWidth returns character width in monotype font.
+// Multi-byte characters are usually twice the width of single byte characters.
+//
+// Algorithm comes from `mb_strwidth` in PHP.
+// http://php.net/manual/en/function.mb-strwidth.php
+func RuneWidth(r rune) int {
+ switch {
+ case r == utf8.RuneError || r < '\x20':
+ return 0
+
+ case '\x20' <= r && r < '\u2000':
+ return 1
+
+ case '\u2000' <= r && r < '\uFF61':
+ return 2
+
+ case '\uFF61' <= r && r < '\uFFA0':
+ return 1
+
+ case '\uFFA0' <= r:
+ return 2
+ }
+
+ return 0
+}