diff options
author | John Olheiser <42128690+jolheiser@users.noreply.github.com> | 2020-01-28 07:57:15 -0600 |
---|---|---|
committer | zeripath <art27@cantab.net> | 2020-01-28 13:57:15 +0000 |
commit | 28216bde46a8cf415755ca41f3d58843eeb45e7c (patch) | |
tree | a07282f9c102d0c4605094eeeb497453aa0dc36f /vendor/github.com/huandu/xstrings/count.go | |
parent | 206a031b38a766d0ce89ae94a304f7d418ccdafb (diff) | |
download | gitea-28216bde46a8cf415755ca41f3d58843eeb45e7c.tar.gz gitea-28216bde46a8cf415755ca41f3d58843eeb45e7c.zip |
More expansions in template repositories (#10021)
* Super expansion
* Explain which features are in 1.11 vs 1.12
* Move imports
Signed-off-by: jolheiser <john.olheiser@gmail.com>
Diffstat (limited to 'vendor/github.com/huandu/xstrings/count.go')
-rw-r--r-- | vendor/github.com/huandu/xstrings/count.go | 120 |
1 files changed, 120 insertions, 0 deletions
// Source file: vendor/github.com/huandu/xstrings/count.go (new file, package xstrings).
// Uses the standard library packages "unicode" and "unicode/utf8".
//
// Copyright 2015 Huan Du. All rights reserved.
// Licensed under the MIT license that can be found in the LICENSE file.

// Len returns the number of runes in str.
//
// It is a thin wrapper around utf8.RuneCountInString, so every byte that is
// not part of a valid UTF-8 sequence counts as one rune.
func Len(str string) int {
	return utf8.RuneCountInString(str)
}

// WordCount returns number of words in a string.
//
// A word is a run of alphabetic characters (Unicode letters other than CJK
// ideographs) which may also contain, but not start with, `'` and `-`
// characters. Any other rune, including an undecodable byte, ends the
// current word.
func WordCount(str string) int {
	n := 0
	inWord := false

	// Ranging over a string decodes one rune per iteration; an invalid byte
	// is reported as U+FFFD, which is not a letter and so terminates a word.
	for _, r := range str {
		switch {
		case isAlphabet(r):
			if !inWord {
				inWord = true
				n++
			}

		case inWord && (r == '\'' || r == '-'):
			// Still in word: apostrophes and hyphens may continue a word
			// but never start one.

		default:
			inWord = false
		}
	}

	return n
}

// minCJKCharacter is the first code point of the lowest CJK ideograph block
// (CJK Unified Ideographs Extension A) recognised by isAlphabet.
const minCJKCharacter = '\u3400'

// isAlphabet reports whether r is a letter but not a CJK ideograph.
//
// The excluded ranges are whole Unicode blocks/planes rather than the last
// code point assigned at some particular Unicode version, so ideographs added
// by later Unicode releases are excluded as well.
func isAlphabet(r rune) bool {
	if !unicode.IsLetter(r) {
		return false
	}

	switch {
	// Quick check for non-CJK character.
	case r < minCJKCharacter:
		return true

	// CJK Unified Ideographs Extension A (U+3400..U+4DBF).
	case r >= minCJKCharacter && r <= '\u4DBF':
		return false

	// CJK Unified Ideographs (U+4E00..U+9FFF).
	case r >= '\u4E00' && r <= '\u9FFF':
		return false

	// Supplementary and Tertiary Ideographic Planes (U+20000..U+3FFFF):
	// CJK Unified Ideographs Extension B and later, rare and historic.
	case r >= '\U00020000' && r <= '\U0003FFFF':
		return false
	}

	return true
}

// Width returns string width in monotype font.
// Multi-byte characters are usually twice the width of single byte characters.
//
// The result is the sum of RuneWidth over every rune in str; bytes that are
// not valid UTF-8 decode to utf8.RuneError and therefore add nothing.
//
// Algorithm comes from `mb_strwidth` in PHP.
// http://php.net/manual/en/function.mb-strwidth.php
func Width(str string) int {
	n := 0

	for _, r := range str {
		n += RuneWidth(r)
	}

	return n
}

// RuneWidth returns character width in monotype font.
// Multi-byte characters are usually twice the width of single byte characters.
//
// The width is 0 for utf8.RuneError (U+FFFD) and for anything below U+0020,
// 1 for U+0020..U+1FFF and U+FF61..U+FF9F, and 2 for everything else.
//
// Algorithm comes from `mb_strwidth` in PHP.
// http://php.net/manual/en/function.mb-strwidth.php
func RuneWidth(r rune) int {
	switch {
	case r == utf8.RuneError || r < '\x20':
		return 0

	case r < '\u2000':
		return 1

	case r < '\uFF61':
		return 2

	case r < '\uFFA0':
		return 1

	default:
		return 2
	}
}