diff options
author | Unknwon <joe2010xtmf@163.com> | 2014-09-07 19:58:01 -0400 |
---|---|---|
committer | Unknwon <joe2010xtmf@163.com> | 2014-09-07 19:58:01 -0400 |
commit | 25d6ae69d1cb392922b9d9dc0da1c17aef9a9db2 (patch) | |
tree | 4bc49d5520c52bf1ea6367cc0b80bf0065b920bc /modules/mahonia/mbcs.go | |
parent | f8977f4847b8df9feec1bb5913f75401d79db876 (diff) | |
download | gitea-25d6ae69d1cb392922b9d9dc0da1c17aef9a9db2.tar.gz gitea-25d6ae69d1cb392922b9d9dc0da1c17aef9a9db2.zip |
Remove hg dep
Diffstat (limited to 'modules/mahonia/mbcs.go')
-rw-r--r-- | modules/mahonia/mbcs.go | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/modules/mahonia/mbcs.go b/modules/mahonia/mbcs.go new file mode 100644 index 0000000000..306dd5de1c --- /dev/null +++ b/modules/mahonia/mbcs.go @@ -0,0 +1,92 @@ +package mahonia + +// Generic converters for multibyte character sets. + +// An mbcsTrie contains the data to convert from the character set to Unicode. +// If a character would be encoded as "\x01\x02\x03", its unicode value would be found at t.children[1].children[2].children[3].rune +// children either is nil or has 256 elements. +type mbcsTrie struct { + // For leaf nodes, the Unicode character that is represented. + char rune + + // For non-leaf nodes, the trie to decode the remainder of the character. + children []mbcsTrie +} + +// A MBCSTable holds the data to convert to and from Unicode. +type MBCSTable struct { + toUnicode mbcsTrie + fromUnicode map[rune]string +} + +// AddCharacter adds a character to the table. rune is its Unicode code point, +// and bytes contains the bytes used to encode it in the character set. +func (table *MBCSTable) AddCharacter(c rune, bytes string) { + if table.fromUnicode == nil { + table.fromUnicode = make(map[rune]string) + } + + table.fromUnicode[c] = bytes + + trie := &table.toUnicode + for i := 0; i < len(bytes); i++ { + if trie.children == nil { + trie.children = make([]mbcsTrie, 256) + } + + b := bytes[i] + trie = &trie.children[b] + } + + trie.char = c +} + +func (table *MBCSTable) Decoder() Decoder { + return func(p []byte) (c rune, size int, status Status) { + if len(p) == 0 { + status = NO_ROOM + return + } + + if p[0] == 0 { + return 0, 1, SUCCESS + } + + trie := &table.toUnicode + for trie.char == 0 { + if trie.children == nil { + return 0xfffd, 1, INVALID_CHAR + } + if len(p) < size+1 { + return 0, 0, NO_ROOM + } + + trie = &trie.children[p[size]] + size++ + } + + c = trie.char + status = SUCCESS + return + } +} + +func (table *MBCSTable) Encoder() Encoder { + return func(p []byte, c rune) (size int, status Status) { + bytes := table.fromUnicode[c] + if bytes == "" { + if len(p) > 0 { + p[0] = '?' + return 1, INVALID_CHAR + } else { + return 0, NO_ROOM + } + } + + if len(p) < len(bytes) { + return 0, NO_ROOM + } + + return copy(p, bytes), SUCCESS + } +} |