summary refs log tree commit diff stats
path: root/vendor/github.com/gogs/chardet/utf8.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/gogs/chardet/utf8.go')
-rw-r--r-- vendor/github.com/gogs/chardet/utf8.go 71
1 files changed, 71 insertions, 0 deletions
diff --git a/vendor/github.com/gogs/chardet/utf8.go b/vendor/github.com/gogs/chardet/utf8.go
new file mode 100644
index 0000000000..ae036ad9bc
--- /dev/null
+++ b/vendor/github.com/gogs/chardet/utf8.go
@@ -0,0 +1,71 @@
+package chardet
+
+import (
+ "bytes"
+)
+
+// utf8Bom is the three-byte UTF-8 byte order mark (U+FEFF encoded as
+// EF BB BF). Match checks whether the raw input starts with it.
+var utf8Bom = []byte{0xEF, 0xBB, 0xBF}
+
+// recognizerUtf8 is the recognizer for UTF-8 encoded input. It carries no
+// state; all of its work is done by the Match method.
+type recognizerUtf8 struct {
+}
+
+// newRecognizer_utf8 returns a pointer to a freshly allocated, zero-valued
+// UTF-8 recognizer.
+func newRecognizer_utf8() *recognizerUtf8 {
+	return new(recognizerUtf8)
+}
+
+// Match estimates how likely it is that input.raw holds UTF-8 text.
+//
+// It scans the raw bytes once, counting complete multi-byte sequences
+// (numValid) and malformed ones (numInvalid), and also notes whether the
+// input starts with the UTF-8 byte order mark. The returned output always has
+// Charset set to "UTF-8"; Confidence is chosen as follows (first match wins):
+//
+//	BOM present, nothing invalid                 -> 100
+//	BOM present, valid > 10 * invalid            -> 80
+//	more than 3 valid sequences, nothing invalid -> 100
+//	at least 1 valid sequence, nothing invalid   -> 80
+//	no multi-byte data at all (plain ASCII)      -> 10
+//	valid > 10 * invalid                         -> 25
+//	anything else                                -> 0 (left at zero value)
+//
+// A sequence that is cut off by the end of the input is counted as neither
+// valid nor invalid.
+func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) {
+	output = recognizerOutput{
+		Charset: "UTF-8",
+	}
+	hasBom := bytes.HasPrefix(input.raw, utf8Bom)
+	inputLen := len(input.raw)
+	var numValid, numInvalid uint32
+	var trailBytes uint8
+	for i := 0; i < inputLen; i++ {
+		c := input.raw[i]
+		if c&0x80 == 0 {
+			// Plain ASCII byte: says nothing either way.
+			continue
+		}
+
+		// High bit set: work out how many trail bytes the lead byte calls for.
+		if c&0xE0 == 0xC0 {
+			trailBytes = 1
+		} else if c&0xF0 == 0xE0 {
+			trailBytes = 2
+		} else if c&0xF8 == 0xF0 {
+			trailBytes = 3
+		} else {
+			// Stray continuation byte or illegal lead byte.
+			numInvalid++
+			if numInvalid > 5 {
+				// Enough evidence that this is not UTF-8; stop scanning.
+				break
+			}
+			// Do not fall into the trail-byte check below: with no trail
+			// bytes expected it would swallow the following byte, count the
+			// same error twice, or wrap the uint8 counter around to 255.
+			continue
+		}
+
+		// Verify that the expected number of continuation bytes follows.
+		for i++; i < inputLen; i++ {
+			c = input.raw[i]
+			if c&0xC0 != 0x80 {
+				numInvalid++
+				break
+			}
+			if trailBytes--; trailBytes == 0 {
+				numValid++
+				break
+			}
+		}
+	}
+
+	switch {
+	case hasBom && numInvalid == 0:
+		output.Confidence = 100
+	case hasBom && numValid > numInvalid*10:
+		output.Confidence = 80
+	case numValid > 3 && numInvalid == 0:
+		output.Confidence = 100
+	case numValid > 0 && numInvalid == 0:
+		output.Confidence = 80
+	case numValid == 0 && numInvalid == 0:
+		// Plain ASCII
+		output.Confidence = 10
+	case numValid > numInvalid*10:
+		output.Confidence = 25
+	}
+	return output
+}