diff options
author | Antoine GIRARD <sapk@users.noreply.github.com> | 2019-12-14 01:15:48 +0100 |
---|---|---|
committer | Lauris BH <lauris@nix.lv> | 2019-12-14 02:15:48 +0200 |
commit | 81a52442a129322acfa12bc22a75aa2652484f33 (patch) | |
tree | 668795fc540bbae4ade570efd7e5d5c9ce47906e /vendor/github.com/gogs/chardet/utf8.go | |
parent | 74179d1b5e739b3fa0d0915bb35d6b7596fd13af (diff) | |
download | gitea-81a52442a129322acfa12bc22a75aa2652484f33.tar.gz gitea-81a52442a129322acfa12bc22a75aa2652484f33.zip |
deps: update and fix chardet import (#9351)
Diffstat (limited to 'vendor/github.com/gogs/chardet/utf8.go')
-rw-r--r-- | vendor/github.com/gogs/chardet/utf8.go | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/vendor/github.com/gogs/chardet/utf8.go b/vendor/github.com/gogs/chardet/utf8.go new file mode 100644 index 0000000000..ae036ad9bc --- /dev/null +++ b/vendor/github.com/gogs/chardet/utf8.go @@ -0,0 +1,71 @@ +package chardet + +import ( + "bytes" +) + +var utf8Bom = []byte{0xEF, 0xBB, 0xBF} + +type recognizerUtf8 struct { +} + +func newRecognizer_utf8() *recognizerUtf8 { + return &recognizerUtf8{} +} + +func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: "UTF-8", + } + hasBom := bytes.HasPrefix(input.raw, utf8Bom) + inputLen := len(input.raw) + var numValid, numInvalid uint32 + var trailBytes uint8 + for i := 0; i < inputLen; i++ { + c := input.raw[i] + if c&0x80 == 0 { + continue + } + if c&0xE0 == 0xC0 { + trailBytes = 1 + } else if c&0xF0 == 0xE0 { + trailBytes = 2 + } else if c&0xF8 == 0xF0 { + trailBytes = 3 + } else { + numInvalid++ + if numInvalid > 5 { + break + } + trailBytes = 0 + } + + for i++; i < inputLen; i++ { + c = input.raw[i] + if c&0xC0 != 0x80 { + numInvalid++ + break + } + if trailBytes--; trailBytes == 0 { + numValid++ + break + } + } + } + + if hasBom && numInvalid == 0 { + output.Confidence = 100 + } else if hasBom && numValid > numInvalid*10 { + output.Confidence = 80 + } else if numValid > 3 && numInvalid == 0 { + output.Confidence = 100 + } else if numValid > 0 && numInvalid == 0 { + output.Confidence = 80 + } else if numValid == 0 && numInvalid == 0 { + // Plain ASCII + output.Confidence = 10 + } else if numValid > numInvalid*10 { + output.Confidence = 25 + } + return +} |