1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
package mahonia
// This file is based on bufio.Reader in the Go standard library,
// which has the following copyright notice:
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
import (
"io"
"unicode/utf8"
)
// defaultBufSize is the length, in bytes, of the internal buffer that
// NewReader allocates for a Reader.
const defaultBufSize = 4096
// Reader wraps an io.Reader and decodes the bytes it produces using a
// Decoder, exposing the decoded text through Read and ReadRune.
type Reader struct {
	// buf holds raw input read from rd; buf[r:w] is the part that has
	// not been decoded yet.
	buf []byte
	// rd is the underlying source of encoded bytes.
	rd io.Reader
	// decode is called on buf[r:w] to decode the next character.
	decode Decoder
	// r is the index in buf of the next byte to decode.
	r int
	// w is the index in buf just past the last byte read from rd.
	w int
	// err is the most recent non-nil error returned by rd.Read, if any.
	err error
}
// NewReader returns a Reader that pulls encoded bytes from rd and decodes
// them with d. The Reader starts with an internal buffer of
// defaultBufSize bytes.
func (d Decoder) NewReader(rd io.Reader) *Reader {
	return &Reader{
		buf:    make([]byte, defaultBufSize),
		rd:     rd,
		decode: d,
	}
}
// fill moves any undecoded bytes to the start of the buffer and then
// performs a single Read on the underlying reader to append more data
// after them. A non-nil error from that Read is recorded in b.err; bytes
// returned together with the error are still kept.
func (b *Reader) fill() {
	// Compact: shift the pending bytes buf[r:w] down to index 0 so the
	// free space at the tail is as large as possible.
	if b.r > 0 {
		copy(b.buf, b.buf[b.r:b.w])
		b.r, b.w = 0, b.w-b.r
	}

	// Append fresh input after the pending bytes.
	got, readErr := b.rd.Read(b.buf[b.w:])
	b.w += got
	if readErr != nil {
		b.err = readErr
	}
}
// Read decodes buffered input and writes it to p encoded as UTF-8.
// It returns the number of bytes written into p.
// It calls Read at most once on the underlying Reader,
// hence n may be less than len(p).
//
// When no buffered input remains and the underlying Reader has reported
// an error, Read returns 0 and that error (io.EOF at end of input for a
// conventional io.Reader). If bytes that cannot form a complete character
// are left over once the underlying Reader has failed, they are consumed
// and replaced by a single U+FFFD.
//
// If p is too small to hold even the next decoded character, Read returns
// 0 and io.ErrShortBuffer rather than (0, nil), so that callers looping on
// Read cannot spin forever. A buffer of at least utf8.UTFMax bytes never
// triggers this.
func (b *Reader) Read(p []byte) (n int, err error) {
	n = len(p)
	if n == 0 {
		return 0, b.err
	}

	// filled records whether the underlying Reader was already consulted
	// during this call, which enforces the "at most once" guarantee.
	filled := false
	if b.w == b.r {
		if b.err != nil {
			return 0, b.err
		}
		if n > len(b.buf) {
			// Large read, empty buffer.
			// Allocate a larger buffer for efficiency.
			b.buf = make([]byte, n)
		}
		b.fill()
		filled = true
		if b.w == b.r {
			return 0, b.err
		}
	}

	i := 0
	for i < n {
		c, size, status := b.decode(b.buf[b.r:b.w])
		if status == STATE_ONLY {
			// The decoder consumed input without producing a character.
			b.r += size
			continue
		}
		if status == NO_ROOM {
			// The decoder could not produce a character from what is
			// buffered; this is handled as a request for more input.
			if b.err == nil {
				if filled {
					break
				}
				b.fill()
				filled = true
				continue
			}
			// No more input will arrive: turn the leftover bytes into a
			// single replacement character.
			size = b.w - b.r
			if size == 0 {
				break
			}
			c = utf8.RuneError
		}
		// utf8.RuneLen reports -1 for values that are not valid runes,
		// which would defeat the size check below and let EncodeRune
		// write past the end of p. Normalise them first.
		if !utf8.ValidRune(c) {
			c = utf8.RuneError
		}
		if i+utf8.RuneLen(c) > n {
			if i == 0 {
				// Not even one character fits; report it instead of
				// returning (0, nil) indefinitely.
				return 0, io.ErrShortBuffer
			}
			break
		}
		b.r += size
		if c < utf8.RuneSelf {
			p[i] = byte(c)
			i++
		} else {
			i += utf8.EncodeRune(p[i:], c)
		}
	}

	// Nothing was produced, nothing is left to decode and the underlying
	// Reader has failed: surface that error now instead of (0, nil).
	if i == 0 && b.r == b.w && b.err != nil {
		return 0, b.err
	}
	return i, nil
}
// ReadRune decodes and returns the next character from the input.
// size is the number of buffered input bytes consumed for that character,
// as reported by the decoder. When nothing is left to decode, it returns
// 0, 0 and the error recorded from the underlying reader. Leftover bytes
// that cannot be decoded after the underlying reader has failed are
// consumed together and returned as a single U+FFFD.
func (b *Reader) ReadRune() (c rune, size int, err error) {
	for {
		ch, width, status := b.decode(b.buf[b.r:b.w])
		switch {
		case status == NO_ROOM && b.err == nil:
			// More input may still arrive; fetch it and decode again.
			b.fill()
			continue
		case status == STATE_ONLY:
			// Input was consumed without yielding a character.
			b.r += width
			continue
		}

		if b.r == b.w {
			return 0, 0, b.err
		}
		if status == NO_ROOM {
			// The underlying reader has failed, so the remaining bytes
			// can never be completed; replace them all with U+FFFD.
			ch = 0xfffd
			width = b.w - b.r
		}
		b.r += width
		return ch, width, nil
	}
}
|