// Extracted from a cgit source view.
// path: root/modules/mahonia/reader.go
// blob: 3514b95b8c423a611bae0b14694d4587ccfab0e9

package mahonia

// This file is based on bufio.Reader in the Go standard library,
// which has the following copyright notice:

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

import (
	"io"
	"unicode/utf8"
)

// defaultBufSize is the length, in bytes, of the internal buffer that
// NewReader allocates for a freshly created Reader.
const defaultBufSize = 4096

// Reader wraps an io.Reader and performs character-set decoding on the
// bytes it produces, using a Decoder.
type Reader struct {
	// buf holds bytes obtained from rd; buf[r:w] is the part not yet decoded.
	buf []byte
	// rd is the underlying source of encoded bytes.
	rd io.Reader
	// decode is called on buf[r:w] to extract the next character.
	decode Decoder
	// r is the offset in buf of the next byte to hand to decode.
	r int
	// w is the offset in buf just past the last byte read from rd.
	w int
	// err is the most recent non-nil error returned by rd.Read.
	err error
}

// NewReader returns a Reader that takes encoded bytes from rd and decodes
// them with d. The new Reader starts out with an internal buffer of
// defaultBufSize bytes.
func (d Decoder) NewReader(rd io.Reader) *Reader {
	return &Reader{
		buf:    make([]byte, defaultBufSize),
		rd:     rd,
		decode: d,
	}
}

// fill compacts the undecoded bytes to the front of the buffer and then
// issues a single Read on the underlying reader to append more data after
// them. A non-nil error from that Read is stored in b.err.
func (b *Reader) fill() {
	// Shift the pending bytes down to offset zero so that all free space
	// sits in one piece at the end of the buffer.
	if b.r > 0 {
		b.w = copy(b.buf, b.buf[b.r:b.w])
		b.r = 0
	}

	// Append fresh bytes behind the pending data.
	got, readErr := b.rd.Read(b.buf[b.w:])
	b.w += got
	if readErr != nil {
		b.err = readErr
	}
}

// Read decodes buffered input and writes the result into p as UTF-8.
// It returns the number of bytes written into p.
// It calls Read at most once on the underlying Reader,
// hence n may be less than len(p).
// Once the buffered input is exhausted and the underlying Reader has
// reported an error, the count is zero and err is that error (io.EOF at
// end of input).
//
// Only whole characters are written. If p is non-empty but too small to
// hold even the first decoded character, Read consumes no character and
// returns 0, io.ErrShortBuffer instead of 0, nil, so that callers looping
// on Read cannot spin forever without making progress. This error is not
// sticky: a later call with a larger p proceeds normally.
func (b *Reader) Read(p []byte) (n int, err error) {
	n = len(p)
	filled := false
	if n == 0 {
		return 0, b.err
	}
	if b.w == b.r {
		if b.err != nil {
			return 0, b.err
		}
		if n > len(b.buf) {
			// Large read, empty buffer.
			// Allocate a larger buffer for efficiency.
			b.buf = make([]byte, n)
		}
		b.fill()
		filled = true
		if b.w == b.r {
			return 0, b.err
		}
	}

	i := 0
	for i < n {
		c, size, status := b.decode(b.buf[b.r:b.w])

		if status == STATE_ONLY {
			// The decoder consumed input that yields no character.
			b.r += size
			continue
		}

		if status == NO_ROOM {
			if b.err != nil {
				// No more input is coming: whatever is left is a truncated
				// sequence, so emit one replacement character for all of it.
				c = utf8.RuneError
				size = b.w - b.r
				if size == 0 {
					break
				}
			} else if filled {
				// The single permitted underlying Read was already used.
				break
			} else {
				b.fill()
				filled = true
				continue
			}
		}

		// utf8.RuneLen reports -1 for values that are not valid runes, which
		// would defeat the room check below; normalize them to the
		// replacement character, which is what utf8.EncodeRune emits anyway.
		if !utf8.ValidRune(c) {
			c = utf8.RuneError
		}

		if i+utf8.RuneLen(c) > n {
			if i == 0 {
				// Nothing was written and the next character cannot fit.
				return 0, io.ErrShortBuffer
			}
			break
		}

		b.r += size
		if c < utf8.RuneSelf {
			p[i] = byte(c)
			i++
		} else {
			i += utf8.EncodeRune(p[i:], c)
		}
	}

	return i, nil
}

// ReadRune decodes and returns the next character from the input.
// size is the number of bytes of buffered (encoded) input that the
// character occupied, i.e. how far the read position advanced.
// When no buffered input remains, it returns 0, 0 and the error recorded
// from the underlying reader.
func (b *Reader) ReadRune() (c rune, size int, err error) {
	for {
		ch, n, status := b.decode(b.buf[b.r:b.w])

		switch {
		case status == NO_ROOM && b.err == nil:
			// The decoder needs more bytes and the source has not failed yet.
			b.fill()
			continue
		case status == STATE_ONLY:
			// Input that yields no character: skip it and decode again.
			b.r += n
			continue
		}

		if b.r == b.w {
			return 0, 0, b.err
		}

		if status == NO_ROOM {
			// The source has failed with a partial sequence still buffered;
			// report all of it as a single replacement character.
			ch = 0xfffd
			n = b.w - b.r
		}

		b.r += n
		return ch, n, nil
	}
}