summaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/text/width/gen.go
blob: 092277e1f64b96a3475d9b4036d93ad9b0f00610 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// This program generates the trie for width operations. The generated table
// includes width category information as well as the normalization mappings.
package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"math"
	"unicode/utf8"

	"golang.org/x/text/internal/gen"
	"golang.org/x/text/internal/triegen"
)

// See gen_common.go for flags.

// main drives the generator: it initializes the shared gen package, emits the
// width tables and the test data, and then repackages the shared generator
// sources for use inside the width package.
func main() {
	gen.Init()

	genTables()
	genTests()

	// Each generator-side file is repackaged under a new name in package
	// "width"; the order matches the original call sequence.
	repackaged := [...]struct{ src, dst string }{
		{"gen_trieval.go", "trieval.go"},
		{"gen_common.go", "common_test.go"},
	}
	for _, f := range repackaged {
		gen.Repackage(f.src, f.dst, "width")
	}
}

// genTables builds the "width" trie from the data reported by getWidthData,
// collects the table of inverse mappings, and writes both, together with a
// size summary, to the versioned Go file named by *outputFile.
func genTables() {
	trie := triegen.NewTrie("width")

	// Fold and inverse mappings. See inverseDataComment for the layout of an
	// entry. Slot 0 is a placeholder so that an index of 0 means "no mapping".
	inverse := [][4]byte{{}}
	mapping := map[[4]byte]int{{}: 0}

	// indexFor returns the index of the entry that maps r to alt, registering
	// a new entry the first time a particular byte pattern is seen.
	indexFor := func(r, alt rune) int {
		var entry [4]byte
		n := utf8.EncodeRune(entry[1:], alt)
		entry[0] = byte(n)

		// Xor the last byte of alt's encoding with the last byte of r's
		// encoding so that runs of parallel mappings collapse to one entry.
		orig := string(r)
		entry[n] ^= orig[len(orig)-1]

		if known, found := mapping[entry]; found {
			return known
		}
		next := len(mapping)
		if next > math.MaxUint8 {
			log.Fatalf("Index %d does not fit in a byte.", next)
		}
		mapping[entry] = next
		inverse = append(inverse, entry)
		return next
	}

	getWidthData(func(r rune, tag elem, alt rune) {
		value := tag
		if alt != 0 {
			value |= elem(indexFor(r, alt))
		}
		trie.Insert(r, uint64(value))
	})

	var out bytes.Buffer
	gen.WriteUnicodeVersion(&out)

	size, err := trie.Gen(&out)
	if err != nil {
		log.Fatal(err)
	}
	size += writeMappings(&out, inverse)

	fmt.Fprintf(&out, "// Total table size %d bytes (%dKiB)\n", size, size/1024)

	gen.WriteVersionedGoFile(*outputFile, "width", out.Bytes())
}

// inverseDataComment is the explanatory comment that writeMappings prints
// verbatim in front of the generated inverseData table. The example mappings
// use the fullwidth letters U+FF21 and U+FF22 on the left-hand side, matching
// the code points cited next to them.
const inverseDataComment = `
// inverseData contains 4-byte entries of the following format:
//   <length> <modified UTF-8-encoded rune> <0 padding>
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following
// pattern:
//   Ａ -> A  (U+FF21 -> U+0041)
//   Ｂ -> B  (U+FF22 -> U+0042)
//   ...
// By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is
//   { 0x01, 0xE0, 0x00, 0x00 }
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
//   E0 ^ A1 = 41.
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
//   E0 ^ A2 = 42.
// Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8.`

// writeMappings prints inverseDataComment followed by a Go declaration of the
// inverseData array holding every entry of data, one entry per line, to w.
// It returns the number of bytes the table occupies (4 per entry).
func writeMappings(w io.Writer, data [][4]byte) int {
	const entrySize = 4 // each entry is a [4]byte

	fmt.Fprintln(w, inverseDataComment)
	fmt.Fprintf(w, "var inverseData = [%d][4]byte{\n", len(data))
	for i := range data {
		length, b1, b2, b3 := data[i][0], data[i][1], data[i][2], data[i][3]
		fmt.Fprintf(w, "{ 0x%02x, 0x%02x, 0x%02x, 0x%02x },\n", length, b1, b2, b3)
	}
	fmt.Fprintln(w, "}")

	return entrySize * len(data)
}

// genTests writes runes_test.go in package "width". The file declares
// mapRunes, which records, for every rune that getWidthData reports with a
// non-zero alternate, that alternate and the rune's tag.
func genTests() {
	var out bytes.Buffer

	fmt.Fprintf(&out, "\nvar mapRunes = map[rune]struct{r rune; e elem}{\n")
	getWidthData(func(r rune, tag elem, alt rune) {
		if alt == 0 {
			// Runes without an alternate get no entry.
			return
		}
		fmt.Fprintf(&out, "\t0x%X: {0x%X, 0x%X},\n", r, alt, tag)
	})
	fmt.Fprintln(&out, "}")

	gen.WriteGoFile("runes_test.go", "width", out.Bytes())
}