123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
- // Copyright (c) 2015 Couchbase, Inc.
- // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
- // except in compliance with the License. You may obtain a copy of the License at
- // http://www.apache.org/licenses/LICENSE-2.0
- // Unless required by applicable law or agreed to in writing, software distributed under the
- // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
- // either express or implied. See the License for the specific language governing permissions
- // and limitations under the License.
-
- // +build ignore
-
- package main
-
- import (
- "bufio"
- "bytes"
- "flag"
- "fmt"
- "io"
- "log"
- "net/http"
- "os"
- "os/exec"
- "strconv"
- "strings"
- "unicode"
- )
-
// Command-line flags and the shared output writer used by the generator.
var (
	// url is the base location the data files are fetched from; the file
	// name is appended directly to it.
	url = flag.String(
		"url",
		"http://www.unicode.org/Public/"+unicode.Version+"/ucd/auxiliary/",
		"URL of Unicode database directory",
	)

	// verbose is registered as the -verbose flag.
	verbose = flag.Bool("verbose", false, "write data to stdout as it is parsed")

	// localFiles makes the data files be opened by name from disk instead
	// of being downloaded.
	localFiles = flag.Bool("local", false, "data files have been copied to the current directory; for debugging only")

	// outputFile names the file the generated tables are written to; the
	// empty default selects standard output.
	outputFile = flag.String("output", "", "output file for generated tables; default stdout")

	// output is the buffered writer that receives all generated text. It is
	// nil until setupOutput assigns it.
	output *bufio.Writer
)
-
- func main() {
- flag.Parse()
- setupOutput()
-
- graphemeTests := make([]test, 0)
- graphemeComments := make([]string, 0)
- graphemeTests, graphemeComments = loadUnicodeData("GraphemeBreakTest.txt", graphemeTests, graphemeComments)
- wordTests := make([]test, 0)
- wordComments := make([]string, 0)
- wordTests, wordComments = loadUnicodeData("WordBreakTest.txt", wordTests, wordComments)
- sentenceTests := make([]test, 0)
- sentenceComments := make([]string, 0)
- sentenceTests, sentenceComments = loadUnicodeData("SentenceBreakTest.txt", sentenceTests, sentenceComments)
-
- fmt.Fprintf(output, fileHeader, *url)
- generateTestTables("Grapheme", graphemeTests, graphemeComments)
- generateTestTables("Word", wordTests, wordComments)
- generateTestTables("Sentence", sentenceTests, sentenceComments)
-
- flushOutput()
- }
-
- // WordBreakProperty.txt has the form:
- // 05F0..05F2 ; Hebrew_Letter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
- // FB1D ; Hebrew_Letter # Lo HEBREW LETTER YOD WITH HIRIQ
- func openReader(file string) (input io.ReadCloser) {
- if *localFiles {
- f, err := os.Open(file)
- if err != nil {
- log.Fatal(err)
- }
- input = f
- } else {
- path := *url + file
- resp, err := http.Get(path)
- if err != nil {
- log.Fatal(err)
- }
- if resp.StatusCode != 200 {
- log.Fatal("bad GET status for "+file, resp.Status)
- }
- input = resp.Body
- }
- return
- }
-
- func loadUnicodeData(filename string, tests []test, comments []string) ([]test, []string) {
- f := openReader(filename)
- defer f.Close()
- bufioReader := bufio.NewReader(f)
- line, err := bufioReader.ReadString('\n')
- for err == nil {
- tests, comments = parseLine(line, tests, comments)
- line, err = bufioReader.ReadString('\n')
- }
- // if the err was EOF still need to process last value
- if err == io.EOF {
- tests, comments = parseLine(line, tests, comments)
- }
- return tests, comments
- }
-
const (
	// comment starts a whole-line comment and also introduces the trailing
	// remark on a data line.
	comment = "#"
	// brk separates the segments of a data line.
	brk = "÷"
	// nbrk separates the code points that belong to the same segment.
	nbrk = "×"
)

// test is one parsed data line: the UTF-8 bytes of each segment, in order.
type test [][]byte

// parseLine parses a single line of a break-test data file.
//
// Blank lines and lines whose first non-space character is '#' are ignored.
// For any other line, the text after the first '#' (trimmed) becomes the
// line's comment, or the empty string when the line has no '#'. The text
// before it is split on brk into segments, and each segment is split on nbrk
// into hexadecimal code points that are encoded as UTF-8.
//
// On success one entry is appended to tests and one to comments, and both
// extended slices are returned. If a code point cannot be parsed the problem
// is logged and both slices are returned unchanged.
func parseLine(line string, tests []test, comments []string) ([]test, []string) {
	line = strings.TrimSpace(line)
	if len(line) == 0 || strings.HasPrefix(line, comment) {
		return tests, comments
	}

	// Cut the remark off before splitting: remarks themselves contain the
	// brk and nbrk markers.
	note := ""
	if i := strings.Index(line, comment); i >= 0 {
		note = strings.TrimSpace(line[i+len(comment):])
		line = line[:i]
	}

	t := test{}
	for _, piece := range strings.Split(line, brk) {
		piece = strings.TrimSpace(piece)
		if len(piece) == 0 {
			continue
		}
		var segment []byte
		for _, codePoint := range strings.Split(piece, nbrk) {
			codePoint = strings.TrimSpace(codePoint)
			// A rune is 32 bits wide; anything larger is reported as an
			// error instead of being truncated.
			r, err := strconv.ParseInt(codePoint, 16, 32)
			if err != nil {
				log.Printf("err: %v for '%s'", err, codePoint)
				return tests, comments
			}
			segment = append(segment, string(rune(r))...)
		}
		t = append(t, segment)
	}

	tests = append(tests, t)
	comments = append(comments, note)
	return tests, comments
}
-
- func generateTestTables(prefix string, tests []test, comments []string) {
- fmt.Fprintf(output, testHeader, prefix)
- for i, t := range tests {
- fmt.Fprintf(output, "\t\t{\n")
- fmt.Fprintf(output, "\t\t\tinput: %#v,\n", bytes.Join(t, []byte{}))
- fmt.Fprintf(output, "\t\t\toutput: %s,\n", generateTest(t))
- fmt.Fprintf(output, "\t\t\tcomment: `%s`,\n", comments[i])
- fmt.Fprintf(output, "\t\t},\n")
- }
- fmt.Fprintf(output, "}\n")
- }
-
- func generateTest(t test) string {
- rv := "[][]byte{"
- for _, te := range t {
- rv += fmt.Sprintf("%#v,", te)
- }
- rv += "}"
- return rv
- }
-
- const fileHeader = `// Generated by running
- // maketesttables --url=%s
- // DO NOT EDIT
-
- package segment
- `
-
// testHeader opens the declaration of one generated test table. Its single %s
// verb receives the table prefix, giving a variable named unicode<prefix>Tests
// whose elements carry the input bytes, the expected segments and the comment
// from the data file. The slice literal is left open; generateTestTables
// writes the entries and the closing brace.
const testHeader = `var unicode%sTests = []struct {
input []byte
output [][]byte
comment string
}{
`
-
- func setupOutput() {
- output = bufio.NewWriter(startGofmt())
- }
-
- // startGofmt connects output to a gofmt process if -output is set.
- func startGofmt() io.Writer {
- if *outputFile == "" {
- return os.Stdout
- }
- stdout, err := os.Create(*outputFile)
- if err != nil {
- log.Fatal(err)
- }
- // Pipe output to gofmt.
- gofmt := exec.Command("gofmt")
- fd, err := gofmt.StdinPipe()
- if err != nil {
- log.Fatal(err)
- }
- gofmt.Stdout = stdout
- gofmt.Stderr = os.Stderr
- err = gofmt.Start()
- if err != nil {
- log.Fatal(err)
- }
- return fd
- }
-
- func flushOutput() {
- err := output.Flush()
- if err != nil {
- log.Fatal(err)
- }
- }
|