// Copyright (c) 2015 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software distributed under the // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. // +build ignore package main import ( "bufio" "bytes" "flag" "fmt" "io" "log" "net/http" "os" "os/exec" "strconv" "strings" "unicode" ) var url = flag.String("url", "http://www.unicode.org/Public/"+unicode.Version+"/ucd/auxiliary/", "URL of Unicode database directory") var verbose = flag.Bool("verbose", false, "write data to stdout as it is parsed") var localFiles = flag.Bool("local", false, "data files have been copied to the current directory; for debugging only") var outputFile = flag.String("output", "", "output file for generated tables; default stdout") var output *bufio.Writer func main() { flag.Parse() setupOutput() graphemeTests := make([]test, 0) graphemeComments := make([]string, 0) graphemeTests, graphemeComments = loadUnicodeData("GraphemeBreakTest.txt", graphemeTests, graphemeComments) wordTests := make([]test, 0) wordComments := make([]string, 0) wordTests, wordComments = loadUnicodeData("WordBreakTest.txt", wordTests, wordComments) sentenceTests := make([]test, 0) sentenceComments := make([]string, 0) sentenceTests, sentenceComments = loadUnicodeData("SentenceBreakTest.txt", sentenceTests, sentenceComments) fmt.Fprintf(output, fileHeader, *url) generateTestTables("Grapheme", graphemeTests, graphemeComments) generateTestTables("Word", wordTests, wordComments) generateTestTables("Sentence", sentenceTests, sentenceComments) flushOutput() } // WordBreakProperty.txt has the form: // 05F0..05F2 ; Hebrew_Letter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD // FB1D ; Hebrew_Letter # Lo HEBREW LETTER YOD WITH HIRIQ func openReader(file string) (input io.ReadCloser) { if *localFiles { f, err := os.Open(file) if err != nil { log.Fatal(err) } input = f } else { path := *url + file resp, err := http.Get(path) if err != nil { log.Fatal(err) } if resp.StatusCode != 200 { log.Fatal("bad GET status for "+file, resp.Status) } input = resp.Body } return } func loadUnicodeData(filename string, tests []test, comments []string) ([]test, []string) { f := openReader(filename) defer f.Close() bufioReader := bufio.NewReader(f) line, err := bufioReader.ReadString('\n') for err == nil { tests, comments = parseLine(line, tests, comments) line, err = bufioReader.ReadString('\n') } // if the err was EOF still need to process last value if err == io.EOF { tests, comments = parseLine(line, tests, comments) } return tests, comments } const comment = "#" const brk = "÷" const nbrk = "×" type test [][]byte func parseLine(line string, tests []test, comments []string) ([]test, []string) { if strings.HasPrefix(line, comment) { return tests, comments } line = strings.TrimSpace(line) if len(line) == 0 { return tests, comments } commentStart := strings.Index(line, comment) comment := strings.TrimSpace(line[commentStart+1:]) if commentStart > 0 { line = line[0:commentStart] } pieces := strings.Split(line, brk) t := make(test, 0) for _, piece := range pieces { piece = strings.TrimSpace(piece) if len(piece) > 0 { codePoints := strings.Split(piece, nbrk) word := "" for _, codePoint := range codePoints { codePoint = strings.TrimSpace(codePoint) r, err := strconv.ParseInt(codePoint, 16, 64) if err != nil { log.Printf("err: %v for '%s'", err, string(r)) return tests, comments } word += string(r) } t = append(t, []byte(word)) } } tests = append(tests, t) comments = append(comments, comment) return tests, comments } func generateTestTables(prefix string, tests []test, comments []string) { fmt.Fprintf(output, testHeader, prefix) for i, t := range tests { fmt.Fprintf(output, "\t\t{\n") fmt.Fprintf(output, "\t\t\tinput: %#v,\n", bytes.Join(t, []byte{})) fmt.Fprintf(output, "\t\t\toutput: %s,\n", generateTest(t)) fmt.Fprintf(output, "\t\t\tcomment: `%s`,\n", comments[i]) fmt.Fprintf(output, "\t\t},\n") } fmt.Fprintf(output, "}\n") } func generateTest(t test) string { rv := "[][]byte{" for _, te := range t { rv += fmt.Sprintf("%#v,", te) } rv += "}" return rv } const fileHeader = `// Generated by running // maketesttables --url=%s // DO NOT EDIT package segment ` const testHeader = `var unicode%sTests = []struct { input []byte output [][]byte comment string }{ ` func setupOutput() { output = bufio.NewWriter(startGofmt()) } // startGofmt connects output to a gofmt process if -output is set. func startGofmt() io.Writer { if *outputFile == "" { return os.Stdout } stdout, err := os.Create(*outputFile) if err != nil { log.Fatal(err) } // Pipe output to gofmt. gofmt := exec.Command("gofmt") fd, err := gofmt.StdinPipe() if err != nil { log.Fatal(err) } gofmt.Stdout = stdout gofmt.Stderr = os.Stderr err = gofmt.Start() if err != nil { log.Fatal(err) } return fd } func flushOutput() { err := output.Flush() if err != nil { log.Fatal(err) } }