diff options
author | Antoine GIRARD <sapk@users.noreply.github.com> | 2018-05-21 14:34:20 +0200 |
---|---|---|
committer | Lauris BH <lauris@nix.lv> | 2018-05-21 15:34:20 +0300 |
commit | 3f3383dc0a0de9d6a0444bba71603e5c5d248f0b (patch) | |
tree | 34f4f2ad9ce686d265c2f81e6a3e1b02b92e8e22 /vendor/golang.org | |
parent | d7fd9bf7bb25e7537aef335a0927c216aed881a3 (diff) | |
download | gitea-3f3383dc0a0de9d6a0444bba71603e5c5d248f0b.tar.gz gitea-3f3383dc0a0de9d6a0444bba71603e5c5d248f0b.zip |
Migrate to dep (#3972)
* Update makefile to use dep
* Migrate to dep
* Fix some deps
* Try to find a better version for golang.org/x/net
* Try to find a better version for golang.org/x/oauth2
Diffstat (limited to 'vendor/golang.org')
51 files changed, 9676 insertions, 2074 deletions
diff --git a/vendor/golang.org/x/crypto/AUTHORS b/vendor/golang.org/x/crypto/AUTHORS new file mode 100644 index 0000000000..2b00ddba0d --- /dev/null +++ b/vendor/golang.org/x/crypto/AUTHORS @@ -0,0 +1,3 @@ +# This source code refers to The Go Authors for copyright purposes. +# The master list of authors is in the main Go distribution, +# visible at https://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/crypto/CONTRIBUTORS b/vendor/golang.org/x/crypto/CONTRIBUTORS new file mode 100644 index 0000000000..1fbd3e976f --- /dev/null +++ b/vendor/golang.org/x/crypto/CONTRIBUTORS @@ -0,0 +1,3 @@ +# This source code was written by the Go contributors. +# The master list of contributors is in the main Go distribution, +# visible at https://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/net/AUTHORS b/vendor/golang.org/x/net/AUTHORS new file mode 100644 index 0000000000..15167cd746 --- /dev/null +++ b/vendor/golang.org/x/net/AUTHORS @@ -0,0 +1,3 @@ +# This source code refers to The Go Authors for copyright purposes. +# The master list of authors is in the main Go distribution, +# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/net/CONTRIBUTORS b/vendor/golang.org/x/net/CONTRIBUTORS new file mode 100644 index 0000000000..1c4577e968 --- /dev/null +++ b/vendor/golang.org/x/net/CONTRIBUTORS @@ -0,0 +1,3 @@ +# This source code was written by the Go contributors. +# The master list of contributors is in the main Go distribution, +# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/oauth2/CONTRIBUTING.md b/vendor/golang.org/x/oauth2/CONTRIBUTING.md deleted file mode 100644 index 46aa2b12dd..0000000000 --- a/vendor/golang.org/x/oauth2/CONTRIBUTING.md +++ /dev/null @@ -1,31 +0,0 @@ -# Contributing to Go - -Go is an open source project. - -It is the work of hundreds of contributors. We appreciate your help! - - -## Filing issues - -When [filing an issue](https://github.com/golang/oauth2/issues), make sure to answer these five questions: - -1. What version of Go are you using (`go version`)? -2. What operating system and processor architecture are you using? -3. What did you do? -4. What did you expect to see? -5. What did you see instead? - -General questions should go to the [golang-nuts mailing list](https://groups.google.com/group/golang-nuts) instead of the issue tracker. -The gophers there will answer or ask you to file an issue if you've tripped over a bug. - -## Contributing code - -Please read the [Contribution Guidelines](https://golang.org/doc/contribute.html) -before sending patches. - -**We do not accept GitHub pull requests** -(we use [Gerrit](https://code.google.com/p/gerrit/) instead for code review). - -Unless otherwise noted, the Go source files are distributed under -the BSD-style license found in the LICENSE file. - diff --git a/vendor/golang.org/x/oauth2/README.md b/vendor/golang.org/x/oauth2/README.md deleted file mode 100644 index 1643c08ef8..0000000000 --- a/vendor/golang.org/x/oauth2/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# OAuth2 for Go - -[![Build Status](https://travis-ci.org/golang/oauth2.svg?branch=master)](https://travis-ci.org/golang/oauth2) -[![GoDoc](https://godoc.org/golang.org/x/oauth2?status.svg)](https://godoc.org/golang.org/x/oauth2) - -oauth2 package contains a client implementation for OAuth 2.0 spec. - -## Installation - -~~~~ -go get golang.org/x/oauth2 -~~~~ - -See godoc for further documentation and examples. - -* [godoc.org/golang.org/x/oauth2](http://godoc.org/golang.org/x/oauth2) -* [godoc.org/golang.org/x/oauth2/google](http://godoc.org/golang.org/x/oauth2/google) - - -## App Engine - -In change 96e89be (March 2015) we removed the `oauth2.Context2` type in favor -of the [`context.Context`](https://golang.org/x/net/context#Context) type from -the `golang.org/x/net/context` package - -This means its no longer possible to use the "Classic App Engine" -`appengine.Context` type with the `oauth2` package. (You're using -Classic App Engine if you import the package `"appengine"`.) - -To work around this, you may use the new `"google.golang.org/appengine"` -package. This package has almost the same API as the `"appengine"` package, -but it can be fetched with `go get` and used on "Managed VMs" and well as -Classic App Engine. - -See the [new `appengine` package's readme](https://github.com/golang/appengine#updating-a-go-app-engine-app) -for information on updating your app. - -If you don't want to update your entire app to use the new App Engine packages, -you may use both sets of packages in parallel, using only the new packages -with the `oauth2` package. - - import ( - "golang.org/x/net/context" - "golang.org/x/oauth2" - "golang.org/x/oauth2/google" - newappengine "google.golang.org/appengine" - newurlfetch "google.golang.org/appengine/urlfetch" - - "appengine" - ) - - func handler(w http.ResponseWriter, r *http.Request) { - var c appengine.Context = appengine.NewContext(r) - c.Infof("Logging a message with the old package") - - var ctx context.Context = newappengine.NewContext(r) - client := &http.Client{ - Transport: &oauth2.Transport{ - Source: google.AppEngineTokenSource(ctx, "scope"), - Base: &newurlfetch.Transport{Context: ctx}, - }, - } - client.Get("...") - } - diff --git a/vendor/golang.org/x/oauth2/internal/token.go b/vendor/golang.org/x/oauth2/internal/token.go index 1c0ec76d04..18328a0dcf 100644 --- a/vendor/golang.org/x/oauth2/internal/token.go +++ b/vendor/golang.org/x/oauth2/internal/token.go @@ -117,8 +117,6 @@ var brokenAuthHeaderProviders = []string{ "https://www.strava.com/oauth/", "https://www.wunderlist.com/oauth/", "https://api.patreon.com/", - "https://sandbox.codeswholesale.com/oauth/token", - "https://api.codeswholesale.com/oauth/token", } func RegisterBrokenAuthHeaderProvider(tokenURL string) { diff --git a/vendor/golang.org/x/sync/AUTHORS b/vendor/golang.org/x/sync/AUTHORS new file mode 100644 index 0000000000..15167cd746 --- /dev/null +++ b/vendor/golang.org/x/sync/AUTHORS @@ -0,0 +1,3 @@ +# This source code refers to The Go Authors for copyright purposes. +# The master list of authors is in the main Go distribution, +# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/sync/CONTRIBUTORS b/vendor/golang.org/x/sync/CONTRIBUTORS new file mode 100644 index 0000000000..1c4577e968 --- /dev/null +++ b/vendor/golang.org/x/sync/CONTRIBUTORS @@ -0,0 +1,3 @@ +# This source code was written by the Go contributors. +# The master list of contributors is in the main Go distribution, +# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/sys/AUTHORS b/vendor/golang.org/x/sys/AUTHORS new file mode 100644 index 0000000000..15167cd746 --- /dev/null +++ b/vendor/golang.org/x/sys/AUTHORS @@ -0,0 +1,3 @@ +# This source code refers to The Go Authors for copyright purposes. +# The master list of authors is in the main Go distribution, +# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/sys/CONTRIBUTORS b/vendor/golang.org/x/sys/CONTRIBUTORS new file mode 100644 index 0000000000..1c4577e968 --- /dev/null +++ b/vendor/golang.org/x/sys/CONTRIBUTORS @@ -0,0 +1,3 @@ +# This source code was written by the Go contributors. +# The master list of contributors is in the main Go distribution, +# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/sys/unix/mkall.sh b/vendor/golang.org/x/sys/unix/mkall.sh deleted file mode 100755 index 3e224c57e2..0000000000 --- a/vendor/golang.org/x/sys/unix/mkall.sh +++ /dev/null @@ -1,285 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -# The unix package provides access to the raw system call -# interface of the underlying operating system. Porting Go to -# a new architecture/operating system combination requires -# some manual effort, though there are tools that automate -# much of the process. The auto-generated files have names -# beginning with z. -# -# This script runs or (given -n) prints suggested commands to generate z files -# for the current system. Running those commands is not automatic. -# This script is documentation more than anything else. -# -# * asm_${GOOS}_${GOARCH}.s -# -# This hand-written assembly file implements system call dispatch. -# There are three entry points: -# -# func Syscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr); -# func Syscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr); -# func RawSyscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr); -# -# The first and second are the standard ones; they differ only in -# how many arguments can be passed to the kernel. -# The third is for low-level use by the ForkExec wrapper; -# unlike the first two, it does not call into the scheduler to -# let it know that a system call is running. -# -# * syscall_${GOOS}.go -# -# This hand-written Go file implements system calls that need -# special handling and lists "//sys" comments giving prototypes -# for ones that can be auto-generated. Mksyscall reads those -# comments to generate the stubs. -# -# * syscall_${GOOS}_${GOARCH}.go -# -# Same as syscall_${GOOS}.go except that it contains code specific -# to ${GOOS} on one particular architecture. -# -# * types_${GOOS}.c -# -# This hand-written C file includes standard C headers and then -# creates typedef or enum names beginning with a dollar sign -# (use of $ in variable names is a gcc extension). The hardest -# part about preparing this file is figuring out which headers to -# include and which symbols need to be #defined to get the -# actual data structures that pass through to the kernel system calls. -# Some C libraries present alternate versions for binary compatibility -# and translate them on the way in and out of system calls, but -# there is almost always a #define that can get the real ones. -# See types_darwin.c and types_linux.c for examples. -# -# * zerror_${GOOS}_${GOARCH}.go -# -# This machine-generated file defines the system's error numbers, -# error strings, and signal numbers. The generator is "mkerrors.sh". -# Usually no arguments are needed, but mkerrors.sh will pass its -# arguments on to godefs. -# -# * zsyscall_${GOOS}_${GOARCH}.go -# -# Generated by mksyscall.pl; see syscall_${GOOS}.go above. -# -# * zsysnum_${GOOS}_${GOARCH}.go -# -# Generated by mksysnum_${GOOS}. -# -# * ztypes_${GOOS}_${GOARCH}.go -# -# Generated by godefs; see types_${GOOS}.c above. - -GOOSARCH="${GOOS}_${GOARCH}" - -# defaults -mksyscall="./mksyscall.pl" -mkerrors="./mkerrors.sh" -zerrors="zerrors_$GOOSARCH.go" -mksysctl="" -zsysctl="zsysctl_$GOOSARCH.go" -mksysnum= -mktypes= -run="sh" - -case "$1" in --syscalls) - for i in zsyscall*go - do - sed 1q $i | sed 's;^// ;;' | sh > _$i && gofmt < _$i > $i - rm _$i - done - exit 0 - ;; --n) - run="cat" - shift -esac - -case "$#" in -0) - ;; -*) - echo 'usage: mkall.sh [-n]' 1>&2 - exit 2 -esac - -GOOSARCH_in=syscall_$GOOSARCH.go -case "$GOOSARCH" in -_* | *_ | _) - echo 'undefined $GOOS_$GOARCH:' "$GOOSARCH" 1>&2 - exit 1 - ;; -darwin_386) - mkerrors="$mkerrors -m32" - mksyscall="./mksyscall.pl -l32" - mksysnum="./mksysnum_darwin.pl $(xcrun --show-sdk-path --sdk macosx)/usr/include/sys/syscall.h" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -darwin_amd64) - mkerrors="$mkerrors -m64" - mksysnum="./mksysnum_darwin.pl $(xcrun --show-sdk-path --sdk macosx)/usr/include/sys/syscall.h" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -darwin_arm) - mkerrors="$mkerrors" - mksysnum="./mksysnum_darwin.pl /usr/include/sys/syscall.h" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -darwin_arm64) - mkerrors="$mkerrors -m64" - mksysnum="./mksysnum_darwin.pl $(xcrun --show-sdk-path --sdk iphoneos)/usr/include/sys/syscall.h" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -dragonfly_386) - mkerrors="$mkerrors -m32" - mksyscall="./mksyscall.pl -l32 -dragonfly" - mksysnum="curl -s 'http://gitweb.dragonflybsd.org/dragonfly.git/blob_plain/HEAD:/sys/kern/syscalls.master' | ./mksysnum_dragonfly.pl" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -dragonfly_amd64) - mkerrors="$mkerrors -m64" - mksyscall="./mksyscall.pl -dragonfly" - mksysnum="curl -s 'http://gitweb.dragonflybsd.org/dragonfly.git/blob_plain/HEAD:/sys/kern/syscalls.master' | ./mksysnum_dragonfly.pl" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -freebsd_386) - mkerrors="$mkerrors -m32" - mksyscall="./mksyscall.pl -l32" - mksysnum="curl -s 'http://svn.freebsd.org/base/stable/10/sys/kern/syscalls.master' | ./mksysnum_freebsd.pl" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -freebsd_amd64) - mkerrors="$mkerrors -m64" - mksysnum="curl -s 'http://svn.freebsd.org/base/stable/10/sys/kern/syscalls.master' | ./mksysnum_freebsd.pl" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -freebsd_arm) - mkerrors="$mkerrors" - mksyscall="./mksyscall.pl -l32 -arm" - mksysnum="curl -s 'http://svn.freebsd.org/base/stable/10/sys/kern/syscalls.master' | ./mksysnum_freebsd.pl" - # Let the type of C char be signed for making the bare syscall - # API consistent across over platforms. - mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char" - ;; -linux_386) - mkerrors="$mkerrors -m32" - mksyscall="./mksyscall.pl -l32" - mksysnum="./mksysnum_linux.pl /usr/include/asm/unistd_32.h" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -linux_amd64) - unistd_h=$(ls -1 /usr/include/asm/unistd_64.h /usr/include/x86_64-linux-gnu/asm/unistd_64.h 2>/dev/null | head -1) - if [ "$unistd_h" = "" ]; then - echo >&2 cannot find unistd_64.h - exit 1 - fi - mkerrors="$mkerrors -m64" - mksysnum="./mksysnum_linux.pl $unistd_h" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -linux_arm) - mkerrors="$mkerrors" - mksyscall="./mksyscall.pl -l32 -arm" - mksysnum="curl -s 'http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/arch/arm/include/uapi/asm/unistd.h' | ./mksysnum_linux.pl -" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -linux_arm64) - unistd_h=$(ls -1 /usr/include/asm/unistd.h /usr/include/asm-generic/unistd.h 2>/dev/null | head -1) - if [ "$unistd_h" = "" ]; then - echo >&2 cannot find unistd_64.h - exit 1 - fi - mksysnum="./mksysnum_linux.pl $unistd_h" - # Let the type of C char be signed for making the bare syscall - # API consistent across over platforms. - mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char" - ;; -linux_ppc64) - GOOSARCH_in=syscall_linux_ppc64x.go - unistd_h=/usr/include/asm/unistd.h - mkerrors="$mkerrors -m64" - mksysnum="./mksysnum_linux.pl $unistd_h" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -linux_ppc64le) - GOOSARCH_in=syscall_linux_ppc64x.go - unistd_h=/usr/include/powerpc64le-linux-gnu/asm/unistd.h - mkerrors="$mkerrors -m64" - mksysnum="./mksysnum_linux.pl $unistd_h" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -linux_s390x) - GOOSARCH_in=syscall_linux_s390x.go - unistd_h=/usr/include/asm/unistd.h - mkerrors="$mkerrors -m64" - mksysnum="./mksysnum_linux.pl $unistd_h" - # Let the type of C char be signed to make the bare sys - # API more consistent between platforms. - # This is a deliberate departure from the way the syscall - # package generates its version of the types file. - mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char" - ;; -netbsd_386) - mkerrors="$mkerrors -m32" - mksyscall="./mksyscall.pl -l32 -netbsd" - mksysnum="curl -s 'http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/sys/kern/syscalls.master' | ./mksysnum_netbsd.pl" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -netbsd_amd64) - mkerrors="$mkerrors -m64" - mksyscall="./mksyscall.pl -netbsd" - mksysnum="curl -s 'http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/sys/kern/syscalls.master' | ./mksysnum_netbsd.pl" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -openbsd_386) - mkerrors="$mkerrors -m32" - mksyscall="./mksyscall.pl -l32 -openbsd" - mksysctl="./mksysctl_openbsd.pl" - zsysctl="zsysctl_openbsd.go" - mksysnum="curl -s 'http://cvsweb.openbsd.org/cgi-bin/cvsweb/~checkout~/src/sys/kern/syscalls.master' | ./mksysnum_openbsd.pl" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -openbsd_amd64) - mkerrors="$mkerrors -m64" - mksyscall="./mksyscall.pl -openbsd" - mksysctl="./mksysctl_openbsd.pl" - zsysctl="zsysctl_openbsd.go" - mksysnum="curl -s 'http://cvsweb.openbsd.org/cgi-bin/cvsweb/~checkout~/src/sys/kern/syscalls.master' | ./mksysnum_openbsd.pl" - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -solaris_amd64) - mksyscall="./mksyscall_solaris.pl" - mkerrors="$mkerrors -m64" - mksysnum= - mktypes="GOARCH=$GOARCH go tool cgo -godefs" - ;; -*) - echo 'unrecognized $GOOS_$GOARCH: ' "$GOOSARCH" 1>&2 - exit 1 - ;; -esac - -( - if [ -n "$mkerrors" ]; then echo "$mkerrors |gofmt >$zerrors"; fi - case "$GOOS" in - *) - syscall_goos="syscall_$GOOS.go" - case "$GOOS" in - darwin | dragonfly | freebsd | netbsd | openbsd) - syscall_goos="syscall_bsd.go $syscall_goos" - ;; - esac - if [ -n "$mksyscall" ]; then echo "$mksyscall $syscall_goos $GOOSARCH_in |gofmt >zsyscall_$GOOSARCH.go"; fi - ;; - esac - if [ -n "$mksysctl" ]; then echo "$mksysctl |gofmt >$zsysctl"; fi - if [ -n "$mksysnum" ]; then echo "$mksysnum |gofmt >zsysnum_$GOOSARCH.go"; fi - if [ -n "$mktypes" ]; then - echo "echo // +build $GOARCH,$GOOS > ztypes_$GOOSARCH.go"; - echo "$mktypes types_$GOOS.go | go run mkpost.go >>ztypes_$GOOSARCH.go"; - fi -) | $run diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh deleted file mode 100755 index c40d788c4a..0000000000 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ /dev/null @@ -1,476 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -# Generate Go code listing errors and other #defined constant -# values (ENAMETOOLONG etc.), by asking the preprocessor -# about the definitions. - -unset LANG -export LC_ALL=C -export LC_CTYPE=C - -if test -z "$GOARCH" -o -z "$GOOS"; then - echo 1>&2 "GOARCH or GOOS not defined in environment" - exit 1 -fi - -CC=${CC:-cc} - -if [[ "$GOOS" -eq "solaris" ]]; then - # Assumes GNU versions of utilities in PATH. - export PATH=/usr/gnu/bin:$PATH -fi - -uname=$(uname) - -includes_Darwin=' -#define _DARWIN_C_SOURCE -#define KERNEL -#define _DARWIN_USE_64_BIT_INODE -#include <sys/types.h> -#include <sys/event.h> -#include <sys/ptrace.h> -#include <sys/socket.h> -#include <sys/sockio.h> -#include <sys/sysctl.h> -#include <sys/mman.h> -#include <sys/wait.h> -#include <net/bpf.h> -#include <net/if.h> -#include <net/if_types.h> -#include <net/route.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <termios.h> -' - -includes_DragonFly=' -#include <sys/types.h> -#include <sys/event.h> -#include <sys/socket.h> -#include <sys/sockio.h> -#include <sys/sysctl.h> -#include <sys/mman.h> -#include <sys/wait.h> -#include <sys/ioctl.h> -#include <net/bpf.h> -#include <net/if.h> -#include <net/if_types.h> -#include <net/route.h> -#include <netinet/in.h> -#include <termios.h> -#include <netinet/ip.h> -#include <net/ip_mroute/ip_mroute.h> -' - -includes_FreeBSD=' -#include <sys/param.h> -#include <sys/types.h> -#include <sys/event.h> -#include <sys/socket.h> -#include <sys/sockio.h> -#include <sys/sysctl.h> -#include <sys/mman.h> -#include <sys/wait.h> -#include <sys/ioctl.h> -#include <net/bpf.h> -#include <net/if.h> -#include <net/if_types.h> -#include <net/route.h> -#include <netinet/in.h> -#include <termios.h> -#include <netinet/ip.h> -#include <netinet/ip_mroute.h> -#include <sys/extattr.h> - -#if __FreeBSD__ >= 10 -#define IFT_CARP 0xf8 // IFT_CARP is deprecated in FreeBSD 10 -#undef SIOCAIFADDR -#define SIOCAIFADDR _IOW(105, 26, struct oifaliasreq) // ifaliasreq contains if_data -#undef SIOCSIFPHYADDR -#define SIOCSIFPHYADDR _IOW(105, 70, struct oifaliasreq) // ifaliasreq contains if_data -#endif -' - -includes_Linux=' -#define _LARGEFILE_SOURCE -#define _LARGEFILE64_SOURCE -#ifndef __LP64__ -#define _FILE_OFFSET_BITS 64 -#endif -#define _GNU_SOURCE - -#include <bits/sockaddr.h> -#include <sys/epoll.h> -#include <sys/inotify.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <sys/mount.h> -#include <sys/prctl.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/time.h> -#include <sys/socket.h> -#include <linux/if.h> -#include <linux/if_arp.h> -#include <linux/if_ether.h> -#include <linux/if_tun.h> -#include <linux/if_packet.h> -#include <linux/if_addr.h> -#include <linux/filter.h> -#include <linux/netlink.h> -#include <linux/reboot.h> -#include <linux/rtnetlink.h> -#include <linux/ptrace.h> -#include <linux/sched.h> -#include <linux/wait.h> -#include <linux/icmpv6.h> -#include <net/route.h> -#include <asm/termbits.h> - -#ifndef MSG_FASTOPEN -#define MSG_FASTOPEN 0x20000000 -#endif - -#ifndef PTRACE_GETREGS -#define PTRACE_GETREGS 0xc -#endif - -#ifndef PTRACE_SETREGS -#define PTRACE_SETREGS 0xd -#endif -' - -includes_NetBSD=' -#include <sys/types.h> -#include <sys/param.h> -#include <sys/event.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/sockio.h> -#include <sys/sysctl.h> -#include <sys/termios.h> -#include <sys/ttycom.h> -#include <sys/wait.h> -#include <net/bpf.h> -#include <net/if.h> -#include <net/if_types.h> -#include <net/route.h> -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_mroute.h> -#include <netinet/if_ether.h> - -// Needed since <sys/param.h> refers to it... -#define schedppq 1 -' - -includes_OpenBSD=' -#include <sys/types.h> -#include <sys/param.h> -#include <sys/event.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/sockio.h> -#include <sys/sysctl.h> -#include <sys/termios.h> -#include <sys/ttycom.h> -#include <sys/wait.h> -#include <net/bpf.h> -#include <net/if.h> -#include <net/if_types.h> -#include <net/if_var.h> -#include <net/route.h> -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_mroute.h> -#include <netinet/if_ether.h> -#include <net/if_bridge.h> - -// We keep some constants not supported in OpenBSD 5.5 and beyond for -// the promise of compatibility. -#define EMUL_ENABLED 0x1 -#define EMUL_NATIVE 0x2 -#define IPV6_FAITH 0x1d -#define IPV6_OPTIONS 0x1 -#define IPV6_RTHDR_STRICT 0x1 -#define IPV6_SOCKOPT_RESERVED1 0x3 -#define SIOCGIFGENERIC 0xc020693a -#define SIOCSIFGENERIC 0x80206939 -#define WALTSIG 0x4 -' - -includes_SunOS=' -#include <limits.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/sockio.h> -#include <sys/mman.h> -#include <sys/wait.h> -#include <sys/ioctl.h> -#include <net/bpf.h> -#include <net/if.h> -#include <net/if_arp.h> -#include <net/if_types.h> -#include <net/route.h> -#include <netinet/in.h> -#include <termios.h> -#include <netinet/ip.h> -#include <netinet/ip_mroute.h> -' - - -includes=' -#include <sys/types.h> -#include <sys/file.h> -#include <fcntl.h> -#include <dirent.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip6.h> -#include <netinet/tcp.h> -#include <errno.h> -#include <sys/signal.h> -#include <signal.h> -#include <sys/resource.h> -#include <time.h> -' -ccflags="$@" - -# Write go tool cgo -godefs input. -( - echo package unix - echo - echo '/*' - indirect="includes_$(uname)" - echo "${!indirect} $includes" - echo '*/' - echo 'import "C"' - echo 'import "syscall"' - echo - echo 'const (' - - # The gcc command line prints all the #defines - # it encounters while processing the input - echo "${!indirect} $includes" | $CC -x c - -E -dM $ccflags | - awk ' - $1 != "#define" || $2 ~ /\(/ || $3 == "" {next} - - $2 ~ /^E([ABCD]X|[BIS]P|[SD]I|S|FL)$/ {next} # 386 registers - $2 ~ /^(SIGEV_|SIGSTKSZ|SIGRT(MIN|MAX))/ {next} - $2 ~ /^(SCM_SRCRT)$/ {next} - $2 ~ /^(MAP_FAILED)$/ {next} - $2 ~ /^ELF_.*$/ {next}# <asm/elf.h> contains ELF_ARCH, etc. - - $2 ~ /^EXTATTR_NAMESPACE_NAMES/ || - $2 ~ /^EXTATTR_NAMESPACE_[A-Z]+_STRING/ {next} - - $2 !~ /^ETH_/ && - $2 !~ /^EPROC_/ && - $2 !~ /^EQUIV_/ && - $2 !~ /^EXPR_/ && - $2 ~ /^E[A-Z0-9_]+$/ || - $2 ~ /^B[0-9_]+$/ || - $2 == "BOTHER" || - $2 ~ /^CI?BAUD(EX)?$/ || - $2 == "IBSHIFT" || - $2 ~ /^V[A-Z0-9]+$/ || - $2 ~ /^CS[A-Z0-9]/ || - $2 ~ /^I(SIG|CANON|CRNL|UCLC|EXTEN|MAXBEL|STRIP|UTF8)$/ || - $2 ~ /^IGN/ || - $2 ~ /^IX(ON|ANY|OFF)$/ || - $2 ~ /^IN(LCR|PCK)$/ || - $2 ~ /(^FLU?SH)|(FLU?SH$)/ || - $2 ~ /^C(LOCAL|READ|MSPAR|RTSCTS)$/ || - $2 == "BRKINT" || - $2 == "HUPCL" || - $2 == "PENDIN" || - $2 == "TOSTOP" || - $2 == "XCASE" || - $2 == "ALTWERASE" || - $2 == "NOKERNINFO" || - $2 ~ /^PAR/ || - $2 ~ /^SIG[^_]/ || - $2 ~ /^O[CNPFPL][A-Z]+[^_][A-Z]+$/ || - $2 ~ /^(NL|CR|TAB|BS|VT|FF)DLY$/ || - $2 ~ /^(NL|CR|TAB|BS|VT|FF)[0-9]$/ || - $2 ~ /^O?XTABS$/ || - $2 ~ /^TC[IO](ON|OFF)$/ || - $2 ~ /^IN_/ || - $2 ~ /^LOCK_(SH|EX|NB|UN)$/ || - $2 ~ /^(AF|SOCK|SO|SOL|IPPROTO|IP|IPV6|ICMP6|TCP|EVFILT|NOTE|EV|SHUT|PROT|MAP|PACKET|MSG|SCM|MCL|DT|MADV|PR)_/ || - $2 == "ICMPV6_FILTER" || - $2 == "SOMAXCONN" || - $2 == "NAME_MAX" || - $2 == "IFNAMSIZ" || - $2 ~ /^CTL_(MAXNAME|NET|QUERY)$/ || - $2 ~ /^SYSCTL_VERS/ || - $2 ~ /^(MS|MNT)_/ || - $2 ~ /^TUN(SET|GET|ATTACH|DETACH)/ || - $2 ~ /^(O|F|FD|NAME|S|PTRACE|PT)_/ || - $2 ~ /^LINUX_REBOOT_CMD_/ || - $2 ~ /^LINUX_REBOOT_MAGIC[12]$/ || - $2 !~ "NLA_TYPE_MASK" && - $2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P)_/ || - $2 ~ /^SIOC/ || - $2 ~ /^TIOC/ || - $2 ~ /^TCGET/ || - $2 ~ /^TCSET/ || - $2 ~ /^TC(FLSH|SBRKP?|XONC)$/ || - $2 !~ "RTF_BITS" && - $2 ~ /^(IFF|IFT|NET_RT|RTM|RTF|RTV|RTA|RTAX)_/ || - $2 ~ /^BIOC/ || - $2 ~ /^RUSAGE_(SELF|CHILDREN|THREAD)/ || - $2 ~ /^RLIMIT_(AS|CORE|CPU|DATA|FSIZE|NOFILE|STACK)|RLIM_INFINITY/ || - $2 ~ /^PRIO_(PROCESS|PGRP|USER)/ || - $2 ~ /^CLONE_[A-Z_]+/ || - $2 !~ /^(BPF_TIMEVAL)$/ && - $2 ~ /^(BPF|DLT)_/ || - $2 ~ /^CLOCK_/ || - $2 !~ "WMESGLEN" && - $2 ~ /^W[A-Z0-9]+$/ {printf("\t%s = C.%s\n", $2, $2)} - $2 ~ /^__WCOREFLAG$/ {next} - $2 ~ /^__W[A-Z0-9]+$/ {printf("\t%s = C.%s\n", substr($2,3), $2)} - - {next} - ' | sort - - echo ')' -) >_const.go - -# Pull out the error names for later. -errors=$( - echo '#include <errno.h>' | $CC -x c - -E -dM $ccflags | - awk '$1=="#define" && $2 ~ /^E[A-Z0-9_]+$/ { print $2 }' | - sort -) - -# Pull out the signal names for later. -signals=$( - echo '#include <signal.h>' | $CC -x c - -E -dM $ccflags | - awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print $2 }' | - egrep -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT)' | - sort -) - -# Again, writing regexps to a file. -echo '#include <errno.h>' | $CC -x c - -E -dM $ccflags | - awk '$1=="#define" && $2 ~ /^E[A-Z0-9_]+$/ { print "^\t" $2 "[ \t]*=" }' | - sort >_error.grep -echo '#include <signal.h>' | $CC -x c - -E -dM $ccflags | - awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print "^\t" $2 "[ \t]*=" }' | - egrep -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT)' | - sort >_signal.grep - -echo '// mkerrors.sh' "$@" -echo '// MACHINE GENERATED BY THE COMMAND ABOVE; DO NOT EDIT' -echo -echo "// +build ${GOARCH},${GOOS}" -echo -go tool cgo -godefs -- "$@" _const.go >_error.out -cat _error.out | grep -vf _error.grep | grep -vf _signal.grep -echo -echo '// Errors' -echo 'const (' -cat _error.out | grep -f _error.grep | sed 's/=\(.*\)/= syscall.Errno(\1)/' -echo ')' - -echo -echo '// Signals' -echo 'const (' -cat _error.out | grep -f _signal.grep | sed 's/=\(.*\)/= syscall.Signal(\1)/' -echo ')' - -# Run C program to print error and syscall strings. -( - echo -E " -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <ctype.h> -#include <string.h> -#include <signal.h> - -#define nelem(x) (sizeof(x)/sizeof((x)[0])) - -enum { A = 'A', Z = 'Z', a = 'a', z = 'z' }; // avoid need for single quotes below - -int errors[] = { -" - for i in $errors - do - echo -E ' '$i, - done - - echo -E " -}; - -int signals[] = { -" - for i in $signals - do - echo -E ' '$i, - done - - # Use -E because on some systems bash builtin interprets \n itself. - echo -E ' -}; - -static int -intcmp(const void *a, const void *b) -{ - return *(int*)a - *(int*)b; -} - -int -main(void) -{ - int i, j, e; - char buf[1024], *p; - - printf("\n\n// Error table\n"); - printf("var errors = [...]string {\n"); - qsort(errors, nelem(errors), sizeof errors[0], intcmp); - for(i=0; i<nelem(errors); i++) { - e = errors[i]; - if(i > 0 && errors[i-1] == e) - continue; - strcpy(buf, strerror(e)); - // lowercase first letter: Bad -> bad, but STREAM -> STREAM. - if(A <= buf[0] && buf[0] <= Z && a <= buf[1] && buf[1] <= z) - buf[0] += a - A; - printf("\t%d: \"%s\",\n", e, buf); - } - printf("}\n\n"); - - printf("\n\n// Signal table\n"); - printf("var signals = [...]string {\n"); - qsort(signals, nelem(signals), sizeof signals[0], intcmp); - for(i=0; i<nelem(signals); i++) { - e = signals[i]; - if(i > 0 && signals[i-1] == e) - continue; - strcpy(buf, strsignal(e)); - // lowercase first letter: Bad -> bad, but STREAM -> STREAM. - if(A <= buf[0] && buf[0] <= Z && a <= buf[1] && buf[1] <= z) - buf[0] += a - A; - // cut trailing : number. - p = strrchr(buf, ":"[0]); - if(p) - *p = '\0'; - printf("\t%d: \"%s\",\n", e, buf); - } - printf("}\n\n"); - - return 0; -} - -' -) >_errors.c - -$CC $ccflags -o _errors _errors.c && $GORUN ./_errors && rm -f _errors.c _errors _const.go _error.grep _signal.grep _error.out diff --git a/vendor/golang.org/x/sys/unix/mksyscall.pl b/vendor/golang.org/x/sys/unix/mksyscall.pl deleted file mode 100755 index b1e7766dae..0000000000 --- a/vendor/golang.org/x/sys/unix/mksyscall.pl +++ /dev/null @@ -1,323 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -# This program reads a file containing function prototypes -# (like syscall_darwin.go) and generates system call bodies. -# The prototypes are marked by lines beginning with "//sys" -# and read like func declarations if //sys is replaced by func, but: -# * The parameter lists must give a name for each argument. -# This includes return parameters. -# * The parameter lists must give a type for each argument: -# the (x, y, z int) shorthand is not allowed. -# * If the return parameter is an error number, it must be named errno. - -# A line beginning with //sysnb is like //sys, except that the -# goroutine will not be suspended during the execution of the system -# call. This must only be used for system calls which can never -# block, as otherwise the system call could cause all goroutines to -# hang. - -use strict; - -my $cmdline = "mksyscall.pl " . join(' ', @ARGV); -my $errors = 0; -my $_32bit = ""; -my $plan9 = 0; -my $openbsd = 0; -my $netbsd = 0; -my $dragonfly = 0; -my $arm = 0; # 64-bit value should use (even, odd)-pair - -if($ARGV[0] eq "-b32") { - $_32bit = "big-endian"; - shift; -} elsif($ARGV[0] eq "-l32") { - $_32bit = "little-endian"; - shift; -} -if($ARGV[0] eq "-plan9") { - $plan9 = 1; - shift; -} -if($ARGV[0] eq "-openbsd") { - $openbsd = 1; - shift; -} -if($ARGV[0] eq "-netbsd") { - $netbsd = 1; - shift; -} -if($ARGV[0] eq "-dragonfly") { - $dragonfly = 1; - shift; -} -if($ARGV[0] eq "-arm") { - $arm = 1; - shift; -} - -if($ARGV[0] =~ /^-/) { - print STDERR "usage: mksyscall.pl [-b32 | -l32] [file ...]\n"; - exit 1; -} - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -sub parseparamlist($) { - my ($list) = @_; - $list =~ s/^\s*//; - $list =~ s/\s*$//; - if($list eq "") { - return (); - } - return split(/\s*,\s*/, $list); -} - -sub parseparam($) { - my ($p) = @_; - if($p !~ /^(\S*) (\S*)$/) { - print STDERR "$ARGV:$.: malformed parameter: $p\n"; - $errors = 1; - return ("xx", "int"); - } - return ($1, $2); -} - -my $text = ""; -while(<>) { - chomp; - s/\s+/ /g; - s/^\s+//; - s/\s+$//; - my $nonblock = /^\/\/sysnb /; - next if !/^\/\/sys / && !$nonblock; - - # Line must be of the form - # func Open(path string, mode int, perm int) (fd int, errno error) - # Split into name, in params, out params. - if(!/^\/\/sys(nb)? (\w+)\(([^()]*)\)\s*(?:\(([^()]+)\))?\s*(?:=\s*((?i)SYS_[A-Z0-9_]+))?$/) { - print STDERR "$ARGV:$.: malformed //sys declaration\n"; - $errors = 1; - next; - } - my ($func, $in, $out, $sysname) = ($2, $3, $4, $5); - - # Split argument lists on comma. - my @in = parseparamlist($in); - my @out = parseparamlist($out); - - # Try in vain to keep people from editing this file. - # The theory is that they jump into the middle of the file - # without reading the header. - $text .= "// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT\n\n"; - - # Go function header. - my $out_decl = @out ? sprintf(" (%s)", join(', ', @out)) : ""; - $text .= sprintf "func %s(%s)%s {\n", $func, join(', ', @in), $out_decl; - - # Check if err return available - my $errvar = ""; - foreach my $p (@out) { - my ($name, $type) = parseparam($p); - if($type eq "error") { - $errvar = $name; - last; - } - } - - # Prepare arguments to Syscall. - my @args = (); - my @uses = (); - my $n = 0; - foreach my $p (@in) { - my ($name, $type) = parseparam($p); - if($type =~ /^\*/) { - push @args, "uintptr(unsafe.Pointer($name))"; - } elsif($type eq "string" && $errvar ne "") { - $text .= "\tvar _p$n *byte\n"; - $text .= "\t_p$n, $errvar = BytePtrFromString($name)\n"; - $text .= "\tif $errvar != nil {\n\t\treturn\n\t}\n"; - push @args, "uintptr(unsafe.Pointer(_p$n))"; - push @uses, "use(unsafe.Pointer(_p$n))"; - $n++; - } elsif($type eq "string") { - print STDERR "$ARGV:$.: $func uses string arguments, but has no error return\n"; - $text .= "\tvar _p$n *byte\n"; - $text .= "\t_p$n, _ = BytePtrFromString($name)\n"; - push @args, "uintptr(unsafe.Pointer(_p$n))"; - push @uses, "use(unsafe.Pointer(_p$n))"; - $n++; - } elsif($type =~ /^\[\](.*)/) { - # Convert slice into pointer, length. - # Have to be careful not to take address of &a[0] if len == 0: - # pass dummy pointer in that case. - # Used to pass nil, but some OSes or simulators reject write(fd, nil, 0). - $text .= "\tvar _p$n unsafe.Pointer\n"; - $text .= "\tif len($name) > 0 {\n\t\t_p$n = unsafe.Pointer(\&${name}[0])\n\t}"; - $text .= " else {\n\t\t_p$n = unsafe.Pointer(&_zero)\n\t}"; - $text .= "\n"; - push @args, "uintptr(_p$n)", "uintptr(len($name))"; - $n++; - } elsif($type eq "int64" && ($openbsd || $netbsd)) { - push @args, "0"; - if($_32bit eq "big-endian") { - push @args, "uintptr($name>>32)", "uintptr($name)"; - } elsif($_32bit eq "little-endian") { - push @args, "uintptr($name)", "uintptr($name>>32)"; - } else { - push @args, "uintptr($name)"; - } - } elsif($type eq "int64" && $dragonfly) { - if ($func !~ /^extp(read|write)/i) { - push @args, "0"; - } - if($_32bit eq "big-endian") { - push @args, "uintptr($name>>32)", "uintptr($name)"; - } elsif($_32bit eq "little-endian") { - push @args, "uintptr($name)", "uintptr($name>>32)"; - } else { - push @args, "uintptr($name)"; - } - } elsif($type eq "int64" && $_32bit ne "") { - if(@args % 2 && $arm) { - # arm abi specifies 64-bit argument uses - # (even, odd) pair - push @args, "0" - } - if($_32bit eq "big-endian") { - push @args, "uintptr($name>>32)", "uintptr($name)"; - } else { - push @args, "uintptr($name)", "uintptr($name>>32)"; - } - } else { - push @args, "uintptr($name)"; - } - } - - # Determine which form to use; pad args with zeros. - my $asm = "Syscall"; - if ($nonblock) { - $asm = "RawSyscall"; - } - if(@args <= 3) { - while(@args < 3) { - push @args, "0"; - } - } elsif(@args <= 6) { - $asm .= "6"; - while(@args < 6) { - push @args, "0"; - } - } elsif(@args <= 9) { - $asm .= "9"; - while(@args < 9) { - push @args, "0"; - } - } else { - print STDERR "$ARGV:$.: too many arguments to system call\n"; - } - - # System call number. - if($sysname eq "") { - $sysname = "SYS_$func"; - $sysname =~ s/([a-z])([A-Z])/${1}_$2/g; # turn FooBar into Foo_Bar - $sysname =~ y/a-z/A-Z/; - } - - # Actual call. - my $args = join(', ', @args); - my $call = "$asm($sysname, $args)"; - - # Assign return values. - my $body = ""; - my @ret = ("_", "_", "_"); - my $do_errno = 0; - for(my $i=0; $i<@out; $i++) { - my $p = $out[$i]; - my ($name, $type) = parseparam($p); - my $reg = ""; - if($name eq "err" && !$plan9) { - $reg = "e1"; - $ret[2] = $reg; - $do_errno = 1; - } elsif($name eq "err" && $plan9) { - $ret[0] = "r0"; - $ret[2] = "e1"; - next; - } else { - $reg = sprintf("r%d", $i); - $ret[$i] = $reg; - } - if($type eq "bool") { - $reg = "$reg != 0"; - } - if($type eq "int64" && $_32bit ne "") { - # 64-bit number in r1:r0 or r0:r1. - if($i+2 > @out) { - print STDERR "$ARGV:$.: not enough registers for int64 return\n"; - } - if($_32bit eq "big-endian") { - $reg = sprintf("int64(r%d)<<32 | int64(r%d)", $i, $i+1); - } else { - $reg = sprintf("int64(r%d)<<32 | int64(r%d)", $i+1, $i); - } - $ret[$i] = sprintf("r%d", $i); - $ret[$i+1] = sprintf("r%d", $i+1); - } - if($reg ne "e1" || $plan9) { - $body .= "\t$name = $type($reg)\n"; - } - } - if ($ret[0] eq "_" && $ret[1] eq "_" && $ret[2] eq "_") { - $text .= "\t$call\n"; - } else { - $text .= "\t$ret[0], $ret[1], $ret[2] := $call\n"; - } - foreach my $use (@uses) { - $text .= "\t$use\n"; - } - $text .= $body; - - if ($plan9 && $ret[2] eq "e1") { - $text .= "\tif int32(r0) == -1 {\n"; - $text .= "\t\terr = e1\n"; - $text .= "\t}\n"; - } elsif ($do_errno) { - $text .= "\tif e1 != 0 {\n"; - $text .= "\t\terr = errnoErr(e1)\n"; - $text .= "\t}\n"; - } - $text .= "\treturn\n"; - $text .= "}\n\n"; -} - -chomp $text; -chomp $text; - -if($errors) { - exit 1; -} - -print <<EOF; -// $cmdline -// MACHINE GENERATED BY THE COMMAND ABOVE; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package unix - -import ( - "syscall" - "unsafe" -) - -var _ syscall.Errno - -$text -EOF -exit 0; diff --git a/vendor/golang.org/x/sys/unix/mksyscall_solaris.pl b/vendor/golang.org/x/sys/unix/mksyscall_solaris.pl deleted file mode 100755 index 06bade7687..0000000000 --- a/vendor/golang.org/x/sys/unix/mksyscall_solaris.pl +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -# This program reads a file containing function prototypes -# (like syscall_solaris.go) and generates system call bodies. -# The prototypes are marked by lines beginning with "//sys" -# and read like func declarations if //sys is replaced by func, but: -# * The parameter lists must give a name for each argument. -# This includes return parameters. -# * The parameter lists must give a type for each argument: -# the (x, y, z int) shorthand is not allowed. -# * If the return parameter is an error number, it must be named err. -# * If go func name needs to be different than its libc name, -# * or the function is not in libc, name could be specified -# * at the end, after "=" sign, like -# //sys getsockopt(s int, level int, name int, val uintptr, vallen *_Socklen) (err error) = libsocket.getsockopt - -use strict; - -my $cmdline = "mksyscall_solaris.pl " . join(' ', @ARGV); -my $errors = 0; -my $_32bit = ""; - -binmode STDOUT; - -if($ARGV[0] eq "-b32") { - $_32bit = "big-endian"; - shift; -} elsif($ARGV[0] eq "-l32") { - $_32bit = "little-endian"; - shift; -} - -if($ARGV[0] =~ /^-/) { - print STDERR "usage: mksyscall_solaris.pl [-b32 | -l32] [file ...]\n"; - exit 1; -} - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -sub parseparamlist($) { - my ($list) = @_; - $list =~ s/^\s*//; - $list =~ s/\s*$//; - if($list eq "") { - return (); - } - return split(/\s*,\s*/, $list); -} - -sub parseparam($) { - my ($p) = @_; - if($p !~ /^(\S*) (\S*)$/) { - print STDERR "$ARGV:$.: malformed parameter: $p\n"; - $errors = 1; - return ("xx", "int"); - } - return ($1, $2); -} - -my $package = ""; -my $text = ""; -my $dynimports = ""; -my $linknames = ""; -my @vars = (); -while(<>) { - chomp; - s/\s+/ /g; - s/^\s+//; - s/\s+$//; - $package = $1 if !$package && /^package (\S+)$/; - my $nonblock = /^\/\/sysnb /; - next if !/^\/\/sys / && !$nonblock; - - # Line must be of the form - # func Open(path string, mode int, perm int) (fd int, err error) - # Split into name, in params, out params. - if(!/^\/\/sys(nb)? (\w+)\(([^()]*)\)\s*(?:\(([^()]+)\))?\s*(?:=\s*(?:(\w*)\.)?(\w*))?$/) { - print STDERR "$ARGV:$.: malformed //sys declaration\n"; - $errors = 1; - next; - } - my ($nb, $func, $in, $out, $modname, $sysname) = ($1, $2, $3, $4, $5, $6); - - # Split argument lists on comma. - my @in = parseparamlist($in); - my @out = parseparamlist($out); - - # So file name. - if($modname eq "") { - $modname = "libc"; - } - - # System call name. - if($sysname eq "") { - $sysname = "$func"; - } - - # System call pointer variable name. - my $sysvarname = "proc$sysname"; - - my $strconvfunc = "BytePtrFromString"; - my $strconvtype = "*byte"; - - $sysname =~ y/A-Z/a-z/; # All libc functions are lowercase. - - # Runtime import of function to allow cross-platform builds. - $dynimports .= "//go:cgo_import_dynamic libc_${sysname} ${sysname} \"$modname.so\"\n"; - # Link symbol to proc address variable. - $linknames .= "//go:linkname ${sysvarname} libc_${sysname}\n"; - # Library proc address variable. - push @vars, $sysvarname; - - # Go function header. - $out = join(', ', @out); - if($out ne "") { - $out = " ($out)"; - } - if($text ne "") { - $text .= "\n" - } - $text .= sprintf "func %s(%s)%s {\n", $func, join(', ', @in), $out; - - # Check if err return available - my $errvar = ""; - foreach my $p (@out) { - my ($name, $type) = parseparam($p); - if($type eq "error") { - $errvar = $name; - last; - } - } - - # Prepare arguments to Syscall. - my @args = (); - my @uses = (); - my $n = 0; - foreach my $p (@in) { - my ($name, $type) = parseparam($p); - if($type =~ /^\*/) { - push @args, "uintptr(unsafe.Pointer($name))"; - } elsif($type eq "string" && $errvar ne "") { - $text .= "\tvar _p$n $strconvtype\n"; - $text .= "\t_p$n, $errvar = $strconvfunc($name)\n"; - $text .= "\tif $errvar != nil {\n\t\treturn\n\t}\n"; - push @args, "uintptr(unsafe.Pointer(_p$n))"; - push @uses, "use(unsafe.Pointer(_p$n))"; - $n++; - } elsif($type eq "string") { - print STDERR "$ARGV:$.: $func uses string arguments, but has no error return\n"; - $text .= "\tvar _p$n $strconvtype\n"; - $text .= "\t_p$n, _ = $strconvfunc($name)\n"; - push @args, "uintptr(unsafe.Pointer(_p$n))"; - push @uses, "use(unsafe.Pointer(_p$n))"; - $n++; - } elsif($type =~ /^\[\](.*)/) { - # Convert slice into pointer, length. - # Have to be careful not to take address of &a[0] if len == 0: - # pass nil in that case. - $text .= "\tvar _p$n *$1\n"; - $text .= "\tif len($name) > 0 {\n\t\t_p$n = \&$name\[0]\n\t}\n"; - push @args, "uintptr(unsafe.Pointer(_p$n))", "uintptr(len($name))"; - $n++; - } elsif($type eq "int64" && $_32bit ne "") { - if($_32bit eq "big-endian") { - push @args, "uintptr($name >> 32)", "uintptr($name)"; - } else { - push @args, "uintptr($name)", "uintptr($name >> 32)"; - } - } elsif($type eq "bool") { - $text .= "\tvar _p$n uint32\n"; - $text .= "\tif $name {\n\t\t_p$n = 1\n\t} else {\n\t\t_p$n = 0\n\t}\n"; - push @args, "uintptr(_p$n)"; - $n++; - } else { - push @args, "uintptr($name)"; - } - } - my $nargs = @args; - - # Determine which form to use; pad args with zeros. - my $asm = "sysvicall6"; - if ($nonblock) { - $asm = "rawSysvicall6"; - } - if(@args <= 6) { - while(@args < 6) { - push @args, "0"; - } - } else { - print STDERR "$ARGV:$.: too many arguments to system call\n"; - } - - # Actual call. - my $args = join(', ', @args); - my $call = "$asm(uintptr(unsafe.Pointer(&$sysvarname)), $nargs, $args)"; - - # Assign return values. - my $body = ""; - my $failexpr = ""; - my @ret = ("_", "_", "_"); - my @pout= (); - my $do_errno = 0; - for(my $i=0; $i<@out; $i++) { - my $p = $out[$i]; - my ($name, $type) = parseparam($p); - my $reg = ""; - if($name eq "err") { - $reg = "e1"; - $ret[2] = $reg; - $do_errno = 1; - } else { - $reg = sprintf("r%d", $i); - $ret[$i] = $reg; - } - if($type eq "bool") { - $reg = "$reg != 0"; - } - if($type eq "int64" && $_32bit ne "") { - # 64-bit number in r1:r0 or r0:r1. - if($i+2 > @out) { - print STDERR "$ARGV:$.: not enough registers for int64 return\n"; - } - if($_32bit eq "big-endian") { - $reg = sprintf("int64(r%d)<<32 | int64(r%d)", $i, $i+1); - } else { - $reg = sprintf("int64(r%d)<<32 | int64(r%d)", $i+1, $i); - } - $ret[$i] = sprintf("r%d", $i); - $ret[$i+1] = sprintf("r%d", $i+1); - } - if($reg ne "e1") { - $body .= "\t$name = $type($reg)\n"; - } - } - if ($ret[0] eq "_" && $ret[1] eq "_" && $ret[2] eq "_") { - $text .= "\t$call\n"; - } else { - $text .= "\t$ret[0], $ret[1], $ret[2] := $call\n"; - } - foreach my $use (@uses) { - $text .= "\t$use\n"; - } - $text .= $body; - - if ($do_errno) { - $text .= "\tif e1 != 0 {\n"; - $text .= "\t\terr = e1\n"; - $text .= "\t}\n"; - } - $text .= "\treturn\n"; - $text .= "}\n"; -} - -if($errors) { - exit 1; -} - -print <<EOF; -// $cmdline -// MACHINE GENERATED BY THE COMMAND ABOVE; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package $package - -import ( - "syscall" - "unsafe" -) -EOF - -print "import \"golang.org/x/sys/unix\"\n" if $package ne "unix"; - -my $vardecls = "\t" . join(",\n\t", @vars); -$vardecls .= " syscallFunc"; - -chomp($_=<<EOF); - -$dynimports -$linknames -var ( -$vardecls -) - -$text -EOF -print $_; -exit 0; diff --git a/vendor/golang.org/x/sys/unix/mksysctl_openbsd.pl b/vendor/golang.org/x/sys/unix/mksysctl_openbsd.pl deleted file mode 100755 index be67afa417..0000000000 --- a/vendor/golang.org/x/sys/unix/mksysctl_openbsd.pl +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/env perl - -# Copyright 2011 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -# -# Parse the header files for OpenBSD and generate a Go usable sysctl MIB. -# -# Build a MIB with each entry being an array containing the level, type and -# a hash that will contain additional entries if the current entry is a node. -# We then walk this MIB and create a flattened sysctl name to OID hash. -# - -use strict; - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -my $debug = 0; -my %ctls = (); - -my @headers = qw ( - sys/sysctl.h - sys/socket.h - sys/tty.h - sys/malloc.h - sys/mount.h - sys/namei.h - sys/sem.h - sys/shm.h - sys/vmmeter.h - uvm/uvm_param.h - uvm/uvm_swap_encrypt.h - ddb/db_var.h - net/if.h - net/if_pfsync.h - net/pipex.h - netinet/in.h - netinet/icmp_var.h - netinet/igmp_var.h - netinet/ip_ah.h - netinet/ip_carp.h - netinet/ip_divert.h - netinet/ip_esp.h - netinet/ip_ether.h - netinet/ip_gre.h - netinet/ip_ipcomp.h - netinet/ip_ipip.h - netinet/pim_var.h - netinet/tcp_var.h - netinet/udp_var.h - netinet6/in6.h - netinet6/ip6_divert.h - netinet6/pim6_var.h - netinet/icmp6.h - netmpls/mpls.h -); - -my @ctls = qw ( - kern - vm - fs - net - #debug # Special handling required - hw - #machdep # Arch specific - user - ddb - #vfs # Special handling required - fs.posix - kern.forkstat - kern.intrcnt - kern.malloc - kern.nchstats - kern.seminfo - kern.shminfo - kern.timecounter - kern.tty - kern.watchdog - net.bpf - net.ifq - net.inet - net.inet.ah - net.inet.carp - net.inet.divert - net.inet.esp - net.inet.etherip - net.inet.gre - net.inet.icmp - net.inet.igmp - net.inet.ip - net.inet.ip.ifq - net.inet.ipcomp - net.inet.ipip - net.inet.mobileip - net.inet.pfsync - net.inet.pim - net.inet.tcp - net.inet.udp - net.inet6 - net.inet6.divert - net.inet6.ip6 - net.inet6.icmp6 - net.inet6.pim6 - net.inet6.tcp6 - net.inet6.udp6 - net.mpls - net.mpls.ifq - net.key - net.pflow - net.pfsync - net.pipex - net.rt - vm.swapencrypt - #vfsgenctl # Special handling required -); - -# Node name "fixups" -my %ctl_map = ( - "ipproto" => "net.inet", - "net.inet.ipproto" => "net.inet", - "net.inet6.ipv6proto" => "net.inet6", - "net.inet6.ipv6" => "net.inet6.ip6", - "net.inet.icmpv6" => "net.inet6.icmp6", - "net.inet6.divert6" => "net.inet6.divert", - "net.inet6.tcp6" => "net.inet.tcp", - "net.inet6.udp6" => "net.inet.udp", - "mpls" => "net.mpls", - "swpenc" => "vm.swapencrypt" -); - -# Node mappings -my %node_map = ( - "net.inet.ip.ifq" => "net.ifq", - "net.inet.pfsync" => "net.pfsync", - "net.mpls.ifq" => "net.ifq" -); - -my $ctlname; -my %mib = (); -my %sysctl = (); -my $node; - -sub debug() { - print STDERR "$_[0]\n" if $debug; -} - -# Walk the MIB and build a sysctl name to OID mapping. -sub build_sysctl() { - my ($node, $name, $oid) = @_; - my %node = %{$node}; - my @oid = @{$oid}; - - foreach my $key (sort keys %node) { - my @node = @{$node{$key}}; - my $nodename = $name.($name ne '' ? '.' : '').$key; - my @nodeoid = (@oid, $node[0]); - if ($node[1] eq 'CTLTYPE_NODE') { - if (exists $node_map{$nodename}) { - $node = \%mib; - $ctlname = $node_map{$nodename}; - foreach my $part (split /\./, $ctlname) { - $node = \%{@{$$node{$part}}[2]}; - } - } else { - $node = $node[2]; - } - &build_sysctl($node, $nodename, \@nodeoid); - } elsif ($node[1] ne '') { - $sysctl{$nodename} = \@nodeoid; - } - } -} - -foreach my $ctl (@ctls) { - $ctls{$ctl} = $ctl; -} - -# Build MIB -foreach my $header (@headers) { - &debug("Processing $header..."); - open HEADER, "/usr/include/$header" || - print STDERR "Failed to open $header\n"; - while (<HEADER>) { - if ($_ =~ /^#define\s+(CTL_NAMES)\s+{/ || - $_ =~ /^#define\s+(CTL_(.*)_NAMES)\s+{/ || - $_ =~ /^#define\s+((.*)CTL_NAMES)\s+{/) { - if ($1 eq 'CTL_NAMES') { - # Top level. - $node = \%mib; - } else { - # Node. - my $nodename = lc($2); - if ($header =~ /^netinet\//) { - $ctlname = "net.inet.$nodename"; - } elsif ($header =~ /^netinet6\//) { - $ctlname = "net.inet6.$nodename"; - } elsif ($header =~ /^net\//) { - $ctlname = "net.$nodename"; - } else { - $ctlname = "$nodename"; - $ctlname =~ s/^(fs|net|kern)_/$1\./; - } - if (exists $ctl_map{$ctlname}) { - $ctlname = $ctl_map{$ctlname}; - } - if (not exists $ctls{$ctlname}) { - &debug("Ignoring $ctlname..."); - next; - } - - # Walk down from the top of the MIB. - $node = \%mib; - foreach my $part (split /\./, $ctlname) { - if (not exists $$node{$part}) { - &debug("Missing node $part"); - $$node{$part} = [ 0, '', {} ]; - } - $node = \%{@{$$node{$part}}[2]}; - } - } - - # Populate current node with entries. - my $i = -1; - while (defined($_) && $_ !~ /^}/) { - $_ = <HEADER>; - $i++ if $_ =~ /{.*}/; - next if $_ !~ /{\s+"(\w+)",\s+(CTLTYPE_[A-Z]+)\s+}/; - $$node{$1} = [ $i, $2, {} ]; - } - } - } - close HEADER; -} - -&build_sysctl(\%mib, "", []); - -print <<EOF; -// mksysctl_openbsd.pl -// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package unix; - -type mibentry struct { - ctlname string - ctloid []_C_int -} - -var sysctlMib = []mibentry { -EOF - -foreach my $name (sort keys %sysctl) { - my @oid = @{$sysctl{$name}}; - print "\t{ \"$name\", []_C_int{ ", join(', ', @oid), " } }, \n"; -} - -print <<EOF; -} -EOF diff --git a/vendor/golang.org/x/sys/unix/mksysnum_darwin.pl b/vendor/golang.org/x/sys/unix/mksysnum_darwin.pl deleted file mode 100755 index d3e5147fcb..0000000000 --- a/vendor/golang.org/x/sys/unix/mksysnum_darwin.pl +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. -# -# Generate system call table for Darwin from sys/syscall.h - -use strict; - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -my $command = "mksysnum_darwin.pl " . join(' ', @ARGV); - -print <<EOF; -// $command -// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package unix - -const ( -EOF - -while(<>){ - if(/^#define\s+SYS_(\w+)\s+([0-9]+)/){ - my $name = $1; - my $num = $2; - $name =~ y/a-z/A-Z/; - print " SYS_$name = $num;" - } -} - -print <<EOF; -) -EOF diff --git a/vendor/golang.org/x/sys/unix/mksysnum_dragonfly.pl b/vendor/golang.org/x/sys/unix/mksysnum_dragonfly.pl deleted file mode 100755 index 266a248c75..0000000000 --- a/vendor/golang.org/x/sys/unix/mksysnum_dragonfly.pl +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. -# -# Generate system call table for DragonFly from master list -# (for example, /usr/src/sys/kern/syscalls.master). - -use strict; - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -my $command = "mksysnum_dragonfly.pl " . join(' ', @ARGV); - -print <<EOF; -// $command -// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package unix - -const ( -EOF - -while(<>){ - if(/^([0-9]+)\s+STD\s+({ \S+\s+(\w+).*)$/){ - my $num = $1; - my $proto = $2; - my $name = "SYS_$3"; - $name =~ y/a-z/A-Z/; - - # There are multiple entries for enosys and nosys, so comment them out. - if($name =~ /^SYS_E?NOSYS$/){ - $name = "// $name"; - } - if($name eq 'SYS_SYS_EXIT'){ - $name = 'SYS_EXIT'; - } - - print " $name = $num; // $proto\n"; - } -} - -print <<EOF; -) -EOF diff --git a/vendor/golang.org/x/sys/unix/mksysnum_freebsd.pl b/vendor/golang.org/x/sys/unix/mksysnum_freebsd.pl deleted file mode 100755 index b767e124cc..0000000000 --- a/vendor/golang.org/x/sys/unix/mksysnum_freebsd.pl +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. -# -# Generate system call table for FreeBSD from master list -# (for example, /usr/src/sys/kern/syscalls.master). - -use strict; - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -my $command = "mksysnum_freebsd.pl " . join(' ', @ARGV); - -print <<EOF; -// $command -// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package unix - -const ( -EOF - -while(<>){ - if(/^([0-9]+)\s+\S+\s+STD\s+({ \S+\s+(\w+).*)$/){ - my $num = $1; - my $proto = $2; - my $name = "SYS_$3"; - $name =~ y/a-z/A-Z/; - - # There are multiple entries for enosys and nosys, so comment them out. - if($name =~ /^SYS_E?NOSYS$/){ - $name = "// $name"; - } - if($name eq 'SYS_SYS_EXIT'){ - $name = 'SYS_EXIT'; - } - if($name =~ /^SYS_CAP_+/ || $name =~ /^SYS___CAP_+/){ - next - } - - print " $name = $num; // $proto\n"; - - # We keep Capsicum syscall numbers for FreeBSD - # 9-STABLE here because we are not sure whether they - # are mature and stable. - if($num == 513){ - print " SYS_CAP_NEW = 514 // { int cap_new(int fd, uint64_t rights); }\n"; - print " SYS_CAP_GETRIGHTS = 515 // { int cap_getrights(int fd, \\\n"; - print " SYS_CAP_ENTER = 516 // { int cap_enter(void); }\n"; - print " SYS_CAP_GETMODE = 517 // { int cap_getmode(u_int *modep); }\n"; - } - } -} - -print <<EOF; -) -EOF diff --git a/vendor/golang.org/x/sys/unix/mksysnum_linux.pl b/vendor/golang.org/x/sys/unix/mksysnum_linux.pl deleted file mode 100755 index 4d4017deb2..0000000000 --- a/vendor/golang.org/x/sys/unix/mksysnum_linux.pl +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -use strict; - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -my $command = "mksysnum_linux.pl ". join(' ', @ARGV); - -print <<EOF; -// $command -// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package unix - -const( -EOF - -sub fmt { - my ($name, $num) = @_; - if($num > 999){ - # ignore deprecated syscalls that are no longer implemented - # https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/uapi/asm-generic/unistd.h?id=refs/heads/master#n716 - return; - } - $name =~ y/a-z/A-Z/; - print " SYS_$name = $num;\n"; -} - -my $prev; -open(GCC, "gcc -E -dD $ARGV[0] |") || die "can't run gcc"; -while(<GCC>){ - if(/^#define __NR_syscalls\s+/) { - # ignore redefinitions of __NR_syscalls - } - elsif(/^#define __NR_(\w+)\s+([0-9]+)/){ - $prev = $2; - fmt($1, $2); - } - elsif(/^#define __NR3264_(\w+)\s+([0-9]+)/){ - $prev = $2; - fmt($1, $2); - } - elsif(/^#define __NR_(\w+)\s+\(\w+\+\s*([0-9]+)\)/){ - fmt($1, $prev+$2) - } -} - -print <<EOF; -) -EOF diff --git a/vendor/golang.org/x/sys/unix/mksysnum_netbsd.pl b/vendor/golang.org/x/sys/unix/mksysnum_netbsd.pl deleted file mode 100755 index e74616a65a..0000000000 --- a/vendor/golang.org/x/sys/unix/mksysnum_netbsd.pl +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. -# -# Generate system call table for OpenBSD from master list -# (for example, /usr/src/sys/kern/syscalls.master). - -use strict; - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -my $command = "mksysnum_netbsd.pl " . join(' ', @ARGV); - -print <<EOF; -// $command -// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package unix - -const ( -EOF - -my $line = ''; -while(<>){ - if($line =~ /^(.*)\\$/) { - # Handle continuation - $line = $1; - $_ =~ s/^\s+//; - $line .= $_; - } else { - # New line - $line = $_; - } - next if $line =~ /\\$/; - if($line =~ /^([0-9]+)\s+((STD)|(NOERR))\s+(RUMP\s+)?({\s+\S+\s*\*?\s*\|(\S+)\|(\S*)\|(\w+).*\s+})(\s+(\S+))?$/) { - my $num = $1; - my $proto = $6; - my $compat = $8; - my $name = "$7_$9"; - - $name = "$7_$11" if $11 ne ''; - $name =~ y/a-z/A-Z/; - - if($compat eq '' || $compat eq '30' || $compat eq '50') { - print " $name = $num; // $proto\n"; - } - } -} - -print <<EOF; -) -EOF diff --git a/vendor/golang.org/x/sys/unix/mksysnum_openbsd.pl b/vendor/golang.org/x/sys/unix/mksysnum_openbsd.pl deleted file mode 100755 index ae5aad5866..0000000000 --- a/vendor/golang.org/x/sys/unix/mksysnum_openbsd.pl +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. -# -# Generate system call table for OpenBSD from master list -# (for example, /usr/src/sys/kern/syscalls.master). - -use strict; - -if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") { - print STDERR "GOARCH or GOOS not defined in environment\n"; - exit 1; -} - -my $command = "mksysnum_openbsd.pl " . join(' ', @ARGV); - -print <<EOF; -// $command -// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT - -// +build $ENV{'GOARCH'},$ENV{'GOOS'} - -package unix - -const ( -EOF - -while(<>){ - if(/^([0-9]+)\s+STD\s+(NOLOCK\s+)?({ \S+\s+\*?(\w+).*)$/){ - my $num = $1; - my $proto = $3; - my $name = $4; - $name =~ y/a-z/A-Z/; - - # There are multiple entries for enosys and nosys, so comment them out. - if($name =~ /^SYS_E?NOSYS$/){ - $name = "// $name"; - } - if($name eq 'SYS_SYS_EXIT'){ - $name = 'SYS_EXIT'; - } - - print " $name = $num; // $proto\n"; - } -} - -print <<EOF; -) -EOF diff --git a/vendor/golang.org/x/text/AUTHORS b/vendor/golang.org/x/text/AUTHORS new file mode 100644 index 0000000000..15167cd746 --- /dev/null +++ b/vendor/golang.org/x/text/AUTHORS @@ -0,0 +1,3 @@ +# This source code refers to The Go Authors for copyright purposes. +# The master list of authors is in the main Go distribution, +# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/text/CONTRIBUTORS b/vendor/golang.org/x/text/CONTRIBUTORS new file mode 100644 index 0000000000..1c4577e968 --- /dev/null +++ b/vendor/golang.org/x/text/CONTRIBUTORS @@ -0,0 +1,3 @@ +# This source code was written by the Go contributors. +# The master list of contributors is in the main Go distribution, +# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/text/encoding/charmap/maketables.go b/vendor/golang.org/x/text/encoding/charmap/maketables.go new file mode 100644 index 0000000000..f7941701e8 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/charmap/maketables.go @@ -0,0 +1,556 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" + "unicode/utf8" + + "golang.org/x/text/encoding" + "golang.org/x/text/internal/gen" +) + +const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + + ` !"#$%&'()*+,-./0123456789:;<=>?` + + `@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` + + "`abcdefghijklmnopqrstuvwxyz{|}~\u007f" + +var encodings = []struct { + name string + mib string + comment string + varName string + replacement byte + mapping string +}{ + { + "IBM Code Page 037", + "IBM037", + "", + "CodePage037", + 0x3f, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm", + }, + { + "IBM Code Page 437", + "PC8CodePage437", + "", + "CodePage437", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm", + }, + { + "IBM Code Page 850", + "PC850Multilingual", + "", + "CodePage850", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm", + }, + { + "IBM Code Page 852", + "PCp852", + "", + "CodePage852", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm", + }, + { + "IBM Code Page 855", + "IBM855", + "", + "CodePage855", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm", + }, + { + "Windows Code Page 858", // PC latin1 with Euro + "IBM00858", + "", + "CodePage858", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm", + }, + { + "IBM Code Page 860", + "IBM860", + "", + "CodePage860", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm", + }, + { + "IBM Code Page 862", + "PC862LatinHebrew", + "", + "CodePage862", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm", + }, + { + "IBM Code Page 863", + "IBM863", + "", + "CodePage863", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm", + }, + { + "IBM Code Page 865", + "IBM865", + "", + "CodePage865", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm", + }, + { + "IBM Code Page 866", + "IBM866", + "", + "CodePage866", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-ibm866.txt", + }, + { + "IBM Code Page 1047", + "IBM1047", + "", + "CodePage1047", + 0x3f, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm", + }, + { + "IBM Code Page 1140", + "IBM01140", + "", + "CodePage1140", + 0x3f, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm", + }, + { + "ISO 8859-1", + "ISOLatin1", + "", + "ISO8859_1", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm", + }, + { + "ISO 8859-2", + "ISOLatin2", + "", + "ISO8859_2", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-2.txt", + }, + { + "ISO 8859-3", + "ISOLatin3", + "", + "ISO8859_3", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-3.txt", + }, + { + "ISO 8859-4", + "ISOLatin4", + "", + "ISO8859_4", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-4.txt", + }, + { + "ISO 8859-5", + "ISOLatinCyrillic", + "", + "ISO8859_5", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-5.txt", + }, + { + "ISO 8859-6", + "ISOLatinArabic", + "", + "ISO8859_6,ISO8859_6E,ISO8859_6I", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-6.txt", + }, + { + "ISO 8859-7", + "ISOLatinGreek", + "", + "ISO8859_7", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-7.txt", + }, + { + "ISO 8859-8", + "ISOLatinHebrew", + "", + "ISO8859_8,ISO8859_8E,ISO8859_8I", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-8.txt", + }, + { + "ISO 8859-9", + "ISOLatin5", + "", + "ISO8859_9", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm", + }, + { + "ISO 8859-10", + "ISOLatin6", + "", + "ISO8859_10", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-10.txt", + }, + { + "ISO 8859-13", + "ISO885913", + "", + "ISO8859_13", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-13.txt", + }, + { + "ISO 8859-14", + "ISO885914", + "", + "ISO8859_14", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-14.txt", + }, + { + "ISO 8859-15", + "ISO885915", + "", + "ISO8859_15", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-15.txt", + }, + { + "ISO 8859-16", + "ISO885916", + "", + "ISO8859_16", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-16.txt", + }, + { + "KOI8-R", + "KOI8R", + "", + "KOI8R", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-koi8-r.txt", + }, + { + "KOI8-U", + "KOI8U", + "", + "KOI8U", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-koi8-u.txt", + }, + { + "Macintosh", + "Macintosh", + "", + "Macintosh", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-macintosh.txt", + }, + { + "Macintosh Cyrillic", + "MacintoshCyrillic", + "", + "MacintoshCyrillic", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt", + }, + { + "Windows 874", + "Windows874", + "", + "Windows874", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-874.txt", + }, + { + "Windows 1250", + "Windows1250", + "", + "Windows1250", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1250.txt", + }, + { + "Windows 1251", + "Windows1251", + "", + "Windows1251", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1251.txt", + }, + { + "Windows 1252", + "Windows1252", + "", + "Windows1252", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1252.txt", + }, + { + "Windows 1253", + "Windows1253", + "", + "Windows1253", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1253.txt", + }, + { + "Windows 1254", + "Windows1254", + "", + "Windows1254", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1254.txt", + }, + { + "Windows 1255", + "Windows1255", + "", + "Windows1255", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1255.txt", + }, + { + "Windows 1256", + "Windows1256", + "", + "Windows1256", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1256.txt", + }, + { + "Windows 1257", + "Windows1257", + "", + "Windows1257", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1257.txt", + }, + { + "Windows 1258", + "Windows1258", + "", + "Windows1258", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1258.txt", + }, + { + "X-User-Defined", + "XUserDefined", + "It is defined at http://encoding.spec.whatwg.org/#x-user-defined", + "XUserDefined", + encoding.ASCIISub, + ascii + + "\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" + + "\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" + + "\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" + + "\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" + + "\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" + + "\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" + + "\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" + + "\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" + + "\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" + + "\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" + + "\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" + + "\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" + + "\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" + + "\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" + + "\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" + + "\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff", + }, +} + +func getWHATWG(url string) string { + res, err := http.Get(url) + if err != nil { + log.Fatalf("%q: Get: %v", url, err) + } + defer res.Body.Close() + + mapping := make([]rune, 128) + for i := range mapping { + mapping[i] = '\ufffd' + } + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := 0, 0 + if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0 || 128 <= x { + log.Fatalf("code %d is out of range", x) + } + if 0x80 <= y && y < 0xa0 { + // We diverge from the WHATWG spec by mapping control characters + // in the range [0x80, 0xa0) to U+FFFD. + continue + } + mapping[x] = rune(y) + } + return ascii + string(mapping) +} + +func getUCM(url string) string { + res, err := http.Get(url) + if err != nil { + log.Fatalf("%q: Get: %v", url, err) + } + defer res.Body.Close() + + mapping := make([]rune, 256) + for i := range mapping { + mapping[i] = '\ufffd' + } + + charsFound := 0 + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + var c byte + var r rune + if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil { + continue + } + mapping[c] = r + charsFound++ + } + + if charsFound < 200 { + log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound) + } + + return string(mapping) +} + +func main() { + mibs := map[string]bool{} + all := []string{} + + w := gen.NewCodeWriter() + defer w.WriteGoFile("tables.go", "charmap") + + printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) } + + printf("import (\n") + printf("\t\"golang.org/x/text/encoding\"\n") + printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n") + printf(")\n\n") + for _, e := range encodings { + varNames := strings.Split(e.varName, ",") + all = append(all, varNames...) + varName := varNames[0] + switch { + case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"): + e.mapping = getWHATWG(e.mapping) + case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"): + e.mapping = getUCM(e.mapping) + } + + asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00 + if asciiSuperset { + low = 0x80 + } + lvn := 1 + if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") { + lvn = 3 + } + lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:] + printf("// %s is the %s encoding.\n", varName, e.name) + if e.comment != "" { + printf("//\n// %s\n", e.comment) + } + printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n", + varName, lowerVarName, lowerVarName, e.name) + if mibs[e.mib] { + log.Fatalf("MIB type %q declared multiple times.", e.mib) + } + printf("mib: identifier.%s,\n", e.mib) + printf("asciiSuperset: %t,\n", asciiSuperset) + printf("low: 0x%02x,\n", low) + printf("replacement: 0x%02x,\n", e.replacement) + + printf("decode: [256]utf8Enc{\n") + i, backMapping := 0, map[rune]byte{} + for _, c := range e.mapping { + if _, ok := backMapping[c]; !ok && c != utf8.RuneError { + backMapping[c] = byte(i) + } + var buf [8]byte + n := utf8.EncodeRune(buf[:], c) + if n > 3 { + panic(fmt.Sprintf("rune %q (%U) is too long", c, c)) + } + printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2]) + if i%2 == 1 { + printf("\n") + } + i++ + } + printf("},\n") + + printf("encode: [256]uint32{\n") + encode := make([]uint32, 0, 256) + for c, i := range backMapping { + encode = append(encode, uint32(i)<<24|uint32(c)) + } + sort.Sort(byRune(encode)) + for len(encode) < cap(encode) { + encode = append(encode, encode[len(encode)-1]) + } + for i, enc := range encode { + printf("0x%08x,", enc) + if i%8 == 7 { + printf("\n") + } + } + printf("},\n}\n") + + // Add an estimate of the size of a single Charmap{} struct value, which + // includes two 256 elem arrays of 4 bytes and some extra fields, which + // align to 3 uint64s on 64-bit architectures. + w.Size += 2*4*256 + 3*8 + } + // TODO: add proper line breaking. + printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n")) +} + +type byRune []uint32 + +func (b byRune) Len() int { return len(b) } +func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff } +func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/htmlindex/gen.go b/vendor/golang.org/x/text/encoding/htmlindex/gen.go new file mode 100644 index 0000000000..80a52f0d9d --- /dev/null +++ b/vendor/golang.org/x/text/encoding/htmlindex/gen.go @@ -0,0 +1,170 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "log" + "strings" + + "golang.org/x/text/internal/gen" +) + +type group struct { + Encodings []struct { + Labels []string + Name string + } +} + +func main() { + gen.Init() + + r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json") + var groups []group + if err := json.NewDecoder(r).Decode(&groups); err != nil { + log.Fatalf("Error reading encodings.json: %v", err) + } + + w := &bytes.Buffer{} + fmt.Fprintln(w, "type htmlEncoding byte") + fmt.Fprintln(w, "const (") + for i, g := range groups { + for _, e := range g.Encodings { + key := strings.ToLower(e.Name) + name := consts[key] + if name == "" { + log.Fatalf("No const defined for %s.", key) + } + if i == 0 { + fmt.Fprintf(w, "%s htmlEncoding = iota\n", name) + } else { + fmt.Fprintf(w, "%s\n", name) + } + } + } + fmt.Fprintln(w, "numEncodings") + fmt.Fprint(w, ")\n\n") + + fmt.Fprintln(w, "var canonical = [numEncodings]string{") + for _, g := range groups { + for _, e := range g.Encodings { + fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name)) + } + } + fmt.Fprint(w, "}\n\n") + + fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{") + for _, g := range groups { + for _, e := range g.Encodings { + for _, l := range e.Labels { + key := strings.ToLower(e.Name) + name := consts[key] + fmt.Fprintf(w, "%q: %s,\n", l, name) + } + } + } + fmt.Fprint(w, "}\n\n") + + var tags []string + fmt.Fprintln(w, "var localeMap = []htmlEncoding{") + for _, loc := range locales { + tags = append(tags, loc.tag) + fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag) + } + fmt.Fprint(w, "}\n\n") + + fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " ")) + + gen.WriteGoFile("tables.go", "htmlindex", w.Bytes()) +} + +// consts maps canonical encoding name to internal constant. +var consts = map[string]string{ + "utf-8": "utf8", + "ibm866": "ibm866", + "iso-8859-2": "iso8859_2", + "iso-8859-3": "iso8859_3", + "iso-8859-4": "iso8859_4", + "iso-8859-5": "iso8859_5", + "iso-8859-6": "iso8859_6", + "iso-8859-7": "iso8859_7", + "iso-8859-8": "iso8859_8", + "iso-8859-8-i": "iso8859_8I", + "iso-8859-10": "iso8859_10", + "iso-8859-13": "iso8859_13", + "iso-8859-14": "iso8859_14", + "iso-8859-15": "iso8859_15", + "iso-8859-16": "iso8859_16", + "koi8-r": "koi8r", + "koi8-u": "koi8u", + "macintosh": "macintosh", + "windows-874": "windows874", + "windows-1250": "windows1250", + "windows-1251": "windows1251", + "windows-1252": "windows1252", + "windows-1253": "windows1253", + "windows-1254": "windows1254", + "windows-1255": "windows1255", + "windows-1256": "windows1256", + "windows-1257": "windows1257", + "windows-1258": "windows1258", + "x-mac-cyrillic": "macintoshCyrillic", + "gbk": "gbk", + "gb18030": "gb18030", + // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG + "big5": "big5", + "euc-jp": "eucjp", + "iso-2022-jp": "iso2022jp", + "shift_jis": "shiftJIS", + "euc-kr": "euckr", + "replacement": "replacement", + "utf-16be": "utf16be", + "utf-16le": "utf16le", + "x-user-defined": "xUserDefined", +} + +// locales is taken from +// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm. +var locales = []struct{ tag, name string }{ + {"und", "windows-1252"}, // The default value. + {"ar", "windows-1256"}, + {"ba", "windows-1251"}, + {"be", "windows-1251"}, + {"bg", "windows-1251"}, + {"cs", "windows-1250"}, + {"el", "iso-8859-7"}, + {"et", "windows-1257"}, + {"fa", "windows-1256"}, + {"he", "windows-1255"}, + {"hr", "windows-1250"}, + {"hu", "iso-8859-2"}, + {"ja", "shift_jis"}, + {"kk", "windows-1251"}, + {"ko", "euc-kr"}, + {"ku", "windows-1254"}, + {"ky", "windows-1251"}, + {"lt", "windows-1257"}, + {"lv", "windows-1257"}, + {"mk", "windows-1251"}, + {"pl", "iso-8859-2"}, + {"ru", "windows-1251"}, + {"sah", "windows-1251"}, + {"sk", "windows-1250"}, + {"sl", "iso-8859-2"}, + {"sr", "windows-1251"}, + {"tg", "windows-1251"}, + {"th", "windows-874"}, + {"tr", "windows-1254"}, + {"tt", "windows-1251"}, + {"uk", "windows-1251"}, + {"vi", "windows-1258"}, + {"zh-hans", "gb18030"}, + {"zh-hant", "big5"}, +} diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go new file mode 100644 index 0000000000..0c8eba7e52 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go @@ -0,0 +1,137 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "log" + "strings" + + "golang.org/x/text/internal/gen" +) + +type registry struct { + XMLName xml.Name `xml:"registry"` + Updated string `xml:"updated"` + Registry []struct { + ID string `xml:"id,attr"` + Record []struct { + Name string `xml:"name"` + Xref []struct { + Type string `xml:"type,attr"` + Data string `xml:"data,attr"` + } `xml:"xref"` + Desc struct { + Data string `xml:",innerxml"` + // Any []struct { + // Data string `xml:",chardata"` + // } `xml:",any"` + // Data string `xml:",chardata"` + } `xml:"description,"` + MIB string `xml:"value"` + Alias []string `xml:"alias"` + MIME string `xml:"preferred_alias"` + } `xml:"record"` + } `xml:"registry"` +} + +func main() { + r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") + reg := ®istry{} + if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { + log.Fatalf("Error decoding charset registry: %v", err) + } + if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { + log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) + } + + w := &bytes.Buffer{} + fmt.Fprintf(w, "const (\n") + for _, rec := range reg.Registry[0].Record { + constName := "" + for _, a := range rec.Alias { + if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 { + // Some of the constant definitions have comments in them. Strip those. + constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0]) + } + } + if constName == "" { + switch rec.MIB { + case "2085": + constName = "HZGB2312" // Not listed as alias for some reason. + default: + log.Fatalf("No cs alias defined for %s.", rec.MIB) + } + } + if rec.MIME != "" { + rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME) + } + fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME) + if len(rec.Desc.Data) > 0 { + fmt.Fprint(w, "// ") + d := xml.NewDecoder(strings.NewReader(rec.Desc.Data)) + inElem := true + attr := "" + for { + t, err := d.Token() + if err != nil { + if err != io.EOF { + log.Fatal(err) + } + break + } + switch x := t.(type) { + case xml.CharData: + attr = "" // Don't need attribute info. + a := bytes.Split([]byte(x), []byte("\n")) + for i, b := range a { + if b = bytes.TrimSpace(b); len(b) != 0 { + if !inElem && i > 0 { + fmt.Fprint(w, "\n// ") + } + inElem = false + fmt.Fprintf(w, "%s ", string(b)) + } + } + case xml.StartElement: + if x.Name.Local == "xref" { + inElem = true + use := false + for _, a := range x.Attr { + if a.Name.Local == "type" { + use = use || a.Value != "person" + } + if a.Name.Local == "data" && use { + attr = a.Value + " " + } + } + } + case xml.EndElement: + inElem = false + fmt.Fprint(w, attr) + } + } + fmt.Fprint(w, "\n") + } + for _, x := range rec.Xref { + switch x.Type { + case "rfc": + fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data)) + case "uri": + fmt.Fprintf(w, "// Reference: %s\n", x.Data) + } + } + fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB) + fmt.Fprintln(w) + } + fmt.Fprintln(w, ")") + + gen.WriteGoFile("mib.go", "identifier", w.Bytes()) +} diff --git a/vendor/golang.org/x/text/encoding/japanese/maketables.go b/vendor/golang.org/x/text/encoding/japanese/maketables.go new file mode 100644 index 0000000000..d6c10deb07 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/japanese/maketables.go @@ -0,0 +1,161 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates tables.go: +// go run maketables.go | gofmt > tables.go + +// TODO: Emoji extensions? +// http://www.unicode.org/faq/emoji_dingbats.html +// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" +) + +type entry struct { + jisCode, table int +} + +func main() { + fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") + fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n") + fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n") + + reverse := [65536]entry{} + for i := range reverse { + reverse[i].table = -1 + } + + tables := []struct { + url string + name string + }{ + {"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"}, + {"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"}, + } + for i, table := range tables { + res, err := http.Get(table.url) + if err != nil { + log.Fatalf("%q: Get: %v", table.url, err) + } + defer res.Body.Close() + + mapping := [65536]uint16{} + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := 0, uint16(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("%q: could not parse %q", table.url, s) + } + if x < 0 || 120*94 <= x { + log.Fatalf("%q: JIS code %d is out of range", table.url, x) + } + mapping[x] = y + if reverse[y].table == -1 { + reverse[y] = entry{jisCode: x, table: i} + } + } + if err := scanner.Err(); err != nil { + log.Fatalf("%q: scanner error: %v", table.url, err) + } + + fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n", + table.name, table.name, table.url) + fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name) + for i, m := range mapping { + if m != 0 { + fmt.Printf("\t%d: 0x%04X,\n", i, m) + } + } + fmt.Printf("}\n\n") + } + + // Any run of at least separation continuous zero entries in the reverse map will + // be a separate encode table. + const separation = 1024 + + intervals := []interval(nil) + low, high := -1, -1 + for i, v := range reverse { + if v.table == -1 { + continue + } + if low < 0 { + low = i + } else if i-high >= separation { + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + low = i + } + high = i + 1 + } + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + sort.Sort(byDecreasingLength(intervals)) + + fmt.Printf("const (\n") + fmt.Printf("\tjis0208 = 1\n") + fmt.Printf("\tjis0212 = 2\n") + fmt.Printf("\tcodeMask = 0x7f\n") + fmt.Printf("\tcodeShift = 7\n") + fmt.Printf("\ttableShift = 14\n") + fmt.Printf(")\n\n") + + fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) + fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n") + fmt.Printf("// sorted by decreasing length.\n") + for i, v := range intervals { + fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) + } + fmt.Printf("//\n") + fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n") + fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n") + fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n") + fmt.Printf("// JIS code (94*j1 + j2) within that table.\n") + fmt.Printf("\n") + + for i, v := range intervals { + fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) + fmt.Printf("var encode%d = [...]uint16{\n", i) + for j := v.low; j < v.high; j++ { + x := reverse[j] + if x.table == -1 { + continue + } + fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n", + j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94) + } + fmt.Printf("}\n\n") + } +} + +// interval is a half-open interval [low, high). +type interval struct { + low, high int +} + +func (i interval) len() int { return i.high - i.low } + +// byDecreasingLength sorts intervals by decreasing length. +type byDecreasingLength []interval + +func (b byDecreasingLength) Len() int { return len(b) } +func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } +func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/korean/maketables.go b/vendor/golang.org/x/text/encoding/korean/maketables.go new file mode 100644 index 0000000000..c84034fb67 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/korean/maketables.go @@ -0,0 +1,143 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates tables.go: +// go run maketables.go | gofmt > tables.go + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" +) + +func main() { + fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") + fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n") + fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n") + + res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt") + if err != nil { + log.Fatalf("Get: %v", err) + } + defer res.Body.Close() + + mapping := [65536]uint16{} + reverse := [65536]uint16{} + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := uint16(0), uint16(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x { + log.Fatalf("EUC-KR code %d is out of range", x) + } + mapping[x] = y + if reverse[y] == 0 { + c0, c1 := uint16(0), uint16(0) + if x < 178*(0xc7-0x81) { + c0 = uint16(x/178) + 0x81 + c1 = uint16(x % 178) + switch { + case c1 < 1*26: + c1 += 0x41 + case c1 < 2*26: + c1 += 0x47 + default: + c1 += 0x4d + } + } else { + x -= 178 * (0xc7 - 0x81) + c0 = uint16(x/94) + 0xc7 + c1 = uint16(x%94) + 0xa1 + } + reverse[y] = c0<<8 | c1 + } + } + if err := scanner.Err(); err != nil { + log.Fatalf("scanner error: %v", err) + } + + fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n") + fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n") + fmt.Printf("var decode = [...]uint16{\n") + for i, v := range mapping { + if v != 0 { + fmt.Printf("\t%d: 0x%04X,\n", i, v) + } + } + fmt.Printf("}\n\n") + + // Any run of at least separation continuous zero entries in the reverse map will + // be a separate encode table. + const separation = 1024 + + intervals := []interval(nil) + low, high := -1, -1 + for i, v := range reverse { + if v == 0 { + continue + } + if low < 0 { + low = i + } else if i-high >= separation { + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + low = i + } + high = i + 1 + } + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + sort.Sort(byDecreasingLength(intervals)) + + fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) + fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n") + fmt.Printf("// sorted by decreasing length.\n") + for i, v := range intervals { + fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) + } + fmt.Printf("\n") + + for i, v := range intervals { + fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) + fmt.Printf("var encode%d = [...]uint16{\n", i) + for j := v.low; j < v.high; j++ { + x := reverse[j] + if x == 0 { + continue + } + fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) + } + fmt.Printf("}\n\n") + } +} + +// interval is a half-open interval [low, high). +type interval struct { + low, high int +} + +func (i interval) len() int { return i.high - i.low } + +// byDecreasingLength sorts intervals by decreasing length. +type byDecreasingLength []interval + +func (b byDecreasingLength) Len() int { return len(b) } +func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } +func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go new file mode 100644 index 0000000000..55016c7862 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go @@ -0,0 +1,161 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates tables.go: +// go run maketables.go | gofmt > tables.go + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" +) + +func main() { + fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") + fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n") + fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n") + + printGB18030() + printGBK() +} + +func printGB18030() { + res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt") + if err != nil { + log.Fatalf("Get: %v", err) + } + defer res.Body.Close() + + fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n") + fmt.Printf("var gb18030 = [...][2]uint16{\n") + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := uint32(0), uint32(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0x10000 && y < 0x10000 { + fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y) + } + } + fmt.Printf("}\n\n") +} + +func printGBK() { + res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt") + if err != nil { + log.Fatalf("Get: %v", err) + } + defer res.Body.Close() + + mapping := [65536]uint16{} + reverse := [65536]uint16{} + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := uint16(0), uint16(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0 || 126*190 <= x { + log.Fatalf("GBK code %d is out of range", x) + } + mapping[x] = y + if reverse[y] == 0 { + c0, c1 := x/190, x%190 + if c1 >= 0x3f { + c1++ + } + reverse[y] = (0x81+c0)<<8 | (0x40 + c1) + } + } + if err := scanner.Err(); err != nil { + log.Fatalf("scanner error: %v", err) + } + + fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n") + fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n") + fmt.Printf("var decode = [...]uint16{\n") + for i, v := range mapping { + if v != 0 { + fmt.Printf("\t%d: 0x%04X,\n", i, v) + } + } + fmt.Printf("}\n\n") + + // Any run of at least separation continuous zero entries in the reverse map will + // be a separate encode table. + const separation = 1024 + + intervals := []interval(nil) + low, high := -1, -1 + for i, v := range reverse { + if v == 0 { + continue + } + if low < 0 { + low = i + } else if i-high >= separation { + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + low = i + } + high = i + 1 + } + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + sort.Sort(byDecreasingLength(intervals)) + + fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) + fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n") + fmt.Printf("// sorted by decreasing length.\n") + for i, v := range intervals { + fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) + } + fmt.Printf("\n") + + for i, v := range intervals { + fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) + fmt.Printf("var encode%d = [...]uint16{\n", i) + for j := v.low; j < v.high; j++ { + x := reverse[j] + if x == 0 { + continue + } + fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) + } + fmt.Printf("}\n\n") + } +} + +// interval is a half-open interval [low, high). +type interval struct { + low, high int +} + +func (i interval) len() int { return i.high - i.low } + +// byDecreasingLength sorts intervals by decreasing length. +type byDecreasingLength []interval + +func (b byDecreasingLength) Len() int { return len(b) } +func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } +func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go new file mode 100644 index 0000000000..cf7fdb31a5 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go @@ -0,0 +1,140 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates tables.go: +// go run maketables.go | gofmt > tables.go + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" +) + +func main() { + fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") + fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n") + fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n") + + res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt") + if err != nil { + log.Fatalf("Get: %v", err) + } + defer res.Body.Close() + + mapping := [65536]uint32{} + reverse := [65536 * 4]uint16{} + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := uint16(0), uint32(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0 || 126*157 <= x { + log.Fatalf("Big5 code %d is out of range", x) + } + mapping[x] = y + + // The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that + // "The index pointer for code point in index is the first pointer + // corresponding to code point in index", which would normally mean + // that the code below should be guarded by "if reverse[y] == 0", but + // last instead of first seems to match the behavior of + // "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in + // http://encoding.spec.whatwg.org/index-big5.txt, as index 2148 + // (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc") + // and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc". + c0, c1 := x/157, x%157 + if c1 < 0x3f { + c1 += 0x40 + } else { + c1 += 0x62 + } + reverse[y] = (0x81+c0)<<8 | c1 + } + if err := scanner.Err(); err != nil { + log.Fatalf("scanner error: %v", err) + } + + fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n") + fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n") + fmt.Printf("var decode = [...]uint32{\n") + for i, v := range mapping { + if v != 0 { + fmt.Printf("\t%d: 0x%08X,\n", i, v) + } + } + fmt.Printf("}\n\n") + + // Any run of at least separation continuous zero entries in the reverse map will + // be a separate encode table. + const separation = 1024 + + intervals := []interval(nil) + low, high := -1, -1 + for i, v := range reverse { + if v == 0 { + continue + } + if low < 0 { + low = i + } else if i-high >= separation { + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + low = i + } + high = i + 1 + } + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + sort.Sort(byDecreasingLength(intervals)) + + fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) + fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n") + fmt.Printf("// sorted by decreasing length.\n") + for i, v := range intervals { + fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high) + } + fmt.Printf("\n") + + for i, v := range intervals { + fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) + fmt.Printf("var encode%d = [...]uint16{\n", i) + for j := v.low; j < v.high; j++ { + x := reverse[j] + if x == 0 { + continue + } + fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) + } + fmt.Printf("}\n\n") + } +} + +// interval is a half-open interval [low, high). +type interval struct { + low, high int +} + +func (i interval) len() int { return i.high - i.low } + +// byDecreasingLength sorts intervals by decreasing length. +type byDecreasingLength []interval + +func (b byDecreasingLength) Len() int { return len(b) } +func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } +func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/internal/gen/code.go b/vendor/golang.org/x/text/internal/gen/code.go new file mode 100644 index 0000000000..d7031b6945 --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/code.go @@ -0,0 +1,351 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gen + +import ( + "bytes" + "encoding/gob" + "fmt" + "hash" + "hash/fnv" + "io" + "log" + "os" + "reflect" + "strings" + "unicode" + "unicode/utf8" +) + +// This file contains utilities for generating code. + +// TODO: other write methods like: +// - slices, maps, types, etc. + +// CodeWriter is a utility for writing structured code. It computes the content +// hash and size of written content. It ensures there are newlines between +// written code blocks. +type CodeWriter struct { + buf bytes.Buffer + Size int + Hash hash.Hash32 // content hash + gob *gob.Encoder + // For comments we skip the usual one-line separator if they are followed by + // a code block. + skipSep bool +} + +func (w *CodeWriter) Write(p []byte) (n int, err error) { + return w.buf.Write(p) +} + +// NewCodeWriter returns a new CodeWriter. +func NewCodeWriter() *CodeWriter { + h := fnv.New32() + return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)} +} + +// WriteGoFile appends the buffer with the total size of all created structures +// and writes it as a Go file to the the given file with the given package name. +func (w *CodeWriter) WriteGoFile(filename, pkg string) { + f, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer f.Close() + if _, err = w.WriteGo(f, pkg); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteGo appends the buffer with the total size of all created structures and +// writes it as a Go file to the the given writer with the given package name. +func (w *CodeWriter) WriteGo(out io.Writer, pkg string) (n int, err error) { + sz := w.Size + w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32()) + defer w.buf.Reset() + return WriteGo(out, pkg, w.buf.Bytes()) +} + +func (w *CodeWriter) printf(f string, x ...interface{}) { + fmt.Fprintf(w, f, x...) +} + +func (w *CodeWriter) insertSep() { + if w.skipSep { + w.skipSep = false + return + } + // Use at least two newlines to ensure a blank space between the previous + // block. WriteGoFile will remove extraneous newlines. + w.printf("\n\n") +} + +// WriteComment writes a comment block. All line starts are prefixed with "//". +// Initial empty lines are gobbled. The indentation for the first line is +// stripped from consecutive lines. +func (w *CodeWriter) WriteComment(comment string, args ...interface{}) { + s := fmt.Sprintf(comment, args...) + s = strings.Trim(s, "\n") + + // Use at least two newlines to ensure a blank space between the previous + // block. WriteGoFile will remove extraneous newlines. + w.printf("\n\n// ") + w.skipSep = true + + // strip first indent level. + sep := "\n" + for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] { + sep += s[:1] + } + + strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s) + + w.printf("\n") +} + +func (w *CodeWriter) writeSizeInfo(size int) { + w.printf("// Size: %d bytes\n", size) +} + +// WriteConst writes a constant of the given name and value. +func (w *CodeWriter) WriteConst(name string, x interface{}) { + w.insertSep() + v := reflect.ValueOf(x) + + switch v.Type().Kind() { + case reflect.String: + w.printf("const %s %s = ", name, typeName(x)) + w.WriteString(v.String()) + w.printf("\n") + default: + w.printf("const %s = %#v\n", name, x) + } +} + +// WriteVar writes a variable of the given name and value. +func (w *CodeWriter) WriteVar(name string, x interface{}) { + w.insertSep() + v := reflect.ValueOf(x) + oldSize := w.Size + sz := int(v.Type().Size()) + w.Size += sz + + switch v.Type().Kind() { + case reflect.String: + w.printf("var %s %s = ", name, typeName(x)) + w.WriteString(v.String()) + case reflect.Struct: + w.gob.Encode(x) + fallthrough + case reflect.Slice, reflect.Array: + w.printf("var %s = ", name) + w.writeValue(v) + w.writeSizeInfo(w.Size - oldSize) + default: + w.printf("var %s %s = ", name, typeName(x)) + w.gob.Encode(x) + w.writeValue(v) + w.writeSizeInfo(w.Size - oldSize) + } + w.printf("\n") +} + +func (w *CodeWriter) writeValue(v reflect.Value) { + x := v.Interface() + switch v.Kind() { + case reflect.String: + w.WriteString(v.String()) + case reflect.Array: + // Don't double count: callers of WriteArray count on the size being + // added, so we need to discount it here. + w.Size -= int(v.Type().Size()) + w.writeSlice(x, true) + case reflect.Slice: + w.writeSlice(x, false) + case reflect.Struct: + w.printf("%s{\n", typeName(v.Interface())) + t := v.Type() + for i := 0; i < v.NumField(); i++ { + w.printf("%s: ", t.Field(i).Name) + w.writeValue(v.Field(i)) + w.printf(",\n") + } + w.printf("}") + default: + w.printf("%#v", x) + } +} + +// WriteString writes a string literal. +func (w *CodeWriter) WriteString(s string) { + s = strings.Replace(s, `\`, `\\`, -1) + io.WriteString(w.Hash, s) // content hash + w.Size += len(s) + + const maxInline = 40 + if len(s) <= maxInline { + w.printf("%q", s) + return + } + + // We will render the string as a multi-line string. + const maxWidth = 80 - 4 - len(`"`) - len(`" +`) + + // When starting on its own line, go fmt indents line 2+ an extra level. + n, max := maxWidth, maxWidth-4 + + // As per https://golang.org/issue/18078, the compiler has trouble + // compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN, + // for large N. We insert redundant, explicit parentheses to work around + // that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 + + // ... + s127) + etc + (etc + ... + sN). + explicitParens, extraComment := len(s) > 128*1024, "" + if explicitParens { + w.printf(`(`) + extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078" + } + + // Print "" +\n, if a string does not start on its own line. + b := w.buf.Bytes() + if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' { + w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment) + n, max = maxWidth, maxWidth + } + + w.printf(`"`) + + for sz, p, nLines := 0, 0, 0; p < len(s); { + var r rune + r, sz = utf8.DecodeRuneInString(s[p:]) + out := s[p : p+sz] + chars := 1 + if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' { + switch sz { + case 1: + out = fmt.Sprintf("\\x%02x", s[p]) + case 2, 3: + out = fmt.Sprintf("\\u%04x", r) + case 4: + out = fmt.Sprintf("\\U%08x", r) + } + chars = len(out) + } + if n -= chars; n < 0 { + nLines++ + if explicitParens && nLines&63 == 63 { + w.printf("\") + (\"") + } + w.printf("\" +\n\"") + n = max - len(out) + } + w.printf("%s", out) + p += sz + } + w.printf(`"`) + if explicitParens { + w.printf(`)`) + } +} + +// WriteSlice writes a slice value. +func (w *CodeWriter) WriteSlice(x interface{}) { + w.writeSlice(x, false) +} + +// WriteArray writes an array value. +func (w *CodeWriter) WriteArray(x interface{}) { + w.writeSlice(x, true) +} + +func (w *CodeWriter) writeSlice(x interface{}, isArray bool) { + v := reflect.ValueOf(x) + w.gob.Encode(v.Len()) + w.Size += v.Len() * int(v.Type().Elem().Size()) + name := typeName(x) + if isArray { + name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:]) + } + if isArray { + w.printf("%s{\n", name) + } else { + w.printf("%s{ // %d elements\n", name, v.Len()) + } + + switch kind := v.Type().Elem().Kind(); kind { + case reflect.String: + for _, s := range x.([]string) { + w.WriteString(s) + w.printf(",\n") + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + // nLine and nBlock are the number of elements per line and block. + nLine, nBlock, format := 8, 64, "%d," + switch kind { + case reflect.Uint8: + format = "%#02x," + case reflect.Uint16: + format = "%#04x," + case reflect.Uint32: + nLine, nBlock, format = 4, 32, "%#08x," + case reflect.Uint, reflect.Uint64: + nLine, nBlock, format = 4, 32, "%#016x," + case reflect.Int8: + nLine = 16 + } + n := nLine + for i := 0; i < v.Len(); i++ { + if i%nBlock == 0 && v.Len() > nBlock { + w.printf("// Entry %X - %X\n", i, i+nBlock-1) + } + x := v.Index(i).Interface() + w.gob.Encode(x) + w.printf(format, x) + if n--; n == 0 { + n = nLine + w.printf("\n") + } + } + w.printf("\n") + case reflect.Struct: + zero := reflect.Zero(v.Type().Elem()).Interface() + for i := 0; i < v.Len(); i++ { + x := v.Index(i).Interface() + w.gob.EncodeValue(v) + if !reflect.DeepEqual(zero, x) { + line := fmt.Sprintf("%#v,\n", x) + line = line[strings.IndexByte(line, '{'):] + w.printf("%d: ", i) + w.printf(line) + } + } + case reflect.Array: + for i := 0; i < v.Len(); i++ { + w.printf("%d: %#v,\n", i, v.Index(i).Interface()) + } + default: + panic("gen: slice elem type not supported") + } + w.printf("}") +} + +// WriteType writes a definition of the type of the given value and returns the +// type name. +func (w *CodeWriter) WriteType(x interface{}) string { + t := reflect.TypeOf(x) + w.printf("type %s struct {\n", t.Name()) + for i := 0; i < t.NumField(); i++ { + w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type) + } + w.printf("}\n") + return t.Name() +} + +// typeName returns the name of the go type of x. +func typeName(x interface{}) string { + t := reflect.ValueOf(x).Type() + return strings.Replace(fmt.Sprint(t), "main.", "", 1) +} diff --git a/vendor/golang.org/x/text/internal/gen/gen.go b/vendor/golang.org/x/text/internal/gen/gen.go new file mode 100644 index 0000000000..2acb0355a2 --- /dev/null +++ b/vendor/golang.org/x/text/internal/gen/gen.go @@ -0,0 +1,281 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gen contains common code for the various code generation tools in the +// text repository. Its usage ensures consistency between tools. +// +// This package defines command line flags that are common to most generation +// tools. The flags allow for specifying specific Unicode and CLDR versions +// in the public Unicode data repository (http://www.unicode.org/Public). +// +// A local Unicode data mirror can be set through the flag -local or the +// environment variable UNICODE_DIR. The former takes precedence. The local +// directory should follow the same structure as the public repository. +// +// IANA data can also optionally be mirrored by putting it in the iana directory +// rooted at the top of the local mirror. Beware, though, that IANA data is not +// versioned. So it is up to the developer to use the right version. +package gen // import "golang.org/x/text/internal/gen" + +import ( + "bytes" + "flag" + "fmt" + "go/build" + "go/format" + "io" + "io/ioutil" + "log" + "net/http" + "os" + "path" + "path/filepath" + "sync" + "unicode" + + "golang.org/x/text/unicode/cldr" +) + +var ( + url = flag.String("url", + "http://www.unicode.org/Public", + "URL of Unicode database directory") + iana = flag.String("iana", + "http://www.iana.org", + "URL of the IANA repository") + unicodeVersion = flag.String("unicode", + getEnv("UNICODE_VERSION", unicode.Version), + "unicode version to use") + cldrVersion = flag.String("cldr", + getEnv("CLDR_VERSION", cldr.Version), + "cldr version to use") +) + +func getEnv(name, def string) string { + if v := os.Getenv(name); v != "" { + return v + } + return def +} + +// Init performs common initialization for a gen command. It parses the flags +// and sets up the standard logging parameters. +func Init() { + log.SetPrefix("") + log.SetFlags(log.Lshortfile) + flag.Parse() +} + +const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. + +package %s + +` + +// UnicodeVersion reports the requested Unicode version. +func UnicodeVersion() string { + return *unicodeVersion +} + +// UnicodeVersion reports the requested CLDR version. +func CLDRVersion() string { + return *cldrVersion +} + +// IsLocal reports whether data files are available locally. +func IsLocal() bool { + dir, err := localReadmeFile() + if err != nil { + return false + } + if _, err = os.Stat(dir); err != nil { + return false + } + return true +} + +// OpenUCDFile opens the requested UCD file. The file is specified relative to +// the public Unicode root directory. It will call log.Fatal if there are any +// errors. +func OpenUCDFile(file string) io.ReadCloser { + return openUnicode(path.Join(*unicodeVersion, "ucd", file)) +} + +// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there +// are any errors. +func OpenCLDRCoreZip() io.ReadCloser { + return OpenUnicodeFile("cldr", *cldrVersion, "core.zip") +} + +// OpenUnicodeFile opens the requested file of the requested category from the +// root of the Unicode data archive. The file is specified relative to the +// public Unicode root directory. If version is "", it will use the default +// Unicode version. It will call log.Fatal if there are any errors. +func OpenUnicodeFile(category, version, file string) io.ReadCloser { + if version == "" { + version = UnicodeVersion() + } + return openUnicode(path.Join(category, version, file)) +} + +// OpenIANAFile opens the requested IANA file. The file is specified relative +// to the IANA root, which is typically either http://www.iana.org or the +// iana directory in the local mirror. It will call log.Fatal if there are any +// errors. +func OpenIANAFile(path string) io.ReadCloser { + return Open(*iana, "iana", path) +} + +var ( + dirMutex sync.Mutex + localDir string +) + +const permissions = 0755 + +func localReadmeFile() (string, error) { + p, err := build.Import("golang.org/x/text", "", build.FindOnly) + if err != nil { + return "", fmt.Errorf("Could not locate package: %v", err) + } + return filepath.Join(p.Dir, "DATA", "README"), nil +} + +func getLocalDir() string { + dirMutex.Lock() + defer dirMutex.Unlock() + + readme, err := localReadmeFile() + if err != nil { + log.Fatal(err) + } + dir := filepath.Dir(readme) + if _, err := os.Stat(readme); err != nil { + if err := os.MkdirAll(dir, permissions); err != nil { + log.Fatalf("Could not create directory: %v", err) + } + ioutil.WriteFile(readme, []byte(readmeTxt), permissions) + } + return dir +} + +const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT. + +This directory contains downloaded files used to generate the various tables +in the golang.org/x/text subrepo. + +Note that the language subtag repo (iana/assignments/language-subtag-registry) +and all other times in the iana subdirectory are not versioned and will need +to be periodically manually updated. The easiest way to do this is to remove +the entire iana directory. This is mostly of concern when updating the language +package. +` + +// Open opens subdir/path if a local directory is specified and the file exists, +// where subdir is a directory relative to the local root, or fetches it from +// urlRoot/path otherwise. It will call log.Fatal if there are any errors. +func Open(urlRoot, subdir, path string) io.ReadCloser { + file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path)) + return open(file, urlRoot, path) +} + +func openUnicode(path string) io.ReadCloser { + file := filepath.Join(getLocalDir(), filepath.FromSlash(path)) + return open(file, *url, path) +} + +// TODO: automatically periodically update non-versioned files. + +func open(file, urlRoot, path string) io.ReadCloser { + if f, err := os.Open(file); err == nil { + return f + } + r := get(urlRoot, path) + defer r.Close() + b, err := ioutil.ReadAll(r) + if err != nil { + log.Fatalf("Could not download file: %v", err) + } + os.MkdirAll(filepath.Dir(file), permissions) + if err := ioutil.WriteFile(file, b, permissions); err != nil { + log.Fatalf("Could not create file: %v", err) + } + return ioutil.NopCloser(bytes.NewReader(b)) +} + +func get(root, path string) io.ReadCloser { + url := root + "/" + path + fmt.Printf("Fetching %s...", url) + defer fmt.Println(" done.") + resp, err := http.Get(url) + if err != nil { + log.Fatalf("HTTP GET: %v", err) + } + if resp.StatusCode != 200 { + log.Fatalf("Bad GET status for %q: %q", url, resp.Status) + } + return resp.Body +} + +// TODO: use Write*Version in all applicable packages. + +// WriteUnicodeVersion writes a constant for the Unicode version from which the +// tables are generated. +func WriteUnicodeVersion(w io.Writer) { + fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n") + fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion()) +} + +// WriteCLDRVersion writes a constant for the CLDR version from which the +// tables are generated. +func WriteCLDRVersion(w io.Writer) { + fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n") + fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion()) +} + +// WriteGoFile prepends a standard file comment and package statement to the +// given bytes, applies gofmt, and writes them to a file with the given name. +// It will call log.Fatal if there are any errors. +func WriteGoFile(filename, pkg string, b []byte) { + w, err := os.Create(filename) + if err != nil { + log.Fatalf("Could not create file %s: %v", filename, err) + } + defer w.Close() + if _, err = WriteGo(w, pkg, b); err != nil { + log.Fatalf("Error writing file %s: %v", filename, err) + } +} + +// WriteGo prepends a standard file comment and package statement to the given +// bytes, applies gofmt, and writes them to w. +func WriteGo(w io.Writer, pkg string, b []byte) (n int, err error) { + src := []byte(fmt.Sprintf(header, pkg)) + src = append(src, b...) + formatted, err := format.Source(src) + if err != nil { + // Print the generated code even in case of an error so that the + // returned error can be meaningfully interpreted. + n, _ = w.Write(src) + return n, err + } + return w.Write(formatted) +} + +// Repackage rewrites a Go file from belonging to package main to belonging to +// the given package. +func Repackage(inFile, outFile, pkg string) { + src, err := ioutil.ReadFile(inFile) + if err != nil { + log.Fatalf("reading %s: %v", inFile, err) + } + const toDelete = "package main\n\n" + i := bytes.Index(src, []byte(toDelete)) + if i < 0 { + log.Fatalf("Could not find %q in %s.", toDelete, inFile) + } + w := &bytes.Buffer{} + w.Write(src[i+len(toDelete):]) + WriteGoFile(outFile, pkg, w.Bytes()) +} diff --git a/vendor/golang.org/x/text/internal/triegen/compact.go b/vendor/golang.org/x/text/internal/triegen/compact.go new file mode 100644 index 0000000000..397b975c1b --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/compact.go @@ -0,0 +1,58 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen + +// This file defines Compacter and its implementations. + +import "io" + +// A Compacter generates an alternative, more space-efficient way to store a +// trie value block. A trie value block holds all possible values for the last +// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block +// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0). +type Compacter interface { + // Size returns whether the Compacter could encode the given block as well + // as its size in case it can. len(v) is always 64. + Size(v []uint64) (sz int, ok bool) + + // Store stores the block using the Compacter's compression method. + // It returns a handle with which the block can be retrieved. + // len(v) is always 64. + Store(v []uint64) uint32 + + // Print writes the data structures associated to the given store to w. + Print(w io.Writer) error + + // Handler returns the name of a function that gets called during trie + // lookup for blocks generated by the Compacter. The function should be of + // the form func (n uint32, b byte) uint64, where n is the index returned by + // the Compacter's Store method and b is the last byte of the UTF-8 + // encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the + // block. + Handler() string +} + +// simpleCompacter is the default Compacter used by builder. It implements a +// normal trie block. +type simpleCompacter builder + +func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) { + return blockSize * b.ValueSize, true +} + +func (b *simpleCompacter) Store(v []uint64) uint32 { + h := uint32(len(b.ValueBlocks) - blockOffset) + b.ValueBlocks = append(b.ValueBlocks, v) + return h +} + +func (b *simpleCompacter) Print(io.Writer) error { + // Structures are printed in print.go. + return nil +} + +func (b *simpleCompacter) Handler() string { + panic("Handler should be special-cased for this Compacter") +} diff --git a/vendor/golang.org/x/text/internal/triegen/print.go b/vendor/golang.org/x/text/internal/triegen/print.go new file mode 100644 index 0000000000..8d9f120bcd --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/print.go @@ -0,0 +1,251 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package triegen + +import ( + "bytes" + "fmt" + "io" + "strings" + "text/template" +) + +// print writes all the data structures as well as the code necessary to use the +// trie to w. +func (b *builder) print(w io.Writer) error { + b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize + b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize + b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize + b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize + b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize + + // If we only have one root trie, all starter blocks are at position 0 and + // we can access the arrays directly. + if len(b.Trie) == 1 { + // At this point we cannot refer to the generated tables directly. + b.ASCIIBlock = b.Name + "Values" + b.StarterBlock = b.Name + "Index" + } else { + // Otherwise we need to have explicit starter indexes in the trie + // structure. + b.ASCIIBlock = "t.ascii" + b.StarterBlock = "t.utf8Start" + } + + b.SourceType = "[]byte" + if err := lookupGen.Execute(w, b); err != nil { + return err + } + + b.SourceType = "string" + if err := lookupGen.Execute(w, b); err != nil { + return err + } + + if err := trieGen.Execute(w, b); err != nil { + return err + } + + for _, c := range b.Compactions { + if err := c.c.Print(w); err != nil { + return err + } + } + + return nil +} + +func printValues(n int, values []uint64) string { + w := &bytes.Buffer{} + boff := n * blockSize + fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff) + var newline bool + for i, v := range values { + if i%6 == 0 { + newline = true + } + if v != 0 { + if newline { + fmt.Fprintf(w, "\n") + newline = false + } + fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v) + } + } + return w.String() +} + +func printIndex(b *builder, nr int, n *node) string { + w := &bytes.Buffer{} + boff := nr * blockSize + fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff) + var newline bool + for i, c := range n.children { + if i%8 == 0 { + newline = true + } + if c != nil { + v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index) + if v != 0 { + if newline { + fmt.Fprintf(w, "\n") + newline = false + } + fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v) + } + } + } + return w.String() +} + +var ( + trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{ + "printValues": printValues, + "printIndex": printIndex, + "title": strings.Title, + "dec": func(x int) int { return x - 1 }, + "psize": func(n int) string { + return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024) + }, + }).Parse(trieTemplate)) + lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate)) +) + +// TODO: consider the return type of lookup. It could be uint64, even if the +// internal value type is smaller. We will have to verify this with the +// performance of unicode/norm, which is very sensitive to such changes. +const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}} +// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}. +type {{.Name}}Trie struct { {{if $multi}} + ascii []{{.ValueType}} // index for ASCII bytes + utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0 +{{end}}} + +func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}} + h := {{.Name}}TrieHandles[i] + return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] } +} + +type {{.Name}}TrieHandle struct { + ascii, multi {{.IndexType}} +} + +// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes +var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{ +{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}} +{{end}}}{{else}} + return &{{.Name}}Trie{} +} +{{end}} +// lookupValue determines the type of block n and looks up the value for b. +func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} { + switch { {{range $i, $c := .Compactions}} + {{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}} + n -= {{$c.Offset}}{{end}} + return {{print $b.ValueType}}({{$c.Handler}}){{end}} + } +} + +// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes +// The third block is the zero block. +var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} { +{{range $i, $v := .ValueBlocks}}{{printValues $i $v}} +{{end}}} + +// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes +// Block 0 is the zero block. +var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} { +{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}} +{{end}}} +` + +// TODO: consider allowing zero-length strings after evaluating performance with +// unicode/norm. +const lookupTemplate = ` +// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) { + c0 := s[0] + switch { + case c0 < 0x80: // is ASCII + return {{.ASCIIBlock}}[c0], 1 + case c0 < 0xC2: + return 0, 1 // Illegal UTF-8: not a starter, not ASCII. + case c0 < 0xE0: // 2-byte UTF-8 + if len(s) < 2 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c1), 2 + case c0 < 0xF0: // 3-byte UTF-8 + if len(s) < 3 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = {{.Name}}Index[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c2), 3 + case c0 < 0xF8: // 4-byte UTF-8 + if len(s) < 4 { + return 0, 0 + } + i := {{.StarterBlock}}[c0] + c1 := s[1] + if c1 < 0x80 || 0xC0 <= c1 { + return 0, 1 // Illegal UTF-8: not a continuation byte. + } + o := uint32(i)<<6 + uint32(c1) + i = {{.Name}}Index[o] + c2 := s[2] + if c2 < 0x80 || 0xC0 <= c2 { + return 0, 2 // Illegal UTF-8: not a continuation byte. + } + o = uint32(i)<<6 + uint32(c2) + i = {{.Name}}Index[o] + c3 := s[3] + if c3 < 0x80 || 0xC0 <= c3 { + return 0, 3 // Illegal UTF-8: not a continuation byte. + } + return t.lookupValue(uint32(i), c3), 4 + } + // Illegal rune + return 0, 1 +} + +// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s. +// s must start with a full and valid UTF-8 encoded rune. +func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} { + c0 := s[0] + if c0 < 0x80 { // is ASCII + return {{.ASCIIBlock}}[c0] + } + i := {{.StarterBlock}}[c0] + if c0 < 0xE0 { // 2-byte UTF-8 + return t.lookupValue(uint32(i), s[1]) + } + i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])] + if c0 < 0xF0 { // 3-byte UTF-8 + return t.lookupValue(uint32(i), s[2]) + } + i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])] + if c0 < 0xF8 { // 4-byte UTF-8 + return t.lookupValue(uint32(i), s[3]) + } + return 0 +} +` diff --git a/vendor/golang.org/x/text/internal/triegen/triegen.go b/vendor/golang.org/x/text/internal/triegen/triegen.go new file mode 100644 index 0000000000..adb0108124 --- /dev/null +++ b/vendor/golang.org/x/text/internal/triegen/triegen.go @@ -0,0 +1,494 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package triegen implements a code generator for a trie for associating +// unsigned integer values with UTF-8 encoded runes. +// +// Many of the go.text packages use tries for storing per-rune information. A +// trie is especially useful if many of the runes have the same value. If this +// is the case, many blocks can be expected to be shared allowing for +// information on many runes to be stored in little space. +// +// As most of the lookups are done directly on []byte slices, the tries use the +// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to +// runes and contributes a little bit to better performance. It also naturally +// provides a fast path for ASCII. +// +// Space is also an issue. There are many code points defined in Unicode and as +// a result tables can get quite large. So every byte counts. The triegen +// package automatically chooses the smallest integer values to represent the +// tables. Compacters allow further compression of the trie by allowing for +// alternative representations of individual trie blocks. +// +// triegen allows generating multiple tries as a single structure. This is +// useful when, for example, one wants to generate tries for several languages +// that have a lot of values in common. Some existing libraries for +// internationalization store all per-language data as a dynamically loadable +// chunk. The go.text packages are designed with the assumption that the user +// typically wants to compile in support for all supported languages, in line +// with the approach common to Go to create a single standalone binary. The +// multi-root trie approach can give significant storage savings in this +// scenario. +// +// triegen generates both tables and code. The code is optimized to use the +// automatically chosen data types. The following code is generated for a Trie +// or multiple Tries named "foo": +// - type fooTrie +// The trie type. +// +// - func newFooTrie(x int) *fooTrie +// Trie constructor, where x is the index of the trie passed to Gen. +// +// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) +// The lookup method, where uintX is automatically chosen. +// +// - func lookupString, lookupUnsafe and lookupStringUnsafe +// Variants of the above. +// +// - var fooValues and fooIndex and any tables generated by Compacters. +// The core trie data. +// +// - var fooTrieHandles +// Indexes of starter blocks in case of multiple trie roots. +// +// It is recommended that users test the generated trie by checking the returned +// value for every rune. Such exhaustive tests are possible as the the number of +// runes in Unicode is limited. +package triegen // import "golang.org/x/text/internal/triegen" + +// TODO: Arguably, the internally optimized data types would not have to be +// exposed in the generated API. We could also investigate not generating the +// code, but using it through a package. We would have to investigate the impact +// on performance of making such change, though. For packages like unicode/norm, +// small changes like this could tank performance. + +import ( + "encoding/binary" + "fmt" + "hash/crc64" + "io" + "log" + "unicode/utf8" +) + +// builder builds a set of tries for associating values with runes. The set of +// tries can share common index and value blocks. +type builder struct { + Name string + + // ValueType is the type of the trie values looked up. + ValueType string + + // ValueSize is the byte size of the ValueType. + ValueSize int + + // IndexType is the type of trie index values used for all UTF-8 bytes of + // a rune except the last one. + IndexType string + + // IndexSize is the byte size of the IndexType. + IndexSize int + + // SourceType is used when generating the lookup functions. If the user + // requests StringSupport, all lookup functions will be generated for + // string input as well. + SourceType string + + Trie []*Trie + + IndexBlocks []*node + ValueBlocks [][]uint64 + Compactions []compaction + Checksum uint64 + + ASCIIBlock string + StarterBlock string + + indexBlockIdx map[uint64]int + valueBlockIdx map[uint64]nodeIndex + asciiBlockIdx map[uint64]int + + // Stats are used to fill out the template. + Stats struct { + NValueEntries int + NValueBytes int + NIndexEntries int + NIndexBytes int + NHandleBytes int + } + + err error +} + +// A nodeIndex encodes the index of a node, which is defined by the compaction +// which stores it and an index within the compaction. For internal nodes, the +// compaction is always 0. +type nodeIndex struct { + compaction int + index int +} + +// compaction keeps track of stats used for the compaction. +type compaction struct { + c Compacter + blocks []*node + maxHandle uint32 + totalSize int + + // Used by template-based generator and thus exported. + Cutoff uint32 + Offset uint32 + Handler string +} + +func (b *builder) setError(err error) { + if b.err == nil { + b.err = err + } +} + +// An Option can be passed to Gen. +type Option func(b *builder) error + +// Compact configures the trie generator to use the given Compacter. +func Compact(c Compacter) Option { + return func(b *builder) error { + b.Compactions = append(b.Compactions, compaction{ + c: c, + Handler: c.Handler() + "(n, b)"}) + return nil + } +} + +// Gen writes Go code for a shared trie lookup structure to w for the given +// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will +// return the *nameTrie for tries[x]. A value can be looked up by using one of +// the various lookup methods defined on nameTrie. It returns the table size of +// the generated trie. +func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) { + // The index contains two dummy blocks, followed by the zero block. The zero + // block is at offset 0x80, so that the offset for the zero block for + // continuation bytes is 0. + b := &builder{ + Name: name, + Trie: tries, + IndexBlocks: []*node{{}, {}, {}}, + Compactions: []compaction{{ + Handler: name + "Values[n<<6+uint32(b)]", + }}, + // The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero + // block. + indexBlockIdx: map[uint64]int{0: 0}, + valueBlockIdx: map[uint64]nodeIndex{0: {}}, + asciiBlockIdx: map[uint64]int{}, + } + b.Compactions[0].c = (*simpleCompacter)(b) + + for _, f := range opts { + if err := f(b); err != nil { + return 0, err + } + } + b.build() + if b.err != nil { + return 0, b.err + } + if err = b.print(w); err != nil { + return 0, err + } + return b.Size(), nil +} + +// A Trie represents a single root node of a trie. A builder may build several +// overlapping tries at once. +type Trie struct { + root *node + + hiddenTrie +} + +// hiddenTrie contains values we want to be visible to the template generator, +// but hidden from the API documentation. +type hiddenTrie struct { + Name string + Checksum uint64 + ASCIIIndex int + StarterIndex int +} + +// NewTrie returns a new trie root. +func NewTrie(name string) *Trie { + return &Trie{ + &node{ + children: make([]*node, blockSize), + values: make([]uint64, utf8.RuneSelf), + }, + hiddenTrie{Name: name}, + } +} + +// Gen is a convenience wrapper around the Gen func passing t as the only trie +// and uses the name passed to NewTrie. It returns the size of the generated +// tables. +func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) { + return Gen(w, t.Name, []*Trie{t}, opts...) +} + +// node is a node of the intermediate trie structure. +type node struct { + // children holds this node's children. It is always of length 64. + // A child node may be nil. + children []*node + + // values contains the values of this node. If it is non-nil, this node is + // either a root or leaf node: + // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F]. + // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF]. + values []uint64 + + index nodeIndex +} + +// Insert associates value with the given rune. Insert will panic if a non-zero +// value is passed for an invalid rune. +func (t *Trie) Insert(r rune, value uint64) { + if value == 0 { + return + } + s := string(r) + if []rune(s)[0] != r && value != 0 { + // Note: The UCD tables will always assign what amounts to a zero value + // to a surrogate. Allowing a zero value for an illegal rune allows + // users to iterate over [0..MaxRune] without having to explicitly + // exclude surrogates, which would be tedious. + panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r)) + } + if len(s) == 1 { + // It is a root node value (ASCII). + t.root.values[s[0]] = value + return + } + + n := t.root + for ; len(s) > 1; s = s[1:] { + if n.children == nil { + n.children = make([]*node, blockSize) + } + p := s[0] % blockSize + c := n.children[p] + if c == nil { + c = &node{} + n.children[p] = c + } + if len(s) > 2 && c.values != nil { + log.Fatalf("triegen: insert(%U): found internal node with values", r) + } + n = c + } + if n.values == nil { + n.values = make([]uint64, blockSize) + } + if n.children != nil { + log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r) + } + n.values[s[0]-0x80] = value +} + +// Size returns the number of bytes the generated trie will take to store. It +// needs to be exported as it is used in the templates. +func (b *builder) Size() int { + // Index blocks. + sz := len(b.IndexBlocks) * blockSize * b.IndexSize + + // Skip the first compaction, which represents the normal value blocks, as + // its totalSize does not account for the ASCII blocks, which are managed + // separately. + sz += len(b.ValueBlocks) * blockSize * b.ValueSize + for _, c := range b.Compactions[1:] { + sz += c.totalSize + } + + // TODO: this computation does not account for the fixed overhead of a using + // a compaction, either code or data. As for data, though, the typical + // overhead of data is in the order of bytes (2 bytes for cases). Further, + // the savings of using a compaction should anyway be substantial for it to + // be worth it. + + // For multi-root tries, we also need to account for the handles. + if len(b.Trie) > 1 { + sz += 2 * b.IndexSize * len(b.Trie) + } + return sz +} + +func (b *builder) build() { + // Compute the sizes of the values. + var vmax uint64 + for _, t := range b.Trie { + vmax = maxValue(t.root, vmax) + } + b.ValueType, b.ValueSize = getIntType(vmax) + + // Compute all block allocations. + // TODO: first compute the ASCII blocks for all tries and then the other + // nodes. ASCII blocks are more restricted in placement, as they require two + // blocks to be placed consecutively. Processing them first may improve + // sharing (at least one zero block can be expected to be saved.) + for _, t := range b.Trie { + b.Checksum += b.buildTrie(t) + } + + // Compute the offsets for all the Compacters. + offset := uint32(0) + for i := range b.Compactions { + c := &b.Compactions[i] + c.Offset = offset + offset += c.maxHandle + 1 + c.Cutoff = offset + } + + // Compute the sizes of indexes. + // TODO: different byte positions could have different sizes. So far we have + // not found a case where this is beneficial. + imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff) + for _, ib := range b.IndexBlocks { + if x := uint64(ib.index.index); x > imax { + imax = x + } + } + b.IndexType, b.IndexSize = getIntType(imax) +} + +func maxValue(n *node, max uint64) uint64 { + if n == nil { + return max + } + for _, c := range n.children { + max = maxValue(c, max) + } + for _, v := range n.values { + if max < v { + max = v + } + } + return max +} + +func getIntType(v uint64) (string, int) { + switch { + case v < 1<<8: + return "uint8", 1 + case v < 1<<16: + return "uint16", 2 + case v < 1<<32: + return "uint32", 4 + } + return "uint64", 8 +} + +const ( + blockSize = 64 + + // Subtract two blocks to offset 0x80, the first continuation byte. + blockOffset = 2 + + // Subtract three blocks to offset 0xC0, the first non-ASCII starter. + rootBlockOffset = 3 +) + +var crcTable = crc64.MakeTable(crc64.ISO) + +func (b *builder) buildTrie(t *Trie) uint64 { + n := t.root + + // Get the ASCII offset. For the first trie, the ASCII block will be at + // position 0. + hasher := crc64.New(crcTable) + binary.Write(hasher, binary.BigEndian, n.values) + hash := hasher.Sum64() + + v, ok := b.asciiBlockIdx[hash] + if !ok { + v = len(b.ValueBlocks) + b.asciiBlockIdx[hash] = v + + b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:]) + if v == 0 { + // Add the zero block at position 2 so that it will be assigned a + // zero reference in the lookup blocks. + // TODO: always do this? This would allow us to remove a check from + // the trie lookup, but at the expense of extra space. Analyze + // performance for unicode/norm. + b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize)) + } + } + t.ASCIIIndex = v + + // Compute remaining offsets. + t.Checksum = b.computeOffsets(n, true) + // We already subtracted the normal blockOffset from the index. Subtract the + // difference for starter bytes. + t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset) + return t.Checksum +} + +func (b *builder) computeOffsets(n *node, root bool) uint64 { + // For the first trie, the root lookup block will be at position 3, which is + // the offset for UTF-8 non-ASCII starter bytes. + first := len(b.IndexBlocks) == rootBlockOffset + if first { + b.IndexBlocks = append(b.IndexBlocks, n) + } + + // We special-case the cases where all values recursively are 0. This allows + // for the use of a zero block to which all such values can be directed. + hash := uint64(0) + if n.children != nil || n.values != nil { + hasher := crc64.New(crcTable) + for _, c := range n.children { + var v uint64 + if c != nil { + v = b.computeOffsets(c, false) + } + binary.Write(hasher, binary.BigEndian, v) + } + binary.Write(hasher, binary.BigEndian, n.values) + hash = hasher.Sum64() + } + + if first { + b.indexBlockIdx[hash] = rootBlockOffset - blockOffset + } + + // Compacters don't apply to internal nodes. + if n.children != nil { + v, ok := b.indexBlockIdx[hash] + if !ok { + v = len(b.IndexBlocks) - blockOffset + b.IndexBlocks = append(b.IndexBlocks, n) + b.indexBlockIdx[hash] = v + } + n.index = nodeIndex{0, v} + } else { + h, ok := b.valueBlockIdx[hash] + if !ok { + bestI, bestSize := 0, blockSize*b.ValueSize + for i, c := range b.Compactions[1:] { + if sz, ok := c.c.Size(n.values); ok && bestSize > sz { + bestI, bestSize = i+1, sz + } + } + c := &b.Compactions[bestI] + c.totalSize += bestSize + v := c.c.Store(n.values) + if c.maxHandle < v { + c.maxHandle = v + } + h = nodeIndex{bestI, int(v)} + b.valueBlockIdx[hash] = h + } + n.index = h + } + return hash +} diff --git a/vendor/golang.org/x/text/internal/ucd/ucd.go b/vendor/golang.org/x/text/internal/ucd/ucd.go new file mode 100644 index 0000000000..309e8d8b16 --- /dev/null +++ b/vendor/golang.org/x/text/internal/ucd/ucd.go @@ -0,0 +1,376 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package ucd provides a parser for Unicode Character Database files, the +// format of which is defined in http://www.unicode.org/reports/tr44/. See +// http://www.unicode.org/Public/UCD/latest/ucd/ for example files. +// +// It currently does not support substitutions of missing fields. +package ucd // import "golang.org/x/text/internal/ucd" + +import ( + "bufio" + "bytes" + "errors" + "io" + "log" + "regexp" + "strconv" + "strings" +) + +// UnicodeData.txt fields. +const ( + CodePoint = iota + Name + GeneralCategory + CanonicalCombiningClass + BidiClass + DecompMapping + DecimalValue + DigitValue + NumericValue + BidiMirrored + Unicode1Name + ISOComment + SimpleUppercaseMapping + SimpleLowercaseMapping + SimpleTitlecaseMapping +) + +// Parse calls f for each entry in the given reader of a UCD file. It will close +// the reader upon return. It will call log.Fatal if any error occurred. +// +// This implements the most common usage pattern of using Parser. +func Parse(r io.ReadCloser, f func(p *Parser)) { + defer r.Close() + + p := New(r) + for p.Next() { + f(p) + } + if err := p.Err(); err != nil { + r.Close() // os.Exit will cause defers not to be called. + log.Fatal(err) + } +} + +// An Option is used to configure a Parser. +type Option func(p *Parser) + +func keepRanges(p *Parser) { + p.keepRanges = true +} + +var ( + // KeepRanges prevents the expansion of ranges. The raw ranges can be + // obtained by calling Range(0) on the parser. + KeepRanges Option = keepRanges +) + +// The Part option register a handler for lines starting with a '@'. The text +// after a '@' is available as the first field. Comments are handled as usual. +func Part(f func(p *Parser)) Option { + return func(p *Parser) { + p.partHandler = f + } +} + +// The CommentHandler option passes comments that are on a line by itself to +// a given handler. +func CommentHandler(f func(s string)) Option { + return func(p *Parser) { + p.commentHandler = f + } +} + +// A Parser parses Unicode Character Database (UCD) files. +type Parser struct { + scanner *bufio.Scanner + + keepRanges bool // Don't expand rune ranges in field 0. + + err error + comment []byte + field [][]byte + // parsedRange is needed in case Range(0) is called more than once for one + // field. In some cases this requires scanning ahead. + parsedRange bool + rangeStart, rangeEnd rune + + partHandler func(p *Parser) + commentHandler func(s string) +} + +func (p *Parser) setError(err error) { + if p.err == nil { + p.err = err + } +} + +func (p *Parser) getField(i int) []byte { + if i >= len(p.field) { + return nil + } + return p.field[i] +} + +// Err returns a non-nil error if any error occurred during parsing. +func (p *Parser) Err() error { + return p.err +} + +// New returns a Parser for the given Reader. +func New(r io.Reader, o ...Option) *Parser { + p := &Parser{ + scanner: bufio.NewScanner(r), + } + for _, f := range o { + f(p) + } + return p +} + +// Next parses the next line in the file. It returns true if a line was parsed +// and false if it reached the end of the file. +func (p *Parser) Next() bool { + if !p.keepRanges && p.rangeStart < p.rangeEnd { + p.rangeStart++ + return true + } + p.comment = nil + p.field = p.field[:0] + p.parsedRange = false + + for p.scanner.Scan() { + b := p.scanner.Bytes() + if len(b) == 0 { + continue + } + if b[0] == '#' { + if p.commentHandler != nil { + p.commentHandler(strings.TrimSpace(string(b[1:]))) + } + continue + } + + // Parse line + if i := bytes.IndexByte(b, '#'); i != -1 { + p.comment = bytes.TrimSpace(b[i+1:]) + b = b[:i] + } + if b[0] == '@' { + if p.partHandler != nil { + p.field = append(p.field, bytes.TrimSpace(b[1:])) + p.partHandler(p) + p.field = p.field[:0] + } + p.comment = nil + continue + } + for { + i := bytes.IndexByte(b, ';') + if i == -1 { + p.field = append(p.field, bytes.TrimSpace(b)) + break + } + p.field = append(p.field, bytes.TrimSpace(b[:i])) + b = b[i+1:] + } + if !p.keepRanges { + p.rangeStart, p.rangeEnd = p.getRange(0) + } + return true + } + p.setError(p.scanner.Err()) + return false +} + +func parseRune(b []byte) (rune, error) { + if len(b) > 2 && b[0] == 'U' && b[1] == '+' { + b = b[2:] + } + x, err := strconv.ParseUint(string(b), 16, 32) + return rune(x), err +} + +func (p *Parser) parseRune(b []byte) rune { + x, err := parseRune(b) + p.setError(err) + return x +} + +// Rune parses and returns field i as a rune. +func (p *Parser) Rune(i int) rune { + if i > 0 || p.keepRanges { + return p.parseRune(p.getField(i)) + } + return p.rangeStart +} + +// Runes interprets and returns field i as a sequence of runes. +func (p *Parser) Runes(i int) (runes []rune) { + add := func(b []byte) { + if b = bytes.TrimSpace(b); len(b) > 0 { + runes = append(runes, p.parseRune(b)) + } + } + for b := p.getField(i); ; { + i := bytes.IndexByte(b, ' ') + if i == -1 { + add(b) + break + } + add(b[:i]) + b = b[i+1:] + } + return +} + +var ( + errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>") + + // reRange matches one line of a legacy rune range. + reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$") +) + +// Range parses and returns field i as a rune range. A range is inclusive at +// both ends. If the field only has one rune, first and last will be identical. +// It supports the legacy format for ranges used in UnicodeData.txt. +func (p *Parser) Range(i int) (first, last rune) { + if !p.keepRanges { + return p.rangeStart, p.rangeStart + } + return p.getRange(i) +} + +func (p *Parser) getRange(i int) (first, last rune) { + b := p.getField(i) + if k := bytes.Index(b, []byte("..")); k != -1 { + return p.parseRune(b[:k]), p.parseRune(b[k+2:]) + } + // The first field may not be a rune, in which case we may ignore any error + // and set the range as 0..0. + x, err := parseRune(b) + if err != nil { + // Disable range parsing henceforth. This ensures that an error will be + // returned if the user subsequently will try to parse this field as + // a Rune. + p.keepRanges = true + } + // Special case for UnicodeData that was retained for backwards compatibility. + if i == 0 && len(p.field) > 1 && bytes.HasSuffix(p.field[1], []byte("First>")) { + if p.parsedRange { + return p.rangeStart, p.rangeEnd + } + mf := reRange.FindStringSubmatch(p.scanner.Text()) + if mf == nil || !p.scanner.Scan() { + p.setError(errIncorrectLegacyRange) + return x, x + } + // Using Bytes would be more efficient here, but Text is a lot easier + // and this is not a frequent case. + ml := reRange.FindStringSubmatch(p.scanner.Text()) + if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] { + p.setError(errIncorrectLegacyRange) + return x, x + } + p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Bytes()[:len(ml[1])]) + p.parsedRange = true + return p.rangeStart, p.rangeEnd + } + return x, x +} + +// bools recognizes all valid UCD boolean values. +var bools = map[string]bool{ + "": false, + "N": false, + "No": false, + "F": false, + "False": false, + "Y": true, + "Yes": true, + "T": true, + "True": true, +} + +// Bool parses and returns field i as a boolean value. +func (p *Parser) Bool(i int) bool { + b := p.getField(i) + for s, v := range bools { + if bstrEq(b, s) { + return v + } + } + p.setError(strconv.ErrSyntax) + return false +} + +// Int parses and returns field i as an integer value. +func (p *Parser) Int(i int) int { + x, err := strconv.ParseInt(string(p.getField(i)), 10, 64) + p.setError(err) + return int(x) +} + +// Uint parses and returns field i as an unsigned integer value. +func (p *Parser) Uint(i int) uint { + x, err := strconv.ParseUint(string(p.getField(i)), 10, 64) + p.setError(err) + return uint(x) +} + +// Float parses and returns field i as a decimal value. +func (p *Parser) Float(i int) float64 { + x, err := strconv.ParseFloat(string(p.getField(i)), 64) + p.setError(err) + return x +} + +// String parses and returns field i as a string value. +func (p *Parser) String(i int) string { + return string(p.getField(i)) +} + +// Strings parses and returns field i as a space-separated list of strings. +func (p *Parser) Strings(i int) []string { + ss := strings.Split(string(p.getField(i)), " ") + for i, s := range ss { + ss[i] = strings.TrimSpace(s) + } + return ss +} + +// Comment returns the comments for the current line. +func (p *Parser) Comment() string { + return string(p.comment) +} + +var errUndefinedEnum = errors.New("ucd: undefined enum value") + +// Enum interprets and returns field i as a value that must be one of the values +// in enum. +func (p *Parser) Enum(i int, enum ...string) string { + b := p.getField(i) + for _, s := range enum { + if bstrEq(b, s) { + return s + } + } + p.setError(errUndefinedEnum) + return "" +} + +func bstrEq(b []byte, s string) bool { + if len(b) != len(s) { + return false + } + for i, c := range b { + if c != s[i] { + return false + } + } + return true +} diff --git a/vendor/golang.org/x/text/language/Makefile b/vendor/golang.org/x/text/language/Makefile deleted file mode 100644 index 79f005784f..0000000000 --- a/vendor/golang.org/x/text/language/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright 2013 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -CLEANFILES+=maketables - -maketables: maketables.go - go build $^ - -tables: maketables - ./maketables > tables.go - gofmt -w -s tables.go - -# Build (but do not run) maketables during testing, -# just to make sure it still compiles. -testshort: maketables diff --git a/vendor/golang.org/x/text/language/gen_common.go b/vendor/golang.org/x/text/language/gen_common.go new file mode 100644 index 0000000000..83ce180133 --- /dev/null +++ b/vendor/golang.org/x/text/language/gen_common.go @@ -0,0 +1,20 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This file contains code common to the maketables.go and the package code. + +// langAliasType is the type of an alias in langAliasMap. +type langAliasType int8 + +const ( + langDeprecated langAliasType = iota + langMacro + langLegacy + + langAliasTypeUnknown langAliasType = -1 +) diff --git a/vendor/golang.org/x/text/language/gen_index.go b/vendor/golang.org/x/text/language/gen_index.go new file mode 100644 index 0000000000..eef555cd3a --- /dev/null +++ b/vendor/golang.org/x/text/language/gen_index.go @@ -0,0 +1,162 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This file generates derivative tables based on the language package itself. + +import ( + "bytes" + "flag" + "fmt" + "io/ioutil" + "log" + "reflect" + "sort" + "strings" + + "golang.org/x/text/internal/gen" + "golang.org/x/text/language" + "golang.org/x/text/unicode/cldr" +) + +var ( + test = flag.Bool("test", false, + "test existing tables; can be used to compare web data with package data.") + + draft = flag.String("draft", + "contributed", + `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`) +) + +func main() { + gen.Init() + + // Read the CLDR zip file. + r := gen.OpenCLDRCoreZip() + defer r.Close() + + d := &cldr.Decoder{} + data, err := d.DecodeZip(r) + if err != nil { + log.Fatalf("DecodeZip: %v", err) + } + + w := gen.NewCodeWriter() + defer func() { + buf := &bytes.Buffer{} + + if _, err = w.WriteGo(buf, "language"); err != nil { + log.Fatalf("Error formatting file index.go: %v", err) + } + + // Since we're generating a table for our own package we need to rewrite + // doing the equivalent of go fmt -r 'language.b -> b'. Using + // bytes.Replace will do. + out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1) + if err := ioutil.WriteFile("index.go", out, 0600); err != nil { + log.Fatalf("Could not create file index.go: %v", err) + } + }() + + m := map[language.Tag]bool{} + for _, lang := range data.Locales() { + // We include all locales unconditionally to be consistent with en_US. + // We want en_US, even though it has no data associated with it. + + // TODO: put any of the languages for which no data exists at the end + // of the index. This allows all components based on ICU to use that + // as the cutoff point. + // if x := data.RawLDML(lang); false || + // x.LocaleDisplayNames != nil || + // x.Characters != nil || + // x.Delimiters != nil || + // x.Measurement != nil || + // x.Dates != nil || + // x.Numbers != nil || + // x.Units != nil || + // x.ListPatterns != nil || + // x.Collations != nil || + // x.Segmentations != nil || + // x.Rbnf != nil || + // x.Annotations != nil || + // x.Metadata != nil { + + // TODO: support POSIX natively, albeit non-standard. + tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1)) + m[tag] = true + // } + } + // Include locales for plural rules, which uses a different structure. + for _, plurals := range data.Supplemental().Plurals { + for _, rules := range plurals.PluralRules { + for _, lang := range strings.Split(rules.Locales, " ") { + m[language.Make(lang)] = true + } + } + } + + var core, special []language.Tag + + for t := range m { + if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" { + log.Fatalf("Unexpected extension %v in %v", x, t) + } + if len(t.Variants()) == 0 && len(t.Extensions()) == 0 { + core = append(core, t) + } else { + special = append(special, t) + } + } + + w.WriteComment(` + NumCompactTags is the number of common tags. The maximum tag is + NumCompactTags-1.`) + w.WriteConst("NumCompactTags", len(core)+len(special)) + + sort.Sort(byAlpha(special)) + w.WriteVar("specialTags", special) + + // TODO: order by frequency? + sort.Sort(byAlpha(core)) + + // Size computations are just an estimate. + w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size()) + w.Size += len(core) * 6 // size of uint32 and uint16 + + fmt.Fprintln(w) + fmt.Fprintln(w, "var coreTags = map[uint32]uint16{") + fmt.Fprintln(w, "0x0: 0, // und") + i := len(special) + 1 // Und and special tags already written. + for _, t := range core { + if t == language.Und { + continue + } + fmt.Fprint(w.Hash, t, i) + b, s, r := t.Raw() + fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n", + getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number + getIndex(s, 2), + getIndex(r, 3), + i, t) + i++ + } + fmt.Fprintln(w, "}") +} + +// getIndex prints the subtag type and extracts its index of size nibble. +// If the index is less than n nibbles, the result is prefixed with 0s. +func getIndex(x interface{}, n int) string { + s := fmt.Sprintf("%#v", x) // s is of form Type{typeID: 0x00} + s = s[strings.Index(s, "0x")+2 : len(s)-1] + return strings.Repeat("0", n-len(s)) + s +} + +type byAlpha []language.Tag + +func (a byAlpha) Len() int { return len(a) } +func (a byAlpha) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() } diff --git a/vendor/golang.org/x/text/language/maketables.go b/vendor/golang.org/x/text/language/maketables.go new file mode 100644 index 0000000000..153269bc10 --- /dev/null +++ b/vendor/golang.org/x/text/language/maketables.go @@ -0,0 +1,1699 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// Language tag table generator. +// Data read from the web. + +package main + +import ( + "bufio" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math" + "reflect" + "regexp" + "sort" + "strconv" + "strings" + + "golang.org/x/text/internal/gen" + "golang.org/x/text/internal/tag" + "golang.org/x/text/unicode/cldr" +) + +var ( + test = flag.Bool("test", + false, + "test existing tables; can be used to compare web data with package data.") + outputFile = flag.String("output", + "tables.go", + "output file for generated tables") +) + +var comment = []string{ + ` +lang holds an alphabetically sorted list of ISO-639 language identifiers. +All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag. +For 2-byte language identifiers, the two successive bytes have the following meaning: + - if the first letter of the 2- and 3-letter ISO codes are the same: + the second and third letter of the 3-letter ISO code. + - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. +For 3-byte language identifiers the 4th byte is 0.`, + ` +langNoIndex is a bit vector of all 3-letter language codes that are not used as an index +in lookup tables. The language ids for these language codes are derived directly +from the letters and are not consecutive.`, + ` +altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives +to 2-letter language codes that cannot be derived using the method described above. +Each 3-letter code is followed by its 1-byte langID.`, + ` +altLangIndex is used to convert indexes in altLangISO3 to langIDs.`, + ` +langAliasMap maps langIDs to their suggested replacements.`, + ` +script is an alphabetically sorted list of ISO 15924 codes. The index +of the script in the string, divided by 4, is the internal scriptID.`, + ` +isoRegionOffset needs to be added to the index of regionISO to obtain the regionID +for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for +the UN.M49 codes used for groups.)`, + ` +regionISO holds a list of alphabetically sorted 2-letter ISO region codes. +Each 2-letter codes is followed by two bytes with the following meaning: + - [A-Z}{2}: the first letter of the 2-letter code plus these two + letters form the 3-letter ISO code. + - 0, n: index into altRegionISO3.`, + ` +regionTypes defines the status of a region for various standards.`, + ` +m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are +codes indicating collections of regions.`, + ` +m49Index gives indexes into fromM49 based on the three most significant bits +of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in + fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] +for an entry where the first 7 bits match the 7 lsb of the UN.M49 code. +The region code is stored in the 9 lsb of the indexed value.`, + ` +fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`, + ` +altRegionISO3 holds a list of 3-letter region codes that cannot be +mapped to 2-letter codes using the default algorithm. This is a short list.`, + ` +altRegionIDs holds a list of regionIDs the positions of which match those +of the 3-letter ISO codes in altRegionISO3.`, + ` +variantNumSpecialized is the number of specialized variants in variants.`, + ` +suppressScript is an index from langID to the dominant script for that language, +if it exists. If a script is given, it should be suppressed from the language tag.`, + ` +likelyLang is a lookup table, indexed by langID, for the most likely +scripts and regions given incomplete information. If more entries exist for a +given language, region and script are the index and size respectively +of the list in likelyLangList.`, + ` +likelyLangList holds lists info associated with likelyLang.`, + ` +likelyRegion is a lookup table, indexed by regionID, for the most likely +languages and scripts given incomplete information. If more entries exist +for a given regionID, lang and script are the index and size respectively +of the list in likelyRegionList. +TODO: exclude containers and user-definable regions from the list.`, + ` +likelyRegionList holds lists info associated with likelyRegion.`, + ` +likelyScript is a lookup table, indexed by scriptID, for the most likely +languages and regions given a script.`, + ` +matchLang holds pairs of langIDs of base languages that are typically +mutually intelligible. Each pair is associated with a confidence and +whether the intelligibility goes one or both ways.`, + ` +matchScript holds pairs of scriptIDs where readers of one script +can typically also read the other. Each is associated with a confidence.`, + ` +nRegionGroups is the number of region groups.`, + ` +regionInclusion maps region identifiers to sets of regions in regionInclusionBits, +where each set holds all groupings that are directly connected in a region +containment graph.`, + ` +regionInclusionBits is an array of bit vectors where every vector represents +a set of region groupings. These sets are used to compute the distance +between two regions for the purpose of language matching.`, + ` +regionInclusionNext marks, for each entry in regionInclusionBits, the set of +all groups that are reachable from the groups set in the respective entry.`, +} + +// TODO: consider changing some of these structures to tries. This can reduce +// memory, but may increase the need for memory allocations. This could be +// mitigated if we can piggyback on language tags for common cases. + +func failOnError(e error) { + if e != nil { + log.Panic(e) + } +} + +type setType int + +const ( + Indexed setType = 1 + iota // all elements must be of same size + Linear +) + +type stringSet struct { + s []string + sorted, frozen bool + + // We often need to update values after the creation of an index is completed. + // We include a convenience map for keeping track of this. + update map[string]string + typ setType // used for checking. +} + +func (ss *stringSet) clone() stringSet { + c := *ss + c.s = append([]string(nil), c.s...) + return c +} + +func (ss *stringSet) setType(t setType) { + if ss.typ != t && ss.typ != 0 { + log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ) + } +} + +// parse parses a whitespace-separated string and initializes ss with its +// components. +func (ss *stringSet) parse(s string) { + scan := bufio.NewScanner(strings.NewReader(s)) + scan.Split(bufio.ScanWords) + for scan.Scan() { + ss.add(scan.Text()) + } +} + +func (ss *stringSet) assertChangeable() { + if ss.frozen { + log.Panic("attempt to modify a frozen stringSet") + } +} + +func (ss *stringSet) add(s string) { + ss.assertChangeable() + ss.s = append(ss.s, s) + ss.sorted = ss.frozen +} + +func (ss *stringSet) freeze() { + ss.compact() + ss.frozen = true +} + +func (ss *stringSet) compact() { + if ss.sorted { + return + } + a := ss.s + sort.Strings(a) + k := 0 + for i := 1; i < len(a); i++ { + if a[k] != a[i] { + a[k+1] = a[i] + k++ + } + } + ss.s = a[:k+1] + ss.sorted = ss.frozen +} + +type funcSorter struct { + fn func(a, b string) bool + sort.StringSlice +} + +func (s funcSorter) Less(i, j int) bool { + return s.fn(s.StringSlice[i], s.StringSlice[j]) +} + +func (ss *stringSet) sortFunc(f func(a, b string) bool) { + ss.compact() + sort.Sort(funcSorter{f, sort.StringSlice(ss.s)}) +} + +func (ss *stringSet) remove(s string) { + ss.assertChangeable() + if i, ok := ss.find(s); ok { + copy(ss.s[i:], ss.s[i+1:]) + ss.s = ss.s[:len(ss.s)-1] + } +} + +func (ss *stringSet) replace(ol, nu string) { + ss.s[ss.index(ol)] = nu + ss.sorted = ss.frozen +} + +func (ss *stringSet) index(s string) int { + ss.setType(Indexed) + i, ok := ss.find(s) + if !ok { + if i < len(ss.s) { + log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i]) + } + log.Panicf("find: item %q is not in list", s) + + } + return i +} + +func (ss *stringSet) find(s string) (int, bool) { + ss.compact() + i := sort.SearchStrings(ss.s, s) + return i, i != len(ss.s) && ss.s[i] == s +} + +func (ss *stringSet) slice() []string { + ss.compact() + return ss.s +} + +func (ss *stringSet) updateLater(v, key string) { + if ss.update == nil { + ss.update = map[string]string{} + } + ss.update[v] = key +} + +// join joins the string and ensures that all entries are of the same length. +func (ss *stringSet) join() string { + ss.setType(Indexed) + n := len(ss.s[0]) + for _, s := range ss.s { + if len(s) != n { + log.Panicf("join: not all entries are of the same length: %q", s) + } + } + ss.s = append(ss.s, strings.Repeat("\xff", n)) + return strings.Join(ss.s, "") +} + +// ianaEntry holds information for an entry in the IANA Language Subtag Repository. +// All types use the same entry. +// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various +// fields. +type ianaEntry struct { + typ string + description []string + scope string + added string + preferred string + deprecated string + suppressScript string + macro string + prefix []string +} + +type builder struct { + w *gen.CodeWriter + hw io.Writer // MultiWriter for w and w.Hash + data *cldr.CLDR + supp *cldr.SupplementalData + + // indices + locale stringSet // common locales + lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data + langNoIndex stringSet // 3-letter ISO codes with no associated data + script stringSet // 4-letter ISO codes + region stringSet // 2-letter ISO or 3-digit UN M49 codes + variant stringSet // 4-8-alphanumeric variant code. + + // Region codes that are groups with their corresponding group IDs. + groups map[int]index + + // langInfo + registry map[string]*ianaEntry +} + +type index uint + +func newBuilder(w *gen.CodeWriter) *builder { + r := gen.OpenCLDRCoreZip() + defer r.Close() + d := &cldr.Decoder{} + data, err := d.DecodeZip(r) + failOnError(err) + b := builder{ + w: w, + hw: io.MultiWriter(w, w.Hash), + data: data, + supp: data.Supplemental(), + } + b.parseRegistry() + return &b +} + +func (b *builder) parseRegistry() { + r := gen.OpenIANAFile("assignments/language-subtag-registry") + defer r.Close() + b.registry = make(map[string]*ianaEntry) + + scan := bufio.NewScanner(r) + scan.Split(bufio.ScanWords) + var record *ianaEntry + for more := scan.Scan(); more; { + key := scan.Text() + more = scan.Scan() + value := scan.Text() + switch key { + case "Type:": + record = &ianaEntry{typ: value} + case "Subtag:", "Tag:": + if s := strings.SplitN(value, "..", 2); len(s) > 1 { + for a := s[0]; a <= s[1]; a = inc(a) { + b.addToRegistry(a, record) + } + } else { + b.addToRegistry(value, record) + } + case "Suppress-Script:": + record.suppressScript = value + case "Added:": + record.added = value + case "Deprecated:": + record.deprecated = value + case "Macrolanguage:": + record.macro = value + case "Preferred-Value:": + record.preferred = value + case "Prefix:": + record.prefix = append(record.prefix, value) + case "Scope:": + record.scope = value + case "Description:": + buf := []byte(value) + for more = scan.Scan(); more; more = scan.Scan() { + b := scan.Bytes() + if b[0] == '%' || b[len(b)-1] == ':' { + break + } + buf = append(buf, ' ') + buf = append(buf, b...) + } + record.description = append(record.description, string(buf)) + continue + default: + continue + } + more = scan.Scan() + } + if scan.Err() != nil { + log.Panic(scan.Err()) + } +} + +func (b *builder) addToRegistry(key string, entry *ianaEntry) { + if info, ok := b.registry[key]; ok { + if info.typ != "language" || entry.typ != "extlang" { + log.Fatalf("parseRegistry: tag %q already exists", key) + } + } else { + b.registry[key] = entry + } +} + +var commentIndex = make(map[string]string) + +func init() { + for _, s := range comment { + key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0]) + commentIndex[key] = s + } +} + +func (b *builder) comment(name string) { + if s := commentIndex[name]; len(s) > 0 { + b.w.WriteComment(s) + } else { + fmt.Fprintln(b.w) + } +} + +func (b *builder) pf(f string, x ...interface{}) { + fmt.Fprintf(b.hw, f, x...) + fmt.Fprint(b.hw, "\n") +} + +func (b *builder) p(x ...interface{}) { + fmt.Fprintln(b.hw, x...) +} + +func (b *builder) addSize(s int) { + b.w.Size += s + b.pf("// Size: %d bytes", s) +} + +func (b *builder) writeConst(name string, x interface{}) { + b.comment(name) + b.w.WriteConst(name, x) +} + +// writeConsts computes f(v) for all v in values and writes the results +// as constants named _v to a single constant block. +func (b *builder) writeConsts(f func(string) int, values ...string) { + b.pf("const (") + for _, v := range values { + b.pf("\t_%s = %v", v, f(v)) + } + b.pf(")") +} + +// writeType writes the type of the given value, which must be a struct. +func (b *builder) writeType(value interface{}) { + b.comment(reflect.TypeOf(value).Name()) + b.w.WriteType(value) +} + +func (b *builder) writeSlice(name string, ss interface{}) { + b.writeSliceAddSize(name, 0, ss) +} + +func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) { + b.comment(name) + b.w.Size += extraSize + v := reflect.ValueOf(ss) + t := v.Type().Elem() + b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len()) + + fmt.Fprintf(b.w, "var %s = ", name) + b.w.WriteArray(ss) + b.p() +} + +type fromTo struct { + from, to uint16 +} + +func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) { + ss.sortFunc(func(a, b string) bool { + return index(a) < index(b) + }) + m := []fromTo{} + for _, s := range ss.s { + m = append(m, fromTo{index(s), index(ss.update[s])}) + } + b.writeSlice(name, m) +} + +const base = 'z' - 'a' + 1 + +func strToInt(s string) uint { + v := uint(0) + for i := 0; i < len(s); i++ { + v *= base + v += uint(s[i] - 'a') + } + return v +} + +// converts the given integer to the original ASCII string passed to strToInt. +// len(s) must match the number of characters obtained. +func intToStr(v uint, s []byte) { + for i := len(s) - 1; i >= 0; i-- { + s[i] = byte(v%base) + 'a' + v /= base + } +} + +func (b *builder) writeBitVector(name string, ss []string) { + vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8))) + for _, s := range ss { + v := strToInt(s) + vec[v/8] |= 1 << (v % 8) + } + b.writeSlice(name, vec) +} + +// TODO: convert this type into a list or two-stage trie. +func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) { + b.comment(name) + v := reflect.ValueOf(m) + sz := v.Len() * (2 + int(v.Type().Key().Size())) + for _, k := range m { + sz += len(k) + } + b.addSize(sz) + keys := []string{} + b.pf(`var %s = map[string]uint16{`, name) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + b.pf("\t%q: %v,", k, f(m[k])) + } + b.p("}") +} + +func (b *builder) writeMap(name string, m interface{}) { + b.comment(name) + v := reflect.ValueOf(m) + sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size())) + b.addSize(sz) + f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool { + return strings.IndexRune("{}, ", r) != -1 + }) + sort.Strings(f[1:]) + b.pf(`var %s = %s{`, name, f[0]) + for _, kv := range f[1:] { + b.pf("\t%s,", kv) + } + b.p("}") +} + +func (b *builder) langIndex(s string) uint16 { + if s == "und" { + return 0 + } + if i, ok := b.lang.find(s); ok { + return uint16(i) + } + return uint16(strToInt(s)) + uint16(len(b.lang.s)) +} + +// inc advances the string to its lexicographical successor. +func inc(s string) string { + const maxTagLength = 4 + var buf [maxTagLength]byte + intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)]) + for i := 0; i < len(s); i++ { + if s[i] <= 'Z' { + buf[i] -= 'a' - 'A' + } + } + return string(buf[:len(s)]) +} + +func (b *builder) parseIndices() { + meta := b.supp.Metadata + + for k, v := range b.registry { + var ss *stringSet + switch v.typ { + case "language": + if len(k) == 2 || v.suppressScript != "" || v.scope == "special" { + b.lang.add(k) + continue + } else { + ss = &b.langNoIndex + } + case "region": + ss = &b.region + case "script": + ss = &b.script + case "variant": + ss = &b.variant + default: + continue + } + ss.add(k) + } + // Include any language for which there is data. + for _, lang := range b.data.Locales() { + if x := b.data.RawLDML(lang); false || + x.LocaleDisplayNames != nil || + x.Characters != nil || + x.Delimiters != nil || + x.Measurement != nil || + x.Dates != nil || + x.Numbers != nil || + x.Units != nil || + x.ListPatterns != nil || + x.Collations != nil || + x.Segmentations != nil || + x.Rbnf != nil || + x.Annotations != nil || + x.Metadata != nil { + + from := strings.Split(lang, "_") + if lang := from[0]; lang != "root" { + b.lang.add(lang) + } + } + } + // Include locales for plural rules, which uses a different structure. + for _, plurals := range b.data.Supplemental().Plurals { + for _, rules := range plurals.PluralRules { + for _, lang := range strings.Split(rules.Locales, " ") { + if lang = strings.Split(lang, "_")[0]; lang != "root" { + b.lang.add(lang) + } + } + } + } + // Include languages in likely subtags. + for _, m := range b.supp.LikelySubtags.LikelySubtag { + from := strings.Split(m.From, "_") + b.lang.add(from[0]) + } + // Include ISO-639 alpha-3 bibliographic entries. + for _, a := range meta.Alias.LanguageAlias { + if a.Reason == "bibliographic" { + b.langNoIndex.add(a.Type) + } + } + // Include regions in territoryAlias (not all are in the IANA registry!) + for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { + if len(reg.Type) == 2 { + b.region.add(reg.Type) + } + } + + for _, s := range b.lang.s { + if len(s) == 3 { + b.langNoIndex.remove(s) + } + } + b.writeConst("numLanguages", len(b.lang.slice())+len(b.langNoIndex.slice())) + b.writeConst("numScripts", len(b.script.slice())) + b.writeConst("numRegions", len(b.region.slice())) + + // Add dummy codes at the start of each list to represent "unspecified". + b.lang.add("---") + b.script.add("----") + b.region.add("---") + + // common locales + b.locale.parse(meta.DefaultContent.Locales) +} + +// TODO: region inclusion data will probably not be use used in future matchers. + +func (b *builder) computeRegionGroups() { + b.groups = make(map[int]index) + + // Create group indices. + for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID. + b.groups[i] = index(len(b.groups)) + } + for _, g := range b.supp.TerritoryContainment.Group { + // Skip UN and EURO zone as they are flattening the containment + // relationship. + if g.Type == "EZ" || g.Type == "UN" { + continue + } + group := b.region.index(g.Type) + if _, ok := b.groups[group]; !ok { + b.groups[group] = index(len(b.groups)) + } + } + if len(b.groups) > 32 { + log.Fatalf("only 32 groups supported, found %d", len(b.groups)) + } + b.writeConst("nRegionGroups", len(b.groups)) +} + +var langConsts = []string{ + "af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es", + "et", "fa", "fi", "fil", "fr", "gu", "he", "hi", "hr", "hu", "hy", "id", "is", + "it", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml", + "mn", "mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa", "pl", "pt", + "ro", "ru", "sh", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta", "te", "th", + "tl", "tn", "tr", "uk", "ur", "uz", "vi", "zh", "zu", + + // constants for grandfathered tags (if not already defined) + "jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay", "tsu", + "nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn", +} + +// writeLanguage generates all tables needed for language canonicalization. +func (b *builder) writeLanguage() { + meta := b.supp.Metadata + + b.writeConst("nonCanonicalUnd", b.lang.index("und")) + b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...) + b.writeConst("langPrivateStart", b.langIndex("qaa")) + b.writeConst("langPrivateEnd", b.langIndex("qtz")) + + // Get language codes that need to be mapped (overlong 3-letter codes, + // deprecated 2-letter codes, legacy and grandfathered tags.) + langAliasMap := stringSet{} + aliasTypeMap := map[string]langAliasType{} + + // altLangISO3 get the alternative ISO3 names that need to be mapped. + altLangISO3 := stringSet{} + // Add dummy start to avoid the use of index 0. + altLangISO3.add("---") + altLangISO3.updateLater("---", "aa") + + lang := b.lang.clone() + for _, a := range meta.Alias.LanguageAlias { + if a.Replacement == "" { + a.Replacement = "und" + } + // TODO: support mapping to tags + repl := strings.SplitN(a.Replacement, "_", 2)[0] + if a.Reason == "overlong" { + if len(a.Replacement) == 2 && len(a.Type) == 3 { + lang.updateLater(a.Replacement, a.Type) + } + } else if len(a.Type) <= 3 { + switch a.Reason { + case "macrolanguage": + aliasTypeMap[a.Type] = langMacro + case "deprecated": + // handled elsewhere + continue + case "bibliographic", "legacy": + if a.Type == "no" { + continue + } + aliasTypeMap[a.Type] = langLegacy + default: + log.Fatalf("new %s alias: %s", a.Reason, a.Type) + } + langAliasMap.add(a.Type) + langAliasMap.updateLater(a.Type, repl) + } + } + // Manually add the mapping of "nb" (Norwegian) to its macro language. + // This can be removed if CLDR adopts this change. + langAliasMap.add("nb") + langAliasMap.updateLater("nb", "no") + aliasTypeMap["nb"] = langMacro + + for k, v := range b.registry { + // Also add deprecated values for 3-letter ISO codes, which CLDR omits. + if v.typ == "language" && v.deprecated != "" && v.preferred != "" { + langAliasMap.add(k) + langAliasMap.updateLater(k, v.preferred) + aliasTypeMap[k] = langDeprecated + } + } + // Fix CLDR mappings. + lang.updateLater("tl", "tgl") + lang.updateLater("sh", "hbs") + lang.updateLater("mo", "mol") + lang.updateLater("no", "nor") + lang.updateLater("tw", "twi") + lang.updateLater("nb", "nob") + lang.updateLater("ak", "aka") + lang.updateLater("bh", "bih") + + // Ensure that each 2-letter code is matched with a 3-letter code. + for _, v := range lang.s[1:] { + s, ok := lang.update[v] + if !ok { + if s, ok = lang.update[langAliasMap.update[v]]; !ok { + continue + } + lang.update[v] = s + } + if v[0] != s[0] { + altLangISO3.add(s) + altLangISO3.updateLater(s, v) + } + } + + // Complete canonicalized language tags. + lang.freeze() + for i, v := range lang.s { + // We can avoid these manual entries by using the IANA registry directly. + // Seems easier to update the list manually, as changes are rare. + // The panic in this loop will trigger if we miss an entry. + add := "" + if s, ok := lang.update[v]; ok { + if s[0] == v[0] { + add = s[1:] + } else { + add = string([]byte{0, byte(altLangISO3.index(s))}) + } + } else if len(v) == 3 { + add = "\x00" + } else { + log.Panicf("no data for long form of %q", v) + } + lang.s[i] += add + } + b.writeConst("lang", tag.Index(lang.join())) + + b.writeConst("langNoIndexOffset", len(b.lang.s)) + + // space of all valid 3-letter language identifiers. + b.writeBitVector("langNoIndex", b.langNoIndex.slice()) + + altLangIndex := []uint16{} + for i, s := range altLangISO3.slice() { + altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))}) + if i > 0 { + idx := b.lang.index(altLangISO3.update[s]) + altLangIndex = append(altLangIndex, uint16(idx)) + } + } + b.writeConst("altLangISO3", tag.Index(altLangISO3.join())) + b.writeSlice("altLangIndex", altLangIndex) + + b.writeSortedMap("langAliasMap", &langAliasMap, b.langIndex) + types := make([]langAliasType, len(langAliasMap.s)) + for i, s := range langAliasMap.s { + types[i] = aliasTypeMap[s] + } + b.writeSlice("langAliasTypes", types) +} + +var scriptConsts = []string{ + "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy", + "Zzzz", +} + +func (b *builder) writeScript() { + b.writeConsts(b.script.index, scriptConsts...) + b.writeConst("script", tag.Index(b.script.join())) + + supp := make([]uint8, len(b.lang.slice())) + for i, v := range b.lang.slice()[1:] { + if sc := b.registry[v].suppressScript; sc != "" { + supp[i+1] = uint8(b.script.index(sc)) + } + } + b.writeSlice("suppressScript", supp) + + // There is only one deprecated script in CLDR. This value is hard-coded. + // We check here if the code must be updated. + for _, a := range b.supp.Metadata.Alias.ScriptAlias { + if a.Type != "Qaai" { + log.Panicf("unexpected deprecated stript %q", a.Type) + } + } +} + +func parseM49(s string) int16 { + if len(s) == 0 { + return 0 + } + v, err := strconv.ParseUint(s, 10, 10) + failOnError(err) + return int16(v) +} + +var regionConsts = []string{ + "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US", + "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo. +} + +func (b *builder) writeRegion() { + b.writeConsts(b.region.index, regionConsts...) + + isoOffset := b.region.index("AA") + m49map := make([]int16, len(b.region.slice())) + fromM49map := make(map[int16]int) + altRegionISO3 := "" + altRegionIDs := []uint16{} + + b.writeConst("isoRegionOffset", isoOffset) + + // 2-letter region lookup and mapping to numeric codes. + regionISO := b.region.clone() + regionISO.s = regionISO.s[isoOffset:] + regionISO.sorted = false + + regionTypes := make([]byte, len(b.region.s)) + + // Is the region valid BCP 47? + for s, e := range b.registry { + if len(s) == 2 && s == strings.ToUpper(s) { + i := b.region.index(s) + for _, d := range e.description { + if strings.Contains(d, "Private use") { + regionTypes[i] = iso3166UserAssigned + } + } + regionTypes[i] |= bcp47Region + } + } + + // Is the region a valid ccTLD? + r := gen.OpenIANAFile("domains/root/db") + defer r.Close() + + buf, err := ioutil.ReadAll(r) + failOnError(err) + re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`) + for _, m := range re.FindAllSubmatch(buf, -1) { + i := b.region.index(strings.ToUpper(string(m[1]))) + regionTypes[i] |= ccTLD + } + + b.writeSlice("regionTypes", regionTypes) + + iso3Set := make(map[string]int) + update := func(iso2, iso3 string) { + i := regionISO.index(iso2) + if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] { + regionISO.s[i] += iso3[1:] + iso3Set[iso3] = -1 + } else { + if ok && j >= 0 { + regionISO.s[i] += string([]byte{0, byte(j)}) + } else { + iso3Set[iso3] = len(altRegionISO3) + regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))}) + altRegionISO3 += iso3 + altRegionIDs = append(altRegionIDs, uint16(isoOffset+i)) + } + } + } + for _, tc := range b.supp.CodeMappings.TerritoryCodes { + i := regionISO.index(tc.Type) + isoOffset + if d := m49map[i]; d != 0 { + log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d) + } + m49 := parseM49(tc.Numeric) + m49map[i] = m49 + if r := fromM49map[m49]; r == 0 { + fromM49map[m49] = i + } else if r != i { + dep := b.registry[regionISO.s[r-isoOffset]].deprecated + if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) { + fromM49map[m49] = i + } + } + } + for _, ta := range b.supp.Metadata.Alias.TerritoryAlias { + if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 { + from := parseM49(ta.Type) + if r := fromM49map[from]; r == 0 { + fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset + } + } + } + for _, tc := range b.supp.CodeMappings.TerritoryCodes { + if len(tc.Alpha3) == 3 { + update(tc.Type, tc.Alpha3) + } + } + // This entries are not included in territoryCodes. Mostly 3-letter variants + // of deleted codes and an entry for QU. + for _, m := range []struct{ iso2, iso3 string }{ + {"CT", "CTE"}, + {"DY", "DHY"}, + {"HV", "HVO"}, + {"JT", "JTN"}, + {"MI", "MID"}, + {"NH", "NHB"}, + {"NQ", "ATN"}, + {"PC", "PCI"}, + {"PU", "PUS"}, + {"PZ", "PCZ"}, + {"RH", "RHO"}, + {"VD", "VDR"}, + {"WK", "WAK"}, + // These three-letter codes are used for others as well. + {"FQ", "ATF"}, + } { + update(m.iso2, m.iso3) + } + for i, s := range regionISO.s { + if len(s) != 4 { + regionISO.s[i] = s + " " + } + } + b.writeConst("regionISO", tag.Index(regionISO.join())) + b.writeConst("altRegionISO3", altRegionISO3) + b.writeSlice("altRegionIDs", altRegionIDs) + + // Create list of deprecated regions. + // TODO: consider inserting SF -> FI. Not included by CLDR, but is the only + // Transitionally-reserved mapping not included. + regionOldMap := stringSet{} + // Include regions in territoryAlias (not all are in the IANA registry!) + for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { + if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 { + regionOldMap.add(reg.Type) + regionOldMap.updateLater(reg.Type, reg.Replacement) + i, _ := regionISO.find(reg.Type) + j, _ := regionISO.find(reg.Replacement) + if k := m49map[i+isoOffset]; k == 0 { + m49map[i+isoOffset] = m49map[j+isoOffset] + } + } + } + b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 { + return uint16(b.region.index(s)) + }) + // 3-digit region lookup, groupings. + for i := 1; i < isoOffset; i++ { + m := parseM49(b.region.s[i]) + m49map[i] = m + fromM49map[m] = i + } + b.writeSlice("m49", m49map) + + const ( + searchBits = 7 + regionBits = 9 + ) + if len(m49map) >= 1<<regionBits { + log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits) + } + m49Index := [9]int16{} + fromM49 := []uint16{} + m49 := []int{} + for k, _ := range fromM49map { + m49 = append(m49, int(k)) + } + sort.Ints(m49) + for _, k := range m49[1:] { + val := (k & (1<<searchBits - 1)) << regionBits + fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)])) + m49Index[1:][k>>searchBits] = int16(len(fromM49)) + } + b.writeSlice("m49Index", m49Index) + b.writeSlice("fromM49", fromM49) +} + +const ( + // TODO: put these lists in regionTypes as user data? Could be used for + // various optimizations and refinements and could be exposed in the API. + iso3166Except = "AC CP DG EA EU FX IC SU TA UK" + iso3166Trans = "AN BU CS NT TP YU ZR" // SF is not in our set of Regions. + // DY and RH are actually not deleted, but indeterminately reserved. + iso3166DelCLDR = "CT DD DY FQ HV JT MI NH NQ PC PU PZ RH VD WK YD" +) + +const ( + iso3166UserAssigned = 1 << iota + ccTLD + bcp47Region +) + +func find(list []string, s string) int { + for i, t := range list { + if t == s { + return i + } + } + return -1 +} + +// writeVariants generates per-variant information and creates a map from variant +// name to index value. We assign index values such that sorting multiple +// variants by index value will result in the correct order. +// There are two types of variants: specialized and general. Specialized variants +// are only applicable to certain language or language-script pairs. Generalized +// variants apply to any language. Generalized variants always sort after +// specialized variants. We will therefore always assign a higher index value +// to a generalized variant than any other variant. Generalized variants are +// sorted alphabetically among themselves. +// Specialized variants may also sort after other specialized variants. Such +// variants will be ordered after any of the variants they may follow. +// We assume that if a variant x is followed by a variant y, then for any prefix +// p of x, p-x is a prefix of y. This allows us to order tags based on the +// maximum of the length of any of its prefixes. +// TODO: it is possible to define a set of Prefix values on variants such that +// a total order cannot be defined to the point that this algorithm breaks. +// In other words, we cannot guarantee the same order of variants for the +// future using the same algorithm or for non-compliant combinations of +// variants. For this reason, consider using simple alphabetic sorting +// of variants and ignore Prefix restrictions altogether. +func (b *builder) writeVariant() { + generalized := stringSet{} + specialized := stringSet{} + specializedExtend := stringSet{} + // Collate the variants by type and check assumptions. + for _, v := range b.variant.slice() { + e := b.registry[v] + if len(e.prefix) == 0 { + generalized.add(v) + continue + } + c := strings.Split(e.prefix[0], "-") + hasScriptOrRegion := false + if len(c) > 1 { + _, hasScriptOrRegion = b.script.find(c[1]) + if !hasScriptOrRegion { + _, hasScriptOrRegion = b.region.find(c[1]) + + } + } + if len(c) == 1 || len(c) == 2 && hasScriptOrRegion { + // Variant is preceded by a language. + specialized.add(v) + continue + } + // Variant is preceded by another variant. + specializedExtend.add(v) + prefix := c[0] + "-" + if hasScriptOrRegion { + prefix += c[1] + } + for _, p := range e.prefix { + // Verify that the prefix minus the last element is a prefix of the + // predecessor element. + i := strings.LastIndex(p, "-") + pred := b.registry[p[i+1:]] + if find(pred.prefix, p[:i]) < 0 { + log.Fatalf("prefix %q for variant %q not consistent with predecessor spec", p, v) + } + // The sorting used below does not work in the general case. It works + // if we assume that variants that may be followed by others only have + // prefixes of the same length. Verify this. + count := strings.Count(p[:i], "-") + for _, q := range pred.prefix { + if c := strings.Count(q, "-"); c != count { + log.Fatalf("variant %q preceding %q has a prefix %q of size %d; want %d", p[i+1:], v, q, c, count) + } + } + if !strings.HasPrefix(p, prefix) { + log.Fatalf("prefix %q of variant %q should start with %q", p, v, prefix) + } + } + } + + // Sort extended variants. + a := specializedExtend.s + less := func(v, w string) bool { + // Sort by the maximum number of elements. + maxCount := func(s string) (max int) { + for _, p := range b.registry[s].prefix { + if c := strings.Count(p, "-"); c > max { + max = c + } + } + return + } + if cv, cw := maxCount(v), maxCount(w); cv != cw { + return cv < cw + } + // Sort by name as tie breaker. + return v < w + } + sort.Sort(funcSorter{less, sort.StringSlice(a)}) + specializedExtend.frozen = true + + // Create index from variant name to index. + variantIndex := make(map[string]uint8) + add := func(s []string) { + for _, v := range s { + variantIndex[v] = uint8(len(variantIndex)) + } + } + add(specialized.slice()) + add(specializedExtend.s) + numSpecialized := len(variantIndex) + add(generalized.slice()) + if n := len(variantIndex); n > 255 { + log.Fatalf("maximum number of variants exceeded: was %d; want <= 255", n) + } + b.writeMap("variantIndex", variantIndex) + b.writeConst("variantNumSpecialized", numSpecialized) +} + +func (b *builder) writeLanguageInfo() { +} + +// writeLikelyData writes tables that are used both for finding parent relations and for +// language matching. Each entry contains additional bits to indicate the status of the +// data to know when it cannot be used for parent relations. +func (b *builder) writeLikelyData() { + const ( + isList = 1 << iota + scriptInFrom + regionInFrom + ) + type ( // generated types + likelyScriptRegion struct { + region uint16 + script uint8 + flags uint8 + } + likelyLangScript struct { + lang uint16 + script uint8 + flags uint8 + } + likelyLangRegion struct { + lang uint16 + region uint16 + } + // likelyTag is used for getting likely tags for group regions, where + // the likely region might be a region contained in the group. + likelyTag struct { + lang uint16 + region uint16 + script uint8 + } + ) + var ( // generated variables + likelyRegionGroup = make([]likelyTag, len(b.groups)) + likelyLang = make([]likelyScriptRegion, len(b.lang.s)) + likelyRegion = make([]likelyLangScript, len(b.region.s)) + likelyScript = make([]likelyLangRegion, len(b.script.s)) + likelyLangList = []likelyScriptRegion{} + likelyRegionList = []likelyLangScript{} + ) + type fromTo struct { + from, to []string + } + langToOther := map[int][]fromTo{} + regionToOther := map[int][]fromTo{} + for _, m := range b.supp.LikelySubtags.LikelySubtag { + from := strings.Split(m.From, "_") + to := strings.Split(m.To, "_") + if len(to) != 3 { + log.Fatalf("invalid number of subtags in %q: found %d, want 3", m.To, len(to)) + } + if len(from) > 3 { + log.Fatalf("invalid number of subtags: found %d, want 1-3", len(from)) + } + if from[0] != to[0] && from[0] != "und" { + log.Fatalf("unexpected language change in expansion: %s -> %s", from, to) + } + if len(from) == 3 { + if from[2] != to[2] { + log.Fatalf("unexpected region change in expansion: %s -> %s", from, to) + } + if from[0] != "und" { + log.Fatalf("unexpected fully specified from tag: %s -> %s", from, to) + } + } + if len(from) == 1 || from[0] != "und" { + id := 0 + if from[0] != "und" { + id = b.lang.index(from[0]) + } + langToOther[id] = append(langToOther[id], fromTo{from, to}) + } else if len(from) == 2 && len(from[1]) == 4 { + sid := b.script.index(from[1]) + likelyScript[sid].lang = uint16(b.langIndex(to[0])) + likelyScript[sid].region = uint16(b.region.index(to[2])) + } else { + r := b.region.index(from[len(from)-1]) + if id, ok := b.groups[r]; ok { + if from[0] != "und" { + log.Fatalf("region changed unexpectedly: %s -> %s", from, to) + } + likelyRegionGroup[id].lang = uint16(b.langIndex(to[0])) + likelyRegionGroup[id].script = uint8(b.script.index(to[1])) + likelyRegionGroup[id].region = uint16(b.region.index(to[2])) + } else { + regionToOther[r] = append(regionToOther[r], fromTo{from, to}) + } + } + } + b.writeType(likelyLangRegion{}) + b.writeSlice("likelyScript", likelyScript) + + for id := range b.lang.s { + list := langToOther[id] + if len(list) == 1 { + likelyLang[id].region = uint16(b.region.index(list[0].to[2])) + likelyLang[id].script = uint8(b.script.index(list[0].to[1])) + } else if len(list) > 1 { + likelyLang[id].flags = isList + likelyLang[id].region = uint16(len(likelyLangList)) + likelyLang[id].script = uint8(len(list)) + for _, x := range list { + flags := uint8(0) + if len(x.from) > 1 { + if x.from[1] == x.to[2] { + flags = regionInFrom + } else { + flags = scriptInFrom + } + } + likelyLangList = append(likelyLangList, likelyScriptRegion{ + region: uint16(b.region.index(x.to[2])), + script: uint8(b.script.index(x.to[1])), + flags: flags, + }) + } + } + } + // TODO: merge suppressScript data with this table. + b.writeType(likelyScriptRegion{}) + b.writeSlice("likelyLang", likelyLang) + b.writeSlice("likelyLangList", likelyLangList) + + for id := range b.region.s { + list := regionToOther[id] + if len(list) == 1 { + likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0])) + likelyRegion[id].script = uint8(b.script.index(list[0].to[1])) + if len(list[0].from) > 2 { + likelyRegion[id].flags = scriptInFrom + } + } else if len(list) > 1 { + likelyRegion[id].flags = isList + likelyRegion[id].lang = uint16(len(likelyRegionList)) + likelyRegion[id].script = uint8(len(list)) + for i, x := range list { + if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 { + log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i) + } + x := likelyLangScript{ + lang: uint16(b.langIndex(x.to[0])), + script: uint8(b.script.index(x.to[1])), + } + if len(list[0].from) > 2 { + x.flags = scriptInFrom + } + likelyRegionList = append(likelyRegionList, x) + } + } + } + b.writeType(likelyLangScript{}) + b.writeSlice("likelyRegion", likelyRegion) + b.writeSlice("likelyRegionList", likelyRegionList) + + b.writeType(likelyTag{}) + b.writeSlice("likelyRegionGroup", likelyRegionGroup) +} + +type mutualIntelligibility struct { + want, have uint16 + distance uint8 + oneway bool +} + +type scriptIntelligibility struct { + wantLang, haveLang uint16 + wantScript, haveScript uint8 + distance uint8 + // Always oneway +} + +type regionIntelligibility struct { + lang uint16 // compact language id + script uint8 // 0 means any + group uint8 // 0 means any; if bit 7 is set it means inverse + distance uint8 + // Always twoway. +} + +// writeMatchData writes tables with languages and scripts for which there is +// mutual intelligibility. The data is based on CLDR's languageMatching data. +// Note that we use a different algorithm than the one defined by CLDR and that +// we slightly modify the data. For example, we convert scores to confidence levels. +// We also drop all region-related data as we use a different algorithm to +// determine region equivalence. +func (b *builder) writeMatchData() { + lm := b.supp.LanguageMatching.LanguageMatches + cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new") + + regionHierarchy := map[string][]string{} + for _, g := range b.supp.TerritoryContainment.Group { + regions := strings.Split(g.Contains, " ") + regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...) + } + regionToGroups := make([]uint8, len(b.region.s)) + + idToIndex := map[string]uint8{} + for i, mv := range lm[0].MatchVariable { + if i > 6 { + log.Fatalf("Too many groups: %d", i) + } + idToIndex[mv.Id] = uint8(i + 1) + // TODO: also handle '-' + for _, r := range strings.Split(mv.Value, "+") { + todo := []string{r} + for k := 0; k < len(todo); k++ { + r := todo[k] + regionToGroups[b.region.index(r)] |= 1 << uint8(i) + todo = append(todo, regionHierarchy[r]...) + } + } + } + b.writeSlice("regionToGroups", regionToGroups) + + b.writeType(mutualIntelligibility{}) + b.writeType(scriptIntelligibility{}) + b.writeType(regionIntelligibility{}) + + matchLang := []mutualIntelligibility{{ + // TODO: remove once CLDR is fixed. + want: uint16(b.langIndex("sr")), + have: uint16(b.langIndex("hr")), + distance: uint8(5), + }, { + want: uint16(b.langIndex("sr")), + have: uint16(b.langIndex("bs")), + distance: uint8(5), + }} + matchScript := []scriptIntelligibility{} + matchRegion := []regionIntelligibility{} + // Convert the languageMatch entries in lists keyed by desired language. + for _, m := range lm[0].LanguageMatch { + // Different versions of CLDR use different separators. + desired := strings.Replace(m.Desired, "-", "_", -1) + supported := strings.Replace(m.Supported, "-", "_", -1) + d := strings.Split(desired, "_") + s := strings.Split(supported, "_") + if len(d) != len(s) { + log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) + continue + } + distance, _ := strconv.ParseInt(m.Distance, 10, 8) + switch len(d) { + case 2: + if desired == supported && desired == "*_*" { + continue + } + // language-script pair. + matchScript = append(matchScript, scriptIntelligibility{ + wantLang: uint16(b.langIndex(d[0])), + haveLang: uint16(b.langIndex(s[0])), + wantScript: uint8(b.script.index(d[1])), + haveScript: uint8(b.script.index(s[1])), + distance: uint8(distance), + }) + if m.Oneway != "true" { + matchScript = append(matchScript, scriptIntelligibility{ + wantLang: uint16(b.langIndex(s[0])), + haveLang: uint16(b.langIndex(d[0])), + wantScript: uint8(b.script.index(s[1])), + haveScript: uint8(b.script.index(d[1])), + distance: uint8(distance), + }) + } + case 1: + if desired == supported && desired == "*" { + continue + } + if distance == 1 { + // nb == no is already handled by macro mapping. Check there + // really is only this case. + if d[0] != "no" || s[0] != "nb" { + log.Fatalf("unhandled equivalence %s == %s", s[0], d[0]) + } + continue + } + // TODO: consider dropping oneway field and just doubling the entry. + matchLang = append(matchLang, mutualIntelligibility{ + want: uint16(b.langIndex(d[0])), + have: uint16(b.langIndex(s[0])), + distance: uint8(distance), + oneway: m.Oneway == "true", + }) + case 3: + if desired == supported && desired == "*_*_*" { + continue + } + if desired != supported { // (Weird but correct.) + log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) + continue + } + ri := regionIntelligibility{ + lang: b.langIndex(d[0]), + distance: uint8(distance), + } + if d[1] != "*" { + ri.script = uint8(b.script.index(d[1])) + } + switch { + case d[2] == "*": + ri.group = 0x80 // not contained in anything + case strings.HasPrefix(d[2], "$!"): + ri.group = 0x80 + d[2] = "$" + d[2][len("$!"):] + fallthrough + case strings.HasPrefix(d[2], "$"): + ri.group |= idToIndex[d[2]] + } + matchRegion = append(matchRegion, ri) + default: + log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) + } + } + sort.SliceStable(matchLang, func(i, j int) bool { + return matchLang[i].distance < matchLang[j].distance + }) + b.writeSlice("matchLang", matchLang) + + sort.SliceStable(matchScript, func(i, j int) bool { + return matchScript[i].distance < matchScript[j].distance + }) + b.writeSlice("matchScript", matchScript) + + sort.SliceStable(matchRegion, func(i, j int) bool { + return matchRegion[i].distance < matchRegion[j].distance + }) + b.writeSlice("matchRegion", matchRegion) +} + +func (b *builder) writeRegionInclusionData() { + var ( + // mm holds for each group the set of groups with a distance of 1. + mm = make(map[int][]index) + + // containment holds for each group the transitive closure of + // containment of other groups. + containment = make(map[index][]index) + ) + for _, g := range b.supp.TerritoryContainment.Group { + // Skip UN and EURO zone as they are flattening the containment + // relationship. + if g.Type == "EZ" || g.Type == "UN" { + continue + } + group := b.region.index(g.Type) + groupIdx := b.groups[group] + for _, mem := range strings.Split(g.Contains, " ") { + r := b.region.index(mem) + mm[r] = append(mm[r], groupIdx) + if g, ok := b.groups[r]; ok { + mm[group] = append(mm[group], g) + containment[groupIdx] = append(containment[groupIdx], g) + } + } + } + + regionContainment := make([]uint32, len(b.groups)) + for _, g := range b.groups { + l := containment[g] + + // Compute the transitive closure of containment. + for i := 0; i < len(l); i++ { + l = append(l, containment[l[i]]...) + } + + // Compute the bitmask. + regionContainment[g] = 1 << g + for _, v := range l { + regionContainment[g] |= 1 << v + } + } + b.writeSlice("regionContainment", regionContainment) + + regionInclusion := make([]uint8, len(b.region.s)) + bvs := make(map[uint32]index) + // Make the first bitvector positions correspond with the groups. + for r, i := range b.groups { + bv := uint32(1 << i) + for _, g := range mm[r] { + bv |= 1 << g + } + bvs[bv] = i + regionInclusion[r] = uint8(bvs[bv]) + } + for r := 1; r < len(b.region.s); r++ { + if _, ok := b.groups[r]; !ok { + bv := uint32(0) + for _, g := range mm[r] { + bv |= 1 << g + } + if bv == 0 { + // Pick the world for unspecified regions. + bv = 1 << b.groups[b.region.index("001")] + } + if _, ok := bvs[bv]; !ok { + bvs[bv] = index(len(bvs)) + } + regionInclusion[r] = uint8(bvs[bv]) + } + } + b.writeSlice("regionInclusion", regionInclusion) + regionInclusionBits := make([]uint32, len(bvs)) + for k, v := range bvs { + regionInclusionBits[v] = uint32(k) + } + // Add bit vectors for increasingly large distances until a fixed point is reached. + regionInclusionNext := []uint8{} + for i := 0; i < len(regionInclusionBits); i++ { + bits := regionInclusionBits[i] + next := bits + for i := uint(0); i < uint(len(b.groups)); i++ { + if bits&(1<<i) != 0 { + next |= regionInclusionBits[i] + } + } + if _, ok := bvs[next]; !ok { + bvs[next] = index(len(bvs)) + regionInclusionBits = append(regionInclusionBits, next) + } + regionInclusionNext = append(regionInclusionNext, uint8(bvs[next])) + } + b.writeSlice("regionInclusionBits", regionInclusionBits) + b.writeSlice("regionInclusionNext", regionInclusionNext) +} + +type parentRel struct { + lang uint16 + script uint8 + maxScript uint8 + toRegion uint16 + fromRegion []uint16 +} + +func (b *builder) writeParents() { + b.writeType(parentRel{}) + + parents := []parentRel{} + + // Construct parent overrides. + n := 0 + for _, p := range b.data.Supplemental().ParentLocales.ParentLocale { + // Skipping non-standard scripts to root is implemented using addTags. + if p.Parent == "root" { + continue + } + + sub := strings.Split(p.Parent, "_") + parent := parentRel{lang: b.langIndex(sub[0])} + if len(sub) == 2 { + // TODO: check that all undefined scripts are indeed Latn in these + // cases. + parent.maxScript = uint8(b.script.index("Latn")) + parent.toRegion = uint16(b.region.index(sub[1])) + } else { + parent.script = uint8(b.script.index(sub[1])) + parent.maxScript = parent.script + parent.toRegion = uint16(b.region.index(sub[2])) + } + for _, c := range strings.Split(p.Locales, " ") { + region := b.region.index(c[strings.LastIndex(c, "_")+1:]) + parent.fromRegion = append(parent.fromRegion, uint16(region)) + } + parents = append(parents, parent) + n += len(parent.fromRegion) + } + b.writeSliceAddSize("parents", n*2, parents) +} + +func main() { + gen.Init() + + gen.Repackage("gen_common.go", "common.go", "language") + + w := gen.NewCodeWriter() + defer w.WriteGoFile("tables.go", "language") + + fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`) + + b := newBuilder(w) + gen.WriteCLDRVersion(w) + + b.parseIndices() + b.writeType(fromTo{}) + b.writeLanguage() + b.writeScript() + b.writeRegion() + b.writeVariant() + // TODO: b.writeLocale() + b.computeRegionGroups() + b.writeLikelyData() + b.writeMatchData() + b.writeRegionInclusionData() + b.writeParents() +} diff --git a/vendor/golang.org/x/text/unicode/cldr/base.go b/vendor/golang.org/x/text/unicode/cldr/base.go new file mode 100644 index 0000000000..2382f4d6da --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/base.go @@ -0,0 +1,100 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +import ( + "encoding/xml" + "regexp" + "strconv" +) + +// Elem is implemented by every XML element. +type Elem interface { + setEnclosing(Elem) + setName(string) + enclosing() Elem + + GetCommon() *Common +} + +type hidden struct { + CharData string `xml:",chardata"` + Alias *struct { + Common + Source string `xml:"source,attr"` + Path string `xml:"path,attr"` + } `xml:"alias"` + Def *struct { + Common + Choice string `xml:"choice,attr,omitempty"` + Type string `xml:"type,attr,omitempty"` + } `xml:"default"` +} + +// Common holds several of the most common attributes and sub elements +// of an XML element. +type Common struct { + XMLName xml.Name + name string + enclElem Elem + Type string `xml:"type,attr,omitempty"` + Reference string `xml:"reference,attr,omitempty"` + Alt string `xml:"alt,attr,omitempty"` + ValidSubLocales string `xml:"validSubLocales,attr,omitempty"` + Draft string `xml:"draft,attr,omitempty"` + hidden +} + +// Default returns the default type to select from the enclosed list +// or "" if no default value is specified. +func (e *Common) Default() string { + if e.Def == nil { + return "" + } + if e.Def.Choice != "" { + return e.Def.Choice + } else if e.Def.Type != "" { + // Type is still used by the default element in collation. + return e.Def.Type + } + return "" +} + +// GetCommon returns e. It is provided such that Common implements Elem. +func (e *Common) GetCommon() *Common { + return e +} + +// Data returns the character data accumulated for this element. +func (e *Common) Data() string { + e.CharData = charRe.ReplaceAllStringFunc(e.CharData, replaceUnicode) + return e.CharData +} + +func (e *Common) setName(s string) { + e.name = s +} + +func (e *Common) enclosing() Elem { + return e.enclElem +} + +func (e *Common) setEnclosing(en Elem) { + e.enclElem = en +} + +// Escape characters that can be escaped without further escaping the string. +var charRe = regexp.MustCompile(`&#x[0-9a-fA-F]*;|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\[abtnvfr]`) + +// replaceUnicode converts hexadecimal Unicode codepoint notations to a one-rune string. +// It assumes the input string is correctly formatted. +func replaceUnicode(s string) string { + if s[1] == '#' { + r, _ := strconv.ParseInt(s[3:len(s)-1], 16, 32) + return string(r) + } + r, _, _, _ := strconv.UnquoteChar(s, 0) + return string(r) +} diff --git a/vendor/golang.org/x/text/unicode/cldr/cldr.go b/vendor/golang.org/x/text/unicode/cldr/cldr.go new file mode 100644 index 0000000000..2197f8ac26 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/cldr.go @@ -0,0 +1,130 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run makexml.go -output xml.go + +// Package cldr provides a parser for LDML and related XML formats. +// This package is intended to be used by the table generation tools +// for the various internationalization-related packages. +// As the XML types are generated from the CLDR DTD, and as the CLDR standard +// is periodically amended, this package may change considerably over time. +// This mostly means that data may appear and disappear between versions. +// That is, old code should keep compiling for newer versions, but data +// may have moved or changed. +// CLDR version 22 is the first version supported by this package. +// Older versions may not work. +package cldr // import "golang.org/x/text/unicode/cldr" + +import ( + "fmt" + "sort" +) + +// CLDR provides access to parsed data of the Unicode Common Locale Data Repository. +type CLDR struct { + parent map[string][]string + locale map[string]*LDML + resolved map[string]*LDML + bcp47 *LDMLBCP47 + supp *SupplementalData +} + +func makeCLDR() *CLDR { + return &CLDR{ + parent: make(map[string][]string), + locale: make(map[string]*LDML), + resolved: make(map[string]*LDML), + bcp47: &LDMLBCP47{}, + supp: &SupplementalData{}, + } +} + +// BCP47 returns the parsed BCP47 LDML data. If no such data was parsed, nil is returned. +func (cldr *CLDR) BCP47() *LDMLBCP47 { + return nil +} + +// Draft indicates the draft level of an element. +type Draft int + +const ( + Approved Draft = iota + Contributed + Provisional + Unconfirmed +) + +var drafts = []string{"unconfirmed", "provisional", "contributed", "approved", ""} + +// ParseDraft returns the Draft value corresponding to the given string. The +// empty string corresponds to Approved. +func ParseDraft(level string) (Draft, error) { + if level == "" { + return Approved, nil + } + for i, s := range drafts { + if level == s { + return Unconfirmed - Draft(i), nil + } + } + return Approved, fmt.Errorf("cldr: unknown draft level %q", level) +} + +func (d Draft) String() string { + return drafts[len(drafts)-1-int(d)] +} + +// SetDraftLevel sets which draft levels to include in the evaluated LDML. +// Any draft element for which the draft level is higher than lev will be excluded. +// If multiple draft levels are available for a single element, the one with the +// lowest draft level will be selected, unless preferDraft is true, in which case +// the highest draft will be chosen. +// It is assumed that the underlying LDML is canonicalized. +func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool) { + // TODO: implement + cldr.resolved = make(map[string]*LDML) +} + +// RawLDML returns the LDML XML for id in unresolved form. +// id must be one of the strings returned by Locales. +func (cldr *CLDR) RawLDML(loc string) *LDML { + return cldr.locale[loc] +} + +// LDML returns the fully resolved LDML XML for loc, which must be one of +// the strings returned by Locales. +func (cldr *CLDR) LDML(loc string) (*LDML, error) { + return cldr.resolve(loc) +} + +// Supplemental returns the parsed supplemental data. If no such data was parsed, +// nil is returned. +func (cldr *CLDR) Supplemental() *SupplementalData { + return cldr.supp +} + +// Locales returns the locales for which there exist files. +// Valid sublocales for which there is no file are not included. +// The root locale is always sorted first. +func (cldr *CLDR) Locales() []string { + loc := []string{"root"} + hasRoot := false + for l, _ := range cldr.locale { + if l == "root" { + hasRoot = true + continue + } + loc = append(loc, l) + } + sort.Strings(loc[1:]) + if !hasRoot { + return loc[1:] + } + return loc +} + +// Get fills in the fields of x based on the XPath path. +func Get(e Elem, path string) (res Elem, err error) { + return walkXPath(e, path) +} diff --git a/vendor/golang.org/x/text/unicode/cldr/collate.go b/vendor/golang.org/x/text/unicode/cldr/collate.go new file mode 100644 index 0000000000..80ee28d795 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/collate.go @@ -0,0 +1,359 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +import ( + "bufio" + "encoding/xml" + "errors" + "fmt" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// RuleProcessor can be passed to Collator's Process method, which +// parses the rules and calls the respective method for each rule found. +type RuleProcessor interface { + Reset(anchor string, before int) error + Insert(level int, str, context, extend string) error + Index(id string) +} + +const ( + // cldrIndex is a Unicode-reserved sentinel value used to mark the start + // of a grouping within an index. + // We ignore any rule that starts with this rune. + // See http://unicode.org/reports/tr35/#Collation_Elements for details. + cldrIndex = "\uFDD0" + + // specialAnchor is the format in which to represent logical reset positions, + // such as "first tertiary ignorable". + specialAnchor = "<%s/>" +) + +// Process parses the rules for the tailorings of this collation +// and calls the respective methods of p for each rule found. +func (c Collation) Process(p RuleProcessor) (err error) { + if len(c.Cr) > 0 { + if len(c.Cr) > 1 { + return fmt.Errorf("multiple cr elements, want 0 or 1") + } + return processRules(p, c.Cr[0].Data()) + } + if c.Rules.Any != nil { + return c.processXML(p) + } + return errors.New("no tailoring data") +} + +// processRules parses rules in the Collation Rule Syntax defined in +// http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings. +func processRules(p RuleProcessor, s string) (err error) { + chk := func(s string, e error) string { + if err == nil { + err = e + } + return s + } + i := 0 // Save the line number for use after the loop. + scanner := bufio.NewScanner(strings.NewReader(s)) + for ; scanner.Scan() && err == nil; i++ { + for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) { + level := 5 + var ch byte + switch ch, s = s[0], s[1:]; ch { + case '&': // followed by <anchor> or '[' <key> ']' + if s = skipSpace(s); consume(&s, '[') { + s = chk(parseSpecialAnchor(p, s)) + } else { + s = chk(parseAnchor(p, 0, s)) + } + case '<': // sort relation '<'{1,4}, optionally followed by '*'. + for level = 1; consume(&s, '<'); level++ { + } + if level > 4 { + err = fmt.Errorf("level %d > 4", level) + } + fallthrough + case '=': // identity relation, optionally followed by *. + if consume(&s, '*') { + s = chk(parseSequence(p, level, s)) + } else { + s = chk(parseOrder(p, level, s)) + } + default: + chk("", fmt.Errorf("illegal operator %q", ch)) + break + } + } + } + if chk("", scanner.Err()); err != nil { + return fmt.Errorf("%d: %v", i, err) + } + return nil +} + +// parseSpecialAnchor parses the anchor syntax which is either of the form +// ['before' <level>] <anchor> +// or +// [<label>] +// The starting should already be consumed. +func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) { + i := strings.IndexByte(s, ']') + if i == -1 { + return "", errors.New("unmatched bracket") + } + a := strings.TrimSpace(s[:i]) + s = s[i+1:] + if strings.HasPrefix(a, "before ") { + l, err := strconv.ParseUint(skipSpace(a[len("before "):]), 10, 3) + if err != nil { + return s, err + } + return parseAnchor(p, int(l), s) + } + return s, p.Reset(fmt.Sprintf(specialAnchor, a), 0) +} + +func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) { + anchor, s, err := scanString(s) + if err != nil { + return s, err + } + return s, p.Reset(anchor, level) +} + +func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) { + var value, context, extend string + if value, s, err = scanString(s); err != nil { + return s, err + } + if strings.HasPrefix(value, cldrIndex) { + p.Index(value[len(cldrIndex):]) + return + } + if consume(&s, '|') { + if context, s, err = scanString(s); err != nil { + return s, errors.New("missing string after context") + } + } + if consume(&s, '/') { + if extend, s, err = scanString(s); err != nil { + return s, errors.New("missing string after extension") + } + } + return s, p.Insert(level, value, context, extend) +} + +// scanString scans a single input string. +func scanString(s string) (str, tail string, err error) { + if s = skipSpace(s); s == "" { + return s, s, errors.New("missing string") + } + buf := [16]byte{} // small but enough to hold most cases. + value := buf[:0] + for s != "" { + if consume(&s, '\'') { + i := strings.IndexByte(s, '\'') + if i == -1 { + return "", "", errors.New(`unmatched single quote`) + } + if i == 0 { + value = append(value, '\'') + } else { + value = append(value, s[:i]...) + } + s = s[i+1:] + continue + } + r, sz := utf8.DecodeRuneInString(s) + if unicode.IsSpace(r) || strings.ContainsRune("&<=#", r) { + break + } + value = append(value, s[:sz]...) + s = s[sz:] + } + return string(value), skipSpace(s), nil +} + +func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) { + if s = skipSpace(s); s == "" { + return s, errors.New("empty sequence") + } + last := rune(0) + for s != "" { + r, sz := utf8.DecodeRuneInString(s) + s = s[sz:] + + if r == '-' { + // We have a range. The first element was already written. + if last == 0 { + return s, errors.New("range without starter value") + } + r, sz = utf8.DecodeRuneInString(s) + s = s[sz:] + if r == utf8.RuneError || r < last { + return s, fmt.Errorf("invalid range %q-%q", last, r) + } + for i := last + 1; i <= r; i++ { + if err := p.Insert(level, string(i), "", ""); err != nil { + return s, err + } + } + last = 0 + continue + } + + if unicode.IsSpace(r) || unicode.IsPunct(r) { + break + } + + // normal case + if err := p.Insert(level, string(r), "", ""); err != nil { + return s, err + } + last = r + } + return s, nil +} + +func skipSpace(s string) string { + return strings.TrimLeftFunc(s, unicode.IsSpace) +} + +// consumes returns whether the next byte is ch. If so, it gobbles it by +// updating s. +func consume(s *string, ch byte) (ok bool) { + if *s == "" || (*s)[0] != ch { + return false + } + *s = (*s)[1:] + return true +} + +// The following code parses Collation rules of CLDR version 24 and before. + +var lmap = map[byte]int{ + 'p': 1, + 's': 2, + 't': 3, + 'i': 5, +} + +type rulesElem struct { + Rules struct { + Common + Any []*struct { + XMLName xml.Name + rule + } `xml:",any"` + } `xml:"rules"` +} + +type rule struct { + Value string `xml:",chardata"` + Before string `xml:"before,attr"` + Any []*struct { + XMLName xml.Name + rule + } `xml:",any"` +} + +var emptyValueError = errors.New("cldr: empty rule value") + +func (r *rule) value() (string, error) { + // Convert hexadecimal Unicode codepoint notation to a string. + s := charRe.ReplaceAllStringFunc(r.Value, replaceUnicode) + r.Value = s + if s == "" { + if len(r.Any) != 1 { + return "", emptyValueError + } + r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local) + r.Any = nil + } else if len(r.Any) != 0 { + return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any) + } + return r.Value, nil +} + +func (r rule) process(p RuleProcessor, name, context, extend string) error { + v, err := r.value() + if err != nil { + return err + } + switch name { + case "p", "s", "t", "i": + if strings.HasPrefix(v, cldrIndex) { + p.Index(v[len(cldrIndex):]) + return nil + } + if err := p.Insert(lmap[name[0]], v, context, extend); err != nil { + return err + } + case "pc", "sc", "tc", "ic": + level := lmap[name[0]] + for _, s := range v { + if err := p.Insert(level, string(s), context, extend); err != nil { + return err + } + } + default: + return fmt.Errorf("cldr: unsupported tag: %q", name) + } + return nil +} + +// processXML parses the format of CLDR versions 24 and older. +func (c Collation) processXML(p RuleProcessor) (err error) { + // Collation is generated and defined in xml.go. + var v string + for _, r := range c.Rules.Any { + switch r.XMLName.Local { + case "reset": + level := 0 + switch r.Before { + case "primary", "1": + level = 1 + case "secondary", "2": + level = 2 + case "tertiary", "3": + level = 3 + case "": + default: + return fmt.Errorf("cldr: unknown level %q", r.Before) + } + v, err = r.value() + if err == nil { + err = p.Reset(v, level) + } + case "x": + var context, extend string + for _, r1 := range r.Any { + v, err = r1.value() + switch r1.XMLName.Local { + case "context": + context = v + case "extend": + extend = v + } + } + for _, r1 := range r.Any { + if t := r1.XMLName.Local; t == "context" || t == "extend" { + continue + } + r1.rule.process(p, r1.XMLName.Local, context, extend) + } + default: + err = r.rule.process(p, r.XMLName.Local, "", "") + } + if err != nil { + return err + } + } + return nil +} diff --git a/vendor/golang.org/x/text/unicode/cldr/decode.go b/vendor/golang.org/x/text/unicode/cldr/decode.go new file mode 100644 index 0000000000..e5ee4aed18 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/decode.go @@ -0,0 +1,171 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +import ( + "archive/zip" + "bytes" + "encoding/xml" + "fmt" + "io" + "io/ioutil" + "log" + "os" + "path/filepath" + "regexp" +) + +// A Decoder loads an archive of CLDR data. +type Decoder struct { + dirFilter []string + sectionFilter []string + loader Loader + cldr *CLDR + curLocale string +} + +// SetSectionFilter takes a list top-level LDML element names to which +// evaluation of LDML should be limited. It automatically calls SetDirFilter. +func (d *Decoder) SetSectionFilter(filter ...string) { + d.sectionFilter = filter + // TODO: automatically set dir filter +} + +// SetDirFilter limits the loading of LDML XML files of the specied directories. +// Note that sections may be split across directories differently for different CLDR versions. +// For more robust code, use SetSectionFilter. +func (d *Decoder) SetDirFilter(dir ...string) { + d.dirFilter = dir +} + +// A Loader provides access to the files of a CLDR archive. +type Loader interface { + Len() int + Path(i int) string + Reader(i int) (io.ReadCloser, error) +} + +var fileRe = regexp.MustCompile(".*/(.*)/(.*)\\.xml") + +// Decode loads and decodes the files represented by l. +func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error) { + d.cldr = makeCLDR() + for i := 0; i < l.Len(); i++ { + fname := l.Path(i) + if m := fileRe.FindStringSubmatch(fname); m != nil { + if len(d.dirFilter) > 0 && !in(d.dirFilter, m[1]) { + continue + } + var r io.Reader + if r, err = l.Reader(i); err == nil { + err = d.decode(m[1], m[2], r) + } + if err != nil { + return nil, err + } + } + } + d.cldr.finalize(d.sectionFilter) + return d.cldr, nil +} + +func (d *Decoder) decode(dir, id string, r io.Reader) error { + var v interface{} + var l *LDML + cldr := d.cldr + switch { + case dir == "supplemental": + v = cldr.supp + case dir == "transforms": + return nil + case dir == "bcp47": + v = cldr.bcp47 + case dir == "validity": + return nil + default: + ok := false + if v, ok = cldr.locale[id]; !ok { + l = &LDML{} + v, cldr.locale[id] = l, l + } + } + x := xml.NewDecoder(r) + if err := x.Decode(v); err != nil { + log.Printf("%s/%s: %v", dir, id, err) + return err + } + if l != nil { + if l.Identity == nil { + return fmt.Errorf("%s/%s: missing identity element", dir, id) + } + // TODO: verify when CLDR bug http://unicode.org/cldr/trac/ticket/8970 + // is resolved. + // path := strings.Split(id, "_") + // if lang := l.Identity.Language.Type; lang != path[0] { + // return fmt.Errorf("%s/%s: language was %s; want %s", dir, id, lang, path[0]) + // } + } + return nil +} + +type pathLoader []string + +func makePathLoader(path string) (pl pathLoader, err error) { + err = filepath.Walk(path, func(path string, _ os.FileInfo, err error) error { + pl = append(pl, path) + return err + }) + return pl, err +} + +func (pl pathLoader) Len() int { + return len(pl) +} + +func (pl pathLoader) Path(i int) string { + return pl[i] +} + +func (pl pathLoader) Reader(i int) (io.ReadCloser, error) { + return os.Open(pl[i]) +} + +// DecodePath loads CLDR data from the given path. +func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error) { + loader, err := makePathLoader(path) + if err != nil { + return nil, err + } + return d.Decode(loader) +} + +type zipLoader struct { + r *zip.Reader +} + +func (zl zipLoader) Len() int { + return len(zl.r.File) +} + +func (zl zipLoader) Path(i int) string { + return zl.r.File[i].Name +} + +func (zl zipLoader) Reader(i int) (io.ReadCloser, error) { + return zl.r.File[i].Open() +} + +// DecodeZip loads CLDR data from the zip archive for which r is the source. +func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error) { + buffer, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) + if err != nil { + return nil, err + } + return d.Decode(zipLoader{archive}) +} diff --git a/vendor/golang.org/x/text/unicode/cldr/makexml.go b/vendor/golang.org/x/text/unicode/cldr/makexml.go new file mode 100644 index 0000000000..6114d01cbc --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/makexml.go @@ -0,0 +1,400 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// This tool generates types for the various XML formats of CLDR. +package main + +import ( + "archive/zip" + "bytes" + "encoding/xml" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "os" + "regexp" + "strings" + + "golang.org/x/text/internal/gen" +) + +var outputFile = flag.String("output", "xml.go", "output file name") + +func main() { + flag.Parse() + + r := gen.OpenCLDRCoreZip() + buffer, err := ioutil.ReadAll(r) + if err != nil { + log.Fatal("Could not read zip file") + } + r.Close() + z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) + if err != nil { + log.Fatalf("Could not read zip archive: %v", err) + } + + var buf bytes.Buffer + + version := gen.CLDRVersion() + + for _, dtd := range files { + for _, f := range z.File { + if strings.HasSuffix(f.Name, dtd.file+".dtd") { + r, err := f.Open() + failOnError(err) + + b := makeBuilder(&buf, dtd) + b.parseDTD(r) + b.resolve(b.index[dtd.top[0]]) + b.write() + if b.version != "" && version != b.version { + println(f.Name) + log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version) + } + break + } + } + } + fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.") + fmt.Fprintf(&buf, "const Version = %q\n", version) + + gen.WriteGoFile(*outputFile, "cldr", buf.Bytes()) +} + +func failOnError(err error) { + if err != nil { + log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error()) + os.Exit(1) + } +} + +// configuration data per DTD type +type dtd struct { + file string // base file name + root string // Go name of the root XML element + top []string // create a different type for this section + + skipElem []string // hard-coded or deprecated elements + skipAttr []string // attributes to exclude + predefined []string // hard-coded elements exist of the form <name>Elem + forceRepeat []string // elements to make slices despite DTD +} + +var files = []dtd{ + { + file: "ldmlBCP47", + root: "LDMLBCP47", + top: []string{"ldmlBCP47"}, + skipElem: []string{ + "cldrVersion", // deprecated, not used + }, + }, + { + file: "ldmlSupplemental", + root: "SupplementalData", + top: []string{"supplementalData"}, + skipElem: []string{ + "cldrVersion", // deprecated, not used + }, + forceRepeat: []string{ + "plurals", // data defined in plurals.xml and ordinals.xml + }, + }, + { + file: "ldml", + root: "LDML", + top: []string{ + "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers", + }, + skipElem: []string{ + "cp", // not used anywhere + "special", // not used anywhere + "fallback", // deprecated, not used + "alias", // in Common + "default", // in Common + }, + skipAttr: []string{ + "hiraganaQuarternary", // typo in DTD, correct version included as well + }, + predefined: []string{"rules"}, + }, +} + +var comments = map[string]string{ + "ldmlBCP47": ` +// LDMLBCP47 holds information on allowable values for various variables in LDML. +`, + "supplementalData": ` +// SupplementalData holds information relevant for internationalization +// and proper use of CLDR, but that is not contained in the locale hierarchy. +`, + "ldml": ` +// LDML is the top-level type for locale-specific data. +`, + "collation": ` +// Collation contains rules that specify a certain sort-order, +// as a tailoring of the root order. +// The parsed rules are obtained by passing a RuleProcessor to Collation's +// Process method. +`, + "calendar": ` +// Calendar specifies the fields used for formatting and parsing dates and times. +// The month and quarter names are identified numerically, starting at 1. +// The day (of the week) names are identified with short strings, since there is +// no universally-accepted numeric designation. +`, + "dates": ` +// Dates contains information regarding the format and parsing of dates and times. +`, + "localeDisplayNames": ` +// LocaleDisplayNames specifies localized display names for for scripts, languages, +// countries, currencies, and variants. +`, + "numbers": ` +// Numbers supplies information for formatting and parsing numbers and currencies. +`, +} + +type element struct { + name string // XML element name + category string // elements contained by this element + signature string // category + attrKey* + + attr []*attribute // attributes supported by this element. + sub []struct { // parsed and evaluated sub elements of this element. + e *element + repeat bool // true if the element needs to be a slice + } + + resolved bool // prevent multiple resolutions of this element. +} + +type attribute struct { + name string + key string + list []string + + tag string // Go tag +} + +var ( + reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`) + reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`) + reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`) + reToken = regexp.MustCompile(`\w\-`) +) + +// builder is used to read in the DTD files from CLDR and generate Go code +// to be used with the encoding/xml package. +type builder struct { + w io.Writer + index map[string]*element + elem []*element + info dtd + version string +} + +func makeBuilder(w io.Writer, d dtd) builder { + return builder{ + w: w, + index: make(map[string]*element), + elem: []*element{}, + info: d, + } +} + +// parseDTD parses a DTD file. +func (b *builder) parseDTD(r io.Reader) { + for d := xml.NewDecoder(r); ; { + t, err := d.Token() + if t == nil { + break + } + failOnError(err) + dir, ok := t.(xml.Directive) + if !ok { + continue + } + m := reHead.FindSubmatch(dir) + dir = dir[len(m[0]):] + ename := string(m[2]) + el, elementFound := b.index[ename] + switch string(m[1]) { + case "ELEMENT": + if elementFound { + log.Fatal("parseDTD: duplicate entry for element %q", ename) + } + m := reElem.FindSubmatch(dir) + if m == nil { + log.Fatalf("parseDTD: invalid element %q", string(dir)) + } + if len(m[0]) != len(dir) { + log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0])) + } + s := string(m[1]) + el = &element{ + name: ename, + category: s, + } + b.index[ename] = el + case "ATTLIST": + if !elementFound { + log.Fatalf("parseDTD: unknown element %q", ename) + } + s := string(dir) + m := reAttr.FindStringSubmatch(s) + if m == nil { + log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir))) + } + if m[4] == "FIXED" { + b.version = m[5] + } else { + switch m[1] { + case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ : + case "type", "choice": + default: + el.attr = append(el.attr, &attribute{ + name: m[1], + key: s, + list: reToken.FindAllString(m[3], -1), + }) + el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2]) + } + } + } + } +} + +var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`) + +// resolve takes a parsed element and converts it into structured data +// that can be used to generate the XML code. +func (b *builder) resolve(e *element) { + if e.resolved { + return + } + b.elem = append(b.elem, e) + e.resolved = true + s := e.category + found := make(map[string]bool) + sequenceStart := []int{} + for len(s) > 0 { + m := reCat.FindStringSubmatch(s) + if m == nil { + log.Fatalf("%s: invalid category string %q", e.name, s) + } + repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1]) + switch m[1] { + case "": + case "(": + sequenceStart = append(sequenceStart, len(e.sub)) + case ")": + if len(sequenceStart) == 0 { + log.Fatalf("%s: unmatched closing parenthesis", e.name) + } + for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ { + e.sub[i].repeat = e.sub[i].repeat || repeat + } + sequenceStart = sequenceStart[:len(sequenceStart)-1] + default: + if in(b.info.skipElem, m[1]) { + } else if sub, ok := b.index[m[1]]; ok { + if !found[sub.name] { + e.sub = append(e.sub, struct { + e *element + repeat bool + }{sub, repeat}) + found[sub.name] = true + b.resolve(sub) + } + } else if m[1] == "#PCDATA" || m[1] == "ANY" { + } else if m[1] != "EMPTY" { + log.Fatalf("resolve:%s: element %q not found", e.name, m[1]) + } + } + s = s[len(m[0]):] + } +} + +// return true if s is contained in set. +func in(set []string, s string) bool { + for _, v := range set { + if v == s { + return true + } + } + return false +} + +var repl = strings.NewReplacer("-", " ", "_", " ") + +// title puts the first character or each character following '_' in title case and +// removes all occurrences of '_'. +func title(s string) string { + return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1) +} + +// writeElem generates Go code for a single element, recursively. +func (b *builder) writeElem(tab int, e *element) { + p := func(f string, x ...interface{}) { + f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1) + fmt.Fprintf(b.w, f, x...) + } + if len(e.sub) == 0 && len(e.attr) == 0 { + p("Common") + return + } + p("struct {") + tab++ + p("\nCommon") + for _, attr := range e.attr { + if !in(b.info.skipAttr, attr.name) { + p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name) + } + } + for _, sub := range e.sub { + if in(b.info.predefined, sub.e.name) { + p("\n%sElem", sub.e.name) + continue + } + if in(b.info.skipElem, sub.e.name) { + continue + } + p("\n%s ", title(sub.e.name)) + if sub.repeat { + p("[]") + } + p("*") + if in(b.info.top, sub.e.name) { + p(title(sub.e.name)) + } else { + b.writeElem(tab, sub.e) + } + p(" `xml:\"%s\"`", sub.e.name) + } + tab-- + p("\n}") +} + +// write generates the Go XML code. +func (b *builder) write() { + for i, name := range b.info.top { + e := b.index[name] + if e != nil { + fmt.Fprintf(b.w, comments[name]) + name := title(e.name) + if i == 0 { + name = b.info.root + } + fmt.Fprintf(b.w, "type %s ", name) + b.writeElem(0, e) + fmt.Fprint(b.w, "\n") + } + } +} diff --git a/vendor/golang.org/x/text/unicode/cldr/resolve.go b/vendor/golang.org/x/text/unicode/cldr/resolve.go new file mode 100644 index 0000000000..691b5903fe --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/resolve.go @@ -0,0 +1,602 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +// This file implements the various inheritance constructs defined by LDML. +// See http://www.unicode.org/reports/tr35/#Inheritance_and_Validity +// for more details. + +import ( + "fmt" + "log" + "reflect" + "regexp" + "sort" + "strings" +) + +// fieldIter iterates over fields in a struct. It includes +// fields of embedded structs. +type fieldIter struct { + v reflect.Value + index, n []int +} + +func iter(v reflect.Value) fieldIter { + if v.Kind() != reflect.Struct { + log.Panicf("value %v must be a struct", v) + } + i := fieldIter{ + v: v, + index: []int{0}, + n: []int{v.NumField()}, + } + i.descent() + return i +} + +func (i *fieldIter) descent() { + for f := i.field(); f.Anonymous && f.Type.NumField() > 0; f = i.field() { + i.index = append(i.index, 0) + i.n = append(i.n, f.Type.NumField()) + } +} + +func (i *fieldIter) done() bool { + return len(i.index) == 1 && i.index[0] >= i.n[0] +} + +func skip(f reflect.StructField) bool { + return !f.Anonymous && (f.Name[0] < 'A' || f.Name[0] > 'Z') +} + +func (i *fieldIter) next() { + for { + k := len(i.index) - 1 + i.index[k]++ + if i.index[k] < i.n[k] { + if !skip(i.field()) { + break + } + } else { + if k == 0 { + return + } + i.index = i.index[:k] + i.n = i.n[:k] + } + } + i.descent() +} + +func (i *fieldIter) value() reflect.Value { + return i.v.FieldByIndex(i.index) +} + +func (i *fieldIter) field() reflect.StructField { + return i.v.Type().FieldByIndex(i.index) +} + +type visitor func(v reflect.Value) error + +var stopDescent = fmt.Errorf("do not recurse") + +func (f visitor) visit(x interface{}) error { + return f.visitRec(reflect.ValueOf(x)) +} + +// visit recursively calls f on all nodes in v. +func (f visitor) visitRec(v reflect.Value) error { + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return nil + } + return f.visitRec(v.Elem()) + } + if err := f(v); err != nil { + if err == stopDescent { + return nil + } + return err + } + switch v.Kind() { + case reflect.Struct: + for i := iter(v); !i.done(); i.next() { + if err := f.visitRec(i.value()); err != nil { + return err + } + } + case reflect.Slice: + for i := 0; i < v.Len(); i++ { + if err := f.visitRec(v.Index(i)); err != nil { + return err + } + } + } + return nil +} + +// getPath is used for error reporting purposes only. +func getPath(e Elem) string { + if e == nil { + return "<nil>" + } + if e.enclosing() == nil { + return e.GetCommon().name + } + if e.GetCommon().Type == "" { + return fmt.Sprintf("%s.%s", getPath(e.enclosing()), e.GetCommon().name) + } + return fmt.Sprintf("%s.%s[type=%s]", getPath(e.enclosing()), e.GetCommon().name, e.GetCommon().Type) +} + +// xmlName returns the xml name of the element or attribute +func xmlName(f reflect.StructField) (name string, attr bool) { + tags := strings.Split(f.Tag.Get("xml"), ",") + for _, s := range tags { + attr = attr || s == "attr" + } + return tags[0], attr +} + +func findField(v reflect.Value, key string) (reflect.Value, error) { + v = reflect.Indirect(v) + for i := iter(v); !i.done(); i.next() { + if n, _ := xmlName(i.field()); n == key { + return i.value(), nil + } + } + return reflect.Value{}, fmt.Errorf("cldr: no field %q in element %#v", key, v.Interface()) +} + +var xpathPart = regexp.MustCompile(`(\pL+)(?:\[@(\pL+)='([\w-]+)'\])?`) + +func walkXPath(e Elem, path string) (res Elem, err error) { + for _, c := range strings.Split(path, "/") { + if c == ".." { + if e = e.enclosing(); e == nil { + panic("path ..") + return nil, fmt.Errorf(`cldr: ".." moves past root in path %q`, path) + } + continue + } else if c == "" { + continue + } + m := xpathPart.FindStringSubmatch(c) + if len(m) == 0 || len(m[0]) != len(c) { + return nil, fmt.Errorf("cldr: syntax error in path component %q", c) + } + v, err := findField(reflect.ValueOf(e), m[1]) + if err != nil { + return nil, err + } + switch v.Kind() { + case reflect.Slice: + i := 0 + if m[2] != "" || v.Len() > 1 { + if m[2] == "" { + m[2] = "type" + if m[3] = e.GetCommon().Default(); m[3] == "" { + return nil, fmt.Errorf("cldr: type selector or default value needed for element %s", m[1]) + } + } + for ; i < v.Len(); i++ { + vi := v.Index(i) + key, err := findField(vi.Elem(), m[2]) + if err != nil { + return nil, err + } + key = reflect.Indirect(key) + if key.Kind() == reflect.String && key.String() == m[3] { + break + } + } + } + if i == v.Len() || v.Index(i).IsNil() { + return nil, fmt.Errorf("no %s found with %s==%s", m[1], m[2], m[3]) + } + e = v.Index(i).Interface().(Elem) + case reflect.Ptr: + if v.IsNil() { + return nil, fmt.Errorf("cldr: element %q not found within element %q", m[1], e.GetCommon().name) + } + var ok bool + if e, ok = v.Interface().(Elem); !ok { + return nil, fmt.Errorf("cldr: %q is not an XML element", m[1]) + } else if m[2] != "" || m[3] != "" { + return nil, fmt.Errorf("cldr: no type selector allowed for element %s", m[1]) + } + default: + return nil, fmt.Errorf("cldr: %q is not an XML element", m[1]) + } + } + return e, nil +} + +const absPrefix = "//ldml/" + +func (cldr *CLDR) resolveAlias(e Elem, src, path string) (res Elem, err error) { + if src != "locale" { + if !strings.HasPrefix(path, absPrefix) { + return nil, fmt.Errorf("cldr: expected absolute path, found %q", path) + } + path = path[len(absPrefix):] + if e, err = cldr.resolve(src); err != nil { + return nil, err + } + } + return walkXPath(e, path) +} + +func (cldr *CLDR) resolveAndMergeAlias(e Elem) error { + alias := e.GetCommon().Alias + if alias == nil { + return nil + } + a, err := cldr.resolveAlias(e, alias.Source, alias.Path) + if err != nil { + return fmt.Errorf("%v: error evaluating path %q: %v", getPath(e), alias.Path, err) + } + // Ensure alias node was already evaluated. TODO: avoid double evaluation. + err = cldr.resolveAndMergeAlias(a) + v := reflect.ValueOf(e).Elem() + for i := iter(reflect.ValueOf(a).Elem()); !i.done(); i.next() { + if vv := i.value(); vv.Kind() != reflect.Ptr || !vv.IsNil() { + if _, attr := xmlName(i.field()); !attr { + v.FieldByIndex(i.index).Set(vv) + } + } + } + return err +} + +func (cldr *CLDR) aliasResolver() visitor { + return func(v reflect.Value) (err error) { + if e, ok := v.Addr().Interface().(Elem); ok { + err = cldr.resolveAndMergeAlias(e) + if err == nil && blocking[e.GetCommon().name] { + return stopDescent + } + } + return err + } +} + +// elements within blocking elements do not inherit. +// Taken from CLDR's supplementalMetaData.xml. +var blocking = map[string]bool{ + "identity": true, + "supplementalData": true, + "cldrTest": true, + "collation": true, + "transform": true, +} + +// Distinguishing attributes affect inheritance; two elements with different +// distinguishing attributes are treated as different for purposes of inheritance, +// except when such attributes occur in the indicated elements. +// Taken from CLDR's supplementalMetaData.xml. +var distinguishing = map[string][]string{ + "key": nil, + "request_id": nil, + "id": nil, + "registry": nil, + "alt": nil, + "iso4217": nil, + "iso3166": nil, + "mzone": nil, + "from": nil, + "to": nil, + "type": []string{ + "abbreviationFallback", + "default", + "mapping", + "measurementSystem", + "preferenceOrdering", + }, + "numberSystem": nil, +} + +func in(set []string, s string) bool { + for _, v := range set { + if v == s { + return true + } + } + return false +} + +// attrKey computes a key based on the distinguishable attributes of +// an element and it's values. +func attrKey(v reflect.Value, exclude ...string) string { + parts := []string{} + ename := v.Interface().(Elem).GetCommon().name + v = v.Elem() + for i := iter(v); !i.done(); i.next() { + if name, attr := xmlName(i.field()); attr { + if except, ok := distinguishing[name]; ok && !in(exclude, name) && !in(except, ename) { + v := i.value() + if v.Kind() == reflect.Ptr { + v = v.Elem() + } + if v.IsValid() { + parts = append(parts, fmt.Sprintf("%s=%s", name, v.String())) + } + } + } + } + sort.Strings(parts) + return strings.Join(parts, ";") +} + +// Key returns a key for e derived from all distinguishing attributes +// except those specified by exclude. +func Key(e Elem, exclude ...string) string { + return attrKey(reflect.ValueOf(e), exclude...) +} + +// linkEnclosing sets the enclosing element as well as the name +// for all sub-elements of child, recursively. +func linkEnclosing(parent, child Elem) { + child.setEnclosing(parent) + v := reflect.ValueOf(child).Elem() + for i := iter(v); !i.done(); i.next() { + vf := i.value() + if vf.Kind() == reflect.Slice { + for j := 0; j < vf.Len(); j++ { + linkEnclosing(child, vf.Index(j).Interface().(Elem)) + } + } else if vf.Kind() == reflect.Ptr && !vf.IsNil() && vf.Elem().Kind() == reflect.Struct { + linkEnclosing(child, vf.Interface().(Elem)) + } + } +} + +func setNames(e Elem, name string) { + e.setName(name) + v := reflect.ValueOf(e).Elem() + for i := iter(v); !i.done(); i.next() { + vf := i.value() + name, _ = xmlName(i.field()) + if vf.Kind() == reflect.Slice { + for j := 0; j < vf.Len(); j++ { + setNames(vf.Index(j).Interface().(Elem), name) + } + } else if vf.Kind() == reflect.Ptr && !vf.IsNil() && vf.Elem().Kind() == reflect.Struct { + setNames(vf.Interface().(Elem), name) + } + } +} + +// deepCopy copies elements of v recursively. All elements of v that may +// be modified by inheritance are explicitly copied. +func deepCopy(v reflect.Value) reflect.Value { + switch v.Kind() { + case reflect.Ptr: + if v.IsNil() || v.Elem().Kind() != reflect.Struct { + return v + } + nv := reflect.New(v.Elem().Type()) + nv.Elem().Set(v.Elem()) + deepCopyRec(nv.Elem(), v.Elem()) + return nv + case reflect.Slice: + nv := reflect.MakeSlice(v.Type(), v.Len(), v.Len()) + for i := 0; i < v.Len(); i++ { + deepCopyRec(nv.Index(i), v.Index(i)) + } + return nv + } + panic("deepCopy: must be called with pointer or slice") +} + +// deepCopyRec is only called by deepCopy. +func deepCopyRec(nv, v reflect.Value) { + if v.Kind() == reflect.Struct { + t := v.Type() + for i := 0; i < v.NumField(); i++ { + if name, attr := xmlName(t.Field(i)); name != "" && !attr { + deepCopyRec(nv.Field(i), v.Field(i)) + } + } + } else { + nv.Set(deepCopy(v)) + } +} + +// newNode is used to insert a missing node during inheritance. +func (cldr *CLDR) newNode(v, enc reflect.Value) reflect.Value { + n := reflect.New(v.Type()) + for i := iter(v); !i.done(); i.next() { + if name, attr := xmlName(i.field()); name == "" || attr { + n.Elem().FieldByIndex(i.index).Set(i.value()) + } + } + n.Interface().(Elem).GetCommon().setEnclosing(enc.Addr().Interface().(Elem)) + return n +} + +// v, parent must be pointers to struct +func (cldr *CLDR) inheritFields(v, parent reflect.Value) (res reflect.Value, err error) { + t := v.Type() + nv := reflect.New(t) + nv.Elem().Set(v) + for i := iter(v); !i.done(); i.next() { + vf := i.value() + f := i.field() + name, attr := xmlName(f) + if name == "" || attr { + continue + } + pf := parent.FieldByIndex(i.index) + if blocking[name] { + if vf.IsNil() { + vf = pf + } + nv.Elem().FieldByIndex(i.index).Set(deepCopy(vf)) + continue + } + switch f.Type.Kind() { + case reflect.Ptr: + if f.Type.Elem().Kind() == reflect.Struct { + if !vf.IsNil() { + if vf, err = cldr.inheritStructPtr(vf, pf); err != nil { + return reflect.Value{}, err + } + vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem)) + nv.Elem().FieldByIndex(i.index).Set(vf) + } else if !pf.IsNil() { + n := cldr.newNode(pf.Elem(), v) + if vf, err = cldr.inheritStructPtr(n, pf); err != nil { + return reflect.Value{}, err + } + vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem)) + nv.Elem().FieldByIndex(i.index).Set(vf) + } + } + case reflect.Slice: + vf, err := cldr.inheritSlice(nv.Elem(), vf, pf) + if err != nil { + return reflect.Zero(t), err + } + nv.Elem().FieldByIndex(i.index).Set(vf) + } + } + return nv, nil +} + +func root(e Elem) *LDML { + for ; e.enclosing() != nil; e = e.enclosing() { + } + return e.(*LDML) +} + +// inheritStructPtr first merges possible aliases in with v and then inherits +// any underspecified elements from parent. +func (cldr *CLDR) inheritStructPtr(v, parent reflect.Value) (r reflect.Value, err error) { + if !v.IsNil() { + e := v.Interface().(Elem).GetCommon() + alias := e.Alias + if alias == nil && !parent.IsNil() { + alias = parent.Interface().(Elem).GetCommon().Alias + } + if alias != nil { + a, err := cldr.resolveAlias(v.Interface().(Elem), alias.Source, alias.Path) + if a != nil { + if v, err = cldr.inheritFields(v.Elem(), reflect.ValueOf(a).Elem()); err != nil { + return reflect.Value{}, err + } + } + } + if !parent.IsNil() { + return cldr.inheritFields(v.Elem(), parent.Elem()) + } + } else if parent.IsNil() { + panic("should not reach here") + } + return v, nil +} + +// Must be slice of struct pointers. +func (cldr *CLDR) inheritSlice(enc, v, parent reflect.Value) (res reflect.Value, err error) { + t := v.Type() + index := make(map[string]reflect.Value) + if !v.IsNil() { + for i := 0; i < v.Len(); i++ { + vi := v.Index(i) + key := attrKey(vi) + index[key] = vi + } + } + if !parent.IsNil() { + for i := 0; i < parent.Len(); i++ { + vi := parent.Index(i) + key := attrKey(vi) + if w, ok := index[key]; ok { + index[key], err = cldr.inheritStructPtr(w, vi) + } else { + n := cldr.newNode(vi.Elem(), enc) + index[key], err = cldr.inheritStructPtr(n, vi) + } + index[key].Interface().(Elem).setEnclosing(enc.Addr().Interface().(Elem)) + if err != nil { + return v, err + } + } + } + keys := make([]string, 0, len(index)) + for k, _ := range index { + keys = append(keys, k) + } + sort.Strings(keys) + sl := reflect.MakeSlice(t, len(index), len(index)) + for i, k := range keys { + sl.Index(i).Set(index[k]) + } + return sl, nil +} + +func parentLocale(loc string) string { + parts := strings.Split(loc, "_") + if len(parts) == 1 { + return "root" + } + parts = parts[:len(parts)-1] + key := strings.Join(parts, "_") + return key +} + +func (cldr *CLDR) resolve(loc string) (res *LDML, err error) { + if r := cldr.resolved[loc]; r != nil { + return r, nil + } + x := cldr.RawLDML(loc) + if x == nil { + return nil, fmt.Errorf("cldr: unknown locale %q", loc) + } + var v reflect.Value + if loc == "root" { + x = deepCopy(reflect.ValueOf(x)).Interface().(*LDML) + linkEnclosing(nil, x) + err = cldr.aliasResolver().visit(x) + } else { + key := parentLocale(loc) + var parent *LDML + for ; cldr.locale[key] == nil; key = parentLocale(key) { + } + if parent, err = cldr.resolve(key); err != nil { + return nil, err + } + v, err = cldr.inheritFields(reflect.ValueOf(x).Elem(), reflect.ValueOf(parent).Elem()) + x = v.Interface().(*LDML) + linkEnclosing(nil, x) + } + if err != nil { + return nil, err + } + cldr.resolved[loc] = x + return x, err +} + +// finalize finalizes the initialization of the raw LDML structs. It also +// removed unwanted fields, as specified by filter, so that they will not +// be unnecessarily evaluated. +func (cldr *CLDR) finalize(filter []string) { + for _, x := range cldr.locale { + if filter != nil { + v := reflect.ValueOf(x).Elem() + t := v.Type() + for i := 0; i < v.NumField(); i++ { + f := t.Field(i) + name, _ := xmlName(f) + if name != "" && name != "identity" && !in(filter, name) { + v.Field(i).Set(reflect.Zero(f.Type)) + } + } + } + linkEnclosing(nil, x) // for resolving aliases and paths + setNames(x, "ldml") + } +} diff --git a/vendor/golang.org/x/text/unicode/cldr/slice.go b/vendor/golang.org/x/text/unicode/cldr/slice.go new file mode 100644 index 0000000000..388c983ff1 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/slice.go @@ -0,0 +1,144 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cldr + +import ( + "fmt" + "reflect" + "sort" +) + +// Slice provides utilities for modifying slices of elements. +// It can be wrapped around any slice of which the element type implements +// interface Elem. +type Slice struct { + ptr reflect.Value + typ reflect.Type +} + +// Value returns the reflect.Value of the underlying slice. +func (s *Slice) Value() reflect.Value { + return s.ptr.Elem() +} + +// MakeSlice wraps a pointer to a slice of Elems. +// It replaces the array pointed to by the slice so that subsequent modifications +// do not alter the data in a CLDR type. +// It panics if an incorrect type is passed. +func MakeSlice(slicePtr interface{}) Slice { + ptr := reflect.ValueOf(slicePtr) + if ptr.Kind() != reflect.Ptr { + panic(fmt.Sprintf("MakeSlice: argument must be pointer to slice, found %v", ptr.Type())) + } + sl := ptr.Elem() + if sl.Kind() != reflect.Slice { + panic(fmt.Sprintf("MakeSlice: argument must point to a slice, found %v", sl.Type())) + } + intf := reflect.TypeOf((*Elem)(nil)).Elem() + if !sl.Type().Elem().Implements(intf) { + panic(fmt.Sprintf("MakeSlice: element type of slice (%v) does not implement Elem", sl.Type().Elem())) + } + nsl := reflect.MakeSlice(sl.Type(), sl.Len(), sl.Len()) + reflect.Copy(nsl, sl) + sl.Set(nsl) + return Slice{ + ptr: ptr, + typ: sl.Type().Elem().Elem(), + } +} + +func (s Slice) indexForAttr(a string) []int { + for i := iter(reflect.Zero(s.typ)); !i.done(); i.next() { + if n, _ := xmlName(i.field()); n == a { + return i.index + } + } + panic(fmt.Sprintf("MakeSlice: no attribute %q for type %v", a, s.typ)) +} + +// Filter filters s to only include elements for which fn returns true. +func (s Slice) Filter(fn func(e Elem) bool) { + k := 0 + sl := s.Value() + for i := 0; i < sl.Len(); i++ { + vi := sl.Index(i) + if fn(vi.Interface().(Elem)) { + sl.Index(k).Set(vi) + k++ + } + } + sl.Set(sl.Slice(0, k)) +} + +// Group finds elements in s for which fn returns the same value and groups +// them in a new Slice. +func (s Slice) Group(fn func(e Elem) string) []Slice { + m := make(map[string][]reflect.Value) + sl := s.Value() + for i := 0; i < sl.Len(); i++ { + vi := sl.Index(i) + key := fn(vi.Interface().(Elem)) + m[key] = append(m[key], vi) + } + keys := []string{} + for k, _ := range m { + keys = append(keys, k) + } + sort.Strings(keys) + res := []Slice{} + for _, k := range keys { + nsl := reflect.New(sl.Type()) + nsl.Elem().Set(reflect.Append(nsl.Elem(), m[k]...)) + res = append(res, MakeSlice(nsl.Interface())) + } + return res +} + +// SelectAnyOf filters s to contain only elements for which attr matches +// any of the values. +func (s Slice) SelectAnyOf(attr string, values ...string) { + index := s.indexForAttr(attr) + s.Filter(func(e Elem) bool { + vf := reflect.ValueOf(e).Elem().FieldByIndex(index) + return in(values, vf.String()) + }) +} + +// SelectOnePerGroup filters s to include at most one element e per group of +// elements matching Key(attr), where e has an attribute a that matches any +// the values in v. +// If more than one element in a group matches a value in v preference +// is given to the element that matches the first value in v. +func (s Slice) SelectOnePerGroup(a string, v []string) { + index := s.indexForAttr(a) + grouped := s.Group(func(e Elem) string { return Key(e, a) }) + sl := s.Value() + sl.Set(sl.Slice(0, 0)) + for _, g := range grouped { + e := reflect.Value{} + found := len(v) + gsl := g.Value() + for i := 0; i < gsl.Len(); i++ { + vi := gsl.Index(i).Elem().FieldByIndex(index) + j := 0 + for ; j < len(v) && v[j] != vi.String(); j++ { + } + if j < found { + found = j + e = gsl.Index(i) + } + } + if found < len(v) { + sl.Set(reflect.Append(sl, e)) + } + } +} + +// SelectDraft drops all elements from the list with a draft level smaller than d +// and selects the highest draft level of the remaining. +// This method assumes that the input CLDR is canonicalized. +func (s Slice) SelectDraft(d Draft) { + s.SelectOnePerGroup("draft", drafts[len(drafts)-2-int(d):]) +} diff --git a/vendor/golang.org/x/text/unicode/cldr/xml.go b/vendor/golang.org/x/text/unicode/cldr/xml.go new file mode 100644 index 0000000000..ce3203827e --- /dev/null +++ b/vendor/golang.org/x/text/unicode/cldr/xml.go @@ -0,0 +1,1487 @@ +// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. + +package cldr + +// LDMLBCP47 holds information on allowable values for various variables in LDML. +type LDMLBCP47 struct { + Common + Version *struct { + Common + Number string `xml:"number,attr"` + } `xml:"version"` + Generation *struct { + Common + Date string `xml:"date,attr"` + } `xml:"generation"` + Keyword []*struct { + Common + Key []*struct { + Common + Extension string `xml:"extension,attr"` + Name string `xml:"name,attr"` + Description string `xml:"description,attr"` + Deprecated string `xml:"deprecated,attr"` + Preferred string `xml:"preferred,attr"` + Alias string `xml:"alias,attr"` + ValueType string `xml:"valueType,attr"` + Since string `xml:"since,attr"` + Type []*struct { + Common + Name string `xml:"name,attr"` + Description string `xml:"description,attr"` + Deprecated string `xml:"deprecated,attr"` + Preferred string `xml:"preferred,attr"` + Alias string `xml:"alias,attr"` + Since string `xml:"since,attr"` + } `xml:"type"` + } `xml:"key"` + } `xml:"keyword"` + Attribute []*struct { + Common + Name string `xml:"name,attr"` + Description string `xml:"description,attr"` + Deprecated string `xml:"deprecated,attr"` + Preferred string `xml:"preferred,attr"` + Since string `xml:"since,attr"` + } `xml:"attribute"` +} + +// SupplementalData holds information relevant for internationalization +// and proper use of CLDR, but that is not contained in the locale hierarchy. +type SupplementalData struct { + Common + Version *struct { + Common + Number string `xml:"number,attr"` + } `xml:"version"` + Generation *struct { + Common + Date string `xml:"date,attr"` + } `xml:"generation"` + CurrencyData *struct { + Common + Fractions []*struct { + Common + Info []*struct { + Common + Iso4217 string `xml:"iso4217,attr"` + Digits string `xml:"digits,attr"` + Rounding string `xml:"rounding,attr"` + CashDigits string `xml:"cashDigits,attr"` + CashRounding string `xml:"cashRounding,attr"` + } `xml:"info"` + } `xml:"fractions"` + Region []*struct { + Common + Iso3166 string `xml:"iso3166,attr"` + Currency []*struct { + Common + Before string `xml:"before,attr"` + From string `xml:"from,attr"` + To string `xml:"to,attr"` + Iso4217 string `xml:"iso4217,attr"` + Digits string `xml:"digits,attr"` + Rounding string `xml:"rounding,attr"` + CashRounding string `xml:"cashRounding,attr"` + Tender string `xml:"tender,attr"` + Alternate []*struct { + Common + Iso4217 string `xml:"iso4217,attr"` + } `xml:"alternate"` + } `xml:"currency"` + } `xml:"region"` + } `xml:"currencyData"` + TerritoryContainment *struct { + Common + Group []*struct { + Common + Contains string `xml:"contains,attr"` + Grouping string `xml:"grouping,attr"` + Status string `xml:"status,attr"` + } `xml:"group"` + } `xml:"territoryContainment"` + SubdivisionContainment *struct { + Common + Subgroup []*struct { + Common + Subtype string `xml:"subtype,attr"` + Contains string `xml:"contains,attr"` + } `xml:"subgroup"` + } `xml:"subdivisionContainment"` + LanguageData *struct { + Common + Language []*struct { + Common + Scripts string `xml:"scripts,attr"` + Territories string `xml:"territories,attr"` + Variants string `xml:"variants,attr"` + } `xml:"language"` + } `xml:"languageData"` + TerritoryInfo *struct { + Common + Territory []*struct { + Common + Gdp string `xml:"gdp,attr"` + LiteracyPercent string `xml:"literacyPercent,attr"` + Population string `xml:"population,attr"` + LanguagePopulation []*struct { + Common + LiteracyPercent string `xml:"literacyPercent,attr"` + WritingPercent string `xml:"writingPercent,attr"` + PopulationPercent string `xml:"populationPercent,attr"` + OfficialStatus string `xml:"officialStatus,attr"` + } `xml:"languagePopulation"` + } `xml:"territory"` + } `xml:"territoryInfo"` + PostalCodeData *struct { + Common + PostCodeRegex []*struct { + Common + TerritoryId string `xml:"territoryId,attr"` + } `xml:"postCodeRegex"` + } `xml:"postalCodeData"` + CalendarData *struct { + Common + Calendar []*struct { + Common + Territories string `xml:"territories,attr"` + CalendarSystem *Common `xml:"calendarSystem"` + Eras *struct { + Common + Era []*struct { + Common + Start string `xml:"start,attr"` + End string `xml:"end,attr"` + } `xml:"era"` + } `xml:"eras"` + } `xml:"calendar"` + } `xml:"calendarData"` + CalendarPreferenceData *struct { + Common + CalendarPreference []*struct { + Common + Territories string `xml:"territories,attr"` + Ordering string `xml:"ordering,attr"` + } `xml:"calendarPreference"` + } `xml:"calendarPreferenceData"` + WeekData *struct { + Common + MinDays []*struct { + Common + Count string `xml:"count,attr"` + Territories string `xml:"territories,attr"` + } `xml:"minDays"` + FirstDay []*struct { + Common + Day string `xml:"day,attr"` + Territories string `xml:"territories,attr"` + } `xml:"firstDay"` + WeekendStart []*struct { + Common + Day string `xml:"day,attr"` + Territories string `xml:"territories,attr"` + } `xml:"weekendStart"` + WeekendEnd []*struct { + Common + Day string `xml:"day,attr"` + Territories string `xml:"territories,attr"` + } `xml:"weekendEnd"` + WeekOfPreference []*struct { + Common + Locales string `xml:"locales,attr"` + Ordering string `xml:"ordering,attr"` + } `xml:"weekOfPreference"` + } `xml:"weekData"` + TimeData *struct { + Common + Hours []*struct { + Common + Allowed string `xml:"allowed,attr"` + Preferred string `xml:"preferred,attr"` + Regions string `xml:"regions,attr"` + } `xml:"hours"` + } `xml:"timeData"` + MeasurementData *struct { + Common + MeasurementSystem []*struct { + Common + Category string `xml:"category,attr"` + Territories string `xml:"territories,attr"` + } `xml:"measurementSystem"` + PaperSize []*struct { + Common + Territories string `xml:"territories,attr"` + } `xml:"paperSize"` + } `xml:"measurementData"` + UnitPreferenceData *struct { + Common + UnitPreferences []*struct { + Common + Category string `xml:"category,attr"` + Usage string `xml:"usage,attr"` + Scope string `xml:"scope,attr"` + UnitPreference []*struct { + Common + Regions string `xml:"regions,attr"` + } `xml:"unitPreference"` + } `xml:"unitPreferences"` + } `xml:"unitPreferenceData"` + TimezoneData *struct { + Common + MapTimezones []*struct { + Common + OtherVersion string `xml:"otherVersion,attr"` + TypeVersion string `xml:"typeVersion,attr"` + MapZone []*struct { + Common + Other string `xml:"other,attr"` + Territory string `xml:"territory,attr"` + } `xml:"mapZone"` + } `xml:"mapTimezones"` + ZoneFormatting []*struct { + Common + Multizone string `xml:"multizone,attr"` + TzidVersion string `xml:"tzidVersion,attr"` + ZoneItem []*struct { + Common + Territory string `xml:"territory,attr"` + Aliases string `xml:"aliases,attr"` + } `xml:"zoneItem"` + } `xml:"zoneFormatting"` + } `xml:"timezoneData"` + Characters *struct { + Common + CharacterFallback []*struct { + Common + Character []*struct { + Common + Value string `xml:"value,attr"` + Substitute []*Common `xml:"substitute"` + } `xml:"character"` + } `xml:"character-fallback"` + } `xml:"characters"` + Transforms *struct { + Common + Transform []*struct { + Common + Source string `xml:"source,attr"` + Target string `xml:"target,attr"` + Variant string `xml:"variant,attr"` + Direction string `xml:"direction,attr"` + Alias string `xml:"alias,attr"` + BackwardAlias string `xml:"backwardAlias,attr"` + Visibility string `xml:"visibility,attr"` + Comment []*Common `xml:"comment"` + TRule []*Common `xml:"tRule"` + } `xml:"transform"` + } `xml:"transforms"` + Metadata *struct { + Common + AttributeOrder *Common `xml:"attributeOrder"` + ElementOrder *Common `xml:"elementOrder"` + SerialElements *Common `xml:"serialElements"` + Suppress *struct { + Common + Attributes []*struct { + Common + Element string `xml:"element,attr"` + Attribute string `xml:"attribute,attr"` + AttributeValue string `xml:"attributeValue,attr"` + } `xml:"attributes"` + } `xml:"suppress"` + Validity *struct { + Common + Variable []*struct { + Common + Id string `xml:"id,attr"` + } `xml:"variable"` + AttributeValues []*struct { + Common + Dtds string `xml:"dtds,attr"` + Elements string `xml:"elements,attr"` + Attributes string `xml:"attributes,attr"` + Order string `xml:"order,attr"` + } `xml:"attributeValues"` + } `xml:"validity"` + Alias *struct { + Common + LanguageAlias []*struct { + Common + Replacement string `xml:"replacement,attr"` + Reason string `xml:"reason,attr"` + } `xml:"languageAlias"` + ScriptAlias []*struct { + Common + Replacement string `xml:"replacement,attr"` + Reason string `xml:"reason,attr"` + } `xml:"scriptAlias"` + TerritoryAlias []*struct { + Common + Replacement string `xml:"replacement,attr"` + Reason string `xml:"reason,attr"` + } `xml:"territoryAlias"` + SubdivisionAlias []*struct { + Common + Replacement string `xml:"replacement,attr"` + Reason string `xml:"reason,attr"` + } `xml:"subdivisionAlias"` + VariantAlias []*struct { + Common + Replacement string `xml:"replacement,attr"` + Reason string `xml:"reason,attr"` + } `xml:"variantAlias"` + ZoneAlias []*struct { + Common + Replacement string `xml:"replacement,attr"` + Reason string `xml:"reason,attr"` + } `xml:"zoneAlias"` + } `xml:"alias"` + Deprecated *struct { + Common + DeprecatedItems []*struct { + Common + Elements string `xml:"elements,attr"` + Attributes string `xml:"attributes,attr"` + Values string `xml:"values,attr"` + } `xml:"deprecatedItems"` + } `xml:"deprecated"` + Distinguishing *struct { + Common + DistinguishingItems []*struct { + Common + Exclude string `xml:"exclude,attr"` + Elements string `xml:"elements,attr"` + Attributes string `xml:"attributes,attr"` + } `xml:"distinguishingItems"` + } `xml:"distinguishing"` + Blocking *struct { + Common + BlockingItems []*struct { + Common + Elements string `xml:"elements,attr"` + } `xml:"blockingItems"` + } `xml:"blocking"` + CoverageAdditions *struct { + Common + LanguageCoverage []*struct { + Common + Values string `xml:"values,attr"` + } `xml:"languageCoverage"` + ScriptCoverage []*struct { + Common + Values string `xml:"values,attr"` + } `xml:"scriptCoverage"` + TerritoryCoverage []*struct { + Common + Values string `xml:"values,attr"` + } `xml:"territoryCoverage"` + CurrencyCoverage []*struct { + Common + Values string `xml:"values,attr"` + } `xml:"currencyCoverage"` + TimezoneCoverage []*struct { + Common + Values string `xml:"values,attr"` + } `xml:"timezoneCoverage"` + } `xml:"coverageAdditions"` + SkipDefaultLocale *struct { + Common + Services string `xml:"services,attr"` + } `xml:"skipDefaultLocale"` + DefaultContent *struct { + Common + Locales string `xml:"locales,attr"` + } `xml:"defaultContent"` + } `xml:"metadata"` + CodeMappings *struct { + Common + LanguageCodes []*struct { + Common + Alpha3 string `xml:"alpha3,attr"` + } `xml:"languageCodes"` + TerritoryCodes []*struct { + Common + Numeric string `xml:"numeric,attr"` + Alpha3 string `xml:"alpha3,attr"` + Fips10 string `xml:"fips10,attr"` + Internet string `xml:"internet,attr"` + } `xml:"territoryCodes"` + CurrencyCodes []*struct { + Common + Numeric string `xml:"numeric,attr"` + } `xml:"currencyCodes"` + } `xml:"codeMappings"` + ParentLocales *struct { + Common + ParentLocale []*struct { + Common + Parent string `xml:"parent,attr"` + Locales string `xml:"locales,attr"` + } `xml:"parentLocale"` + } `xml:"parentLocales"` + LikelySubtags *struct { + Common + LikelySubtag []*struct { + Common + From string `xml:"from,attr"` + To string `xml:"to,attr"` + } `xml:"likelySubtag"` + } `xml:"likelySubtags"` + MetazoneInfo *struct { + Common + Timezone []*struct { + Common + UsesMetazone []*struct { + Common + From string `xml:"from,attr"` + To string `xml:"to,attr"` + Mzone string `xml:"mzone,attr"` + } `xml:"usesMetazone"` + } `xml:"timezone"` + } `xml:"metazoneInfo"` + Plurals []*struct { + Common + PluralRules []*struct { + Common + Locales string `xml:"locales,attr"` + PluralRule []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"pluralRule"` + } `xml:"pluralRules"` + PluralRanges []*struct { + Common + Locales string `xml:"locales,attr"` + PluralRange []*struct { + Common + Start string `xml:"start,attr"` + End string `xml:"end,attr"` + Result string `xml:"result,attr"` + } `xml:"pluralRange"` + } `xml:"pluralRanges"` + } `xml:"plurals"` + TelephoneCodeData *struct { + Common + CodesByTerritory []*struct { + Common + Territory string `xml:"territory,attr"` + TelephoneCountryCode []*struct { + Common + Code string `xml:"code,attr"` + From string `xml:"from,attr"` + To string `xml:"to,attr"` + } `xml:"telephoneCountryCode"` + } `xml:"codesByTerritory"` + } `xml:"telephoneCodeData"` + NumberingSystems *struct { + Common + NumberingSystem []*struct { + Common + Id string `xml:"id,attr"` + Radix string `xml:"radix,attr"` + Digits string `xml:"digits,attr"` + Rules string `xml:"rules,attr"` + } `xml:"numberingSystem"` + } `xml:"numberingSystems"` + Bcp47KeywordMappings *struct { + Common + MapKeys *struct { + Common + KeyMap []*struct { + Common + Bcp47 string `xml:"bcp47,attr"` + } `xml:"keyMap"` + } `xml:"mapKeys"` + MapTypes []*struct { + Common + TypeMap []*struct { + Common + Bcp47 string `xml:"bcp47,attr"` + } `xml:"typeMap"` + } `xml:"mapTypes"` + } `xml:"bcp47KeywordMappings"` + Gender *struct { + Common + PersonList []*struct { + Common + Locales string `xml:"locales,attr"` + } `xml:"personList"` + } `xml:"gender"` + References *struct { + Common + Reference []*struct { + Common + Uri string `xml:"uri,attr"` + } `xml:"reference"` + } `xml:"references"` + LanguageMatching *struct { + Common + LanguageMatches []*struct { + Common + ParadigmLocales []*struct { + Common + Locales string `xml:"locales,attr"` + } `xml:"paradigmLocales"` + MatchVariable []*struct { + Common + Id string `xml:"id,attr"` + Value string `xml:"value,attr"` + } `xml:"matchVariable"` + LanguageMatch []*struct { + Common + Desired string `xml:"desired,attr"` + Supported string `xml:"supported,attr"` + Percent string `xml:"percent,attr"` + Distance string `xml:"distance,attr"` + Oneway string `xml:"oneway,attr"` + } `xml:"languageMatch"` + } `xml:"languageMatches"` + } `xml:"languageMatching"` + DayPeriodRuleSet []*struct { + Common + DayPeriodRules []*struct { + Common + Locales string `xml:"locales,attr"` + DayPeriodRule []*struct { + Common + At string `xml:"at,attr"` + After string `xml:"after,attr"` + Before string `xml:"before,attr"` + From string `xml:"from,attr"` + To string `xml:"to,attr"` + } `xml:"dayPeriodRule"` + } `xml:"dayPeriodRules"` + } `xml:"dayPeriodRuleSet"` + MetaZones *struct { + Common + MetazoneInfo *struct { + Common + Timezone []*struct { + Common + UsesMetazone []*struct { + Common + From string `xml:"from,attr"` + To string `xml:"to,attr"` + Mzone string `xml:"mzone,attr"` + } `xml:"usesMetazone"` + } `xml:"timezone"` + } `xml:"metazoneInfo"` + MapTimezones *struct { + Common + OtherVersion string `xml:"otherVersion,attr"` + TypeVersion string `xml:"typeVersion,attr"` + MapZone []*struct { + Common + Other string `xml:"other,attr"` + Territory string `xml:"territory,attr"` + } `xml:"mapZone"` + } `xml:"mapTimezones"` + } `xml:"metaZones"` + PrimaryZones *struct { + Common + PrimaryZone []*struct { + Common + Iso3166 string `xml:"iso3166,attr"` + } `xml:"primaryZone"` + } `xml:"primaryZones"` + WindowsZones *struct { + Common + MapTimezones *struct { + Common + OtherVersion string `xml:"otherVersion,attr"` + TypeVersion string `xml:"typeVersion,attr"` + MapZone []*struct { + Common + Other string `xml:"other,attr"` + Territory string `xml:"territory,attr"` + } `xml:"mapZone"` + } `xml:"mapTimezones"` + } `xml:"windowsZones"` + CoverageLevels *struct { + Common + ApprovalRequirements *struct { + Common + ApprovalRequirement []*struct { + Common + Votes string `xml:"votes,attr"` + Locales string `xml:"locales,attr"` + Paths string `xml:"paths,attr"` + } `xml:"approvalRequirement"` + } `xml:"approvalRequirements"` + CoverageVariable []*struct { + Common + Key string `xml:"key,attr"` + Value string `xml:"value,attr"` + } `xml:"coverageVariable"` + CoverageLevel []*struct { + Common + InLanguage string `xml:"inLanguage,attr"` + InScript string `xml:"inScript,attr"` + InTerritory string `xml:"inTerritory,attr"` + Value string `xml:"value,attr"` + Match string `xml:"match,attr"` + } `xml:"coverageLevel"` + } `xml:"coverageLevels"` + IdValidity *struct { + Common + Id []*struct { + Common + IdStatus string `xml:"idStatus,attr"` + } `xml:"id"` + } `xml:"idValidity"` + RgScope *struct { + Common + RgPath []*struct { + Common + Path string `xml:"path,attr"` + } `xml:"rgPath"` + } `xml:"rgScope"` +} + +// LDML is the top-level type for locale-specific data. +type LDML struct { + Common + Version string `xml:"version,attr"` + Identity *struct { + Common + Version *struct { + Common + Number string `xml:"number,attr"` + } `xml:"version"` + Generation *struct { + Common + Date string `xml:"date,attr"` + } `xml:"generation"` + Language *Common `xml:"language"` + Script *Common `xml:"script"` + Territory *Common `xml:"territory"` + Variant *Common `xml:"variant"` + } `xml:"identity"` + LocaleDisplayNames *LocaleDisplayNames `xml:"localeDisplayNames"` + Layout *struct { + Common + Orientation []*struct { + Common + Characters string `xml:"characters,attr"` + Lines string `xml:"lines,attr"` + CharacterOrder []*Common `xml:"characterOrder"` + LineOrder []*Common `xml:"lineOrder"` + } `xml:"orientation"` + InList []*struct { + Common + Casing string `xml:"casing,attr"` + } `xml:"inList"` + InText []*Common `xml:"inText"` + } `xml:"layout"` + ContextTransforms *struct { + Common + ContextTransformUsage []*struct { + Common + ContextTransform []*Common `xml:"contextTransform"` + } `xml:"contextTransformUsage"` + } `xml:"contextTransforms"` + Characters *struct { + Common + ExemplarCharacters []*Common `xml:"exemplarCharacters"` + Ellipsis []*Common `xml:"ellipsis"` + MoreInformation []*Common `xml:"moreInformation"` + Stopwords []*struct { + Common + StopwordList []*Common `xml:"stopwordList"` + } `xml:"stopwords"` + IndexLabels []*struct { + Common + IndexSeparator []*Common `xml:"indexSeparator"` + CompressedIndexSeparator []*Common `xml:"compressedIndexSeparator"` + IndexRangePattern []*Common `xml:"indexRangePattern"` + IndexLabelBefore []*Common `xml:"indexLabelBefore"` + IndexLabelAfter []*Common `xml:"indexLabelAfter"` + IndexLabel []*struct { + Common + IndexSource string `xml:"indexSource,attr"` + Priority string `xml:"priority,attr"` + } `xml:"indexLabel"` + } `xml:"indexLabels"` + Mapping []*struct { + Common + Registry string `xml:"registry,attr"` + } `xml:"mapping"` + ParseLenients []*struct { + Common + Scope string `xml:"scope,attr"` + Level string `xml:"level,attr"` + ParseLenient []*struct { + Common + Sample string `xml:"sample,attr"` + } `xml:"parseLenient"` + } `xml:"parseLenients"` + } `xml:"characters"` + Delimiters *struct { + Common + QuotationStart []*Common `xml:"quotationStart"` + QuotationEnd []*Common `xml:"quotationEnd"` + AlternateQuotationStart []*Common `xml:"alternateQuotationStart"` + AlternateQuotationEnd []*Common `xml:"alternateQuotationEnd"` + } `xml:"delimiters"` + Measurement *struct { + Common + MeasurementSystem []*Common `xml:"measurementSystem"` + PaperSize []*struct { + Common + Height []*Common `xml:"height"` + Width []*Common `xml:"width"` + } `xml:"paperSize"` + } `xml:"measurement"` + Dates *struct { + Common + LocalizedPatternChars []*Common `xml:"localizedPatternChars"` + DateRangePattern []*Common `xml:"dateRangePattern"` + Calendars *struct { + Common + Calendar []*Calendar `xml:"calendar"` + } `xml:"calendars"` + Fields *struct { + Common + Field []*struct { + Common + DisplayName []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"displayName"` + Relative []*Common `xml:"relative"` + RelativeTime []*struct { + Common + RelativeTimePattern []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"relativeTimePattern"` + } `xml:"relativeTime"` + RelativePeriod []*Common `xml:"relativePeriod"` + } `xml:"field"` + } `xml:"fields"` + TimeZoneNames *TimeZoneNames `xml:"timeZoneNames"` + } `xml:"dates"` + Numbers *Numbers `xml:"numbers"` + Units *struct { + Common + Unit []*struct { + Common + DisplayName []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"displayName"` + UnitPattern []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"unitPattern"` + PerUnitPattern []*Common `xml:"perUnitPattern"` + } `xml:"unit"` + UnitLength []*struct { + Common + CompoundUnit []*struct { + Common + CompoundUnitPattern []*Common `xml:"compoundUnitPattern"` + } `xml:"compoundUnit"` + Unit []*struct { + Common + DisplayName []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"displayName"` + UnitPattern []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"unitPattern"` + PerUnitPattern []*Common `xml:"perUnitPattern"` + } `xml:"unit"` + CoordinateUnit []*struct { + Common + CoordinateUnitPattern []*Common `xml:"coordinateUnitPattern"` + } `xml:"coordinateUnit"` + } `xml:"unitLength"` + DurationUnit []*struct { + Common + DurationUnitPattern []*Common `xml:"durationUnitPattern"` + } `xml:"durationUnit"` + } `xml:"units"` + ListPatterns *struct { + Common + ListPattern []*struct { + Common + ListPatternPart []*Common `xml:"listPatternPart"` + } `xml:"listPattern"` + } `xml:"listPatterns"` + Collations *struct { + Common + Version string `xml:"version,attr"` + DefaultCollation *Common `xml:"defaultCollation"` + Collation []*Collation `xml:"collation"` + } `xml:"collations"` + Posix *struct { + Common + Messages []*struct { + Common + Yesstr []*Common `xml:"yesstr"` + Nostr []*Common `xml:"nostr"` + Yesexpr []*Common `xml:"yesexpr"` + Noexpr []*Common `xml:"noexpr"` + } `xml:"messages"` + } `xml:"posix"` + CharacterLabels *struct { + Common + CharacterLabelPattern []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"characterLabelPattern"` + CharacterLabel []*Common `xml:"characterLabel"` + } `xml:"characterLabels"` + Segmentations *struct { + Common + Segmentation []*struct { + Common + Variables *struct { + Common + Variable []*struct { + Common + Id string `xml:"id,attr"` + } `xml:"variable"` + } `xml:"variables"` + SegmentRules *struct { + Common + Rule []*struct { + Common + Id string `xml:"id,attr"` + } `xml:"rule"` + } `xml:"segmentRules"` + Exceptions *struct { + Common + Exception []*Common `xml:"exception"` + } `xml:"exceptions"` + Suppressions *struct { + Common + Suppression []*Common `xml:"suppression"` + } `xml:"suppressions"` + } `xml:"segmentation"` + } `xml:"segmentations"` + Rbnf *struct { + Common + RulesetGrouping []*struct { + Common + Ruleset []*struct { + Common + Access string `xml:"access,attr"` + AllowsParsing string `xml:"allowsParsing,attr"` + Rbnfrule []*struct { + Common + Value string `xml:"value,attr"` + Radix string `xml:"radix,attr"` + Decexp string `xml:"decexp,attr"` + } `xml:"rbnfrule"` + } `xml:"ruleset"` + } `xml:"rulesetGrouping"` + } `xml:"rbnf"` + Annotations *struct { + Common + Annotation []*struct { + Common + Cp string `xml:"cp,attr"` + Tts string `xml:"tts,attr"` + } `xml:"annotation"` + } `xml:"annotations"` + Metadata *struct { + Common + CasingData *struct { + Common + CasingItem []*struct { + Common + Override string `xml:"override,attr"` + ForceError string `xml:"forceError,attr"` + } `xml:"casingItem"` + } `xml:"casingData"` + } `xml:"metadata"` + References *struct { + Common + Reference []*struct { + Common + Uri string `xml:"uri,attr"` + } `xml:"reference"` + } `xml:"references"` +} + +// Collation contains rules that specify a certain sort-order, +// as a tailoring of the root order. +// The parsed rules are obtained by passing a RuleProcessor to Collation's +// Process method. +type Collation struct { + Common + Visibility string `xml:"visibility,attr"` + Base *Common `xml:"base"` + Import []*struct { + Common + Source string `xml:"source,attr"` + } `xml:"import"` + Settings *struct { + Common + Strength string `xml:"strength,attr"` + Alternate string `xml:"alternate,attr"` + Backwards string `xml:"backwards,attr"` + Normalization string `xml:"normalization,attr"` + CaseLevel string `xml:"caseLevel,attr"` + CaseFirst string `xml:"caseFirst,attr"` + HiraganaQuaternary string `xml:"hiraganaQuaternary,attr"` + MaxVariable string `xml:"maxVariable,attr"` + Numeric string `xml:"numeric,attr"` + Private string `xml:"private,attr"` + VariableTop string `xml:"variableTop,attr"` + Reorder string `xml:"reorder,attr"` + } `xml:"settings"` + SuppressContractions *Common `xml:"suppress_contractions"` + Optimize *Common `xml:"optimize"` + Cr []*Common `xml:"cr"` + rulesElem +} + +// Calendar specifies the fields used for formatting and parsing dates and times. +// The month and quarter names are identified numerically, starting at 1. +// The day (of the week) names are identified with short strings, since there is +// no universally-accepted numeric designation. +type Calendar struct { + Common + Months *struct { + Common + MonthContext []*struct { + Common + MonthWidth []*struct { + Common + Month []*struct { + Common + Yeartype string `xml:"yeartype,attr"` + } `xml:"month"` + } `xml:"monthWidth"` + } `xml:"monthContext"` + } `xml:"months"` + MonthNames *struct { + Common + Month []*struct { + Common + Yeartype string `xml:"yeartype,attr"` + } `xml:"month"` + } `xml:"monthNames"` + MonthAbbr *struct { + Common + Month []*struct { + Common + Yeartype string `xml:"yeartype,attr"` + } `xml:"month"` + } `xml:"monthAbbr"` + MonthPatterns *struct { + Common + MonthPatternContext []*struct { + Common + MonthPatternWidth []*struct { + Common + MonthPattern []*Common `xml:"monthPattern"` + } `xml:"monthPatternWidth"` + } `xml:"monthPatternContext"` + } `xml:"monthPatterns"` + Days *struct { + Common + DayContext []*struct { + Common + DayWidth []*struct { + Common + Day []*Common `xml:"day"` + } `xml:"dayWidth"` + } `xml:"dayContext"` + } `xml:"days"` + DayNames *struct { + Common + Day []*Common `xml:"day"` + } `xml:"dayNames"` + DayAbbr *struct { + Common + Day []*Common `xml:"day"` + } `xml:"dayAbbr"` + Quarters *struct { + Common + QuarterContext []*struct { + Common + QuarterWidth []*struct { + Common + Quarter []*Common `xml:"quarter"` + } `xml:"quarterWidth"` + } `xml:"quarterContext"` + } `xml:"quarters"` + Week *struct { + Common + MinDays []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"minDays"` + FirstDay []*struct { + Common + Day string `xml:"day,attr"` + } `xml:"firstDay"` + WeekendStart []*struct { + Common + Day string `xml:"day,attr"` + Time string `xml:"time,attr"` + } `xml:"weekendStart"` + WeekendEnd []*struct { + Common + Day string `xml:"day,attr"` + Time string `xml:"time,attr"` + } `xml:"weekendEnd"` + } `xml:"week"` + Am []*Common `xml:"am"` + Pm []*Common `xml:"pm"` + DayPeriods *struct { + Common + DayPeriodContext []*struct { + Common + DayPeriodWidth []*struct { + Common + DayPeriod []*Common `xml:"dayPeriod"` + } `xml:"dayPeriodWidth"` + } `xml:"dayPeriodContext"` + } `xml:"dayPeriods"` + Eras *struct { + Common + EraNames *struct { + Common + Era []*Common `xml:"era"` + } `xml:"eraNames"` + EraAbbr *struct { + Common + Era []*Common `xml:"era"` + } `xml:"eraAbbr"` + EraNarrow *struct { + Common + Era []*Common `xml:"era"` + } `xml:"eraNarrow"` + } `xml:"eras"` + CyclicNameSets *struct { + Common + CyclicNameSet []*struct { + Common + CyclicNameContext []*struct { + Common + CyclicNameWidth []*struct { + Common + CyclicName []*Common `xml:"cyclicName"` + } `xml:"cyclicNameWidth"` + } `xml:"cyclicNameContext"` + } `xml:"cyclicNameSet"` + } `xml:"cyclicNameSets"` + DateFormats *struct { + Common + DateFormatLength []*struct { + Common + DateFormat []*struct { + Common + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + DisplayName []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"displayName"` + } `xml:"dateFormat"` + } `xml:"dateFormatLength"` + } `xml:"dateFormats"` + TimeFormats *struct { + Common + TimeFormatLength []*struct { + Common + TimeFormat []*struct { + Common + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + DisplayName []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"displayName"` + } `xml:"timeFormat"` + } `xml:"timeFormatLength"` + } `xml:"timeFormats"` + DateTimeFormats *struct { + Common + DateTimeFormatLength []*struct { + Common + DateTimeFormat []*struct { + Common + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + DisplayName []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"displayName"` + } `xml:"dateTimeFormat"` + } `xml:"dateTimeFormatLength"` + AvailableFormats []*struct { + Common + DateFormatItem []*struct { + Common + Id string `xml:"id,attr"` + Count string `xml:"count,attr"` + } `xml:"dateFormatItem"` + } `xml:"availableFormats"` + AppendItems []*struct { + Common + AppendItem []*struct { + Common + Request string `xml:"request,attr"` + } `xml:"appendItem"` + } `xml:"appendItems"` + IntervalFormats []*struct { + Common + IntervalFormatFallback []*Common `xml:"intervalFormatFallback"` + IntervalFormatItem []*struct { + Common + Id string `xml:"id,attr"` + GreatestDifference []*struct { + Common + Id string `xml:"id,attr"` + } `xml:"greatestDifference"` + } `xml:"intervalFormatItem"` + } `xml:"intervalFormats"` + } `xml:"dateTimeFormats"` + Fields []*struct { + Common + Field []*struct { + Common + DisplayName []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"displayName"` + Relative []*Common `xml:"relative"` + RelativeTime []*struct { + Common + RelativeTimePattern []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"relativeTimePattern"` + } `xml:"relativeTime"` + RelativePeriod []*Common `xml:"relativePeriod"` + } `xml:"field"` + } `xml:"fields"` +} +type TimeZoneNames struct { + Common + HourFormat []*Common `xml:"hourFormat"` + HoursFormat []*Common `xml:"hoursFormat"` + GmtFormat []*Common `xml:"gmtFormat"` + GmtZeroFormat []*Common `xml:"gmtZeroFormat"` + RegionFormat []*Common `xml:"regionFormat"` + FallbackFormat []*Common `xml:"fallbackFormat"` + FallbackRegionFormat []*Common `xml:"fallbackRegionFormat"` + AbbreviationFallback []*Common `xml:"abbreviationFallback"` + PreferenceOrdering []*Common `xml:"preferenceOrdering"` + SingleCountries []*struct { + Common + List string `xml:"list,attr"` + } `xml:"singleCountries"` + Zone []*struct { + Common + Long []*struct { + Common + Generic []*Common `xml:"generic"` + Standard []*Common `xml:"standard"` + Daylight []*Common `xml:"daylight"` + } `xml:"long"` + Short []*struct { + Common + Generic []*Common `xml:"generic"` + Standard []*Common `xml:"standard"` + Daylight []*Common `xml:"daylight"` + } `xml:"short"` + CommonlyUsed []*struct { + Common + Used string `xml:"used,attr"` + } `xml:"commonlyUsed"` + ExemplarCity []*Common `xml:"exemplarCity"` + } `xml:"zone"` + Metazone []*struct { + Common + Long []*struct { + Common + Generic []*Common `xml:"generic"` + Standard []*Common `xml:"standard"` + Daylight []*Common `xml:"daylight"` + } `xml:"long"` + Short []*struct { + Common + Generic []*Common `xml:"generic"` + Standard []*Common `xml:"standard"` + Daylight []*Common `xml:"daylight"` + } `xml:"short"` + CommonlyUsed []*struct { + Common + Used string `xml:"used,attr"` + } `xml:"commonlyUsed"` + } `xml:"metazone"` +} + +// LocaleDisplayNames specifies localized display names for for scripts, languages, +// countries, currencies, and variants. +type LocaleDisplayNames struct { + Common + LocaleDisplayPattern *struct { + Common + LocalePattern []*Common `xml:"localePattern"` + LocaleSeparator []*Common `xml:"localeSeparator"` + LocaleKeyTypePattern []*Common `xml:"localeKeyTypePattern"` + } `xml:"localeDisplayPattern"` + Languages *struct { + Common + Language []*Common `xml:"language"` + } `xml:"languages"` + Scripts *struct { + Common + Script []*Common `xml:"script"` + } `xml:"scripts"` + Territories *struct { + Common + Territory []*Common `xml:"territory"` + } `xml:"territories"` + Subdivisions *struct { + Common + Subdivision []*Common `xml:"subdivision"` + } `xml:"subdivisions"` + Variants *struct { + Common + Variant []*Common `xml:"variant"` + } `xml:"variants"` + Keys *struct { + Common + Key []*Common `xml:"key"` + } `xml:"keys"` + Types *struct { + Common + Type []*struct { + Common + Key string `xml:"key,attr"` + } `xml:"type"` + } `xml:"types"` + TransformNames *struct { + Common + TransformName []*Common `xml:"transformName"` + } `xml:"transformNames"` + MeasurementSystemNames *struct { + Common + MeasurementSystemName []*Common `xml:"measurementSystemName"` + } `xml:"measurementSystemNames"` + CodePatterns *struct { + Common + CodePattern []*Common `xml:"codePattern"` + } `xml:"codePatterns"` +} + +// Numbers supplies information for formatting and parsing numbers and currencies. +type Numbers struct { + Common + DefaultNumberingSystem []*Common `xml:"defaultNumberingSystem"` + OtherNumberingSystems []*struct { + Common + Native []*Common `xml:"native"` + Traditional []*Common `xml:"traditional"` + Finance []*Common `xml:"finance"` + } `xml:"otherNumberingSystems"` + MinimumGroupingDigits []*Common `xml:"minimumGroupingDigits"` + Symbols []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + Decimal []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"decimal"` + Group []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"group"` + List []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"list"` + PercentSign []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"percentSign"` + NativeZeroDigit []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"nativeZeroDigit"` + PatternDigit []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"patternDigit"` + PlusSign []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"plusSign"` + MinusSign []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"minusSign"` + Exponential []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"exponential"` + SuperscriptingExponent []*Common `xml:"superscriptingExponent"` + PerMille []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"perMille"` + Infinity []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"infinity"` + Nan []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"nan"` + CurrencyDecimal []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"currencyDecimal"` + CurrencyGroup []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"currencyGroup"` + TimeSeparator []*Common `xml:"timeSeparator"` + } `xml:"symbols"` + DecimalFormats []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + DecimalFormatLength []*struct { + Common + DecimalFormat []*struct { + Common + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + } `xml:"decimalFormat"` + } `xml:"decimalFormatLength"` + } `xml:"decimalFormats"` + ScientificFormats []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + ScientificFormatLength []*struct { + Common + ScientificFormat []*struct { + Common + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + } `xml:"scientificFormat"` + } `xml:"scientificFormatLength"` + } `xml:"scientificFormats"` + PercentFormats []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + PercentFormatLength []*struct { + Common + PercentFormat []*struct { + Common + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + } `xml:"percentFormat"` + } `xml:"percentFormatLength"` + } `xml:"percentFormats"` + CurrencyFormats []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + CurrencySpacing []*struct { + Common + BeforeCurrency []*struct { + Common + CurrencyMatch []*Common `xml:"currencyMatch"` + SurroundingMatch []*Common `xml:"surroundingMatch"` + InsertBetween []*Common `xml:"insertBetween"` + } `xml:"beforeCurrency"` + AfterCurrency []*struct { + Common + CurrencyMatch []*Common `xml:"currencyMatch"` + SurroundingMatch []*Common `xml:"surroundingMatch"` + InsertBetween []*Common `xml:"insertBetween"` + } `xml:"afterCurrency"` + } `xml:"currencySpacing"` + CurrencyFormatLength []*struct { + Common + CurrencyFormat []*struct { + Common + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + } `xml:"currencyFormat"` + } `xml:"currencyFormatLength"` + UnitPattern []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"unitPattern"` + } `xml:"currencyFormats"` + Currencies *struct { + Common + Currency []*struct { + Common + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + DisplayName []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"displayName"` + Symbol []*Common `xml:"symbol"` + Decimal []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"decimal"` + Group []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + } `xml:"group"` + } `xml:"currency"` + } `xml:"currencies"` + MiscPatterns []*struct { + Common + NumberSystem string `xml:"numberSystem,attr"` + Pattern []*struct { + Common + Numbers string `xml:"numbers,attr"` + Count string `xml:"count,attr"` + } `xml:"pattern"` + } `xml:"miscPatterns"` + MinimalPairs []*struct { + Common + PluralMinimalPairs []*struct { + Common + Count string `xml:"count,attr"` + } `xml:"pluralMinimalPairs"` + OrdinalMinimalPairs []*struct { + Common + Ordinal string `xml:"ordinal,attr"` + } `xml:"ordinalMinimalPairs"` + } `xml:"minimalPairs"` +} + +// Version is the version of CLDR from which the XML definitions are generated. +const Version = "31" diff --git a/vendor/golang.org/x/text/unicode/norm/maketables.go b/vendor/golang.org/x/text/unicode/norm/maketables.go new file mode 100644 index 0000000000..8d418160ca --- /dev/null +++ b/vendor/golang.org/x/text/unicode/norm/maketables.go @@ -0,0 +1,976 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// Normalization table generator. +// Data read from the web. +// See forminfo.go for a description of the trie values associated with each rune. + +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "log" + "sort" + "strconv" + "strings" + + "golang.org/x/text/internal/gen" + "golang.org/x/text/internal/triegen" + "golang.org/x/text/internal/ucd" +) + +func main() { + gen.Init() + loadUnicodeData() + compactCCC() + loadCompositionExclusions() + completeCharFields(FCanonical) + completeCharFields(FCompatibility) + computeNonStarterCounts() + verifyComputed() + printChars() + testDerived() + printTestdata() + makeTables() +} + +var ( + tablelist = flag.String("tables", + "all", + "comma-separated list of which tables to generate; "+ + "can be 'decomp', 'recomp', 'info' and 'all'") + test = flag.Bool("test", + false, + "test existing tables against DerivedNormalizationProps and generate test data for regression testing") + verbose = flag.Bool("verbose", + false, + "write data to stdout as it is parsed") +) + +const MaxChar = 0x10FFFF // anything above this shouldn't exist + +// Quick Check properties of runes allow us to quickly +// determine whether a rune may occur in a normal form. +// For a given normal form, a rune may be guaranteed to occur +// verbatim (QC=Yes), may or may not combine with another +// rune (QC=Maybe), or may not occur (QC=No). +type QCResult int + +const ( + QCUnknown QCResult = iota + QCYes + QCNo + QCMaybe +) + +func (r QCResult) String() string { + switch r { + case QCYes: + return "Yes" + case QCNo: + return "No" + case QCMaybe: + return "Maybe" + } + return "***UNKNOWN***" +} + +const ( + FCanonical = iota // NFC or NFD + FCompatibility // NFKC or NFKD + FNumberOfFormTypes +) + +const ( + MComposed = iota // NFC or NFKC + MDecomposed // NFD or NFKD + MNumberOfModes +) + +// This contains only the properties we're interested in. +type Char struct { + name string + codePoint rune // if zero, this index is not a valid code point. + ccc uint8 // canonical combining class + origCCC uint8 + excludeInComp bool // from CompositionExclusions.txt + compatDecomp bool // it has a compatibility expansion + + nTrailingNonStarters uint8 + nLeadingNonStarters uint8 // must be equal to trailing if non-zero + + forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility + + state State +} + +var chars = make([]Char, MaxChar+1) +var cccMap = make(map[uint8]uint8) + +func (c Char) String() string { + buf := new(bytes.Buffer) + + fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name) + fmt.Fprintf(buf, " ccc: %v\n", c.ccc) + fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp) + fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp) + fmt.Fprintf(buf, " state: %v\n", c.state) + fmt.Fprintf(buf, " NFC:\n") + fmt.Fprint(buf, c.forms[FCanonical]) + fmt.Fprintf(buf, " NFKC:\n") + fmt.Fprint(buf, c.forms[FCompatibility]) + + return buf.String() +} + +// In UnicodeData.txt, some ranges are marked like this: +// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;; +// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;; +// parseCharacter keeps a state variable indicating the weirdness. +type State int + +const ( + SNormal State = iota // known to be zero for the type + SFirst + SLast + SMissing +) + +var lastChar = rune('\u0000') + +func (c Char) isValid() bool { + return c.codePoint != 0 && c.state != SMissing +} + +type FormInfo struct { + quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed + verified [MNumberOfModes]bool // index: MComposed or MDecomposed + + combinesForward bool // May combine with rune on the right + combinesBackward bool // May combine with rune on the left + isOneWay bool // Never appears in result + inDecomp bool // Some decompositions result in this char. + decomp Decomposition + expandedDecomp Decomposition +} + +func (f FormInfo) String() string { + buf := bytes.NewBuffer(make([]byte, 0)) + + fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed]) + fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed]) + fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward) + fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward) + fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay) + fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp) + fmt.Fprintf(buf, " decomposition: %X\n", f.decomp) + fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp) + + return buf.String() +} + +type Decomposition []rune + +func parseDecomposition(s string, skipfirst bool) (a []rune, err error) { + decomp := strings.Split(s, " ") + if len(decomp) > 0 && skipfirst { + decomp = decomp[1:] + } + for _, d := range decomp { + point, err := strconv.ParseUint(d, 16, 64) + if err != nil { + return a, err + } + a = append(a, rune(point)) + } + return a, nil +} + +func loadUnicodeData() { + f := gen.OpenUCDFile("UnicodeData.txt") + defer f.Close() + p := ucd.New(f) + for p.Next() { + r := p.Rune(ucd.CodePoint) + char := &chars[r] + + char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass)) + decmap := p.String(ucd.DecompMapping) + + exp, err := parseDecomposition(decmap, false) + isCompat := false + if err != nil { + if len(decmap) > 0 { + exp, err = parseDecomposition(decmap, true) + if err != nil { + log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err) + } + isCompat = true + } + } + + char.name = p.String(ucd.Name) + char.codePoint = r + char.forms[FCompatibility].decomp = exp + if !isCompat { + char.forms[FCanonical].decomp = exp + } else { + char.compatDecomp = true + } + if len(decmap) > 0 { + char.forms[FCompatibility].decomp = exp + } + } + if err := p.Err(); err != nil { + log.Fatal(err) + } +} + +// compactCCC converts the sparse set of CCC values to a continguous one, +// reducing the number of bits needed from 8 to 6. +func compactCCC() { + m := make(map[uint8]uint8) + for i := range chars { + c := &chars[i] + m[c.ccc] = 0 + } + cccs := []int{} + for v, _ := range m { + cccs = append(cccs, int(v)) + } + sort.Ints(cccs) + for i, c := range cccs { + cccMap[uint8(i)] = uint8(c) + m[uint8(c)] = uint8(i) + } + for i := range chars { + c := &chars[i] + c.origCCC = c.ccc + c.ccc = m[c.ccc] + } + if len(m) >= 1<<6 { + log.Fatalf("too many difference CCC values: %d >= 64", len(m)) + } +} + +// CompositionExclusions.txt has form: +// 0958 # ... +// See http://unicode.org/reports/tr44/ for full explanation +func loadCompositionExclusions() { + f := gen.OpenUCDFile("CompositionExclusions.txt") + defer f.Close() + p := ucd.New(f) + for p.Next() { + c := &chars[p.Rune(0)] + if c.excludeInComp { + log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint) + } + c.excludeInComp = true + } + if e := p.Err(); e != nil { + log.Fatal(e) + } +} + +// hasCompatDecomp returns true if any of the recursive +// decompositions contains a compatibility expansion. +// In this case, the character may not occur in NFK*. +func hasCompatDecomp(r rune) bool { + c := &chars[r] + if c.compatDecomp { + return true + } + for _, d := range c.forms[FCompatibility].decomp { + if hasCompatDecomp(d) { + return true + } + } + return false +} + +// Hangul related constants. +const ( + HangulBase = 0xAC00 + HangulEnd = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28) + + JamoLBase = 0x1100 + JamoLEnd = 0x1113 + JamoVBase = 0x1161 + JamoVEnd = 0x1176 + JamoTBase = 0x11A8 + JamoTEnd = 0x11C3 + + JamoLVTCount = 19 * 21 * 28 + JamoTCount = 28 +) + +func isHangul(r rune) bool { + return HangulBase <= r && r < HangulEnd +} + +func isHangulWithoutJamoT(r rune) bool { + if !isHangul(r) { + return false + } + r -= HangulBase + return r < JamoLVTCount && r%JamoTCount == 0 +} + +func ccc(r rune) uint8 { + return chars[r].ccc +} + +// Insert a rune in a buffer, ordered by Canonical Combining Class. +func insertOrdered(b Decomposition, r rune) Decomposition { + n := len(b) + b = append(b, 0) + cc := ccc(r) + if cc > 0 { + // Use bubble sort. + for ; n > 0; n-- { + if ccc(b[n-1]) <= cc { + break + } + b[n] = b[n-1] + } + } + b[n] = r + return b +} + +// Recursively decompose. +func decomposeRecursive(form int, r rune, d Decomposition) Decomposition { + dcomp := chars[r].forms[form].decomp + if len(dcomp) == 0 { + return insertOrdered(d, r) + } + for _, c := range dcomp { + d = decomposeRecursive(form, c, d) + } + return d +} + +func completeCharFields(form int) { + // Phase 0: pre-expand decomposition. + for i := range chars { + f := &chars[i].forms[form] + if len(f.decomp) == 0 { + continue + } + exp := make(Decomposition, 0) + for _, c := range f.decomp { + exp = decomposeRecursive(form, c, exp) + } + f.expandedDecomp = exp + } + + // Phase 1: composition exclusion, mark decomposition. + for i := range chars { + c := &chars[i] + f := &c.forms[form] + + // Marks script-specific exclusions and version restricted. + f.isOneWay = c.excludeInComp + + // Singletons + f.isOneWay = f.isOneWay || len(f.decomp) == 1 + + // Non-starter decompositions + if len(f.decomp) > 1 { + chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0 + f.isOneWay = f.isOneWay || chk + } + + // Runes that decompose into more than two runes. + f.isOneWay = f.isOneWay || len(f.decomp) > 2 + + if form == FCompatibility { + f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint) + } + + for _, r := range f.decomp { + chars[r].forms[form].inDecomp = true + } + } + + // Phase 2: forward and backward combining. + for i := range chars { + c := &chars[i] + f := &c.forms[form] + + if !f.isOneWay && len(f.decomp) == 2 { + f0 := &chars[f.decomp[0]].forms[form] + f1 := &chars[f.decomp[1]].forms[form] + if !f0.isOneWay { + f0.combinesForward = true + } + if !f1.isOneWay { + f1.combinesBackward = true + } + } + if isHangulWithoutJamoT(rune(i)) { + f.combinesForward = true + } + } + + // Phase 3: quick check values. + for i := range chars { + c := &chars[i] + f := &c.forms[form] + + switch { + case len(f.decomp) > 0: + f.quickCheck[MDecomposed] = QCNo + case isHangul(rune(i)): + f.quickCheck[MDecomposed] = QCNo + default: + f.quickCheck[MDecomposed] = QCYes + } + switch { + case f.isOneWay: + f.quickCheck[MComposed] = QCNo + case (i & 0xffff00) == JamoLBase: + f.quickCheck[MComposed] = QCYes + if JamoLBase <= i && i < JamoLEnd { + f.combinesForward = true + } + if JamoVBase <= i && i < JamoVEnd { + f.quickCheck[MComposed] = QCMaybe + f.combinesBackward = true + f.combinesForward = true + } + if JamoTBase <= i && i < JamoTEnd { + f.quickCheck[MComposed] = QCMaybe + f.combinesBackward = true + } + case !f.combinesBackward: + f.quickCheck[MComposed] = QCYes + default: + f.quickCheck[MComposed] = QCMaybe + } + } +} + +func computeNonStarterCounts() { + // Phase 4: leading and trailing non-starter count + for i := range chars { + c := &chars[i] + + runes := []rune{rune(i)} + // We always use FCompatibility so that the CGJ insertion points do not + // change for repeated normalizations with different forms. + if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 { + runes = exp + } + // We consider runes that combine backwards to be non-starters for the + // purpose of Stream-Safe Text Processing. + for _, r := range runes { + if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward { + break + } + c.nLeadingNonStarters++ + } + for i := len(runes) - 1; i >= 0; i-- { + if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward { + break + } + c.nTrailingNonStarters++ + } + if c.nTrailingNonStarters > 3 { + log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes) + } + + if isHangul(rune(i)) { + c.nTrailingNonStarters = 2 + if isHangulWithoutJamoT(rune(i)) { + c.nTrailingNonStarters = 1 + } + } + + if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t { + log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t) + } + if t := c.nTrailingNonStarters; t > 3 { + log.Fatalf("%U: number of trailing non-starters is %d > 3", t) + } + } +} + +func printBytes(w io.Writer, b []byte, name string) { + fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b)) + fmt.Fprintf(w, "var %s = [...]byte {", name) + for i, c := range b { + switch { + case i%64 == 0: + fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63) + case i%8 == 0: + fmt.Fprintf(w, "\n") + } + fmt.Fprintf(w, "0x%.2X, ", c) + } + fmt.Fprint(w, "\n}\n\n") +} + +// See forminfo.go for format. +func makeEntry(f *FormInfo, c *Char) uint16 { + e := uint16(0) + if r := c.codePoint; HangulBase <= r && r < HangulEnd { + e |= 0x40 + } + if f.combinesForward { + e |= 0x20 + } + if f.quickCheck[MDecomposed] == QCNo { + e |= 0x4 + } + switch f.quickCheck[MComposed] { + case QCYes: + case QCNo: + e |= 0x10 + case QCMaybe: + e |= 0x18 + default: + log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed]) + } + e |= uint16(c.nTrailingNonStarters) + return e +} + +// decompSet keeps track of unique decompositions, grouped by whether +// the decomposition is followed by a trailing and/or leading CCC. +type decompSet [7]map[string]bool + +const ( + normalDecomp = iota + firstMulti + firstCCC + endMulti + firstLeadingCCC + firstCCCZeroExcept + firstStarterWithNLead + lastDecomp +) + +var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"} + +func makeDecompSet() decompSet { + m := decompSet{} + for i := range m { + m[i] = make(map[string]bool) + } + return m +} +func (m *decompSet) insert(key int, s string) { + m[key][s] = true +} + +func printCharInfoTables(w io.Writer) int { + mkstr := func(r rune, f *FormInfo) (int, string) { + d := f.expandedDecomp + s := string([]rune(d)) + if max := 1 << 6; len(s) >= max { + const msg = "%U: too many bytes in decomposition: %d >= %d" + log.Fatalf(msg, r, len(s), max) + } + head := uint8(len(s)) + if f.quickCheck[MComposed] != QCYes { + head |= 0x40 + } + if f.combinesForward { + head |= 0x80 + } + s = string([]byte{head}) + s + + lccc := ccc(d[0]) + tccc := ccc(d[len(d)-1]) + cc := ccc(r) + if cc != 0 && lccc == 0 && tccc == 0 { + log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc) + } + if tccc < lccc && lccc != 0 { + const msg = "%U: lccc (%d) must be <= tcc (%d)" + log.Fatalf(msg, r, lccc, tccc) + } + index := normalDecomp + nTrail := chars[r].nTrailingNonStarters + nLead := chars[r].nLeadingNonStarters + if tccc > 0 || lccc > 0 || nTrail > 0 { + tccc <<= 2 + tccc |= nTrail + s += string([]byte{tccc}) + index = endMulti + for _, r := range d[1:] { + if ccc(r) == 0 { + index = firstCCC + } + } + if lccc > 0 || nLead > 0 { + s += string([]byte{lccc}) + if index == firstCCC { + log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r) + } + index = firstLeadingCCC + } + if cc != lccc { + if cc != 0 { + log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc) + } + index = firstCCCZeroExcept + } + } else if len(d) > 1 { + index = firstMulti + } + return index, s + } + + decompSet := makeDecompSet() + const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail. + decompSet.insert(firstStarterWithNLead, nLeadStr) + + // Store the uniqued decompositions in a byte buffer, + // preceded by their byte length. + for _, c := range chars { + for _, f := range c.forms { + if len(f.expandedDecomp) == 0 { + continue + } + if f.combinesBackward { + log.Fatalf("%U: combinesBackward and decompose", c.codePoint) + } + index, s := mkstr(c.codePoint, &f) + decompSet.insert(index, s) + } + } + + decompositions := bytes.NewBuffer(make([]byte, 0, 10000)) + size := 0 + positionMap := make(map[string]uint16) + decompositions.WriteString("\000") + fmt.Fprintln(w, "const (") + for i, m := range decompSet { + sa := []string{} + for s := range m { + sa = append(sa, s) + } + sort.Strings(sa) + for _, s := range sa { + p := decompositions.Len() + decompositions.WriteString(s) + positionMap[s] = uint16(p) + } + if cname[i] != "" { + fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len()) + } + } + fmt.Fprintln(w, "maxDecomp = 0x8000") + fmt.Fprintln(w, ")") + b := decompositions.Bytes() + printBytes(w, b, "decomps") + size += len(b) + + varnames := []string{"nfc", "nfkc"} + for i := 0; i < FNumberOfFormTypes; i++ { + trie := triegen.NewTrie(varnames[i]) + + for r, c := range chars { + f := c.forms[i] + d := f.expandedDecomp + if len(d) != 0 { + _, key := mkstr(c.codePoint, &f) + trie.Insert(rune(r), uint64(positionMap[key])) + if c.ccc != ccc(d[0]) { + // We assume the lead ccc of a decomposition !=0 in this case. + if ccc(d[0]) == 0 { + log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc) + } + } + } else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward { + // Handle cases where it can't be detected that the nLead should be equal + // to nTrail. + trie.Insert(c.codePoint, uint64(positionMap[nLeadStr])) + } else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 { + trie.Insert(c.codePoint, uint64(0x8000|v)) + } + } + sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]})) + if err != nil { + log.Fatal(err) + } + size += sz + } + return size +} + +func contains(sa []string, s string) bool { + for _, a := range sa { + if a == s { + return true + } + } + return false +} + +func makeTables() { + w := &bytes.Buffer{} + + size := 0 + if *tablelist == "" { + return + } + list := strings.Split(*tablelist, ",") + if *tablelist == "all" { + list = []string{"recomp", "info"} + } + + // Compute maximum decomposition size. + max := 0 + for _, c := range chars { + if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max { + max = n + } + } + + fmt.Fprintln(w, "const (") + fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.") + fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion()) + fmt.Fprintln(w) + fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform") + fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at") + fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that") + fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.") + fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max) + fmt.Fprintln(w, ")\n") + + // Print the CCC remap table. + size += len(cccMap) + fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap)) + for i := 0; i < len(cccMap); i++ { + if i%8 == 0 { + fmt.Fprintln(w) + } + fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)]) + } + fmt.Fprintln(w, "\n}\n") + + if contains(list, "info") { + size += printCharInfoTables(w) + } + + if contains(list, "recomp") { + // Note that we use 32 bit keys, instead of 64 bit. + // This clips the bits of three entries, but we know + // this won't cause a collision. The compiler will catch + // any changes made to UnicodeData.txt that introduces + // a collision. + // Note that the recomposition map for NFC and NFKC + // are identical. + + // Recomposition map + nrentries := 0 + for _, c := range chars { + f := c.forms[FCanonical] + if !f.isOneWay && len(f.decomp) > 0 { + nrentries++ + } + } + sz := nrentries * 8 + size += sz + fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz) + fmt.Fprintln(w, "var recompMap = map[uint32]rune{") + for i, c := range chars { + f := c.forms[FCanonical] + d := f.decomp + if !f.isOneWay && len(d) > 0 { + key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1])) + fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i) + } + } + fmt.Fprintf(w, "}\n\n") + } + + fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size) + gen.WriteGoFile("tables.go", "norm", w.Bytes()) +} + +func printChars() { + if *verbose { + for _, c := range chars { + if !c.isValid() || c.state == SMissing { + continue + } + fmt.Println(c) + } + } +} + +// verifyComputed does various consistency tests. +func verifyComputed() { + for i, c := range chars { + for _, f := range c.forms { + isNo := (f.quickCheck[MDecomposed] == QCNo) + if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) { + log.Fatalf("%U: NF*D QC must be No if rune decomposes", i) + } + + isMaybe := f.quickCheck[MComposed] == QCMaybe + if f.combinesBackward != isMaybe { + log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i) + } + if len(f.decomp) > 0 && f.combinesForward && isMaybe { + log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i) + } + + if len(f.expandedDecomp) != 0 { + continue + } + if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b { + // We accept these runes to be treated differently (it only affects + // segment breaking in iteration, most likely on improper use), but + // reconsider if more characters are added. + // U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;; + // U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;; + // U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;; + // U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;; + // U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;; + // U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;; + if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) { + log.Fatalf("%U: nLead was %v; want %v", i, a, b) + } + } + } + nfc := c.forms[FCanonical] + nfkc := c.forms[FCompatibility] + if nfc.combinesBackward != nfkc.combinesBackward { + log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint) + } + } +} + +// Use values in DerivedNormalizationProps.txt to compare against the +// values we computed. +// DerivedNormalizationProps.txt has form: +// 00C0..00C5 ; NFD_QC; N # ... +// 0374 ; NFD_QC; N # ... +// See http://unicode.org/reports/tr44/ for full explanation +func testDerived() { + f := gen.OpenUCDFile("DerivedNormalizationProps.txt") + defer f.Close() + p := ucd.New(f) + for p.Next() { + r := p.Rune(0) + c := &chars[r] + + var ftype, mode int + qt := p.String(1) + switch qt { + case "NFC_QC": + ftype, mode = FCanonical, MComposed + case "NFD_QC": + ftype, mode = FCanonical, MDecomposed + case "NFKC_QC": + ftype, mode = FCompatibility, MComposed + case "NFKD_QC": + ftype, mode = FCompatibility, MDecomposed + default: + continue + } + var qr QCResult + switch p.String(2) { + case "Y": + qr = QCYes + case "N": + qr = QCNo + case "M": + qr = QCMaybe + default: + log.Fatalf(`Unexpected quick check value "%s"`, p.String(2)) + } + if got := c.forms[ftype].quickCheck[mode]; got != qr { + log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr) + } + c.forms[ftype].verified[mode] = true + } + if err := p.Err(); err != nil { + log.Fatal(err) + } + // Any unspecified value must be QCYes. Verify this. + for i, c := range chars { + for j, fd := range c.forms { + for k, qr := range fd.quickCheck { + if !fd.verified[k] && qr != QCYes { + m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n" + log.Printf(m, i, j, k, qr, c.name) + } + } + } + } +} + +var testHeader = `const ( + Yes = iota + No + Maybe +) + +type formData struct { + qc uint8 + combinesForward bool + decomposition string +} + +type runeData struct { + r rune + ccc uint8 + nLead uint8 + nTrail uint8 + f [2]formData // 0: canonical; 1: compatibility +} + +func f(qc uint8, cf bool, dec string) [2]formData { + return [2]formData{{qc, cf, dec}, {qc, cf, dec}} +} + +func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData { + return [2]formData{{qc, cf, d}, {qck, cfk, dk}} +} + +var testData = []runeData{ +` + +func printTestdata() { + type lastInfo struct { + ccc uint8 + nLead uint8 + nTrail uint8 + f string + } + + last := lastInfo{} + w := &bytes.Buffer{} + fmt.Fprintf(w, testHeader) + for r, c := range chars { + f := c.forms[FCanonical] + qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) + f = c.forms[FCompatibility] + qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) + s := "" + if d == dk && qc == qck && cf == cfk { + s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d) + } else { + s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk) + } + current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s} + if last != current { + fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s) + last = current + } + } + fmt.Fprintln(w, "}") + gen.WriteGoFile("data_test.go", "norm", w.Bytes()) +} diff --git a/vendor/golang.org/x/text/unicode/norm/triegen.go b/vendor/golang.org/x/text/unicode/norm/triegen.go new file mode 100644 index 0000000000..45d711900d --- /dev/null +++ b/vendor/golang.org/x/text/unicode/norm/triegen.go @@ -0,0 +1,117 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// Trie table generator. +// Used by make*tables tools to generate a go file with trie data structures +// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte +// sequence are used to lookup offsets in the index table to be used for the +// next byte. The last byte is used to index into a table with 16-bit values. + +package main + +import ( + "fmt" + "io" +) + +const maxSparseEntries = 16 + +type normCompacter struct { + sparseBlocks [][]uint64 + sparseOffset []uint16 + sparseCount int + name string +} + +func mostFrequentStride(a []uint64) int { + counts := make(map[int]int) + var v int + for _, x := range a { + if stride := int(x) - v; v != 0 && stride >= 0 { + counts[stride]++ + } + v = int(x) + } + var maxs, maxc int + for stride, cnt := range counts { + if cnt > maxc || (cnt == maxc && stride < maxs) { + maxs, maxc = stride, cnt + } + } + return maxs +} + +func countSparseEntries(a []uint64) int { + stride := mostFrequentStride(a) + var v, count int + for _, tv := range a { + if int(tv)-v != stride { + if tv != 0 { + count++ + } + } + v = int(tv) + } + return count +} + +func (c *normCompacter) Size(v []uint64) (sz int, ok bool) { + if n := countSparseEntries(v); n <= maxSparseEntries { + return (n+1)*4 + 2, true + } + return 0, false +} + +func (c *normCompacter) Store(v []uint64) uint32 { + h := uint32(len(c.sparseOffset)) + c.sparseBlocks = append(c.sparseBlocks, v) + c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount)) + c.sparseCount += countSparseEntries(v) + 1 + return h +} + +func (c *normCompacter) Handler() string { + return c.name + "Sparse.lookup" +} + +func (c *normCompacter) Print(w io.Writer) (retErr error) { + p := func(f string, x ...interface{}) { + if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil { + retErr = err + } + } + + ls := len(c.sparseBlocks) + p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2) + p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset) + + ns := c.sparseCount + p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4) + p("var %sSparseValues = [%d]valueRange {", c.name, ns) + for i, b := range c.sparseBlocks { + p("\n// Block %#x, offset %#x", i, c.sparseOffset[i]) + var v int + stride := mostFrequentStride(b) + n := countSparseEntries(b) + p("\n{value:%#04x,lo:%#02x},", stride, uint8(n)) + for i, nv := range b { + if int(nv)-v != stride { + if v != 0 { + p(",hi:%#02x},", 0x80+i-1) + } + if nv != 0 { + p("\n{value:%#04x,lo:%#02x", nv, 0x80+i) + } + } + v = int(nv) + } + if v != 0 { + p(",hi:%#02x},", 0x80+len(b)-1) + } + } + p("\n}\n\n") + return +} |