mondash/vendor/golang.org/x/text/unicode/cldr/makexml.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// This tool generates types for the various XML formats of CLDR.
package main

import (
	"archive/zip"
	"bytes"
	"encoding/xml"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"regexp"
	"strings"

	"golang.org/x/text/internal/gen"
)

// outputFile is the value of the -output flag: the name of the Go file that
// main hands to gen.WriteGoFile. It defaults to "xml.go".
var outputFile = flag.String("output", "xml.go", "output file name")

// main reads the CLDR core zip archive into memory, generates Go type
// definitions for every DTD configured in files, appends a Version constant,
// and writes the result to the file named by the -output flag.
func main() {
	flag.Parse()

	r := gen.OpenCLDRCoreZip()
	buffer, err := ioutil.ReadAll(r)
	// Release the reader before checking err: log.Fatalf exits without
	// running deferred calls, and the data is either fully buffered or unusable.
	r.Close()
	if err != nil {
		log.Fatalf("Could not read zip file: %v", err)
	}
	z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
	if err != nil {
		log.Fatalf("Could not read zip archive: %v", err)
	}

	var buf bytes.Buffer

	version := gen.CLDRVersion()

	for _, dtd := range files {
		for _, f := range z.File {
			if !strings.HasSuffix(f.Name, dtd.file+".dtd") {
				continue
			}
			r, err := f.Open()
			failOnError(err)

			b := makeBuilder(&buf, dtd)
			b.parseDTD(r)
			// The DTD has been consumed; close the archive entry so it is
			// not leaked for the remainder of the run.
			failOnError(r.Close())

			// The first entry of top names the root element. Fail with a
			// clear message instead of a nil dereference inside resolve.
			root := b.index[dtd.top[0]]
			if root == nil {
				log.Fatalf("main: root element %q not found in %s", dtd.top[0], f.Name)
			}
			b.resolve(root)
			b.write()
			if b.version != "" && version != b.version {
				println(f.Name)
				log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
			}
			// Only the first archive entry matching this DTD is processed.
			break
		}
	}
	fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
	fmt.Fprintf(&buf, "const Version = %q\n", version)

	gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
}

// failOnError is a no-op for a nil err. Otherwise it reports the error on
// standard error, prefixed with a short file name and line number, and
// terminates the program with exit status 1.
func failOnError(err error) {
	if err == nil {
		return
	}
	// Call depth 2 attributes the message to failOnError's caller rather
	// than to this helper.
	stderr := log.New(os.Stderr, "", log.Lshortfile)
	stderr.Output(2, err.Error())
	os.Exit(1)
}

// dtd holds the per-DTD configuration that drives code generation. Each entry
// of files is one dtd value; main uses file to locate the "<file>.dtd" entry
// in the archive and top[0] as the name of the root element to resolve.
type dtd struct {
	file string   // base file name
	root string   // Go name of the root XML element
	top  []string // create a different type for this section

	skipElem    []string // hard-coded or deprecated elements
	skipAttr    []string // attributes to exclude
	predefined  []string // hard-coded elements exist of the form <name>Elem
	forceRepeat []string // elements to make slices despite DTD
}

// files lists the DTDs to process. main handles them in this order, so this
// is also the order in which the generated types appear in the output.
var files = []dtd{
	{
		file: "ldmlBCP47",
		root: "LDMLBCP47",
		top:  []string{"ldmlBCP47"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
	},
	{
		file: "ldmlSupplemental",
		root: "SupplementalData",
		top:  []string{"supplementalData"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
		forceRepeat: []string{
			"plurals", // data defined in plurals.xml and ordinals.xml
		},
	},
	{
		file: "ldml",
		root: "LDML",
		// The first entry is the root element; the others get their own
		// named Go type instead of an inline anonymous struct.
		top: []string{
			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
		},
		skipElem: []string{
			"cp",       // not used anywhere
			"special",  // not used anywhere
			"fallback", // deprecated, not used
			"alias",    // in Common
			"default",  // in Common
		},
		skipAttr: []string{
			"hiraganaQuarternary", // typo in DTD, correct version included as well
		},
		predefined: []string{"rules"},
	},
}

// comments maps an XML element name to the Go doc comment that write emits
// immediately before the generated type for that element. Every value starts
// with a newline (separating it from the preceding output) and ends with one,
// so the type declaration follows directly on the next line.
//
// NOTE(review): write only looks up names listed in a dtd's top slice; the
// "dates" entry is not in any top list in this file and appears to be unused.
var comments = map[string]string{
	"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
	"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
	"ldml": `
// LDML is the top-level type for locale-specific data.
`,
	"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
	"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
	"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
	"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
	"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}

// element describes one <!ELEMENT ...> declaration read from a DTD.
// parseDTD fills in name and category and later appends to attr and signature
// for each matching <!ATTLIST ...>; resolve fills in sub from category.
//
// NOTE(review): parseDTD builds signature by appending "=<attr name>+<attr type>"
// per attribute and never stores category in it, which differs from the field
// comment below; no reader of signature is visible in this file.
type element struct {
	name      string // XML element name
	category  string // elements contained by this element
	signature string // category + attrKey*

	attr []*attribute // attributes supported by this element.
	sub  []struct {   // parsed and evaluated sub elements of this element.
		e      *element
		repeat bool // true if the element needs to be a slice
	}

	resolved bool // prevent multiple resolutions of this element.
}

// attribute describes one attribute declared for an element by an
// <!ATTLIST ...> directive. parseDTD sets name to the attribute name, key to
// the directive text that follows the element name, and list to the tokens
// that reToken finds in the attribute's parenthesised value list (if any).
type attribute struct {
	name string
	key  string
	list []string

	tag string // Go tag
}

// Regular expressions used by parseDTD to pick apart DTD directives.
var (
	// reHead matches the start of a directive: the directive keyword in
	// group 1 (parseDTD acts on ELEMENT and ATTLIST) and the element name in
	// group 2.
	reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)

	// reAttr matches the remainder of an ATTLIST directive. Groups:
	//   1: attribute name
	//   2: attribute type when given as a single word
	//   3: contents of a parenthesised value list
	//   4: upper-case keyword following '#' (parseDTD checks for FIXED)
	//   5: a quoted value following that keyword
	//   6: a trailing quoted default, quotes included
	//
	// NOTE(review): group 5 is `[\.\d+]`, a class matching exactly one
	// character, so a quoted value longer than one character is not captured
	// there (it falls through to group 6). `[\.\d]+` may have been intended;
	// it is left as is because changing it would alter which versions
	// parseDTD records and main compares — confirm before fixing.
	reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)

	// reElem matches the content model of an ELEMENT directive: EMPTY, ANY,
	// or a parenthesised expression with an optional occurrence indicator.
	reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)

	// reToken matches one value (a run of word characters and hyphens) in a
	// parenthesised attribute value list such as "a | b-c". The previous
	// pattern `\w\-` only matched a word character followed by a literal
	// hyphen and so could not split such a list into its values.
	reToken = regexp.MustCompile(`[\w\-]+`)
)

// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
//
// w receives the generated code; index maps an XML element name to its parsed
// element; elem collects elements in the order resolve first visits them;
// info is the configuration of the DTD being processed; version is the value
// parseDTD captured from an attribute declared #FIXED (empty if none).
type builder struct {
	w       io.Writer
	index   map[string]*element
	elem    []*element
	info    dtd
	version string
}

// makeBuilder returns a builder that writes generated code to w and is
// configured by d. Its element index and element list start out empty
// (non-nil) and its version is unset.
func makeBuilder(w io.Writer, d dtd) builder {
	var b builder
	b.w = w
	b.info = d
	b.index = map[string]*element{}
	b.elem = make([]*element, 0)
	return b
}

// parseDTD parses a DTD file read from r and records what it finds in b.
//
// Each <!ELEMENT ...> directive adds an element (name plus raw content model)
// to b.index. Each <!ATTLIST ...> directive must refer to an element already
// declared; its attribute is appended to that element unless it is one of
// the attributes handled by Common or is declared #FIXED, in which case the
// captured value is stored in b.version instead. Other directives and
// non-directive tokens are ignored. Any malformed input terminates the
// program.
func (b *builder) parseDTD(r io.Reader) {
	for d := xml.NewDecoder(r); ; {
		t, err := d.Token()
		// Token reports the end of input as io.EOF. Any other error is a
		// real decoding failure and must not be mistaken for a clean end.
		if err == io.EOF {
			break
		}
		failOnError(err)
		dir, ok := t.(xml.Directive)
		if !ok {
			continue
		}
		head := reHead.FindSubmatch(dir)
		if head == nil {
			// Not of the form "<keyword> <name>": nothing to record, same
			// as any directive kind other than ELEMENT and ATTLIST.
			continue
		}
		dir = dir[len(head[0]):]
		ename := string(head[2])
		el, elementFound := b.index[ename]
		switch string(head[1]) {
		case "ELEMENT":
			if elementFound {
				log.Fatalf("parseDTD: duplicate entry for element %q", ename)
			}
			model := reElem.FindSubmatch(dir)
			if model == nil {
				log.Fatalf("parseDTD: invalid element %q", string(dir))
			}
			if len(model[0]) != len(dir) {
				log.Fatalf("parseDTD: invalid element %q: matched %d of %d bytes (%q)",
					string(dir), len(model[0]), len(dir), string(model[0]))
			}
			b.index[ename] = &element{
				name:     ename,
				category: string(model[1]),
			}
		case "ATTLIST":
			if !elementFound {
				log.Fatalf("parseDTD: unknown element %q", ename)
			}
			s := string(dir)
			m := reAttr.FindStringSubmatch(s)
			if m == nil {
				log.Fatalf("parseDTD: invalid attribute %q", s)
			}
			if m[4] == "FIXED" {
				b.version = m[5]
			} else {
				switch m[1] {
				case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
				case "type", "choice":
				default:
					el.attr = append(el.attr, &attribute{
						name: m[1],
						key:  s,
						list: reToken.FindAllString(m[3], -1),
					})
					el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
				}
			}
		}
	}
}

// reCat matches one token of an element's content model as consumed by
// resolve: any leading separators (space, comma, '|'), then optionally a
// token in group 1 — "(", ")", or a name with an optional leading '#' —
// followed by an optional occurrence indicator ('*', '+' or '?') in group 2.
// Every part is optional, so the expression can match the empty string.
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)

// resolve takes a parsed element and converts it into structured data
// that can be used to generate the XML code.
//
// It walks e's content model token by token and appends each referenced
// sub-element to e.sub once, in order of first appearance, recursively
// resolving it. A sub-element is marked as repeating if it carries a '*' or
// '+' indicator, is listed in forceRepeat, or sits inside a parenthesised
// group that does. Elements listed in skipElem are dropped. An already
// resolved element is left untouched, which also terminates recursion on
// cyclic content models. Malformed content models terminate the program.
func (b *builder) resolve(e *element) {
	if e.resolved {
		return
	}
	b.elem = append(b.elem, e)
	e.resolved = true
	s := e.category
	found := make(map[string]bool)
	// Stack of indices into e.sub marking where each open "(" group began.
	sequenceStart := []int{}
	for len(s) > 0 {
		m := reCat.FindStringSubmatch(s)
		// reCat can match the empty string, so it never fails outright. A
		// zero-length match means s starts with a character that is not
		// part of any token; without this check the loop would never
		// advance.
		if m == nil || len(m[0]) == 0 {
			log.Fatalf("%s: invalid category string %q", e.name, s)
		}
		repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
		switch m[1] {
		case "":
			// Separators only; nothing to record.
		case "(":
			sequenceStart = append(sequenceStart, len(e.sub))
		case ")":
			if len(sequenceStart) == 0 {
				log.Fatalf("%s: unmatched closing parenthesis", e.name)
			}
			// A repeat indicator on the group applies to every
			// sub-element added since the matching "(".
			for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
				e.sub[i].repeat = e.sub[i].repeat || repeat
			}
			sequenceStart = sequenceStart[:len(sequenceStart)-1]
		default:
			sub, known := b.index[m[1]]
			switch {
			case in(b.info.skipElem, m[1]):
				// Deliberately excluded from the generated types.
			case known:
				if !found[sub.name] {
					e.sub = append(e.sub, struct {
						e      *element
						repeat bool
					}{sub, repeat})
					found[sub.name] = true
					b.resolve(sub)
				}
			case m[1] == "#PCDATA" || m[1] == "ANY" || m[1] == "EMPTY":
				// Content keywords, not element references.
			default:
				log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
			}
		}
		s = s[len(m[0]):]
	}
}

// in reports whether s is equal to one of the strings in set.
// A nil or empty set contains nothing.
func in(set []string, s string) bool {
	for i := 0; i < len(set); i++ {
		if set[i] == s {
			return true
		}
	}
	return false
}

// repl turns the separators '-' and '_' into spaces so that strings.Title
// sees each segment of a name as a separate word.
var repl = strings.NewReplacer("-", " ", "_", " ")

// title converts an XML name to a Go identifier: the first character and
// every character following a '-', '_' or space is put in title case, and
// all of those separators are removed. The case of the other characters is
// left unchanged.
func title(s string) string {
	spaced := repl.Replace(s)
	capitalized := strings.Title(spaced)
	return strings.Replace(capitalized, " ", "", -1)
}

// writeElem generates Go code for a single element, recursively.
//
// It writes the type expression for e to b.w: the bare name Common when e
// has neither attributes nor sub-elements, otherwise an anonymous struct
// that embeds Common, has one string field per attribute (except those in
// skipAttr) and one pointer field — or slice of pointers when repeating —
// per sub-element. Sub-elements listed in predefined are embedded as
// <name>Elem; those listed in top refer to their named type instead of being
// expanded inline. tab is the current indentation depth in tabs.
func (b *builder) writeElem(tab int, e *element) {
	// p is Fprintf to b.w with every newline in the format followed by the
	// current indentation. It reads tab at call time, so the tab++ / tab--
	// below change the indentation of subsequent output.
	p := func(f string, x ...interface{}) {
		f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
		fmt.Fprintf(b.w, f, x...)
	}
	if len(e.sub) == 0 && len(e.attr) == 0 {
		p("Common")
		return
	}
	p("struct {")
	tab++
	p("\nCommon")
	for _, attr := range e.attr {
		if !in(b.info.skipAttr, attr.name) {
			p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
		}
	}
	for _, sub := range e.sub {
		if in(b.info.predefined, sub.e.name) {
			p("\n%sElem", sub.e.name)
			continue
		}
		if in(b.info.skipElem, sub.e.name) {
			continue
		}
		p("\n%s ", title(sub.e.name))
		if sub.repeat {
			p("[]")
		}
		p("*")
		if in(b.info.top, sub.e.name) {
			// Pass the name as an argument, never as the format string, so
			// it can never be interpreted as formatting directives.
			p("%s", title(sub.e.name))
		} else {
			b.writeElem(tab, sub.e)
		}
		p(" `xml:\"%s\"`", sub.e.name)
	}
	tab--
	p("\n}")
}

// write generates the Go XML code.
//
// For every name in b.info.top that was found in the DTD it emits the
// element's doc comment from comments (if any) followed by a type
// declaration. The first entry of top is named after b.info.root; the others
// are named by applying title to the element name. Names missing from the
// DTD are skipped.
func (b *builder) write() {
	for i, name := range b.info.top {
		e := b.index[name]
		if e == nil {
			continue
		}
		// The comment is literal text, not a format string, so write it
		// with Fprint: a '%' in it must not be interpreted as a directive.
		fmt.Fprint(b.w, comments[name])
		typeName := title(e.name)
		if i == 0 {
			typeName = b.info.root
		}
		fmt.Fprintf(b.w, "type %s ", typeName)
		b.writeElem(0, e)
		fmt.Fprint(b.w, "\n")
	}
}
Switch to dep for vendoring, update libraries Signed-off-by: Knut Ahlers <knut@ahlers.me> 2017-11-22 20:39:52 +00:00			`// Copyright 2013 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`// +build ignore`

			`// This tool generates types for the various XML formats of CLDR.`
			`package main`

			`import (`
			`"archive/zip"`
			`"bytes"`
			`"encoding/xml"`
			`"flag"`
			`"fmt"`
			`"io"`
			`"io/ioutil"`
			`"log"`
			`"os"`
			`"regexp"`
			`"strings"`

			`"golang.org/x/text/internal/gen"`
			`)`

			`var outputFile = flag.String("output", "xml.go", "output file name")`

			`func main() {`
			`flag.Parse()`

			`r := gen.OpenCLDRCoreZip()`
			`buffer, err := ioutil.ReadAll(r)`
			`if err != nil {`
			`log.Fatal("Could not read zip file")`
			`}`
			`r.Close()`
			`z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))`
			`if err != nil {`
			`log.Fatalf("Could not read zip archive: %v", err)`
			`}`

			`var buf bytes.Buffer`

			`version := gen.CLDRVersion()`

			`for _, dtd := range files {`
			`for _, f := range z.File {`
			`if strings.HasSuffix(f.Name, dtd.file+".dtd") {`
			`r, err := f.Open()`
			`failOnError(err)`

			`b := makeBuilder(&buf, dtd)`
			`b.parseDTD(r)`
			`b.resolve(b.index[dtd.top[0]])`
			`b.write()`
			`if b.version != "" && version != b.version {`
			`println(f.Name)`
			`log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)`
			`}`
			`break`
			`}`
			`}`
			`}`
			`fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")`
			`fmt.Fprintf(&buf, "const Version = %q\n", version)`

			`gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())`
			`}`

			`func failOnError(err error) {`
			`if err != nil {`
			`log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())`
			`os.Exit(1)`
			`}`
			`}`

			`// configuration data per DTD type`
			`type dtd struct {`
			`file string // base file name`
			`root string // Go name of the root XML element`
			`top []string // create a different type for this section`

			`skipElem []string // hard-coded or deprecated elements`
			`skipAttr []string // attributes to exclude`
			`predefined []string // hard-coded elements exist of the form <name>Elem`
			`forceRepeat []string // elements to make slices despite DTD`
			`}`

			`var files = []dtd{`
			`{`
			`file: "ldmlBCP47",`
			`root: "LDMLBCP47",`
			`top: []string{"ldmlBCP47"},`
			`skipElem: []string{`
			`"cldrVersion", // deprecated, not used`
			`},`
			`},`
			`{`
			`file: "ldmlSupplemental",`
			`root: "SupplementalData",`
			`top: []string{"supplementalData"},`
			`skipElem: []string{`
			`"cldrVersion", // deprecated, not used`
			`},`
			`forceRepeat: []string{`
			`"plurals", // data defined in plurals.xml and ordinals.xml`
			`},`
			`},`
			`{`
			`file: "ldml",`
			`root: "LDML",`
			`top: []string{`
			`"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",`
			`},`
			`skipElem: []string{`
			`"cp", // not used anywhere`
			`"special", // not used anywhere`
			`"fallback", // deprecated, not used`
			`"alias", // in Common`
			`"default", // in Common`
			`},`
			`skipAttr: []string{`
			`"hiraganaQuarternary", // typo in DTD, correct version included as well`
			`},`
			`predefined: []string{"rules"},`
			`},`
			`}`

			`var comments = map[string]string{`
			"ldmlBCP47": `
			`// LDMLBCP47 holds information on allowable values for various variables in LDML.`
			`,
			"supplementalData": `
			`// SupplementalData holds information relevant for internationalization`
			`// and proper use of CLDR, but that is not contained in the locale hierarchy.`
			`,
			"ldml": `
			`// LDML is the top-level type for locale-specific data.`
			`,
			"collation": `
			`// Collation contains rules that specify a certain sort-order,`
			`// as a tailoring of the root order.`
			`// The parsed rules are obtained by passing a RuleProcessor to Collation's`
			`// Process method.`
			`,
			"calendar": `
			`// Calendar specifies the fields used for formatting and parsing dates and times.`
			`// The month and quarter names are identified numerically, starting at 1.`
			`// The day (of the week) names are identified with short strings, since there is`
			`// no universally-accepted numeric designation.`
			`,
			"dates": `
			`// Dates contains information regarding the format and parsing of dates and times.`
			`,
			"localeDisplayNames": `
			`// LocaleDisplayNames specifies localized display names for for scripts, languages,`
			`// countries, currencies, and variants.`
			`,
			"numbers": `
			`// Numbers supplies information for formatting and parsing numbers and currencies.`
			`,
			`}`

			`type element struct {`
			`name string // XML element name`
			`category string // elements contained by this element`
			`signature string // category + attrKey*`

			`attr []*attribute // attributes supported by this element.`
			`sub []struct { // parsed and evaluated sub elements of this element.`
			`e *element`
			`repeat bool // true if the element needs to be a slice`
			`}`

			`resolved bool // prevent multiple resolutions of this element.`
			`}`

			`type attribute struct {`
			`name string`
			`key string`
			`list []string`

			`tag string // Go tag`
			`}`

			`var (`
			reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
			reAttr = regexp.MustCompile(` (\w+) (?:(\w+)\|\(([\w\- \\|]+)\)) (?:#([A-Z]) (?:\"([\.\d+])\")?)? ("[\w\-:]*")?`)
			reElem = regexp.MustCompile(`^ (EMPTY\|ANY\|\(.\)[\\+\?]?) $`)
			reToken = regexp.MustCompile(`\w\-`)
			`)`

			`// builder is used to read in the DTD files from CLDR and generate Go code`
			`// to be used with the encoding/xml package.`
			`type builder struct {`
			`w io.Writer`
			`index map[string]*element`
			`elem []*element`
			`info dtd`
			`version string`
			`}`

			`func makeBuilder(w io.Writer, d dtd) builder {`
			`return builder{`
			`w: w,`
			`index: make(map[string]*element),`
			`elem: []*element{},`
			`info: d,`
			`}`
			`}`

			`// parseDTD parses a DTD file.`
			`func (b *builder) parseDTD(r io.Reader) {`
			`for d := xml.NewDecoder(r); ; {`
			`t, err := d.Token()`
			`if t == nil {`
			`break`
			`}`
			`failOnError(err)`
			`dir, ok := t.(xml.Directive)`
			`if !ok {`
			`continue`
			`}`
			`m := reHead.FindSubmatch(dir)`
			`dir = dir[len(m[0]):]`
			`ename := string(m[2])`
			`el, elementFound := b.index[ename]`
			`switch string(m[1]) {`
			`case "ELEMENT":`
			`if elementFound {`
			`log.Fatal("parseDTD: duplicate entry for element %q", ename)`
			`}`
			`m := reElem.FindSubmatch(dir)`
			`if m == nil {`
			`log.Fatalf("parseDTD: invalid element %q", string(dir))`
			`}`
			`if len(m[0]) != len(dir) {`
			`log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))`
			`}`
			`s := string(m[1])`
			`el = &element{`
			`name: ename,`
			`category: s,`
			`}`
			`b.index[ename] = el`
			`case "ATTLIST":`
			`if !elementFound {`
			`log.Fatalf("parseDTD: unknown element %q", ename)`
			`}`
			`s := string(dir)`
			`m := reAttr.FindStringSubmatch(s)`
			`if m == nil {`
			`log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))`
			`}`
			`if m[4] == "FIXED" {`
			`b.version = m[5]`
			`} else {`
			`switch m[1] {`
			`case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :`
			`case "type", "choice":`
			`default:`
			`el.attr = append(el.attr, &attribute{`
			`name: m[1],`
			`key: s,`
			`list: reToken.FindAllString(m[3], -1),`
			`})`
			`el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])`
			`}`
			`}`
			`}`
			`}`
			`}`

			var reCat = regexp.MustCompile(`[ ,\\|](?:(\(\|\)\|\#?[\w_-]+)([\\+\?]?))?`)

			`// resolve takes a parsed element and converts it into structured data`
			`// that can be used to generate the XML code.`
			`func (b builder) resolve(e element) {`
			`if e.resolved {`
			`return`
			`}`
			`b.elem = append(b.elem, e)`
			`e.resolved = true`
			`s := e.category`
			`found := make(map[string]bool)`
			`sequenceStart := []int{}`
			`for len(s) > 0 {`
			`m := reCat.FindStringSubmatch(s)`
			`if m == nil {`
			`log.Fatalf("%s: invalid category string %q", e.name, s)`
			`}`
			`repeat := m[2] == "*" \|\| m[2] == "+" \|\| in(b.info.forceRepeat, m[1])`
			`switch m[1] {`
			`case "":`
			`case "(":`
			`sequenceStart = append(sequenceStart, len(e.sub))`
			`case ")":`
			`if len(sequenceStart) == 0 {`
			`log.Fatalf("%s: unmatched closing parenthesis", e.name)`
			`}`
			`for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {`
			`e.sub[i].repeat = e.sub[i].repeat \|\| repeat`
			`}`
			`sequenceStart = sequenceStart[:len(sequenceStart)-1]`
			`default:`
			`if in(b.info.skipElem, m[1]) {`
			`} else if sub, ok := b.index[m[1]]; ok {`
			`if !found[sub.name] {`
			`e.sub = append(e.sub, struct {`
			`e *element`
			`repeat bool`
			`}{sub, repeat})`
			`found[sub.name] = true`
			`b.resolve(sub)`
			`}`
			`} else if m[1] == "#PCDATA" \|\| m[1] == "ANY" {`
			`} else if m[1] != "EMPTY" {`
			`log.Fatalf("resolve:%s: element %q not found", e.name, m[1])`
			`}`
			`}`
			`s = s[len(m[0]):]`
			`}`
			`}`

			`// return true if s is contained in set.`
			`func in(set []string, s string) bool {`
			`for _, v := range set {`
			`if v == s {`
			`return true`
			`}`
			`}`
			`return false`
			`}`

			`var repl = strings.NewReplacer("-", " ", "_", " ")`

			`// title puts the first character or each character following '_' in title case and`
			`// removes all occurrences of '_'.`
			`func title(s string) string {`
			`return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)`
			`}`

			`// writeElem generates Go code for a single element, recursively.`
			`func (b builder) writeElem(tab int, e element) {`
			`p := func(f string, x ...interface{}) {`
			`f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)`
			`fmt.Fprintf(b.w, f, x...)`
			`}`
			`if len(e.sub) == 0 && len(e.attr) == 0 {`
			`p("Common")`
			`return`
			`}`
			`p("struct {")`
			`tab++`
			`p("\nCommon")`
			`for _, attr := range e.attr {`
			`if !in(b.info.skipAttr, attr.name) {`
			p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
			`}`
			`}`
			`for _, sub := range e.sub {`
			`if in(b.info.predefined, sub.e.name) {`
			`p("\n%sElem", sub.e.name)`
			`continue`
			`}`
			`if in(b.info.skipElem, sub.e.name) {`
			`continue`
			`}`
			`p("\n%s ", title(sub.e.name))`
			`if sub.repeat {`
			`p("[]")`
			`}`
			`p("*")`
			`if in(b.info.top, sub.e.name) {`
			`p(title(sub.e.name))`
			`} else {`
			`b.writeElem(tab, sub.e)`
			`}`
			p(" `xml:\"%s\"`", sub.e.name)
			`}`
			`tab--`
			`p("\n}")`
			`}`

			`// write generates the Go XML code.`
			`func (b *builder) write() {`
			`for i, name := range b.info.top {`
			`e := b.index[name]`
			`if e != nil {`
			`fmt.Fprintf(b.w, comments[name])`
			`name := title(e.name)`
			`if i == 0 {`
			`name = b.info.root`
			`}`
			`fmt.Fprintf(b.w, "type %s ", name)`
			`b.writeElem(0, e)`
			`fmt.Fprint(b.w, "\n")`
			`}`
			`}`
			`}`