mondash/vendor/golang.org/x/text/unicode/cldr/collate.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cldr

import (
	"bufio"
	"encoding/xml"
	"errors"
	"fmt"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
)

// RuleProcessor can be passed to Collation's Process method, which
// parses the rules and calls the respective method for each rule found.
type RuleProcessor interface {
	// Reset is called for every reset rule. anchor is either the scanned
	// anchor string or a logical position formatted with specialAnchor.
	// before is the "before" level of the reset, or 0 if none was given.
	Reset(anchor string, before int) error
	// Insert is called for every ordering rule. level is 1 through 4 for
	// the relations '<' through '<<<<' (p, s and t map to 1, 2 and 3 in
	// the XML format) and 5 for the identity relation. context and extend
	// are empty when the rule has no context or extension.
	Insert(level int, str, context, extend string) error
	// Index is called instead of Insert for a value that starts with
	// cldrIndex; id is the value with that prefix removed.
	Index(id string)
}

const (
	// cldrIndex is a Unicode-reserved sentinel value used to mark the start
	// of a grouping within an index.
	// A rule whose value starts with this rune is not inserted; the rest of
	// the value is reported through RuleProcessor.Index instead.
	// See http://unicode.org/reports/tr35/#Collation_Elements for details.
	cldrIndex = "\uFDD0"

	// specialAnchor is the fmt format in which to represent logical reset
	// positions, such as "first tertiary ignorable". The single %s verb is
	// replaced by the name of the position.
	specialAnchor = "<%s/>"
)

// Process walks the tailoring rules of this collation and reports every
// rule it finds to p.
//
// A single cr element is parsed as textual collation rules. Without any cr
// element, the element-based rules stored in c.Rules are used instead. More
// than one cr element, or no rule data at all, is reported as an error.
func (c Collation) Process(p RuleProcessor) (err error) {
	switch n := len(c.Cr); {
	case n > 1:
		return fmt.Errorf("multiple cr elements, want 0 or 1")
	case n == 1:
		return processRules(p, c.Cr[0].Data())
	case c.Rules.Any != nil:
		return c.processXML(p)
	}
	return errors.New("no tailoring data")
}

// processRules parses rules in the Collation Rule Syntax defined in
// http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings
// and reports each rule to p.
//
// The input is read line by line. Within a line, rules are parsed one after
// another until the line is exhausted or the next rule would start with '#',
// which begins a comment. Parsing stops at the first error: no further
// methods of p are called for the remainder of the input.
//
// A returned error is prefixed with the number of lines consumed so far,
// which for a parse error is the 1-based number of the offending line.
func processRules(p RuleProcessor, s string) (err error) {
	// chk records the first error seen and passes the tail through, so it
	// can wrap the (tail, error) results of the parse helpers directly.
	chk := func(s string, e error) string {
		if err == nil {
			err = e
		}
		return s
	}
	i := 0 // Save the line number for use after the loop.
	scanner := bufio.NewScanner(strings.NewReader(s))
	for ; scanner.Scan() && err == nil; i++ {
		// The err == nil condition makes the first error end the line; a
		// break inside the switch below would only leave the switch.
		for s := skipSpace(scanner.Text()); err == nil && s != "" && s[0] != '#'; s = skipSpace(s) {
			level := 5 // identity relation unless '<' overrides it
			var ch byte
			switch ch, s = s[0], s[1:]; ch {
			case '&': // followed by <anchor> or '[' <key> ']'
				if s = skipSpace(s); consume(&s, '[') {
					s = chk(parseSpecialAnchor(p, s))
				} else {
					s = chk(parseAnchor(p, 0, s))
				}
			case '<': // sort relation '<'{1,4}, optionally followed by '*'.
				for level = 1; consume(&s, '<'); level++ {
				}
				if level > 4 {
					// Do not hand an out-of-range level to p.
					err = fmt.Errorf("level %d > 4", level)
					break
				}
				fallthrough
			case '=': // identity relation, optionally followed by *.
				if consume(&s, '*') {
					s = chk(parseSequence(p, level, s))
				} else {
					s = chk(parseOrder(p, level, s))
				}
			default:
				chk("", fmt.Errorf("illegal operator %q", ch))
			}
		}
	}
	if chk("", scanner.Err()); err != nil {
		return fmt.Errorf("%d: %v", i, err)
	}
	return nil
}

// parseSpecialAnchor parses the bracketed part of a reset rule, which has
// one of two forms:
//    ['before' <level>] <anchor>
// or
//    [<label>]
// The opening bracket must already have been consumed by the caller.
//
// For the first form the anchor that follows the bracket is parsed and
// reported with the given level. For the second form the label is reported
// as a logical position, formatted with specialAnchor, at level 0.
func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {
	end := strings.IndexByte(s, ']')
	if end < 0 {
		return "", errors.New("unmatched bracket")
	}
	label, rest := strings.TrimSpace(s[:end]), s[end+1:]

	const beforePrefix = "before "
	if !strings.HasPrefix(label, beforePrefix) {
		return rest, p.Reset(fmt.Sprintf(specialAnchor, label), 0)
	}
	// A bit size of 3 restricts the accepted level to the range 0-7.
	lvl, perr := strconv.ParseUint(skipSpace(label[len(beforePrefix):]), 10, 3)
	if perr != nil {
		return rest, perr
	}
	return parseAnchor(p, int(lvl), rest)
}

// parseAnchor scans the anchor string at the start of s and reports it to p
// as a reset at the given "before" level. It returns the text that follows
// the anchor. If no anchor can be scanned, p is not called and the scan
// error is returned.
func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {
	var anchor string
	if anchor, tail, err = scanString(s); err == nil {
		err = p.Reset(anchor, level)
	}
	return tail, err
}

// parseOrder parses a single ordering rule of the form
//    <value> ['|' <context>] ['/' <extend>]
// and reports it to p with the given level. It returns the unparsed
// remainder of the line.
//
// A value that starts with cldrIndex is not inserted; the rest of the value
// is passed to p.Index instead, and parsing resumes after the value so that
// rules following it on the same line are not lost.
func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {
	var value, context, extend string
	if value, s, err = scanString(s); err != nil {
		return s, err
	}
	if strings.HasPrefix(value, cldrIndex) {
		p.Index(value[len(cldrIndex):])
		// Return the remaining input explicitly. A naked return would
		// yield an empty tail and silently drop the rest of the line.
		return s, nil
	}
	if consume(&s, '|') {
		if context, s, err = scanString(s); err != nil {
			return s, errors.New("missing string after context")
		}
	}
	if consume(&s, '/') {
		if extend, s, err = scanString(s); err != nil {
			return s, errors.New("missing string after extension")
		}
	}
	return s, p.Insert(level, value, context, extend)
}

// scanString reads one string token from the start of s, after skipping
// leading white space. It returns the token and the remaining input with
// leading white space removed.
//
// The token ends at the first unquoted white space or at one of the
// characters '&', '<', '=' and '#'. Text between single quotes is copied
// verbatim, and two adjacent single quotes stand for one literal quote.
// It is an error for the input to be empty or for a quote to be unclosed.
func scanString(s string) (str, tail string, err error) {
	s = skipSpace(s)
	if s == "" {
		return s, s, errors.New("missing string")
	}
	var scratch [16]byte // small but enough to hold most cases.
	out := scratch[:0]
	for len(s) > 0 {
		if s[0] == '\'' {
			quoted := s[1:]
			end := strings.IndexByte(quoted, '\'')
			switch {
			case end < 0:
				return "", "", errors.New(`unmatched single quote`)
			case end == 0:
				// '' denotes a literal single quote.
				out = append(out, '\'')
			default:
				out = append(out, quoted[:end]...)
			}
			s = quoted[end+1:]
			continue
		}
		r, n := utf8.DecodeRuneInString(s)
		if unicode.IsSpace(r) || strings.ContainsRune("&<=#", r) {
			break
		}
		out = append(out, s[:n]...)
		s = s[n:]
	}
	return string(out), skipSpace(s), nil
}

// parseSequence parses the argument of a starred relation: a run of
// characters that are each inserted as a separate rule at the given level.
// "a-c" denotes the range a, b, c; the character before the '-' has already
// been inserted when the range is expanded.
//
// The sequence ends at the first white space or punctuation rune other than
// '-'. That terminating rune is left in the returned tail so that the caller
// still sees a following '&' rule or '#' comment.
//
// An empty sequence, a range without a preceding character, and a range whose
// end is invalid or sorts before its start are errors.
func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {
	if s = skipSpace(s); s == "" {
		return s, errors.New("empty sequence")
	}
	last := rune(0) // last single character inserted; 0 if a range cannot start here
	for s != "" {
		r, sz := utf8.DecodeRuneInString(s)

		if r == '-' {
			s = s[sz:]
			// We have a range. The first element was already written.
			if last == 0 {
				return s, errors.New("range without starter value")
			}
			r, sz = utf8.DecodeRuneInString(s)
			s = s[sz:]
			if r == utf8.RuneError || r < last {
				return s, fmt.Errorf("invalid range %q-%q", last, r)
			}
			for i := last + 1; i <= r; i++ {
				if err := p.Insert(level, string(i), "", ""); err != nil {
					return s, err
				}
			}
			last = 0
			continue
		}

		if unicode.IsSpace(r) || unicode.IsPunct(r) {
			// Do not consume the terminator: it may start the next rule.
			break
		}
		s = s[sz:]

		// normal case
		if err := p.Insert(level, string(r), "", ""); err != nil {
			return s, err
		}
		last = r
	}
	return s, nil
}

// skipSpace returns s without its leading Unicode white space. The result is
// empty if s consists of white space only.
func skipSpace(s string) string {
	first := strings.IndexFunc(s, func(r rune) bool {
		return !unicode.IsSpace(r)
	})
	if first < 0 {
		return ""
	}
	return s[first:]
}

// consume reports whether the first byte of *s is ch. If it is, that byte
// is removed from *s; otherwise *s is left unchanged.
func consume(s *string, ch byte) (ok bool) {
	str := *s
	if len(str) > 0 && str[0] == ch {
		*s = str[1:]
		return true
	}
	return false
}

// The following code parses Collation rules of CLDR version 24 and before.

// lmap maps the first letter of an XML rule tag ("p", "s", "t", "i" and
// their "pc", "sc", "tc", "ic" variants) to the level that is passed to
// RuleProcessor.Insert. There is no entry for level 4, and "i" maps to 5,
// the same level processRules uses for the identity relation '='.
var lmap = map[byte]int{
	'p': 1,
	's': 2,
	't': 3,
	'i': 5,
}

// rulesElem holds the contents of a <rules> element in the element-based
// rule format. Every child element is collected in Any in document order,
// whatever its tag name; the tag name is kept in XMLName.
// NOTE(review): presumably embedded in Collation, which is defined
// elsewhere and accessed here as c.Rules.Any — confirm against xml.go.
type rulesElem struct {
	Rules struct {
		Common
		Any []*struct {
			XMLName xml.Name
			rule
		} `xml:",any"`
	} `xml:"rules"`
}

// rule is a single element of the element-based rule format.
type rule struct {
	// Value is the character data of the element. The value method rewrites
	// it in place.
	Value string `xml:",chardata"`
	// Before is the "before" attribute; processXML reads it on reset
	// elements to determine the reset level.
	Before string `xml:"before,attr"`
	// Any collects all child elements of any tag name, in document order.
	Any []*struct {
		XMLName xml.Name
		rule
	} `xml:",any"`
}

// emptyValueError is returned by rule.value when a rule has no character
// data and does not have exactly one child element to stand in for it.
var emptyValueError = errors.New("cldr: empty rule value")

// value returns the effective value of the rule and stores it in r.Value.
//
// The character data is first passed through charRe/replaceUnicode. If the
// result is non-empty it is the value, and the rule must have no child
// elements. If it is empty, the rule must have exactly one child element;
// the value then becomes that child's tag name formatted with specialAnchor,
// and the child is dropped from r.Any.
func (r *rule) value() (string, error) {
	// Convert hexadecimal Unicode codepoint notation to a string.
	r.Value = charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)
	switch {
	case r.Value != "":
		if len(r.Any) != 0 {
			return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)
		}
	case len(r.Any) != 1:
		return "", emptyValueError
	default:
		r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)
		r.Any = nil
	}
	return r.Value, nil
}

// process reports the rule to p. name is the tag name of the element the
// rule was read from; context and extend are passed through to p.Insert.
//
// For "p", "s", "t" and "i" the whole value is inserted as one string,
// unless it starts with cldrIndex, in which case the remainder is passed to
// p.Index. For "pc", "sc", "tc" and "ic" every rune of the value is inserted
// separately. Any other tag name is an error.
func (r rule) process(p RuleProcessor, name, context, extend string) error {
	val, err := r.value()
	if err != nil {
		return err
	}
	switch name {
	case "p", "s", "t", "i":
		if !strings.HasPrefix(val, cldrIndex) {
			return p.Insert(lmap[name[0]], val, context, extend)
		}
		p.Index(val[len(cldrIndex):])
		return nil
	case "pc", "sc", "tc", "ic":
		lvl := lmap[name[0]]
		for _, ch := range val {
			if insErr := p.Insert(lvl, string(ch), context, extend); insErr != nil {
				return insErr
			}
		}
		return nil
	}
	return fmt.Errorf("cldr: unsupported tag: %q", name)
}

// processXML parses the format of CLDR versions 24 and older, in which the
// rules are given as a list of XML elements, and reports each rule to p.
//
// A "reset" element becomes a call to p.Reset, with the level taken from
// its "before" attribute (0 if absent). An "x" element groups rules with a
// shared context and/or extension: its "context" and "extend" children
// supply those strings and every other child is processed as a rule with
// them. Any other element is processed as a plain rule.
//
// Processing stops at the first error, which is returned.
func (c Collation) processXML(p RuleProcessor) (err error) {
	// Collation is generated and defined in xml.go.
	var v string
	for _, r := range c.Rules.Any {
		switch r.XMLName.Local {
		case "reset":
			level := 0
			switch r.Before {
			case "primary", "1":
				level = 1
			case "secondary", "2":
				level = 2
			case "tertiary", "3":
				level = 3
			case "":
			default:
				return fmt.Errorf("cldr: unknown level %q", r.Before)
			}
			v, err = r.value()
			if err == nil {
				err = p.Reset(v, level)
			}
		case "x":
			var context, extend string
			// First pass: resolve all values and pick up the context and
			// extension, which may appear after the rules they apply to.
			for _, r1 := range r.Any {
				// Check the error on every iteration so that a later child
				// cannot overwrite it.
				if v, err = r1.value(); err != nil {
					return err
				}
				switch r1.XMLName.Local {
				case "context":
					context = v
				case "extend":
					extend = v
				}
			}
			// Second pass: emit the rules themselves.
			for _, r1 := range r.Any {
				if t := r1.XMLName.Local; t == "context" || t == "extend" {
					continue
				}
				// Propagate failures instead of silently dropping them.
				if err = r1.rule.process(p, r1.XMLName.Local, context, extend); err != nil {
					return err
				}
			}
		default:
			err = r.rule.process(p, r.XMLName.Local, "", "")
		}
		if err != nil {
			return err
		}
	}
	return nil
}
Switch to dep for vendoring, update libraries Signed-off-by: Knut Ahlers <knut@ahlers.me> 2017-11-22 20:39:52 +00:00			`// Copyright 2013 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`package cldr`

			`import (`
			`"bufio"`
			`"encoding/xml"`
			`"errors"`
			`"fmt"`
			`"strconv"`
			`"strings"`
			`"unicode"`
			`"unicode/utf8"`
			`)`

			`// RuleProcessor can be passed to Collator's Process method, which`
			`// parses the rules and calls the respective method for each rule found.`
			`type RuleProcessor interface {`
			`Reset(anchor string, before int) error`
			`Insert(level int, str, context, extend string) error`
			`Index(id string)`
			`}`

			`const (`
			`// cldrIndex is a Unicode-reserved sentinel value used to mark the start`
			`// of a grouping within an index.`
			`// We ignore any rule that starts with this rune.`
			`// See http://unicode.org/reports/tr35/#Collation_Elements for details.`
			`cldrIndex = "\uFDD0"`

			`// specialAnchor is the format in which to represent logical reset positions,`
			`// such as "first tertiary ignorable".`
			`specialAnchor = "<%s/>"`
			`)`

			`// Process parses the rules for the tailorings of this collation`
			`// and calls the respective methods of p for each rule found.`
			`func (c Collation) Process(p RuleProcessor) (err error) {`
			`if len(c.Cr) > 0 {`
			`if len(c.Cr) > 1 {`
			`return fmt.Errorf("multiple cr elements, want 0 or 1")`
			`}`
			`return processRules(p, c.Cr[0].Data())`
			`}`
			`if c.Rules.Any != nil {`
			`return c.processXML(p)`
			`}`
			`return errors.New("no tailoring data")`
			`}`

			`// processRules parses rules in the Collation Rule Syntax defined in`
			`// http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings.`
			`func processRules(p RuleProcessor, s string) (err error) {`
			`chk := func(s string, e error) string {`
			`if err == nil {`
			`err = e`
			`}`
			`return s`
			`}`
			`i := 0 // Save the line number for use after the loop.`
			`scanner := bufio.NewScanner(strings.NewReader(s))`
			`for ; scanner.Scan() && err == nil; i++ {`
			`for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) {`
			`level := 5`
			`var ch byte`
			`switch ch, s = s[0], s[1:]; ch {`
			`case '&': // followed by <anchor> or '[' <key> ']'`
			`if s = skipSpace(s); consume(&s, '[') {`
			`s = chk(parseSpecialAnchor(p, s))`
			`} else {`
			`s = chk(parseAnchor(p, 0, s))`
			`}`
			`case '<': // sort relation '<'{1,4}, optionally followed by '*'.`
			`for level = 1; consume(&s, '<'); level++ {`
			`}`
			`if level > 4 {`
			`err = fmt.Errorf("level %d > 4", level)`
			`}`
			`fallthrough`
			`case '=': // identity relation, optionally followed by *.`
			`if consume(&s, '*') {`
			`s = chk(parseSequence(p, level, s))`
			`} else {`
			`s = chk(parseOrder(p, level, s))`
			`}`
			`default:`
			`chk("", fmt.Errorf("illegal operator %q", ch))`
			`break`
			`}`
			`}`
			`}`
			`if chk("", scanner.Err()); err != nil {`
			`return fmt.Errorf("%d: %v", i, err)`
			`}`
			`return nil`
			`}`

			`// parseSpecialAnchor parses the anchor syntax which is either of the form`
			`// ['before' <level>] <anchor>`
			`// or`
			`// [<label>]`
			`// The starting should already be consumed.`
			`func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {`
			`i := strings.IndexByte(s, ']')`
			`if i == -1 {`
			`return "", errors.New("unmatched bracket")`
			`}`
			`a := strings.TrimSpace(s[:i])`
			`s = s[i+1:]`
			`if strings.HasPrefix(a, "before ") {`
			`l, err := strconv.ParseUint(skipSpace(a[len("before "):]), 10, 3)`
			`if err != nil {`
			`return s, err`
			`}`
			`return parseAnchor(p, int(l), s)`
			`}`
			`return s, p.Reset(fmt.Sprintf(specialAnchor, a), 0)`
			`}`

			`func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {`
			`anchor, s, err := scanString(s)`
			`if err != nil {`
			`return s, err`
			`}`
			`return s, p.Reset(anchor, level)`
			`}`

			`func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {`
			`var value, context, extend string`
			`if value, s, err = scanString(s); err != nil {`
			`return s, err`
			`}`
			`if strings.HasPrefix(value, cldrIndex) {`
			`p.Index(value[len(cldrIndex):])`
			`return`
			`}`
			`if consume(&s, '\|') {`
			`if context, s, err = scanString(s); err != nil {`
			`return s, errors.New("missing string after context")`
			`}`
			`}`
			`if consume(&s, '/') {`
			`if extend, s, err = scanString(s); err != nil {`
			`return s, errors.New("missing string after extension")`
			`}`
			`}`
			`return s, p.Insert(level, value, context, extend)`
			`}`

			`// scanString scans a single input string.`
			`func scanString(s string) (str, tail string, err error) {`
			`if s = skipSpace(s); s == "" {`
			`return s, s, errors.New("missing string")`
			`}`
			`buf := [16]byte{} // small but enough to hold most cases.`
			`value := buf[:0]`
			`for s != "" {`
			`if consume(&s, '\'') {`
			`i := strings.IndexByte(s, '\'')`
			`if i == -1 {`
			return "", "", errors.New(`unmatched single quote`)
			`}`
			`if i == 0 {`
			`value = append(value, '\'')`
			`} else {`
			`value = append(value, s[:i]...)`
			`}`
			`s = s[i+1:]`
			`continue`
			`}`
			`r, sz := utf8.DecodeRuneInString(s)`
			`if unicode.IsSpace(r) \|\| strings.ContainsRune("&<=#", r) {`
			`break`
			`}`
			`value = append(value, s[:sz]...)`
			`s = s[sz:]`
			`}`
			`return string(value), skipSpace(s), nil`
			`}`

			`func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {`
			`if s = skipSpace(s); s == "" {`
			`return s, errors.New("empty sequence")`
			`}`
			`last := rune(0)`
			`for s != "" {`
			`r, sz := utf8.DecodeRuneInString(s)`
			`s = s[sz:]`

			`if r == '-' {`
			`// We have a range. The first element was already written.`
			`if last == 0 {`
			`return s, errors.New("range without starter value")`
			`}`
			`r, sz = utf8.DecodeRuneInString(s)`
			`s = s[sz:]`
			`if r == utf8.RuneError \|\| r < last {`
			`return s, fmt.Errorf("invalid range %q-%q", last, r)`
			`}`
			`for i := last + 1; i <= r; i++ {`
			`if err := p.Insert(level, string(i), "", ""); err != nil {`
			`return s, err`
			`}`
			`}`
			`last = 0`
			`continue`
			`}`

			`if unicode.IsSpace(r) \|\| unicode.IsPunct(r) {`
			`break`
			`}`

			`// normal case`
			`if err := p.Insert(level, string(r), "", ""); err != nil {`
			`return s, err`
			`}`
			`last = r`
			`}`
			`return s, nil`
			`}`

			`func skipSpace(s string) string {`
			`return strings.TrimLeftFunc(s, unicode.IsSpace)`
			`}`

			`// consumes returns whether the next byte is ch. If so, it gobbles it by`
			`// updating s.`
			`func consume(s *string, ch byte) (ok bool) {`
			`if s == "" \|\| (s)[0] != ch {`
			`return false`
			`}`
			`s = (s)[1:]`
			`return true`
			`}`

			`// The following code parses Collation rules of CLDR version 24 and before.`

			`var lmap = map[byte]int{`
			`'p': 1,`
			`'s': 2,`
			`'t': 3,`
			`'i': 5,`
			`}`

			`type rulesElem struct {`
			`Rules struct {`
			`Common`
			`Any []*struct {`
			`XMLName xml.Name`
			`rule`
			} `xml:",any"`
			} `xml:"rules"`
			`}`

			`type rule struct {`
			Value string `xml:",chardata"`
			Before string `xml:"before,attr"`
			`Any []*struct {`
			`XMLName xml.Name`
			`rule`
			} `xml:",any"`
			`}`

			`var emptyValueError = errors.New("cldr: empty rule value")`

			`func (r *rule) value() (string, error) {`
			`// Convert hexadecimal Unicode codepoint notation to a string.`
			`s := charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)`
			`r.Value = s`
			`if s == "" {`
			`if len(r.Any) != 1 {`
			`return "", emptyValueError`
			`}`
			`r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)`
			`r.Any = nil`
			`} else if len(r.Any) != 0 {`
			`return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)`
			`}`
			`return r.Value, nil`
			`}`

			`func (r rule) process(p RuleProcessor, name, context, extend string) error {`
			`v, err := r.value()`
			`if err != nil {`
			`return err`
			`}`
			`switch name {`
			`case "p", "s", "t", "i":`
			`if strings.HasPrefix(v, cldrIndex) {`
			`p.Index(v[len(cldrIndex):])`
			`return nil`
			`}`
			`if err := p.Insert(lmap[name[0]], v, context, extend); err != nil {`
			`return err`
			`}`
			`case "pc", "sc", "tc", "ic":`
			`level := lmap[name[0]]`
			`for _, s := range v {`
			`if err := p.Insert(level, string(s), context, extend); err != nil {`
			`return err`
			`}`
			`}`
			`default:`
			`return fmt.Errorf("cldr: unsupported tag: %q", name)`
			`}`
			`return nil`
			`}`

			`// processXML parses the format of CLDR versions 24 and older.`
			`func (c Collation) processXML(p RuleProcessor) (err error) {`
			`// Collation is generated and defined in xml.go.`
			`var v string`
			`for _, r := range c.Rules.Any {`
			`switch r.XMLName.Local {`
			`case "reset":`
			`level := 0`
			`switch r.Before {`
			`case "primary", "1":`
			`level = 1`
			`case "secondary", "2":`
			`level = 2`
			`case "tertiary", "3":`
			`level = 3`
			`case "":`
			`default:`
			`return fmt.Errorf("cldr: unknown level %q", r.Before)`
			`}`
			`v, err = r.value()`
			`if err == nil {`
			`err = p.Reset(v, level)`
			`}`
			`case "x":`
			`var context, extend string`
			`for _, r1 := range r.Any {`
			`v, err = r1.value()`
			`switch r1.XMLName.Local {`
			`case "context":`
			`context = v`
			`case "extend":`
			`extend = v`
			`}`
			`}`
			`for _, r1 := range r.Any {`
			`if t := r1.XMLName.Local; t == "context" \|\| t == "extend" {`
			`continue`
			`}`
			`r1.rule.process(p, r1.XMLName.Local, context, extend)`
			`}`
			`default:`
			`err = r.rule.process(p, r.XMLName.Local, "", "")`
			`}`
			`if err != nil {`
			`return err`
			`}`
			`}`
			`return nil`
			`}`