// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// Generator for display name tables.

package main

import (
	"bytes"
	"flag"
	"fmt"
	"log"
	"reflect"
	"sort"
	"strings"

	"golang.org/x/text/internal/gen"
	"golang.org/x/text/language"
	"golang.org/x/text/unicode/cldr"
)

var (
	test = flag.Bool("test", false,
		"test existing tables; can be used to compare web data with package data.")
	outputFile = flag.String("output", "tables.go", "output file")

	stats = flag.Bool("stats", false, "prints statistics to stderr")

	short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
	draft = flag.String("draft",
		"contributed",
		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
	pkg = flag.String("package",
		"display",
		"the name of the package in which the generated file is to be included")

	tags = newTagSet("tags",
		[]language.Tag{},
		"space-separated list of tags to include or empty for all")
	dict = newTagSet("dict",
		dictTags(),
		"space-separated list or tags for which to include a Dictionary. "+
			`"" means the common list from go.text/language.`)
)

func dictTags() (tag []language.Tag) {
	// TODO: replace with language.Common.Tags() once supported.
	const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
		"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
		"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
		"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
		"zh zh-Hans zh-Hant zu"

	for _, s := range strings.Split(str, " ") {
		tag = append(tag, language.MustParse(s))
	}
	return tag
}

func main() {
	gen.Init()

	// Read the CLDR zip file.
	r := gen.OpenCLDRCoreZip()
	defer r.Close()

	d := &cldr.Decoder{}
	d.SetDirFilter("main", "supplemental")
	d.SetSectionFilter("localeDisplayNames")
	data, err := d.DecodeZip(r)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	w := gen.NewCodeWriter()
	defer w.WriteGoFile(*outputFile, "display")

	gen.WriteCLDRVersion(w)

	b := builder{
		w:     w,
		data:  data,
		group: make(map[string]*group),
	}
	b.generate()
}

const tagForm = language.All

// tagSet is used to parse command line flags of tags. It implements the
// flag.Value interface.
type tagSet map[language.Tag]bool

func newTagSet(name string, tags []language.Tag, usage string) tagSet {
	f := tagSet(make(map[language.Tag]bool))
	for _, t := range tags {
		f[t] = true
	}
	flag.Var(f, name, usage)
	return f
}

// String implements the String method of the flag.Value interface.
func (f tagSet) String() string {
	tags := []string{}
	for t := range f {
		tags = append(tags, t.String())
	}
	sort.Strings(tags)
	return strings.Join(tags, " ")
}

// Set implements Set from the flag.Value interface.
func (f tagSet) Set(s string) error {
	if s != "" {
		for _, s := range strings.Split(s, " ") {
			if s != "" {
				tag, err := tagForm.Parse(s)
				if err != nil {
					return err
				}
				f[tag] = true
			}
		}
	}
	return nil
}

func (f tagSet) contains(t language.Tag) bool {
	if len(f) == 0 {
		return true
	}
	return f[t]
}

// builder is used to create all tables with display name information.
type builder struct {
	w *gen.CodeWriter

	data *cldr.CLDR

	fromLocs []string

	// destination tags for the current locale.
	toTags     []string
	toTagIndex map[string]int

	// list of supported tags
	supported []language.Tag

	// key-value pairs per group
	group map[string]*group

	// statistics
	sizeIndex int // total size of all indexes of headers
	sizeData  int // total size of all data of headers
	totalSize int
}

type group struct {
	// Maps from a given language to the Namer data for this language.
	lang    map[language.Tag]keyValues
	headers []header

	toTags        []string
	threeStart    int
	fourPlusStart int
}

// set sets the typ to the name for locale loc.
func (g *group) set(t language.Tag, typ, name string) {
	kv := g.lang[t]
	if kv == nil {
		kv = make(keyValues)
		g.lang[t] = kv
	}
	if kv[typ] == "" {
		kv[typ] = name
	}
}

type keyValues map[string]string

type header struct {
	tag   language.Tag
	data  string
	index []uint16
}

var versionInfo = `// Version is deprecated. Use CLDRVersion.
const Version = %#v

`

var self = language.MustParse("mul")

// generate builds and writes all tables.
func (b *builder) generate() {
	fmt.Fprintf(b.w, versionInfo, cldr.Version)

	b.filter()
	b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Languages != nil {
			for _, v := range ldn.Languages.Language {
				lang := v.Type
				if lang == "root" {
					// We prefer the data from "und"
					// TODO: allow both the data for root and und somehow.
					continue
				}
				tag := tagForm.MustParse(lang)
				if tags.contains(tag) {
					g.set(loc, tag.String(), v.Data())
				}
			}
		}
	})
	b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Scripts != nil {
			for _, v := range ldn.Scripts.Script {
				code := language.MustParseScript(v.Type)
				if code.IsPrivateUse() { // Qaaa..Qabx
					// TODO: data currently appears to be very meager.
					// Reconsider if we have data for English.
					if loc == language.English {
						log.Fatal("Consider including data for private use scripts.")
					}
					continue
				}
				g.set(loc, code.String(), v.Data())
			}
		}
	})
	b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
		if ldn.Territories != nil {
			for _, v := range ldn.Territories.Territory {
				g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
			}
		}
	})

	b.makeSupported()

	b.writeParents()

	b.writeGroup("lang")
	b.writeGroup("script")
	b.writeGroup("region")

	b.w.WriteConst("numSupported", len(b.supported))
	buf := bytes.Buffer{}
	for _, tag := range b.supported {
		fmt.Fprint(&buf, tag.String(), "|")
	}
	b.w.WriteConst("supported", buf.String())

	b.writeDictionaries()

	b.supported = []language.Tag{self}

	// Compute the names of locales in their own language. Some of these names
	// may be specified in their parent locales. We iterate the maximum depth
	// of the parent three times to match successive parents of tags until a
	// possible match is found.
	for i := 0; i < 4; i++ {
		b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
			parent := tag
			if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
				parent, _ = language.Raw.Compose(b)
			}
			if ldn.Languages != nil {
				for _, v := range ldn.Languages.Language {
					key := tagForm.MustParse(v.Type)
					saved := key
					if key == parent {
						g.set(self, tag.String(), v.Data())
					}
					for k := 0; k < i; k++ {
						key = key.Parent()
					}
					if key == tag {
						g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
					}
				}
			}
		})
	}

	b.writeGroup("self")
}

func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
	b.sizeIndex = 0
	b.sizeData = 0
	b.toTags = nil
	b.fromLocs = nil
	b.toTagIndex = make(map[string]int)

	g := b.group[name]
	if g == nil {
		g = &group{lang: make(map[language.Tag]keyValues)}
		b.group[name] = g
	}
	for _, loc := range b.data.Locales() {
		// We use RawLDML instead of LDML as we are managing our own inheritance
		// in this implementation.
		ldml := b.data.RawLDML(loc)

		// We do not support the POSIX variant (it is not a supported BCP 47
		// variant). This locale also doesn't happen to contain any data, so
		// we'll skip it by checking for this.
		tag, err := tagForm.Parse(loc)
		if err != nil {
			if ldml.LocaleDisplayNames != nil {
				log.Fatalf("setData: %v", err)
			}
			continue
		}
		if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
			f(g, tag, ldml.LocaleDisplayNames)
		}
	}
}

func (b *builder) filter() {
	filter := func(s *cldr.Slice) {
		if *short {
			s.SelectOnePerGroup("alt", []string{"short", ""})
		} else {
			s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
		}
		d, err := cldr.ParseDraft(*draft)
		if err != nil {
			log.Fatalf("filter: %v", err)
		}
		s.SelectDraft(d)
	}
	for _, loc := range b.data.Locales() {
		if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
			if ldn.Languages != nil {
				s := cldr.MakeSlice(&ldn.Languages.Language)
				if filter(&s); len(ldn.Languages.Language) == 0 {
					ldn.Languages = nil
				}
			}
			if ldn.Scripts != nil {
				s := cldr.MakeSlice(&ldn.Scripts.Script)
				if filter(&s); len(ldn.Scripts.Script) == 0 {
					ldn.Scripts = nil
				}
			}
			if ldn.Territories != nil {
				s := cldr.MakeSlice(&ldn.Territories.Territory)
				if filter(&s); len(ldn.Territories.Territory) == 0 {
					ldn.Territories = nil
				}
			}
		}
	}
}

// makeSupported creates a list of all supported locales.
func (b *builder) makeSupported() {
	// tags across groups
	for _, g := range b.group {
		for t, _ := range g.lang {
			b.supported = append(b.supported, t)
		}
	}
	b.supported = b.supported[:unique(tagsSorter(b.supported))]

}

type tagsSorter []language.Tag

func (a tagsSorter) Len() int           { return len(a) }
func (a tagsSorter) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }

func (b *builder) writeGroup(name string) {
	g := b.group[name]

	for _, kv := range g.lang {
		for t, _ := range kv {
			g.toTags = append(g.toTags, t)
		}
	}
	g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]

	// Allocate header per supported value.
	g.headers = make([]header, len(b.supported))
	for i, sup := range b.supported {
		kv, ok := g.lang[sup]
		if !ok {
			g.headers[i].tag = sup
			continue
		}
		data := []byte{}
		index := make([]uint16, len(g.toTags), len(g.toTags)+1)
		for j, t := range g.toTags {
			index[j] = uint16(len(data))
			data = append(data, kv[t]...)
		}
		index = append(index, uint16(len(data)))

		// Trim the tail of the index.
		// TODO: indexes can be reduced in size quite a bit more.
		n := len(index)
		for ; n >= 2 && index[n-2] == index[n-1]; n-- {
		}
		index = index[:n]

		// Workaround for a bug in CLDR 26.
		// See http://unicode.org/cldr/trac/ticket/8042.
		if cldr.Version == "26" && sup.String() == "hsb" {
			data = bytes.Replace(data, []byte{'"'}, nil, 1)
		}
		g.headers[i] = header{sup, string(data), index}
	}
	g.writeTable(b.w, name)
}

type tagsBySize []string

func (l tagsBySize) Len() int      { return len(l) }
func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l tagsBySize) Less(i, j int) bool {
	a, b := l[i], l[j]
	// Sort single-tag entries based on size first. Otherwise alphabetic.
	if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) {
		return len(a) < len(b)
	}
	return a < b
}

// parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
// of tags[i].
func parentIndices(tags []language.Tag) []int16 {
	index := make(map[language.Tag]int16)
	for i, t := range tags {
		index[t] = int16(i)
	}

	// Construct default parents.
	parents := make([]int16, len(tags))
	for i, t := range tags {
		parents[i] = -1
		for t = t.Parent(); t != language.Und; t = t.Parent() {
			if j, ok := index[t]; ok {
				parents[i] = j
				break
			}
		}
	}
	return parents
}

func (b *builder) writeParents() {
	parents := parentIndices(b.supported)
	fmt.Fprintf(b.w, "var parents = ")
	b.w.WriteArray(parents)
}

// writeKeys writes keys to a special index used by the display package.
// tags are assumed to be sorted by length.
func writeKeys(w *gen.CodeWriter, name string, keys []string) {
	w.Size += int(3 * reflect.TypeOf("").Size())
	w.WriteComment("Number of keys: %d", len(keys))
	fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
	for i := 2; i <= 4; i++ {
		sub := []string{}
		for _, t := range keys {
			if len(t) != i {
				break
			}
			sub = append(sub, t)
		}
		s := strings.Join(sub, "")
		w.WriteString(s)
		fmt.Fprintf(w, ",\n")
		keys = keys[len(sub):]
	}
	fmt.Fprintln(w, "\t}")
	if len(keys) > 0 {
		w.Size += int(reflect.TypeOf([]string{}).Size())
		fmt.Fprintf(w, "\t%sTagsLong = ", name)
		w.WriteSlice(keys)
	}
	fmt.Fprintln(w, ")\n")
}

// identifier creates an identifier from the given tag.
func identifier(t language.Tag) string {
	return strings.Replace(t.String(), "-", "", -1)
}

func (h *header) writeEntry(w *gen.CodeWriter, name string) {
	if len(dict) > 0 && dict.contains(h.tag) {
		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
		fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
		fmt.Fprintln(w, "\t},")
	} else if len(h.data) == 0 {
		fmt.Fprintln(w, "\t\t{}, //", h.tag)
	} else {
		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
		w.WriteString(h.data)
		fmt.Fprintln(w, ",")
		w.WriteSlice(h.index)
		fmt.Fprintln(w, ",\n\t},")
	}
}

// write the data for the given header as single entries. The size for this data
// was already accounted for in writeEntry.
func (h *header) writeSingle(w *gen.CodeWriter, name string) {
	if len(dict) > 0 && dict.contains(h.tag) {
		tag := identifier(h.tag)
		w.WriteConst(tag+name+"Str", h.data)

		// Note that we create a slice instead of an array. If we use an array
		// we need to refer to it as a[:] in other tables, which will cause the
		// array to always be included by the linker. See Issue 7651.
		w.WriteVar(tag+name+"Idx", h.index)
	}
}

// WriteTable writes an entry for a single Namer.
func (g *group) writeTable(w *gen.CodeWriter, name string) {
	start := w.Size
	writeKeys(w, name, g.toTags)
	w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())

	fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))

	title := strings.Title(name)
	for _, h := range g.headers {
		h.writeEntry(w, title)
	}
	fmt.Fprintln(w, "}\n")

	for _, h := range g.headers {
		h.writeSingle(w, title)
	}
	n := w.Size - start
	fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
}

func (b *builder) writeDictionaries() {
	fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
	fmt.Fprintln(b.w, "var (")
	parents := parentIndices(b.supported)

	for i, t := range b.supported {
		if dict.contains(t) {
			ident := identifier(t)
			fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
			if p := parents[i]; p == -1 {
				fmt.Fprintln(b.w, "\t\tnil,")
			} else {
				fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
			}
			fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
			fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
			fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
			fmt.Fprintln(b.w, "\t}")
		}
	}
	fmt.Fprintln(b.w, ")")

	var s string
	var a []uint16
	sz := reflect.TypeOf(s).Size()
	sz += reflect.TypeOf(a).Size()
	sz *= 3
	sz += reflect.TypeOf(&a).Size()
	n := int(sz) * len(dict)
	fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)

	b.w.Size += n
}

// unique sorts the given lists and removes duplicate entries by swapping them
// past position k, where k is the number of unique values. It returns k.
func unique(a sort.Interface) int {
	if a.Len() == 0 {
		return 0
	}
	sort.Sort(a)
	k := 1
	for i := 1; i < a.Len(); i++ {
		if a.Less(k-1, i) {
			if k != i {
				a.Swap(k, i)
			}
			k++
		}
	}
	return k
}