mondash/vendor/golang.org/x/text/internal/triegen/triegen.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package triegen implements a code generator for a trie for associating
// unsigned integer values with UTF-8 encoded runes.
//
// Many of the go.text packages use tries for storing per-rune information.  A
// trie is especially useful if many of the runes have the same value. If this
// is the case, many blocks can be expected to be shared allowing for
// information on many runes to be stored in little space.
//
// As most of the lookups are done directly on []byte slices, the tries use the
// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
// runes and contributes a little bit to better performance. It also naturally
// provides a fast path for ASCII.
//
// Space is also an issue. There are many code points defined in Unicode and as
// a result tables can get quite large. So every byte counts. The triegen
// package automatically chooses the smallest integer values to represent the
// tables. Compacters allow further compression of the trie by allowing for
// alternative representations of individual trie blocks.
//
// triegen allows generating multiple tries as a single structure. This is
// useful when, for example, one wants to generate tries for several languages
// that have a lot of values in common. Some existing libraries for
// internationalization store all per-language data as a dynamically loadable
// chunk. The go.text packages are designed with the assumption that the user
// typically wants to compile in support for all supported languages, in line
// with the approach common to Go to create a single standalone binary. The
// multi-root trie approach can give significant storage savings in this
// scenario.
//
// triegen generates both tables and code. The code is optimized to use the
// automatically chosen data types. The following code is generated for a Trie
// or multiple Tries named "foo":
//	- type fooTrie
//		The trie type.
//
//	- func newFooTrie(x int) *fooTrie
//		Trie constructor, where x is the index of the trie passed to Gen.
//
//	- func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
//		The lookup method, where uintX is automatically chosen.
//
//	- func lookupString, lookupUnsafe and lookupStringUnsafe
//		Variants of the above.
//
//	- var fooValues and fooIndex and any tables generated by Compacters.
//		The core trie data.
//
//	- var fooTrieHandles
//		Indexes of starter blocks in case of multiple trie roots.
//
// It is recommended that users test the generated trie by checking the returned
// value for every rune. Such exhaustive tests are possible as the number of
// runes in Unicode is limited.
package triegen // import "golang.org/x/text/internal/triegen"

// TODO: Arguably, the internally optimized data types would not have to be
// exposed in the generated API. We could also investigate not generating the
// code, but using it through a package. We would have to investigate the impact
// on performance of making such change, though. For packages like unicode/norm,
// small changes like this could tank performance.

import (
	"encoding/binary"
	"fmt"
	"hash/crc64"
	"io"
	"log"
	"unicode/utf8"
)

// builder builds a set of tries for associating values with runes. The set of
// tries can share common index and value blocks.
//
// A builder is created by Gen, configured by the Options passed to Gen, and
// then filled in by build. Exported fields are exported so that they can be
// referenced while printing the generated code.
type builder struct {
	// Name is the name passed to Gen. It is used as the prefix of the default
	// value handler expression (Name + "Values[...]").
	Name string

	// ValueType is the type of the trie values looked up.
	ValueType string

	// ValueSize is the byte size of the ValueType.
	ValueSize int

	// IndexType is the type of trie index values used for all UTF-8 bytes of
	// a rune except the last one.
	IndexType string

	// IndexSize is the byte size of the IndexType.
	IndexSize int

	// SourceType is used when generating the lookup functions. If the user
	// requests StringSupport, all lookup functions will be generated for
	// string input as well.
	SourceType string

	// Trie holds the tries passed to Gen, in the order given.
	Trie []*Trie

	// IndexBlocks holds the internal (index) nodes; Gen seeds it with three
	// empty placeholder nodes. ValueBlocks holds the uncompacted value blocks,
	// starting with the ASCII blocks added by buildTrie. Compactions holds one
	// entry per Compacter, entry 0 being the default one installed by Gen.
	// Checksum is the sum of the checksums of all tries, computed by build.
	IndexBlocks []*node
	ValueBlocks [][]uint64
	Compactions []compaction
	Checksum    uint64

	// ASCIIBlock and StarterBlock are not assigned anywhere in this part of
	// the file; presumably they are filled in while printing. TODO confirm.
	ASCIIBlock   string
	StarterBlock string

	// The maps below are keyed by block hash and are used to share identical
	// blocks: indexBlockIdx yields the index assigned to an internal node,
	// valueBlockIdx the nodeIndex assigned to a leaf node, and asciiBlockIdx
	// the position in ValueBlocks of a root's pair of ASCII blocks.
	indexBlockIdx map[uint64]int
	valueBlockIdx map[uint64]nodeIndex
	asciiBlockIdx map[uint64]int

	// Stats are used to fill out the template.
	Stats struct {
		NValueEntries int
		NValueBytes   int
		NIndexEntries int
		NIndexBytes   int
		NHandleBytes  int
	}

	// err is the first error recorded through setError. Gen returns it after
	// build completes.
	err error
}

// A nodeIndex encodes the index of a node, which is defined by the compaction
// which stores it and an index within the compaction. For internal nodes, the
// compaction is always 0.
//
// compaction is a position in builder.Compactions. For leaf nodes, index is the
// handle returned by that compaction's Store method; for internal nodes it is
// the value assigned by computeOffsets.
type nodeIndex struct {
	compaction int
	index      int
}

// compaction keeps track of stats used for the compaction.
//
// c is the Compacter doing the work. maxHandle is the largest handle c.Store
// has returned so far and totalSize is the accumulated size of all blocks
// stored through c; both are updated by computeOffsets. blocks is not used in
// this part of the file.
type compaction struct {
	c         Compacter
	blocks    []*node
	maxHandle uint32
	totalSize int

	// Used by template-based generator and thus exported.
	//
	// build assigns Offset and Cutoff so that the compactions occupy
	// consecutive handle ranges: Offset is the running total before this
	// compaction and Cutoff is Offset + maxHandle + 1. Handler is the
	// expression string associated with this compaction.
	Cutoff  uint32
	Offset  uint32
	Handler string
}

// setError records err on the builder unless an error has already been
// recorded, so that only the first error is retained.
func (b *builder) setError(err error) {
	if b.err != nil {
		return
	}
	b.err = err
}

// An Option can be passed to Gen. Gen applies each Option to its builder
// before building any trie; if an Option returns a non-nil error, Gen stops
// and returns that error.
type Option func(b *builder) error

// Compact returns an Option that registers c as an additional Compacter of the
// trie generator. The handler expression recorded for c is the name returned
// by c.Handler followed by the argument list "(n, b)".
func Compact(c Compacter) Option {
	return func(b *builder) error {
		entry := compaction{c: c}
		entry.Handler = c.Handler() + "(n, b)"
		b.Compactions = append(b.Compactions, entry)
		return nil
	}
}

// Gen writes Go code for a shared trie lookup structure to w for the given
// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
// return the *nameTrie for tries[x]. A value can be looked up by using one of
// the various lookup methods defined on nameTrie.
//
// On success Gen returns the table size of the generated trie. If an Option,
// the build step or the printing step fails, it returns 0 and the error.
func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
	b := &builder{
		Name: name,
		Trie: tries,

		// The index contains two dummy blocks, followed by the zero block.
		// The zero block is at offset 0x80, so that the offset for the zero
		// block for continuation bytes is 0.
		IndexBlocks: []*node{{}, {}, {}},

		// The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
		// block.
		indexBlockIdx: map[uint64]int{0: 0},
		valueBlockIdx: map[uint64]nodeIndex{0: {}},
		asciiBlockIdx: map[uint64]int{},
	}

	// Compaction 0 is the default representation: plain value blocks that are
	// managed by the builder itself.
	b.Compactions = []compaction{{
		c:       (*simpleCompacter)(b),
		Handler: name + "Values[n<<6+uint32(b)]",
	}}

	for _, opt := range opts {
		if err = opt(b); err != nil {
			return 0, err
		}
	}

	b.build()
	if b.err != nil {
		return 0, b.err
	}

	if err = b.print(w); err != nil {
		return 0, err
	}
	return b.Size(), nil
}

// A Trie represents a single root node of a trie. A builder may build several
// overlapping tries at once.
type Trie struct {
	// root is the root node. Its values hold the entries for single-byte
	// (ASCII) runes and its children are selected by the first byte of a
	// multi-byte rune modulo blockSize (see Insert).
	root *node

	// hiddenTrie carries the fields used by the template generator.
	hiddenTrie
}

// hiddenTrie contains values we want to be visible to the template generator,
// but hidden from the API documentation.
//
// Name is the name given to NewTrie. The remaining fields are filled in by
// builder.buildTrie: Checksum is the hash computeOffsets returns for the root,
// ASCIIIndex is the position in builder.ValueBlocks of the first of the root's
// two ASCII value blocks, and StarterIndex is the root's index adjusted by the
// difference between rootBlockOffset and blockOffset.
type hiddenTrie struct {
	Name         string
	Checksum     uint64
	ASCIIIndex   int
	StarterIndex int
}

// NewTrie returns a new, empty trie root with the given name. The root node is
// created with blockSize child slots and utf8.RuneSelf value slots, the latter
// holding the values of the single-byte (ASCII) runes.
func NewTrie(name string) *Trie {
	root := &node{
		children: make([]*node, blockSize),
		values:   make([]uint64, utf8.RuneSelf),
	}
	return &Trie{
		root:       root,
		hiddenTrie: hiddenTrie{Name: name},
	}
}

// Gen is a convenience wrapper around the package-level Gen function: it
// generates code for t alone, using the name that was passed to NewTrie. It
// returns the size of the generated tables.
func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
	single := []*Trie{t}
	return Gen(w, t.Name, single, opts...)
}

// node is a node of the intermediate trie structure.
type node struct {
	// children holds this node's children. It is always of length 64.
	// A child node may be nil.
	children []*node

	// values contains the values of this node. If it is non-nil, this node is
	// either a root or leaf node:
	// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
	// For leaf nodes, len(values) ==  64 and it maps the bytes in [0x80, 0xBF].
	values []uint64

	// index identifies the block this node was assigned to. It is set by
	// computeOffsets and is the zero nodeIndex until then.
	index nodeIndex
}

// Insert associates value with the given rune. A zero value is ignored, as
// zero is what every rune maps to by default. Insert will panic if a non-zero
// value is passed for an invalid rune.
func (t *Trie) Insert(r rune, value uint64) {
	if value == 0 {
		return
	}

	enc := string(r)
	// Converting an invalid rune to a string does not round-trip, which is
	// how invalid runes are detected here.
	if decoded := []rune(enc); decoded[0] != r {
		// Note: The UCD tables will always assign what amounts to a zero value
		// to a surrogate. Allowing a zero value for an illegal rune allows
		// users to iterate over [0..MaxRune] without having to explicitly
		// exclude surrogates, which would be tedious.
		panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
	}

	// Single-byte (ASCII) runes are stored directly in the root node.
	if len(enc) == 1 {
		t.root.values[enc[0]] = value
		return
	}

	// Walk down one level per leading byte, creating nodes on demand, until
	// only the final byte of the encoding is left.
	cur := t.root
	for len(enc) > 1 {
		if cur.children == nil {
			cur.children = make([]*node, blockSize)
		}
		slot := enc[0] % blockSize
		child := cur.children[slot]
		if child == nil {
			child = &node{}
			cur.children[slot] = child
		}
		if child.values != nil && len(enc) > 2 {
			log.Fatalf("triegen: insert(%U): found internal node with values", r)
		}
		cur = child
		enc = enc[1:]
	}

	// cur is now the leaf for the final byte, which is stored relative to
	// 0x80.
	if cur.values == nil {
		cur.values = make([]uint64, blockSize)
	}
	if cur.children != nil {
		log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
	}
	cur.values[enc[0]-0x80] = value
}

// Size returns the number of bytes the generated trie will take to store. It
// needs to be exported as it is used in the templates.
func (b *builder) Size() int {
	indexBytes := len(b.IndexBlocks) * blockSize * b.IndexSize

	// The plain value blocks are counted through ValueBlocks rather than
	// through the first compaction, as that compaction's totalSize does not
	// account for the ASCII blocks, which are managed separately.
	valueBytes := len(b.ValueBlocks) * blockSize * b.ValueSize

	compactedBytes := 0
	extra := b.Compactions[1:]
	for i := range extra {
		compactedBytes += extra[i].totalSize
	}

	// TODO: this computation does not account for the fixed overhead of using
	// a compaction, either code or data. As for data, though, the typical
	// overhead of data is in the order of bytes (2 bytes for cases). Further,
	// the savings of using a compaction should anyway be substantial for it to
	// be worth it.

	total := indexBytes + valueBytes + compactedBytes

	// For multi-root tries, we also need to account for the handles.
	if len(b.Trie) > 1 {
		total += 2 * b.IndexSize * len(b.Trie)
	}
	return total
}

// build computes everything needed to print the tries: the value type, the
// block allocation of every trie, the handle range of every compaction and
// finally the index type.
func (b *builder) build() {
	// The value type must be able to hold the largest value of any trie.
	var vmax uint64
	for i := range b.Trie {
		vmax = maxValue(b.Trie[i].root, vmax)
	}
	b.ValueType, b.ValueSize = getIntType(vmax)

	// Compute all block allocations.
	// TODO: first compute the ASCII blocks for all tries and then the other
	// nodes. ASCII blocks are more restricted in placement, as they require two
	// blocks to be placed consecutively. Processing them first may improve
	// sharing (at least one zero block can be expected to be saved.)
	for _, t := range b.Trie {
		b.Checksum += b.buildTrie(t)
	}

	// Give each Compacter a consecutive range of handles:
	// [Offset, Cutoff) with Cutoff = Offset + maxHandle + 1.
	var next uint32
	for i := range b.Compactions {
		c := &b.Compactions[i]
		c.Offset = next
		next += c.maxHandle + 1
		c.Cutoff = next
	}

	// The index type must be able to hold the final cutoff as well as the
	// index of every index block.
	// TODO: different byte positions could have different sizes. So far we have
	// not found a case where this is beneficial.
	last := b.Compactions[len(b.Compactions)-1]
	imax := uint64(last.Cutoff)
	for _, blk := range b.IndexBlocks {
		if idx := uint64(blk.index.index); imax < idx {
			imax = idx
		}
	}
	b.IndexType, b.IndexSize = getIntType(imax)
}

// maxValue returns the largest of max and all values stored in n or in any of
// its descendants. A nil node contributes nothing.
func maxValue(n *node, max uint64) uint64 {
	if n == nil {
		return max
	}
	for i := range n.children {
		max = maxValue(n.children[i], max)
	}
	for i := range n.values {
		if v := n.values[i]; v > max {
			max = v
		}
	}
	return max
}

// getIntType returns the name and the byte size of the smallest unsigned
// integer type that can represent v.
func getIntType(v uint64) (string, int) {
	// A value fits in k bits exactly when nothing remains after shifting
	// those k bits out.
	if v>>8 == 0 {
		return "uint8", 1
	}
	if v>>16 == 0 {
		return "uint16", 2
	}
	if v>>32 == 0 {
		return "uint32", 4
	}
	return "uint64", 8
}

const (
	// blockSize is the number of entries in a single index or value block.
	blockSize = 64

	// Subtract two blocks to offset 0x80, the first continuation byte.
	// computeOffsets subtracts blockOffset from the position of a block in
	// IndexBlocks when assigning its index.
	blockOffset = 2

	// Subtract three blocks to offset 0xC0, the first non-ASCII starter.
	// It also equals the number of placeholder blocks Gen puts in IndexBlocks,
	// so the root of the first trie ends up at this position.
	rootBlockOffset = 3
)

// crcTable is the CRC-64 table (ISO polynomial) used to hash blocks. The
// resulting hashes are the keys under which identical blocks are shared.
var crcTable = crc64.MakeTable(crc64.ISO)

// buildTrie allocates all blocks of t: first the pair of ASCII value blocks of
// the root, then, through computeOffsets, every other node. It fills in
// t.ASCIIIndex, t.Checksum and t.StarterIndex and returns t.Checksum.
func (b *builder) buildTrie(t *Trie) uint64 {
	root := t.root

	// Get the ASCII offset. For the first trie, the ASCII block will be at
	// position 0. Roots with identical ASCII values share their blocks.
	h := crc64.New(crcTable)
	binary.Write(h, binary.BigEndian, root.values)
	key := h.Sum64()

	idx, seen := b.asciiBlockIdx[key]
	if !seen {
		idx = len(b.ValueBlocks)
		b.asciiBlockIdx[key] = idx

		// The ASCII values are stored as two consecutive value blocks.
		lower, upper := root.values[:blockSize], root.values[blockSize:]
		b.ValueBlocks = append(b.ValueBlocks, lower, upper)
		if idx == 0 {
			// Add the zero block at position 2 so that it will be assigned a
			// zero reference in the lookup blocks.
			// TODO: always do this? This would allow us to remove a check from
			// the trie lookup, but at the expense of extra space. Analyze
			// performance for unicode/norm.
			b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
		}
	}
	t.ASCIIIndex = idx

	// Compute remaining offsets.
	sum := b.computeOffsets(root, true)
	t.Checksum = sum

	// We already subtracted the normal blockOffset from the index. Subtract the
	// difference for starter bytes.
	t.StarterIndex = root.index.index - (rootBlockOffset - blockOffset)
	return sum
}

// computeOffsets assigns an index to n and, recursively, to all of its
// descendants, and returns a hash of the subtree rooted at n.
//
// Nodes with equal hashes share a block. Internal nodes (those with children)
// are looked up in indexBlockIdx and, if new, appended to IndexBlocks. Leaf
// nodes are looked up in valueBlockIdx and, if new, stored through the
// compaction that reports the smallest size for their values. A node that has
// neither children nor values hashes to 0, the key Gen registers for the zero
// block in both maps.
//
// The root parameter is not used by this function.
func (b *builder) computeOffsets(n *node, root bool) uint64 {
	// For the first trie, the root lookup block will be at position 3, which is
	// the offset for UTF-8 non-ASCII starter bytes.
	// IndexBlocks starts out with rootBlockOffset placeholder entries and only
	// grows, so first is true for the very first call only. The append has to
	// happen before recursing so that no descendant takes this position.
	first := len(b.IndexBlocks) == rootBlockOffset
	if first {
		b.IndexBlocks = append(b.IndexBlocks, n)
	}

	// We special-case the cases where all values recursively are 0. This allows
	// for the use of a zero block to which all such values can be directed.
	hash := uint64(0)
	if n.children != nil || n.values != nil {
		// The hash covers the hashes of all children, in order and with 0
		// standing in for a nil child, followed by the node's own values.
		hasher := crc64.New(crcTable)
		for _, c := range n.children {
			var v uint64
			if c != nil {
				v = b.computeOffsets(c, false)
			}
			binary.Write(hasher, binary.BigEndian, v)
		}
		binary.Write(hasher, binary.BigEndian, n.values)
		hash = hasher.Sum64()
	}

	if first {
		// Register the index of the block appended above, so that the lookup
		// below finds it instead of appending n a second time.
		b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
	}

	// Compacters don't apply to internal nodes.
	if n.children != nil {
		v, ok := b.indexBlockIdx[hash]
		if !ok {
			// New index block: its index is its position in IndexBlocks
			// less blockOffset.
			v = len(b.IndexBlocks) - blockOffset
			b.IndexBlocks = append(b.IndexBlocks, n)
			b.indexBlockIdx[hash] = v
		}
		n.index = nodeIndex{0, v}
	} else {
		h, ok := b.valueBlockIdx[hash]
		if !ok {
			// Start from the default compaction (index 0) at the size of a
			// plain block; another Compacter is chosen only if it accepts the
			// values and reports a strictly smaller size.
			bestI, bestSize := 0, blockSize*b.ValueSize
			for i, c := range b.Compactions[1:] {
				if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
					bestI, bestSize = i+1, sz
				}
			}
			c := &b.Compactions[bestI]
			c.totalSize += bestSize
			v := c.c.Store(n.values)
			// Track the largest handle; build derives the handle range of
			// each compaction from it.
			if c.maxHandle < v {
				c.maxHandle = v
			}
			h = nodeIndex{bestI, int(v)}
			b.valueBlockIdx[hash] = h
		}
		n.index = h
	}
	return hash
}
Switch to dep for vendoring, update libraries Signed-off-by: Knut Ahlers <knut@ahlers.me> 2017-11-22 20:39:52 +00:00			`// Copyright 2014 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`// Package triegen implements a code generator for a trie for associating`
			`// unsigned integer values with UTF-8 encoded runes.`
			`//`
			`// Many of the go.text packages use tries for storing per-rune information. A`
			`// trie is especially useful if many of the runes have the same value. If this`
			`// is the case, many blocks can be expected to be shared allowing for`
			`// information on many runes to be stored in little space.`
			`//`
			`// As most of the lookups are done directly on []byte slices, the tries use the`
			`// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to`
			`// runes and contributes a little bit to better performance. It also naturally`
			`// provides a fast path for ASCII.`
			`//`
			`// Space is also an issue. There are many code points defined in Unicode and as`
			`// a result tables can get quite large. So every byte counts. The triegen`
			`// package automatically chooses the smallest integer values to represent the`
			`// tables. Compacters allow further compression of the trie by allowing for`
			`// alternative representations of individual trie blocks.`
			`//`
			`// triegen allows generating multiple tries as a single structure. This is`
			`// useful when, for example, one wants to generate tries for several languages`
			`// that have a lot of values in common. Some existing libraries for`
			`// internationalization store all per-language data as a dynamically loadable`
			`// chunk. The go.text packages are designed with the assumption that the user`
			`// typically wants to compile in support for all supported languages, in line`
			`// with the approach common to Go to create a single standalone binary. The`
			`// multi-root trie approach can give significant storage savings in this`
			`// scenario.`
			`//`
			`// triegen generates both tables and code. The code is optimized to use the`
			`// automatically chosen data types. The following code is generated for a Trie`
			`// or multiple Tries named "foo":`
			`// - type fooTrie`
			`// The trie type.`
			`//`
			`// - func newFooTrie(x int) *fooTrie`
			`// Trie constructor, where x is the index of the trie passed to Gen.`
			`//`
			`// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int)`
			`// The lookup method, where uintX is automatically chosen.`
			`//`
			`// - func lookupString, lookupUnsafe and lookupStringUnsafe`
			`// Variants of the above.`
			`//`
			`// - var fooValues and fooIndex and any tables generated by Compacters.`
			`// The core trie data.`
			`//`
			`// - var fooTrieHandles`
			`// Indexes of starter blocks in case of multiple trie roots.`
			`//`
			`// It is recommended that users test the generated trie by checking the returned`
			`// value for every rune. Such exhaustive tests are possible as the the number of`
			`// runes in Unicode is limited.`
			`package triegen // import "golang.org/x/text/internal/triegen"`

			`// TODO: Arguably, the internally optimized data types would not have to be`
			`// exposed in the generated API. We could also investigate not generating the`
			`// code, but using it through a package. We would have to investigate the impact`
			`// on performance of making such change, though. For packages like unicode/norm,`
			`// small changes like this could tank performance.`

			`import (`
			`"encoding/binary"`
			`"fmt"`
			`"hash/crc64"`
			`"io"`
			`"log"`
			`"unicode/utf8"`
			`)`

			`// builder builds a set of tries for associating values with runes. The set of`
			`// tries can share common index and value blocks.`
			`type builder struct {`
			`Name string`

			`// ValueType is the type of the trie values looked up.`
			`ValueType string`

			`// ValueSize is the byte size of the ValueType.`
			`ValueSize int`

			`// IndexType is the type of trie index values used for all UTF-8 bytes of`
			`// a rune except the last one.`
			`IndexType string`

			`// IndexSize is the byte size of the IndexType.`
			`IndexSize int`

			`// SourceType is used when generating the lookup functions. If the user`
			`// requests StringSupport, all lookup functions will be generated for`
			`// string input as well.`
			`SourceType string`

			`Trie []*Trie`

			`IndexBlocks []*node`
			`ValueBlocks [][]uint64`
			`Compactions []compaction`
			`Checksum uint64`

			`ASCIIBlock string`
			`StarterBlock string`

			`indexBlockIdx map[uint64]int`
			`valueBlockIdx map[uint64]nodeIndex`
			`asciiBlockIdx map[uint64]int`

			`// Stats are used to fill out the template.`
			`Stats struct {`
			`NValueEntries int`
			`NValueBytes int`
			`NIndexEntries int`
			`NIndexBytes int`
			`NHandleBytes int`
			`}`

			`err error`
			`}`

			`// A nodeIndex encodes the index of a node, which is defined by the compaction`
			`// which stores it and an index within the compaction. For internal nodes, the`
			`// compaction is always 0.`
			`type nodeIndex struct {`
			`compaction int`
			`index int`
			`}`

			`// compaction keeps track of stats used for the compaction.`
			`type compaction struct {`
			`c Compacter`
			`blocks []*node`
			`maxHandle uint32`
			`totalSize int`

			`// Used by template-based generator and thus exported.`
			`Cutoff uint32`
			`Offset uint32`
			`Handler string`
			`}`

			`func (b *builder) setError(err error) {`
			`if b.err == nil {`
			`b.err = err`
			`}`
			`}`

			`// An Option can be passed to Gen.`
			`type Option func(b *builder) error`

			`// Compact configures the trie generator to use the given Compacter.`
			`func Compact(c Compacter) Option {`
			`return func(b *builder) error {`
			`b.Compactions = append(b.Compactions, compaction{`
			`c: c,`
			`Handler: c.Handler() + "(n, b)"})`
			`return nil`
			`}`
			`}`

			`// Gen writes Go code for a shared trie lookup structure to w for the given`
			`// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will`
			`// return the *nameTrie for tries[x]. A value can be looked up by using one of`
			`// the various lookup methods defined on nameTrie. It returns the table size of`
			`// the generated trie.`
			`func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {`
			`// The index contains two dummy blocks, followed by the zero block. The zero`
			`// block is at offset 0x80, so that the offset for the zero block for`
			`// continuation bytes is 0.`
			`b := &builder{`
			`Name: name,`
			`Trie: tries,`
			`IndexBlocks: []*node{{}, {}, {}},`
			`Compactions: []compaction{{`
			`Handler: name + "Values[n<<6+uint32(b)]",`
			`}},`
			`// The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero`
			`// block.`
			`indexBlockIdx: map[uint64]int{0: 0},`
			`valueBlockIdx: map[uint64]nodeIndex{0: {}},`
			`asciiBlockIdx: map[uint64]int{},`
			`}`
			`b.Compactions[0].c = (*simpleCompacter)(b)`

			`for _, f := range opts {`
			`if err := f(b); err != nil {`
			`return 0, err`
			`}`
			`}`
			`b.build()`
			`if b.err != nil {`
			`return 0, b.err`
			`}`
			`if err = b.print(w); err != nil {`
			`return 0, err`
			`}`
			`return b.Size(), nil`
			`}`

			`// A Trie represents a single root node of a trie. A builder may build several`
			`// overlapping tries at once.`
			`type Trie struct {`
			`root *node`

			`hiddenTrie`
			`}`

			`// hiddenTrie contains values we want to be visible to the template generator,`
			`// but hidden from the API documentation.`
			`type hiddenTrie struct {`
			`Name string`
			`Checksum uint64`
			`ASCIIIndex int`
			`StarterIndex int`
			`}`

			`// NewTrie returns a new trie root.`
			`func NewTrie(name string) *Trie {`
			`return &Trie{`
			`&node{`
			`children: make([]*node, blockSize),`
			`values: make([]uint64, utf8.RuneSelf),`
			`},`
			`hiddenTrie{Name: name},`
			`}`
			`}`

			`// Gen is a convenience wrapper around the Gen func passing t as the only trie`
			`// and uses the name passed to NewTrie. It returns the size of the generated`
			`// tables.`
			`func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {`
			`return Gen(w, t.Name, []*Trie{t}, opts...)`
			`}`

			`// node is a node of the intermediate trie structure.`
			`type node struct {`
			`// children holds this node's children. It is always of length 64.`
			`// A child node may be nil.`
			`children []*node`

			`// values contains the values of this node. If it is non-nil, this node is`
			`// either a root or leaf node:`
			`// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].`
			`// For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF].`
			`values []uint64`

			`index nodeIndex`
			`}`

			`// Insert associates value with the given rune. Insert will panic if a non-zero`
			`// value is passed for an invalid rune.`
			`func (t *Trie) Insert(r rune, value uint64) {`
			`if value == 0 {`
			`return`
			`}`
			`s := string(r)`
			`if []rune(s)[0] != r && value != 0 {`
			`// Note: The UCD tables will always assign what amounts to a zero value`
			`// to a surrogate. Allowing a zero value for an illegal rune allows`
			`// users to iterate over [0..MaxRune] without having to explicitly`
			`// exclude surrogates, which would be tedious.`
			`panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))`
			`}`
			`if len(s) == 1 {`
			`// It is a root node value (ASCII).`
			`t.root.values[s[0]] = value`
			`return`
			`}`

			`n := t.root`
			`for ; len(s) > 1; s = s[1:] {`
			`if n.children == nil {`
			`n.children = make([]*node, blockSize)`
			`}`
			`p := s[0] % blockSize`
			`c := n.children[p]`
			`if c == nil {`
			`c = &node{}`
			`n.children[p] = c`
			`}`
			`if len(s) > 2 && c.values != nil {`
			`log.Fatalf("triegen: insert(%U): found internal node with values", r)`
			`}`
			`n = c`
			`}`
			`if n.values == nil {`
			`n.values = make([]uint64, blockSize)`
			`}`
			`if n.children != nil {`
			`log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)`
			`}`
			`n.values[s[0]-0x80] = value`
			`}`

			`// Size returns the number of bytes the generated trie will take to store. It`
			`// needs to be exported as it is used in the templates.`
			`func (b *builder) Size() int {`
			`// Index blocks.`
			`sz := len(b.IndexBlocks) * blockSize * b.IndexSize`

			`// Skip the first compaction, which represents the normal value blocks, as`
			`// its totalSize does not account for the ASCII blocks, which are managed`
			`// separately.`
			`sz += len(b.ValueBlocks) * blockSize * b.ValueSize`
			`for _, c := range b.Compactions[1:] {`
			`sz += c.totalSize`
			`}`

			`// TODO: this computation does not account for the fixed overhead of a using`
			`// a compaction, either code or data. As for data, though, the typical`
			`// overhead of data is in the order of bytes (2 bytes for cases). Further,`
			`// the savings of using a compaction should anyway be substantial for it to`
			`// be worth it.`

			`// For multi-root tries, we also need to account for the handles.`
			`if len(b.Trie) > 1 {`
			`sz += 2 * b.IndexSize * len(b.Trie)`
			`}`
			`return sz`
			`}`

			`func (b *builder) build() {`
			`// Compute the sizes of the values.`
			`var vmax uint64`
			`for _, t := range b.Trie {`
			`vmax = maxValue(t.root, vmax)`
			`}`
			`b.ValueType, b.ValueSize = getIntType(vmax)`

			`// Compute all block allocations.`
			`// TODO: first compute the ASCII blocks for all tries and then the other`
			`// nodes. ASCII blocks are more restricted in placement, as they require two`
			`// blocks to be placed consecutively. Processing them first may improve`
			`// sharing (at least one zero block can be expected to be saved.)`
			`for _, t := range b.Trie {`
			`b.Checksum += b.buildTrie(t)`
			`}`

			`// Compute the offsets for all the Compacters.`
			`offset := uint32(0)`
			`for i := range b.Compactions {`
			`c := &b.Compactions[i]`
			`c.Offset = offset`
			`offset += c.maxHandle + 1`
			`c.Cutoff = offset`
			`}`

			`// Compute the sizes of indexes.`
			`// TODO: different byte positions could have different sizes. So far we have`
			`// not found a case where this is beneficial.`
			`imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)`
			`for _, ib := range b.IndexBlocks {`
			`if x := uint64(ib.index.index); x > imax {`
			`imax = x`
			`}`
			`}`
			`b.IndexType, b.IndexSize = getIntType(imax)`
			`}`

			`func maxValue(n *node, max uint64) uint64 {`
			`if n == nil {`
			`return max`
			`}`
			`for _, c := range n.children {`
			`max = maxValue(c, max)`
			`}`
			`for _, v := range n.values {`
			`if max < v {`
			`max = v`
			`}`
			`}`
			`return max`
			`}`

			`func getIntType(v uint64) (string, int) {`
			`switch {`
			`case v < 1<<8:`
			`return "uint8", 1`
			`case v < 1<<16:`
			`return "uint16", 2`
			`case v < 1<<32:`
			`return "uint32", 4`
			`}`
			`return "uint64", 8`
			`}`

			`const (`
			`blockSize = 64`

			`// Subtract two blocks to offset 0x80, the first continuation byte.`
			`blockOffset = 2`

			`// Subtract three blocks to offset 0xC0, the first non-ASCII starter.`
			`rootBlockOffset = 3`
			`)`

			`var crcTable = crc64.MakeTable(crc64.ISO)`

			`func (b builder) buildTrie(t Trie) uint64 {`
			`n := t.root`

			`// Get the ASCII offset. For the first trie, the ASCII block will be at`
			`// position 0.`
			`hasher := crc64.New(crcTable)`
			`binary.Write(hasher, binary.BigEndian, n.values)`
			`hash := hasher.Sum64()`

			`v, ok := b.asciiBlockIdx[hash]`
			`if !ok {`
			`v = len(b.ValueBlocks)`
			`b.asciiBlockIdx[hash] = v`

			`b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])`
			`if v == 0 {`
			`// Add the zero block at position 2 so that it will be assigned a`
			`// zero reference in the lookup blocks.`
			`// TODO: always do this? This would allow us to remove a check from`
			`// the trie lookup, but at the expense of extra space. Analyze`
			`// performance for unicode/norm.`
			`b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))`
			`}`
			`}`
			`t.ASCIIIndex = v`

			`// Compute remaining offsets.`
			`t.Checksum = b.computeOffsets(n, true)`
			`// We already subtracted the normal blockOffset from the index. Subtract the`
			`// difference for starter bytes.`
			`t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)`
			`return t.Checksum`
			`}`

			`func (b builder) computeOffsets(n node, root bool) uint64 {`
			`// For the first trie, the root lookup block will be at position 3, which is`
			`// the offset for UTF-8 non-ASCII starter bytes.`
			`first := len(b.IndexBlocks) == rootBlockOffset`
			`if first {`
			`b.IndexBlocks = append(b.IndexBlocks, n)`
			`}`

			`// We special-case the cases where all values recursively are 0. This allows`
			`// for the use of a zero block to which all such values can be directed.`
			`hash := uint64(0)`
			`if n.children != nil \|\| n.values != nil {`
			`hasher := crc64.New(crcTable)`
			`for _, c := range n.children {`
			`var v uint64`
			`if c != nil {`
			`v = b.computeOffsets(c, false)`
			`}`
			`binary.Write(hasher, binary.BigEndian, v)`
			`}`
			`binary.Write(hasher, binary.BigEndian, n.values)`
			`hash = hasher.Sum64()`
			`}`

			`if first {`
			`b.indexBlockIdx[hash] = rootBlockOffset - blockOffset`
			`}`

			`// Compacters don't apply to internal nodes.`
			`if n.children != nil {`
			`v, ok := b.indexBlockIdx[hash]`
			`if !ok {`
			`v = len(b.IndexBlocks) - blockOffset`
			`b.IndexBlocks = append(b.IndexBlocks, n)`
			`b.indexBlockIdx[hash] = v`
			`}`
			`n.index = nodeIndex{0, v}`
			`} else {`
			`h, ok := b.valueBlockIdx[hash]`
			`if !ok {`
			`bestI, bestSize := 0, blockSize*b.ValueSize`
			`for i, c := range b.Compactions[1:] {`
			`if sz, ok := c.c.Size(n.values); ok && bestSize > sz {`
			`bestI, bestSize = i+1, sz`
			`}`
			`}`
			`c := &b.Compactions[bestI]`
			`c.totalSize += bestSize`
			`v := c.c.Store(n.values)`
			`if c.maxHandle < v {`
			`c.maxHandle = v`
			`}`
			`h = nodeIndex{bestI, int(v)}`
			`b.valueBlockIdx[hash] = h`
			`}`
			`n.index = h`
			`}`
			`return hash`
			`}`