mirror of
https://github.com/Luzifer/mondash.git
synced 2025-01-10 12:51:49 +00:00
118 lines
2.7 KiB
Go
118 lines
2.7 KiB
Go
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
// +build ignore
|
||
|
|
||
|
// Trie table generator.
|
||
|
// Used by make*tables tools to generate a go file with trie data structures
|
||
|
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
|
||
|
// sequence are used to lookup offsets in the index table to be used for the
|
||
|
// next byte. The last byte is used to index into a table with 16-bit values.
|
||
|
|
||
|
package main
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"io"
|
||
|
)
|
||
|
|
||
|
const maxSparseEntries = 16
|
||
|
|
||
|
type normCompacter struct {
|
||
|
sparseBlocks [][]uint64
|
||
|
sparseOffset []uint16
|
||
|
sparseCount int
|
||
|
name string
|
||
|
}
|
||
|
|
||
|
func mostFrequentStride(a []uint64) int {
|
||
|
counts := make(map[int]int)
|
||
|
var v int
|
||
|
for _, x := range a {
|
||
|
if stride := int(x) - v; v != 0 && stride >= 0 {
|
||
|
counts[stride]++
|
||
|
}
|
||
|
v = int(x)
|
||
|
}
|
||
|
var maxs, maxc int
|
||
|
for stride, cnt := range counts {
|
||
|
if cnt > maxc || (cnt == maxc && stride < maxs) {
|
||
|
maxs, maxc = stride, cnt
|
||
|
}
|
||
|
}
|
||
|
return maxs
|
||
|
}
|
||
|
|
||
|
func countSparseEntries(a []uint64) int {
|
||
|
stride := mostFrequentStride(a)
|
||
|
var v, count int
|
||
|
for _, tv := range a {
|
||
|
if int(tv)-v != stride {
|
||
|
if tv != 0 {
|
||
|
count++
|
||
|
}
|
||
|
}
|
||
|
v = int(tv)
|
||
|
}
|
||
|
return count
|
||
|
}
|
||
|
|
||
|
func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
|
||
|
if n := countSparseEntries(v); n <= maxSparseEntries {
|
||
|
return (n+1)*4 + 2, true
|
||
|
}
|
||
|
return 0, false
|
||
|
}
|
||
|
|
||
|
func (c *normCompacter) Store(v []uint64) uint32 {
|
||
|
h := uint32(len(c.sparseOffset))
|
||
|
c.sparseBlocks = append(c.sparseBlocks, v)
|
||
|
c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
|
||
|
c.sparseCount += countSparseEntries(v) + 1
|
||
|
return h
|
||
|
}
|
||
|
|
||
|
func (c *normCompacter) Handler() string {
|
||
|
return c.name + "Sparse.lookup"
|
||
|
}
|
||
|
|
||
|
func (c *normCompacter) Print(w io.Writer) (retErr error) {
|
||
|
p := func(f string, x ...interface{}) {
|
||
|
if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
|
||
|
retErr = err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
ls := len(c.sparseBlocks)
|
||
|
p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
|
||
|
p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
|
||
|
|
||
|
ns := c.sparseCount
|
||
|
p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
|
||
|
p("var %sSparseValues = [%d]valueRange {", c.name, ns)
|
||
|
for i, b := range c.sparseBlocks {
|
||
|
p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
|
||
|
var v int
|
||
|
stride := mostFrequentStride(b)
|
||
|
n := countSparseEntries(b)
|
||
|
p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
|
||
|
for i, nv := range b {
|
||
|
if int(nv)-v != stride {
|
||
|
if v != 0 {
|
||
|
p(",hi:%#02x},", 0x80+i-1)
|
||
|
}
|
||
|
if nv != 0 {
|
||
|
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
|
||
|
}
|
||
|
}
|
||
|
v = int(nv)
|
||
|
}
|
||
|
if v != 0 {
|
||
|
p(",hi:%#02x},", 0x80+len(b)-1)
|
||
|
}
|
||
|
}
|
||
|
p("\n}\n\n")
|
||
|
return
|
||
|
}
|