mirror of
https://github.com/Luzifer/mondash.git
synced 2025-01-10 12:51:49 +00:00
111 lines
2.7 KiB
Go
111 lines
2.7 KiB
Go
|
// Copyright 2013 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
// Package slug transforms strings into a normalized form well suited for use in URLs.
|
||
|
package slug
|
||
|
|
||
|
import (
|
||
|
"code.google.com/p/go.text/unicode/norm"
|
||
|
"encoding/hex"
|
||
|
"unicode"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// Rune classes consulted by Slug and SlugAscii when classifying each rune of
// the normalized input.
var (
	// lat lists the tables whose runes are kept in the output: letters and
	// numbers.
	lat = []*unicode.RangeTable{unicode.Letter, unicode.Number}

	// nop lists the tables whose runes are dropped without producing a
	// hyphen: marks, modifier symbols (Sk) and modifier letters (Lm).
	//
	// NOTE(review): both callers test lat before nop, and unicode.Letter
	// already contains category Lm, so the Lm entry looks like it can never
	// take effect - confirm whether modifier letters were meant to be skipped.
	nop = []*unicode.RangeTable{unicode.Mark, unicode.Sk, unicode.Lm}
)
|
||
|
|
||
|
// Slug replaces each run of characters which are not unicode letters or
|
||
|
// numbers with a single hyphen, except for leading or trailing runs. Letters
|
||
|
// will be stripped of diacritical marks and lowercased. Letter or number
|
||
|
// codepoints that do not have combining marks or a lower-cased variant will
|
||
|
// be passed through unaltered.
|
||
|
func Slug(s string) string {
|
||
|
buf := make([]rune, 0, len(s))
|
||
|
dash := false
|
||
|
for _, r := range norm.NFKD.String(s) {
|
||
|
switch {
|
||
|
// unicode 'letters' like mandarin characters pass through
|
||
|
case unicode.IsOneOf(lat, r):
|
||
|
buf = append(buf, unicode.ToLower(r))
|
||
|
dash = true
|
||
|
case unicode.IsOneOf(nop, r):
|
||
|
// skip
|
||
|
case dash:
|
||
|
buf = append(buf, '-')
|
||
|
dash = false
|
||
|
}
|
||
|
}
|
||
|
if i := len(buf) - 1; i >= 0 && buf[i] == '-' {
|
||
|
buf = buf[:i]
|
||
|
}
|
||
|
return string(buf)
|
||
|
}
|
||
|
|
||
|
// SlugAscii is identical to Slug, except that runs of one or more unicode
|
||
|
// letters or numbers that still fall outside the ASCII range will have their
|
||
|
// UTF-8 representation hex encoded and delimited by hyphens. As with Slug, in
|
||
|
// no case will hyphens appear at either end of the returned string.
|
||
|
func SlugAscii(s string) string {
|
||
|
const m = utf8.UTFMax
|
||
|
var (
|
||
|
ib [m * 3]byte
|
||
|
ob []byte
|
||
|
buf = make([]byte, 0, len(s))
|
||
|
dash = false
|
||
|
latin = true
|
||
|
)
|
||
|
for _, r := range norm.NFKD.String(s) {
|
||
|
switch {
|
||
|
case unicode.IsOneOf(lat, r):
|
||
|
r = unicode.ToLower(r)
|
||
|
n := utf8.EncodeRune(ib[:m], r)
|
||
|
if r >= 128 {
|
||
|
if latin && dash {
|
||
|
buf = append(buf, '-')
|
||
|
}
|
||
|
n = hex.Encode(ib[m:], ib[:n])
|
||
|
ob = ib[m : m+n]
|
||
|
latin = false
|
||
|
} else {
|
||
|
if !latin {
|
||
|
buf = append(buf, '-')
|
||
|
}
|
||
|
ob = ib[:n]
|
||
|
latin = true
|
||
|
}
|
||
|
dash = true
|
||
|
buf = append(buf, ob...)
|
||
|
case unicode.IsOneOf(nop, r):
|
||
|
// skip
|
||
|
case dash:
|
||
|
buf = append(buf, '-')
|
||
|
dash = false
|
||
|
latin = true
|
||
|
}
|
||
|
}
|
||
|
if i := len(buf) - 1; i >= 0 && buf[i] == '-' {
|
||
|
buf = buf[:i]
|
||
|
}
|
||
|
return string(buf)
|
||
|
}
|
||
|
|
||
|
// IsSlugAscii returns true only if SlugAscii(s) == s.
//
// It accepts strings made solely of the characters a-z, 0-9 and '-', where no
// hyphen is the first or last character and no two hyphens are adjacent. The
// empty string is rejected.
func IsSlugAscii(s string) bool {
	// Pretending a hyphen precedes the input makes a leading hyphen look like
	// a doubled one, and makes the empty string fail the final check.
	afterHyphen := true
	// Every accepted character is a single ASCII byte, so bytes can be
	// inspected directly; any byte of a multi-byte rune is rejected below.
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b == '-' {
			if afterHyphen {
				return false
			}
			afterHyphen = true
			continue
		}
		isLower := b >= 'a' && b <= 'z'
		isDigit := b >= '0' && b <= '9'
		if !isLower && !isDigit {
			return false
		}
		afterHyphen = false
	}
	// Ending right after a hyphen means a trailing hyphen (or empty input).
	return !afterHyphen
}
|