repo-template/vendor/github.com/flosch/pongo2/lexer.go

package pongo2

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

const (
	TokenError = iota
	EOF

	TokenHTML

	TokenKeyword
	TokenIdentifier
	TokenString
	TokenNumber
	TokenSymbol
)

var (
	tokenSpaceChars                = " \n\r\t"
	tokenIdentifierChars           = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
	tokenIdentifierCharsWithDigits = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"
	tokenDigits                    = "0123456789"

	// Available symbols in pongo2 (within filters/tag)
	TokenSymbols = []string{
		// 3-Char symbols

		// 2-Char symbols
		"==", ">=", "<=", "&&", "||", "{{", "}}", "{%", "%}", "!=", "<>",

		// 1-Char symbol
		"(", ")", "+", "-", "*", "<", ">", "/", "^", ",", ".", "!", "|", ":", "=", "%",
	}

	// Available keywords in pongo2
	TokenKeywords = []string{"in", "and", "or", "not", "true", "false", "as", "export"}
)

type TokenType int
type Token struct {
	Filename string
	Typ      TokenType
	Val      string
	Line     int
	Col      int
}

type lexerStateFn func() lexerStateFn
type lexer struct {
	name      string
	input     string
	start     int // start pos of the item
	pos       int // current pos
	width     int // width of last rune
	tokens    []*Token
	errored   bool
	startline int
	startcol  int
	line      int
	col       int

	in_verbatim   bool
	verbatim_name string
}

func (t *Token) String() string {
	val := t.Val
	if len(val) > 1000 {
		val = fmt.Sprintf("%s...%s", val[:10], val[len(val)-5:len(val)])
	}

	typ := ""
	switch t.Typ {
	case TokenHTML:
		typ = "HTML"
	case TokenError:
		typ = "Error"
	case TokenIdentifier:
		typ = "Identifier"
	case TokenKeyword:
		typ = "Keyword"
	case TokenNumber:
		typ = "Number"
	case TokenString:
		typ = "String"
	case TokenSymbol:
		typ = "Symbol"
	default:
		typ = "Unknown"
	}

	return fmt.Sprintf("<Token Typ=%s (%d) Val='%s' Line=%d Col=%d>",
		typ, t.Typ, val, t.Line, t.Col)
}

func lex(name string, input string) ([]*Token, *Error) {
	l := &lexer{
		name:      name,
		input:     input,
		tokens:    make([]*Token, 0, 100),
		line:      1,
		col:       1,
		startline: 1,
		startcol:  1,
	}
	l.run()
	if l.errored {
		errtoken := l.tokens[len(l.tokens)-1]
		return nil, &Error{
			Filename: name,
			Line:     errtoken.Line,
			Column:   errtoken.Col,
			Sender:   "lexer",
			ErrorMsg: errtoken.Val,
		}
	}
	return l.tokens, nil
}

func (l *lexer) value() string {
	return l.input[l.start:l.pos]
}

func (l *lexer) length() int {
	return l.pos - l.start
}

func (l *lexer) emit(t TokenType) {
	tok := &Token{
		Filename: l.name,
		Typ:      t,
		Val:      l.value(),
		Line:     l.startline,
		Col:      l.startcol,
	}

	if t == TokenString {
		// Escape sequence \" in strings
		tok.Val = strings.Replace(tok.Val, `\"`, `"`, -1)
		tok.Val = strings.Replace(tok.Val, `\\`, `\`, -1)
	}

	l.tokens = append(l.tokens, tok)
	l.start = l.pos
	l.startline = l.line
	l.startcol = l.col
}

func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return EOF
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = w
	l.pos += l.width
	l.col += l.width
	return r
}

func (l *lexer) backup() {
	l.pos -= l.width
	l.col -= l.width
}

func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

func (l *lexer) ignore() {
	l.start = l.pos
	l.startline = l.line
	l.startcol = l.col
}

func (l *lexer) accept(what string) bool {
	if strings.IndexRune(what, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

func (l *lexer) acceptRun(what string) {
	for strings.IndexRune(what, l.next()) >= 0 {
	}
	l.backup()
}

func (l *lexer) errorf(format string, args ...interface{}) lexerStateFn {
	t := &Token{
		Filename: l.name,
		Typ:      TokenError,
		Val:      fmt.Sprintf(format, args...),
		Line:     l.startline,
		Col:      l.startcol,
	}
	l.tokens = append(l.tokens, t)
	l.errored = true
	l.startline = l.line
	l.startcol = l.col
	return nil
}

func (l *lexer) eof() bool {
	return l.start >= len(l.input)-1
}

func (l *lexer) run() {
	for {
		// TODO: Support verbatim tag names
		// https://docs.djangoproject.com/en/dev/ref/templates/builtins/#verbatim
		if l.in_verbatim {
			name := l.verbatim_name
			if name != "" {
				name += " "
			}
			if strings.HasPrefix(l.input[l.pos:], fmt.Sprintf("{%% endverbatim %s%%}", name)) { // end verbatim
				if l.pos > l.start {
					l.emit(TokenHTML)
				}
				w := len("{% endverbatim %}")
				l.pos += w
				l.col += w
				l.ignore()
				l.in_verbatim = false
			}
		} else if strings.HasPrefix(l.input[l.pos:], "{% verbatim %}") { // tag
			if l.pos > l.start {
				l.emit(TokenHTML)
			}
			l.in_verbatim = true
			w := len("{% verbatim %}")
			l.pos += w
			l.col += w
			l.ignore()
		}

		if !l.in_verbatim {
			// Ignore single-line comments {# ... #}
			if strings.HasPrefix(l.input[l.pos:], "{#") {
				if l.pos > l.start {
					l.emit(TokenHTML)
				}

				l.pos += 2 // pass '{#'
				l.col += 2

				for {
					switch l.peek() {
					case EOF:
						l.errorf("Single-line comment not closed.")
						return
					case '\n':
						l.errorf("Newline not permitted in a single-line comment.")
						return
					}

					if strings.HasPrefix(l.input[l.pos:], "#}") {
						l.pos += 2 // pass '#}'
						l.col += 2
						break
					}

					l.next()
				}
				l.ignore() // ignore whole comment

				// Comment skipped
				continue // next token
			}

			if strings.HasPrefix(l.input[l.pos:], "{{") || // variable
				strings.HasPrefix(l.input[l.pos:], "{%") { // tag
				if l.pos > l.start {
					l.emit(TokenHTML)
				}
				l.tokenize()
				if l.errored {
					return
				}
				continue
			}
		}

		switch l.peek() {
		case '\n':
			l.line++
			l.col = 0
		}
		if l.next() == EOF {
			break
		}
	}

	if l.pos > l.start {
		l.emit(TokenHTML)
	}

	if l.in_verbatim {
		l.errorf("verbatim-tag not closed, got EOF.")
	}
}

func (l *lexer) tokenize() {
	for state := l.stateCode; state != nil; {
		state = state()
	}
}

func (l *lexer) stateCode() lexerStateFn {
outer_loop:
	for {
		switch {
		case l.accept(tokenSpaceChars):
			if l.value() == "\n" {
				return l.errorf("Newline not allowed within tag/variable.")
			}
			l.ignore()
			continue
		case l.accept(tokenIdentifierChars):
			return l.stateIdentifier
		case l.accept(tokenDigits):
			return l.stateNumber
		case l.accept(`"`):
			return l.stateString
		}

		// Check for symbol
		for _, sym := range TokenSymbols {
			if strings.HasPrefix(l.input[l.start:], sym) {
				l.pos += len(sym)
				l.col += l.length()
				l.emit(TokenSymbol)

				if sym == "%}" || sym == "}}" {
					// Tag/variable end, return after emit
					return nil
				}

				continue outer_loop
			}
		}

		if l.pos < len(l.input) {
			return l.errorf("Unknown character: %q (%d)", l.peek(), l.peek())
		}

		break
	}

	// Normal shut down
	return nil
}

func (l *lexer) stateIdentifier() lexerStateFn {
	l.acceptRun(tokenIdentifierChars)
	l.acceptRun(tokenIdentifierCharsWithDigits)
	for _, kw := range TokenKeywords {
		if kw == l.value() {
			l.emit(TokenKeyword)
			return l.stateCode
		}
	}
	l.emit(TokenIdentifier)
	return l.stateCode
}

func (l *lexer) stateNumber() lexerStateFn {
	l.acceptRun(tokenDigits)
	/*
		Maybe context-sensitive number lexing?
		* comments.0.Text // first comment
		* usercomments.1.0 // second user, first comment
		* if (score >= 8.5) // 8.5 as a number

		if l.peek() == '.' {
			l.accept(".")
			if !l.accept(tokenDigits) {
				return l.errorf("Malformed number.")
			}
			l.acceptRun(tokenDigits)
		}
	*/
	l.emit(TokenNumber)
	return l.stateCode
}

func (l *lexer) stateString() lexerStateFn {
	l.ignore()
	l.startcol -= 1 // we're starting the position at the first "
	for !l.accept(`"`) {
		switch l.next() {
		case '\\':
			// escape sequence
			switch l.peek() {
			case '"', '\\':
				l.next()
			default:
				return l.errorf("Unknown escape sequence: \\%c", l.peek())
			}
		case EOF:
			return l.errorf("Unexpected EOF, string not closed.")
		case '\n':
			return l.errorf("Newline in string is not allowed.")
		}
	}
	l.backup()
	l.emit(TokenString)

	l.next()
	l.ignore()

	return l.stateCode
}
Migrate to pongo2 as template engine Signed-off-by: Knut Ahlers <knut@ahlers.me> 2018-02-07 21:20:28 +00:00			`package pongo2`

			`import (`
			`"fmt"`
			`"strings"`
			`"unicode/utf8"`
			`)`

			`const (`
			`TokenError = iota`
			`EOF`

			`TokenHTML`

			`TokenKeyword`
			`TokenIdentifier`
			`TokenString`
			`TokenNumber`
			`TokenSymbol`
			`)`

			`var (`
			`tokenSpaceChars = " \n\r\t"`
			`tokenIdentifierChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"`
			`tokenIdentifierCharsWithDigits = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"`
			`tokenDigits = "0123456789"`

			`// Available symbols in pongo2 (within filters/tag)`
			`TokenSymbols = []string{`
			`// 3-Char symbols`

			`// 2-Char symbols`
			`"==", ">=", "<=", "&&", "\|\|", "{{", "}}", "{%", "%}", "!=", "<>",`

			`// 1-Char symbol`
			`"(", ")", "+", "-", "*", "<", ">", "/", "^", ",", ".", "!", "\|", ":", "=", "%",`
			`}`

			`// Available keywords in pongo2`
			`TokenKeywords = []string{"in", "and", "or", "not", "true", "false", "as", "export"}`
			`)`

			`type TokenType int`
			`type Token struct {`
			`Filename string`
			`Typ TokenType`
			`Val string`
			`Line int`
			`Col int`
			`}`

			`type lexerStateFn func() lexerStateFn`
			`type lexer struct {`
			`name string`
			`input string`
			`start int // start pos of the item`
			`pos int // current pos`
			`width int // width of last rune`
			`tokens []*Token`
			`errored bool`
			`startline int`
			`startcol int`
			`line int`
			`col int`

			`in_verbatim bool`
			`verbatim_name string`
			`}`

			`func (t *Token) String() string {`
			`val := t.Val`
			`if len(val) > 1000 {`
			`val = fmt.Sprintf("%s...%s", val[:10], val[len(val)-5:len(val)])`
			`}`

			`typ := ""`
			`switch t.Typ {`
			`case TokenHTML:`
			`typ = "HTML"`
			`case TokenError:`
			`typ = "Error"`
			`case TokenIdentifier:`
			`typ = "Identifier"`
			`case TokenKeyword:`
			`typ = "Keyword"`
			`case TokenNumber:`
			`typ = "Number"`
			`case TokenString:`
			`typ = "String"`
			`case TokenSymbol:`
			`typ = "Symbol"`
			`default:`
			`typ = "Unknown"`
			`}`

			`return fmt.Sprintf("<Token Typ=%s (%d) Val='%s' Line=%d Col=%d>",`
			`typ, t.Typ, val, t.Line, t.Col)`
			`}`

			`func lex(name string, input string) ([]Token, Error) {`
			`l := &lexer{`
			`name: name,`
			`input: input,`
			`tokens: make([]*Token, 0, 100),`
			`line: 1,`
			`col: 1,`
			`startline: 1,`
			`startcol: 1,`
			`}`
			`l.run()`
			`if l.errored {`
			`errtoken := l.tokens[len(l.tokens)-1]`
			`return nil, &Error{`
			`Filename: name,`
			`Line: errtoken.Line,`
			`Column: errtoken.Col,`
			`Sender: "lexer",`
			`ErrorMsg: errtoken.Val,`
			`}`
			`}`
			`return l.tokens, nil`
			`}`

			`func (l *lexer) value() string {`
			`return l.input[l.start:l.pos]`
			`}`

			`func (l *lexer) length() int {`
			`return l.pos - l.start`
			`}`

			`func (l *lexer) emit(t TokenType) {`
			`tok := &Token{`
			`Filename: l.name,`
			`Typ: t,`
			`Val: l.value(),`
			`Line: l.startline,`
			`Col: l.startcol,`
			`}`

			`if t == TokenString {`
			`// Escape sequence \" in strings`
			tok.Val = strings.Replace(tok.Val, `\"`, `"`, -1)
			tok.Val = strings.Replace(tok.Val, `\\`, `\`, -1)
			`}`

			`l.tokens = append(l.tokens, tok)`
			`l.start = l.pos`
			`l.startline = l.line`
			`l.startcol = l.col`
			`}`

			`func (l *lexer) next() rune {`
			`if l.pos >= len(l.input) {`
			`l.width = 0`
			`return EOF`
			`}`
			`r, w := utf8.DecodeRuneInString(l.input[l.pos:])`
			`l.width = w`
			`l.pos += l.width`
			`l.col += l.width`
			`return r`
			`}`

			`func (l *lexer) backup() {`
			`l.pos -= l.width`
			`l.col -= l.width`
			`}`

			`func (l *lexer) peek() rune {`
			`r := l.next()`
			`l.backup()`
			`return r`
			`}`

			`func (l *lexer) ignore() {`
			`l.start = l.pos`
			`l.startline = l.line`
			`l.startcol = l.col`
			`}`

			`func (l *lexer) accept(what string) bool {`
			`if strings.IndexRune(what, l.next()) >= 0 {`
			`return true`
			`}`
			`l.backup()`
			`return false`
			`}`

			`func (l *lexer) acceptRun(what string) {`
			`for strings.IndexRune(what, l.next()) >= 0 {`
			`}`
			`l.backup()`
			`}`

			`func (l *lexer) errorf(format string, args ...interface{}) lexerStateFn {`
			`t := &Token{`
			`Filename: l.name,`
			`Typ: TokenError,`
			`Val: fmt.Sprintf(format, args...),`
			`Line: l.startline,`
			`Col: l.startcol,`
			`}`
			`l.tokens = append(l.tokens, t)`
			`l.errored = true`
			`l.startline = l.line`
			`l.startcol = l.col`
			`return nil`
			`}`

			`func (l *lexer) eof() bool {`
			`return l.start >= len(l.input)-1`
			`}`

			`func (l *lexer) run() {`
			`for {`
			`// TODO: Support verbatim tag names`
			`// https://docs.djangoproject.com/en/dev/ref/templates/builtins/#verbatim`
			`if l.in_verbatim {`
			`name := l.verbatim_name`
			`if name != "" {`
			`name += " "`
			`}`
			`if strings.HasPrefix(l.input[l.pos:], fmt.Sprintf("{%% endverbatim %s%%}", name)) { // end verbatim`
			`if l.pos > l.start {`
			`l.emit(TokenHTML)`
			`}`
			`w := len("{% endverbatim %}")`
			`l.pos += w`
			`l.col += w`
			`l.ignore()`
			`l.in_verbatim = false`
			`}`
			`} else if strings.HasPrefix(l.input[l.pos:], "{% verbatim %}") { // tag`
			`if l.pos > l.start {`
			`l.emit(TokenHTML)`
			`}`
			`l.in_verbatim = true`
			`w := len("{% verbatim %}")`
			`l.pos += w`
			`l.col += w`
			`l.ignore()`
			`}`

			`if !l.in_verbatim {`
			`// Ignore single-line comments {# ... #}`
			`if strings.HasPrefix(l.input[l.pos:], "{#") {`
			`if l.pos > l.start {`
			`l.emit(TokenHTML)`
			`}`

			`l.pos += 2 // pass '{#'`
			`l.col += 2`

			`for {`
			`switch l.peek() {`
			`case EOF:`
			`l.errorf("Single-line comment not closed.")`
			`return`
			`case '\n':`
			`l.errorf("Newline not permitted in a single-line comment.")`
			`return`
			`}`

			`if strings.HasPrefix(l.input[l.pos:], "#}") {`
			`l.pos += 2 // pass '#}'`
			`l.col += 2`
			`break`
			`}`

			`l.next()`
			`}`
			`l.ignore() // ignore whole comment`

			`// Comment skipped`
			`continue // next token`
			`}`

			`if strings.HasPrefix(l.input[l.pos:], "{{") \|\| // variable`
			`strings.HasPrefix(l.input[l.pos:], "{%") { // tag`
			`if l.pos > l.start {`
			`l.emit(TokenHTML)`
			`}`
			`l.tokenize()`
			`if l.errored {`
			`return`
			`}`
			`continue`
			`}`
			`}`

			`switch l.peek() {`
			`case '\n':`
			`l.line++`
			`l.col = 0`
			`}`
			`if l.next() == EOF {`
			`break`
			`}`
			`}`

			`if l.pos > l.start {`
			`l.emit(TokenHTML)`
			`}`

			`if l.in_verbatim {`
			`l.errorf("verbatim-tag not closed, got EOF.")`
			`}`
			`}`

			`func (l *lexer) tokenize() {`
			`for state := l.stateCode; state != nil; {`
			`state = state()`
			`}`
			`}`

			`func (l *lexer) stateCode() lexerStateFn {`
			`outer_loop:`
			`for {`
			`switch {`
			`case l.accept(tokenSpaceChars):`
			`if l.value() == "\n" {`
			`return l.errorf("Newline not allowed within tag/variable.")`
			`}`
			`l.ignore()`
			`continue`
			`case l.accept(tokenIdentifierChars):`
			`return l.stateIdentifier`
			`case l.accept(tokenDigits):`
			`return l.stateNumber`
			case l.accept(`"`):
			`return l.stateString`
			`}`

			`// Check for symbol`
			`for _, sym := range TokenSymbols {`
			`if strings.HasPrefix(l.input[l.start:], sym) {`
			`l.pos += len(sym)`
			`l.col += l.length()`
			`l.emit(TokenSymbol)`

			`if sym == "%}" \|\| sym == "}}" {`
			`// Tag/variable end, return after emit`
			`return nil`
			`}`

			`continue outer_loop`
			`}`
			`}`

			`if l.pos < len(l.input) {`
			`return l.errorf("Unknown character: %q (%d)", l.peek(), l.peek())`
			`}`

			`break`
			`}`

			`// Normal shut down`
			`return nil`
			`}`

			`func (l *lexer) stateIdentifier() lexerStateFn {`
			`l.acceptRun(tokenIdentifierChars)`
			`l.acceptRun(tokenIdentifierCharsWithDigits)`
			`for _, kw := range TokenKeywords {`
			`if kw == l.value() {`
			`l.emit(TokenKeyword)`
			`return l.stateCode`
			`}`
			`}`
			`l.emit(TokenIdentifier)`
			`return l.stateCode`
			`}`

			`func (l *lexer) stateNumber() lexerStateFn {`
			`l.acceptRun(tokenDigits)`
			`/*`
			`Maybe context-sensitive number lexing?`
			`* comments.0.Text // first comment`
			`* usercomments.1.0 // second user, first comment`
			`* if (score >= 8.5) // 8.5 as a number`

			`if l.peek() == '.' {`
			`l.accept(".")`
			`if !l.accept(tokenDigits) {`
			`return l.errorf("Malformed number.")`
			`}`
			`l.acceptRun(tokenDigits)`
			`}`
			`*/`
			`l.emit(TokenNumber)`
			`return l.stateCode`
			`}`

			`func (l *lexer) stateString() lexerStateFn {`
			`l.ignore()`
			`l.startcol -= 1 // we're starting the position at the first "`
			for !l.accept(`"`) {
			`switch l.next() {`
			`case '\\':`
			`// escape sequence`
			`switch l.peek() {`
			`case '"', '\\':`
			`l.next()`
			`default:`
			`return l.errorf("Unknown escape sequence: \\%c", l.peek())`
			`}`
			`case EOF:`
			`return l.errorf("Unexpected EOF, string not closed.")`
			`case '\n':`
			`return l.errorf("Newline in string is not allowed.")`
			`}`
			`}`
			`l.backup()`
			`l.emit(TokenString)`

			`l.next()`
			`l.ignore()`

			`return l.stateCode`
			`}`