388 lines
6.8 KiB
Go
388 lines
6.8 KiB
Go
package dsl
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// TokenType identifies the lexical class of a Token.
type TokenType int

const (
	// Value-carrying tokens.
	TOKEN_STRING TokenType = iota // quoted or heredoc string literal (decoded)
	TOKEN_IDENT                   // bare identifier: letters, digits, '_'
	TOKEN_FLOAT                   // number containing a '.'
	TOKEN_INT                     // integer number
	TOKEN_DURATION                // number with an "ms", "m", or "s" suffix

	// Single-character punctuation tokens.
	TOKEN_LBRACE // {
	TOKEN_RBRACE // }
	TOKEN_LPAREN // (
	TOKEN_RPAREN // )
	TOKEN_ASSIGN // =
	TOKEN_TILDE  // ~

	// TOKEN_EOF marks the end of input; Tokenize always appends it last.
	TOKEN_EOF
)
|
|
|
|
func (t TokenType) String() string {
|
|
switch t {
|
|
case TOKEN_STRING:
|
|
return "STRING"
|
|
case TOKEN_IDENT:
|
|
return "IDENT"
|
|
case TOKEN_FLOAT:
|
|
return "FLOAT"
|
|
case TOKEN_INT:
|
|
return "INT"
|
|
case TOKEN_DURATION:
|
|
return "DURATION"
|
|
case TOKEN_LBRACE:
|
|
return "{"
|
|
case TOKEN_RBRACE:
|
|
return "}"
|
|
case TOKEN_LPAREN:
|
|
return "("
|
|
case TOKEN_RPAREN:
|
|
return ")"
|
|
case TOKEN_ASSIGN:
|
|
return "="
|
|
case TOKEN_TILDE:
|
|
return "~"
|
|
case TOKEN_EOF:
|
|
return "EOF"
|
|
default:
|
|
return "UNKNOWN"
|
|
}
|
|
}
|
|
|
|
// Token is a single lexical unit produced by the Lexer.
type Token struct {
	Type  TokenType // lexical class of this token
	Value string    // literal text; for strings, the decoded contents
	Line  int       // 1-based line where the token starts
	Col   int       // 1-based column where the token starts
}
|
|
|
|
func (t Token) String() string {
|
|
return fmt.Sprintf("Token(%s, %q, %d:%d)", t.Type, t.Value, t.Line, t.Col)
|
|
}
|
|
|
|
// Lexer turns DSL source text into a stream of Tokens.
type Lexer struct {
	src  []rune // full input, decoded to runes up front
	pos  int    // index of the next unread rune in src
	line int    // 1-based line number of the next unread rune
	col  int    // 1-based column number of the next unread rune
}
|
|
|
|
func NewLexer(src string) *Lexer {
|
|
return &Lexer{src: []rune(src), pos: 0, line: 1, col: 1}
|
|
}
|
|
|
|
func (l *Lexer) peek() (rune, bool) {
|
|
if l.pos >= len(l.src) {
|
|
return 0, false
|
|
}
|
|
return l.src[l.pos], true
|
|
}
|
|
|
|
func (l *Lexer) peekAt(offset int) (rune, bool) {
|
|
i := l.pos + offset
|
|
if i >= len(l.src) {
|
|
return 0, false
|
|
}
|
|
return l.src[i], true
|
|
}
|
|
|
|
func (l *Lexer) advance() rune {
|
|
ch := l.src[l.pos]
|
|
l.pos++
|
|
if ch == '\n' {
|
|
l.line++
|
|
l.col = 1
|
|
} else {
|
|
l.col++
|
|
}
|
|
return ch
|
|
}
|
|
|
|
func (l *Lexer) skipWhitespaceAndComments() {
|
|
for {
|
|
ch, ok := l.peek()
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
if ch == '/' {
|
|
next, ok2 := l.peekAt(1)
|
|
if ok2 && next == '/' {
|
|
for {
|
|
c, ok := l.peek()
|
|
if !ok || c == '\n' {
|
|
break
|
|
}
|
|
l.advance()
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
|
|
if unicode.IsSpace(ch) {
|
|
l.advance()
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
func (l *Lexer) Tokenize() ([]Token, error) {
|
|
var tokens []Token
|
|
for {
|
|
l.skipWhitespaceAndComments()
|
|
ch, ok := l.peek()
|
|
if !ok {
|
|
tokens = append(tokens, Token{Type: TOKEN_EOF, Line: l.line, Col: l.col})
|
|
break
|
|
}
|
|
|
|
line, col := l.line, l.col
|
|
|
|
switch {
|
|
case ch == '{':
|
|
l.advance()
|
|
tokens = append(tokens, Token{TOKEN_LBRACE, "{", line, col})
|
|
case ch == '}':
|
|
l.advance()
|
|
tokens = append(tokens, Token{TOKEN_RBRACE, "}", line, col})
|
|
case ch == '(':
|
|
l.advance()
|
|
tokens = append(tokens, Token{TOKEN_LPAREN, "(", line, col})
|
|
case ch == ')':
|
|
l.advance()
|
|
tokens = append(tokens, Token{TOKEN_RPAREN, ")", line, col})
|
|
case ch == '=':
|
|
l.advance()
|
|
tokens = append(tokens, Token{TOKEN_ASSIGN, "=", line, col})
|
|
case ch == '~':
|
|
l.advance()
|
|
tokens = append(tokens, Token{TOKEN_TILDE, "~", line, col})
|
|
|
|
case ch == '"':
|
|
// проверяем heredoc """
|
|
if l.isHeredocStart() {
|
|
s, err := l.readHeredoc()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tokens = append(tokens, Token{TOKEN_STRING, s, line, col})
|
|
} else {
|
|
s, err := l.readString()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tokens = append(tokens, Token{TOKEN_STRING, s, line, col})
|
|
}
|
|
|
|
case unicode.IsDigit(ch) || (ch == '-' && l.isNumberNext()):
|
|
tok, err := l.readNumberOrDuration(line, col)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tokens = append(tokens, tok)
|
|
|
|
case unicode.IsLetter(ch) || ch == '_':
|
|
ident := l.readIdent()
|
|
tokens = append(tokens, Token{TOKEN_IDENT, ident, line, col})
|
|
|
|
default:
|
|
return nil, fmt.Errorf("%d:%d: unexpected character %q", line, col, ch)
|
|
}
|
|
}
|
|
|
|
return tokens, nil
|
|
}
|
|
|
|
func (l *Lexer) isHeredocStart() bool {
|
|
a, ok1 := l.peekAt(0)
|
|
b, ok2 := l.peekAt(1)
|
|
c, ok3 := l.peekAt(2)
|
|
return ok1 && ok2 && ok3 && a == '"' && b == '"' && c == '"'
|
|
}
|
|
|
|
func (l *Lexer) isNumberNext() bool {
|
|
next, ok := l.peekAt(1)
|
|
return ok && unicode.IsDigit(next)
|
|
}
|
|
|
|
func (l *Lexer) readHeredoc() (string, error) {
|
|
l.advance()
|
|
l.advance()
|
|
l.advance()
|
|
var buf strings.Builder
|
|
for {
|
|
if l.pos+2 < len(l.src) &&
|
|
l.src[l.pos] == '"' &&
|
|
l.src[l.pos+1] == '"' &&
|
|
l.src[l.pos+2] == '"' {
|
|
l.advance()
|
|
l.advance()
|
|
l.advance()
|
|
return dedentHeredoc(buf.String()), nil
|
|
}
|
|
ch, ok := l.peek()
|
|
if !ok {
|
|
return "", fmt.Errorf("unterminated heredoc")
|
|
}
|
|
buf.WriteRune(l.advance())
|
|
_ = ch
|
|
}
|
|
}
|
|
|
|
// dedentHeredoc normalizes a heredoc body: it drops one leading and one
// trailing blank line (if present), then strips the smallest leading
// indentation (spaces/tabs, counted in bytes) shared by all non-blank
// lines. Blank lines shorter than that indent are kept verbatim.
func dedentHeredoc(s string) string {
	body := strings.Split(s, "\n")

	// A heredoc usually opens right after `"""` and closes on its own
	// line; drop those purely-whitespace boundary lines.
	if len(body) > 0 && strings.TrimSpace(body[0]) == "" {
		body = body[1:]
	}
	if len(body) > 0 && strings.TrimSpace(body[len(body)-1]) == "" {
		body = body[:len(body)-1]
	}

	// Common indent across all non-blank lines; -1 means "none seen yet".
	common := -1
	for _, ln := range body {
		if strings.TrimSpace(ln) == "" {
			continue
		}
		indent := len(ln) - len(strings.TrimLeft(ln, " \t"))
		if common < 0 || indent < common {
			common = indent
		}
	}
	if common < 0 {
		common = 0
	}

	out := make([]string, len(body))
	for i, ln := range body {
		if len(ln) >= common {
			out[i] = ln[common:]
		} else {
			out[i] = ln
		}
	}
	return strings.Join(out, "\n")
}
|
|
|
|
func (l *Lexer) readString() (string, error) {
|
|
l.advance()
|
|
var buf strings.Builder
|
|
for {
|
|
ch, ok := l.peek()
|
|
if !ok {
|
|
return "", fmt.Errorf("unterminated string at line %d", l.line)
|
|
}
|
|
if ch == '"' {
|
|
l.advance()
|
|
break
|
|
}
|
|
if ch == '\\' {
|
|
l.advance()
|
|
esc, ok := l.peek()
|
|
if !ok {
|
|
return "", fmt.Errorf("unterminated escape")
|
|
}
|
|
l.advance()
|
|
switch esc {
|
|
case 'n':
|
|
buf.WriteByte('\n')
|
|
case 't':
|
|
buf.WriteByte('\t')
|
|
case '\\':
|
|
buf.WriteByte('\\')
|
|
case '"':
|
|
buf.WriteByte('"')
|
|
default:
|
|
return "", fmt.Errorf("unknown escape \\%c", esc)
|
|
}
|
|
continue
|
|
}
|
|
buf.WriteRune(l.advance())
|
|
}
|
|
return buf.String(), nil
|
|
}
|
|
|
|
func (l *Lexer) readIdent() string {
|
|
var buf strings.Builder
|
|
for {
|
|
ch, ok := l.peek()
|
|
if !ok {
|
|
break
|
|
}
|
|
if unicode.IsLetter(ch) || unicode.IsDigit(ch) || ch == '_' {
|
|
buf.WriteRune(l.advance())
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
return buf.String()
|
|
}
|
|
|
|
func (l *Lexer) readNumberOrDuration(line, col int) (Token, error) {
|
|
var buf strings.Builder
|
|
isFloat := false
|
|
|
|
if ch, _ := l.peek(); ch == '-' {
|
|
buf.WriteRune(l.advance())
|
|
}
|
|
|
|
for {
|
|
ch, ok := l.peek()
|
|
if !ok {
|
|
break
|
|
}
|
|
if unicode.IsDigit(ch) {
|
|
buf.WriteRune(l.advance())
|
|
} else if ch == '.' && !isFloat {
|
|
isFloat = true
|
|
buf.WriteRune(l.advance())
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
suffix := l.tryReadDurationSuffix()
|
|
if suffix != "" {
|
|
return Token{TOKEN_DURATION, buf.String() + suffix, line, col}, nil
|
|
}
|
|
|
|
if isFloat {
|
|
return Token{TOKEN_FLOAT, buf.String(), line, col}, nil
|
|
}
|
|
return Token{TOKEN_INT, buf.String(), line, col}, nil
|
|
}
|
|
|
|
func (l *Lexer) tryReadDurationSuffix() string {
|
|
ch, ok := l.peek()
|
|
if !ok {
|
|
return ""
|
|
}
|
|
if ch == 'm' {
|
|
next, ok2 := l.peekAt(1)
|
|
if ok2 && next == 's' {
|
|
l.advance()
|
|
l.advance()
|
|
return "ms"
|
|
}
|
|
l.advance()
|
|
return "m"
|
|
}
|
|
if ch == 's' {
|
|
l.advance()
|
|
return "s"
|
|
}
|
|
return ""
|
|
}
|