init:

2026-04-05 18:20:42 +03:00
commit 32737ee6d6
18 changed files with 2719 additions and 0 deletions
--- a/dsl/lexer.go
+++ b/dsl/lexer.go
@@ -0,0 +1,387 @@
+package dsl
+
+import (
+	"fmt"
+	"strings"
+	"unicode"
+)
+
+// TokenType identifies the lexical category of a Token.
+type TokenType int
+
+// Token kinds emitted by the Lexer. The values come from iota, so they follow
+// declaration order starting at zero with TOKEN_STRING.
+const (
+	// Literals and names: quoted/heredoc strings, identifiers, and the three
+	// numeric forms produced by readNumberOrDuration.
+	TOKEN_STRING TokenType = iota
+	TOKEN_IDENT
+	TOKEN_FLOAT
+	TOKEN_INT
+	TOKEN_DURATION
+
+	// Single-character punctuation: { } ( ) = ~
+	TOKEN_LBRACE
+	TOKEN_RBRACE
+	TOKEN_LPAREN
+	TOKEN_RPAREN
+	TOKEN_ASSIGN
+	TOKEN_TILDE
+
+	// End-of-input marker; Tokenize appends it as the final token.
+	TOKEN_EOF
+)
+
+// String returns a printable name for the token type: an upper-case word for
+// literals, names and EOF, the character itself for punctuation, and
+// "UNKNOWN" for any value outside the declared set.
+func (t TokenType) String() string {
+	// Indexed by token type; every declared constant has an entry.
+	names := [...]string{
+		TOKEN_STRING:   "STRING",
+		TOKEN_IDENT:    "IDENT",
+		TOKEN_FLOAT:    "FLOAT",
+		TOKEN_INT:      "INT",
+		TOKEN_DURATION: "DURATION",
+		TOKEN_LBRACE:   "{",
+		TOKEN_RBRACE:   "}",
+		TOKEN_LPAREN:   "(",
+		TOKEN_RPAREN:   ")",
+		TOKEN_ASSIGN:   "=",
+		TOKEN_TILDE:    "~",
+		TOKEN_EOF:      "EOF",
+	}
+	if t < 0 || int(t) >= len(names) {
+		return "UNKNOWN"
+	}
+	return names[t]
+}
+
+// Token is a single lexical unit together with the position where it starts.
+// Tokenize builds most tokens with positional literals, so the field order
+// here matters.
+type Token struct {
+	Type  TokenType // lexical category
+	Value string    // token text; for strings, the decoded contents without quotes
+	Line  int       // line of the token's first character (the lexer starts at 1)
+	Col   int       // column of the token's first character (the lexer starts at 1)
+}
+
+// String formats the token for debugging as Token(TYPE, "value", line:col).
+func (t Token) String() string {
+	kind := t.Type.String()
+	where := fmt.Sprintf("%d:%d", t.Line, t.Col)
+	return fmt.Sprintf("Token(%s, %q, %s)", kind, t.Value, where)
+}
+
+// Lexer holds the scanning state for one input string.
+type Lexer struct {
+	src  []rune // input decoded to runes, so pos counts characters rather than bytes
+	pos  int    // index into src of the next unread rune
+	line int    // line of the rune at pos; starts at 1 and grows on each '\n' consumed
+	col  int    // column of the rune at pos; starts at 1 and resets to 1 after '\n'
+}
+
+// NewLexer returns a Lexer positioned at the first character of src, which is
+// reported as line 1, column 1.
+func NewLexer(src string) *Lexer {
+	lx := new(Lexer)
+	lx.src = []rune(src)
+	lx.line, lx.col = 1, 1
+	return lx
+}
+
+// peek returns the next unread rune without consuming it. The boolean is
+// false, and the rune zero, once the input is exhausted.
+func (l *Lexer) peek() (rune, bool) {
+	if l.pos < len(l.src) {
+		return l.src[l.pos], true
+	}
+	return 0, false
+}
+
+// peekAt returns the rune offset positions past the current one without
+// consuming anything; peekAt(0) is equivalent to peek. The boolean is false,
+// and the rune zero, when that position lies outside the input. This includes
+// positions before the start, so a negative offset cannot index out of range.
+func (l *Lexer) peekAt(offset int) (rune, bool) {
+	i := l.pos + offset
+	if i < 0 || i >= len(l.src) {
+		return 0, false
+	}
+	return l.src[i], true
+}
+
+// advance consumes and returns the current rune, updating the line/column
+// bookkeeping. It does not check for end of input: callers must have
+// confirmed with peek that a rune is available.
+func (l *Lexer) advance() rune {
+	r := l.src[l.pos]
+	l.pos++
+	switch r {
+	case '\n':
+		// A newline moves to the first column of the following line.
+		l.line, l.col = l.line+1, 1
+	default:
+		l.col++
+	}
+	return r
+}
+
+// skipWhitespaceAndComments moves the cursor past any run of whitespace and
+// "//" line comments, stopping at the first rune that starts a token or at
+// end of input. A comment is consumed up to, but not including, its newline;
+// the newline itself is then skipped as ordinary whitespace.
+func (l *Lexer) skipWhitespaceAndComments() {
+	for {
+		r, ok := l.peek()
+		if !ok {
+			return
+		}
+		if unicode.IsSpace(r) {
+			l.advance()
+			continue
+		}
+
+		// Anything other than "//" begins a token and is left for the caller.
+		if r != '/' {
+			return
+		}
+		if next, ok := l.peekAt(1); !ok || next != '/' {
+			return
+		}
+		for c, more := l.peek(); more && c != '\n'; c, more = l.peek() {
+			l.advance()
+		}
+	}
+}
+
+// Tokenize scans the remaining input and returns its tokens in source order,
+// always ending with a TOKEN_EOF token. Every token records the line and
+// column at which it starts. Scanning stops at the first lexical error, in
+// which case the returned slice is nil.
+func (l *Lexer) Tokenize() ([]Token, error) {
+	// Single-character tokens, keyed by the rune that spells them.
+	punct := map[rune]TokenType{
+		'{': TOKEN_LBRACE,
+		'}': TOKEN_RBRACE,
+		'(': TOKEN_LPAREN,
+		')': TOKEN_RPAREN,
+		'=': TOKEN_ASSIGN,
+		'~': TOKEN_TILDE,
+	}
+
+	var out []Token
+	for {
+		l.skipWhitespaceAndComments()
+		r, more := l.peek()
+		if !more {
+			out = append(out, Token{Type: TOKEN_EOF, Line: l.line, Col: l.col})
+			return out, nil
+		}
+
+		// Capture the start position before any reader moves the cursor.
+		startLine, startCol := l.line, l.col
+
+		if kind, found := punct[r]; found {
+			l.advance()
+			out = append(out, Token{kind, string(r), startLine, startCol})
+			continue
+		}
+
+		switch {
+		case r == '"':
+			// Three quotes in a row open a heredoc; otherwise it is a plain string.
+			read := l.readString
+			if l.isHeredocStart() {
+				read = l.readHeredoc
+			}
+			text, err := read()
+			if err != nil {
+				return nil, err
+			}
+			out = append(out, Token{TOKEN_STRING, text, startLine, startCol})
+
+		case unicode.IsDigit(r) || (r == '-' && l.isNumberNext()):
+			tok, err := l.readNumberOrDuration(startLine, startCol)
+			if err != nil {
+				return nil, err
+			}
+			out = append(out, tok)
+
+		case unicode.IsLetter(r) || r == '_':
+			out = append(out, Token{TOKEN_IDENT, l.readIdent(), startLine, startCol})
+
+		default:
+			return nil, fmt.Errorf("%d:%d: unexpected character %q", startLine, startCol, r)
+		}
+	}
+}
+
+// isHeredocStart reports whether the next three runes are all double quotes,
+// i.e. the cursor sits on a """ delimiter. Nothing is consumed.
+func (l *Lexer) isHeredocStart() bool {
+	for off := 0; off < 3; off++ {
+		if r, ok := l.peekAt(off); !ok || r != '"' {
+			return false
+		}
+	}
+	return true
+}
+
+// isNumberNext reports whether the rune right after the current one is a
+// digit. Tokenize uses it to decide whether a '-' begins a negative number.
+func (l *Lexer) isNumberNext() bool {
+	if r, ok := l.peekAt(1); ok {
+		return unicode.IsDigit(r)
+	}
+	return false
+}
+
+// readHeredoc reads a """-delimited block. The cursor must be on the opening
+// delimiter. Everything up to the next """ is taken verbatim (no escape
+// processing) and passed through dedentHeredoc. Both delimiters are consumed.
+//
+// It returns an error naming the line on which the heredoc was opened when
+// the input ends before a closing delimiter is found.
+func (l *Lexer) readHeredoc() (string, error) {
+	// Remember the opening line: by the time we hit end of input the cursor
+	// may be many lines further on.
+	startLine := l.line
+	for i := 0; i < 3; i++ {
+		l.advance()
+	}
+
+	var buf strings.Builder
+	for {
+		if l.isHeredocStart() {
+			// Closing delimiter.
+			for i := 0; i < 3; i++ {
+				l.advance()
+			}
+			return dedentHeredoc(buf.String()), nil
+		}
+		if _, ok := l.peek(); !ok {
+			return "", fmt.Errorf("unterminated heredoc at line %d", startLine)
+		}
+		buf.WriteRune(l.advance())
+	}
+}
+
+// dedentHeredoc normalizes the raw body of a heredoc:
+//
+//   - a whitespace-only first line and a whitespace-only last line are dropped
+//     (these are the remainders of the lines carrying the delimiters);
+//   - the smallest leading run of spaces/tabs found on any non-blank line is
+//     removed from the start of every line.
+//
+// Only leading space and tab bytes are ever removed, and never more than the
+// common indent. A whitespace-only line shorter than the common indent
+// therefore becomes empty, and a line made of other (multi-byte) Unicode
+// spaces is left intact rather than being cut in the middle of a rune.
+func dedentHeredoc(s string) string {
+	lines := strings.Split(s, "\n")
+
+	if len(lines) > 0 && strings.TrimSpace(lines[0]) == "" {
+		lines = lines[1:]
+	}
+	if n := len(lines); n > 0 && strings.TrimSpace(lines[n-1]) == "" {
+		lines = lines[:n-1]
+	}
+
+	// Common indent in bytes over non-blank lines; stays -1 when there are none.
+	minIndent := -1
+	for _, line := range lines {
+		if strings.TrimSpace(line) == "" {
+			continue
+		}
+		indent := len(line) - len(strings.TrimLeft(line, " \t"))
+		if minIndent < 0 || indent < minIndent {
+			minIndent = indent
+		}
+	}
+
+	out := make([]string, len(lines))
+	for i, line := range lines {
+		cut := 0
+		for cut < minIndent && cut < len(line) && (line[cut] == ' ' || line[cut] == '\t') {
+			cut++
+		}
+		out[i] = line[cut:]
+	}
+	return strings.Join(out, "\n")
+}
+
+// readString reads a double-quoted string. The cursor must be on the opening
+// quote. It returns the decoded contents without the quotes and leaves the
+// cursor just past the closing quote.
+//
+// Recognized escapes are \n, \t, \\ and \"; any other escape is an error.
+// Raw newlines are not rejected, so a string may span several lines. For that
+// reason the "unterminated string" error reports the line on which the string
+// was opened, not the line where the input happened to end.
+func (l *Lexer) readString() (string, error) {
+	startLine := l.line
+	l.advance() // opening quote
+
+	var buf strings.Builder
+	for {
+		ch, ok := l.peek()
+		if !ok {
+			return "", fmt.Errorf("unterminated string at line %d", startLine)
+		}
+		l.advance()
+
+		switch ch {
+		case '"':
+			return buf.String(), nil
+
+		case '\\':
+			esc, ok := l.peek()
+			if !ok {
+				return "", fmt.Errorf("unterminated escape")
+			}
+			l.advance()
+			switch esc {
+			case 'n':
+				buf.WriteByte('\n')
+			case 't':
+				buf.WriteByte('\t')
+			case '\\':
+				buf.WriteByte('\\')
+			case '"':
+				buf.WriteByte('"')
+			default:
+				return "", fmt.Errorf("unknown escape \\%c", esc)
+			}
+
+		default:
+			buf.WriteRune(ch)
+		}
+	}
+}
+
+// readIdent consumes the longest run of letters, digits and underscores at
+// the cursor and returns it. Tokenize only calls it when the first rune is a
+// letter or underscore.
+func (l *Lexer) readIdent() string {
+	begin := l.pos
+	for {
+		r, ok := l.peek()
+		if !ok || !(unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_') {
+			break
+		}
+		l.advance()
+	}
+	return string(l.src[begin:l.pos])
+}
+
+// readNumberOrDuration reads a numeric literal starting at the cursor: an
+// optional leading '-', then digits with at most one '.'. If a duration unit
+// follows immediately (see tryReadDurationSuffix) the result is a
+// TOKEN_DURATION whose value is the number plus the unit; otherwise it is a
+// TOKEN_FLOAT when a '.' was seen and a TOKEN_INT when not.
+//
+// line and col are the token's start position, supplied by the caller. The
+// error result is currently always nil.
+//
+// NOTE(review): unicode.IsDigit also accepts non-ASCII decimal digits —
+// confirm that whatever converts the token text can handle them.
+func (l *Lexer) readNumberOrDuration(line, col int) (Token, error) {
+	begin := l.pos
+	if r, _ := l.peek(); r == '-' {
+		l.advance()
+	}
+
+	sawDot := false
+scan:
+	for {
+		r, ok := l.peek()
+		switch {
+		case !ok:
+			break scan
+		case unicode.IsDigit(r):
+			// part of the number
+		case r == '.' && !sawDot:
+			sawDot = true
+		default:
+			break scan
+		}
+		l.advance()
+	}
+	text := string(l.src[begin:l.pos])
+
+	kind := TOKEN_INT
+	if sawDot {
+		kind = TOKEN_FLOAT
+	}
+	if unit := l.tryReadDurationSuffix(); unit != "" {
+		kind, text = TOKEN_DURATION, text+unit
+	}
+	return Token{kind, text, line, col}, nil
+}
+
+// tryReadDurationSuffix consumes a duration unit at the cursor and returns
+// it, or returns "" and consumes nothing when no unit is present. The units
+// are "ms", "m" and "s"; "ms" is tried first so it is not read as "m".
+//
+// A unit only counts when it is not the beginning of a longer word: if a
+// letter or underscore follows it, nothing is consumed, so "5max" lexes as
+// the number 5 followed by the identifier max instead of the duration "5m"
+// and a stray "ax". A following digit is still allowed, so input such as
+// "5m30s" keeps producing two adjacent durations.
+func (l *Lexer) tryReadDurationSuffix() string {
+	for _, unit := range []string{"ms", "m", "s"} {
+		matched := true
+		for i, want := range unit { // units are ASCII, so i is also the rune offset
+			if got, ok := l.peekAt(i); !ok || got != want {
+				matched = false
+				break
+			}
+		}
+		if !matched {
+			continue
+		}
+		if after, ok := l.peekAt(len(unit)); ok && (unicode.IsLetter(after) || after == '_') {
+			continue
+		}
+		for range unit {
+			l.advance()
+		}
+		return unit
+	}
+	return ""
+}