// Package dsl contains a lexer that splits DSL source text into tokens.
package dsl

import (
	"fmt"
	"strings"
	"unicode"
)

// TokenType identifies the lexical class of a Token.
type TokenType int

// Token kinds produced by Lexer.Tokenize.
const (
	TOKEN_STRING   TokenType = iota // quoted string or """heredoc"""; Value holds the decoded text
	TOKEN_IDENT                     // identifier: letters, digits and '_', not starting with a digit
	TOKEN_FLOAT                     // decimal number containing a '.'
	TOKEN_INT                       // decimal number without a '.'
	TOKEN_DURATION                  // number immediately followed by "ms", "m" or "s"
	TOKEN_LBRACE                    // {
	TOKEN_RBRACE                    // }
	TOKEN_LPAREN                    // (
	TOKEN_RPAREN                    // )
	TOKEN_ASSIGN                    // =
	TOKEN_TILDE                     // ~
	TOKEN_EOF                       // end of input; always the last token of a successful Tokenize
)

// String returns a short human-readable name for t, or "UNKNOWN" for a value
// that is not one of the declared token kinds.
func (t TokenType) String() string {
	switch t {
	case TOKEN_STRING:
		return "STRING"
	case TOKEN_IDENT:
		return "IDENT"
	case TOKEN_FLOAT:
		return "FLOAT"
	case TOKEN_INT:
		return "INT"
	case TOKEN_DURATION:
		return "DURATION"
	case TOKEN_LBRACE:
		return "{"
	case TOKEN_RBRACE:
		return "}"
	case TOKEN_LPAREN:
		return "("
	case TOKEN_RPAREN:
		return ")"
	case TOKEN_ASSIGN:
		return "="
	case TOKEN_TILDE:
		return "~"
	case TOKEN_EOF:
		return "EOF"
	default:
		return "UNKNOWN"
	}
}

// Token is a single lexical element together with the position of its first
// character. Line and Col are 1-based; Col counts runes, not bytes.
type Token struct {
	Type  TokenType
	Value string
	Line  int
	Col   int
}

// String formats the token as Token(TYPE, "value", line:col).
func (t Token) String() string {
	return fmt.Sprintf("Token(%s, %q, %d:%d)", t.Type, t.Value, t.Line, t.Col)
}

// Lexer scans DSL source text rune by rune while tracking the current
// line/column position.
type Lexer struct {
	src  []rune // full input, decoded to runes
	pos  int    // index into src of the next unread rune
	line int    // 1-based line of the next unread rune
	col  int    // 1-based column (in runes) of the next unread rune
}

// NewLexer returns a Lexer positioned at the start of src (line 1, column 1).
func NewLexer(src string) *Lexer {
	return &Lexer{src: []rune(src), pos: 0, line: 1, col: 1}
}

// isASCIIDigit reports whether ch is one of '0'..'9'. Numeric literals are
// restricted to ASCII digits so that token values stay parseable by strconv;
// unicode.IsDigit would also accept digits from other scripts.
func isASCIIDigit(ch rune) bool {
	return ch >= '0' && ch <= '9'
}

// peek returns the next unread rune without consuming it. The boolean is
// false at end of input.
func (l *Lexer) peek() (rune, bool) {
	if l.pos >= len(l.src) {
		return 0, false
	}
	return l.src[l.pos], true
}

// peekAt returns the rune offset positions ahead of the current one without
// consuming anything. The boolean is false when that position lies outside
// the input.
func (l *Lexer) peekAt(offset int) (rune, bool) {
	i := l.pos + offset
	if i < 0 || i >= len(l.src) {
		return 0, false
	}
	return l.src[i], true
}

// advance consumes and returns the next rune, updating line and col.
// It must not be called at end of input; callers check with peek first.
func (l *Lexer) advance() rune {
	ch := l.src[l.pos]
	l.pos++
	if ch == '\n' {
		l.line++
		l.col = 1
	} else {
		l.col++
	}
	return ch
}

// skipWhitespaceAndComments consumes any run of Unicode whitespace and
// "//" line comments. The newline ending a comment is consumed as ordinary
// whitespace on the next iteration.
func (l *Lexer) skipWhitespaceAndComments() {
	for {
		ch, ok := l.peek()
		if !ok {
			return
		}
		if ch == '/' {
			if next, ok := l.peekAt(1); ok && next == '/' {
				for {
					c, ok := l.peek()
					if !ok || c == '\n' {
						break
					}
					l.advance()
				}
				continue
			}
		}
		if !unicode.IsSpace(ch) {
			return
		}
		l.advance()
	}
}

// Tokenize scans the remaining input and returns all tokens, terminated by a
// TOKEN_EOF token. On the first lexical error it returns a nil slice and an
// error whose message starts with the "line:col:" position of the problem.
func (l *Lexer) Tokenize() ([]Token, error) {
	var tokens []Token
	for {
		l.skipWhitespaceAndComments()
		line, col := l.line, l.col
		ch, ok := l.peek()
		if !ok {
			tokens = append(tokens, Token{Type: TOKEN_EOF, Line: line, Col: col})
			return tokens, nil
		}

		switch {
		case ch == '{':
			l.advance()
			tokens = append(tokens, Token{TOKEN_LBRACE, "{", line, col})
		case ch == '}':
			l.advance()
			tokens = append(tokens, Token{TOKEN_RBRACE, "}", line, col})
		case ch == '(':
			l.advance()
			tokens = append(tokens, Token{TOKEN_LPAREN, "(", line, col})
		case ch == ')':
			l.advance()
			tokens = append(tokens, Token{TOKEN_RPAREN, ")", line, col})
		case ch == '=':
			l.advance()
			tokens = append(tokens, Token{TOKEN_ASSIGN, "=", line, col})
		case ch == '~':
			l.advance()
			tokens = append(tokens, Token{TOKEN_TILDE, "~", line, col})
		case ch == '"':
			// A run of three quotes opens a heredoc; a single quote opens a
			// regular string. Both yield a TOKEN_STRING.
			var (
				s   string
				err error
			)
			if l.isHeredocStart() {
				s, err = l.readHeredoc()
			} else {
				s, err = l.readString()
			}
			if err != nil {
				return nil, err
			}
			tokens = append(tokens, Token{TOKEN_STRING, s, line, col})
		case isASCIIDigit(ch) || (ch == '-' && l.isNumberNext()):
			tok, err := l.readNumberOrDuration(line, col)
			if err != nil {
				return nil, err
			}
			tokens = append(tokens, tok)
		case unicode.IsLetter(ch) || ch == '_':
			tokens = append(tokens, Token{TOKEN_IDENT, l.readIdent(), line, col})
		default:
			return nil, fmt.Errorf("%d:%d: unexpected character %q", line, col, ch)
		}
	}
}

// isHeredocStart reports whether the next three runes are all double quotes.
func (l *Lexer) isHeredocStart() bool {
	for i := 0; i < 3; i++ {
		if ch, ok := l.peekAt(i); !ok || ch != '"' {
			return false
		}
	}
	return true
}

// isNumberNext reports whether the rune after the current one is an ASCII
// digit. It is used to decide whether a '-' begins a negative number.
func (l *Lexer) isNumberNext() bool {
	next, ok := l.peekAt(1)
	return ok && isASCIIDigit(next)
}

// readHeredoc consumes a """...""" block, starting at the opening delimiter,
// and returns its contents after dedentHeredoc has been applied. No escape
// sequences are interpreted. It returns an error positioned at the opening
// delimiter when the closing delimiter is missing.
func (l *Lexer) readHeredoc() (string, error) {
	line, col := l.line, l.col
	for i := 0; i < 3; i++ {
		l.advance()
	}

	var buf strings.Builder
	for {
		if l.isHeredocStart() { // the same triple quote also closes the block
			for i := 0; i < 3; i++ {
				l.advance()
			}
			return dedentHeredoc(buf.String()), nil
		}
		if _, ok := l.peek(); !ok {
			return "", fmt.Errorf("%d:%d: unterminated heredoc", line, col)
		}
		buf.WriteRune(l.advance())
	}
}

// leadingIndentWidth returns the number of leading space and tab bytes in line.
func leadingIndentWidth(line string) int {
	return len(line) - len(strings.TrimLeft(line, " \t"))
}

// dedentHeredoc normalizes the raw body of a heredoc:
//   - a whitespace-only first line and a whitespace-only last line are dropped
//     (these are the lines holding the opening and closing delimiters);
//   - the smallest space/tab indentation among the non-blank lines is removed
//     from every line that has at least that much indentation.
//
// Lines are joined back with '\n' and no trailing newline is added.
func dedentHeredoc(s string) string {
	lines := strings.Split(s, "\n")
	if len(lines) > 0 && strings.TrimSpace(lines[0]) == "" {
		lines = lines[1:]
	}
	if len(lines) > 0 && strings.TrimSpace(lines[len(lines)-1]) == "" {
		lines = lines[:len(lines)-1]
	}

	minIndent := -1
	for _, line := range lines {
		if strings.TrimSpace(line) == "" {
			continue // blank lines do not constrain the common indentation
		}
		if w := leadingIndentWidth(line); minIndent < 0 || w < minIndent {
			minIndent = w
		}
	}
	if minIndent < 0 {
		minIndent = 0
	}

	var result strings.Builder
	for i, line := range lines {
		// Only cut bytes that are known to be spaces or tabs. Slicing on byte
		// length alone could split a multi-byte whitespace rune on a blank line.
		if leadingIndentWidth(line) >= minIndent {
			line = line[minIndent:]
		}
		result.WriteString(line)
		if i < len(lines)-1 {
			result.WriteByte('\n')
		}
	}
	return result.String()
}

// readString consumes a double-quoted string, starting at the opening quote,
// and returns its decoded contents. The escapes \n, \t, \\ and \" are
// recognized; any other escape is an error. Raw newlines inside the string
// are kept as-is. Errors carry the "line:col:" position of the opening quote
// (unterminated string) or of the offending backslash (escape errors).
func (l *Lexer) readString() (string, error) {
	startLine, startCol := l.line, l.col
	l.advance() // opening quote

	var buf strings.Builder
	for {
		ch, ok := l.peek()
		if !ok {
			return "", fmt.Errorf("%d:%d: unterminated string", startLine, startCol)
		}
		switch ch {
		case '"':
			l.advance()
			return buf.String(), nil
		case '\\':
			escLine, escCol := l.line, l.col
			l.advance()
			esc, ok := l.peek()
			if !ok {
				return "", fmt.Errorf("%d:%d: unterminated escape", escLine, escCol)
			}
			l.advance()
			switch esc {
			case 'n':
				buf.WriteByte('\n')
			case 't':
				buf.WriteByte('\t')
			case '\\':
				buf.WriteByte('\\')
			case '"':
				buf.WriteByte('"')
			default:
				return "", fmt.Errorf("%d:%d: unknown escape \\%c", escLine, escCol, esc)
			}
		default:
			buf.WriteRune(l.advance())
		}
	}
}

// readIdent consumes the longest run of letters, digits and underscores and
// returns it. The caller has already verified that the first rune is a letter
// or underscore.
func (l *Lexer) readIdent() string {
	var buf strings.Builder
	for {
		ch, ok := l.peek()
		if !ok || !(unicode.IsLetter(ch) || unicode.IsDigit(ch) || ch == '_') {
			return buf.String()
		}
		buf.WriteRune(l.advance())
	}
}

// readNumberOrDuration consumes an optional '-', a run of ASCII digits with
// at most one '.', and an optional duration unit. line and col are the
// position of the first character and are copied into the returned token.
//
// The token type is TOKEN_DURATION when a unit follows (Value is the number
// text plus the unit), TOKEN_FLOAT when the number contains a '.', and
// TOKEN_INT otherwise. The returned error is currently always nil.
func (l *Lexer) readNumberOrDuration(line, col int) (Token, error) {
	var buf strings.Builder
	if ch, ok := l.peek(); ok && ch == '-' {
		buf.WriteRune(l.advance())
	}

	isFloat := false
scan:
	for {
		ch, ok := l.peek()
		switch {
		case !ok:
			break scan
		case isASCIIDigit(ch):
			buf.WriteRune(l.advance())
		case ch == '.' && !isFloat:
			// Only the first '.' belongs to the number; a second one ends it.
			isFloat = true
			buf.WriteRune(l.advance())
		default:
			break scan
		}
	}

	if suffix := l.tryReadDurationSuffix(); suffix != "" {
		return Token{TOKEN_DURATION, buf.String() + suffix, line, col}, nil
	}
	if isFloat {
		return Token{TOKEN_FLOAT, buf.String(), line, col}, nil
	}
	return Token{TOKEN_INT, buf.String(), line, col}, nil
}

// tryReadDurationSuffix consumes and returns a duration unit ("ms", "m" or
// "s") at the current position, or returns "" and consumes nothing when there
// is none.
//
// A unit is only recognized when it is not directly followed by a letter or
// underscore, so "10min" is the integer 10 followed by the identifier "min"
// rather than the duration "10m" followed by "in". A following digit is
// allowed, which keeps compound forms such as "5m30s" lexing as two
// consecutive durations.
func (l *Lexer) tryReadDurationSuffix() string {
	ch, ok := l.peek()
	if !ok {
		return ""
	}

	var suffix string
	switch ch {
	case 'm':
		suffix = "m"
		if next, ok := l.peekAt(1); ok && next == 's' {
			suffix = "ms"
		}
	case 's':
		suffix = "s"
	default:
		return ""
	}

	if after, ok := l.peekAt(len(suffix)); ok && (unicode.IsLetter(after) || after == '_') {
		return ""
	}
	for range suffix {
		l.advance()
	}
	return suffix
}