Files
judge/dsl/lexer.go
2026-04-05 18:20:42 +03:00

388 lines
6.8 KiB
Go

package dsl
import (
"fmt"
"strings"
"unicode"
)
// TokenType identifies the lexical category of a Token.
type TokenType int

// Token categories. Values are assigned sequentially from zero via iota, so
// the declaration order is significant.
const (
	// TOKEN_STRING is a string literal, written either in double quotes or
	// as a triple-quoted heredoc.
	TOKEN_STRING TokenType = iota
	// TOKEN_IDENT is an identifier: letters, digits and underscores, not
	// starting with a digit.
	TOKEN_IDENT
	// TOKEN_FLOAT is a numeric literal that contains a decimal point.
	TOKEN_FLOAT
	// TOKEN_INT is a numeric literal without a decimal point.
	TOKEN_INT
	// TOKEN_DURATION is a numeric literal followed by a ms, m or s suffix.
	TOKEN_DURATION
	// TOKEN_LBRACE is the "{" punctuation mark.
	TOKEN_LBRACE
	// TOKEN_RBRACE is the "}" punctuation mark.
	TOKEN_RBRACE
	// TOKEN_LPAREN is the "(" punctuation mark.
	TOKEN_LPAREN
	// TOKEN_RPAREN is the ")" punctuation mark.
	TOKEN_RPAREN
	// TOKEN_ASSIGN is the "=" punctuation mark.
	TOKEN_ASSIGN
	// TOKEN_TILDE is the "~" punctuation mark.
	TOKEN_TILDE
	// TOKEN_EOF marks the end of the input.
	TOKEN_EOF
)

// String returns a short display name for t: an upper-case word for literal
// and identifier categories, the punctuation mark itself for punctuation, and
// "UNKNOWN" for any value that is not one of the declared constants.
func (t TokenType) String() string {
	// Indexed by TokenType value; every declared constant has an entry.
	names := [...]string{
		TOKEN_STRING:   "STRING",
		TOKEN_IDENT:    "IDENT",
		TOKEN_FLOAT:    "FLOAT",
		TOKEN_INT:      "INT",
		TOKEN_DURATION: "DURATION",
		TOKEN_LBRACE:   "{",
		TOKEN_RBRACE:   "}",
		TOKEN_LPAREN:   "(",
		TOKEN_RPAREN:   ")",
		TOKEN_ASSIGN:   "=",
		TOKEN_TILDE:    "~",
		TOKEN_EOF:      "EOF",
	}
	if t < 0 || int(t) >= len(names) {
		return "UNKNOWN"
	}
	return names[t]
}
// Token is one lexical unit together with the source position where it
// starts.
type Token struct {
	// Type is the lexical category of the token.
	Type TokenType
	// Value is the token text. For string literals it is the decoded
	// content rather than the raw source spelling.
	Value string
	// Line and Col give the position of the token's first rune as recorded
	// by the lexer, which starts counting both at 1.
	Line, Col int
}

// String renders the token for debugging as
// Token(<type>, <quoted value>, <line>:<col>).
func (t Token) String() string {
	kind := t.Type.String()
	return fmt.Sprintf("Token(%s, %q, %d:%d)", kind, t.Value, t.Line, t.Col)
}
// Lexer holds the scanning state for one input text.
type Lexer struct {
	// src is the input decoded into runes so that positions count
	// characters rather than bytes.
	src []rune
	// pos is the index in src of the next unread rune.
	pos int
	// line and col are the position of the next unread rune; both start
	// at 1.
	line, col int
}

// NewLexer returns a Lexer positioned at the start of src (line 1, column 1).
func NewLexer(src string) *Lexer {
	lx := new(Lexer)
	lx.src = []rune(src)
	lx.line, lx.col = 1, 1
	return lx
}
// peek returns the next unread rune without consuming it. The boolean is
// false, and the rune zero, once the whole input has been consumed.
func (l *Lexer) peek() (rune, bool) {
	if l.pos < len(l.src) {
		return l.src[l.pos], true
	}
	return 0, false
}
// peekAt returns the rune located offset positions past the next unread rune
// without consuming anything; peekAt(0) is equivalent to peek. The boolean is
// false, and the rune zero, when that position lies outside the input. This
// includes positions before the start of the input reached through a negative
// offset, which would otherwise index out of range.
func (l *Lexer) peekAt(offset int) (rune, bool) {
	i := l.pos + offset
	if i < 0 || i >= len(l.src) {
		return 0, false
	}
	return l.src[i], true
}
// advance consumes the next rune and returns it, keeping the line/column
// bookkeeping in step: a newline moves to column 1 of the following line, any
// other rune moves one column to the right.
//
// The caller must make sure input remains (for example via peek); the rune is
// read without a bounds check, so calling advance at end of input panics.
func (l *Lexer) advance() rune {
	r := l.src[l.pos]
	l.pos++
	switch r {
	case '\n':
		l.line, l.col = l.line+1, 1
	default:
		l.col++
	}
	return r
}
// skipWhitespaceAndComments consumes any run of Unicode whitespace and
// "//" line comments, stopping at the first rune that belongs to neither or
// at end of input. A comment extends up to, but not including, the newline
// that ends its line; that newline is then skipped as ordinary whitespace.
func (l *Lexer) skipWhitespaceAndComments() {
	for {
		cur, more := l.peek()
		if !more {
			return
		}
		switch {
		case unicode.IsSpace(cur):
			l.advance()
		case cur == '/':
			if second, has := l.peekAt(1); !has || second != '/' {
				// A lone '/' does not start a comment; leave it for the caller.
				return
			}
			for c, inLine := l.peek(); inLine && c != '\n'; c, inLine = l.peek() {
				l.advance()
			}
		default:
			return
		}
	}
}
// Tokenize scans the remaining input and returns its tokens in source order,
// ending with a single TOKEN_EOF token. Whitespace and "//" comments between
// tokens are discarded. Every token carries the line and column of its first
// rune.
//
// Scanning stops at the first lexical error, in which case the returned slice
// is nil. A rune that cannot start any token is reported as
// "line:col: unexpected character"; errors from string and number scanning
// are returned unchanged.
func (l *Lexer) Tokenize() ([]Token, error) {
	// Single-rune punctuation: the token text is the rune itself.
	punct := map[rune]TokenType{
		'{': TOKEN_LBRACE,
		'}': TOKEN_RBRACE,
		'(': TOKEN_LPAREN,
		')': TOKEN_RPAREN,
		'=': TOKEN_ASSIGN,
		'~': TOKEN_TILDE,
	}

	var out []Token
	for {
		l.skipWhitespaceAndComments()

		// Record where the token begins before any of it is consumed.
		startLine, startCol := l.line, l.col
		cur, more := l.peek()
		if !more {
			out = append(out, Token{Type: TOKEN_EOF, Line: startLine, Col: startCol})
			return out, nil
		}

		if kind, isPunct := punct[cur]; isPunct {
			l.advance()
			out = append(out, Token{Type: kind, Value: string(cur), Line: startLine, Col: startCol})
			continue
		}

		switch {
		case cur == '"':
			// Check for a heredoc opener (""") before falling back to an
			// ordinary quoted string.
			read := l.readString
			if l.isHeredocStart() {
				read = l.readHeredoc
			}
			text, err := read()
			if err != nil {
				return nil, err
			}
			out = append(out, Token{Type: TOKEN_STRING, Value: text, Line: startLine, Col: startCol})

		case unicode.IsDigit(cur), cur == '-' && l.isNumberNext():
			// A '-' starts a number only when a digit follows directly.
			tok, err := l.readNumberOrDuration(startLine, startCol)
			if err != nil {
				return nil, err
			}
			out = append(out, tok)

		case unicode.IsLetter(cur), cur == '_':
			name := l.readIdent()
			out = append(out, Token{Type: TOKEN_IDENT, Value: name, Line: startLine, Col: startCol})

		default:
			return nil, fmt.Errorf("%d:%d: unexpected character %q", startLine, startCol, cur)
		}
	}
}
// isHeredocStart reports whether the next three unread runes are all double
// quotes, i.e. whether the cursor sits on a """ delimiter. Nothing is
// consumed.
func (l *Lexer) isHeredocStart() bool {
	for i := 0; i < 3; i++ {
		if r, ok := l.peekAt(i); !ok || r != '"' {
			return false
		}
	}
	return true
}
// isNumberNext reports whether the rune immediately after the next unread one
// is a digit. Tokenize uses it to decide whether a '-' begins a negative
// number. Nothing is consumed.
func (l *Lexer) isNumberNext() bool {
	if following, ok := l.peekAt(1); ok {
		return unicode.IsDigit(following)
	}
	return false
}
// readHeredoc consumes a triple-quoted heredoc and returns its content after
// common-indentation removal by dedentHeredoc. The content is taken verbatim:
// no escape sequences are interpreted, and it ends at the first """ that
// follows the opening delimiter.
//
// The caller must have checked that the cursor is on the opening """ (see
// isHeredocStart); the three delimiter runes are consumed unconditionally.
//
// If the input ends before a closing """ is found, an error naming the line
// of the opening delimiter is returned.
func (l *Lexer) readHeredoc() (string, error) {
	// Remember where the heredoc began: by the time a missing terminator is
	// detected the cursor is at end of input, which is useless for locating
	// the mistake.
	startLine := l.line

	// Skip the opening delimiter.
	l.advance()
	l.advance()
	l.advance()

	var buf strings.Builder
	for {
		if l.pos+2 < len(l.src) &&
			l.src[l.pos] == '"' &&
			l.src[l.pos+1] == '"' &&
			l.src[l.pos+2] == '"' {
			// Skip the closing delimiter.
			l.advance()
			l.advance()
			l.advance()
			return dedentHeredoc(buf.String()), nil
		}
		if _, ok := l.peek(); !ok {
			return "", fmt.Errorf("unterminated heredoc at line %d", startLine)
		}
		buf.WriteRune(l.advance())
	}
}
// dedentHeredoc normalizes the raw text of a heredoc:
//
//   - a whitespace-only first line is dropped, and then a whitespace-only
//     last line is dropped;
//   - the smallest leading run of spaces and tabs (counted in bytes) found on
//     any line that is not whitespace-only is removed from the start of every
//     line;
//   - a line shorter than that margin is kept unchanged.
//
// The remaining lines are joined with "\n" and no trailing newline is added.
func dedentHeredoc(s string) string {
	blank := func(row string) bool { return strings.TrimSpace(row) == "" }

	rows := strings.Split(s, "\n")
	if len(rows) > 0 && blank(rows[0]) {
		rows = rows[1:]
	}
	if n := len(rows); n > 0 && blank(rows[n-1]) {
		rows = rows[:n-1]
	}

	// Find the common margin; whitespace-only rows do not take part.
	margin, found := 0, false
	for _, row := range rows {
		if blank(row) {
			continue
		}
		lead := len(row) - len(strings.TrimLeft(row, " \t"))
		if !found || lead < margin {
			margin, found = lead, true
		}
	}

	for i, row := range rows {
		if len(row) >= margin {
			rows[i] = row[margin:]
		}
	}
	return strings.Join(rows, "\n")
}
// readString consumes a double-quoted string literal and returns its decoded
// content. The cursor must be on the opening quote, which is consumed
// unconditionally.
//
// The recognized escape sequences are \n, \t, \\ and \". Any other rune,
// including a raw newline, is copied into the result unchanged.
//
// Errors carry a line number: an unterminated string reports the line of its
// opening quote (not the line where the input happened to end), and escape
// errors report the line of the offending backslash.
func (l *Lexer) readString() (string, error) {
	startLine := l.line
	l.advance() // opening quote

	var buf strings.Builder
	for {
		ch, ok := l.peek()
		if !ok {
			return "", fmt.Errorf("unterminated string at line %d", startLine)
		}
		switch ch {
		case '"':
			l.advance() // closing quote
			return buf.String(), nil
		case '\\':
			escLine := l.line
			l.advance()
			esc, ok := l.peek()
			if !ok {
				return "", fmt.Errorf("unterminated escape at line %d", escLine)
			}
			l.advance()
			switch esc {
			case 'n':
				buf.WriteByte('\n')
			case 't':
				buf.WriteByte('\t')
			case '\\':
				buf.WriteByte('\\')
			case '"':
				buf.WriteByte('"')
			default:
				return "", fmt.Errorf("unknown escape \\%c at line %d", esc, escLine)
			}
		default:
			buf.WriteRune(l.advance())
		}
	}
}
// readIdent consumes the longest run of letters, digits and underscores at
// the cursor and returns it. It returns the empty string, consuming nothing,
// when the next rune is not one of those.
func (l *Lexer) readIdent() string {
	begin := l.pos
	for l.pos < len(l.src) {
		r := l.src[l.pos]
		if r != '_' && !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			break
		}
		l.advance()
	}
	return string(l.src[begin:l.pos])
}
// readNumberOrDuration consumes a numeric literal at the cursor and returns
// it as a token positioned at line:col (the caller-supplied start of the
// literal).
//
// The literal is an optional leading '-', followed by ASCII digits with at
// most one '.'. If a duration suffix follows directly (see
// tryReadDurationSuffix) the result is a TOKEN_DURATION whose value is the
// number plus the suffix; otherwise it is a TOKEN_FLOAT when a '.' was seen
// and a TOKEN_INT when not.
//
// Only the ASCII digits 0-9 are accepted. Digits from other scripts satisfy
// unicode.IsDigit but are rejected by Go's numeric parsers, so letting them
// into the token value would only defer the failure. An error is returned
// when no digit could be consumed, which also guarantees that a successful
// call always makes progress.
func (l *Lexer) readNumberOrDuration(line, col int) (Token, error) {
	var buf strings.Builder
	isFloat := false
	digits := 0

	if ch, ok := l.peek(); ok && ch == '-' {
		buf.WriteRune(l.advance())
	}

scan:
	for {
		ch, ok := l.peek()
		switch {
		case !ok:
			break scan
		case ch >= '0' && ch <= '9':
			digits++
			buf.WriteRune(l.advance())
		case ch == '.' && !isFloat:
			isFloat = true
			buf.WriteRune(l.advance())
		default:
			break scan
		}
	}

	if digits == 0 {
		return Token{}, fmt.Errorf("%d:%d: invalid number: only ASCII digits 0-9 are supported", line, col)
	}

	if suffix := l.tryReadDurationSuffix(); suffix != "" {
		return Token{Type: TOKEN_DURATION, Value: buf.String() + suffix, Line: line, Col: col}, nil
	}
	if isFloat {
		return Token{Type: TOKEN_FLOAT, Value: buf.String(), Line: line, Col: col}, nil
	}
	return Token{Type: TOKEN_INT, Value: buf.String(), Line: line, Col: col}, nil
}
// tryReadDurationSuffix consumes a duration unit at the cursor and returns
// it: "ms", "m" or "s". It returns the empty string, consuming nothing, when
// no unit is present.
//
// A unit is recognized only when it is not directly followed by a letter or
// an underscore. Without that check the unit would be split off the front of
// a longer word: "10max" would lex as the duration "10m" followed by the
// identifier "ax". A following digit is still allowed, so input such as
// "1m30s" continues to lex as two consecutive durations.
func (l *Lexer) tryReadDurationSuffix() string {
	// endsWord reports whether the rune n positions ahead cannot continue a
	// word, i.e. it is absent or is neither a letter nor an underscore.
	endsWord := func(n int) bool {
		r, ok := l.peekAt(n)
		return !ok || !(unicode.IsLetter(r) || r == '_')
	}

	first, ok := l.peek()
	if !ok {
		return ""
	}
	second, _ := l.peekAt(1) // zero when absent, which matches no unit

	var suffix string
	switch {
	case first == 'm' && second == 's' && endsWord(2):
		suffix = "ms"
	case first == 'm' && endsWord(1):
		suffix = "m"
	case first == 's' && endsWord(1):
		suffix = "s"
	default:
		return ""
	}

	for range suffix {
		l.advance()
	}
	return suffix
}