//go:generate stringer -type=LexemeType package markdown import ( "fmt" // "os" "strings" "sync" // "unicode" "unicode/utf8" "go.uber.org/zap" "go.uber.org/zap/zapcore" ) const ( LexError LexemeType = iota LexEOF LexIdent LexOpenLink LexCloseLink LexHeading LexBlockRef LexAlias LexText ) const ( EOF rune = 0 ) const ( OpenLink = "[[" CloseLink = "]]" Alias = "|" Heading = "#" BlockRef = "#^" EscapeChar = `\` ) func Lex(name, input string, level zapcore.Level) *Lexer { encoderCfg := zap.NewProductionEncoderConfig() encoderCfg.EncodeTime = zapcore.ISO8601TimeEncoder config := zap.Config{ Level: zap.NewAtomicLevelAt(level), EncoderConfig: encoderCfg, OutputPaths: []string{ "./lexer.log", "stdout", }, Encoding: "console", ErrorOutputPaths: []string{ "stderr", }, InitialFields: map[string]interface{}{ "lexer": name, // "pid": os.Getpid(), }, } l := &Lexer{ L: zap.Must(config.Build()).Named("lexer"), name: name, input: input, state: lexText, Items: make([]Lexeme, 0), } // go l.run() l.run() return l } /* shouldn't need this in non-concurrent implementation func (l *Lexer) NextItem() Item { for { select { case item := <-l.items: return item default: if l.state == nil { l.L.Named("NextItem").Errorw("state should not be nil") return Item{ Typ: ItemError, Val: "state is nil, should not be", } } l.state = l.state(l) } } } */ func (l *Lexer) ignore() { l.SetStart(l.pos) } func (l *Lexer) backup() { l.SetPos(l.GetPos() - l.GetWidth()) } type Lexer struct { L *zap.Logger name, input string start, pos, width int state stateFn // Items chan Item Items []Lexeme widthMutex, startMutex, posMutex, chanMutex sync.Mutex } func (l *Lexer) peek() rune { L := l.L.Named("peek") L.Debug("peeking") r := l.next() l.backup() return r } func (l *Lexer) accept(valid string) bool { L := l.L.Named("accept").With( zap.String("input", valid), ) if strings.ContainsRune(valid, l.next()) { L.Debug("matched input") return true } L.Debug("rejected input") l.backup() return false } func (l *Lexer) acceptRun(valid string) { L := l.L.Named("acceptRun").With( zap.String("input", valid), ) L.Debug("scanning") for strings.ContainsRune(valid, l.next()) { } l.backup() } func (l *Lexer) emit(t LexemeType) { i := Lexeme{t, l.input[l.GetStart():l.GetPos()]} L := l.L.Named("emit").With( zap.String("item", i.String()), ) L.Info("emitting lexeme") l.Items = append(l.Items, i) l.SetStart(l.GetPos()) /* original concurrent implementation defer l.chanMutex.Unlock() l.chanMutex.Lock() i := Item{t, l.input[l.GetStart():l.GetPos()]} L := l.L.With( zap.Int("pos", l.GetPos()), zap.Int("width", l.GetWidth()), ).Named("emit") L.Debug("emitting item", zap.String("item", i.String()), ) l.items <- i */ } func (l *Lexer) errorf(format string, args ...interface{}) stateFn { L := l.L.Named("errorf") errorItem := Lexeme{ LexError, fmt.Sprintf(format, args...), } L.Debug("emitting errorItem", zap.String("error", errorItem.String()), ) l.Items = append(l.Items, errorItem) return nil } func (l *Lexer) next() rune { var r rune L := l.L.Named("next") if l.GetPos() >= len(l.input) { L.Debug("end of input reached") l.SetWidth(0) return EOF } r, width := utf8.DecodeRuneInString(l.input[l.GetPos():]) L.Debug("found rune", zap.String("rune", string(r)), zap.Int("width", width), ) l.SetWidth(width) l.SetPos(l.GetPos() + l.GetWidth()) return r } func (l *Lexer) run() { for state := lexText; state != nil; { state = state(l) } /* original concurrent implementation defer l.chanMutex.Unlock() for state := lexText; state != nil; { state = state(l) } l.chanMutex.Lock() close(l.items) */ } func (l *Lexer) GetPos() int { defer l.posMutex.Unlock() l.posMutex.Lock() l.L.Named("GetPos").Debug("getting current position", zap.Int("old", l.pos), ) return l.pos } func (l *Lexer) SetPos(pos int) { defer l.posMutex.Unlock() l.posMutex.Lock() l.L.Named("SetPos").Debug("setting new position", zap.Int("new", pos), zap.Int("old", l.pos), ) l.pos = pos } func (l *Lexer) GetWidth() int { defer l.widthMutex.Unlock() l.widthMutex.Lock() l.L.Named("GetWidth").Debug("setting new width", zap.Int("old", l.width), ) return l.width } func (l *Lexer) SetWidth(width int) { defer l.widthMutex.Unlock() l.widthMutex.Lock() l.L.Named("SetWidth").Debug("setting new width", zap.Int("new", width), zap.Int("old", l.width), ) l.width = width } func (l *Lexer) GetStart() int { defer l.startMutex.Unlock() l.startMutex.Lock() l.L.Named("GetStart").Debug("getting old start", zap.Int("old", l.start), ) return l.start } func (l *Lexer) SetStart(start int) { defer l.startMutex.Unlock() l.startMutex.Lock() l.L.Named("SetStart").Debug("setting new start", zap.Int("new", start), zap.Int("old", l.start), ) l.start = start } type stateFn func(*Lexer) stateFn type LexemeType int type Lexeme struct { Typ LexemeType Val string } func (i Lexeme) String() string { switch i.Typ { case LexEOF: return "EOF" case LexError: return i.Val } if len(i.Val) > 10 { // return fmt.Sprintf("%s:%.10q...", i.Typ, i.Val) return fmt.Sprintf("%s:%q", i.Typ, i.Val) } return fmt.Sprintf("%s:%q", i.Typ, i.Val) }