You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
obsidian-markdown/lexer.go

300 lines
5.4 KiB
Go

//go:generate stringer -type=LexemeType
package markdown
import (
"fmt"
// "os"
"strings"
"sync"
// "unicode"
"unicode/utf8"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
// Kinds of lexeme the lexer can produce. Values come from iota in declaration
// order, so LexError (0) is the zero value of LexemeType; reordering these
// changes their numeric values.
const (
// LexError tags a lexeme whose Val is an error message (built by
// Lexer.errorf).
LexError LexemeType = iota
// LexEOF is rendered as "EOF" by Lexeme.String.
LexEOF
// The remaining kinds are not produced anywhere in the visible part of
// the file; presumably the state functions emit them for the matching
// pieces of link syntax — confirm against those functions.
LexIdent
LexOpenLink
LexCloseLink
LexHeading
LexBlockRef
LexAlias
LexText
)
const (
// EOF is the sentinel rune Lexer.next returns once the position has
// reached the end of the input. Its value is 0, the same as a NUL
// character decoded from the input.
EOF rune = 0
)
// Literal delimiter strings. None of them is referenced in the visible part
// of the file; presumably the state functions match the input against them —
// confirm there. Note that Heading ("#") is a prefix of BlockRef ("#^").
const (
OpenLink = "[["
CloseLink = "]]"
Alias = "|"
Heading = "#"
BlockRef = "#^"
EscapeChar = `\`
)
// Lex scans input and returns the Lexer that holds the resulting lexemes.
//
// name is attached to every log entry as the "lexer" field, input is the text
// to scan, and level is the minimum severity the logger emits. Entries are
// written in zap's console encoding, with ISO 8601 timestamps, to both
// ./lexer.log and stdout; zap's own internal errors go to stderr.
//
// Scanning happens synchronously inside this call, so Items is complete when
// Lex returns. Lex panics if the logger cannot be built, because the result
// of Build is passed through zap.Must.
func Lex(name, input string, level zapcore.Level) *Lexer {
	encoding := zap.NewProductionEncoderConfig()
	encoding.EncodeTime = zapcore.ISO8601TimeEncoder

	logCfg := zap.Config{
		Level:            zap.NewAtomicLevelAt(level),
		Encoding:         "console",
		EncoderConfig:    encoding,
		OutputPaths:      []string{"./lexer.log", "stdout"},
		ErrorOutputPaths: []string{"stderr"},
		InitialFields: map[string]interface{}{
			"lexer": name,
			// "pid": os.Getpid(),
		},
	}
	logger := zap.Must(logCfg.Build()).Named("lexer")

	lexer := &Lexer{
		L:     logger,
		name:  name,
		input: input,
		state: lexText,
		Items: []Lexeme{},
	}
	// go lexer.run()
	lexer.run()
	return lexer
}
/* shouldn't need this in non-concurrent implementation
func (l *Lexer) NextItem() Item {
for {
select {
case item := <-l.items:
return item
default:
if l.state == nil {
l.L.Named("NextItem").Errorw("state should not be nil")
return Item{
Typ: ItemError,
Val: "state is nil, should not be",
}
}
l.state = l.state(l)
}
}
}
*/
// ignore discards the input scanned since the last emit without producing a
// lexeme, by moving start up to the current position.
//
// The position is read through GetPos, as every other caller in this file
// does, instead of reading l.pos directly and bypassing posMutex.
func (l *Lexer) ignore() {
	l.SetStart(l.GetPos())
}
// backup moves the position back by the width recorded by the most recent
// call to next, un-reading that rune. next records a width of 0 at end of
// input, so backing up after EOF leaves the position where it was.
//
// Only one width is remembered, so only the latest rune can be un-read.
func (l *Lexer) backup() {
	previous := l.GetPos() - l.GetWidth()
	l.SetPos(previous)
}
// Lexer holds the scanning state for one input string together with the
// lexemes produced so far.
//
// It contains mutexes, so it must be used through a pointer and never copied.
type Lexer struct {
	// L is the logger every method derives its named child logger from.
	L *zap.Logger

	// name is the label given to Lex; input is the text being scanned.
	name  string
	input string

	// start is the byte offset where the pending lexeme begins, pos is the
	// byte offset of the next rune to read, and width is the byte width of
	// the rune most recently returned by next (0 at end of input).
	start int
	pos   int
	width int

	// state is initialised by Lex; the visible run loop tracks the current
	// state in a local variable and does not read this field.
	state stateFn

	// Items chan Item
	// Items collects the emitted lexemes in order.
	Items []Lexeme

	// widthMutex, startMutex and posMutex are each held by the Get/Set
	// accessor pair of the field they are named after. chanMutex is only
	// referenced by the commented-out concurrent implementation.
	widthMutex sync.Mutex
	startMutex sync.Mutex
	posMutex   sync.Mutex
	chanMutex  sync.Mutex
}
// peek returns the next rune without consuming it: it reads one rune with
// next and immediately un-reads it with backup. At end of input it returns
// EOF.
func (l *Lexer) peek() rune {
	l.L.Named("peek").Debug("peeking")
	upcoming := l.next()
	l.backup()
	return upcoming
}
// accept consumes the next rune if it is one of the runes in valid and
// reports whether it did. On a miss the rune is un-read, so the position is
// left unchanged.
func (l *Lexer) accept(valid string) bool {
	logger := l.L.Named("accept").With(zap.String("input", valid))

	if !strings.ContainsRune(valid, l.next()) {
		logger.Debug("rejected input")
		l.backup()
		return false
	}

	logger.Debug("matched input")
	return true
}
// acceptRun consumes runes for as long as each one appears in valid, then
// un-reads the first rune that does not, leaving the position just past the
// matching run.
func (l *Lexer) acceptRun(valid string) {
	l.L.Named("acceptRun").With(zap.String("input", valid)).Debug("scanning")

	for {
		if !strings.ContainsRune(valid, l.next()) {
			break
		}
	}
	// The loop always reads one rune too many; give it back.
	l.backup()
}
// emit appends a lexeme of type t whose value is the input between start and
// the current position, then moves start up to the current position so the
// next lexeme begins where this one ended.
func (l *Lexer) emit(t LexemeType) {
	lexeme := Lexeme{
		Typ: t,
		Val: l.input[l.GetStart():l.GetPos()],
	}

	logger := l.L.Named("emit").With(zap.String("item", lexeme.String()))
	logger.Info("emitting lexeme")

	l.Items = append(l.Items, lexeme)
	l.SetStart(l.GetPos())
	/* original concurrent implementation
	defer l.chanMutex.Unlock()
	l.chanMutex.Lock()
	i := Item{t, l.input[l.GetStart():l.GetPos()]}
	L := l.L.With(
	zap.Int("pos", l.GetPos()),
	zap.Int("width", l.GetWidth()),
	).Named("emit")
	L.Debug("emitting item",
	zap.String("item", i.String()),
	)
	l.items <- i
	*/
}
// errorf appends a LexError lexeme whose value is the message formatted from
// format and args, and returns nil. Returned from a state function, that nil
// ends the loop in run, so an error stops the scan.
//
// Unlike emit, errorf does not move start.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	logger := l.L.Named("errorf")

	failure := Lexeme{
		Typ: LexError,
		Val: fmt.Sprintf(format, args...),
	}
	logger.Debug("emitting errorItem", zap.String("error", failure.String()))

	l.Items = append(l.Items, failure)
	return nil
}
// next decodes the rune at the current position, advances the position past
// it, records its byte width for backup, and returns it.
//
// At end of input it records a width of 0 and returns EOF without moving.
// Because EOF is rune 0, a NUL character in the input yields the same return
// value as end of input. Invalid UTF-8 is reported by DecodeRuneInString as
// utf8.RuneError with a width of 1.
func (l *Lexer) next() rune {
	logger := l.L.Named("next")

	if l.GetPos() >= len(l.input) {
		logger.Debug("end of input reached")
		l.SetWidth(0)
		return EOF
	}

	decoded, size := utf8.DecodeRuneInString(l.input[l.GetPos():])
	logger.Debug(
		"found rune",
		zap.String("rune", string(decoded)),
		zap.Int("width", size),
	)

	l.SetWidth(size)
	l.SetPos(l.GetPos() + l.GetWidth())
	return decoded
}
// run drives the state machine: starting from lexText, it keeps calling the
// current state function and switching to the state it returns until one
// returns nil.
func (l *Lexer) run() {
	step := lexText
	for step != nil {
		step = step(l)
	}
	/* original concurrent implementation
	defer l.chanMutex.Unlock()
	for state := lexText; state != nil; {
	state = state(l)
	}
	l.chanMutex.Lock()
	close(l.items)
	*/
}
// GetPos returns the current position (a byte offset into the input),
// holding posMutex while it reads and logs the value.
func (l *Lexer) GetPos() int {
	l.posMutex.Lock()
	defer l.posMutex.Unlock()

	logger := l.L.Named("GetPos")
	logger.Debug("getting current position", zap.Int("old", l.pos))
	return l.pos
}
// SetPos replaces the current position with pos, holding posMutex while it
// logs the old and new values and performs the write. pos is not validated
// against the bounds of the input.
func (l *Lexer) SetPos(pos int) {
	l.posMutex.Lock()
	defer l.posMutex.Unlock()

	logger := l.L.Named("SetPos")
	logger.Debug("setting new position", zap.Int("new", pos), zap.Int("old", l.pos))
	l.pos = pos
}
// GetWidth returns the byte width of the rune most recently read by next
// (0 after end of input), holding widthMutex while it reads and logs the
// value.
func (l *Lexer) GetWidth() int {
	l.widthMutex.Lock()
	defer l.widthMutex.Unlock()
	// The message previously read "setting new width", copied from SetWidth,
	// which made getter and setter entries indistinguishable by text.
	l.L.Named("GetWidth").Debug("getting current width",
		zap.Int("old", l.width),
	)
	return l.width
}
// SetWidth records width as the byte width of the last rune read, holding
// widthMutex while it logs the old and new values and performs the write.
func (l *Lexer) SetWidth(width int) {
	l.widthMutex.Lock()
	defer l.widthMutex.Unlock()

	logger := l.L.Named("SetWidth")
	logger.Debug("setting new width", zap.Int("new", width), zap.Int("old", l.width))
	l.width = width
}
// GetStart returns the byte offset at which the pending lexeme begins,
// holding startMutex while it reads and logs the value.
func (l *Lexer) GetStart() int {
	l.startMutex.Lock()
	defer l.startMutex.Unlock()

	logger := l.L.Named("GetStart")
	logger.Debug("getting old start", zap.Int("old", l.start))
	return l.start
}
// SetStart moves the beginning of the pending lexeme to start, holding
// startMutex while it logs the old and new values and performs the write.
func (l *Lexer) SetStart(start int) {
	l.startMutex.Lock()
	defer l.startMutex.Unlock()

	logger := l.L.Named("SetStart")
	logger.Debug("setting new start", zap.Int("new", start), zap.Int("old", l.start))
	l.start = start
}
// stateFn is one state of the lexer's state machine: it acts on the Lexer and
// returns the state to run next. Returning nil stops the loop in Lexer.run.
type stateFn func(*Lexer) stateFn
// LexemeType identifies the kind of a Lexeme. Lexeme.String formats it with
// %s; presumably its String method is the one generated by the stringer
// directive at the top of the file — confirm the generated file exists.
type LexemeType int
// Lexeme is a single token produced by the lexer.
//
// Some code in this file builds it with positional literals (Lexeme{typ, val}),
// so the field order must not change.
type Lexeme struct {
// Typ is the kind of lexeme.
Typ LexemeType
// Val is the slice of input the lexeme covers, or the error message when
// Typ is LexError.
Val string
}
// String renders the lexeme for logs and debugging:
//
//   - LexEOF is rendered as "EOF";
//   - LexError is rendered as its bare message (Val);
//   - every other type is rendered as "<type>:<quoted value>".
//
// Values are never truncated. An earlier variant shortened values longer than
// ten characters (format "%s:%.10q..."), but it was disabled and left behind
// a length check whose two branches returned the same string; that dead
// check is removed here.
func (i Lexeme) String() string {
	switch i.Typ {
	case LexEOF:
		return "EOF"
	case LexError:
		return i.Val
	default:
		return fmt.Sprintf("%s:%q", i.Typ, i.Val)
	}
}