// wikilinks-parser/lexer.go (221 lines, 3.6 KiB, Go)

//go:generate stringer -type=ItemType
package wikilink
import (
"fmt"
"strings"
"sync"
// "unicode"
"unicode/utf8"
"go.uber.org/zap"
)
// Kinds of Item the lexer can produce. The values are assigned by iota in
// declaration order, so reordering this list changes every value after the
// moved entry.
const (
	// ItemError is the zero ItemType, so a zero-valued Item is an error
	// item. errorf stores the formatted message in Item.Val.
	ItemError = ItemType(iota)
	// ItemEOF is rendered as "EOF" by Item.String.
	ItemEOF
	// ItemIdent: presumably an identifier such as a link target — confirm
	// against the state functions.
	ItemIdent
	// ItemOpenLink: presumably the OpenLink ("[[") delimiter — confirm.
	ItemOpenLink
	// ItemCloseLink: presumably the CloseLink ("]]") delimiter — confirm.
	ItemCloseLink
	// ItemHeading: presumably the Heading ("#") marker — confirm.
	ItemHeading
	// ItemBlockRef: presumably the BlockRef ("#^") marker — confirm.
	ItemBlockRef
	// ItemAlias: presumably the Alias ("|") separator — confirm.
	ItemAlias
	// ItemText: presumably plain text outside of a link — confirm.
	ItemText
)
// EOF is the sentinel rune that next returns once pos has reached the end of
// the input. Its value is 0, so a NUL byte in the input decodes to the same
// rune and cannot be told apart from end of input by the rune value alone.
const EOF rune = 0
// Literal marker strings of the link syntax. They are untyped string
// constants.
const (
	// OpenLink is the two-character opening delimiter.
	OpenLink = "[["

	// CloseLink is the two-character closing delimiter.
	CloseLink = "]]"

	// Alias is the separator character; presumably it splits a link target
	// from its display text — confirm against the state functions.
	Alias = "|"

	// Heading is the heading marker. It is a prefix of BlockRef, so code
	// matching by prefix has to test BlockRef first.
	Heading = "#"

	// BlockRef is the block-reference marker.
	BlockRef = "#^"
)
func Lex(name, input string) *Lexer {
l := &Lexer{
L: zap.NewExample().Sugar().Named("lexer"),
name: name,
input: input,
1 year ago
state: lexText,
items: make(chan Item, 2),
}
go l.run()
return l
}
func (l *Lexer) NextItem() Item {
for {
select {
case item := <-l.items:
return item
default:
if l.state == nil {
1 year ago
l.L.Named("NextItem").Errorw("state should not be nil")
return Item{
Typ: ItemError,
Val: "state is nil, should not be",
}
}
l.state = l.state(l)
}
}
}
// ignore drops the input scanned so far by moving start up to the current
// position. pos is read through GetPos so the access is guarded by posMutex
// like every other read of the field.
func (l *Lexer) ignore() {
	l.SetStart(l.GetPos())
}
// backup moves pos back by the stored width, undoing the most recent call to
// next. width is only refreshed by next, so calling backup twice in a row
// steps back twice by that same width.
func (l *Lexer) backup() {
	previous := l.GetPos() - l.GetWidth()
	l.SetPos(previous)
}
// Lexer holds the scanning state for one input string. The positional fields
// are read and written through the Get*/Set* accessors, each of which takes
// the mutex for its own field.
type Lexer struct {
	// L is the logger used by the lexer's methods.
	L *zap.SugaredLogger

	// name is the label passed to Lex.
	name string
	// input is the string being scanned.
	input string

	// start is the byte offset where the pending item begins.
	start int
	// pos is the byte offset of the next rune to read.
	pos int
	// width is the byte width of the rune most recently returned by next
	// (0 at end of input).
	width int

	// state is a state-function slot; Lex initialises it to lexText.
	state stateFn
	// items carries emitted items to NextItem.
	items chan Item

	// widthMutex guards width.
	widthMutex sync.Mutex
	// startMutex guards start.
	startMutex sync.Mutex
	// posMutex guards pos.
	posMutex sync.Mutex
	// chanMutex is held by emit while it sends and by run while it closes
	// the items channel.
	chanMutex sync.Mutex
}
// peek reports the next rune without consuming it: the rune is read with next
// and the position is restored by backup before peek returns.
func (l *Lexer) peek() rune {
	// The return value is evaluated first; the deferred backup then rewinds.
	defer l.backup()
	return l.next()
}
// accept consumes the next rune if it is one of the runes in valid and
// reports whether it did. When the rune is not in valid the position is
// rewound, so nothing is consumed.
func (l *Lexer) accept(valid string) bool {
	if !strings.ContainsRune(valid, l.next()) {
		l.backup()
		return false
	}
	return true
}
// acceptRun consumes runes for as long as they belong to valid. The first
// rune that does not belong is pushed back with backup.
func (l *Lexer) acceptRun(valid string) {
	for {
		if !strings.ContainsRune(valid, l.next()) {
			break
		}
	}
	l.backup()
}
// emit sends the pending text input[start:pos] on the items channel as an
// Item of type t, then moves start up to pos so the next item begins there.
//
// chanMutex is held for the whole call, including the channel send, which
// blocks while the channel's buffer is full.
func (l *Lexer) emit(t ItemType) {
	l.chanMutex.Lock()
	defer l.chanMutex.Unlock()

	item := Item{Typ: t, Val: l.input[l.GetStart():l.GetPos()]}

	logger := l.L.With(
		zap.Int("pos", l.GetPos()),
		zap.Int("width", l.GetWidth()),
	).Named("emit")
	logger.Debugw("emitting item", zap.String("item", item.String()))

	l.items <- item
	l.SetStart(l.GetPos())
}
// errorf sends an ItemError item whose Val is the formatted message and
// returns nil, which stops the state loop in run when a state function
// returns errorf's result.
//
// Unlike emit, the send is not done under chanMutex.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	failure := Item{
		Typ: ItemError,
		Val: fmt.Sprintf(format, args...),
	}
	l.L.Named("errorf").Debugw("emitting errorItem",
		zap.String("error", failure.String()),
	)
	l.items <- failure
	return nil
}
// next decodes and consumes the rune at pos and returns it. At end of input
// it stores a width of 0 (so a following backup is a no-op) and returns EOF.
//
// pos is read once and reused, so the bounds check, the slice expression and
// the new position are all computed from the same value.
func (l *Lexer) next() rune {
	pos := l.GetPos()
	if pos >= len(l.input) {
		l.SetWidth(0)
		return EOF
	}
	// width is the number of bytes the decoded rune occupies.
	r, width := utf8.DecodeRuneInString(l.input[pos:])
	l.SetWidth(width)
	l.SetPos(pos + width)
	return r
}
// run drives the state machine: starting from lexText it keeps calling the
// current state function until one returns nil, then closes the items channel
// while holding chanMutex.
//
// The mutex is locked before its Unlock is deferred. Deferring the Unlock at
// the top of the function would run it on a mutex that was never locked if a
// state function panicked, and that "unlock of unlocked mutex" fatal error
// would hide the original panic.
func (l *Lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	l.chanMutex.Lock()
	defer l.chanMutex.Unlock()
	close(l.items)
}
// GetPos returns the current value of pos, read under posMutex.
func (l *Lexer) GetPos() int {
	l.posMutex.Lock()
	current := l.pos
	l.posMutex.Unlock()
	return current
}
// SetPos stores pos as the new position, written under posMutex.
func (l *Lexer) SetPos(pos int) {
	l.posMutex.Lock()
	l.pos = pos
	l.posMutex.Unlock()
}
// GetWidth returns the current value of width, read under widthMutex.
func (l *Lexer) GetWidth() int {
	l.widthMutex.Lock()
	current := l.width
	l.widthMutex.Unlock()
	return current
}
// SetWidth stores width as the new rune width, written under widthMutex.
func (l *Lexer) SetWidth(width int) {
	l.widthMutex.Lock()
	l.width = width
	l.widthMutex.Unlock()
}
// GetStart returns the current value of start, read under startMutex.
func (l *Lexer) GetStart() int {
	l.startMutex.Lock()
	current := l.start
	l.startMutex.Unlock()
	return current
}
// SetStart stores start as the new item start offset, written under
// startMutex.
func (l *Lexer) SetStart(start int) {
	l.startMutex.Lock()
	l.start = start
	l.startMutex.Unlock()
}
// stateFn is one state of the lexer: it is called with the Lexer and returns
// the state to run next. Returning nil ends the loop in run.
type stateFn func(*Lexer) stateFn
// ItemType identifies the kind of an Item. Item.String formats it with %s;
// presumably the String method is the one generated by the stringer
// go:generate directive — confirm the generated file is present.
type ItemType int
// Item is one token handed from the lexer to its consumer.
type Item struct {
	// Typ is the kind of item.
	Typ ItemType

	// Val is the item's text: the slice of input for items sent by emit,
	// or the formatted message for items sent by errorf.
	Val string
}
// String renders the item for logs and debugging:
//   - ItemEOF is rendered as "EOF";
//   - ItemError is rendered as its message (Val) alone;
//   - anything else is rendered as <type>:<quoted value>.
//
// Values are never truncated, whatever their length.
func (i Item) String() string {
	if i.Typ == ItemEOF {
		return "EOF"
	}
	if i.Typ == ItemError {
		return i.Val
	}
	return fmt.Sprintf("%s:%q", i.Typ, i.Val)
}