You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
299 lines
5.4 KiB
Go
299 lines
5.4 KiB
Go
//go:generate stringer -type=LexemeType
|
|
package wikilink
|
|
|
|
import (
|
|
"fmt"
|
|
// "os"
|
|
"strings"
|
|
"sync"
|
|
|
|
// "unicode"
|
|
"unicode/utf8"
|
|
|
|
"go.uber.org/zap"
|
|
"go.uber.org/zap/zapcore"
|
|
)
|
|
|
|
const (
|
|
LexError LexemeType = iota
|
|
LexEOF
|
|
LexIdent
|
|
LexOpenLink
|
|
LexCloseLink
|
|
LexHeading
|
|
LexBlockRef
|
|
LexAlias
|
|
LexText
|
|
)
|
|
|
|
// EOF is the sentinel rune that next returns once the input is exhausted.
// Because its value is 0, a literal NUL in the input cannot be told apart
// from end of input by comparing against EOF alone.
const EOF rune = 0
|
|
|
|
// Delimiter strings of the wikilink syntax.
const (
	// OpenLink is the delimiter that opens a link.
	OpenLink = "[["
	// CloseLink is the delimiter that closes a link.
	CloseLink = "]]"
	// Alias is the alias separator.
	Alias = "|"
	// Heading is the heading marker.
	Heading = "#"
	// BlockRef is the block-reference marker. Note that it begins with the
	// same character as Heading.
	BlockRef = "#^"
)
|
|
|
|
func Lex(name, input string, level zapcore.Level) *Lexer {
|
|
encoderCfg := zap.NewProductionEncoderConfig()
|
|
encoderCfg.EncodeTime = zapcore.ISO8601TimeEncoder
|
|
|
|
config := zap.Config{
|
|
Level: zap.NewAtomicLevelAt(level),
|
|
EncoderConfig: encoderCfg,
|
|
OutputPaths: []string{
|
|
"./lexer.log",
|
|
"stdout",
|
|
},
|
|
Encoding: "console",
|
|
ErrorOutputPaths: []string{
|
|
"stderr",
|
|
},
|
|
InitialFields: map[string]interface{}{
|
|
"lexer": name,
|
|
// "pid": os.Getpid(),
|
|
},
|
|
}
|
|
|
|
l := &Lexer{
|
|
L: zap.Must(config.Build()).Named("lexer"),
|
|
name: name,
|
|
input: input,
|
|
state: lexText,
|
|
Items: make([]Lexeme, 0),
|
|
}
|
|
// go l.run()
|
|
l.run()
|
|
|
|
return l
|
|
}
|
|
|
|
/* shouldn't need this in non-concurrent implementation
|
|
func (l *Lexer) NextItem() Item {
|
|
for {
|
|
select {
|
|
case item := <-l.items:
|
|
return item
|
|
default:
|
|
if l.state == nil {
|
|
l.L.Named("NextItem").Errorw("state should not be nil")
|
|
return Item{
|
|
Typ: ItemError,
|
|
Val: "state is nil, should not be",
|
|
}
|
|
}
|
|
|
|
l.state = l.state(l)
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
func (l *Lexer) ignore() {
|
|
l.SetStart(l.pos)
|
|
}
|
|
|
|
func (l *Lexer) backup() {
|
|
l.SetPos(l.GetPos() - l.GetWidth())
|
|
}
|
|
|
|
type Lexer struct {
|
|
L *zap.Logger
|
|
name, input string
|
|
start, pos, width int
|
|
state stateFn
|
|
// Items chan Item
|
|
Items []Lexeme
|
|
widthMutex, startMutex, posMutex, chanMutex sync.Mutex
|
|
}
|
|
|
|
func (l *Lexer) peek() rune {
|
|
L := l.L.Named("peek")
|
|
L.Debug("peeking")
|
|
r := l.next()
|
|
l.backup()
|
|
|
|
return r
|
|
}
|
|
|
|
func (l *Lexer) accept(valid string) bool {
|
|
L := l.L.Named("accept").With(
|
|
zap.String("input", valid),
|
|
)
|
|
if strings.ContainsRune(valid, l.next()) {
|
|
L.Debug("matched input")
|
|
return true
|
|
}
|
|
|
|
L.Debug("rejected input")
|
|
l.backup()
|
|
|
|
return false
|
|
}
|
|
|
|
func (l *Lexer) acceptRun(valid string) {
|
|
L := l.L.Named("acceptRun").With(
|
|
zap.String("input", valid),
|
|
)
|
|
L.Debug("scanning")
|
|
for strings.ContainsRune(valid, l.next()) {
|
|
}
|
|
l.backup()
|
|
}
|
|
|
|
func (l *Lexer) emit(t LexemeType) {
|
|
i := Lexeme{t, l.input[l.GetStart():l.GetPos()]}
|
|
L := l.L.Named("emit").With(
|
|
zap.String("item", i.String()),
|
|
)
|
|
L.Info("emitting lexeme")
|
|
l.Items = append(l.Items, i)
|
|
l.SetStart(l.GetPos())
|
|
/* original concurrent implementation
|
|
defer l.chanMutex.Unlock()
|
|
l.chanMutex.Lock()
|
|
i := Item{t, l.input[l.GetStart():l.GetPos()]}
|
|
L := l.L.With(
|
|
zap.Int("pos", l.GetPos()),
|
|
zap.Int("width", l.GetWidth()),
|
|
).Named("emit")
|
|
|
|
L.Debug("emitting item",
|
|
zap.String("item", i.String()),
|
|
)
|
|
l.items <- i
|
|
*/
|
|
}
|
|
|
|
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
|
|
L := l.L.Named("errorf")
|
|
errorItem := Lexeme{
|
|
LexError,
|
|
fmt.Sprintf(format, args...),
|
|
}
|
|
L.Debug("emitting errorItem",
|
|
zap.String("error", errorItem.String()),
|
|
)
|
|
|
|
l.Items = append(l.Items, errorItem)
|
|
return nil
|
|
}
|
|
|
|
func (l *Lexer) next() rune {
|
|
var r rune
|
|
L := l.L.Named("next")
|
|
if l.GetPos() >= len(l.input) {
|
|
L.Debug("end of input reached")
|
|
l.SetWidth(0)
|
|
return EOF
|
|
}
|
|
r, width := utf8.DecodeRuneInString(l.input[l.GetPos():])
|
|
L.Debug("found rune",
|
|
zap.String("rune", string(r)),
|
|
zap.Int("width", width),
|
|
)
|
|
l.SetWidth(width)
|
|
l.SetPos(l.GetPos() + l.GetWidth())
|
|
return r
|
|
}
|
|
|
|
func (l *Lexer) run() {
|
|
for state := lexText; state != nil; {
|
|
state = state(l)
|
|
}
|
|
/* original concurrent implementation
|
|
defer l.chanMutex.Unlock()
|
|
for state := lexText; state != nil; {
|
|
state = state(l)
|
|
}
|
|
l.chanMutex.Lock()
|
|
close(l.items)
|
|
*/
|
|
}
|
|
|
|
func (l *Lexer) GetPos() int {
|
|
defer l.posMutex.Unlock()
|
|
l.posMutex.Lock()
|
|
l.L.Named("GetPos").Debug("getting current position",
|
|
zap.Int("old", l.pos),
|
|
)
|
|
return l.pos
|
|
}
|
|
|
|
func (l *Lexer) SetPos(pos int) {
|
|
defer l.posMutex.Unlock()
|
|
l.posMutex.Lock()
|
|
l.L.Named("SetPos").Debug("setting new position",
|
|
zap.Int("new", pos),
|
|
zap.Int("old", l.pos),
|
|
)
|
|
l.pos = pos
|
|
}
|
|
|
|
func (l *Lexer) GetWidth() int {
|
|
defer l.widthMutex.Unlock()
|
|
l.widthMutex.Lock()
|
|
l.L.Named("GetWidth").Debug("setting new width",
|
|
zap.Int("old", l.width),
|
|
)
|
|
return l.width
|
|
}
|
|
|
|
func (l *Lexer) SetWidth(width int) {
|
|
defer l.widthMutex.Unlock()
|
|
l.widthMutex.Lock()
|
|
l.L.Named("SetWidth").Debug("setting new width",
|
|
zap.Int("new", width),
|
|
zap.Int("old", l.width),
|
|
)
|
|
l.width = width
|
|
}
|
|
|
|
func (l *Lexer) GetStart() int {
|
|
defer l.startMutex.Unlock()
|
|
l.startMutex.Lock()
|
|
l.L.Named("GetStart").Debug("getting old start",
|
|
zap.Int("old", l.start),
|
|
)
|
|
return l.start
|
|
}
|
|
|
|
func (l *Lexer) SetStart(start int) {
|
|
defer l.startMutex.Unlock()
|
|
l.startMutex.Lock()
|
|
l.L.Named("SetStart").Debug("setting new start",
|
|
zap.Int("new", start),
|
|
zap.Int("old", l.start),
|
|
)
|
|
l.start = start
|
|
}
|
|
|
|
// stateFn is one state of the lexer: it works on the given Lexer and
// returns the state to run next. Lexer.run stops when a stateFn returns nil.
type stateFn func(*Lexer) stateFn
|
|
|
|
// LexemeType identifies the kind of a Lexeme. The go:generate directive at
// the top of this file runs stringer for this type.
type LexemeType int
|
|
|
|
type Lexeme struct {
|
|
Typ LexemeType
|
|
Val string
|
|
}
|
|
|
|
func (i Lexeme) String() string {
|
|
switch i.Typ {
|
|
|
|
case LexEOF:
|
|
return "EOF"
|
|
case LexError:
|
|
return i.Val
|
|
}
|
|
|
|
if len(i.Val) > 10 {
|
|
// return fmt.Sprintf("%s:%.10q...", i.Typ, i.Val)
|
|
return fmt.Sprintf("%s:%q", i.Typ, i.Val)
|
|
}
|
|
return fmt.Sprintf("%s:%q", i.Typ, i.Val)
|
|
}
|