non-concurrent, test cases back to expected state

main
Nick Dumas 1 year ago
parent b9e1bab075
commit 0c7b00e9b5

@ -0,0 +1,15 @@
package main
import (
"log"
"code.ndumas.com/ndumas/wikilink-parser"
)
// main lexes a fixed sample wikilink and logs every lexeme the lexer
// collected, one log line per lexeme, in Go-syntax form.
func main() {
	lexer := wikilink.Lex("debugLexer", `[[#heading]]`)
	for idx := range lexer.Items {
		log.Printf("%#+v\n", lexer.Items[idx])
	}
}

@ -1,31 +0,0 @@
// Code generated by "stringer -type=ItemType"; DO NOT EDIT.
package wikilink
import "strconv"
// _ is never called. It exists only as a compile-time check that the ItemType
// constants still have the numeric values this generated file was built for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
// x has exactly one element, so every index expression below must evaluate
// to the constant 0; any other value is rejected by the compiler.
var x [1]struct{}
_ = x[ItemError-0]
_ = x[ItemEOF-1]
_ = x[ItemIdent-2]
_ = x[ItemOpenLink-3]
_ = x[ItemCloseLink-4]
_ = x[ItemHeading-5]
_ = x[ItemBlockRef-6]
_ = x[ItemAlias-7]
_ = x[ItemText-8]
}
// _ItemType_name is every ItemType constant name concatenated in value order.
const _ItemType_name = "ItemErrorItemEOFItemIdentItemOpenLinkItemCloseLinkItemHeadingItemBlockRefItemAliasItemText"

// _ItemType_index holds the byte offsets into _ItemType_name: the name for
// value v spans [_ItemType_index[v], _ItemType_index[v+1]).
var _ItemType_index = [...]uint8{0, 9, 16, 25, 37, 50, 61, 73, 82, 90}

// String returns the constant name for i, or "ItemType(N)" when i is outside
// the range covered by the name table.
func (i ItemType) String() string {
	if v := int(i); v >= 0 && v < len(_ItemType_index)-1 {
		begin, end := _ItemType_index[v], _ItemType_index[v+1]
		return _ItemType_name[begin:end]
	}
	return "ItemType(" + strconv.FormatInt(int64(i), 10) + ")"
}

@ -0,0 +1,31 @@
// Code generated by "stringer -type=LexemeType"; DO NOT EDIT.
package wikilink
import "strconv"
// _ is never called. It exists only as a compile-time check that the
// LexemeType constants still have the numeric values this generated file was
// built for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
// x has exactly one element, so every index expression below must evaluate
// to the constant 0; any other value is rejected by the compiler.
var x [1]struct{}
_ = x[LexError-0]
_ = x[LexEOF-1]
_ = x[LexIdent-2]
_ = x[LexOpenLink-3]
_ = x[LexCloseLink-4]
_ = x[LexHeading-5]
_ = x[LexBlockRef-6]
_ = x[LexAlias-7]
_ = x[LexText-8]
}
// _LexemeType_name is every LexemeType constant name concatenated in value order.
const _LexemeType_name = "LexErrorLexEOFLexIdentLexOpenLinkLexCloseLinkLexHeadingLexBlockRefLexAliasLexText"

// _LexemeType_index holds the byte offsets into _LexemeType_name: the name
// for value v spans [_LexemeType_index[v], _LexemeType_index[v+1]).
var _LexemeType_index = [...]uint8{0, 8, 14, 22, 33, 45, 55, 66, 74, 81}

// String returns the constant name for i, or "LexemeType(N)" when i is
// outside the range covered by the name table.
func (i LexemeType) String() string {
	if v := int(i); v >= 0 && v < len(_LexemeType_index)-1 {
		begin, end := _LexemeType_index[v], _LexemeType_index[v+1]
		return _LexemeType_name[begin:end]
	}
	return "LexemeType(" + strconv.FormatInt(int64(i), 10) + ")"
}

@ -1,4 +1,4 @@
//go:generate stringer -type=ItemType
//go:generate stringer -type=LexemeType
package wikilink
import (
@ -12,15 +12,15 @@ import (
)
// ItemType values from before the Item -> Lexeme rename. They live in their
// own const block so that iota restarts at 0 for the LexemeType values below.
// NOTE(review): these look superseded by the Lex* constants; they are kept
// because the generated ItemType stringer still references them.
const (
	ItemError ItemType = iota
	ItemEOF
	ItemIdent
	ItemOpenLink
	ItemCloseLink
	ItemHeading
	ItemBlockRef
	ItemAlias
	ItemText
)

// LexemeType values. The numeric values (LexError == 0 through LexText == 8)
// must match the table in the generated LexemeType stringer, so this block
// must start its own iota sequence.
const (
	LexError LexemeType = iota
	LexEOF
	LexIdent
	LexOpenLink
	LexCloseLink
	LexHeading
	LexBlockRef
	LexAlias
	LexText
)
const (
@ -41,13 +41,15 @@ func Lex(name, input string) *Lexer {
name: name,
input: input,
state: lexText,
items: make(chan Item, 2),
Items: make([]Lexeme, 0),
}
go l.run()
// go l.run()
l.run()
return l
}
/* shouldn't need this in non-concurrent implementation
func (l *Lexer) NextItem() Item {
for {
select {
@ -66,6 +68,7 @@ func (l *Lexer) NextItem() Item {
}
}
}
*/
func (l *Lexer) ignore() {
l.SetStart(l.pos)
@ -76,15 +79,18 @@ func (l *Lexer) backup() {
}
// Lexer holds the state of one scan over an input string and the lexemes
// produced by that scan.
type Lexer struct {
	// L is the logger used for debug tracing by the lexer's methods.
	L *zap.SugaredLogger
	// name is the label passed to Lex; input is the text being scanned.
	name, input string
	// start is the byte offset where the pending lexeme begins, pos is the
	// current byte offset, and width is the byte width of the rune most
	// recently read by next (0 at end of input).
	start, pos, width int
	// state is the initial state function recorded by Lex.
	state stateFn
	// Items chan Item
	// Items collects lexemes in the order they were produced.
	Items []Lexeme
	// widthMutex, startMutex and posMutex guard width, start and pos in the
	// Get*/Set* accessors.
	// NOTE(review): chanMutex appears to be referenced only by the
	// commented-out concurrent implementation; confirm before removing.
	widthMutex, startMutex, posMutex, chanMutex sync.Mutex
}
func (l *Lexer) peek() rune {
L := l.L.Named("peek")
L.Debug("peeking")
r := l.next()
l.backup()
@ -92,22 +98,39 @@ func (l *Lexer) peek() rune {
}
// accept reads the next rune and reports whether it is one of the runes in
// valid. On a mismatch it calls backup before returning false.
func (l *Lexer) accept(valid string) bool {
	logger := l.L.Named("accept").With(zap.String("input", valid))

	if !strings.ContainsRune(valid, l.next()) {
		logger.Debug("rejected input")
		l.backup()
		return false
	}

	logger.Debug("matched input")
	return true
}
// acceptRun keeps reading runes for as long as they are in valid, then calls
// backup once for the first rune that did not match.
func (l *Lexer) acceptRun(valid string) {
	logger := l.L.Named("acceptRun").With(zap.String("input", valid))
	logger.Debug("scanning")

	for {
		if r := l.next(); !strings.ContainsRune(valid, r) {
			break
		}
	}
	l.backup()
}
func (l *Lexer) emit(t ItemType) {
func (l *Lexer) emit(t LexemeType) {
i := Lexeme{t, l.input[l.GetStart():l.GetPos()]}
L := l.L.Named("emit").With(
zap.String("item", i.String()),
)
L.Debug("emitting lexeme")
l.Items = append(l.Items, i)
l.SetStart(l.GetPos())
/* original concurrent implementation
defer l.chanMutex.Unlock()
l.chanMutex.Lock()
i := Item{t, l.input[l.GetStart():l.GetPos()]}
@ -120,95 +143,127 @@ func (l *Lexer) emit(t ItemType) {
zap.String("item", i.String()),
)
l.items <- i
l.SetStart(l.GetPos())
*/
}
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
L := l.L.Named("errorf")
errorItem := Item{
ItemError,
errorItem := Lexeme{
LexError,
fmt.Sprintf(format, args...),
}
L.Debugw("emitting errorItem",
zap.String("error", errorItem.String()),
)
l.items <- errorItem
l.Items = append(l.Items, errorItem)
return nil
}
// next returns the rune at the current position and advances past it.
//
// When the position is at or beyond the end of input it sets the width to 0
// and returns EOF without moving. Otherwise it decodes one UTF-8 rune,
// records its byte width, and advances the position by that width.
func (l *Lexer) next() rune {
	logger := l.L.Named("next")

	if l.GetPos() >= len(l.input) {
		logger.Debug("end of input reached")
		l.SetWidth(0)
		return EOF
	}

	remaining := l.input[l.GetPos():]
	decoded, size := utf8.DecodeRuneInString(remaining)
	logger.Debugw("found rune",
		zap.String("rune", string(decoded)),
		zap.Int("width", size),
	)

	l.SetWidth(size)
	l.SetPos(l.GetPos() + l.GetWidth())
	return decoded
}
// run drives the state machine synchronously: starting from lexText, it calls
// each state function with the lexer and continues with whatever state that
// call returns, stopping when a state returns nil.
func (l *Lexer) run() {
	var step stateFn = lexText
	for step != nil {
		step = step(l)
	}
	/* original concurrent implementation
	defer l.chanMutex.Unlock()
	for state := lexText; state != nil; {
	state = state(l)
	}
	l.chanMutex.Lock()
	close(l.items)
	*/
}
// GetPos returns the lexer's current position. The read and its debug log
// entry both happen while posMutex is held.
func (l *Lexer) GetPos() int {
	l.posMutex.Lock()
	defer l.posMutex.Unlock()

	logger := l.L.Named("GetPos")
	logger.Debugw("getting current position",
		zap.Int("old", l.pos),
	)
	return l.pos
}
// SetPos replaces the lexer's current position with pos, logging the new and
// previous values first. The whole update happens while posMutex is held.
func (l *Lexer) SetPos(pos int) {
	l.posMutex.Lock()
	defer l.posMutex.Unlock()

	logger := l.L.Named("SetPos")
	logger.Debugw("setting new position",
		zap.Int("new", pos),
		zap.Int("old", l.pos),
	)
	l.pos = pos
}
// GetWidth returns the byte width most recently stored by SetWidth. The read
// and its debug log entry both happen while widthMutex is held.
func (l *Lexer) GetWidth() int {
	l.widthMutex.Lock()
	defer l.widthMutex.Unlock()

	// The message used to read "setting new width", copied from SetWidth;
	// a getter must not claim to be setting anything in the trace output.
	l.L.Named("GetWidth").Debugw("getting current width",
		zap.Int("old", l.width),
	)
	return l.width
}
// SetWidth replaces the stored rune width with width, logging the new and
// previous values first. The whole update happens while widthMutex is held.
func (l *Lexer) SetWidth(width int) {
	l.widthMutex.Lock()
	defer l.widthMutex.Unlock()

	logger := l.L.Named("SetWidth")
	logger.Debugw("setting new width",
		zap.Int("new", width),
		zap.Int("old", l.width),
	)
	l.width = width
}
// GetStart returns the offset at which the pending lexeme begins. The read
// and its debug log entry both happen while startMutex is held.
func (l *Lexer) GetStart() int {
	l.startMutex.Lock()
	defer l.startMutex.Unlock()

	logger := l.L.Named("GetStart")
	logger.Debugw("getting old start",
		zap.Int("old", l.start),
	)
	return l.start
}
// SetStart replaces the pending lexeme's start offset with start, logging the
// new and previous values first. The whole update happens while startMutex is
// held.
func (l *Lexer) SetStart(start int) {
	l.startMutex.Lock()
	defer l.startMutex.Unlock()

	logger := l.L.Named("SetStart")
	logger.Debugw("setting new start",
		zap.Int("new", start),
		zap.Int("old", l.start),
	)
	l.start = start
}
// stateFn is one step of the lexer's state machine: it is called with the
// Lexer and returns the stateFn to run next. Returning nil ends the scan.
type stateFn func(*Lexer) stateFn
// ItemType is the token classification used before the Item -> Lexeme rename.
// NOTE(review): appears superseded by LexemeType; kept because the ItemType
// constants and generated stringer still refer to it.
type ItemType int

// LexemeType classifies a Lexeme (error, EOF, identifier, link delimiters,
// heading, block reference, alias, or plain text).
type LexemeType int

// Item is the pre-rename form of Lexeme.
// NOTE(review): appears superseded by Lexeme; confirm nothing else needs it.
type Item struct {
	Typ ItemType
	Val string
}

// Lexeme is one token produced by the lexer: Typ is its kind and Val is the
// slice of input it covers (or the formatted message for a LexError).
type Lexeme struct {
	Typ LexemeType
	Val string
}
func (i Item) String() string {
func (i Lexeme) String() string {
switch i.Typ {
case ItemEOF:
case LexEOF:
return "EOF"
case ItemError:
case LexError:
return i.Val
}

@ -11,68 +11,75 @@ func Test_Lexer(t *testing.T) {
tcs := []struct {
name string
in string
expected []wikilink.Item
expected []wikilink.Lexeme
}{
{
name: "wikilink", in: "[[wikilink]]", expected: []wikilink.Item{
{Typ: wikilink.ItemOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"},
{Typ: wikilink.ItemCloseLink, Val: "]]"},
name: "wikilink", in: "[[wikilink]]", expected: []wikilink.Lexeme{
{Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
},
},
{
name: "wikilink|display name", in: "[[wikilink|display name]]", expected: []wikilink.Item{
{Typ: wikilink.ItemOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"},
{Typ: wikilink.ItemAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "display name"},
{Typ: wikilink.ItemCloseLink, Val: "]]"},
name: "wikilink|display name", in: "[[wikilink|display name]]", expected: []wikilink.Lexeme{
{Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.LexIdent, Val: "display name"},
{Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
},
},
{
name: "wikilink|display name|second pipe", in: "[[wikilink|display name|second pipe]]", expected: []wikilink.Item{
{Typ: wikilink.ItemOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"},
{Typ: wikilink.ItemAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "display name"},
{Typ: wikilink.ItemAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "second pipe"},
{Typ: wikilink.ItemCloseLink, Val: "]]"},
name: "wikilink|display name|second pipe", in: "[[wikilink|display name|second pipe]]", expected: []wikilink.Lexeme{
{Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.LexIdent, Val: "display name"},
{Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.LexIdent, Val: "second pipe"},
{Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
},
},
{
name: "wikilink with numeric alias|420|second pipe", in: "[[wikilink|420|second pipe]]", expected: []wikilink.Item{
{Typ: wikilink.ItemOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"},
{Typ: wikilink.ItemAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "420"},
{Typ: wikilink.ItemAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "second pipe"},
{Typ: wikilink.ItemCloseLink, Val: "]]"},
name: "wikilink with numeric alias|420|second pipe", in: "[[wikilink|420|second pipe]]", expected: []wikilink.Lexeme{
{Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.LexIdent, Val: "420"},
{Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.LexIdent, Val: "second pipe"},
{Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
},
},
{
name: "wikilink with spaces in filename", in: "[[wikilink spaces]]", expected: []wikilink.Item{
{Typ: wikilink.ItemOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink spaces"},
{Typ: wikilink.ItemCloseLink, Val: "]]"},
name: "wikilink with spaces in filename", in: "[[wikilink spaces]]", expected: []wikilink.Lexeme{
{Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.LexIdent, Val: "wikilink spaces"},
{Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
},
},
{
name: "#heading", in: "[[#heading]]", expected: []wikilink.Item{
{Typ: wikilink.ItemOpenLink, Val: "[["},
{Typ: wikilink.ItemHeading, Val: "#"},
{Typ: wikilink.ItemIdent, Val: "heading"},
{Typ: wikilink.ItemCloseLink, Val: "]]"},
name: "#heading", in: "[[#heading]]", expected: []wikilink.Lexeme{
{Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.LexHeading, Val: "#"},
{Typ: wikilink.LexIdent, Val: "heading"},
{Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
},
},
{
name: "wikilink#heading", in: "[[wikilink#heading]]", expected: []wikilink.Item{
{Typ: wikilink.ItemOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"},
{Typ: wikilink.ItemHeading, Val: "#"},
{Typ: wikilink.ItemIdent, Val: "heading"},
{Typ: wikilink.ItemCloseLink, Val: "]]"},
name: "wikilink#heading", in: "[[wikilink#heading]]", expected: []wikilink.Lexeme{
{Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.LexHeading, Val: "#"},
{Typ: wikilink.LexIdent, Val: "heading"},
{Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
},
},
/*
@ -94,8 +101,13 @@ func Test_Lexer(t *testing.T) {
// t.Parallel()
l := wikilink.Lex("testLexer", tc.in)
defer l.L.Sync()
for _, e := range tc.expected {
n := l.NextItem()
if len(tc.expected) != len(l.Items) {
t.Logf("expected %d tokens, got %d\n", len(tc.expected), len(l.Items))
t.Fail()
return
}
for i, e := range tc.expected {
n := l.Items[i]
if e.Typ != n.Typ {
t.Logf("expected Type %s, received %s", e.Typ.String(), n.Typ.String())
t.Fail()

@ -28,26 +28,34 @@ func isBlockRef(s string) bool {
func lexIdent(l *Lexer) stateFn {
for {
L := l.L.Named("lexIdent")
if isCloseLink(l.input[l.GetPos():]) {
L.Debug("found CloseLink")
l.emit(LexIdent)
return lexCloseLink
}
s := l.input[l.GetPos():]
r := l.next()
l.L.Named("lexIdent").Debug("stepping through lexIdent",
zap.String("r", string(r)),
L = l.L.With(
zap.String("rune", string(r)),
)
s := l.input[l.GetPos():]
L.Debug("stepping through lexIdent")
if r == '\\' { // i think this will handle escape characters?
break
}
switch {
case isBlockRef(s):
l.emit(ItemIdent)
L.Debug("found BlockRef")
l.emit(LexIdent)
return lexBlockRef
case isAlias(s):
l.emit(ItemIdent)
L.Debug("found Alias")
l.emit(LexIdent)
return lexAlias
case isCloseLink(s):
l.emit(ItemIdent)
return lexCloseLink
case isHeading(s):
l.emit(ItemIdent)
L.Debug("found Heading")
l.emit(LexIdent)
return lexHeading
}
@ -57,34 +65,36 @@ func lexIdent(l *Lexer) stateFn {
// lexHeading advances the position by len(Heading), emits exactly one
// LexHeading lexeme for the consumed marker, and continues with lexIdent.
func lexHeading(l *Lexer) stateFn {
	l.SetPos(l.GetPos() + len(Heading))
	// Emit once: a second, ItemType-typed emit here would produce a
	// spurious extra lexeme for every heading marker.
	l.emit(LexHeading)
	return lexIdent
}
// lexBlockRef advances the position by len(BlockRef), emits exactly one
// LexBlockRef lexeme for the consumed marker, and continues with lexIdent.
func lexBlockRef(l *Lexer) stateFn {
	l.SetPos(l.GetPos() + len(BlockRef))
	// Emit once: a second, ItemType-typed emit here would produce a
	// spurious extra lexeme for every block-reference marker.
	l.emit(LexBlockRef)
	return lexIdent
}
// lexAlias advances the position by len(Alias), emits exactly one LexAlias
// lexeme for the consumed marker, and continues with lexIdent.
func lexAlias(l *Lexer) stateFn {
	l.SetPos(l.GetPos() + len(Alias))
	// Emit once: a second, ItemType-typed emit here would produce a
	// spurious extra lexeme for every alias marker.
	l.emit(LexAlias)
	return lexIdent
}
func lexText(l *Lexer) stateFn {
L := l.L.Named("lexText")
for {
if isOpenLink(l.input[l.GetPos():]) {
L.Debug("found openLink")
return lexOpenLink
}
r := l.next()
switch {
case r == EOF || r == '\n':
l.emit(ItemText)
l.emit(LexText)
return nil
}
}
@ -92,14 +102,14 @@ func lexText(l *Lexer) stateFn {
// lexOpenLink advances the position by len(OpenLink), emits exactly one
// LexOpenLink lexeme for the consumed delimiter, and continues with lexIdent.
func lexOpenLink(l *Lexer) stateFn {
	l.SetPos(l.GetPos() + len(OpenLink))
	// Emit once: a second, ItemType-typed emit here would produce a
	// spurious extra lexeme for every opening delimiter.
	l.emit(LexOpenLink)
	return lexIdent
}
// lexCloseLink advances the position by len(CloseLink), emits exactly one
// LexCloseLink lexeme for the consumed delimiter, and hands control back to
// lexText.
func lexCloseLink(l *Lexer) stateFn {
	l.SetPos(l.GetPos() + len(CloseLink))
	// Emit once: a second, ItemType-typed emit here would produce a
	// spurious extra lexeme for every closing delimiter.
	l.emit(LexCloseLink)
	return lexText
}

Loading…
Cancel
Save