non-concurrent, test cases back to expected state

main
Nick Dumas 1 year ago
parent b9e1bab075
commit 0c7b00e9b5

@ -0,0 +1,15 @@
package main
import (
"log"
"code.ndumas.com/ndumas/wikilink-parser"
)
// main lexes a fixed heading-style wikilink and logs each collected lexeme
// in Go-syntax form, as a quick manual debugging aid for the lexer.
func main() {
	lexer := wikilink.Lex("debugLexer", `[[#heading]]`)
	for idx := range lexer.Items {
		log.Printf("%#+v\n", lexer.Items[idx])
	}
}

@ -1,31 +0,0 @@
// Code generated by "stringer -type=ItemType"; DO NOT EDIT.
package wikilink
import "strconv"
// _ cannot be called (it is bound to the blank identifier); it exists only as
// a compile-time guard that ties the generated name table to the numeric
// values of the ItemType constants.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
// x has exactly one element, so each constant index expression below only
// compiles while the named constant still equals the literal subtracted
// from it (i.e. the index evaluates to 0).
_ = x[ItemError-0]
_ = x[ItemEOF-1]
_ = x[ItemIdent-2]
_ = x[ItemOpenLink-3]
_ = x[ItemCloseLink-4]
_ = x[ItemHeading-5]
_ = x[ItemBlockRef-6]
_ = x[ItemAlias-7]
_ = x[ItemText-8]
}
// _ItemType_name is every ItemType constant name concatenated in value order.
const _ItemType_name = "ItemErrorItemEOFItemIdentItemOpenLinkItemCloseLinkItemHeadingItemBlockRefItemAliasItemText"

// _ItemType_index lists the byte offset in _ItemType_name where each name
// begins; the final entry is the total length, so entry i+1 ends name i.
var _ItemType_index = [...]uint8{0, 9, 16, 25, 37, 50, 61, 73, 82, 90}

// String returns the constant name for a known ItemType value. Any value
// outside the table is rendered as "ItemType(N)" with N in decimal.
func (i ItemType) String() string {
	known := ItemType(len(_ItemType_index) - 1)
	if i >= 0 && i < known {
		begin, end := _ItemType_index[i], _ItemType_index[i+1]
		return _ItemType_name[begin:end]
	}
	return "ItemType(" + strconv.FormatInt(int64(i), 10) + ")"
}

@ -0,0 +1,31 @@
// Code generated by "stringer -type=LexemeType"; DO NOT EDIT.
package wikilink
import "strconv"
// _ cannot be called (it is bound to the blank identifier); it exists only as
// a compile-time guard that ties the generated name table to the numeric
// values of the LexemeType constants.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
// x has exactly one element, so each constant index expression below only
// compiles while the named constant still equals the literal subtracted
// from it (i.e. the index evaluates to 0).
_ = x[LexError-0]
_ = x[LexEOF-1]
_ = x[LexIdent-2]
_ = x[LexOpenLink-3]
_ = x[LexCloseLink-4]
_ = x[LexHeading-5]
_ = x[LexBlockRef-6]
_ = x[LexAlias-7]
_ = x[LexText-8]
}
// _LexemeType_name is every LexemeType constant name concatenated in value order.
const _LexemeType_name = "LexErrorLexEOFLexIdentLexOpenLinkLexCloseLinkLexHeadingLexBlockRefLexAliasLexText"

// _LexemeType_index lists the byte offset in _LexemeType_name where each name
// begins; the final entry is the total length, so entry i+1 ends name i.
var _LexemeType_index = [...]uint8{0, 8, 14, 22, 33, 45, 55, 66, 74, 81}

// String returns the constant name for a known LexemeType value. Any value
// outside the table is rendered as "LexemeType(N)" with N in decimal.
func (i LexemeType) String() string {
	known := LexemeType(len(_LexemeType_index) - 1)
	if i >= 0 && i < known {
		begin, end := _LexemeType_index[i], _LexemeType_index[i+1]
		return _LexemeType_name[begin:end]
	}
	return "LexemeType(" + strconv.FormatInt(int64(i), 10) + ")"
}

@ -1,4 +1,4 @@
//go:generate stringer -type=ItemType //go:generate stringer -type=LexemeType
package wikilink package wikilink
import ( import (
@ -12,15 +12,15 @@ import (
) )
const ( const (
ItemError ItemType = iota LexError LexemeType = iota
ItemEOF LexEOF
ItemIdent LexIdent
ItemOpenLink LexOpenLink
ItemCloseLink LexCloseLink
ItemHeading LexHeading
ItemBlockRef LexBlockRef
ItemAlias LexAlias
ItemText LexText
) )
const ( const (
@ -41,13 +41,15 @@ func Lex(name, input string) *Lexer {
name: name, name: name,
input: input, input: input,
state: lexText, state: lexText,
items: make(chan Item, 2), Items: make([]Lexeme, 0),
} }
go l.run() // go l.run()
l.run()
return l return l
} }
/* shouldn't need this in non-concurrent implementation
func (l *Lexer) NextItem() Item { func (l *Lexer) NextItem() Item {
for { for {
select { select {
@ -66,6 +68,7 @@ func (l *Lexer) NextItem() Item {
} }
} }
} }
*/
func (l *Lexer) ignore() { func (l *Lexer) ignore() {
l.SetStart(l.pos) l.SetStart(l.pos)
@ -80,11 +83,14 @@ type Lexer struct {
name, input string name, input string
start, pos, width int start, pos, width int
state stateFn state stateFn
items chan Item // Items chan Item
Items []Lexeme
widthMutex, startMutex, posMutex, chanMutex sync.Mutex widthMutex, startMutex, posMutex, chanMutex sync.Mutex
} }
func (l *Lexer) peek() rune { func (l *Lexer) peek() rune {
L := l.L.Named("peek")
L.Debug("peeking")
r := l.next() r := l.next()
l.backup() l.backup()
@ -92,22 +98,39 @@ func (l *Lexer) peek() rune {
} }
func (l *Lexer) accept(valid string) bool { func (l *Lexer) accept(valid string) bool {
L := l.L.Named("accept").With(
zap.String("input", valid),
)
if strings.ContainsRune(valid, l.next()) { if strings.ContainsRune(valid, l.next()) {
L.Debug("matched input")
return true return true
} }
L.Debug("rejected input")
l.backup() l.backup()
return false return false
} }
func (l *Lexer) acceptRun(valid string) { func (l *Lexer) acceptRun(valid string) {
L := l.L.Named("acceptRun").With(
zap.String("input", valid),
)
L.Debug("scanning")
for strings.ContainsRune(valid, l.next()) { for strings.ContainsRune(valid, l.next()) {
} }
l.backup() l.backup()
} }
func (l *Lexer) emit(t ItemType) { func (l *Lexer) emit(t LexemeType) {
i := Lexeme{t, l.input[l.GetStart():l.GetPos()]}
L := l.L.Named("emit").With(
zap.String("item", i.String()),
)
L.Debug("emitting lexeme")
l.Items = append(l.Items, i)
l.SetStart(l.GetPos())
/* original concurrent implementation
defer l.chanMutex.Unlock() defer l.chanMutex.Unlock()
l.chanMutex.Lock() l.chanMutex.Lock()
i := Item{t, l.input[l.GetStart():l.GetPos()]} i := Item{t, l.input[l.GetStart():l.GetPos()]}
@ -120,95 +143,127 @@ func (l *Lexer) emit(t ItemType) {
zap.String("item", i.String()), zap.String("item", i.String()),
) )
l.items <- i l.items <- i
l.SetStart(l.GetPos()) */
} }
func (l *Lexer) errorf(format string, args ...interface{}) stateFn { func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
L := l.L.Named("errorf") L := l.L.Named("errorf")
errorItem := Item{ errorItem := Lexeme{
ItemError, LexError,
fmt.Sprintf(format, args...), fmt.Sprintf(format, args...),
} }
L.Debugw("emitting errorItem", L.Debugw("emitting errorItem",
zap.String("error", errorItem.String()), zap.String("error", errorItem.String()),
) )
l.items <- errorItem l.Items = append(l.Items, errorItem)
return nil return nil
} }
func (l *Lexer) next() rune { func (l *Lexer) next() rune {
var r rune var r rune
L := l.L.Named("next")
if l.GetPos() >= len(l.input) { if l.GetPos() >= len(l.input) {
L.Debug("end of input reached")
l.SetWidth(0) l.SetWidth(0)
return EOF return EOF
} }
r, width := utf8.DecodeRuneInString(l.input[l.GetPos():]) r, width := utf8.DecodeRuneInString(l.input[l.GetPos():])
L.Debugw("found rune",
zap.String("rune", string(r)),
zap.Int("width", width),
)
l.SetWidth(width) l.SetWidth(width)
l.SetPos(l.GetPos() + l.GetWidth()) l.SetPos(l.GetPos() + l.GetWidth())
return r return r
} }
func (l *Lexer) run() { func (l *Lexer) run() {
for state := lexText; state != nil; {
state = state(l)
}
/* original concurrent implementation
defer l.chanMutex.Unlock() defer l.chanMutex.Unlock()
for state := lexText; state != nil; { for state := lexText; state != nil; {
state = state(l) state = state(l)
} }
l.chanMutex.Lock() l.chanMutex.Lock()
close(l.items) close(l.items)
*/
} }
func (l *Lexer) GetPos() int { func (l *Lexer) GetPos() int {
defer l.posMutex.Unlock() defer l.posMutex.Unlock()
l.posMutex.Lock() l.posMutex.Lock()
l.L.Named("GetPos").Debugw("getting current position",
zap.Int("old", l.pos),
)
return l.pos return l.pos
} }
func (l *Lexer) SetPos(pos int) { func (l *Lexer) SetPos(pos int) {
defer l.posMutex.Unlock() defer l.posMutex.Unlock()
l.posMutex.Lock() l.posMutex.Lock()
l.L.Named("SetPos").Debugw("setting new position",
zap.Int("new", pos),
zap.Int("old", l.pos),
)
l.pos = pos l.pos = pos
} }
func (l *Lexer) GetWidth() int { func (l *Lexer) GetWidth() int {
defer l.widthMutex.Unlock() defer l.widthMutex.Unlock()
l.widthMutex.Lock() l.widthMutex.Lock()
l.L.Named("GetWidth").Debugw("setting new width",
zap.Int("old", l.width),
)
return l.width return l.width
} }
func (l *Lexer) SetWidth(width int) { func (l *Lexer) SetWidth(width int) {
defer l.widthMutex.Unlock() defer l.widthMutex.Unlock()
l.widthMutex.Lock() l.widthMutex.Lock()
l.L.Named("SetWidth").Debugw("setting new width",
zap.Int("new", width),
zap.Int("old", l.width),
)
l.width = width l.width = width
} }
func (l *Lexer) GetStart() int { func (l *Lexer) GetStart() int {
defer l.startMutex.Unlock() defer l.startMutex.Unlock()
l.startMutex.Lock() l.startMutex.Lock()
l.L.Named("GetStart").Debugw("getting old start",
zap.Int("old", l.start),
)
return l.start return l.start
} }
func (l *Lexer) SetStart(start int) { func (l *Lexer) SetStart(start int) {
defer l.startMutex.Unlock() defer l.startMutex.Unlock()
l.startMutex.Lock() l.startMutex.Lock()
l.L.Named("SetStart").Debugw("setting new start",
zap.Int("new", start),
zap.Int("old", l.start),
)
l.start = start l.start = start
} }
type stateFn func(*Lexer) stateFn type stateFn func(*Lexer) stateFn
type ItemType int type LexemeType int
type Item struct { type Lexeme struct {
Typ ItemType Typ LexemeType
Val string Val string
} }
func (i Item) String() string { func (i Lexeme) String() string {
switch i.Typ { switch i.Typ {
case ItemEOF: case LexEOF:
return "EOF" return "EOF"
case ItemError: case LexError:
return i.Val return i.Val
} }

@ -11,68 +11,75 @@ func Test_Lexer(t *testing.T) {
tcs := []struct { tcs := []struct {
name string name string
in string in string
expected []wikilink.Item expected []wikilink.Lexeme
}{ }{
{ {
name: "wikilink", in: "[[wikilink]]", expected: []wikilink.Item{ name: "wikilink", in: "[[wikilink]]", expected: []wikilink.Lexeme{
{Typ: wikilink.ItemOpenLink, Val: "[["}, {Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"}, {Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.ItemCloseLink, Val: "]]"}, {Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
}, },
}, },
{ {
name: "wikilink|display name", in: "[[wikilink|display name]]", expected: []wikilink.Item{ name: "wikilink|display name", in: "[[wikilink|display name]]", expected: []wikilink.Lexeme{
{Typ: wikilink.ItemOpenLink, Val: "[["}, {Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"}, {Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.ItemAlias, Val: "|"}, {Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "display name"}, {Typ: wikilink.LexIdent, Val: "display name"},
{Typ: wikilink.ItemCloseLink, Val: "]]"}, {Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
}, },
}, },
{ {
name: "wikilink|display name|second pipe", in: "[[wikilink|display name|second pipe]]", expected: []wikilink.Item{ name: "wikilink|display name|second pipe", in: "[[wikilink|display name|second pipe]]", expected: []wikilink.Lexeme{
{Typ: wikilink.ItemOpenLink, Val: "[["}, {Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"}, {Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.ItemAlias, Val: "|"}, {Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "display name"}, {Typ: wikilink.LexIdent, Val: "display name"},
{Typ: wikilink.ItemAlias, Val: "|"}, {Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "second pipe"}, {Typ: wikilink.LexIdent, Val: "second pipe"},
{Typ: wikilink.ItemCloseLink, Val: "]]"}, {Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
}, },
}, },
{ {
name: "wikilink with numeric alias|420|second pipe", in: "[[wikilink|420|second pipe]]", expected: []wikilink.Item{ name: "wikilink with numeric alias|420|second pipe", in: "[[wikilink|420|second pipe]]", expected: []wikilink.Lexeme{
{Typ: wikilink.ItemOpenLink, Val: "[["}, {Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"}, {Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.ItemAlias, Val: "|"}, {Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "420"}, {Typ: wikilink.LexIdent, Val: "420"},
{Typ: wikilink.ItemAlias, Val: "|"}, {Typ: wikilink.LexAlias, Val: "|"},
{Typ: wikilink.ItemIdent, Val: "second pipe"}, {Typ: wikilink.LexIdent, Val: "second pipe"},
{Typ: wikilink.ItemCloseLink, Val: "]]"}, {Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
}, },
}, },
{ {
name: "wikilink with spaces in filename", in: "[[wikilink spaces]]", expected: []wikilink.Item{ name: "wikilink with spaces in filename", in: "[[wikilink spaces]]", expected: []wikilink.Lexeme{
{Typ: wikilink.ItemOpenLink, Val: "[["}, {Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink spaces"}, {Typ: wikilink.LexIdent, Val: "wikilink spaces"},
{Typ: wikilink.ItemCloseLink, Val: "]]"}, {Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
}, },
}, },
{ {
name: "#heading", in: "[[#heading]]", expected: []wikilink.Item{ name: "#heading", in: "[[#heading]]", expected: []wikilink.Lexeme{
{Typ: wikilink.ItemOpenLink, Val: "[["}, {Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.ItemHeading, Val: "#"}, {Typ: wikilink.LexHeading, Val: "#"},
{Typ: wikilink.ItemIdent, Val: "heading"}, {Typ: wikilink.LexIdent, Val: "heading"},
{Typ: wikilink.ItemCloseLink, Val: "]]"}, {Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
}, },
}, },
{ {
name: "wikilink#heading", in: "[[wikilink#heading]]", expected: []wikilink.Item{ name: "wikilink#heading", in: "[[wikilink#heading]]", expected: []wikilink.Lexeme{
{Typ: wikilink.ItemOpenLink, Val: "[["}, {Typ: wikilink.LexOpenLink, Val: "[["},
{Typ: wikilink.ItemIdent, Val: "wikilink"}, {Typ: wikilink.LexIdent, Val: "wikilink"},
{Typ: wikilink.ItemHeading, Val: "#"}, {Typ: wikilink.LexHeading, Val: "#"},
{Typ: wikilink.ItemIdent, Val: "heading"}, {Typ: wikilink.LexIdent, Val: "heading"},
{Typ: wikilink.ItemCloseLink, Val: "]]"}, {Typ: wikilink.LexCloseLink, Val: "]]"},
{Typ: wikilink.LexText, Val: ""},
}, },
}, },
/* /*
@ -94,8 +101,13 @@ func Test_Lexer(t *testing.T) {
// t.Parallel() // t.Parallel()
l := wikilink.Lex("testLexer", tc.in) l := wikilink.Lex("testLexer", tc.in)
defer l.L.Sync() defer l.L.Sync()
for _, e := range tc.expected { if len(tc.expected) != len(l.Items) {
n := l.NextItem() t.Logf("expected %d tokens, got %d\n", len(tc.expected), len(l.Items))
t.Fail()
return
}
for i, e := range tc.expected {
n := l.Items[i]
if e.Typ != n.Typ { if e.Typ != n.Typ {
t.Logf("expected Type %s, received %s", e.Typ.String(), n.Typ.String()) t.Logf("expected Type %s, received %s", e.Typ.String(), n.Typ.String())
t.Fail() t.Fail()

@ -28,26 +28,34 @@ func isBlockRef(s string) bool {
func lexIdent(l *Lexer) stateFn { func lexIdent(l *Lexer) stateFn {
for { for {
L := l.L.Named("lexIdent")
if isCloseLink(l.input[l.GetPos():]) {
L.Debug("found CloseLink")
l.emit(LexIdent)
return lexCloseLink
}
s := l.input[l.GetPos():]
r := l.next() r := l.next()
l.L.Named("lexIdent").Debug("stepping through lexIdent", L = l.L.With(
zap.String("r", string(r)), zap.String("rune", string(r)),
) )
s := l.input[l.GetPos():] L.Debug("stepping through lexIdent")
if r == '\\' { // i think this will handle escape characters? if r == '\\' { // i think this will handle escape characters?
break break
} }
switch { switch {
case isBlockRef(s): case isBlockRef(s):
l.emit(ItemIdent) L.Debug("found BlockRef")
l.emit(LexIdent)
return lexBlockRef return lexBlockRef
case isAlias(s): case isAlias(s):
l.emit(ItemIdent) L.Debug("found Alias")
l.emit(LexIdent)
return lexAlias return lexAlias
case isCloseLink(s):
l.emit(ItemIdent)
return lexCloseLink
case isHeading(s): case isHeading(s):
l.emit(ItemIdent) L.Debug("found Heading")
l.emit(LexIdent)
return lexHeading return lexHeading
} }
@ -57,34 +65,36 @@ func lexIdent(l *Lexer) stateFn {
func lexHeading(l *Lexer) stateFn { func lexHeading(l *Lexer) stateFn {
l.SetPos(l.GetPos() + len(Heading)) l.SetPos(l.GetPos() + len(Heading))
l.emit(ItemHeading) l.emit(LexHeading)
return lexIdent return lexIdent
} }
func lexBlockRef(l *Lexer) stateFn { func lexBlockRef(l *Lexer) stateFn {
l.SetPos(l.GetPos() + len(BlockRef)) l.SetPos(l.GetPos() + len(BlockRef))
l.emit(ItemBlockRef) l.emit(LexBlockRef)
return lexIdent return lexIdent
} }
func lexAlias(l *Lexer) stateFn { func lexAlias(l *Lexer) stateFn {
l.SetPos(l.GetPos() + len(Alias)) l.SetPos(l.GetPos() + len(Alias))
l.emit(ItemAlias) l.emit(LexAlias)
return lexIdent return lexIdent
} }
func lexText(l *Lexer) stateFn { func lexText(l *Lexer) stateFn {
L := l.L.Named("lexText")
for { for {
if isOpenLink(l.input[l.GetPos():]) { if isOpenLink(l.input[l.GetPos():]) {
L.Debug("found openLink")
return lexOpenLink return lexOpenLink
} }
r := l.next() r := l.next()
switch { switch {
case r == EOF || r == '\n': case r == EOF || r == '\n':
l.emit(ItemText) l.emit(LexText)
return nil return nil
} }
} }
@ -92,14 +102,14 @@ func lexText(l *Lexer) stateFn {
func lexOpenLink(l *Lexer) stateFn { func lexOpenLink(l *Lexer) stateFn {
l.SetPos(l.GetPos() + len(OpenLink)) l.SetPos(l.GetPos() + len(OpenLink))
l.emit(ItemOpenLink) l.emit(LexOpenLink)
return lexIdent return lexIdent
} }
func lexCloseLink(l *Lexer) stateFn { func lexCloseLink(l *Lexer) stateFn {
l.SetPos(l.GetPos() + len(CloseLink)) l.SetPos(l.GetPos() + len(CloseLink))
l.emit(ItemCloseLink) l.emit(LexCloseLink)
return lexText return lexText
} }

Loading…
Cancel
Save