diff --git a/cmd/demo/main.go b/cmd/demo/main.go
new file mode 100644
index 0000000..1e4f13f
--- /dev/null
+++ b/cmd/demo/main.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+	"log"
+
+	"code.ndumas.com/ndumas/wikilink-parser"
+)
+
+func main() {
+	l := wikilink.Lex("debugLexer", `[[#heading]]`)
+	for _, item := range l.Items {
+		item := item
+		log.Printf("%#+v\n", item)
+	}
+}
diff --git a/itemtype_string.go b/itemtype_string.go
deleted file mode 100644
index 26df3bb..0000000
--- a/itemtype_string.go
+++ /dev/null
@@ -1,31 +0,0 @@
-// Code generated by "stringer -type=ItemType"; DO NOT EDIT.
-
-package wikilink
-
-import "strconv"
-
-func _() {
-	// An "invalid array index" compiler error signifies that the constant values have changed.
-	// Re-run the stringer command to generate them again.
-	var x [1]struct{}
-	_ = x[ItemError-0]
-	_ = x[ItemEOF-1]
-	_ = x[ItemIdent-2]
-	_ = x[ItemOpenLink-3]
-	_ = x[ItemCloseLink-4]
-	_ = x[ItemHeading-5]
-	_ = x[ItemBlockRef-6]
-	_ = x[ItemAlias-7]
-	_ = x[ItemText-8]
-}
-
-const _ItemType_name = "ItemErrorItemEOFItemIdentItemOpenLinkItemCloseLinkItemHeadingItemBlockRefItemAliasItemText"
-
-var _ItemType_index = [...]uint8{0, 9, 16, 25, 37, 50, 61, 73, 82, 90}
-
-func (i ItemType) String() string {
-	if i < 0 || i >= ItemType(len(_ItemType_index)-1) {
-		return "ItemType(" + strconv.FormatInt(int64(i), 10) + ")"
-	}
-	return _ItemType_name[_ItemType_index[i]:_ItemType_index[i+1]]
-}
diff --git a/lexemetype_string.go b/lexemetype_string.go
new file mode 100644
index 0000000..5c60840
--- /dev/null
+++ b/lexemetype_string.go
@@ -0,0 +1,31 @@
+// Code generated by "stringer -type=LexemeType"; DO NOT EDIT.
+
+package wikilink
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[LexError-0]
+	_ = x[LexEOF-1]
+	_ = x[LexIdent-2]
+	_ = x[LexOpenLink-3]
+	_ = x[LexCloseLink-4]
+	_ = x[LexHeading-5]
+	_ = x[LexBlockRef-6]
+	_ = x[LexAlias-7]
+	_ = x[LexText-8]
+}
+
+const _LexemeType_name = "LexErrorLexEOFLexIdentLexOpenLinkLexCloseLinkLexHeadingLexBlockRefLexAliasLexText"
+
+var _LexemeType_index = [...]uint8{0, 8, 14, 22, 33, 45, 55, 66, 74, 81}
+
+func (i LexemeType) String() string {
+	if i < 0 || i >= LexemeType(len(_LexemeType_index)-1) {
+		return "LexemeType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _LexemeType_name[_LexemeType_index[i]:_LexemeType_index[i+1]]
+}
diff --git a/lexer.go b/lexer.go
index f4bda94..7242f2c 100644
--- a/lexer.go
+++ b/lexer.go
@@ -1,4 +1,4 @@
-//go:generate stringer -type=ItemType
+//go:generate stringer -type=LexemeType
 package wikilink
 
 import (
@@ -12,15 +12,15 @@ import (
 )
 
 const (
-	ItemError ItemType = iota
-	ItemEOF
-	ItemIdent
-	ItemOpenLink
-	ItemCloseLink
-	ItemHeading
-	ItemBlockRef
-	ItemAlias
-	ItemText
+	LexError LexemeType = iota
+	LexEOF
+	LexIdent
+	LexOpenLink
+	LexCloseLink
+	LexHeading
+	LexBlockRef
+	LexAlias
+	LexText
 )
 
 const (
@@ -41,13 +41,15 @@ func Lex(name, input string) *Lexer {
 		name:  name,
 		input: input,
 		state: lexText,
-		items: make(chan Item, 2),
+		Items: make([]Lexeme, 0),
 	}
-	go l.run()
+	// go l.run()
+	l.run()
 
 	return l
 }
 
+/* shouldn't need this in non-concurrent implementation
 func (l *Lexer) NextItem() Item {
 	for {
 		select {
@@ -66,6 +68,7 @@ func (l *Lexer) NextItem() Item {
 		}
 	}
 }
+*/
 
 func (l *Lexer) ignore() {
 	l.SetStart(l.pos)
@@ -76,15 +79,18 @@ func (l *Lexer) backup() {
 }
 
 type Lexer struct {
-	L                 *zap.SugaredLogger
-	name, input       string
-	start, pos, width int
-	state             stateFn
-	items             chan Item
+	L                 *zap.SugaredLogger
+	name, input       string
+	start, pos, width int
+	state             stateFn
+	// Items chan Item
+	Items             []Lexeme
 
 	widthMutex, startMutex, posMutex, chanMutex sync.Mutex
 }
 
 func (l *Lexer) peek() rune {
+	L := l.L.Named("peek")
+	L.Debug("peeking")
 	r := l.next()
 	l.backup()
@@ -92,22 +98,39 @@ func (l *Lexer) peek() rune {
 	return r
 }
 
 func (l *Lexer) accept(valid string) bool {
+	L := l.L.Named("accept").With(
+		zap.String("input", valid),
+	)
 	if strings.ContainsRune(valid, l.next()) {
+		L.Debug("matched input")
 		return true
 	}
+	L.Debug("rejected input")
 	l.backup()
 
 	return false
 }
 
 func (l *Lexer) acceptRun(valid string) {
+	L := l.L.Named("acceptRun").With(
+		zap.String("input", valid),
+	)
+	L.Debug("scanning")
 	for strings.ContainsRune(valid, l.next()) {
 	}
 	l.backup()
 }
 
-func (l *Lexer) emit(t ItemType) {
+func (l *Lexer) emit(t LexemeType) {
+	i := Lexeme{t, l.input[l.GetStart():l.GetPos()]}
+	L := l.L.Named("emit").With(
+		zap.String("item", i.String()),
+	)
+	L.Debug("emitting lexeme")
+	l.Items = append(l.Items, i)
+	l.SetStart(l.GetPos())
+	/* original concurrent implementation
 	defer l.chanMutex.Unlock()
 	l.chanMutex.Lock()
 	i := Item{t, l.input[l.GetStart():l.GetPos()]}
@@ -120,95 +143,127 @@ func (l *Lexer) emit(t ItemType) {
 		zap.String("item", i.String()),
 	)
 	l.items <- i
-	l.SetStart(l.GetPos())
+	*/
 }
 
 func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
 	L := l.L.Named("errorf")
-	errorItem := Item{
-		ItemError,
+	errorItem := Lexeme{
+		LexError,
 		fmt.Sprintf(format, args...),
 	}
 	L.Debugw("emitting errorItem",
 		zap.String("error", errorItem.String()),
 	)
-	l.items <- errorItem
+	l.Items = append(l.Items, errorItem)
 
 	return nil
 }
 
 func (l *Lexer) next() rune {
 	var r rune
+	L := l.L.Named("next")
 	if l.GetPos() >= len(l.input) {
+		L.Debug("end of input reached")
 		l.SetWidth(0)
 
 		return EOF
 	}
 	r, width := utf8.DecodeRuneInString(l.input[l.GetPos():])
+	L.Debugw("found rune",
+		zap.String("rune", string(r)),
+		zap.Int("width", width),
+	)
 	l.SetWidth(width)
 	l.SetPos(l.GetPos() + l.GetWidth())
 
 	return r
 }
 
 func (l *Lexer) run() {
+	for state := lexText; state != nil; {
+		state = state(l)
+	}
+	/* original concurrent implementation
 	defer l.chanMutex.Unlock()
 	for state := lexText; state != nil; {
 		state = state(l)
 	}
 	l.chanMutex.Lock()
 	close(l.items)
+	*/
 }
 
 func (l *Lexer) GetPos() int {
 	defer l.posMutex.Unlock()
 	l.posMutex.Lock()
+	l.L.Named("GetPos").Debugw("getting current position",
+		zap.Int("old", l.pos),
+	)
 	return l.pos
 }
 
 func (l *Lexer) SetPos(pos int) {
 	defer l.posMutex.Unlock()
 	l.posMutex.Lock()
+	l.L.Named("SetPos").Debugw("setting new position",
+		zap.Int("new", pos),
+		zap.Int("old", l.pos),
+	)
 	l.pos = pos
}
 
 func (l *Lexer) GetWidth() int {
 	defer l.widthMutex.Unlock()
 	l.widthMutex.Lock()
+	l.L.Named("GetWidth").Debugw("getting current width",
+		zap.Int("old", l.width),
+	)
 	return l.width
 }
 
 func (l *Lexer) SetWidth(width int) {
 	defer l.widthMutex.Unlock()
 	l.widthMutex.Lock()
+	l.L.Named("SetWidth").Debugw("setting new width",
+		zap.Int("new", width),
+		zap.Int("old", l.width),
+	)
 	l.width = width
 }
 
 func (l *Lexer) GetStart() int {
 	defer l.startMutex.Unlock()
 	l.startMutex.Lock()
+	l.L.Named("GetStart").Debugw("getting old start",
+		zap.Int("old", l.start),
+	)
 	return l.start
 }
 
 func (l *Lexer) SetStart(start int) {
 	defer l.startMutex.Unlock()
 	l.startMutex.Lock()
+	l.L.Named("SetStart").Debugw("setting new start",
+		zap.Int("new", start),
+		zap.Int("old", l.start),
+	)
 	l.start = start
 }
 
 type stateFn func(*Lexer) stateFn
 
-type ItemType int
+type LexemeType int
 
-type Item struct {
-	Typ ItemType
+type Lexeme struct {
+	Typ LexemeType
 	Val string
 }
 
-func (i Item) String() string {
+func (i Lexeme) String() string {
 	switch i.Typ {
-	case ItemEOF:
+	case LexEOF:
 		return "EOF"
-	case ItemError:
+	case LexError:
 		return i.Val
 	}
diff --git a/lexer_test.go b/lexer_test.go
index ad7042a..2bc3225 100644
--- a/lexer_test.go
+++ b/lexer_test.go
@@ -11,68 +11,75 @@ func Test_Lexer(t *testing.T) {
 	tcs := []struct {
 		name     string
 		in       string
-		expected []wikilink.Item
+		expected []wikilink.Lexeme
 	}{
 		{
-			name: "wikilink", in: "[[wikilink]]", expected: []wikilink.Item{
-				{Typ: wikilink.ItemOpenLink, Val: "[["},
-				{Typ: wikilink.ItemIdent, Val: "wikilink"},
-				{Typ: wikilink.ItemCloseLink, Val: "]]"},
+			name: "wikilink", in: "[[wikilink]]", expected: []wikilink.Lexeme{
+				{Typ: wikilink.LexOpenLink, Val: "[["},
+				{Typ: wikilink.LexIdent, Val: "wikilink"},
+				{Typ: wikilink.LexCloseLink, Val: "]]"},
+				{Typ: wikilink.LexText, Val: ""},
 			},
 		},
 		{
-			name: "wikilink|display name", in: "[[wikilink|display name]]", expected: []wikilink.Item{
-				{Typ: wikilink.ItemOpenLink, Val: "[["},
-				{Typ: wikilink.ItemIdent, Val: "wikilink"},
-				{Typ: wikilink.ItemAlias, Val: "|"},
-				{Typ: wikilink.ItemIdent, Val: "display name"},
-				{Typ: wikilink.ItemCloseLink, Val: "]]"},
+			name: "wikilink|display name", in: "[[wikilink|display name]]", expected: []wikilink.Lexeme{
+				{Typ: wikilink.LexOpenLink, Val: "[["},
+				{Typ: wikilink.LexIdent, Val: "wikilink"},
+				{Typ: wikilink.LexAlias, Val: "|"},
+				{Typ: wikilink.LexIdent, Val: "display name"},
+				{Typ: wikilink.LexCloseLink, Val: "]]"},
+				{Typ: wikilink.LexText, Val: ""},
 			},
 		},
 		{
-			name: "wikilink|display name|second pipe", in: "[[wikilink|display name|second pipe]]", expected: []wikilink.Item{
-				{Typ: wikilink.ItemOpenLink, Val: "[["},
-				{Typ: wikilink.ItemIdent, Val: "wikilink"},
-				{Typ: wikilink.ItemAlias, Val: "|"},
-				{Typ: wikilink.ItemIdent, Val: "display name"},
-				{Typ: wikilink.ItemAlias, Val: "|"},
-				{Typ: wikilink.ItemIdent, Val: "second pipe"},
-				{Typ: wikilink.ItemCloseLink, Val: "]]"},
+			name: "wikilink|display name|second pipe", in: "[[wikilink|display name|second pipe]]", expected: []wikilink.Lexeme{
+				{Typ: wikilink.LexOpenLink, Val: "[["},
+				{Typ: wikilink.LexIdent, Val: "wikilink"},
+				{Typ: wikilink.LexAlias, Val: "|"},
+				{Typ: wikilink.LexIdent, Val: "display name"},
+				{Typ: wikilink.LexAlias, Val: "|"},
+				{Typ: wikilink.LexIdent, Val: "second pipe"},
+				{Typ: wikilink.LexCloseLink, Val: "]]"},
+				{Typ: wikilink.LexText, Val: ""},
 			},
 		},
 		{
-			name: "wikilink with numeric alias|420|second pipe", in: "[[wikilink|420|second pipe]]", expected: []wikilink.Item{
-				{Typ: wikilink.ItemOpenLink, Val: "[["},
-				{Typ: wikilink.ItemIdent, Val: "wikilink"},
-				{Typ: wikilink.ItemAlias, Val: "|"},
-				{Typ: wikilink.ItemIdent, Val: "420"},
-				{Typ: wikilink.ItemAlias, Val: "|"},
-				{Typ: wikilink.ItemIdent, Val: "second pipe"},
-				{Typ: wikilink.ItemCloseLink, Val: "]]"},
+			name: "wikilink with numeric alias|420|second pipe", in: "[[wikilink|420|second pipe]]", expected: []wikilink.Lexeme{
+				{Typ: wikilink.LexOpenLink, Val: "[["},
+				{Typ: wikilink.LexIdent, Val: "wikilink"},
+				{Typ: wikilink.LexAlias, Val: "|"},
+				{Typ: wikilink.LexIdent, Val: "420"},
+				{Typ: wikilink.LexAlias, Val: "|"},
+				{Typ: wikilink.LexIdent, Val: "second pipe"},
+				{Typ: wikilink.LexCloseLink, Val: "]]"},
+				{Typ: wikilink.LexText, Val: ""},
 			},
 		},
 		{
-			name: "wikilink with spaces in filename", in: "[[wikilink spaces]]", expected: []wikilink.Item{
-				{Typ: wikilink.ItemOpenLink, Val: "[["},
-				{Typ: wikilink.ItemIdent, Val: "wikilink spaces"},
-				{Typ: wikilink.ItemCloseLink, Val: "]]"},
+			name: "wikilink with spaces in filename", in: "[[wikilink spaces]]", expected: []wikilink.Lexeme{
+				{Typ: wikilink.LexOpenLink, Val: "[["},
+				{Typ: wikilink.LexIdent, Val: "wikilink spaces"},
+				{Typ: wikilink.LexCloseLink, Val: "]]"},
+				{Typ: wikilink.LexText, Val: ""},
 			},
 		},
 		{
-			name: "#heading", in: "[[#heading]]", expected: []wikilink.Item{
-				{Typ: wikilink.ItemOpenLink, Val: "[["},
-				{Typ: wikilink.ItemHeading, Val: "#"},
-				{Typ: wikilink.ItemIdent, Val: "heading"},
-				{Typ: wikilink.ItemCloseLink, Val: "]]"},
+			name: "#heading", in: "[[#heading]]", expected: []wikilink.Lexeme{
+				{Typ: wikilink.LexOpenLink, Val: "[["},
+				{Typ: wikilink.LexHeading, Val: "#"},
+				{Typ: wikilink.LexIdent, Val: "heading"},
+				{Typ: wikilink.LexCloseLink, Val: "]]"},
+				{Typ: wikilink.LexText, Val: ""},
 			},
 		},
 		{
-			name: "wikilink#heading", in: "[[wikilink#heading]]", expected: []wikilink.Item{
-				{Typ: wikilink.ItemOpenLink, Val: "[["},
-				{Typ: wikilink.ItemIdent, Val: "wikilink"},
-				{Typ: wikilink.ItemHeading, Val: "#"},
-				{Typ: wikilink.ItemIdent, Val: "heading"},
-				{Typ: wikilink.ItemCloseLink, Val: "]]"},
+			name: "wikilink#heading", in: "[[wikilink#heading]]", expected: []wikilink.Lexeme{
+				{Typ: wikilink.LexOpenLink, Val: "[["},
+				{Typ: wikilink.LexIdent, Val: "wikilink"},
+				{Typ: wikilink.LexHeading, Val: "#"},
+				{Typ: wikilink.LexIdent, Val: "heading"},
+				{Typ: wikilink.LexCloseLink, Val: "]]"},
+				{Typ: wikilink.LexText, Val: ""},
 			},
 		},
 		/*
@@ -94,8 +101,13 @@ func Test_Lexer(t *testing.T) {
 			// t.Parallel()
 			l := wikilink.Lex("testLexer", tc.in)
 			defer l.L.Sync()
-			for _, e := range tc.expected {
-				n := l.NextItem()
+			if len(tc.expected) != len(l.Items) {
+				t.Logf("expected %d tokens, got %d\n", len(tc.expected), len(l.Items))
+				t.Fail()
+				return
+			}
+			for i, e := range tc.expected {
+				n := l.Items[i]
 				if e.Typ != n.Typ {
 					t.Logf("expected Type %s, received %s", e.Typ.String(), n.Typ.String())
 					t.Fail()
diff --git a/states.go b/states.go
index f76f686..d52ad4e 100644
--- a/states.go
+++ b/states.go
@@ -28,26 +28,34 @@ func isBlockRef(s string) bool {
 
 func lexIdent(l *Lexer) stateFn {
 	for {
+		L := l.L.Named("lexIdent")
+		if isCloseLink(l.input[l.GetPos():]) {
+			L.Debug("found CloseLink")
+			l.emit(LexIdent)
+			return lexCloseLink
+		}
+		s := l.input[l.GetPos():]
 		r := l.next()
-		l.L.Named("lexIdent").Debug("stepping through lexIdent",
-			zap.String("r", string(r)),
+		L = l.L.With(
+			zap.String("rune", string(r)),
 		)
-		s := l.input[l.GetPos():]
+		L.Debug("stepping through lexIdent")
 		if r == '\\' { // i think this will handle escape characters?
 			break
 		}
+
 		switch {
 		case isBlockRef(s):
-			l.emit(ItemIdent)
+			L.Debug("found BlockRef")
+			l.emit(LexIdent)
 			return lexBlockRef
 		case isAlias(s):
-			l.emit(ItemIdent)
+			L.Debug("found Alias")
+			l.emit(LexIdent)
 			return lexAlias
-		case isCloseLink(s):
-			l.emit(ItemIdent)
-			return lexCloseLink
 		case isHeading(s):
-			l.emit(ItemIdent)
+			L.Debug("found Heading")
+			l.emit(LexIdent)
 			return lexHeading
 		}
@@ -57,34 +65,36 @@ func lexHeading(l *Lexer) stateFn {
 	l.SetPos(l.GetPos() + len(Heading))
-	l.emit(ItemHeading)
+	l.emit(LexHeading)
 
 	return lexIdent
 }
 
 func lexBlockRef(l *Lexer) stateFn {
 	l.SetPos(l.GetPos() + len(BlockRef))
-	l.emit(ItemBlockRef)
+	l.emit(LexBlockRef)
 
 	return lexIdent
 }
 
 func lexAlias(l *Lexer) stateFn {
 	l.SetPos(l.GetPos() + len(Alias))
-	l.emit(ItemAlias)
+	l.emit(LexAlias)
 
 	return lexIdent
 }
 
 func lexText(l *Lexer) stateFn {
+	L := l.L.Named("lexText")
 	for {
 		if isOpenLink(l.input[l.GetPos():]) {
+			L.Debug("found openLink")
 			return lexOpenLink
 		}
 
 		r := l.next()
 
 		switch {
		case r == EOF || r == '\n':
-			l.emit(ItemText)
+			l.emit(LexText)
 			return nil
 		}
 	}
@@ -92,14 +102,14 @@ func lexOpenLink(l *Lexer) stateFn {
 	l.SetPos(l.GetPos() + len(OpenLink))
-	l.emit(ItemOpenLink)
+	l.emit(LexOpenLink)
 
 	return lexIdent
 }
 
 func lexCloseLink(l *Lexer) stateFn {
 	l.SetPos(l.GetPos() + len(CloseLink))
-	l.emit(ItemCloseLink)
+	l.emit(LexCloseLink)
 
 	return lexText
 }
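Usage note: with the channel-based NextItem API commented out, callers consume the lexer output by ranging over the Items slice that Lex now populates synchronously, as cmd/demo/main.go does above. The snippet below is a minimal sketch of that pattern under the same assumptions (the import path from cmd/demo/main.go; the extractIdents helper is hypothetical and not part of the package), showing how a caller might pull the LexIdent values out of a lexed wikilink:

package main

import (
	"fmt"

	"code.ndumas.com/ndumas/wikilink-parser"
)

// extractIdents is a hypothetical helper: it walks the Items slice produced by
// wikilink.Lex and collects the Val of every LexIdent lexeme.
func extractIdents(lexemes []wikilink.Lexeme) []string {
	idents := make([]string, 0)
	for _, lexeme := range lexemes {
		if lexeme.Typ == wikilink.LexIdent {
			idents = append(idents, lexeme.Val)
		}
	}
	return idents
}

func main() {
	l := wikilink.Lex("usageSketch", `[[wikilink|display name]]`)
	fmt.Println(extractIdents(l.Items)) // [wikilink display name]
}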