From 24b7320e0f3a24be1057db2c14afb726f148606b Mon Sep 17 00:00:00 2001 From: Nick Dumas Date: Sat, 24 Jun 2023 15:10:09 -0400 Subject: [PATCH] testable lexer, in theory --- lexer.go | 181 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 113 insertions(+), 68 deletions(-) diff --git a/lexer.go b/lexer.go index 5db9aa0..112aedb 100644 --- a/lexer.go +++ b/lexer.go @@ -1,200 +1,245 @@ +//go:generate stringer -type=ItemType package wikilink import ( "fmt" + "log" "strings" "unicode/utf8" ) const ( - eof rune = -1 + ItemError ItemType = iota + ItemEOF + ItemText + ItemOpenLink + ItemCloseLink + ItemFragment + ItemAlias ) const ( - openLink = "[[" - closeLink = "]]" - alias = "|" - blockRef = "#^" + EOF rune = 0 ) -func lex(name, input string) *lexer { - l := &lexer{ +const ( + OpenLink = "[[" + CloseLink = "]]" + Alias = "|" + BlockRef = "#^" +) + +func Lex(name, input string) *Lexer { + l := &Lexer{ name: name, input: input, state: lexText, - items: make(chan item, 2), + items: make(chan Item, 2), } go l.run() + return l } -func (l *lexer) nextItem() item { +func (l *Lexer) NextItem() Item { for { select { case item := <-l.items: return item default: + if l.state == nil { + return Item{ + Typ: ItemError, + Val: "state is nil, should not be", + } + } + l.state = l.state(l) } } } -func (l *lexer) ignore() { +func (l *Lexer) ignore() { l.start = l.pos } -func (l *lexer) backup() { +func (l *Lexer) backup() { l.pos -= l.width } -type lexer struct { +type Lexer struct { name, input string start, pos, width int state stateFn - items chan item + items chan Item } -func (l *lexer) peek() rune { +func (l *Lexer) peek() rune { r := l.next() l.backup() + return r } -func (l *lexer) accept(valid string) bool { - if strings.IndexRune(valid, l.next()) >= 0 { +func (l *Lexer) accept(valid string) bool { + if strings.ContainsRune(valid, l.next()) { return true } + l.backup() + return false } -func (l *lexer) acceptRun(valid string) { - for strings.IndexRune(valid, l.next()) >= 0 { +func (l *Lexer) acceptRun(valid string) { + for strings.ContainsRune(valid, l.next()) { } l.backup() } -func (l *lexer) emit(t itemType) { - l.items <- item{t, l.input[l.start:l.pos]} +func (l *Lexer) emit(t ItemType) { + i := Item{t, l.input[l.start:l.pos]} + log.Printf("emitting Item: %#+v\n", i) + l.items <- i l.start = l.pos } -func (l *lexer) errorf(format string, args ...interface{}) stateFn { - l.items <- item{ - itemError, +func (l *Lexer) errorf(format string, args ...interface{}) stateFn { + log.Printf("entering errorf: %q\n", format) + l.items <- Item{ + ItemError, fmt.Sprintf(format, args...), } + return nil } -func lexFragment(l *lexer) stateFn { - return l.errorf("lexFragment not implemented") +func lexFragment(l *Lexer) stateFn { + log.Println("entering lexFragment") + for { + if strings.HasPrefix(l.input[l.pos:], CloseLink) { + return lexCloseLink + } + + if l.peek() == '^' { + l.next() + l.emit(ItemFragment) + l.acceptRun("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -.,") + + return lexInsideLink + } + + return lexInsideLink + } } -func lexAlias(l *lexer) stateFn { - return l.errorf("lexAlias not implemented") +func lexAlias(l *Lexer) stateFn { + // l.accept + log.Println("entering lexAlias") + return lexInsideLink } -func lexInsideLink(l *lexer) stateFn { +func lexInsideLink(l *Lexer) stateFn { + log.Println("entering lexInsideLink") for { - if strings.HasPrefix(l.input[l.pos:], closeLink) { + if strings.HasPrefix(l.input[l.pos:], CloseLink) { return lexCloseLink } r := l.next() switch { - case r == eof: + case r == EOF: case r == '\n': return l.errorf("unclosed link") case r == '#': + l.emit(ItemText) return lexFragment case r == '|': + l.emit(ItemText) return lexAlias + case l.peek() == ']': + l.emit(ItemText) + return lexCloseLink } } } -func lexOpenLink(l *lexer) stateFn { - l.pos += len(openLink) - l.emit(itemOpenLink) +func lexOpenLink(l *Lexer) stateFn { + log.Println("entering lexOpenLink") + l.pos += len(OpenLink) + l.emit(ItemOpenLink) return lexInsideLink } -func lexCloseLink(l *lexer) stateFn { - l.pos += len(closeLink) - l.emit(itemCloseLink) +func lexCloseLink(l *Lexer) stateFn { + log.Println("entering lexCloseLink") + l.pos += len(CloseLink) + l.emit(ItemCloseLink) return lexText } -func lexText(l *lexer) stateFn { +func lexText(l *Lexer) stateFn { + log.Println("entering lexText") + for { - if strings.HasPrefix(l.input[l.pos:], openLink) { + if strings.HasPrefix(l.input[l.pos:], OpenLink) { if l.pos > l.start { - l.emit(itemText) + l.emit(ItemText) } return lexOpenLink } - if l.next() == eof { + if l.next() == EOF { break } if l.pos > l.start { - l.emit(itemText) + l.emit(ItemText) } - l.emit(itemEOF) + l.emit(ItemEOF) return nil } return nil } -func (l *lexer) next() rune { +func (l *Lexer) next() rune { var r rune if l.pos >= len(l.input) { l.width = 0 - return eof + return EOF } r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) l.pos += l.width return r } -func (l *lexer) run() { +func (l *Lexer) run() { for state := lexText; state != nil; { state = state(l) } close(l.items) } -type stateFn func(*lexer) stateFn - -type itemType int +type stateFn func(*Lexer) stateFn -const ( - itemError itemType = iota - itemEOF - itemText - itemOpenLink - itemCloseLink - itemBlockRef - itemAlias -) +type ItemType int -type item struct { - typ itemType - val string +type Item struct { + Typ ItemType + Val string } -func (i item) String() string { - switch i.typ { +func (i Item) String() string { + switch i.Typ { - case itemEOF: + case ItemEOF: return "EOF" - case itemError: - return i.val + case ItemError: + return i.Val } - if len(i.val) > 10 { - return fmt.Sprintf("%.10q...", i.val) + + if len(i.Val) > 10 { + return fmt.Sprintf("%s:%.10q...", i.Typ, i.Val) } - return fmt.Sprintf("%q...", i.val) + return fmt.Sprintf("%s:%q...", i.Typ, i.Val) }