// Lexer for [[wikilink]] syntax (lexer.go, package wikilink).
//
// Reconstructed from the patch "first draft of the lexer". The declarations
// below belong to package wikilink and rely on the fmt, strings and
// unicode/utf8 imports of lexer.go.
//
// The lexer is a state machine: each stateFn scans part of the input, emits
// zero or more items on the lexer's channel, and returns the next state.
// A nil state stops the machine.

// eof is the sentinel rune returned by next when the input is exhausted.
const (
	eof rune = -1
)

// Delimiters recognised by the lexer.
const (
	openLink  = "[["
	closeLink = "]]"
	alias     = "|"
	blockRef  = "#^"
	fragment  = "#" // start of any fragment; blockRef is the longer, more specific form
)

// stateFn is one state of the lexer; it returns the state to run next, or nil
// to stop.
type stateFn func(*lexer) stateFn

// itemType identifies the kind of a lexed item.
type itemType int

const (
	itemError itemType = iota // val holds the error message
	itemEOF
	itemText
	itemOpenLink
	itemCloseLink
	itemBlockRef
	itemAlias
)

// item is a single token produced by the lexer.
type item struct {
	typ itemType
	val string
}

// String renders the item for debugging: "EOF" for itemEOF, the raw message
// for itemError, and otherwise the quoted value, truncated to ten characters
// and followed by "..." only when it was actually truncated.
func (i item) String() string {
	switch i.typ {
	case itemEOF:
		return "EOF"
	case itemError:
		return i.val
	}
	if len(i.val) > 10 {
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// lexer holds the scanning state.
type lexer struct {
	name, input       string    // name is kept for diagnostics; input is the text being scanned
	start, pos, width int       // start of the pending item, current offset, width of the last rune read
	items             chan item // channel on which scanned items are delivered
}

// lex creates a lexer for input and starts scanning in a separate goroutine.
// The caller receives items through nextItem and should keep reading until it
// sees itemEOF or itemError; otherwise the scanning goroutine stays blocked on
// its send.
func lex(name, input string) *lexer {
	l := &lexer{
		name:  name,
		input: input,
		items: make(chan item, 2),
	}
	go l.run()
	return l
}

// run drives the state machine from lexText until a state returns nil, then
// closes the items channel.
func (l *lexer) run() {
	for state := stateFn(lexText); state != nil; {
		state = state(l)
	}
	close(l.items)
}

// nextItem returns the next item from the input. Once the channel has been
// closed (after itemEOF or itemError was delivered) it keeps returning an
// itemEOF item instead of a zero-valued item, which would read as itemError.
func (l *lexer) nextItem() item {
	it, ok := <-l.items
	if !ok {
		return item{typ: itemEOF}
	}
	return it
}

// next consumes and returns the next rune, or eof at the end of the input.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0 // makes a following backup a no-op
		return eof
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// backup steps back over the rune returned by the most recent call to next.
// It must be called at most once per call to next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune without consuming it.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore discards the input scanned since the last emitted item.
func (l *lexer) ignore() {
	l.start = l.pos
}

// accept consumes the next rune if it is one of the runes in valid and reports
// whether it did so.
func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes that all appear in valid.
func (l *lexer) acceptRun(valid string) {
	for strings.IndexRune(valid, l.next()) >= 0 {
	}
	l.backup()
}

// emit sends the pending input (l.input[l.start:l.pos]) as an item of type t
// and starts a new pending item at the current position.
func (l *lexer) emit(t itemType) {
	l.items <- item{typ: t, val: l.input[l.start:l.pos]}
	l.start = l.pos
}

// errorf emits an itemError carrying the formatted message and returns nil so
// that the state machine stops.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- item{
		typ: itemError,
		val: fmt.Sprintf(format, args...),
	}
	return nil
}

// lexText scans plain text up to the next openLink or the end of the input.
// Pending text is emitted as itemText; at the end of the input itemEOF is
// emitted and the machine stops.
func lexText(l *lexer) stateFn {
	for {
		if strings.HasPrefix(l.input[l.pos:], openLink) {
			if l.pos > l.start {
				l.emit(itemText)
			}
			return lexOpenLink
		}
		if l.next() == eof {
			break
		}
	}
	// Flush whatever text precedes the end of the input.
	if l.pos > l.start {
		l.emit(itemText)
	}
	l.emit(itemEOF)
	return nil
}

// lexOpenLink consumes openLink, which lexText has already seen, and emits it.
func lexOpenLink(l *lexer) stateFn {
	l.pos += len(openLink)
	l.emit(itemOpenLink)
	return lexInsideLink
}

// lexCloseLink consumes closeLink, which the calling state has already seen,
// and emits it.
func lexCloseLink(l *lexer) stateFn {
	l.pos += len(closeLink)
	l.emit(itemCloseLink)
	return lexText
}

// lexInsideLink scans the inside of a link. Text scanned so far is emitted as
// itemText whenever a delimiter is reached; the delimiter itself is left for
// the state that handles it. A newline or the end of the input before
// closeLink is reported as an "unclosed link" error.
func lexInsideLink(l *lexer) stateFn {
	for {
		var next stateFn
		rest := l.input[l.pos:]
		switch {
		case strings.HasPrefix(rest, closeLink):
			next = lexCloseLink
		case strings.HasPrefix(rest, alias):
			next = lexAlias
		case strings.HasPrefix(rest, fragment):
			next = lexFragment
		}
		if next != nil {
			if l.pos > l.start {
				l.emit(itemText)
			}
			return next
		}

		if r := l.next(); r == eof || r == '\n' {
			return l.errorf("unclosed link")
		}
	}
}

// lexFragment handles a '#' inside a link. A "#^" marker is emitted as
// itemBlockRef. A plain '#' has no item type of its own: it is consumed and
// left pending, so it becomes the first byte of the following itemText.
func lexFragment(l *lexer) stateFn {
	if strings.HasPrefix(l.input[l.pos:], blockRef) {
		l.pos += len(blockRef)
		l.emit(itemBlockRef)
		return lexInsideLink
	}
	// Always advance past the '#' so lexInsideLink cannot bounce back here
	// without making progress.
	l.accept(fragment)
	return lexInsideLink
}

// lexAlias consumes the alias separator, emits it as itemAlias, and then scans
// the rest of the link verbatim (no fragment or alias handling) up to
// closeLink, emitting it as itemText. A newline or the end of the input before
// closeLink is reported as an "unclosed link" error.
func lexAlias(l *lexer) stateFn {
	l.pos += len(alias)
	l.emit(itemAlias)
	for {
		if strings.HasPrefix(l.input[l.pos:], closeLink) {
			if l.pos > l.start {
				l.emit(itemText)
			}
			return lexCloseLink
		}
		if r := l.next(); r == eof || r == '\n' {
			return l.errorf("unclosed link")
		}
	}
}