From 949b5c240e28bb41b554193c09c7f4b6465ef78c Mon Sep 17 00:00:00 2001 From: Nick Dumas Date: Sun, 25 Jun 2023 10:05:27 -0400 Subject: [PATCH] I believe I now have a working wikilink lexer --- itemtype_string.go | 12 ++-- lexer.go | 7 ++- states.go | 146 ++++++++++++++++++++++++--------------------- 3 files changed, 91 insertions(+), 74 deletions(-) diff --git a/itemtype_string.go b/itemtype_string.go index 4209011..26df3bb 100644 --- a/itemtype_string.go +++ b/itemtype_string.go @@ -10,16 +10,18 @@ func _() { var x [1]struct{} _ = x[ItemError-0] _ = x[ItemEOF-1] - _ = x[ItemText-2] + _ = x[ItemIdent-2] _ = x[ItemOpenLink-3] _ = x[ItemCloseLink-4] - _ = x[ItemFragment-5] - _ = x[ItemAlias-6] + _ = x[ItemHeading-5] + _ = x[ItemBlockRef-6] + _ = x[ItemAlias-7] + _ = x[ItemText-8] } -const _ItemType_name = "ItemErrorItemEOFItemTextItemOpenLinkItemCloseLinkItemFragmentItemAlias" +const _ItemType_name = "ItemErrorItemEOFItemIdentItemOpenLinkItemCloseLinkItemHeadingItemBlockRefItemAliasItemText" -var _ItemType_index = [...]uint8{0, 9, 16, 24, 36, 49, 61, 70} +var _ItemType_index = [...]uint8{0, 9, 16, 25, 37, 50, 61, 73, 82, 90} func (i ItemType) String() string { if i < 0 || i >= ItemType(len(_ItemType_index)-1) { diff --git a/lexer.go b/lexer.go index 012259a..b12e53d 100644 --- a/lexer.go +++ b/lexer.go @@ -12,11 +12,13 @@ import ( const ( ItemError ItemType = iota ItemEOF - ItemText + ItemIdent ItemOpenLink ItemCloseLink - ItemFragment + ItemHeading + ItemBlockRef ItemAlias + ItemText ) const ( @@ -27,6 +29,7 @@ const ( OpenLink = "[[" CloseLink = "]]" Alias = "|" + Heading = "#" BlockRef = "#^" ) diff --git a/states.go b/states.go index c8938b5..dec93ce 100644 --- a/states.go +++ b/states.go @@ -1,55 +1,103 @@ package wikilink import ( - "log" "strings" - "unicode" ) -func lexFragment(l *Lexer) stateFn { - log.Println("entering lexFragment") - for { - if strings.HasPrefix(l.input[l.pos:], CloseLink) { - return lexCloseLink - } +func isOpenLink(s string) bool { + return strings.HasPrefix(s, OpenLink) +} - if l.peek() == '^' { - l.next() - l.emit(ItemFragment) - l.acceptRun("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -.,") +func isCloseLink(s string) bool { + return strings.HasPrefix(s, CloseLink) +} - return lexInsideLink - } +func isAlias(s string) bool { + return strings.HasPrefix(s, Alias) +} - return lexInsideLink - } +func isHeading(s string) bool { + return strings.HasPrefix(s, Heading) } -func lexAlias(l *Lexer) stateFn { - // l.accept - log.Println("entering lexAlias") +func isBlockRef(s string) bool { + return strings.HasPrefix(s, BlockRef) +} + +func lexIdent(l *Lexer) stateFn { for { - if strings.HasPrefix(l.input[l.pos:], CloseLink) { + r := l.next() + s := l.input[l.pos:] + if r == '\\' { // i think this will handle escape characters? + break + } + switch { + case isBlockRef(s): + l.emit(ItemIdent) + return lexBlockRef + case isAlias(s): + l.emit(ItemIdent) + return lexAlias + case isCloseLink(s): + l.emit(ItemIdent) return lexCloseLink + case isHeading(s): + l.emit(ItemIdent) + return lexHeading } - r := l.next() - } + return nil +} + +func lexHeading(l *Lexer) stateFn { + l.pos += len(Heading) + l.emit(ItemHeading) + + return lexIdent +} + +func lexBlockRef(l *Lexer) stateFn { + l.pos += len(BlockRef) + l.emit(ItemBlockRef) + + return lexIdent +} + +func lexAlias(l *Lexer) stateFn { + l.pos += len(Alias) l.emit(ItemAlias) - return lexInsideLink + return lexIdent } -func lexIdent(l *Lexer) stateFn { +func lexText(l *Lexer) stateFn { for { - if strings.HasPrefix(l.input[l.pos:], CloseLink) { - return lexCloseLink + if isOpenLink(l.input[l.pos:]) { + return lexOpenLink + } + r := l.next() + switch { + case r == EOF || r == '\n': + return l.errorf("wikilink terminated incorrectly") } - // r := l.next() } - return lexInsideLink } +func lexOpenLink(l *Lexer) stateFn { + l.pos += len(OpenLink) + l.emit(ItemOpenLink) + + return lexIdent +} + +func lexCloseLink(l *Lexer) stateFn { + l.pos += len(CloseLink) + l.emit(ItemCloseLink) + + return lexText +} + +/* func lexInsideLink(l *Lexer) stateFn { log.Println("entering lexInsideLink") @@ -67,8 +115,9 @@ func lexInsideLink(l *Lexer) stateFn { return l.errorf("unclosed link") case r == '#': l.emit(ItemText) + if l.peek() = '^' { return lexBlockRef } - return lexFragment + return lexHeading case r == '|': l.emit(ItemText) @@ -77,42 +126,5 @@ func lexInsideLink(l *Lexer) stateFn { } } -func lexOpenLink(l *Lexer) stateFn { - log.Println("entering lexOpenLink") - l.pos += len(OpenLink) - l.emit(ItemOpenLink) - - return lexInsideLink -} -func lexCloseLink(l *Lexer) stateFn { - log.Println("entering lexCloseLink") - l.pos += len(CloseLink) - l.emit(ItemCloseLink) - - return lexText -} - -func lexText(l *Lexer) stateFn { - log.Println("entering lexText") - - for { - if strings.HasPrefix(l.input[l.pos:], OpenLink) { - if l.pos > l.start { - l.emit(ItemText) - } - - return lexOpenLink - } - - if l.next() == EOF { - break - } - if l.pos > l.start { - l.emit(ItemText) - } - l.emit(ItemEOF) - return nil - } - return nil -} +*/