testable lexer, in theory

dev
Nick Dumas 1 year ago
parent 2b0d0f8bdb
commit 24b7320e0f

@ -1,200 +1,245 @@
//go:generate stringer -type=ItemType
package wikilink package wikilink
import ( import (
"fmt" "fmt"
"log"
"strings" "strings"
"unicode/utf8" "unicode/utf8"
) )
// ItemType identifies the kind of token carried by an Item.
type ItemType int

// Token kinds, numbered from zero in declaration order.
const (
	// ItemError reports a lexing failure; the Item's Val holds the message.
	ItemError ItemType = iota
	// ItemEOF marks the end of the input.
	ItemEOF
	// ItemText is a run of text, either outside or inside a link.
	ItemText
	// ItemOpenLink is the OpenLink delimiter.
	ItemOpenLink
	// ItemCloseLink is the CloseLink delimiter.
	ItemCloseLink
	// ItemFragment is emitted by lexFragment for the '^' that follows '#'.
	ItemFragment
	// ItemAlias is declared for alias text; lexAlias does not emit it yet.
	ItemAlias
)

const (
	// EOF is the rune returned by next once the input is exhausted.
	// NOTE(review): because the sentinel is 0, a literal NUL byte in the
	// input is indistinguishable from end of input.
	EOF rune = 0
)

// Delimiters recognised inside the input.
const (
	OpenLink  = "[["
	CloseLink = "]]"
	Alias     = "|"
	BlockRef  = "#^"
)

// stateFn is one state of the lexer: it consumes input, emits zero or more
// items and returns the next state, or nil when lexing is finished.
type stateFn func(*Lexer) stateFn

// Item is a single token: its kind and the slice of input it covers.
type Item struct {
	Typ ItemType
	Val string
}

// Lexer holds the scanning state for one input string.
type Lexer struct {
	name, input       string  // name is stored by Lex; input is the text being scanned
	start, pos, width int     // start of pending item, current offset, width of last rune read
	state             stateFn // next state to run; nil once lexing has finished
	items             chan Item
}

// Lex returns a Lexer positioned at the start of input. No goroutine is
// started: NextItem advances the state machine on the caller's goroutine.
// Running the machine in a background goroutine as well would have two
// goroutines mutating the same Lexer.
func Lex(name, input string) *Lexer {
	return &Lexer{
		name:  name,
		input: input,
		state: lexText,
		// A state function emits at most two items per invocation (lexText
		// emits trailing text followed by EOF), and NextItem only runs a
		// state when the buffer is empty, so a capacity of two never blocks.
		items: make(chan Item, 2),
	}
}

// NextItem returns the next token. When no token is buffered it runs state
// functions until one is produced. Once the state machine has terminated and
// the buffer is drained it returns an ItemError item.
func (l *Lexer) NextItem() Item {
	for {
		select {
		case it := <-l.items:
			return it
		default:
			if l.state == nil {
				return Item{
					Typ: ItemError,
					Val: "state is nil, should not be",
				}
			}
			l.state = l.state(l)
		}
	}
}

// ignore discards the pending input by moving start up to pos.
func (l *Lexer) ignore() {
	l.start = l.pos
}

// backup steps back over the rune most recently returned by next. After an
// EOF the width is zero, so backup is a no-op.
func (l *Lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune without consuming it.
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// accept consumes the next rune if it is in valid and reports whether it did.
func (l *Lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes runes for as long as they are in valid.
func (l *Lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// emit queues the pending input l.input[l.start:l.pos] as an item of kind t
// and starts a new pending span at pos.
func (l *Lexer) emit(t ItemType) {
	it := Item{Typ: t, Val: l.input[l.start:l.pos]}
	log.Printf("emitting Item: %#+v\n", it)
	l.items <- it
	l.start = l.pos
}

// errorf queues an ItemError whose value is the formatted message and returns
// nil, which terminates the state machine.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	log.Printf("entering errorf: %q\n", format)
	l.items <- Item{
		Typ: ItemError,
		Val: fmt.Sprintf(format, args...),
	}
	return nil
}

// next consumes and returns the next rune, or EOF (with a zero width) when
// the input is exhausted.
func (l *Lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return EOF
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = w
	l.pos += w
	return r
}

// run drives the state machine from lexText to completion and then closes
// the items channel. It blocks once the channel buffer is full, so it needs a
// concurrent receiver, and it must not be combined with NextItem, which
// advances the same Lexer.
func (l *Lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.items)
}

// lexText scans plain text up to the next OpenLink or the end of input.
func lexText(l *Lexer) stateFn {
	log.Println("entering lexText")
	for {
		if strings.HasPrefix(l.input[l.pos:], OpenLink) {
			if l.pos > l.start {
				l.emit(ItemText)
			}
			return lexOpenLink
		}
		if l.next() == EOF {
			break
		}
	}
	// End of input: flush any trailing text, then signal EOF. This must sit
	// after the loop; inside it, lexing would stop after the first rune.
	if l.pos > l.start {
		l.emit(ItemText)
	}
	l.emit(ItemEOF)
	return nil
}

// lexOpenLink emits the OpenLink delimiter. It is only entered when the
// input at pos starts with OpenLink.
func lexOpenLink(l *Lexer) stateFn {
	log.Println("entering lexOpenLink")
	l.pos += len(OpenLink)
	l.emit(ItemOpenLink)
	return lexInsideLink
}

// lexCloseLink emits the CloseLink delimiter. It is only entered when the
// input at pos starts with CloseLink.
func lexCloseLink(l *Lexer) stateFn {
	log.Println("entering lexCloseLink")
	l.pos += len(CloseLink)
	l.emit(ItemCloseLink)
	return lexText
}

// lexInsideLink scans the body of a link. End of input or a newline before
// CloseLink is reported as an "unclosed link" error.
func lexInsideLink(l *Lexer) stateFn {
	log.Println("entering lexInsideLink")
	for {
		if strings.HasPrefix(l.input[l.pos:], CloseLink) {
			// Flush pending text first so it does not end up inside the
			// ItemCloseLink value.
			if l.pos > l.start {
				l.emit(ItemText)
			}
			return lexCloseLink
		}
		switch l.next() {
		case EOF, '\n':
			// Go cases do not fall through, so EOF must share this case
			// explicitly; otherwise the loop would spin forever at EOF.
			return l.errorf("unclosed link")
		case '#':
			// NOTE(review): the '#' itself is part of the emitted text.
			l.emit(ItemText)
			return lexFragment
		case '|':
			// NOTE(review): the '|' itself is part of the emitted text.
			l.emit(ItemText)
			return lexAlias
		}
		// A lone ']' is ordinary text; only the full CloseLink delimiter,
		// checked at the top of the loop, ends the link.
	}
}

// lexFragment runs after a '#' inside a link. A following '^' is emitted as
// an ItemFragment; the run of characters after it is consumed but left
// pending for lexInsideLink to emit.
// NOTE(review): the fragment item therefore only covers the '^' itself;
// confirm whether the run was meant to be part of it.
func lexFragment(l *Lexer) stateFn {
	log.Println("entering lexFragment")
	if strings.HasPrefix(l.input[l.pos:], CloseLink) {
		return lexCloseLink
	}
	if l.peek() == '^' {
		l.next()
		l.emit(ItemFragment)
		l.acceptRun("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -.,")
	}
	return lexInsideLink
}

// lexAlias runs after a '|' inside a link. It consumes nothing and hands
// control straight back to lexInsideLink.
func lexAlias(l *Lexer) stateFn {
	log.Println("entering lexAlias")
	return lexInsideLink
}

// String renders the item for debugging: "EOF" for ItemEOF, the bare message
// for ItemError, and otherwise the kind followed by the quoted value, cut to
// ten characters and suffixed with "..." when the value is longer than that.
func (i Item) String() string {
	switch i.Typ {
	case ItemEOF:
		return "EOF"
	case ItemError:
		return i.Val
	}
	// %v rather than %s: it prints through ItemType's String method when one
	// exists (presumably generated by stringer) and stays valid when it does not.
	if len(i.Val) > 10 {
		return fmt.Sprintf("%v:%.10q...", i.Typ, i.Val)
	}
	return fmt.Sprintf("%v:%q", i.Typ, i.Val)
}

Loading…
Cancel
Save