testable lexer, in theory

dev
Nick Dumas 1 year ago
parent 2b0d0f8bdb
commit 24b7320e0f

@ -1,200 +1,245 @@
//go:generate stringer -type=ItemType
package wikilink
import (
"fmt"
"log"
"strings"
"unicode/utf8"
)
// Kinds of Item the lexer can produce. ItemError is first so that it is the
// zero value of ItemType.
const (
	ItemError     ItemType = iota // lexing failed; the Item's Val carries the message
	ItemEOF                       // end of input
	ItemText                      // plain text
	ItemOpenLink                  // the OpenLink delimiter
	ItemCloseLink                 // the CloseLink delimiter
	ItemFragment                  // emitted by lexFragment
	ItemAlias                     // NOTE(review): not emitted by any state function shown here
)
// EOF is the sentinel rune that (*Lexer).next returns once the input is
// exhausted.
//
// NOTE(review): a literal NUL byte in the input decodes to this same value,
// so it will be treated as end of input — confirm that is acceptable.
const (
	EOF rune = 0
)
// Delimiters of wikilink markup.
const (
	OpenLink  = "[[" // starts a link
	CloseLink = "]]" // ends a link
	Alias     = "|"  // presumably separates the target from its display alias — confirm
	BlockRef  = "#^" // presumably introduces a block reference — confirm
)
// Lex builds a Lexer over input and starts its state machine in a new
// goroutine. Items are then retrieved with NextItem.
//
// name is stored on the Lexer; the items channel has a buffer of two.
func Lex(name, input string) *Lexer {
	l := &Lexer{
		name:  name,
		input: input,
		state: lexText,
		items: make(chan Item, 2),
	}
	go l.run()
	return l
}
func (l *lexer) nextItem() item {
func (l *Lexer) NextItem() Item {
for {
select {
case item := <-l.items:
return item
default:
if l.state == nil {
return Item{
Typ: ItemError,
Val: "state is nil, should not be",
}
}
l.state = l.state(l)
}
}
}
func (l *lexer) ignore() {
func (l *Lexer) ignore() {
l.start = l.pos
}
func (l *lexer) backup() {
func (l *Lexer) backup() {
l.pos -= l.width
}
type lexer struct {
type Lexer struct {
name, input string
start, pos, width int
state stateFn
items chan item
items chan Item
}
func (l *lexer) peek() rune {
func (l *Lexer) peek() rune {
r := l.next()
l.backup()
return r
}
func (l *lexer) accept(valid string) bool {
if strings.IndexRune(valid, l.next()) >= 0 {
func (l *Lexer) accept(valid string) bool {
if strings.ContainsRune(valid, l.next()) {
return true
}
l.backup()
return false
}
func (l *lexer) acceptRun(valid string) {
for strings.IndexRune(valid, l.next()) >= 0 {
func (l *Lexer) acceptRun(valid string) {
for strings.ContainsRune(valid, l.next()) {
}
l.backup()
}
func (l *lexer) emit(t itemType) {
l.items <- item{t, l.input[l.start:l.pos]}
func (l *Lexer) emit(t ItemType) {
i := Item{t, l.input[l.start:l.pos]}
log.Printf("emitting Item: %#+v\n", i)
l.items <- i
l.start = l.pos
}
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
l.items <- item{
itemError,
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
log.Printf("entering errorf: %q\n", format)
l.items <- Item{
ItemError,
fmt.Sprintf(format, args...),
}
return nil
}
func lexFragment(l *lexer) stateFn {
return l.errorf("lexFragment not implemented")
func lexFragment(l *Lexer) stateFn {
log.Println("entering lexFragment")
for {
if strings.HasPrefix(l.input[l.pos:], CloseLink) {
return lexCloseLink
}
if l.peek() == '^' {
l.next()
l.emit(ItemFragment)
l.acceptRun("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -.,")
return lexInsideLink
}
return lexInsideLink
}
}
func lexAlias(l *lexer) stateFn {
return l.errorf("lexAlias not implemented")
func lexAlias(l *Lexer) stateFn {
// l.accept
log.Println("entering lexAlias")
return lexInsideLink
}
func lexInsideLink(l *lexer) stateFn {
func lexInsideLink(l *Lexer) stateFn {
log.Println("entering lexInsideLink")
for {
if strings.HasPrefix(l.input[l.pos:], closeLink) {
if strings.HasPrefix(l.input[l.pos:], CloseLink) {
return lexCloseLink
}
r := l.next()
switch {
case r == eof:
case r == EOF:
case r == '\n':
return l.errorf("unclosed link")
case r == '#':
l.emit(ItemText)
return lexFragment
case r == '|':
l.emit(ItemText)
return lexAlias
case l.peek() == ']':
l.emit(ItemText)
return lexCloseLink
}
}
}
func lexOpenLink(l *lexer) stateFn {
l.pos += len(openLink)
l.emit(itemOpenLink)
func lexOpenLink(l *Lexer) stateFn {
log.Println("entering lexOpenLink")
l.pos += len(OpenLink)
l.emit(ItemOpenLink)
return lexInsideLink
}
func lexCloseLink(l *lexer) stateFn {
l.pos += len(closeLink)
l.emit(itemCloseLink)
func lexCloseLink(l *Lexer) stateFn {
log.Println("entering lexCloseLink")
l.pos += len(CloseLink)
l.emit(ItemCloseLink)
return lexText
}
func lexText(l *lexer) stateFn {
func lexText(l *Lexer) stateFn {
log.Println("entering lexText")
for {
if strings.HasPrefix(l.input[l.pos:], openLink) {
if strings.HasPrefix(l.input[l.pos:], OpenLink) {
if l.pos > l.start {
l.emit(itemText)
l.emit(ItemText)
}
return lexOpenLink
}
if l.next() == eof {
if l.next() == EOF {
break
}
if l.pos > l.start {
l.emit(itemText)
l.emit(ItemText)
}
l.emit(itemEOF)
l.emit(ItemEOF)
return nil
}
return nil
}
func (l *lexer) next() rune {
func (l *Lexer) next() rune {
var r rune
if l.pos >= len(l.input) {
l.width = 0
return eof
return EOF
}
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
return r
}
func (l *lexer) run() {
func (l *Lexer) run() {
for state := lexText; state != nil; {
state = state(l)
}
close(l.items)
}
type stateFn func(*lexer) stateFn
type stateFn func(*Lexer) stateFn
// ItemType identifies the kind of an Item.
type ItemType int

// Item is a single token produced by the lexer.
type Item struct {
	Typ ItemType // kind of token
	Val string   // text of the token; for error items, the error message
}
func (i item) String() string {
switch i.typ {
func (i Item) String() string {
switch i.Typ {
case itemEOF:
case ItemEOF:
return "EOF"
case itemError:
return i.val
case ItemError:
return i.Val
}
if len(i.val) > 10 {
return fmt.Sprintf("%.10q...", i.val)
if len(i.Val) > 10 {
return fmt.Sprintf("%s:%.10q...", i.Typ, i.Val)
}
return fmt.Sprintf("%q...", i.val)
return fmt.Sprintf("%s:%q...", i.Typ, i.Val)
}

Loading…
Cancel
Save