//go:generate stringer -type=ItemType package wikilink import ( "fmt" "log" "strings" // "unicode" "unicode/utf8" ) const ( ItemError ItemType = iota ItemEOF ItemText ItemOpenLink ItemCloseLink ItemFragment ItemAlias ) const ( EOF rune = 0 ) const ( OpenLink = "[[" CloseLink = "]]" Alias = "|" BlockRef = "#^" ) func Lex(name, input string) *Lexer { l := &Lexer{ name: name, input: input, state: lexText, items: make(chan Item, 2), } go l.run() return l } func (l *Lexer) NextItem() Item { for { select { case item := <-l.items: return item default: if l.state == nil { return Item{ Typ: ItemError, Val: "state is nil, should not be", } } l.state = l.state(l) } } } func (l *Lexer) ignore() { l.start = l.pos } func (l *Lexer) backup() { l.pos -= l.width } type Lexer struct { name, input string start, pos, width int state stateFn items chan Item } func (l *Lexer) peek() rune { r := l.next() l.backup() return r } func (l *Lexer) accept(valid string) bool { if strings.ContainsRune(valid, l.next()) { return true } l.backup() return false } func (l *Lexer) acceptRun(valid string) { for strings.ContainsRune(valid, l.next()) { } l.backup() } func (l *Lexer) emit(t ItemType) { i := Item{t, l.input[l.start:l.pos]} log.Printf("emitting Item: %#+v\n", i) l.items <- i l.start = l.pos } func (l *Lexer) errorf(format string, args ...interface{}) stateFn { log.Printf("entering errorf: %q\n", format) l.items <- Item{ ItemError, fmt.Sprintf(format, args...), } return nil } func lexFragment(l *Lexer) stateFn { log.Println("entering lexFragment") for { if strings.HasPrefix(l.input[l.pos:], CloseLink) { return lexCloseLink } if l.peek() == '^' { l.next() l.emit(ItemFragment) l.acceptRun("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -.,") return lexInsideLink } return lexInsideLink } } func lexAlias(l *Lexer) stateFn { // l.accept log.Println("entering lexAlias") if strings.HasPrefix(l.input[l.pos:], CloseLink) { return lexCloseLink } l.emit(ItemAlias) return lexInsideLink } func lexInsideLink(l *Lexer) stateFn { log.Println("entering lexInsideLink") for { if strings.HasPrefix(l.input[l.pos:], CloseLink) { return lexCloseLink } r := l.next() switch { case r == EOF: case r == '\n': return l.errorf("unclosed link") case r == '#': l.emit(ItemText) return lexFragment case r == '|': l.emit(ItemText) return lexAlias case l.peek() == ']': l.emit(ItemText) return lexCloseLink } } } func lexOpenLink(l *Lexer) stateFn { log.Println("entering lexOpenLink") l.pos += len(OpenLink) l.emit(ItemOpenLink) return lexInsideLink } func lexCloseLink(l *Lexer) stateFn { log.Println("entering lexCloseLink") l.pos += len(CloseLink) l.emit(ItemCloseLink) return lexText } func lexText(l *Lexer) stateFn { log.Println("entering lexText") for { if strings.HasPrefix(l.input[l.pos:], OpenLink) { if l.pos > l.start { l.emit(ItemText) } return lexOpenLink } if l.next() == EOF { break } if l.pos > l.start { l.emit(ItemText) } l.emit(ItemEOF) return nil } return nil } func (l *Lexer) next() rune { var r rune if l.pos >= len(l.input) { l.width = 0 return EOF } r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) l.pos += l.width return r } func (l *Lexer) run() { for state := lexText; state != nil; { state = state(l) } close(l.items) } type stateFn func(*Lexer) stateFn type ItemType int type Item struct { Typ ItemType Val string } func (i Item) String() string { switch i.Typ { case ItemEOF: return "EOF" case ItemError: return i.Val } if len(i.Val) > 10 { return fmt.Sprintf("%s:%.10q...", i.Typ, i.Val) } return fmt.Sprintf("%s:%q...", i.Typ, i.Val) }