From 8098d75ae9aec8084cf7642cd5d8d4e32cfff7dc Mon Sep 17 00:00:00 2001 From: Nick Dumas Date: Tue, 7 Feb 2023 02:02:11 +0000 Subject: [PATCH] saving the lexer --- cmd/main.go | 5 ++--- go.mod | 2 +- lexer/lexer.go | 38 +++++++++++++++++++++++++++++------- lexer/lexer_test.go | 47 ++++++++++++++++++++++++++++++++------------- test-cases | 10 ---------- 5 files changed, 68 insertions(+), 34 deletions(-) delete mode 100644 test-cases diff --git a/cmd/main.go b/cmd/main.go index 77cf9e0..04bf083 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -3,12 +3,11 @@ package main import ( "log" - "github.com/therealfakemoot/obsidian-parser" + "github.com/therealfakemoot/wikilinks-parser/lexer" ) func main() { - l, tokens := lexer.Lex("[[foo/bar/butts.png]]") - go l.Run() + _, tokens := lexer.Lex(`[[foo/bar/butts.png]]`) for t := range tokens { log.Printf("%#v\n", t) } diff --git a/go.mod b/go.mod index a1da3a2..016c362 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ -module github.com/therealfakemoot/obsidian-parser +module github.com/therealfakemoot/wikilinks-parser go 1.19 diff --git a/lexer/lexer.go b/lexer/lexer.go index 0a44c7a..2e6d588 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -6,10 +6,21 @@ import ( "strings" ) -type Token struct { - Raw string -} +type Token int + +const ( + LinkBeginToken = iota + LinkEndToken + LinkTextToken + LinkAliasBeginToken + LinkAliasTextToken + LinkSectionBeginToken + LinkSectionTextToken + LinkBlockBeginToken + LinkBlockTextToken +) +/* var ( LinkBeginToken = Token{Raw: "[["} LinkEndToken = Token{Raw: "]]"} @@ -21,10 +32,12 @@ var ( LinkBlockBeginToken = Token{Raw: "^"} LinkBlockTextToken = Token{} ) +*/ type Lexer struct { input []string pos int + link bool Tokens chan Token } @@ -79,13 +92,24 @@ func (l *Lexer) Run() { } if s == "[" && l.Peek() == "[" { + l.link = true l.Next() l.Emit(LinkBeginToken) - } - if s == "]" && l.Peek() == "]" { - l.Next() - l.Emit(LinkBeginToken) + switch s { + case "|": + l.Emit(LinkAliasBeginToken) + case "#": + l.Emit(LinkSectionBeginToken) + case "^": + l.Emit(LinkBlockBeginToken) + } + + if s == "]" && l.Peek() == "]" { + l.link = false + l.Next() + l.Emit(LinkEndToken) + } } } diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index eb6ebfb..5ab4817 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -4,26 +4,47 @@ import ( "testing" ) -func Test_Lexer(t *testing.T) { +func Test_LexerSimple(t *testing.T) { tt := []string{ - "[[Regular Link]]", - /* - "![[Transcluded Link]]", - "[[Regular Link|Alias]]", - "[[Regular Link#Subsection of page]]", - "[[Regular Link^link to block]]", - "[[Regular Link#Subsection of page|Alias]]", - "[[Regular Link^link to block|Alias]]", - "[[Regular Link\|Alias]]", - "[[Regular Link^link to block\|Alias]]", - "[[Regular Link#Subsection of page\|Alias]]", - */ + "[[", + "]]", + "[[foo]]", + "[[foo]]", + "[[foo|bar]]", } for _, tc := range tt { + t.Run(tc, func(t *testing.T) { + t.Logf("checking %q", tc) + _, tokens := Lex(tc) + for tok := range tokens { + t.Logf("found token: %#v", tok) + } + }) + } +} + +/* + +func Test_LexerFull(t *testing.T) { + tt := []string{ + `[[Regular Link]]`, + `![[Transcluded Link]]`, + `[[Regular Link|Alias]]`, + `[[Regular Link#Subsection of page]]`, + `[[Regular Link^link to block]]`, + `[[Regular Link#Subsection of page|Alias]]`, + `[[Regular Link^link to block|Alias]]`, + `[[Regular Link\|Alias]]`, + `[[Regular Link^link to block\|Alias]]`, + `[[Regular Link#Subsection of page\|Alias]]`, + } + + for _, tc := range tt { t.Run(tc, func(t *testing.T) { t.Fail() }) } } +*/ diff --git a/test-cases b/test-cases deleted file mode 100644 index cb4df09..0000000 --- a/test-cases +++ /dev/null @@ -1,10 +0,0 @@ -[[Regular Link]] -![[Transcluded Link]] -[[Regular Link|Alias]] -[[Regular Link#Subsection of page]] -[[Regular Link^link to block]] -[[Regular Link#Subsection of page|Alias]] -[[Regular Link^link to block|Alias]] -[[Regular Link\|Alias]] -[[Regular Link^link to block\|Alias]] -[[Regular Link#Subsection of page\|Alias]]