feat: implement tokenizer
This commit is contained in:
parent
7885da4e29
commit
1e6773ac28
2 changed files with 63 additions and 0 deletions
40
src/fortheck/token.gleam
Normal file
40
src/fortheck/token.gleam
Normal file
|
@ -0,0 +1,40 @@
|
|||
import gleam/bool
|
||||
import gleam/int
|
||||
import gleam/iterator.{type Iterator}
|
||||
import gleam/result
|
||||
import gleam/string
|
||||
|
||||
/// A single lexical unit of a Forth program: either a bare word
/// (uppercased by `from_string`) or an integer literal.
pub type Token {
  /// A non-numeric token, e.g. `Word("MUL")`.
  Word(String)
  /// An integer literal, e.g. `Number(42)`.
  Number(Int)
}
|
||||
|
||||
/// Classify one raw token: anything `int.parse` accepts becomes a
/// `Number`; everything else is uppercased and wrapped in a `Word`.
pub fn from_string(token: String) -> Token {
  case int.parse(token) {
    Ok(value) -> Number(value)
    Error(_) -> token |> string.uppercase |> Word
  }
}
|
||||
|
||||
/// Reports whether a single grapheme separates tokens.
///
/// The empty string is deliberately whitespace: `yield_token` substitutes
/// `""` once the input is exhausted, which flushes the final pending token.
///
/// `"\r"` and `"\r\n"` are included because `string.pop_grapheme` yields
/// CRLF as ONE grapheme cluster (UAX #29), so Windows line endings would
/// otherwise be glued onto the adjacent token.
fn is_whitespace(string: String) -> Bool {
  case string {
    "" | " " | "\n" | "\t" | "\r" | "\r\n" -> True
    _ -> False
  }
}
|
||||
|
||||
/// One step of the unfold that drives `tokenize`.
///
/// `acc` holds the partially-read token, the second argument the unread
/// input. Finishes only when both are empty; an exhausted input with a
/// non-empty `acc` still emits that last token (pop_grapheme on `""`
/// falls back to `#("", "")`, and `""` counts as whitespace).
fn yield_token(acc: String, input: String) -> iterator.Step(Token, String) {
  case acc, input {
    // Nothing buffered and nothing left to read: the stream ends.
    "", "" -> iterator.Done
    _, _ -> {
      let #(grapheme, remaining) =
        input
        |> string.pop_grapheme
        |> result.unwrap(#("", ""))

      case is_whitespace(grapheme), acc {
        // Skip runs of separators between tokens.
        True, "" -> yield_token("", remaining)
        // A separator after buffered characters completes a token.
        True, _ -> iterator.Next(from_string(acc), remaining)
        // Otherwise keep accumulating the current token.
        False, _ -> yield_token(acc <> grapheme, remaining)
      }
    }
  }
}
|
||||
|
||||
/// Lazily split `string` into `Token`s, treating runs of whitespace as
/// separators and classifying each piece via `from_string`.
pub fn tokenize(string: String) -> Iterator(Token) {
  iterator.unfold(from: string, with: fn(rest) { yield_token("", rest) })
}
|
23
test/token_test.gleam
Normal file
23
test/token_test.gleam
Normal file
|
@ -0,0 +1,23 @@
|
|||
import fortheck/token
|
||||
import gleam/iterator
|
||||
import gleeunit
|
||||
import gleeunit/should
|
||||
|
||||
/// Test-runner entry point: hands control to gleeunit, which discovers
/// and runs every `*_test` function in this module.
pub fn main() {
  gleeunit.main()
}
|
||||
|
||||
/// End-to-end check: mixed separators (spaces, tabs, newlines, runs of
/// blanks) are skipped, numbers are parsed, and words are uppercased.
pub fn tokenize_test() {
  let expected = [
    token.Number(3),
    token.Number(4),
    token.Word("MUL"),
    token.Number(5),
    token.Word("DIV"),
  ]

  "3 4\nMUL\t5 \n\n \n dIv"
  |> token.tokenize
  |> iterator.to_list
  |> should.equal(expected)
}
|
Loading…
Reference in a new issue