feat: implement tokenizer

Alex 2024-09-19 11:36:39 +02:00
parent 7885da4e29
commit 1e6773ac28
Signed by: l-x
SSH key fingerprint: SHA256:MK3uQVPHEV0Oo2ry/dAqvVK3pAwegKAwSlyfgLd/yQM
2 changed files with 63 additions and 0 deletions

src/fortheck/token.gleam (new file, +40)

@@ -0,0 +1,40 @@
import gleam/bool
import gleam/int
import gleam/iterator.{type Iterator}
import gleam/result
import gleam/string

pub type Token {
  Word(String)
  Number(Int)
}

/// Classify a single chunk: integers become `Number`,
/// everything else becomes an uppercased `Word`.
pub fn from_string(token: String) -> Token {
  case int.parse(token) {
    Ok(n) -> Number(n)
    Error(_) -> Word(token |> string.uppercase)
  }
}

// The "" case marks end of input, which terminates a pending token
// just like whitespace does.
fn is_whitespace(string: String) -> Bool {
  case string {
    "" | " " | "\n" | "\t" -> True
    _ -> False
  }
}

/// One step of the unfold: skip leading whitespace, accumulate graphemes
/// until the next whitespace, then emit the finished token together with
/// the remaining input as the new state.
fn yield_token(acc: String, string: String) -> iterator.Step(Token, String) {
  use <- bool.guard(when: acc == "" && string == "", return: iterator.Done)
  let #(char, rest) = string.pop_grapheme(string) |> result.unwrap(#("", ""))
  case is_whitespace(char), acc == "" {
    True, True -> yield_token("", rest)
    True, _ -> iterator.Next(from_string(acc), rest)
    False, _ -> yield_token(acc <> char, rest)
  }
}

/// Lazily split the input on whitespace and classify each chunk.
pub fn tokenize(string: String) -> Iterator(Token) {
  iterator.unfold(from: string, with: yield_token("", _))
}
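
For context, a minimal usage sketch (not part of this commit; it assumes only the fortheck/token module added above):

import fortheck/token
import gleam/int
import gleam/io
import gleam/iterator
import gleam/string

pub fn main() {
  "1 2 ADD"
  |> token.tokenize
  |> iterator.map(fn(t) {
    case t {
      token.Number(n) -> "Number(" <> int.to_string(n) <> ")"
      token.Word(w) -> "Word(" <> w <> ")"
    }
  })
  |> iterator.to_list
  |> string.join(", ")
  |> io.println
  // Prints: Number(1), Number(2), Word(ADD)
}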

test/token_test.gleam (new file, +23)

@@ -0,0 +1,23 @@
import fortheck/token
import gleam/iterator
import gleeunit
import gleeunit/should

pub fn main() {
  gleeunit.main()
}

// Mixed spaces, tabs, and runs of newlines should all be skipped,
// and words should be uppercased regardless of input case.
pub fn tokenize_test() {
  let string = "3 4\nMUL\t5 \n\n \n dIv"

  string
  |> token.tokenize
  |> iterator.to_list
  |> should.equal([
    token.Number(3),
    token.Number(4),
    token.Word("MUL"),
    token.Number(5),
    token.Word("DIV"),
  ])
}
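
Assuming a standard Gleam project layout, the suite runs with:

  gleam test

gleeunit discovers any public function whose name ends in _test, so tokenize_test above is picked up automatically.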