From 1e6773ac28cd3f4348dd4a690e1e1c66be742f8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexander=20W=C3=BChr?=
Date: Thu, 19 Sep 2024 11:36:39 +0200
Subject: [PATCH] feat: implement tokenizer

---
 src/fortheck/token.gleam | 59 ++++++++++++++++++++++++++++++++++++++++
 test/token_test.gleam    | 37 +++++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 src/fortheck/token.gleam
 create mode 100644 test/token_test.gleam

diff --git a/src/fortheck/token.gleam b/src/fortheck/token.gleam
new file mode 100644
index 0000000..4db5920
--- /dev/null
+++ b/src/fortheck/token.gleam
@@ -0,0 +1,59 @@
+import gleam/bool
+import gleam/int
+import gleam/iterator.{type Iterator}
+import gleam/result
+import gleam/string
+
+/// A single lexical unit of the input.
+pub type Token {
+  /// Anything that does not parse as an integer, stored upper-cased.
+  Word(String)
+  /// A token that `int.parse` accepted.
+  Number(Int)
+}
+
+/// Classifies one whitespace-free chunk of text: integers become `Number`,
+/// everything else becomes an upper-cased `Word`.
+pub fn from_string(token: String) -> Token {
+  case int.parse(token) {
+    Ok(n) -> Number(n)
+    _ -> Word(token |> string.uppercase)
+  }
+}
+
+/// Reports whether a grapheme separates tokens.
+///
+/// The empty string counts as a separator so that running out of input
+/// flushes a pending token. `"\r\n"` is listed on its own because
+/// `string.pop_grapheme` returns a CRLF pair as one grapheme cluster.
+fn is_whitespace(string: String) -> Bool {
+  case string {
+    "" | " " | "\n" | "\r" | "\r\n" | "\t" -> True
+    _ -> False
+  }
+}
+
+/// Step function for `iterator.unfold`: consumes graphemes from `string`,
+/// collecting them in `acc`, until one complete token can be emitted.
+fn yield_token(acc: String, string: String) -> iterator.Step(Token, String) {
+  // Nothing buffered and nothing left to read: the stream is finished.
+  use <- bool.guard(when: acc == "" && string == "", return: iterator.Done)
+
+  // At end of input the pop fails; the "" fallback reads as whitespace and
+  // forces the buffered token out.
+  let #(char, rest) = string.pop_grapheme(string) |> result.unwrap(#("", ""))
+
+  case is_whitespace(char), acc == "" {
+    // Skip leading or repeated separators.
+    True, True -> yield_token("", rest)
+    // A separator ends the buffered token.
+    True, _ -> iterator.Next(from_string(acc), rest)
+    // Keep extending the current token.
+    False, _ -> yield_token(acc <> char, rest)
+  }
+}
+
+/// Lazily splits `string` on whitespace and yields one `Token` per chunk.
+pub fn tokenize(string: String) -> Iterator(Token) {
+  iterator.unfold(from: string, with: yield_token("", _))
+}
diff --git a/test/token_test.gleam b/test/token_test.gleam
new file mode 100644
index 0000000..8199b19
--- /dev/null
+++ b/test/token_test.gleam
@@ -0,0 +1,37 @@
+import fortheck/token
+import gleam/iterator
+import gleeunit
+import gleeunit/should
+
+pub fn main() {
+  gleeunit.main()
+}
+
+// Mixed separators, repeated whitespace and mixed-case words.
+pub fn tokenize_test() {
+  let string = "3 4\nMUL\t5 \n\n \n dIv"
+
+  string
+  |> token.tokenize
+  |> iterator.to_list
+  |> should.equal([
+    token.Number(3),
+    token.Number(4),
+    token.Word("MUL"),
+    token.Number(5),
+    token.Word("DIV"),
+  ])
+}
+
+// Windows line endings and bare carriage returns separate tokens too.
+pub fn tokenize_crlf_test() {
+  "1 2\r\nadd\r3"
+  |> token.tokenize
+  |> iterator.to_list
+  |> should.equal([
+    token.Number(1),
+    token.Number(2),
+    token.Word("ADD"),
+    token.Number(3),
+  ])
+}