From 9a895737fd66082f9a5eb11c43097321520590c9 Mon Sep 17 00:00:00 2001 From: Sloane Perrault Date: Fri, 30 Jun 2023 06:03:07 -0400 Subject: [PATCH] fix base 32 encoding --- lib/type_id.ex | 2 +- lib/type_id/base32.ex | 230 ----------------------------------------- lib/type_id/base_32.ex | 58 +++++++++++ test/type_id_test.exs | 28 +++++ 4 files changed, 87 insertions(+), 231 deletions(-) delete mode 100644 lib/type_id/base32.ex create mode 100644 lib/type_id/base_32.ex diff --git a/lib/type_id.ex b/lib/type_id.ex index b4d7ea6..5ec44d0 100644 --- a/lib/type_id.ex +++ b/lib/type_id.ex @@ -104,7 +104,7 @@ defmodule TypeID do end defp validate_prefix!(prefix) do - unless prefix =~ ~r/[a-z]{0,63}/ do + unless prefix =~ ~r/^[a-z]{0,63}$/ do raise ArgumentError, "invalid prefix: #{prefix}. prefix should match [a-z]{0,63}" end diff --git a/lib/type_id/base32.ex b/lib/type_id/base32.ex deleted file mode 100644 index 1bacc25..0000000 --- a/lib/type_id/base32.ex +++ /dev/null @@ -1,230 +0,0 @@ -defmodule TypeID.Base32 do - @moduledoc false - import Bitwise - - # Implements base 32 encoding using the a lowercase crockford alphabet - # https://www.crockford.com/base32.html - - # Borrows heavily from the core `Base` module's implementation - - crockford_alphabet = ~c"0123456789ABCDEFGHJKMNPQRSTVWXYZ" - - to_lower_enc = &Enum.map(&1, fn c -> if c in ?A..?Z, do: c - ?A + ?a, else: c end) - to_lower_dec = - &Enum.map(&1, fn {encoding, value} = pair -> - if encoding in ?A..?Z do - {encoding - ?A + ?a, value} - else - pair - end - end) - - lower = to_lower_enc.(crockford_alphabet) - - - encoded = for e1 <- lower, e2 <- lower, do: bsl(e1, 8) + e2 - - to_decode_list = fn alphabet -> - alphabet = Enum.sort(alphabet) - map = Map.new(alphabet) - {min, _} = List.first(alphabet) - {max, _} = List.last(alphabet) - {min, Enum.map(min..max, &map[&1])} - end - - {min, decoded} = - lower - |> Enum.with_index() - |> to_lower_dec.() - |> to_decode_list.() - - @spec encode(binary()) :: binary() - def encode(data) when is_binary(data) do - do_encode(data, "") - end - - @spec decode(binary()) :: {:ok, binary()} | :error - def decode(string) when is_binary(string) do - {:ok, decode!(string)} - rescue - ArgumentError -> :error - end - - @spec decode!(binary()) :: binary() | no_return() - def decode!(string) when is_binary(string) do - do_decode!(string) - end - - @compile {:inline, [do_encode: 1]} - defp do_encode(byte) do - elem({unquote_splicing(encoded)}, byte) - end - - defp do_encode(<>, acc) do - do_encode( - rest, - << - acc::binary, - do_encode(c1)::16, - do_encode(c2)::16, - do_encode(c3)::16, - do_encode(c4)::16 - >> - ) - end - - defp do_encode(<>, acc) do - << - acc::binary, - do_encode(c1)::16, - do_encode(c2)::16, - do_encode(c3)::16, - c4 |> bsl(3) |> do_encode() |> band(0x00FF)::8 - >> - end - - defp do_encode(<>, acc) do - << - acc::binary, - do_encode(c1)::16, - do_encode(c2)::16, - c3 |> bsl(1) |> do_encode() |> band(0x00FF)::8 - >> - end - - defp do_encode(<>, acc) do - << - acc::binary, - do_encode(c1)::16, - c2 |> bsl(4) |> do_encode()::16 - >> - end - - defp do_encode(<>, acc) do - < bsl(2) |> do_encode()::16>> - end - - defp do_encode(<<>>, acc) do - acc - end - - defp do_decode!(<<>>), do: <<>> - - defp do_decode!(string) when is_binary(string) do - segs = div(byte_size(string) + 7, 8) - 1 - <> = string - - main = - for <>, into: <<>> do - << - do_decode!(c1)::5, - do_decode!(c2)::5, - do_decode!(c3)::5, - do_decode!(c4)::5, - do_decode!(c5)::5, - do_decode!(c6)::5, - do_decode!(c7)::5, - do_decode!(c8)::5 - >> - end - - case rest do - <> -> - <> - - <> -> - << - main::bits, - do_decode!(c1)::5, - do_decode!(c2)::5, - do_decode!(c3)::5, - bsr(do_decode!(c4), 4)::1 - >> - - <> -> - << - main::bits, - do_decode!(c1)::5, - do_decode!(c2)::5, - do_decode!(c3)::5, - do_decode!(c4)::5, - bsr(do_decode!(c5), 1)::4 - >> - - <> -> - << - main::bits, - do_decode!(c1)::5, - do_decode!(c2)::5, - do_decode!(c3)::5, - do_decode!(c4)::5, - do_decode!(c5)::5, - do_decode!(c6)::5, - bsr(do_decode!(c7), 3)::2 - >> - - <> -> - << - main::bits, - do_decode!(c1)::5, - do_decode!(c2)::5, - do_decode!(c3)::5, - do_decode!(c4)::5, - do_decode!(c5)::5, - do_decode!(c6)::5, - do_decode!(c7)::5, - do_decode!(c8)::5 - >> - - <> -> - <> - - <> -> - << - main::bits, - do_decode!(c1)::5, - do_decode!(c2)::5, - do_decode!(c3)::5, - bsr(do_decode!(c4), 4)::1 - >> - - <> -> - << - main::bits, - do_decode!(c1)::5, - do_decode!(c2)::5, - do_decode!(c3)::5, - do_decode!(c4)::5, - bsr(do_decode!(c5), 1)::4 - >> - - <> -> - << - main::bits, - do_decode!(c1)::5, - do_decode!(c2)::5, - do_decode!(c3)::5, - do_decode!(c4)::5, - do_decode!(c5)::5, - do_decode!(c6)::5, - bsr(do_decode!(c7), 3)::2 - >> - end - end - - defp do_decode!(char) do - try do - elem({unquote_splicing(decoded)}, char - unquote(min)) - rescue - _ -> bad_character!(char) - else - nil -> bad_character!(char) - char -> char - end - end - - defp bad_character!(byte) do - raise ArgumentError, - "non-alphabet character found: #{inspect(<>, binaries: :as_strings)} (byte #{byte})" - end -end diff --git a/lib/type_id/base_32.ex b/lib/type_id/base_32.ex new file mode 100644 index 0000000..34804f5 --- /dev/null +++ b/lib/type_id/base_32.ex @@ -0,0 +1,58 @@ +defmodule TypeID.Base32 do + @moduledoc false + + # Implements base 32 encoding using the a lowercase crockford alphabet + # https://www.crockford.com/base32.html + + crockford_alphabet = ~c"0123456789abcdefghjkmnpqrstvwxyz" + + @spec encode(binary()) :: binary() + def encode( + <> + ) do + <> + end + + @spec decode(binary()) :: {:ok, binary()} | :error + def decode(string) when is_binary(string) do + {:ok, decode!(string)} + rescue + ArgumentError -> :error + end + + @spec decode!(binary()) :: binary() | no_return() + def decode!( + <> + ) do + <> + end + + @compile {:inline, [do_encode: 1]} + defp do_encode(byte) do + elem({unquote_splicing(crockford_alphabet)}, byte) + end + + for {char, byte} <- Enum.with_index(crockford_alphabet) do + defp do_decode(unquote(char)), do: unquote(byte) + end + + defp do_decode(char), do: bad_character!(char) + + defp bad_character!(byte) do + raise ArgumentError, + "non-alphabet character found: #{inspect(<>, binaries: :as_strings)} (byte #{byte})" + end +end diff --git a/test/type_id_test.exs b/test/type_id_test.exs index 3b76643..daa2676 100644 --- a/test/type_id_test.exs +++ b/test/type_id_test.exs @@ -26,12 +26,40 @@ defmodule TypeIDTest do describe "serialization" do test "to_string/1 and from_string!/1 are idempotent" do tid1 = TypeID.from_string!("test_01h44had5rfswbvpc383ktj0aa") + tid2 = tid1 |> TypeID.to_string() |> TypeID.from_string!() + assert tid1 == tid2 end + + test "from_string/1" do + assert {:ok, _} = TypeID.from_string("test_01h44xf16gf47v3s4khvc3c5ga") + assert :error == TypeID.from_string("-invalid_01h44xf16gf47v3s4khvc3c5ga") + end + + test "from!/2 and from/2 validates the prefix" do + assert_raise ArgumentError, fn -> + TypeID.from!("-invalid-prefix-", "01h44had5rfswbvpc383ktj0aa") + end + + assert :error == TypeID.from("-invalid-prefix-", "01h44had5rfswbvpc383ktj0aa") + end + + test "from!/2 and from/2 validate the suffix" do + assert_raise ArgumentError, fn -> + TypeID.from!("test", "0ih44had5rfswbvpc383ktj0aa") + end + + assert :error == TypeID.from("test", "0ih44had5rfswbvpc383ktj0aa") + end end + test "verification" do + tid = TypeID.from_string!("test_01h44yssjcf5daefvfr0yb70s8") + assert "test" == TypeID.type(tid) + assert "018909ec-e64c-795a-a73f-6fc03cb38328" == TypeID.uuid(tid) + end end