fix base 32 encoding

This commit is contained in:
Sloane Perrault 2023-06-30 06:03:07 -04:00
parent b34945f79d
commit 9a895737fd
No known key found for this signature in database
4 changed files with 87 additions and 231 deletions

View file

@ -104,7 +104,7 @@ defmodule TypeID do
end
defp validate_prefix!(prefix) do
unless prefix =~ ~r/[a-z]{0,63}/ do
unless prefix =~ ~r/^[a-z]{0,63}$/ do
raise ArgumentError, "invalid prefix: #{prefix}. prefix should match [a-z]{0,63}"
end

View file

@ -1,230 +0,0 @@
defmodule TypeID.Base32 do
@moduledoc false
import Bitwise
# Implements base 32 encoding using a lowercase Crockford alphabet
# https://www.crockford.com/base32.html
# Borrows heavily from the core `Base` module's implementation
# Everything in this section runs at compile time and only prepares the lookup
# tables that are spliced into do_encode/1 and do_decode!/1 further down.

# Upper-case Crockford alphabet as a charlist (32 symbols).
crockford_alphabet = ~c"0123456789ABCDEFGHJKMNPQRSTVWXYZ"
# Helper: maps every ?A..?Z character of a charlist to its lower-case form.
to_lower_enc = &Enum.map(&1, fn c -> if c in ?A..?Z, do: c - ?A + ?a, else: c end)
# Helper: same lower-casing, applied to the character of each {character, value} pair.
to_lower_dec =
&Enum.map(&1, fn {encoding, value} = pair ->
if encoding in ?A..?Z do
{encoding - ?A + ?a, value}
else
pair
end
end)
# The alphabet actually used for encoding: the Crockford symbols in lower case.
lower = to_lower_enc.(crockford_alphabet)
# Encoding table: one integer per ordered pair of alphabet characters, with the
# first character in the high byte and the second in the low byte. With 32
# symbols this gives 32 * 32 = 1024 entries, so the table is indexed by a
# 10-bit value and each entry holds the two output characters for it.
encoded = for e1 <- lower, e2 <- lower, do: bsl(e1, 8) + e2
# Helper: turns a list of {character, value} pairs into {min, list}, where
# `min` is the smallest character code and `list` holds, for every code in
# min..max, the value of that character (nil for codes not in the alphabet).
to_decode_list = fn alphabet ->
alphabet = Enum.sort(alphabet)
map = Map.new(alphabet)
{min, _} = List.first(alphabet)
{max, _} = List.last(alphabet)
{min, Enum.map(min..max, &map[&1])}
end
# Decoding table built from the lower-case alphabet paired with each symbol's
# index. `lower` is already lower case, so to_lower_dec leaves the pairs as is.
{min, decoded} =
lower
|> Enum.with_index()
|> to_lower_dec.()
|> to_decode_list.()
# Encodes `data` by handing it to do_encode/2 with an empty accumulator.
@spec encode(binary()) :: binary()
def encode(data) when is_binary(data), do: do_encode(data, "")
# Non-raising variant of decode!/1: wraps the result in {:ok, _} and turns an
# ArgumentError into :error. Other exceptions are not rescued.
@spec decode(binary()) :: {:ok, binary()} | :error
def decode(string) when is_binary(string) do
  try do
    {:ok, decode!(string)}
  rescue
    ArgumentError -> :error
  end
end
# Decodes `string` by delegating to do_decode!/1.
@spec decode!(binary()) :: binary() | no_return()
def decode!(string) when is_binary(string), do: do_decode!(string)
# Table lookup: returns the entry of the compile-time `encoded` table at index
# `byte`. Each entry is an integer holding two alphabet characters (first one
# in the high byte), so callers emit it as 16 bits or mask off the low byte.
# The table is spliced into the function body as a literal tuple.
@compile {:inline, [do_encode: 1]}
defp do_encode(byte) do
elem({unquote_splicing(encoded)}, byte)
end
# Encodes `data` 10 bits at a time (10 bits = two output characters) and
# appends the characters to `acc`. No padding characters are emitted.

# Main loop: consume 40 bits (5 bytes) and emit 8 characters, then recurse.
defp do_encode(<<c1::10, c2::10, c3::10, c4::10, rest::binary>>, acc) do
do_encode(
rest,
<<
acc::binary,
do_encode(c1)::16,
do_encode(c2)::16,
do_encode(c3)::16,
do_encode(c4)::16
>>
)
end
# Tail of 4 bytes (32 bits): three character pairs plus one character for the
# last 2 bits. Those bits are shifted into the top of a 5-bit value and only
# the low byte (second character) of the table entry is kept.
defp do_encode(<<c1::10, c2::10, c3::10, c4::2>>, acc) do
<<
acc::binary,
do_encode(c1)::16,
do_encode(c2)::16,
do_encode(c3)::16,
c4 |> bsl(3) |> do_encode() |> band(0x00FF)::8
>>
end
# Tail of 3 bytes (24 bits): two character pairs plus one character for the
# last 4 bits (shifted left by one to fill a 5-bit value).
defp do_encode(<<c1::10, c2::10, c3::4>>, acc) do
<<
acc::binary,
do_encode(c1)::16,
do_encode(c2)::16,
c3 |> bsl(1) |> do_encode() |> band(0x00FF)::8
>>
end
# Tail of 2 bytes (16 bits): one character pair plus a second pair for the
# last 6 bits (shifted left by four to fill a 10-bit value).
defp do_encode(<<c1::10, c2::6>>, acc) do
<<
acc::binary,
do_encode(c1)::16,
c2 |> bsl(4) |> do_encode()::16
>>
end
# Tail of 1 byte: shifted left by two to fill a 10-bit value, giving one pair.
defp do_encode(<<c1::8>>, acc) do
<<acc::binary, c1 |> bsl(2) |> do_encode()::16>>
end
# Nothing left to encode: the accumulator is the result.
defp do_encode(<<>>, acc) do
acc
end
# Decodes a whole encoded string. The empty string decodes to the empty binary.
defp do_decode!(<<>>), do: <<>>
# The input is split into `main` (every complete 8-character group except the
# last) and `rest` (the final 1..8 characters). Each 8-character group decodes
# to 40 bits; `rest` is handled by the `case` below.
#
# NOTE: `rest` shapes with no clause below (for example a tail of 1, 3 or 6
# characters) raise CaseClauseError rather than ArgumentError, so decode/1,
# which rescues only ArgumentError, does not turn them into :error.
defp do_decode!(string) when is_binary(string) do
segs = div(byte_size(string) + 7, 8) - 1
<<main::size(segs)-binary-unit(64), rest::binary>> = string
main =
for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
<<
do_decode!(c1)::5,
do_decode!(c2)::5,
do_decode!(c3)::5,
do_decode!(c4)::5,
do_decode!(c5)::5,
do_decode!(c6)::5,
do_decode!(c7)::5,
do_decode!(c8)::5
>>
end
# The `=`-padded clauses come first; otherwise the generic 8-character clause
# would match them and pass `=` to the per-character lookup. In every partial
# tail the last character is shifted right to drop the bits that do not belong
# to a whole output byte.
case rest do
<<c1::8, c2::8, ?=, ?=, ?=, ?=, ?=, ?=>> ->
<<main::bits, do_decode!(c1)::5, bsr(do_decode!(c2), 2)::3>>
<<c1::8, c2::8, c3::8, c4::8, ?=, ?=, ?=, ?=>> ->
<<
main::bits,
do_decode!(c1)::5,
do_decode!(c2)::5,
do_decode!(c3)::5,
bsr(do_decode!(c4), 4)::1
>>
<<c1::8, c2::8, c3::8, c4::8, c5::8, ?=, ?=, ?=>> ->
<<
main::bits,
do_decode!(c1)::5,
do_decode!(c2)::5,
do_decode!(c3)::5,
do_decode!(c4)::5,
bsr(do_decode!(c5), 1)::4
>>
<<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, ?=>> ->
<<
main::bits,
do_decode!(c1)::5,
do_decode!(c2)::5,
do_decode!(c3)::5,
do_decode!(c4)::5,
do_decode!(c5)::5,
do_decode!(c6)::5,
bsr(do_decode!(c7), 3)::2
>>
# Full, unpadded final group: 8 characters -> 40 bits.
<<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8>> ->
<<
main::bits,
do_decode!(c1)::5,
do_decode!(c2)::5,
do_decode!(c3)::5,
do_decode!(c4)::5,
do_decode!(c5)::5,
do_decode!(c6)::5,
do_decode!(c7)::5,
do_decode!(c8)::5
>>
# Unpadded partial tails: 2, 4, 5 or 7 characters -> 1, 2, 3 or 4 bytes.
<<c1::8, c2::8>> ->
<<main::bits, do_decode!(c1)::5, bsr(do_decode!(c2), 2)::3>>
<<c1::8, c2::8, c3::8, c4::8>> ->
<<
main::bits,
do_decode!(c1)::5,
do_decode!(c2)::5,
do_decode!(c3)::5,
bsr(do_decode!(c4), 4)::1
>>
<<c1::8, c2::8, c3::8, c4::8, c5::8>> ->
<<
main::bits,
do_decode!(c1)::5,
do_decode!(c2)::5,
do_decode!(c3)::5,
do_decode!(c4)::5,
bsr(do_decode!(c5), 1)::4
>>
<<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8>> ->
<<
main::bits,
do_decode!(c1)::5,
do_decode!(c2)::5,
do_decode!(c3)::5,
do_decode!(c4)::5,
do_decode!(c5)::5,
do_decode!(c6)::5,
bsr(do_decode!(c7), 3)::2
>>
end
end
# Decodes a single character code to its 5-bit value using the compile-time
# `decoded` table, which starts at character code `min`.
# Any exception from the lookup (for example an index outside the table) and
# any table slot holding nil (a code between min and max that is not in the
# alphabet) is reported through bad_character!/1.
defp do_decode!(char) do
try do
elem({unquote_splicing(decoded)}, char - unquote(min))
rescue
_ -> bad_character!(char)
else
nil -> bad_character!(char)
char -> char
end
end
# Raises ArgumentError naming the offending byte, both as text and as a number.
defp bad_character!(byte) do
  shown = inspect(<<byte>>, binaries: :as_strings)
  raise ArgumentError, "non-alphabet character found: #{shown} (byte #{byte})"
end
end

58
lib/type_id/base_32.ex Normal file
View file

@ -0,0 +1,58 @@
defmodule TypeID.Base32 do
  @moduledoc false

  # Fixed-width base 32 codec using a lowercase Crockford alphabet
  # (https://www.crockford.com/base32.html).
  #
  # Exactly 128 bits are encoded as exactly 26 characters. 26 characters hold
  # 130 bits, so the value is left-padded with two zero bits: the first
  # character carries only 3 bits of data and is therefore always in ?0..?7.

  crockford_alphabet = ~c"0123456789abcdefghjkmnpqrstvwxyz"

  # Encodes a 128-bit (16-byte) binary as a 26-character string.
  # Input of any other size matches no clause and raises FunctionClauseError.
  @spec encode(binary()) :: binary()
  def encode(<<_::128>> = data) do
    # Two leading zero bits bring the input to 130 bits = 26 groups of 5 bits.
    padded = <<0::2, data::binary>>

    for <<chunk::5 <- padded>>, into: <<>>, do: <<do_encode(chunk)>>
  end

  # Non-raising variant of decode!/1.
  # Returns {:ok, binary} on success and :error for any invalid string
  # (wrong length, character outside the alphabet, or overflowing first
  # character).
  @spec decode(binary()) :: {:ok, binary()} | :error
  def decode(string) when is_binary(string) do
    {:ok, decode!(string)}
  rescue
    ArgumentError -> :error
  end

  # Decodes a 26-character string into a 128-bit (16-byte) binary.
  # Raises ArgumentError when the string is not 26 bytes long, contains a
  # character outside the alphabet, or starts with a character above ?7.
  @spec decode!(binary()) :: binary() | no_return()
  def decode!(<<first, rest::binary-size(25)>>) do
    leading = decode_leading(first)
    trailing = for <<char <- rest>>, into: <<>>, do: <<do_decode(char)::5>>

    # 3 bits from the first character + 25 * 5 bits = 128 bits.
    <<leading::3, trailing::bitstring>>
  end

  # Wrong length is reported as ArgumentError so that decode/1 can return
  # :error instead of crashing with FunctionClauseError.
  def decode!(string) when is_binary(string) do
    raise ArgumentError,
          "invalid base 32 string: expected 26 bytes, got #{byte_size(string)}"
  end

  # Maps a 5-bit value (0..31) to its alphabet character. The alphabet is
  # spliced into the function body as a literal tuple at compile time.
  @compile {:inline, [do_encode: 1]}
  defp do_encode(byte) do
    elem({unquote_splicing(crockford_alphabet)}, byte)
  end

  # Decodes the first character, which may only carry 3 bits. Without this
  # check a larger value would be silently truncated when written into the
  # 3-bit segment, so different strings would decode to the same bytes.
  defp decode_leading(char) do
    case do_decode(char) do
      value when value < 8 ->
        value

      _overflow ->
        raise ArgumentError,
              "leading character overflows 128 bits: " <>
                "#{inspect(<<char>>, binaries: :as_strings)} (byte #{char})"
    end
  end

  # One do_decode/1 clause per alphabet character, generated at compile time,
  # mapping the character to its 5-bit value.
  for {char, byte} <- Enum.with_index(crockford_alphabet) do
    defp do_decode(unquote(char)), do: unquote(byte)
  end

  # Any other byte is not part of the alphabet.
  defp do_decode(char), do: bad_character!(char)

  defp bad_character!(byte) do
    raise ArgumentError,
          "non-alphabet character found: #{inspect(<<byte>>, binaries: :as_strings)} (byte #{byte})"
  end
end

View file

@ -26,12 +26,40 @@ defmodule TypeIDTest do
describe "serialization" do
  # Parsing a string, rendering it and parsing it again must give an equal value.
  test "to_string/1 and from_string!/1 are idempotent" do
    original = TypeID.from_string!("test_01h44had5rfswbvpc383ktj0aa")
    round_tripped = TypeID.from_string!(TypeID.to_string(original))

    assert original == round_tripped
  end

  # A well-formed string parses; one with an invalid prefix yields :error.
  test "from_string/1" do
    well_formed = "test_01h44xf16gf47v3s4khvc3c5ga"
    bad_prefix = "-invalid_01h44xf16gf47v3s4khvc3c5ga"

    assert {:ok, _} = TypeID.from_string(well_formed)
    assert :error == TypeID.from_string(bad_prefix)
  end

  # The raising variant raises and the non-raising variant returns :error.
  test "from!/2 and from/2 validates the prefix" do
    suffix = "01h44had5rfswbvpc383ktj0aa"

    assert_raise ArgumentError, fn -> TypeID.from!("-invalid-prefix-", suffix) end

    assert :error == TypeID.from("-invalid-prefix-", suffix)
  end

  # The suffix used here contains "i".
  test "from!/2 and from/2 validate the suffix" do
    bad_suffix = "0ih44had5rfswbvpc383ktj0aa"

    assert_raise ArgumentError, fn -> TypeID.from!("test", bad_suffix) end

    assert :error == TypeID.from("test", bad_suffix)
  end
end
# Checks a known TypeID string against its expected type and UUID string.
test "verification" do
  parsed = TypeID.from_string!("test_01h44yssjcf5daefvfr0yb70s8")

  assert "test" == TypeID.type(parsed)
  assert "018909ec-e64c-795a-a73f-6fc03cb38328" == TypeID.uuid(parsed)
end
end