poke-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 3/4] std.pk: Implement strtok


From: Jose E. Marchesi
Subject: Re: [PATCH 3/4] std.pk: Implement strtok
Date: Sun, 29 Jan 2023 17:51:03 +0100
User-agent: Gnus/5.13 (Gnus v5.13)

Hi Arsen.

Thanks for the patch.

> diff --git a/doc/poke.texi b/doc/poke.texi
> index 54a369c8..ff498220 100644
> --- a/doc/poke.texi
> +++ b/doc/poke.texi
> @@ -15720,6 +15720,7 @@ work on strings:
>  * rtrim::            Remove trailing characters.
>  * strchr::              Locate a character in a string, from the beginning.
>  * strrchr::          Locate a character in a string, from the end.
> +* strtok::              String tokenization.
>  @end menu

Again the weird indentation in the menu.

>  @node ltrim
> @@ -15776,6 +15777,97 @@ It returns the index of the last occurrence of the character @var{c}
>  in the string @var{s}.  If the character is not found in the string,
>  this function returns -1.
>  
> +@node strtok
> +@subsection @code{strtok}
> +@cindex @code{strtok}
> +@cindex string, tokenizing
> +
> +@code{strtok} is a helper for tokenizing strings.  The synopsis of
> +this API is:
> +
> +@example
> +type Tokenizer =
> +  struct
> +  @{
> +    uint<64> i;
> +    string str;
> +    computed uint<32> more;
> +
> +    method get_more = uint<32>: @{ @dots{} @}
> +
> +    method peek = char: @{ @dots{} @}
> +    method pop = char: @{ @dots{} @}
> +
> +    method pop_number = (int<32> @var{base} = 10) int<64>: @{ @dots{} @}
> +
> +    method popdelim = (string @var{delimiters}) string: @{ @dots{} @}
> +    method poprdelim = (string @var{delimiters}) string: @{ @dots{} @}
> +  @}
> +
> +fun strtok = (string @var{a}) Tokenizer: @{ @dots{} @}
> +@end example

What about calling the type String_Tokenizer instead of Tokenizer?  It is
less generic.

> +@deftypefun Tokenizer strtok (string @var{a})
> +Creates a new tokenizer for the string @var{a}, initially on the zero
> +position.
> +@end deftypefun
> +
> +@cindex @code{Tokenizer}
> +The members of the @code{Tokenizer} class are:
> +
> +@deftypeivar Tokenizer uint<64> i
> +Offset to the next character to be tokenized, i.e. to the first
> +character that has not already been consumed.
> +@end deftypeivar
> +
> +@deftypeivar Tokenizer string str
> +The string being tokenized.  This string is never modified.
> +@end deftypeivar
> +
> +@deftypeivar Tokenizer uint<32> more
> +A read-only computed property whose value is truthy if there are more
> +characters and falsey otherwise.
> +@end deftypeivar
> +
> +@deftypemethod Tokenizer char peek ()
> +Returns the first unread character of the string, but does not advance
> +the @var{i} offset.
> +
> +Raises @code{E_out_of_bounds} if at the end of the string.
> +@end deftypemethod
> +
> +@deftypemethod Tokenizer char pop ()
> +Returns the first unread character of the string, and advances the
> +tokenizer.
> +
> +Raises @code{E_out_of_bounds} if at the end of the string.
> +@end deftypemethod
> +
> +@deftypemethod Tokenizer int<64> pop_number (int<32> @var{base} = 10)
> +Returns the number, in the given @var{base}, at the current position
> +and advances the tokenizer past it.  The bases that are supported are
> +the same as for @code{strtoi}.
> +@xref{strtoi} for a list of supported bases.
> +
> +Raises @code{E_out_of_bounds} if at the end of the string.
> +@end deftypemethod
> +
> +@deftypemethod Tokenizer string popdelim (string @var{delim})
> +Returns the substring up to the first character also present in the
> +string @var{delim}.  Advances the tokenizer to after the delimiter
> +character (i.e. it consumes the delimiter character).
> +
> +Raises @code{E_out_of_bounds} if at the end of the string.
> +@end deftypemethod
> +
> +@deftypemethod Tokenizer string poprdelim (string @var{delim})
> +Returns the substring up to the last character also present in the
> +string @var{delim}.  Advances the tokenizer to after the delimiter
> +character (i.e. it consumes the delimiter character).
> +
> +Raises @code{E_out_of_bounds} if at the end of the string.
> +@end deftypemethod
> +
>  @node Character Functions
>  @section Character Functions
>  The Poke standard library provides the following functions to deal
> @@ -16263,6 +16355,9 @@ It returns an offset that is the result of aligning the given
>  @node Concept Index
>  @appendixsec Concept Index
>  
> +@c Merge the findex into the cpindex.
> +@syncodeindex fn cp
> +
>  @printindex cp
>  
>  @node PVM Instruction Index
> diff --git a/libpoke/std.pk b/libpoke/std.pk
> index a9ed3305..02776052 100644
> --- a/libpoke/std.pk
> +++ b/libpoke/std.pk
> @@ -546,3 +546,107 @@ fun isxdigit = (uint<8> c) int<32>:
>  {
>    return (c - '0' < 10UB) || ((c | 0x20UB) - 'a' < 6UB);
>  }
> +
> +/* A helper object to tokenize a string.  */
> +
> +type Tokenizer =
> +  struct
> +  {
> +    uint<64> i;
> +    string str;
> +    computed uint<32> more;
> +
> +    method get_more = uint<32>:
> +    {
> +      return i < str'length;
> +    }
> +
> +    method peek = uint<8>:
> +    {
> +      if (!get_more ())
> +        raise E_out_of_bounds;
> +
> +      return str[i];
> +    }
> +
> +    method pop = uint<8>:
> +    {
> +      if (!get_more ())
> +        raise E_out_of_bounds;
> +      return str[i++];
> +    }
> +
> +    method pop_number = (int<32> base = 10) int<64>:
> +    {
> +      if (!get_more ())
> +        raise E_out_of_bounds;
> +
> +      var res = strtoi (str, base, i);
> +      if (res.off == i)
> +        raise E_inval;
> +
> +      i = res.off;
> +      return res.val;
> +    }
> +
> +    method popdelim = (string delimiters) string:
> +    {
> +      /* TODO(arsen): Replace with some set-based thing at some point.  */
> +      if (delimiters'length == 0)
> +        raise E_inval;
> +
> +      if (!get_more ())
> +        raise E_out_of_bounds;
> +
> +      var j = i;
> +      for (; j < str'length; j++)
> +        {
> +          var c = str[j];
> +          for (d in delimiters)
> +            {
> +              if (c != d)
> +                continue;
> +
> +              var s = str[i:j];
> +              i = j + 1;
> +              return s;
> +            }
> +        }
> +      var s = str[i:];
> +      i = str'length;
> +      return s;
> +    }
> +
> +    method poprdelim = (string delimiters) string:
> +    {
> +      /* TODO(arsen): Replace with some set-based thing at some point.  */
> +      if (delimiters'length == 0)
> +        raise E_inval;
> +
> +      if (!get_more ())
> +        raise E_out_of_bounds;
> +
> +      var j = str'length - 1;
> +      for (; j >= i && j < str'length; j--)
> +        {
> +          var c = str[j];
> +          for (d in delimiters)
> +            {
> +              if (c != d)
> +                continue;
> +
> +              var s = str[i:j];
> +              i = j + 1;
> +              return s;
> +            }
> +        }
> +      var s = str[i:];
> +      i = str'length;
> +      return s;
> +    }
> +  };
> +
> +fun strtok = (string a) Tokenizer:
> +{
> +  return Tokenizer { str = a, i = 0 };
> +}
> diff --git a/testsuite/poke.std/std-test.pk b/testsuite/poke.std/std-test.pk
> index 924767f5..5b8de358 100644
> --- a/testsuite/poke.std/std-test.pk
> +++ b/testsuite/poke.std/std-test.pk
> @@ -382,6 +382,48 @@ var tests = [
>          assert (x.val == 1);
>        },
>    },
> +  PkTest {
> +    name = "strtok",
> +    func = lambda (string name) void:
> +      {
> +        var x = strtok ("abc123def-foo-bar-baz-xy");
> +        try
> +          {
> +            /* Test whether we emit E_inval for zero-width number parses.  */
> +            x.pop_number ();
> +            assert (0, "unreachable reached!");
> +          }
> +        catch if E_inval
> +          {
> +            assert (1, "expects exception");
> +          }
> +        assert (x.pop () == 'a');
> +        assert (x.more);
> +        assert (x.pop () == 'b');
> +        assert (x.more);
> +        assert (x.pop () == 'c');
> +        assert (x.more);
> +        assert (x.pop_number (8) == 0o123);
> +        assert (x.more);
> +        assert (x.popdelim ("-") == "def");
> +        assert (x.more);
> +        assert (x.peek () == 'f');
> +        assert (x.more);
> +        assert (x.poprdelim ("-") == "foo-bar-baz");
> +        assert (x.more);
> +        assert (x.popdelim ("-") == "xy");
> +        assert (!x.more);
> +        try
> +          {
> +            x.pop ();
> +            assert (0, "unreachable reached!");
> +          }
> +        catch if E_out_of_bounds
> +          {
> +            assert (1, "expects exception");
> +          }
> +      },
> +  },
>  ];
>  
>  exit (pktest_run (tests) ? 0 : 1);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]