bug-gettext
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] python-brace-format: support conversion specifier and unnamed ar


From: Terence D. Honles
Subject: [PATCH] python-brace-format: support conversion specifier and unnamed arguments
Date: Wed, 19 Apr 2023 00:37:49 +0200

This change updates the `python-brace-format` to detect a format string
using unnamed arguments (supported since Python 2.7 & 3.1), and also
allows specifying a conversion specifier which was previously overlooked
in the parser.

x-ref: https://docs.python.org/3/library/string.html#format-string-syntax
---
 gettext-tools/src/format-python-brace.c   | 255 +++++++++++++++++-----
 gettext-tools/src/format.h                |   4 +
 gettext-tools/src/xg-message.c            |   4 +-
 gettext-tools/tests/format-python-brace-1 |  34 ++-
 gettext-tools/tests/format-python-brace-2 |  15 ++
 5 files changed, 249 insertions(+), 63 deletions(-)

diff --git a/gettext-tools/src/format-python-brace.c 
b/gettext-tools/src/format-python-brace.c
index cd5307721..4d0175522 100644
--- a/gettext-tools/src/format-python-brace.c
+++ b/gettext-tools/src/format-python-brace.c
@@ -39,7 +39,7 @@
      https://docs.python.org/3/library/string.html#formatstrings
    A format string directive here consists of
      - an opening brace '{',
-     - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
+     - an optional identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
      - an optional sequence of
          - getattr ('.' identifier) or
          - getitem ('[' identifier ']')
@@ -61,6 +61,10 @@
              - 'e', 'E', 'f', 'F', 'g', 'G', 'n', '%' for floating-point 
values,
      - a closing brace '}'.
    Brace characters '{' and '}' can be escaped by doubling them: '{{' and '}}'.
+
+   When specifying a directive without an identifier it will be auto named
+   sequentially, and mixing named and unnamed arguments is not supported in
+   Python.
 */
 
 struct named_arg
@@ -72,10 +76,13 @@ struct spec
 {
   unsigned int directives;
   unsigned int named_arg_count;
+  unsigned int unnamed_arg_count;
   unsigned int allocated;
   struct named_arg *named;
 };
 
+#define INVALID_MIXES_NAMED_UNNAMED() \
+  xstrdup (_("The string refers to arguments both through argument names and 
through unnamed argument specifications."))
 
 /* Forward declaration of local functions.  */
 static void free_named_args (struct spec *spec);
@@ -143,6 +150,7 @@ parse_directive (struct spec *spec,
   const char *const format_start = format;
   const char *name_start;
   char c;
+  bool is_named = true;
 
   c = *++format;
   if (c == '{')
@@ -156,15 +164,20 @@ parse_directive (struct spec *spec,
   if (!parse_named_field (spec, &format, translated, fdi, invalid_reason)
       && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason))
     {
-      *invalid_reason =
-        xasprintf (_("In the directive number %u, '%c' cannot start a field 
name."),
-                   spec->directives, *format);
-      FDI_SET (format, FMTDIR_ERROR);
-      return false;
+      if (!is_toplevel)
+        {
+          *invalid_reason =
+            xasprintf (_("In the directive number %u, '%c' cannot start a 
field name."),
+                       spec->directives, *format);
+          FDI_SET (format, FMTDIR_ERROR);
+          return false;
+        }
+      else
+        is_named = false;
     }
 
   /* Parse '.' (getattr) or '[..]' (getitem) operators followed by a
-     name.  If must not recurse, but can be specifed in a chain, such
+     name.  It must not recurse, but can be specifed in a chain, such
      as "foo.bar.baz[0]".  */
   for (;;)
     {
@@ -213,7 +226,31 @@ parse_directive (struct spec *spec,
     }
 
   /* Here c == *format.  */
-  if (c == ':')
+  if (c == '!')
+    {
+      if (!is_toplevel)
+        {
+          *invalid_reason =
+            xasprintf (_("In the directive number %u, no more nesting is 
allowed in a format specifier."),
+                       spec->directives);
+          FDI_SET (format, FMTDIR_ERROR);
+          return false;
+        }
+
+      c = *++format;
+      if (c != 'r' && c != 's' && c != 'a')
+        {
+          *invalid_reason =
+            xasprintf (_("In the directive number %u, there is an invalid 
conversion for a format specifier."),
+                       spec->directives);
+          FDI_SET (format, FMTDIR_ERROR);
+          return false;
+        }
+
+      format++;
+    }
+
+  if (*format == ':')
     {
       if (!is_toplevel)
         {
@@ -318,26 +355,54 @@ parse_directive (struct spec *spec,
 
   if (is_toplevel)
     {
-      char *name;
-      size_t n = format - name_start;
+      if (is_named)
+        {
+          /* Named and unnamed specifications are exclusive.  */
+          if (spec->unnamed_arg_count > 0)
+            {
+              *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
+              FDI_SET (name_start - 1, FMTDIR_ERROR);
+              return false;
+            }
 
-      FDI_SET (name_start - 1, FMTDIR_START);
+          char *name;
+          size_t n = format - name_start;
 
-      name = XNMALLOC (n + 1, char);
-      memcpy (name, name_start, n);
-      name[n] = '\0';
+          FDI_SET (name_start - 1, FMTDIR_START);
 
-      spec->directives++;
+          name = XNMALLOC (n + 1, char);
+          memcpy (name, name_start, n);
+          name[n] = '\0';
 
-      if (spec->allocated == spec->named_arg_count)
-        {
-          spec->allocated = 2 * spec->allocated + 1;
-          spec->named = (struct named_arg *) xrealloc (spec->named, 
spec->allocated * sizeof (struct named_arg));
+          spec->directives++;
+
+          if (spec->allocated == spec->named_arg_count)
+            {
+              spec->allocated = 2 * spec->allocated + 1;
+              spec->named = (struct named_arg *) xrealloc (spec->named, 
spec->allocated * sizeof (struct named_arg));
+            }
+          spec->named[spec->named_arg_count].name = name;
+          spec->named_arg_count++;
+
+          FDI_SET (format, FMTDIR_END);
         }
-      spec->named[spec->named_arg_count].name = name;
-      spec->named_arg_count++;
+      else
+        {
+          /* Named and unnamed specifications are exclusive.  */
+          if (spec->named_arg_count > 0)
+            {
+              *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
+              FDI_SET (name_start - 1, FMTDIR_ERROR);
+              return false;
+            }
 
-      FDI_SET (format, FMTDIR_END);
+          FDI_SET (name_start - 1, FMTDIR_START);
+
+          spec->directives++;
+          spec->unnamed_arg_count++;
+
+          FDI_SET (format, FMTDIR_END);
+        }
     }
 
   *formatp = ++format;
@@ -383,6 +448,7 @@ format_parse (const char *format, bool translated, char 
*fdi,
 
   spec.directives = 0;
   spec.named_arg_count = 0;
+  spec.unnamed_arg_count = 0;
   spec.allocated = 0;
   spec.named = NULL;
 
@@ -456,49 +522,78 @@ format_check (void *msgid_descr, void *msgstr_descr, bool 
equality,
   struct spec *spec2 = (struct spec *) msgstr_descr;
   bool err = false;
 
-  if (spec1->named_arg_count + spec2->named_arg_count > 0)
+  if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
     {
-      unsigned int i, j;
-      unsigned int n1 = spec1->named_arg_count;
-      unsigned int n2 = spec2->named_arg_count;
-
-      /* Check the argument names in spec1 are contained in those of spec2.
-         Both arrays are sorted.  We search for the differences.  */
-      for (i = 0, j = 0; i < n1 || j < n2; )
+      if (error_logger)
+        error_logger (_("format specifications in '%s' expect a mapping, those 
in '%s' expect a tuple"),
+                      pretty_msgid, pretty_msgstr);
+      err = true;
+    }
+  else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
+    {
+      if (error_logger)
+        error_logger (_("format specifications in '%s' expect a tuple, those 
in '%s' expect a mapping"),
+                      pretty_msgid, pretty_msgstr);
+      err = true;
+    }
+  else
+    {
+      if (spec1->named_arg_count + spec2->named_arg_count > 0)
         {
-          int cmp = (i >= n1 ? 1 :
-                     j >= n2 ? -1 :
-                     strcmp (spec1->named[i].name, spec2->named[j].name));
+          unsigned int i, j;
+          unsigned int n1 = spec1->named_arg_count;
+          unsigned int n2 = spec2->named_arg_count;
 
-          if (cmp > 0)
+          /* Check the argument names in spec1 are contained in those of spec2.
+             Both arrays are sorted.  We search for the differences.  */
+          for (i = 0, j = 0; i < n1 || j < n2; )
             {
-              if (equality)
+              int cmp = (i >= n1 ? 1 :
+                         j >= n2 ? -1 :
+                         strcmp (spec1->named[i].name, spec2->named[j].name));
+
+              if (cmp > 0)
                 {
-                  if (error_logger)
-                    error_logger (_("a format specification for argument '%s' 
doesn't exist in '%s'"),
-                                  spec2->named[i].name, pretty_msgid);
-                  err = true;
-                  break;
+                  if (equality)
+                    {
+                      if (error_logger)
+                        error_logger (_("a format specification for argument 
'%s' doesn't exist in '%s'"),
+                                      spec2->named[i].name, pretty_msgid);
+                      err = true;
+                      break;
+                    }
+                  else
+                    j++;
                 }
-              else
-                j++;
-            }
-          else if (cmp < 0)
-            {
-              if (equality)
+              else if (cmp < 0)
                 {
-                  if (error_logger)
-                    error_logger (_("a format specification for argument '%s' 
doesn't exist in '%s'"),
-                                  spec1->named[i].name, pretty_msgstr);
-                  err = true;
-                  break;
+                  if (equality)
+                    {
+                      if (error_logger)
+                        error_logger (_("a format specification for argument 
'%s' doesn't exist in '%s'"),
+                                      spec1->named[i].name, pretty_msgstr);
+                      err = true;
+                      break;
+                    }
+                  else
+                    i++;
                 }
               else
-                i++;
+                j++, i++;
             }
-          else
-            j++, i++;
         }
+
+        if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
+          {
+            /* Check the argument types are the same.  */
+            if (spec1->unnamed_arg_count != spec2->unnamed_arg_count)
+              {
+                if (error_logger)
+                  error_logger (_("number of format specifications in '%s' and 
'%s' does not match"),
+                                pretty_msgid, pretty_msgstr);
+                err = true;
+              }
+          }
     }
 
   return err;
@@ -515,6 +610,29 @@ struct formatstring_parser formatstring_python_brace =
 };
 
 
+unsigned int
+get_python_brace_format_unnamed_arg_count (const char *string)
+{
+  /* Parse the format string.  */
+  char *invalid_reason = NULL;
+  struct spec *descr =
+    (struct spec *) format_parse (string, false, NULL, &invalid_reason);
+
+  if (descr != NULL)
+    {
+      unsigned int result = descr->unnamed_arg_count;
+
+      format_free (descr);
+      return result;
+    }
+  else
+    {
+      free (invalid_reason);
+      return 0;
+    }
+}
+
+
 #ifdef TEST
 
 /* Test program: Print the argument list specification returned by
@@ -534,14 +652,31 @@ format_print (void *descr)
       return;
     }
 
-  printf ("{");
-  for (i = 0; i < spec->named_arg_count; i++)
+  if (spec->named_arg_count > 0)
+    {
+      if (spec->unnamed_arg_count > 0)
+        abort ();
+
+      printf ("{");
+      for (i = 0; i < spec->named_arg_count; i++)
+        {
+          if (i > 0)
+            printf (", ");
+          printf ("'%s'", spec->named[i].name);
+        }
+      printf ("}");
+    }
+  else
     {
-      if (i > 0)
-        printf (", ");
-      printf ("'%s'", spec->named[i].name);
+      printf ("(");
+      for (i = 0; i < spec->unnamed_arg_count; i++)
+        {
+          if (i > 0)
+            printf (" ");
+          printf ("*");
+        }
+      printf (")");
     }
-  printf ("}");
 }
 
 int
diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h
index 4cd318654..cad2579af 100644
--- a/gettext-tools/src/format.h
+++ b/gettext-tools/src/format.h
@@ -148,6 +148,10 @@ extern void
    string.  */
 extern unsigned int get_python_format_unnamed_arg_count (const char *string);
 
+/* Returns the number of unnamed arguments consumed by a Python brace format
+   string.  */
+extern unsigned int get_python_brace_format_unnamed_arg_count (const char 
*string);
+
 /* Check whether both formats strings contain compatible format
    specifications for format type i (0 <= i < NFORMATS).
    Return the number of errors that were seen.  */
diff --git a/gettext-tools/src/xg-message.c b/gettext-tools/src/xg-message.c
index 7b053798c..c888bb1a5 100644
--- a/gettext-tools/src/xg-message.c
+++ b/gettext-tools/src/xg-message.c
@@ -214,8 +214,10 @@ static void
 warn_format_string (enum is_format is_format[NFORMATS], const char *string,
                     lex_pos_ty *pos, const char *pretty_msgstr)
 {
-  if (possible_format_p (is_format[format_python])
+  if ((possible_format_p (is_format[format_python])
       && get_python_format_unnamed_arg_count (string) > 1)
+      || (possible_format_p(is_format[format_python_brace])
+      && get_python_brace_format_unnamed_arg_count (string) > 1))
     {
       char buffer[21];
 
diff --git a/gettext-tools/tests/format-python-brace-1 
b/gettext-tools/tests/format-python-brace-1
index 0028f7fdb..6b4b17380 100755
--- a/gettext-tools/tests/format-python-brace-1
+++ b/gettext-tools/tests/format-python-brace-1
@@ -12,18 +12,32 @@ cat <<\EOF > f-pyb-1.data
 "abc{0}"
 # Valid: a named argument
 "abc{value}"
-# Invalid: an empty name
+# Valid: an unnamed argument
 "abc{}"
 # Invalid: unterminated name
 "abc{value"
 # Valid: three arguments, two with equal names
 "abc{addr},{char},{addr}"
+# Valid: three unnamed arguments
+"abc{},{},{}"
+# Invalid: mixing of named and unnamed arguments
+"abc{addr},{}"
 # Valid: getattr operator
 "abc{value.name}"
+# Valid: getattr operator on unnamed argument
+"abc{.name}"
 # Invalid: getattr operator with numeric field name
 "abc{value.0}"
+# Invalid: getattr operator with numeric field name on unnamed argument
+"abc{.0}"
 # Valid: getitem operator
 "abc{value[name]}"
+# Valid: getitem operator on unnamed argument
+"abc{[name]}"
+# Valid: numeric getitem operator
+"abc{value[0]}"
+# Valid: numeric getitem operator on unnamed argument
+"abc{[0]}"
 # Invalid: unterminated getitem operator
 "abc{value[name}"
 # Invalid: unterminated getitem operator
@@ -32,14 +46,28 @@ cat <<\EOF > f-pyb-1.data
 "abc{value[!]}"
 # Valid: use of both getattr and getitem operators
 "abc{value.v[name]}"
+# Valid: use of both getattr and getitem operators on unnamed argument
+"abc{.v[name]}"
 # Valid: use of both getitem and getattr operators
 "abc{value[name].v}"
+# Valid: use of both getitem and getattr operators on unnamed argument
+"abc{[name].v}"
+# Valid: conversion specifiers
+"abc{value!s},{value!r},{value!a}"
+# Invalid: conversion specifier
+"abc{value!x}"
 # Valid: format specifier
 "abc{value:0}"
+# Valid: conversion and format specifier
+"abc{value!s:0}"
 # Valid: format specifier after getattr operator
 "abc{value.name:0}"
+# Valid: conversion specifier after getattr operator
+"abc{value.name!r}"
 # Valid: format specifier after getitem operator
 "abc{value[name]:0}"
+# Valid: conversion specifier after getitem operator
+"abc{value[name]!r}"
 # Valid: standard format specifier
 "abc{value:<<-#012.34e}"
 # Invalid: empty precision
@@ -48,10 +76,12 @@ cat <<\EOF > f-pyb-1.data
 "abc{value:<c>}"
 # Valid: nested format specifier
 "abc{value:{foo}}"
-# Invalid: too many nesting of format specifier
+# Invalid: too much nesting of format specifier
 "abc{value:{foo:0}}"
 # Invalid: nested format specifier, in the middle of other format specifiers
 "abc{value:0{foo}0}"
+# Invalid: nested conversion specifier
+"abc{value:{foo!r}}"
 EOF
 
 : ${XGETTEXT=xgettext}
diff --git a/gettext-tools/tests/format-python-brace-2 
b/gettext-tools/tests/format-python-brace-2
index 376ad266e..4fb5ce0f1 100755
--- a/gettext-tools/tests/format-python-brace-2
+++ b/gettext-tools/tests/format-python-brace-2
@@ -7,6 +7,15 @@ cat <<\EOF > f-pyb-2.data
 # Valid: same named arguments
 msgid  "abc{date}{time}"
 msgstr "xyz{date}{time}"
+# Valid: same unnamed arguments
+msgid  "abc{}{}"
+msgstr "xyz{}{}"
+# Invalid: too few arguments
+msgid  "abc{}def{}"
+msgstr "xyz{}"
+# Invalid: too many arguments
+msgid  "abc{}def"
+msgstr "xyz{}vw{}"
 # Valid: permutation
 msgid  "abc{x3}{x1}{x2}def"
 msgstr "xyz{x2}{x1}{x3}"
@@ -19,6 +28,12 @@ msgstr "xyz{x2}"
 # Invalid: added argument
 msgid  "abc{foo}def"
 msgstr "xyz{foo}uvw{zoo}"
+# Invalid: unnamed vs. named arguments
+msgid  "abc{}def"
+msgstr "xyz{value}"
+# Invalid: named vs. unnamed arguments
+msgid  "abc{value}def"
+msgstr "xyz{}"
 # Valid: multiple reuse of same argument
 msgid  "{foo} {bar} {baz}"
 msgstr "{baz} {bar} {foo} {bar}"
-- 
2.40.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]