bug-gettext
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [bug-gettext] [PATCH] its: Add new preserveSpaceRule "paragraph"


From: Bruno Haible
Subject: Re: [bug-gettext] [PATCH] its: Add new preserveSpaceRule "paragraph"
Date: Tue, 26 Mar 2019 23:17:45 +0100
User-agent: KMail/5.1.3 (Linux/4.4.0-141-generic; KDE/5.18.0; x86_64; ; )

Hi Daiki,

> I have rewritten the
> loop with the parallel pointers instead of memmove as attached.  As a
> bonus, the behavior becomes closer to intltool: now it can normalize
> paragraph boundaries with "\n\n".

Sorry, I did not review the rewrite in time. (With variables named 'p'
and 'pp' and state variables like 'last_ws', the code was frightening.)

But the test xgettext-its-1 fails. The reason is that in these two lines

                pp += strspn (pp, " \t\n");
                if (*pp == '\n')

the condition (*pp == '\n') is always false: strspn() has just skipped past
all whitespace characters, newlines included, so *pp can never be a newline.

I fixed this by replacing this condition with a memchr() invocation.
While at it, I also removed the state variable 'last_ws' and the
initial trim() call, and renamed the variables to identifiers that
are more descriptive.

Bruno


index 7f6a1c0..f67c05e 100755
diff --git a/autogen.sh b/autogen.sh
*** a/autogen.sh
--- b/autogen.sh
***************
*** 226,231 ****
--- 226,232 ----
        locale
        localename
        lock
+       memchr
        memmove
        memset
        minmax
index c542db4..8b72e1e 100644
diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c
*** a/gettext-tools/src/its.c
--- b/gettext-tools/src/its.c
***************
*** 403,463 ****
        /* Normalize whitespaces within the text, keeping paragraph
           boundaries.  */
        {
!         char *result, *p, *out;
! 
!         result = trim (text);
!         for (p = out = result; *p != '\0';)
            {
!             char *pp, *pend = NULL, *next = NULL;
!             bool last_ws = false;
  
!             /* Find a paragraph boundary.  */
!             for (pp = p; *pp != '\0';)
!               {
!                 char *nl = strchrnul (pp, '\n');
!                 if (*nl == '\0')
!                   {
!                     pend = nl;
!                     next = pend;
!                     break;
!                   }
!                 pp = nl + 1;
!                 pp += strspn (pp, " \t\n");
!                 if (*pp == '\n')
!                   {
!                     pend = nl;
!                     next = pp + 1;
!                     break;
!                   }
!               }
  
!             /* Normalize whitespaces in the paragraph.  */
!             assert (pend != NULL);
!             for (pp = p; pp < pend; pp++)
!               if (!(*pp == ' ' || *pp == '\t' || *pp == '\n'))
!                 break;
!             for (; pp < pend; pp++)
!               {
!                 if (*pp == ' ' || *pp == '\t' || *pp == '\n')
                    {
!                     if (!last_ws)
                        {
!                         *out++ = ' ';
!                         last_ws = true;
                        }
                    }
!                 else
!                   {
!                     *out++ = *pp;
!                     last_ws = false;
!                   }
!               }
!             if (*pend != '\0')
                {
                  memcpy (out, "\n\n", 2);
                  out += 2;
                }
!             p = next;
            }
          *out = '\0';
          return result;
--- 403,480 ----
        /* Normalize whitespaces within the text, keeping paragraph
           boundaries.  */
        {
!         char *result = xstrdup (text);
!         /* Go through the string, shrinking it, reading from *p++
!            and writing to *out++.  (result <= out <= p.)  */
!         const char *start_of_paragraph;
!         char *out;
! 
!         out = result;
!         for (start_of_paragraph = result; *start_of_paragraph != '\0';)
            {
!             const char *end_of_paragraph;
!             const char *next_paragraph;
  
!             /* Find the next paragraph boundary.  */
!             {
!               const char *p;
  
!               for (p = start_of_paragraph;;)
!                 {
!                   const char *nl = strchrnul (p, '\n');
!                   if (*nl == '\0')
!                     {
!                       end_of_paragraph = nl;
!                       next_paragraph = end_of_paragraph;
!                       break;
!                     }
!                   p = nl + 1;
                    {
!                     const char *past_whitespace = p + strspn (p, " \t\n");
!                     if (memchr (p, '\n', past_whitespace - p) != NULL)
                        {
!                         end_of_paragraph = nl;
!                         next_paragraph = past_whitespace;
!                         break;
                        }
+                     p = past_whitespace;
                    }
!                 }
!             }
! 
!             /* Normalize whitespaces in the paragraph.  */
!             {
!               const char *p;
! 
!               /* Remove whitespace at the beginning of the paragraph.  */
!               for (p = start_of_paragraph; p < end_of_paragraph; p++)
!                 if (!(*p == ' ' || *p == '\t' || *p == '\n'))
!                   break;
! 
!               for (; p < end_of_paragraph;)
!                 {
!                   if (*p == ' ' || *p == '\t' || *p == '\n')
!                     {
!                       /* Normalize whitespace inside the paragraph, and
!                          remove whitespace at the end of the paragraph.  */
!                       do
!                         p++;
!                       while (p < end_of_paragraph
!                              && (*p == ' ' || *p == '\t' || *p == '\n'));
!                       if (p < end_of_paragraph)
!                         *out++ = ' ';
!                     }
!                   else
!                     *out++ = *p++;
!                 }
!             }
! 
!             if (*next_paragraph != '\0')
                {
                  memcpy (out, "\n\n", 2);
                  out += 2;
                }
!             start_of_paragraph = next_paragraph;
            }
          *out = '\0';
          return result;
index 975a547..22e9163 100755
diff --git a/gettext-tools/tests/xgettext-its-1 b/gettext-tools/tests/xgettext-its-1
*** a/gettext-tools/tests/xgettext-its-1
--- b/gettext-tools/tests/xgettext-its-1
***************
*** 176,185 ****
      This is the first paragraph with
  a newline.
    
!     This is  the  second paragprah with spaces.
  
  
!     This is the last paragraph.</p>
    </message>
    <message>
      <p xml:space="paragraph">This is the only one paragraph</p>
--- 176,185 ----
      This is the first paragraph with
  a newline.
    
!     This is  the  second paragraph with spaces.
  
  
!     This is the last paragraph.     </p>
    </message>
    <message>
      <p xml:space="paragraph">This is the only one paragraph</p>
***************
*** 277,283 ****
  msgid ""
  "This is the first paragraph with a newline.\n"
  "\n"
! "This is the second paragprah with spaces.\n"
  "\n"
  "This is the last paragraph."
  msgstr ""
--- 277,283 ----
  msgid ""
  "This is the first paragraph with a newline.\n"
  "\n"
! "This is the second paragraph with spaces.\n"
  "\n"
  "This is the last paragraph."
  msgstr ""




reply via email to

[Prev in Thread] Current Thread [Next in Thread]