[6449] move XSParagraph C sources up a level

texinfo-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[6449] move XSParagraph C sources up a level

From:	Gavin D. Smith
Subject:	[6449] move XSParagraph C sources up a level
Date:	Wed, 22 Jul 2015 13:09:59 +0000
Revision: 6449
          http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=6449
Author:   gavin
Date:     2015-07-22 13:09:59 +0000 (Wed, 22 Jul 2015)
Log Message:
-----------
move XSParagraph C sources up a level

Modified Paths:
--------------
    trunk/ChangeLog
    trunk/tp/Texinfo/Convert/Plaintext.pm
    trunk/tp/Texinfo/Convert/XSParagraph/Makefile.am
    trunk/tp/Texinfo/Convert/XSParagraph/XSParagraph.xs

Added Paths:
-----------
    trunk/tp/Texinfo/Convert/XSParagraph/text.c
    trunk/tp/Texinfo/Convert/XSParagraph/text.h
    trunk/tp/Texinfo/Convert/XSParagraph/xspara.c
    trunk/tp/Texinfo/Convert/XSParagraph/xspara.h

Removed Paths:
-------------
    trunk/tp/Texinfo/Convert/XSParagraph/mylib/

Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog     2015-07-22 13:01:05 UTC (rev 6448)
+++ trunk/ChangeLog     2015-07-22 13:09:59 UTC (rev 6449)
@@ -1,5 +1,10 @@
 2015-07-22  Gavin Smith  <address@hidden>
+       
+       * tp/Texinfo/Convert/XSParagraph/mylib: Move all files up a 
+       level.
 
+2015-07-22  Gavin Smith  <address@hidden>
+
        * tp/Texinfo/Convert/XSParagraph/mylib/xspara.c (xspara_add_text):
        Remove space doubling at end of sentence within a @w.
 

Modified: trunk/tp/Texinfo/Convert/Plaintext.pm
===================================================================
--- trunk/tp/Texinfo/Convert/Plaintext.pm       2015-07-22 13:01:05 UTC (rev 6448)
+++ trunk/tp/Texinfo/Convert/Plaintext.pm       2015-07-22 13:09:59 UTC (rev 6449)
@@ -663,8 +663,8 @@
   if ($type eq 'line') {
     $container = Texinfo::Convert::Line->new($container_conf);
   } elsif ($type eq 'paragraph') {
-    $container = Texinfo::Convert::Paragraph->new($container_conf);
-    #$container = XSParagraph->new($container_conf);
+    #$container = Texinfo::Convert::Paragraph->new($container_conf);
+    $container = XSParagraph->new($container_conf);
   } elsif ($type eq 'unfilled') {
     $container = Texinfo::Convert::UnFilled->new($container_conf);
   } else {

Modified: trunk/tp/Texinfo/Convert/XSParagraph/Makefile.am
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/Makefile.am    2015-07-22 13:01:05 UTC (rev 6448)
+++ trunk/tp/Texinfo/Convert/XSParagraph/Makefile.am    2015-07-22 13:09:59 UTC (rev 6449)
@@ -71,8 +71,8 @@
 # -------------------------------------------------------
 
 modules_LTLIBRARIES = XSParagraph.la
-XSParagraph_la_SOURCES = XSParagraph.c mylib/xspara.c mylib/xspara.h \
-                        mylib/text.c mylib/text.h ppport.h
+XSParagraph_la_SOURCES = XSParagraph.c xspara.c xspara.h \
+                        text.c text.h ppport.h
 
 BUILT_SOURCES = XSParagraph.c
 

Modified: trunk/tp/Texinfo/Convert/XSParagraph/XSParagraph.xs
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/XSParagraph.xs 2015-07-22 13:01:05 UTC (rev 6448)
+++ trunk/tp/Texinfo/Convert/XSParagraph/XSParagraph.xs 2015-07-22 13:09:59 UTC (rev 6449)
@@ -8,7 +8,7 @@
 
 #include "ppport.h"
 
-#include "mylib/xspara.h"
+#include "xspara.h"
 
 
 MODULE = XSParagraph   PACKAGE = XSParagraph     PREFIX = xspara_      

Copied: trunk/tp/Texinfo/Convert/XSParagraph/text.c (from rev 6423, 
trunk/tp/Texinfo/Convert/XSParagraph/mylib/text.c)
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/text.c                         (rev 0)
+++ trunk/tp/Texinfo/Convert/XSParagraph/text.c 2015-07-22 13:09:59 UTC (rev 6449)
@@ -0,0 +1,62 @@
+/* Copyright 2014, 2015 */
+
+#define _GNU_SOURCE
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "text.h"
+
+/* Make sure there are LEN free bytes. */
+static void
+text_alloc (TEXT *t, size_t len)
+{
+  if (t->end + len > t->space)
+    {
+      /* FIXME: Double it instead? */
+      t->space = t->end + len;
+      if (t->space < 10)
+        t->space = 10;
+      t->text = realloc (t->text, t->space);
+      if (!t->text)
+        abort ();
+    }
+}
+
+void
+text_printf (TEXT *t, char *format, ...)
+{
+  va_list v;
+  char *s;
+
+  va_start (v, format);
+  vasprintf (&s, format, v);
+  text_append (t, s);
+  free (s);
+  va_end (v);
+}
+
+void
+text_append_n (TEXT *t, char *s, size_t len)
+{
+  text_alloc (t, len + 1);
+  memcpy (t->text + t->end, s, len);
+  t->end += len;
+  t->text[t->end] = '\0';
+}
+
+void
+text_append (TEXT *t, char *s)
+{
+  size_t len = strlen (s);
+  text_append_n (t, s, len);
+}
+
+void
+text_init (TEXT *t)
+{
+  t->end = t->space = 0;
+  t->text = 0;
+}

Copied: trunk/tp/Texinfo/Convert/XSParagraph/text.h (from rev 6391, 
trunk/tp/Texinfo/Convert/XSParagraph/mylib/text.h)
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/text.h                         (rev 0)
+++ trunk/tp/Texinfo/Convert/XSParagraph/text.h 2015-07-22 13:09:59 UTC (rev 6449)
@@ -0,0 +1,14 @@
+/* Copyright 2014, 2015 */
+
+typedef struct TEXT {
+    char *text;
+    size_t space;
+    size_t end;
+} TEXT;
+
+void text_init (TEXT *t);
+void text_append (TEXT *t, char *s);
+void text_append_n (TEXT *t, char *s, size_t len);
+void text_printf (TEXT *t, char *format, ...);
+
+#define text_base(t) ((t)->space ? (t)->text : (char *) 0)

Copied: trunk/tp/Texinfo/Convert/XSParagraph/xspara.c (from rev 6448, 
trunk/tp/Texinfo/Convert/XSParagraph/mylib/xspara.c)
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/xspara.c                               (rev 0)
+++ trunk/tp/Texinfo/Convert/XSParagraph/xspara.c       2015-07-22 13:09:59 UTC (rev 6449)
@@ -0,0 +1,1043 @@
+/* Copyright 2010, 2011, 2012, 2014, 2015 Free Software Foundation, Inc. */
+
+#define _GNU_SOURCE
+
+#include <config.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <locale.h>
+#ifndef _WIN32
+#include <langinfo.h>
+#endif
+#include <wchar.h>
+#include <wctype.h>
+
+/* See "How do I use all this in extensions?" in 'man perlguts'. */
+#define PERL_NO_GET_CONTEXT
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include "ppport.h"
+
+#include "xspara.h"
+
+#include "text.h"
+
+typedef struct {
+    TEXT space; /* Pending space, to be output before the pending word. */
+    TEXT word; /* Pending word.  If outputting this would have led to
+                  the line to be too long, the line should have been cut before
+                  saving it. */
+
+    /* When word.end == 0, this indicates a word of length 0. */
+    int invisible_pending_word;
+
+    /* Length of space in multibyte characters. */
+    int space_counter;
+
+    /* Characters added so far in current word. */
+    int word_counter; 
+
+    /* -2 means we are not at the end of a sentence (undefined in Perl),
+       1 means we are at the end of a sentence and French spacing is off,
+       -1 means we are at the end of a sentence and French spacing is on.
+       0 means it is "inhibited". */
+    int end_sentence;
+
+    int max; /* Maximum length of line. */
+    int indent_length; /* Columns to indent this line. */
+    int indent_length_next; /* Columns to indent the rest of the lines. */
+    int counter; /* Columns so far on this line. */
+
+    int lines_counter; /* Lines so far added in paragraph. */
+    int end_line_count; /* Number of newlines so far in an output unit, i.e.
+                           with add_text or add_next. */
+
+    wint_t last_letter; /* Last letter in word, used to decide if we're
+                            at the end of a sentence. */
+
+    /* Options set with set_space_protection. */
+    int protect_spaces; /* Line break forbidden, as in @w. */
+    int ignore_columns; /* Don't cut line at right margin.  Used by
+                           @flushleft and @flushright. */
+    int keep_end_lines; /* A newline in the input ends a line in the output.
+                           Used by @flushleft and @flushright. */
+    int french_spacing; /* Only one space, not two, after a full stop. */
+} PARAGRAPH;
+
+static PARAGRAPH state;
+
+#ifdef _WIN32
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <errno.h>
+
+char *
+w32_setlocale (int category, const char *value)
+{
+  if (_stricmp (value, "en_us.utf-8") != 0)
+    return NULL;
+
+  /* Switch to the Windows U.S. English locale with its default
+     codeset.  We will handle the non-ASCII text ourselves, so the
+     codeset is unimportant, and Windows doesn't support UTF-8 as the
+     codeset anyway.  */
+  return setlocale (category, "ENU");
+}
+#define setlocale(c,v)  w32_setlocale(c,v)
+
+size_t
+mbrlen (const char * __restrict__ mbs, size_t n, mbstate_t * __restrict__ ps)
+{
+  unsigned char byte1 = *mbs;
+
+  if (ps != NULL)
+    {
+      errno = ENOSYS;
+      return -1;
+    }
+
+  return
+    ((byte1 & 0x80) == 0) ? 1 : ((byte1 & 0x20) == 0) ? 2 :
+    ((byte1 & 0x10) == 0) ? 3 : 4;
+}
+
+/* Convert a UTF-8 encoded multibyte string to a wide character.  */
+size_t
+mbrtowc (wchar_t * __restrict__ pwc, const char * __restrict__ mbs, size_t n,
+        mbstate_t * __restrict__ ps)
+{
+  if (mbs == NULL)
+    return 0;
+  else
+    {
+      wchar_t wc[2];
+      size_t n_utf16 = MultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS,
+                                           mbs, n, wc, 2);
+      if (n_utf16 == 0)
+       {
+         errno = EILSEQ;
+         return (size_t)-1;
+       }
+      if (ps != NULL)
+       {
+         errno = ENOSYS;
+         return (size_t)-1;
+       }
+      /* We don't support UTF-16 surrogates, because the calling code
+        doesn't, and because character classification functions on
+        Windows don't support anything beyond the BMP anyway.  So we
+        return the first character of the surrogate pair and set
+        errno.  */
+      if (n_utf16 > 1)
+       errno = ENOSYS;
+      if (pwc != NULL)
+       *pwc = wc[0];
+
+      return mbrlen (mbs, n, ps);
+    }
+}
+
+int
+iswspace (wint_t wc)
+{
+  /* See Unicode's Proplist.txt.  */
+  if ((wc >= 0x09 && wc <= 0x0D)
+      || wc == 0x20
+      || wc == 0x85
+      || wc == 0xA0
+      || wc == 0x1680
+      || (wc >= 0x2000 && wc <= 0x200A)
+      || wc == 0x2028
+      || wc == 0x2029
+      || wc == 0x202F
+      || wc == 0x205F
+      || wc == 0x3000)
+    return 1;
+
+  return 0;
+}
+
+/* FIXME: Provide a real implementation.  */
+int
+wcwidth (const wchar_t wc)
+{
+  return wc == 0 ? 0 : 1;
+}
+
+int
+iswupper (wint_t wc)
+{
+  WORD char_type;
+  BOOL status = GetStringTypeW (CT_CTYPE1, &wc, 1, &char_type);
+
+  if (!status || (char_type & C1_UPPER) == 0)
+    return 0;
+
+  return 1;
+}
+
+#endif
+
+void
+xspara_hello (void)
+{
+  char *utf8_locale = 0;
+  int len;
+  char *cur;
+  char *dot;
+
+  dTHX;
+
+  //puts ("initializing XSParagraph");
+  if (setlocale (LC_CTYPE, "en_US.UTF-8")
+      || setlocale (LC_CTYPE, "en_US.utf8"))
+    goto success;
+
+  cur = setlocale (LC_CTYPE, 0); /* Name of current locale. */
+  if (!cur)
+    goto failure;
+  len = strlen (cur);
+  if (len >= 6 && !memcmp (".UTF-8", cur + len - 6, 6)
+      || len >= 5 && !memcmp (".utf8", cur + len - 5, 5)
+      || len >= 6 && !memcmp (".utf-8", cur + len - 6, 6)
+      || len >= 5 && !memcmp (".UTF8", cur + len - 5, 5))
+    {
+      setlocale (LC_CTYPE, ""); /* Use the locale from the environment. */
+      goto success;
+    }
+
+  /* Otherwise try altering the current locale name. */
+  dot = strchr (cur, '.');
+  if (!dot)
+    dot = cur + len;
+  utf8_locale = malloc (len + 6 + 1); /* enough to add ".UTF-8" to end */
+  memcpy (utf8_locale, cur, dot - cur);
+  dot = utf8_locale + (dot - cur);
+  memcpy (dot, ".UTF-8", 7);
+  if (setlocale (LC_CTYPE, utf8_locale))
+    goto success;
+
+  memcpy (dot, ".utf8", 6);
+  if (setlocale (LC_CTYPE, utf8_locale))
+    goto success;
+      
+  if (1)
+    {
+failure:
+      fprintf (stderr, "Couldn't set UTF-8 character type in locale.\n");
+      abort ();
+    }
+  else
+    {
+success: ;
+      free (utf8_locale);
+      /*
+      fprintf (stderr, "tried to set LC_CTYPE to UTF-8.\n");
+      fprintf (stderr, "character encoding is: %s\n",
+               nl_langinfo (CODESET));
+       */
+    }
+}
+
+int
+xspara_new (HV *conf)
+{
+  dTHX; /* Perl boiler plate */
+
+  /* Avoid leaking the memory used last time. */
+  free (state.space.text);
+  free (state.word.text);
+
+  //fprintf (stderr, "PARAGRAPH\n");
+
+  /* Default values for formatter. */
+  memset (&state, 0, sizeof (state));
+  state.max = 72;
+  state.indent_length_next = -1; /* Special value meaning undefined. */
+  state.end_sentence = -2; /* Special value meaning undefined. */
+  state.last_letter = L'\0';
+
+  if (conf)
+    xspara_set_state (conf);
+
+  /* This could be a paragraph ID. */
+  return 0;
+}
+
+
+/* Set the state internal to this C module from the Perl hash. */
+void
+xspara_set_state (HV *hash)
+{
+#define FETCH(key) hv_fetch (hash, key, strlen (key), 0)
+#define FETCH_INT(key,where) { val = FETCH(key); \
+                               if (val) { where = SvIV (*val); } }
+
+  SV **val;
+  
+  dTHX; /* This is boilerplate for interacting with Perl. */
+
+  /* None of this is really needed, under the big assumption that
+     we only have one "paragraph" object going at once. */
+
+  /* Alternatively, let the "paragraph" object be an integer giving
+     an index into an array of PARAGRAPH objects. */
+
+  /* We could have multiple paragraphs going at once for a footnote. */
+
+  /* Fetch all these so they are set, and reset for each paragraph. */
+  FETCH_INT("end_sentence", state.end_sentence);
+  FETCH_INT("max", state.max);
+
+  FETCH_INT("indent_length", state.indent_length);
+  FETCH_INT("indent_length_next", state.indent_length_next);
+  FETCH_INT("counter", state.counter); 
+
+  FETCH_INT("word_counter", state.word_counter);
+
+  FETCH_INT("lines_counter", state.lines_counter);
+  FETCH_INT("end_line_count", state.end_line_count);
+
+  FETCH_INT("protect_spaces", state.protect_spaces);
+  FETCH_INT("ignore_columns", state.ignore_columns);
+  FETCH_INT("keep_end_lines", state.keep_end_lines);
+  FETCH_INT("frenchspacing", state.french_spacing);
+
+  val = FETCH("word");
+  if (val)
+    {
+      fprintf (stderr, "Bug: setting 'word' is not supported.\n");
+      abort ();
+    }
+  val = FETCH("space");
+  if (val)
+    {
+      fprintf (stderr, "Bug: setting 'space' is not supported.\n");
+      abort ();
+    }
+  return;
+#if  0
+
+  val = FETCH("word");
+  if (!val)
+    {
+      report ("word is null");
+      abort ();
+    }
+
+  //free (state.word);
+  //state.word = SvPV (*val, state.word_len);
+  /* FIXME: strdup etc. */
+
+  return;
+
+  val = FETCH("space");
+  state.space.text = SvPV (*val, state.space.end);
+
+#endif
+
+
+#undef FETCH
+#undef FETCH_INT
+}
+
+/* Move the state back into the Perl hash. */
+void
+xspara_get_state (HV *hash)
+{
+  /* TODO: The last argument of hv_store would be a precomputed hash, which
+     would save the time of calculating it. */
+#define STORE(key) hv_store (hash, key, strlen (key), val, 0)
+
+  SV *val;
+
+  /* Don't do anything. */
+  return;
+
+  dTHX; /* Perl boilerplate. */
+
+  val = newSViv (state.end_sentence);
+  STORE("end_sentence");
+
+  val = newSViv (state.counter);
+  STORE("counter");
+
+  val = newSViv (state.word_counter);
+  STORE("word_counter");
+
+  val = newSViv (state.lines_counter);
+  STORE("lines_counter");
+
+  return;
+
+
+#undef STORE
+}
+
+
+/************************************************************************/
+
+
+/* Append a newline character to RESULT. */
+void
+xspara__cut_line (TEXT *result)
+{
+  if (!state.ignore_columns)
+    {
+      xspara__end_line ();
+
+      /* Another option is to use Perl strings, and use
+         "sv_catpv" for this. */
+      text_append (result, "\n");
+    }
+}
+
+int
+xspara_end_line_count (void)
+{
+  return state.end_line_count;
+}
+
+/* End a line (throwing away a pending space, which we don't need).
+   Note that _end_line in Paragraph.pm returned "\n". */
+void
+xspara__end_line (void)
+{
+  state.counter = 0;
+  state.space.end = 0;
+  state.space_counter = 0;
+
+  /* This will only be true for the first line of output. */
+  if (state.indent_length_next != -1)
+    {
+      state.indent_length = state.indent_length_next;
+      state.indent_length_next = -1;
+    }
+
+  state.lines_counter++;
+  state.end_line_count++;
+}
+
+char *
+xspara_end_line (void)
+{
+  state.end_line_count = 0;
+  xspara__end_line ();
+  return "\n";
+}
+
+/* Return concatenation of SPACE and WORD. */
+char *
+xspara_get_pending (void)
+{
+  TEXT t;
+  text_init (&t);
+  text_append_n (&t, state.space.text, state.space.end);
+  text_append_n (&t, state.word.text, state.word.end);
+  return t.text;
+}
+
+/* Append to RESULT pending space followed by pending word.  Assume we don't 
+   need to wrap a line.  Only add spaces without a word if ADD_SPACES. */
+void
+xspara__add_pending_word (TEXT *result, int add_spaces)
+{
+  if (state.word.end == 0 && !state.invisible_pending_word && !add_spaces)
+    return;
+
+  if (state.indent_length > state.counter)
+    {
+      int i;
+      /* If we are not up to the left margin yet, output spaces to get there, 
+         and ignore 'state.space', the pending space string.  In this case 
+         state.counter is probably 0.  */
+
+      //fprintf (stderr, "INDENT\n");
+      for (i = 0; i < state.indent_length - state.counter; i++)
+        text_append (result, " ");
+      state.counter = state.indent_length;
+    }
+  else if (state.space.end > 0)
+    {
+      //fprintf (stderr, "ADD_SPACES\n");
+      text_append_n (result, state.space.text, state.space.end);
+
+      state.counter += state.space_counter;
+      state.space.end = 0;
+      state.space_counter = 0;
+    }
+
+  if (state.word.end > 0 || state.invisible_pending_word)
+    {
+      //fprintf (stderr, "ADD_WORD\n");
+      text_append_n (result, state.word.text, state.word.end);
+      state.counter += state.word_counter;
+
+      state.word.end = 0;
+      state.word_counter = 0;
+      state.invisible_pending_word = 0;
+    }
+}
+
+/* Function for users of this module. */
+char *
+xspara_add_pending_word (int add_spaces)
+{
+  TEXT ret;
+
+  text_init (&ret);
+  state.end_line_count = 0;
+  xspara__add_pending_word (&ret, add_spaces);
+  if (ret.text)
+    return ret.text;
+  else
+    return "";
+}
+
+/* End a paragraph. */
+char *
+xspara_end (void)
+{
+  TEXT ret;
+  text_init (&ret);
+  state.end_line_count = 0;
+  xspara__add_pending_word (&ret, 0);
+  if (state.counter != 0)
+    {
+      text_append (&ret, "\n");
+      state.lines_counter++;
+      state.end_line_count++;
+    }
+  if (ret.text)
+    return ret.text;
+  else
+    return "";
+}
+
+/* Add WORD to paragraph in RESULT, not refilling WORD.  If we go past the end 
+   of the line start a new one. */
+// 184
+void
+xspara__add_next (TEXT *result,
+                  char *word, int word_len,
+                  char *space,
+                  int end_sentence,
+                  int transparent)
+{
+  //return word;
+
+  if (word) // 196
+    {
+      if (state.word.end == 0 && !state.invisible_pending_word)
+        {
+          /* Check if we are at the end of a sentence and if we need to
+             output two spaces after the full stop.  If so, check if the
+             word we are given begins with whitespace.  If it doesn't,
+             double the pending space.
+
+             We checked above if there was a pending word because if there
+             was, it is due to be output after the end-sentence whitespace,
+             not the string that was passed as an argument to this function.  
+           */
+
+          if (state.counter != 0 && state.space.end > 0
+              && state.end_sentence == 1 && !state.french_spacing)
+            {
+              wchar_t wc;
+              size_t char_len;
+
+              char_len = mbrtowc (&wc, word, 10, NULL);
+              if ((long) char_len > 0 && !iswspace (wc))
+                {
+                  /* Make the pending space up to two spaces. */
+                  while (state.space_counter < 2)
+                    {
+                      text_append_n (&state.space, " ", 1);
+                      state.space_counter++;
+                    }
+                }
+
+              state.end_sentence = -2;
+            }
+        }
+
+      text_append_n (&state.word, word, word_len); // 213
+
+      if (strchr (word, '\n')) // 215
+        {
+          /* If there was a newline in the word we just added, put the entire
+             pending output in the result string, and start a new line.
+             TODO: Does lines_counter get incremented properly in this
+             circumstance? */
+          /* TODO: Could we just call _add_pending_word here? */
+          text_append_n (result, state.space.text, state.space.end);
+          state.space.end = 0;
+          state.space_counter = 0;
+          text_append_n (result, state.word.text, state.word.end);
+          state.word.end = 0;
+          state.word_counter = 0;
+          state.invisible_pending_word = 0;
+
+          xspara__end_line ();
+        }
+      else
+        {
+          /* The possibility of two-column characters is ignored here. */
+
+          /* Calculate length of multibyte string in characters. */
+          int len = 0;
+          int left = word_len;
+          wchar_t w;
+          char *p = word;
+
+          while (left > 0)
+            {
+              int char_len = mbrtowc (&w, p, 10, NULL);
+              left -= char_len;
+              p += char_len;
+              len++;
+            }
+
+          state.word_counter += len;
+        }
+
+      /* TODO: Shift this into the "else" clause above, because 
+         xspara__end_line would have set state.counter to 0. */
+      if (state.counter != 0
+          && state.counter + state.word_counter + state.space_counter
+              > state.max)
+        {
+          xspara__cut_line (result); // 244
+        }
+    }
+
+  if (space) // 247
+    {
+      if (state.protect_spaces)
+        ;
+        /* The Perl code uses a non-existent "_add_text" function.  Presumably
+           this part of the code is never reached. */
+      else
+        {
+          xspara__add_pending_word (result, 0);
+
+          state.space.end = 0;
+          state.space_counter = 0;
+          text_append (&state.space, space);
+          state.space_counter++;
+          if (state.counter + state.space_counter > state.max)
+            xspara__cut_line (result); // 255
+        }
+    }
+
+  if (end_sentence != -2)
+    {
+      state.end_sentence = end_sentence;
+    }
+
+}
+
+/* Like _add_next but zero end_line_count at beginning. */
+char *
+xspara_add_next (char *text, int text_len, int end_sentence)
+{
+  TEXT t;
+
+  text_init (&t);
+  state.end_line_count = 0;
+  //fprintf (stderr, "PASSED EOS %d\n", end_sentence);
+  xspara__add_next (&t, text, text_len, 0, end_sentence, 0);
+
+  if (t.space > 0)
+    return t.text;
+  else
+    return "";
+}
+
+void
+xspara_inhibit_end_sentence (void)
+{
+  state.end_sentence = 0;
+}
+
+void
+xspara_allow_end_sentence (void)
+{
+  state.last_letter = L'a'; /* A lower-case letter. */
+}
+
+/* -1 in a parameter means leave that value as it is. */
+char *
+xspara_set_space_protection (int protect_spaces,
+                             int ignore_columns,
+                             int keep_end_lines,
+                             int french_spacing)
+{
+  if (protect_spaces != -1)
+    state.protect_spaces = protect_spaces;
+  if (ignore_columns != -1)
+    state.ignore_columns = ignore_columns;
+  if (keep_end_lines != -1)
+    state.keep_end_lines = keep_end_lines;
+
+  /*fprintf (stderr, "SETTING SPACE (%d, %d, %d, %d)\n",
+                                   protect_spaces,
+                                   ignore_columns,
+                                   keep_end_lines,
+                                   french_spacing);*/
+
+  /* If at the end of a sentence, and due to output the end of sentence
+     space, and we switch to French spacing, then make the space up to
+     two spaces.
+
+     FIXME: This seems back-to-front: We want two spaces if we switch FROM
+     French spacing. */
+
+  if (state.french_spacing == 0
+      && french_spacing != -1 && french_spacing != 0
+      && state.end_sentence != -2 && state.end_sentence != 0
+      && state.counter != 0
+      && state.space.end > 0
+      && state.word.end == 0 && !state.invisible_pending_word)
+    {
+      //fprintf (stderr, "SWITCH TO FRENCH SPACING\n");
+      while (state.space_counter < 2)
+        {
+          text_append_n (&state.space, " ", 1);
+          state.space_counter++;
+        }
+
+      /* End of sentence done. */
+      state.end_sentence = -2;
+    }
+
+  if (french_spacing != -1)
+    {
+      //fprintf (stderr, "setting french sp\n");
+      state.french_spacing = french_spacing;
+    }
+
+ if (protect_spaces != -1 && state.protect_spaces)
+   {
+     if (state.word.end == 0)
+       {
+         //fprintf (stderr, "ADD INVISIBLE WORD\n");
+         /* In _add_pending_word this meant that an "empty word" would
+            be output.  This makes "a @w{} b" -> "a  b", not "a b", and
+            "a @w{}" at end of paragraph -> "a ", not "a". */
+
+         state.invisible_pending_word = 1;
+       }
+   }
+
+ return ""; /* TODO: Check if we can remove this. */
+}
+
+/*****************************************************************/
+
+
+/* Return string to be added to paragraph contents, wrapping text. This 
+   function relies on there being a UTF-8 locale in LC_CTYPE for mbrtowc to
+   work correctly. */
+char *
+xspara_add_text (char *text)
+{
+  char *p = text;
+  wchar_t wc;
+  size_t char_len;
+  TEXT result;
+  dTHX;
+
+  text_init (&result);
+
+  state.end_line_count = 0;
+
+  while (*p)
+    {
+      char_len = mbrtowc (&wc, p, 10, NULL);
+      if ((long) char_len == 0)
+        break; /* Null character. Shouldn't happen. */
+      else if ((long) char_len < 0)
+        {
+          p++; /* Invalid.  Just try to keep going. */
+          continue;
+        }
+
+      if (iswspace (wc))
+        {
+          state.last_letter = L'\0';
+
+          /* If protect_spaces is on, ... */
+          if (state.protect_spaces) // 338
+            {
+              /* Append the spaces to the pending word. */
+              text_append_n (&state.word, p, char_len);
+              state.word_counter++;
+
+              if (strchr (state.word.text, '\n'))
+                {
+                  /* Replace any '\n' with a ' '. Note that state.word_counter 
+                     will still be correct after this. */
+                  char *ptr = state.word.text;
+                  while (*ptr)
+                    {
+                      if (*ptr == '\n')
+                        *ptr = ' ';
+                      ptr++;
+                    }
+                }
+
+              if (state.counter != 0
+                  && state.counter + state.word_counter + state.space_counter
+                     > state.max)
+                {
+                  xspara__cut_line (&result);
+                }
+            }
+          else // 368 /* protect_spaces off */
+            {
+              xspara__add_pending_word (&result, 0);
+
+              if (state.counter != 0)
+                {
+                  /* If we are at the end of a sentence where two spaces
+                     are required. */
+                  if (state.end_sentence == 1
+                      && !state.french_spacing) // 371
+                    {
+                      char *q = p + char_len;
+                      wchar_t q_char;
+                      size_t q_len;
+                      int at_least_two = 0;
+
+                      //fprintf (stderr, "DOUBLING SPACE\n");
+
+                      /* Check if the next character is whitespace as well. */
+                      q_len = mbrtowc (&q_char, q, 10, NULL);
+                      if ((long) q_len > 0)
+                        {
+                          if (iswspace (q_char))
+                            at_least_two = 1;
+                        }
+
+                      /* If we have an existing pending space, or if we have
+                         at least two whitespace characters in a row, set the
+                         pending space to be two whitespace characters.
+
+                         I don't know why we do it this way. */
+                      if (state.space_counter >= 1 || at_least_two)
+                        {
+                          if (state.space_counter > 0)
+                            {
+                              /* Truncate to at most 2 spaces, and replace any
+                                 '\n' or '\r' characters with ' '. */
+
+                              TEXT new_space;
+                              char *pspace;
+                              int len;
+                              int i;
+
+                              text_init (&new_space);
+                              pspace = state.space.text;
+                              state.space_counter = 0;
+
+                              for (i = 0; i < 2; i++)
+                                {
+                                  if (!*pspace)
+                                    break;
+                                  len = mbrlen (pspace, 10, NULL);
+
+                                  /* Substitute newlines in the pending space
+                                     with spaces. */
+                                  if (*pspace == '\n' || *pspace == '\r')
+                                    text_append_n (&new_space, " ", 1);
+                                  else
+                                    text_append_n (&new_space, pspace, len);
+                                  state.space_counter++;
+
+                                  pspace += len;
+                                }
+
+                              state.space.end = 0;
+                              text_append_n (&state.space,
+                                             new_space.text, new_space.end);
+                              free (new_space.text);
+                            }
+
+                          /* Now get characters from the input. */
+                          while (state.space_counter < 2)
+                            {
+                              if (*p == '\n' || *p == '\r')
+                                text_append_n (&state.space, " ", 1);
+                              else
+                                text_append_n (&state.space, p, char_len);
+                              state.space_counter++;
+
+                              p += char_len;
+                              char_len = mbrtowc (&wc, p, 10, NULL);
+                              if ((long) char_len <= 0 || !iswspace (wc))
+                                break;
+                            }
+
+                          /* Skip any more following whitespace. */
+                          while ((long) char_len > 0 && iswspace (wc))
+                            {
+                              p += char_len;
+                              char_len = mbrtowc (&wc, p, 10, NULL);
+                            }
+
+                          /* Make it up to two characters. */
+                          while (state.space_counter < 2)
+                            {
+                              text_append_n (&state.space, " ", 1);
+                              state.space_counter++;
+                            }
+
+                          /* Reset the end_sentence flag. */
+                          state.end_sentence = -2; // 381
+                          continue;
+                        }
+                      else
+                        {
+                          /* Otherwise, an extra space is added
+                             in _add_next. */
+                          state.space.end = 0;
+                          state.space_counter = 0;
+                          if (*p == '\n' || *p == '\r')
+                            text_append_n (&state.space, " ", 1);
+                          else
+                            text_append_n (&state.space, p, char_len);
+                          state.space_counter++;
+                        }
+
+                    }
+                  else /* Not at end of sentence. */ // 388
+                    {
+                      //fprintf (stderr, "NOT END S\n");
+                      /* Only save the first space. */
+                      if (state.space_counter < 1)
+                        {
+                          if (*p == '\n' || *p == '\r')
+                            text_append_n (&state.space, " ", 1);
+                          else
+                            text_append_n (&state.space, p, char_len); // 391
+                          state.space_counter++;
+                        }
+                    }
+                }
+            } // 394
+
+          /* If not enough space in the line for the pending space, start
+             a new line. */
+          if (state.counter + state.space_counter > state.max)
+            {
+              xspara__cut_line (&result); // 400
+            }
+
+          if (*p == '\n' && state.keep_end_lines)
+            {
+              xspara__end_line ();
+              text_append (&result, "\n");
+            }
+        }
+      else /************** Not a white space character. *****************/
+        {
+          int width = wcwidth (wc);
+          /*************** Double width character. *********************/
+          if (width == 2) // 406
+            {
+
+              state.last_letter = L'\0';
+
+              /* It appears we allow a line break in between Chinese characters
+                 even if there was no space between them, unlike single-width 
+                 characters. */
+
+              /* Append wc to state.word. */
+              text_append_n (&state.word, p, char_len);
+
+              state.word_counter += 2;
+
+              if (state.counter != 0
+                  && state.counter + state.word_counter > state.max)
+                {
+                  xspara__cut_line (&result);
+                }
+              xspara__add_pending_word (&result, 0);
+              state.end_sentence = -2;
+            }
+          /*************** Word character ******************************/
+          else if (width == 1) // 427
+            {
+              char *added_word;
+              added_word = malloc (char_len + 1);
+              memcpy (added_word, p, char_len);
+              added_word[char_len] = '\0';
+
+              xspara__add_next (&result, added_word, char_len, NULL, -2, 0);
+              free (added_word);
+
+              /* Now check if it is considered as an end of sentence, and
+                 set state.end_sentence if it is. */
+
+              if (strchr (".?!", *p))
+                {
+                  /* Doesn't count if preceded by an upper-case letter. */
+                  if (!iswupper (state.last_letter))
+                    {
+                      //fprintf (stderr, "END_SENTENCE (%d)\n",
+                               //state.french_spacing);
+
+                      if (state.french_spacing)
+                        state.end_sentence = -1;
+                      else
+                        state.end_sentence = 1;
+                    }
+                }
+              else if (strchr ("\"')]", *p))
+                {
+                  /* '"', '\'', ']' and ')' are ignored for the purpose
+                   of deciding whether a full stop ends a sentence. */
+                }
+              else
+                {
+                  /* Otherwise reset the end of sentence marker: a full stop in
+                     a string like "aaaa.bbbb" doesn't mark an end of 
+                     sentence. */
+                  state.end_sentence = -2;
+                  state.last_letter = wc;
+                }
+            }
+          else if (wc == L'\b')
+            {
+              /* Code to say that a following full stop (or question or
+                 exclamation mark) may be an end of sentence. */
+              xspara_allow_end_sentence ();
+            }
+          else
+            {
+              /* Not printable, possibly a tab, or a combining character.
+                 Add it to the pending word without increasing the column 
+                 count. */
+              text_append_n (&state.word, p, char_len);
+            }
+        }
+
+      p += char_len;
+    }
+
+  if (result.space > 0)
+    return result.text;
+  else
+    return "";
+  /* TODO: I still don't know if Perl's PV's need unique storage, on the
+     heap or w/e. */
+
+}
+
+

Copied: trunk/tp/Texinfo/Convert/XSParagraph/xspara.h (from rev 6391, trunk/tp/Texinfo/Convert/XSParagraph/mylib/xspara.h)
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/xspara.h                               (rev 0)
+++ trunk/tp/Texinfo/Convert/XSParagraph/xspara.h       2015-07-22 13:09:59 UTC (rev 6449)
@@ -0,0 +1,13 @@
+int xspara_new (HV *conf); /* HV is Perl's hash type; contents of CONF not visible here -- TODO confirm in xspara.c.  */
+void xspara_set_state (HV *state);
+void xspara_get_state (HV *state);
+void xspara_hello (void);
+char *xspara_add_next (char *, int, int end_sentence);
+char *xspara_add_text (char *); /* NOTE(review): xspara.c appears to return the accumulated result text, or "" if none was allocated -- confirm.  */
+char *xspara_set_space_protection (int space_protection, int ignore_columns,
+                             int keep_end_lines, int french_spacing); /* NOTE(review): xspara.c reads state.keep_end_lines and state.french_spacing; presumably set from these -- confirm.  */
+void xspara__end_line (void); /* Returns nothing: the text-adding code in xspara.c calls this, then appends "\n" to its own result.  */
+char *xspara_end_line (void);
+char *xspara_get_pending (void);
+char *xspara_end (void);
+char *xspara_add_pending_word (int add_spaces);
[Prev in Thread]
Current Thread
[Next in Thread]
[6449] move XSParagraph C sources up a level, Gavin D. Smith <=
Prev by Date: [6448] end of sentence in @w
Next by Date: [6450] move XSParagraph.pm up a level
Previous by thread: [6448] end of sentence in @w
Next by thread: [6450] move XSParagraph.pm up a level
Index(es):
- Date
- Thread