bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH]: Parse ISO 8601 extended date and time of day format


From: J.T. Conklin
Subject: [PATCH]: Parse ISO 8601 extended date and time of day format
Date: Wed, 17 Aug 2011 17:13:03 -0700
User-agent: Gnus/5.1006 (Gnus v5.10.6) XEmacs/21.4 (Jumbo Shrimp, berkeley-unix)

Quite a long time ago, I posted a proof of concept change to
parse-datetime.y to enable parsing of ISO 8601 "extended date and time
of day expressions" using a 'T' separator character.

While waiting for my copyright assignment paperwork to clear, I found
and fixed some corner cases related to the 'T' character's use as both
a military zone token and the date/time separator; added some test
cases; and tweaked the documentation.

Not content to leave well enough alone, I started adding support for
"basic date and time of day expressions", got stuck in grammar
conflict hell, and ran out of spare time.

This change rolls things back to supporting just the extended format,
together with the 'T' corner case fixes, test cases, and doc changes.

Many thanks to Jim Meyering, who has been waiting very patiently and
gently nudging me to complete this patch.

    --jtc


>From 2c5a1222e6f1059c6f817c994d5c8fbb612f53bc Mon Sep 17 00:00:00 2001
From: J.T. Conklin <address@hidden>
Date: Wed, 17 Aug 2011 16:40:49 -0700
Subject: [PATCH] Parse ISO 8601 extended date and time of day format

* doc/parse-datetime.texi (General date syntax): replace use of
deprecated --iso-8601 option with --rfc-3339 in example of date
command output formats that can be parsed.

* tests/test-parse-datetime.c (tm_diff): New function, taken from
lib/parse-datetime.y.
(gmt_offset): New function.
(main): Add additional test cases to validate ISO8601 extended
date and time of day format parsing.

* lib/parse-datetime.y: Parse ISO 8601 extended date and time
of day representation using the 'T' separator character.
---
 ChangeLog                   |   15 ++++
 doc/parse-datetime.texi     |    4 +-
 lib/parse-datetime.y        |   95 ++++++++++++++++++--------
 tests/test-parse-datetime.c |  154 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 236 insertions(+), 32 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index aab48d7..9d3ebbf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2011-08-17  J.T. Conklin  <address@hidden>
+
+       * doc/parse-datetime.texi (General date syntax): replace use of
+       deprecated --iso-8601 option with --rfc-3339 in example of date
+       command output formats that can be parsed.
+
+       * tests/test-parse-datetime.c (tm_diff): New function, taken from
+       lib/parse-datetime.y.
+       (gmt_offset): New function.
+       (main): Add additional test cases to validate ISO8601 extended
+       date and time of day parsing.
+
+       * lib/parse-datetime.y: Parse ISO 8601 extended date and time
+       of day representation using the 'T' separator character.
+
 2011-08-15  Ben Pfaff  <address@hidden>
 
        relocatable-prog: fix link error
diff --git a/doc/parse-datetime.texi b/doc/parse-datetime.texi
index 2f1ab34..b159f96 100644
--- a/doc/parse-datetime.texi
+++ b/doc/parse-datetime.texi
@@ -126,8 +126,8 @@ $ LC_ALL=C TZ=UTC0 date
 Mon Mar  1 00:21:42 UTC 2004
 $ TZ=UTC0 date +'%Y-%m-%d %H:%M:%SZ'
 2004-03-01 00:21:42Z
-$ date --iso-8601=ns | tr T ' '  # --iso-8601 is a GNU extension.
-2004-02-29 16:21:42,692722128-0800
+$ date --rfc-3339=ns  # --rfc-3339 is a GNU extension.
+2004-02-29 16:21:42.692722128-08:00
 $ date --rfc-2822  # a GNU extension
 Sun, 29 Feb 2004 16:21:42 -0800
 $ date +'%Y-%m-%d %H:%M:%S %z'  # %z is a GNU extension.
diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
index 23a9a41..027f797 100644
--- a/lib/parse-datetime.y
+++ b/lib/parse-datetime.y
@@ -285,8 +285,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
 %parse-param { parser_control *pc }
 %lex-param { parser_control *pc }
 
-/* This grammar has 20 shift/reduce conflicts. */
-%expect 20
+/* This grammar has 31 shift/reduce conflicts. */
+%expect 31
 
 %union
 {
@@ -307,7 +307,7 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
 %token <textintval> tSNUMBER tUNUMBER
 %token <timespec> tSDECIMAL_NUMBER tUDECIMAL_NUMBER
 
-%type <intval> o_colon_minutes o_merid
+%type <intval> o_colon_minutes
 %type <timespec> seconds signed_seconds unsigned_seconds
 
 %type <rel> relunit relunit_snumber dayshift
@@ -333,7 +333,9 @@ items:
   ;
 
 item:
-    time
+    datetime
+      { pc->times_seen++; pc->dates_seen++; }
+  | time
       { pc->times_seen++; }
   | local_zone
       { pc->local_zones_seen++; }
@@ -348,35 +350,61 @@ item:
   | hybrid
   ;
 
+datetime:
+    iso_8601_datetime
+  ;
+
+iso_8601_datetime:
+    iso_8601_date 'T' iso_8601_time
+  ;
+
 time:
     tUNUMBER tMERIDIAN
       {
         set_hhmmss (pc, $1.value, 0, 0, 0);
         pc->meridian = $2;
       }
-  | tUNUMBER ':' tUNUMBER o_merid
+  | tUNUMBER ':' tUNUMBER tMERIDIAN
       {
         set_hhmmss (pc, $1.value, $3.value, 0, 0);
         pc->meridian = $4;
       }
-  | tUNUMBER ':' tUNUMBER tSNUMBER o_colon_minutes
+  | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tMERIDIAN
+      {
+        set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
+        pc->meridian = $6;
+      }
+  | iso_8601_time
+  ;
+
+iso_8601_time:
+    tUNUMBER zone_offset
+      {
+        set_hhmmss (pc, $1.value, 0, 0, 0);
+       pc->meridian = MER24;
+      }
+  | tUNUMBER ':' tUNUMBER o_zone_offset
       {
         set_hhmmss (pc, $1.value, $3.value, 0, 0);
         pc->meridian = MER24;
-        pc->zones_seen++;
-        pc->time_zone = time_zone_hhmm (pc, $4, $5);
       }
-  | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_merid
+  | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_zone_offset
       {
         set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
-        pc->meridian = $6;
+        pc->meridian = MER24;
       }
-  | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tSNUMBER o_colon_minutes
+  ;
+
+o_zone_offset:
+  /* empty */
+  | zone_offset
+  ;
+
+zone_offset:
+    tSNUMBER o_colon_minutes
       {
-        set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
-        pc->meridian = MER24;
         pc->zones_seen++;
-        pc->time_zone = time_zone_hhmm (pc, $6, $7);
+        pc->time_zone = time_zone_hhmm (pc, $1, $2);
       }
   ;
 
@@ -393,12 +421,19 @@ local_zone:
       }
   ;
 
+/* Note 'T' is a special case, as it is used as the separator in ISO
+   8601 date and time of day representation. */
 zone:
     tZONE
       { pc->time_zone = $1; }
+  | 'T'
+      { pc->time_zone = HOUR(7); }
   | tZONE relunit_snumber
       { pc->time_zone = $1;
         apply_relative_time (pc, $2, 1); }
+  | 'T' relunit_snumber
+      { pc->time_zone = HOUR(7);
+        apply_relative_time (pc, $2, 1); }
   | tZONE tSNUMBER o_colon_minutes
       { pc->time_zone = $1 + time_zone_hhmm (pc, $2, $3); }
   | tDAYZONE
@@ -456,13 +491,6 @@ date:
             pc->year = $5;
           }
       }
-  | tUNUMBER tSNUMBER tSNUMBER
-      {
-        /* ISO 8601 format.  YYYY-MM-DD.  */
-        pc->year = $1;
-        pc->month = -$2.value;
-        pc->day = -$3.value;
-      }
   | tUNUMBER tMONTH tSNUMBER
       {
         /* e.g. 17-JUN-1992.  */
@@ -501,6 +529,17 @@ date:
         pc->month = $2;
         pc->year = $3;
       }
+  | iso_8601_date
+  ;
+
+iso_8601_date:
+    tUNUMBER tSNUMBER tSNUMBER
+      {
+        /* ISO 8601 format.  YYYY-MM-DD.  */
+        pc->year = $1;
+        pc->month = -$2.value;
+        pc->day = -$3.value;
+      }
   ;
 
 rel:
@@ -612,13 +651,6 @@ o_colon_minutes:
       { $$ = $2.value; }
   ;
 
-o_merid:
-    /* empty */
-      { $$ = MER24; }
-  | tMERIDIAN
-      { $$ = $1; }
-  ;
-
 %%
 
 static table const meridian_table[] =
@@ -773,7 +805,10 @@ static table const time_zone_table[] =
   { NULL, 0, 0 }
 };
 
-/* Military time zone table. */
+/* Military time zone table.
+
+   Note 'T' is a special case, as it is used as the separator in ISO
+   8601 date and time of day representation. */
 static table const military_table[] =
 {
   { "A", tZONE, -HOUR ( 1) },
@@ -794,7 +829,7 @@ static table const military_table[] =
   { "Q", tZONE,  HOUR ( 4) },
   { "R", tZONE,  HOUR ( 5) },
   { "S", tZONE,  HOUR ( 6) },
-  { "T", tZONE,  HOUR ( 7) },
+  { "T", 'T',    0 },
   { "U", tZONE,  HOUR ( 8) },
   { "V", tZONE,  HOUR ( 9) },
   { "W", tZONE,  HOUR (10) },
diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c
index 45dbae6..4cb85d5 100644
--- a/tests/test-parse-datetime.c
+++ b/tests/test-parse-datetime.c
@@ -48,17 +48,171 @@ static const char* const day_table[] =
   NULL
 };
 
+
+#if ! HAVE_TM_GMTOFF
+/* Shift A right by B bits portably, by dividing A by 2**B and
+   truncating towards minus infinity.  A and B should be free of side
+   effects, and B should be in the range 0 <= B <= INT_BITS - 2, where
+   INT_BITS is the number of useful bits in an int.  GNU code can
+   assume that INT_BITS is at least 32.
+
+   ISO C99 says that A >> B is implementation-defined if A < 0.  Some
+   implementations (e.g., UNICOS 9.0 on a Cray Y-MP EL) don't shift
+   right in the usual way when A < 0, so SHR falls back on division if
+   ordinary A >> B doesn't seem to be the usual signed shift.  */
+#define SHR(a, b)       \
+  (-1 >> 1 == -1        \
+   ? (a) >> (b)         \
+   : (a) / (1 << (b)) - ((a) % (1 << (b)) < 0))
+
+#define TM_YEAR_BASE 1900
+
+/* Yield the difference between *A and *B,
+   measured in seconds, ignoring leap seconds.
+   The body of this function is taken directly from the GNU C Library;
+   see src/strftime.c.  */
+static long int
+tm_diff (struct tm const *a, struct tm const *b)
+{
+  /* Compute intervening leap days correctly even if year is negative.
+     Take care to avoid int overflow in leap day calculations.  */
+  int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3);
+  int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3);
+  int a100 = a4 / 25 - (a4 % 25 < 0);
+  int b100 = b4 / 25 - (b4 % 25 < 0);
+  int a400 = SHR (a100, 2);
+  int b400 = SHR (b100, 2);
+  int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);
+  long int ayear = a->tm_year;
+  long int years = ayear - b->tm_year;
+  long int days = (365 * years + intervening_leap_days
+                   + (a->tm_yday - b->tm_yday));
+  return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour))
+                + (a->tm_min - b->tm_min))
+          + (a->tm_sec - b->tm_sec));
+}
+#endif /* ! HAVE_TM_GMTOFF */
+
+long
+gmt_offset()
+{
+  time_t now;
+  long gmtoff;
+
+  time(&now);
+
+#if !HAVE_TM_GMTOFF
+  struct tm tm_local = *localtime(&now);
+  struct tm tm_gmt   = *gmtime(&now);
+
+  gmtoff = tm_diff(&tm_local, &tm_gmt);
+#else
+  gmtoff = localtime(&now)->tm_gmtoff;
+#endif
+
+  return gmtoff;
+}
+
 int
 main (int argc _GL_UNUSED, char **argv)
 {
   struct timespec result;
   struct timespec result2;
+  struct timespec expected;
   struct timespec now;
   const char *p;
   int i;
+  long gmtoff;
 
   set_program_name (argv[0]);
 
+  gmtoff = gmt_offset();
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, local time zone */
+  p = "2011-05-01T11:55:18";
+  expected.tv_sec = 1304250918 - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, local time zone */
+  p = "2011-05-01 11:55:18";
+  expected.tv_sec = 1304250918 - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601, extended date and time of day representation,
+     'T' separator, UTC */
+  p = "2011-05-01T11:55:18Z";
+  expected.tv_sec = 1304250918;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601, extended date and time of day representation,
+     ' ' separator, UTC */
+  p = "2011-05-01 11:55:18Z";
+  expected.tv_sec = 1304250918;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, w/UTC offset */
+  p = "2011-05-01T11:55:18-07:00";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, w/UTC offset */
+  p = "2011-05-01 11:55:18-07:00";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, w/hour only UTC offset */
+  p = "2011-05-01T11:55:18-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, w/hour only UTC offset */
+  p = "2011-05-01 11:55:18-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
   now.tv_sec = 4711;
   now.tv_nsec = 1267;
   p = "now";
-- 
1.6.5.6



-- 
J.T. Conklin



reply via email to

[Prev in Thread] Current Thread [Next in Thread]