[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[pre-lexer 19/21] data-in: Rewrite logic for recoding input, and get rid of src_enc member.
From: |
Ben Pfaff |
Subject: |
[pre-lexer 19/21] data-in: Rewrite logic for recoding input, and get rid of src_enc member. |
Date: |
Thu, 23 Sep 2010 21:20:55 -0700 |
The previous logic appears to have been wrong for non-binary numeric formats
and for AHEX format: we want input in these formats translated into the native
encoding so that digits and letters can be interpreted properly without
additional work. A format is the only case where the output encoding matters.
---
src/data/data-in.c | 59 ++++++++++++++++++++++++++++++++-------------------
1 files changed, 37 insertions(+), 22 deletions(-)
diff --git a/src/data/data-in.c b/src/data/data-in.c
index 673ebea..b8226b3 100644
--- a/src/data/data-in.c
+++ b/src/data/data-in.c
@@ -55,7 +55,6 @@
/* Information about parsing one data field. */
struct data_in
{
- const char *src_enc; /* Encoding of source. */
struct substring input; /* Source. */
enum fmt_type format; /* Input format. */
@@ -66,8 +65,6 @@ struct data_in
int last_column; /* Last column. */
};
-
-
typedef bool data_in_parser_func (struct data_in *);
#define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \
static data_in_parser_func parse_##METHOD;
@@ -102,7 +99,9 @@ data_in (struct substring input, const char *encoding,
struct data_in i;
- char *s = NULL;
+ enum fmt_category cat;
+ const char *dest_encoding;
+ char *s;
bool ok;
assert ((width != 0) == fmt_is_string (format));
@@ -114,7 +113,6 @@ data_in (struct substring input, const char *encoding,
i.first_column = first_column;
i.last_column = last_column;
- i.src_enc = encoding;
if (ss_is_empty (input))
{
@@ -122,24 +120,45 @@ data_in (struct substring input, const char *encoding,
return true;
}
- if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY))
+ cat = fmt_get_category (format);
+ if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL
+ | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))
{
- i.input = input;
+ /* We're going to parse these into numbers. For this purpose we want to
+ deal with them in the local "C" encoding. Any character not in that
+ encoding wouldn't be valid anyhow. */
+ dest_encoding = LEGACY_NATIVE;
+ }
+ else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY))
+ {
+ /* Don't recode these binary formats at all, since they are not text. */
+ dest_encoding = NULL;
}
else
{
- const char *dest_encoding;
-
- if ( dict == NULL)
- {
- assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)));
- dest_encoding = LEGACY_NATIVE;
- }
+ assert (cat == FMT_CAT_STRING);
+ if (format == FMT_AHEX)
+ {
+ /* We want the hex digits in the local "C" encoding, even though the
+ result may not be in that encoding. */
+ dest_encoding = LEGACY_NATIVE;
+ }
else
- dest_encoding = dict_get_encoding (dict);
+ {
+ /* Use the final output encoding. */
+ dest_encoding = dict_get_encoding (dict);
+ }
+ }
- s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input));
- i.input = ss_cstr (s);
+ if (dest_encoding != NULL)
+ {
+ i.input = recode_substring_pool (dest_encoding, encoding, input, NULL);
+ s = i.input.string;
+ }
+ else
+ {
+ i.input = input;
+ s = NULL;
}
ok = handlers[i.format] (&i);
@@ -147,6 +166,7 @@ data_in (struct substring input, const char *encoding,
default_result (&i);
free (s);
+
return ok;
}
@@ -710,11 +730,6 @@ parse_AHEX (struct data_in *i)
return false;
}
- if (0 != strcmp (i->src_enc, LEGACY_NATIVE))
- {
- hi = legacy_to_native (i->src_enc, hi);
- lo = legacy_to_native (i->src_enc, lo);
- }
if (!c_isxdigit (hi) || !c_isxdigit (lo))
{
data_warning (i, _("Field must contain only hex digits."));
--
1.7.1
- [pre-lexer 00/21] preparation for work on lexer, Ben Pfaff, 2010/09/24
- [pre-lexer 01/21] str: Make ss_alloc_substring() allocate null-terminated strings., Ben Pfaff, 2010/09/24
- [pre-lexer 13/21] command: Remove superfluous trailing spaces from command names., Ben Pfaff, 2010/09/24
- [pre-lexer 19/21] data-in: Rewrite logic for recoding input, and get rid of src_enc member.,
Ben Pfaff <=
- [pre-lexer 03/21] i18n: New function recode_substring_pool()., Ben Pfaff, 2010/09/24
- [pre-lexer 11/21] lexer: Use lex_is_string() more consistently., Ben Pfaff, 2010/09/24
- [pre-lexer 04/21] syntax-string-source: Fix format string problems., Ben Pfaff, 2010/09/24
- [pre-lexer 08/21] Make translation easier., Ben Pfaff, 2010/09/24
- [pre-lexer 14/21] command: Add specific DATASET unimplemented commands., Ben Pfaff, 2010/09/24
- [pre-lexer 12/21] command: Remove INSERT from list of unimplemented commands., Ben Pfaff, 2010/09/24
- [pre-lexer 20/21] data-in: Make data_in() parameters more uniform., Ben Pfaff, 2010/09/24
- [pre-lexer 02/21] i18n: Use UTF8 macro instead of "UTF8" literal string., Ben Pfaff, 2010/09/24
- [pre-lexer 09/21] lexer: Improve translatability of lex_error()., Ben Pfaff, 2010/09/24
- [pre-lexer 15/21] message: Consistently initialize locator; use 0 for "no line number"., Ben Pfaff, 2010/09/24