diff --git a/libguile/discouraged.c b/libguile/discouraged.c index 9efd92a..203a1c7 100644 --- a/libguile/discouraged.c +++ b/libguile/discouraged.c @@ -264,7 +264,7 @@ SCM_DEFINE (scm_make_keyword_from_dash_symbol, "make-keyword-from-dash-symbol", SCM dash_string, non_dash_symbol; SCM_ASSERT (scm_is_symbol (symbol) - && ('-' == scm_i_symbol_chars(symbol)[0]), + && (scm_i_symbol_ref_eq_char (symbol, 0, '-')), symbol, SCM_ARG1, FUNC_NAME); dash_string = scm_symbol_to_string (symbol); diff --git a/libguile/eval.c b/libguile/eval.c index 48b2299..6ce9978 100644 --- a/libguile/eval.c +++ b/libguile/eval.c @@ -3323,8 +3323,11 @@ call_dsubr_1 (SCM proc, SCM arg1) { return (scm_from_double (SCM_DSUBRF (proc) (scm_i_fraction2double (arg1)))); } - SCM_WTA_DISPATCH_1 (*SCM_SUBR_GENERIC (proc), arg1, - SCM_ARG1, scm_i_symbol_chars (SCM_SNAME (proc))); + { + char *str = scm_to_locale_string (scm_symbol_to_string (SCM_SNAME (proc))); + SCM_WTA_DISPATCH_1 (*SCM_SUBR_GENERIC (proc), arg1, SCM_ARG1, str); + free (str); + } } static SCM diff --git a/libguile/eval.i.c b/libguile/eval.i.c index 573a7b5..915ec6a 100644 --- a/libguile/eval.i.c +++ b/libguile/eval.i.c @@ -1235,9 +1235,11 @@ dispatch: { RETURN (scm_from_double (SCM_DSUBRF (proc) (scm_i_fraction2double (arg1)))); } - SCM_WTA_DISPATCH_1 (*SCM_SUBR_GENERIC (proc), arg1, - SCM_ARG1, - scm_i_symbol_chars (SCM_SNAME (proc))); + { + char *str = scm_to_locale_string (scm_symbol_to_string (SCM_SNAME (proc))); + SCM_WTA_DISPATCH_1 (*SCM_SUBR_GENERIC (proc), arg1, SCM_ARG1, str); + free (str); + } case scm_tc7_cxr: RETURN (scm_i_chase_pairs (arg1, (scm_t_bits) SCM_SUBRF (proc))); case scm_tc7_rpsubr: @@ -1763,8 +1765,11 @@ tail: { RETURN (scm_from_double (SCM_DSUBRF (proc) (scm_i_fraction2double (arg1)))); } - SCM_WTA_DISPATCH_1 (*SCM_SUBR_GENERIC (proc), arg1, - SCM_ARG1, scm_i_symbol_chars (SCM_SNAME (proc))); + { + char *str = scm_to_locale_string (scm_symbol_to_string (SCM_SNAME (proc))); + SCM_WTA_DISPATCH_1 (*SCM_SUBR_GENERIC (proc), arg1, SCM_ARG1, str); + free (str); + } case scm_tc7_cxr: if (SCM_UNLIKELY (SCM_UNBNDP (arg1) || !scm_is_null (args))) scm_wrong_num_args (proc); diff --git a/libguile/filesys.c b/libguile/filesys.c index ec33328..b1999b3 100644 --- a/libguile/filesys.c +++ b/libguile/filesys.c @@ -43,6 +43,7 @@ #include "libguile/vectors.h" #include "libguile/lang.h" #include "libguile/dynwind.h" +#include "libguile/eq.h" #include "libguile/validate.h" #include "libguile/filesys.h" @@ -1561,31 +1562,33 @@ SCM_DEFINE (scm_dirname, "dirname", 1, 0, 0, "component, @code{.} is returned.") #define FUNC_NAME s_scm_dirname { - const char *s; long int i; unsigned long int len; SCM_VALIDATE_STRING (1, filename); - s = scm_i_string_chars (filename); len = scm_i_string_length (filename); i = len - 1; #ifdef __MINGW32__ - while (i >= 0 && (s[i] == '/' || s[i] == '\\')) --i; - while (i >= 0 && (s[i] != '/' && s[i] != '\\')) --i; - while (i >= 0 && (s[i] == '/' || s[i] == '\\')) --i; + while (i >= 0 && (scm_i_string_ref_eq_char (filename, i, '/') + || scm_i_string_ref_eq_char (filename, i, '\\'))) --i; + while (i >= 0 && (scm_i_string_ref_neq_char (filename, i, '/') + && scm_i_string_ref_neq_char (filename, i, '\\'))) --i; + while (i >= 0 && (scm_i_string_ref_eq_char (filename, i, '/') + || scm_i_string_ref_eq_char (filename, i, '\\'))) --i; #else - while (i >= 0 && s[i] == '/') --i; - while (i >= 0 && s[i] != '/') --i; - while (i >= 0 && s[i] == '/') --i; + while (i >= 0 && scm_i_string_ref_eq_char (filename, i, '/')) --i; + while (i >= 0 && scm_i_string_ref_neq_char (filename, i, '/')) --i; + while (i >= 0 && scm_i_string_ref_eq_char (filename, i, '/')) --i; #endif /* ndef __MINGW32__ */ if (i < 0) { #ifdef __MINGW32__ - if (len > 0 && (s[0] == '/' || s[0] == '\\')) + if (len > 0 && (scm_i_string_ref_eq_char (filename, 0, '/') + || scm_i_string_ref_eq_char (filename, 0, '\\'))) #else - if (len > 0 && s[0] == '/') + if (len > 0 && scm_i_string_ref_eq_char (filename, 0, '/')) #endif /* ndef __MINGW32__ */ return scm_c_substring (filename, 0, 1); else @@ -1604,11 +1607,9 @@ SCM_DEFINE (scm_basename, "basename", 1, 1, 0, "@var{basename}, it is removed also.") #define FUNC_NAME s_scm_basename { - const char *f, *s = 0; int i, j, len, end; SCM_VALIDATE_STRING (1, filename); - f = scm_i_string_chars (filename); len = scm_i_string_length (filename); if (SCM_UNBNDP (suffix)) @@ -1616,30 +1617,42 @@ SCM_DEFINE (scm_basename, "basename", 1, 1, 0, else { SCM_VALIDATE_STRING (2, suffix); - s = scm_i_string_chars (suffix); j = scm_i_string_length (suffix) - 1; } i = len - 1; #ifdef __MINGW32__ - while (i >= 0 && (f[i] == '/' || f[i] == '\\')) --i; + while (i >= 0 && (scm_i_string_ref_eq_char (filename, i, '/') + || scm_i_string_ref_eq_char (filename, i, '\\'))) + --i; #else - while (i >= 0 && f[i] == '/') --i; + while (i >= 0 && scm_i_string_ref_eq_char (filename, i, '/')) + --i; #endif /* ndef __MINGW32__ */ end = i; - while (i >= 0 && j >= 0 && f[i] == s[j]) --i, --j; + while (i >= 0 && j >= 0 + && scm_is_true (scm_eqv_p (scm_i_string_ref (filename, i), + scm_i_string_ref (suffix, j)))) + { + --i; + --j; + } if (j == -1) end = i; #ifdef __MINGW32__ - while (i >= 0 && f[i] != '/' && f[i] != '\\') --i; + while (i >= 0 && (scm_i_string_ref_neq_char (filename, i, '/') + || scm_i_string_ref_neq_char (filename, i, '\\'))) + --i; #else - while (i >= 0 && f[i] != '/') --i; + while (i >= 0 && scm_i_string_ref_neq_char (filename, i, '/')) + --i; #endif /* ndef __MINGW32__ */ if (i == end) { #ifdef __MINGW32__ - if (len > 0 && (f[0] == '/' || f[0] == '\\')) + if (len > 0 && (scm_i_string_ref_eq_char (filename, 0, '/') + || scm_i_string_ref_eq_char (filename, 0, '\\')) #else - if (len > 0 && f[0] == '/') + if (len > 0 && scm_i_string_ref_eq_char (filename, 0, '/')) #endif /* ndef __MINGW32__ */ return scm_c_substring (filename, 0, 1); else diff --git a/libguile/gc-mark.c b/libguile/gc-mark.c index 88bea80..75cc12c 100644 --- a/libguile/gc-mark.c +++ b/libguile/gc-mark.c @@ -246,7 +246,6 @@ scm_gc_mark_dependencies (SCM p) scm_t_bits * vtable_data = (scm_t_bits *) word0; SCM layout = SCM_PACK (vtable_data [scm_vtable_index_layout]); long len = scm_i_symbol_length (layout); - const char *fields_desc = scm_i_symbol_chars (layout); scm_t_bits *struct_data = (scm_t_bits *) SCM_STRUCT_DATA (ptr); if (vtable_data[scm_struct_i_flags] & SCM_STRUCTF_ENTITY) @@ -259,11 +258,11 @@ scm_gc_mark_dependencies (SCM p) long x; for (x = 0; x < len - 2; x += 2, ++struct_data) - if (fields_desc[x] == 'p') + if (scm_i_symbol_ref_eq_char (layout, x, 'p')) scm_gc_mark (SCM_PACK (*struct_data)); - if (fields_desc[x] == 'p') + if (scm_i_symbol_ref_eq_char (layout, x, 'p')) { - if (SCM_LAYOUT_TAILP (fields_desc[x + 1])) + if (SCM_LAYOUT_TAILP (scm_i_symbol_ref_to_char (layout, x+1, SCM_SUB))) for (x = *struct_data++; x; --x, ++struct_data) scm_gc_mark (SCM_PACK (*struct_data)); else diff --git a/libguile/goops.c b/libguile/goops.c index b623212..fa81c6b 100644 --- a/libguile/goops.c +++ b/libguile/goops.c @@ -279,10 +279,17 @@ SCM_DEFINE (scm_class_of, "class-of", 1, 0, 0, else { SCM name = SCM_STRUCT_TABLE_NAME (SCM_CDR (handle)); - SCM class = scm_make_extended_class (scm_is_true (name) - ? scm_i_symbol_chars (name) - : 0, + SCM class; + char *str = (char *)0; + if (scm_is_true (name)) + { + str = scm_to_locale_string (name); + class = scm_make_extended_class (str, SCM_I_OPERATORP (x)); + free (str); + } + else + class = scm_make_extended_class (0, SCM_I_OPERATORP (x)); SCM_SET_STRUCT_TABLE_CLASS (SCM_CDR (handle), class); return class; } @@ -1527,11 +1534,11 @@ wrap_init (SCM class, SCM *m, long n) { long i; scm_t_bits slayout = SCM_STRUCT_DATA (class)[scm_vtable_index_layout]; - const char *layout = scm_i_symbol_chars (SCM_PACK (slayout)); + SCM layout = SCM_PACK (slayout); /* Set all SCM-holding slots to unbound */ for (i = 0; i < n; i++) - if (layout[i*2] == 'p') + if (scm_i_symbol_ref_eq_char (layout, i*2, 'p')) m[i] = SCM_GOOPS_UNBOUND; else m[i] = 0; @@ -2785,10 +2792,14 @@ make_struct_class (void *closure SCM_UNUSED, SCM vtable, SCM data, SCM prev SCM_UNUSED) { if (scm_is_true (SCM_STRUCT_TABLE_NAME (data))) - SCM_SET_STRUCT_TABLE_CLASS (data, - scm_make_extended_class - (scm_i_symbol_chars (SCM_STRUCT_TABLE_NAME (data)), - SCM_CLASS_FLAGS (vtable) & SCM_CLASSF_OPERATOR)); + { + char *str = scm_to_locale_string (scm_symbol_to_string (SCM_STRUCT_TABLE_NAME (data))); + SCM_SET_STRUCT_TABLE_CLASS (data, + scm_make_extended_class + (str, + SCM_CLASS_FLAGS (vtable) & SCM_CLASSF_OPERATOR)); + free (str); + } return SCM_UNSPECIFIED; } diff --git a/libguile/hash.c b/libguile/hash.c index 7a49de6..7c7bcb2 100644 --- a/libguile/hash.c +++ b/libguile/hash.c @@ -49,6 +49,18 @@ scm_string_hash (const unsigned char *str, size_t len) return h; } +unsigned long +scm_i_string_hash (SCM str) +{ + size_t len = scm_i_string_length (str); + size_t i = 0; + + unsigned long h = 0; + while (len-- > 0) + h = scm_i_string_ref_to_ulong (str, i++) + h * 37; + return h; +} + /* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */ /* Dirk:FIXME:: scm_hasher could be made static. */ @@ -114,8 +126,7 @@ scm_hasher(SCM obj, unsigned long n, size_t d) case scm_tc7_string: { unsigned long hash = - scm_string_hash ((const unsigned char *) scm_i_string_chars (obj), - scm_i_string_length (obj)) % n; + scm_i_string_hash (obj) % n; scm_remember_upto_here_1 (obj); return hash; } diff --git a/libguile/hash.h b/libguile/hash.h index bbf9b25..879f510 100644 --- a/libguile/hash.h +++ b/libguile/hash.h @@ -27,6 +27,7 @@ SCM_API unsigned long scm_string_hash (const unsigned char *str, size_t len); +SCM_INTERNAL unsigned long scm_i_string_hash (SCM str); SCM_API unsigned long scm_hasher (SCM obj, unsigned long n, size_t d); SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n); SCM_API SCM scm_hashq (SCM obj, SCM n); diff --git a/libguile/numbers.c b/libguile/numbers.c index 52dfb73..28d1089 100644 --- a/libguile/numbers.c +++ b/libguile/numbers.c @@ -2434,10 +2434,16 @@ scm_i_print_complex (double real, double imag, SCM port) int scm_i_print_fraction (SCM sexp, SCM port, scm_print_state *pstate SCM_UNUSED) { + char *buf; + size_t len; SCM str; + str = scm_number_to_string (sexp, SCM_UNDEFINED); - scm_lfwrite (scm_i_string_chars (str), scm_i_string_length (str), port); + buf = scm_to_locale_stringn (str, &len); scm_remember_upto_here_1 (str); + + scm_lfwrite (buf, len, port); + free (buf); return !0; } @@ -3059,6 +3065,8 @@ SCM_DEFINE (scm_string_to_number, "string->number", 1, 1, 0, { SCM answer; unsigned int base; + char *buf; + size_t len; SCM_VALIDATE_STRING (1, string); if (SCM_UNBNDP (radix)) @@ -3066,9 +3074,10 @@ SCM_DEFINE (scm_string_to_number, "string->number", 1, 1, 0, else base = scm_to_unsigned_integer (radix, 2, INT_MAX); - answer = scm_c_locale_stringn_to_number (scm_i_string_chars (string), - scm_i_string_length (string), - base); + buf = scm_to_locale_stringn (string, &len); + answer = scm_c_locale_stringn_to_number (buf, len, base); + free (buf); + scm_remember_upto_here_1 (string); return answer; } diff --git a/libguile/ports.c b/libguile/ports.c index 1f49708..650b489 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -632,21 +632,22 @@ SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0, */ static long -scm_i_mode_bits_n (const char *modes, size_t n) +scm_i_mode_bits_n (SCM modes) { return (SCM_OPN - | (memchr (modes, 'r', n) || memchr (modes, '+', n) ? SCM_RDNG : 0) - | ( memchr (modes, 'w', n) - || memchr (modes, 'a', n) - || memchr (modes, '+', n) ? SCM_WRTNG : 0) - | (memchr (modes, '0', n) ? SCM_BUF0 : 0) - | (memchr (modes, 'l', n) ? SCM_BUFLINE : 0)); + | (scm_i_string_contains_char (modes, 'r') + || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0) + | ( scm_i_string_contains_char (modes, 'w') + || scm_i_string_contains_char (modes, 'a') + || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0) + | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0) + | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0)); } long scm_mode_bits (char *modes) { - return scm_i_mode_bits_n (modes, strlen (modes)); + return scm_i_mode_bits (scm_from_locale_string (modes)); } long @@ -657,9 +658,7 @@ scm_i_mode_bits (SCM modes) if (!scm_is_string (modes)) scm_wrong_type_arg_msg (NULL, 0, modes, "string"); - bits = scm_i_mode_bits_n (scm_i_string_chars (modes), - scm_i_string_length (modes)); - scm_remember_upto_here_1 (modes); + bits = scm_i_mode_bits_n (modes); return bits; } @@ -1343,13 +1342,18 @@ SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0, "@var{port} is not supplied, the current-input-port is used.") #define FUNC_NAME s_scm_unread_string { + char *buf; + size_t len; + SCM_VALIDATE_STRING (1, str); if (SCM_UNBNDP (port)) port = scm_current_input_port (); else SCM_VALIDATE_OPINPORT (2, port); - scm_ungets (scm_i_string_chars (str), scm_i_string_length (str), port); + buf = scm_to_locale_stringn (str, &len); + scm_ungets (buf, len, port); + free (buf); return str; } diff --git a/libguile/posix.c b/libguile/posix.c index 78fd295..35a610a 100644 --- a/libguile/posix.c +++ b/libguile/posix.c @@ -1689,30 +1689,28 @@ SCM_DEFINE (scm_mknod, "mknod", 4, 0, 0, #define FUNC_NAME s_scm_mknod { int val; - const char *p; int ctype = 0; SCM_VALIDATE_STRING (1, path); SCM_VALIDATE_SYMBOL (2, type); - p = scm_i_symbol_chars (type); - if (strcmp (p, "regular") == 0) + if (scm_i_symbol_strcmp (type, "regular") == 0) ctype = S_IFREG; - else if (strcmp (p, "directory") == 0) + else if (scm_i_symbol_strcmp (type, "directory") == 0) ctype = S_IFDIR; #ifdef S_IFLNK /* systems without symlinks probably don't have S_IFLNK defined */ - else if (strcmp (p, "symlink") == 0) + else if (scm_i_symbol_strcmp (type, "symlink") == 0) ctype = S_IFLNK; #endif - else if (strcmp (p, "block-special") == 0) + else if (scm_i_symbol_strcmp (type, "block-special") == 0) ctype = S_IFBLK; - else if (strcmp (p, "char-special") == 0) + else if (scm_i_symbol_strcmp (type, "char-special") == 0) ctype = S_IFCHR; - else if (strcmp (p, "fifo") == 0) + else if (scm_i_symbol_strcmp (type, "fifo") == 0) ctype = S_IFIFO; #ifdef S_IFSOCK - else if (strcmp (p, "socket") == 0) + else if (scm_i_symbol_strcmp (type, "socket") == 0) ctype = S_IFSOCK; #endif else diff --git a/libguile/print.c b/libguile/print.c index fa4cb1e..ed74fb1 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -288,16 +288,38 @@ print_circref (SCM port, scm_print_state *pstate, SCM ref) scm_putc ('#', port); } +static void +lfwrite_substring (SCM str, size_t start, size_t end, SCM port) +{ + SCM substr; + char *subbuf; + size_t sublen; + substr = scm_substring (str, + scm_from_size_t (start), + scm_from_size_t (end)); + subbuf = scm_to_locale_stringn (substr, &sublen); + scm_lfwrite (subbuf, sublen, port); + free (subbuf); +} + +static void +lfwrite_subsymbol (SCM sym, size_t start, size_t end, SCM port) +{ + lfwrite_substring (scm_symbol_to_string (sym), start, end, port); +} + /* Print the name of a symbol. */ static int -quote_keywordish_symbol (const char *str, size_t len) +quote_keywordish_symbol (SCM str) { SCM option; + size_t len = scm_i_symbol_length (str); /* LEN is guaranteed to be > 0. */ - if (str[0] != ':' && str[len-1] != ':') + if (scm_i_symbol_ref_neq_char (str, 0, ':') + && scm_i_symbol_ref_neq_char (str, len - 1, ':')); return 0; option = SCM_PRINT_KEYWORD_STYLE; @@ -311,6 +333,12 @@ quote_keywordish_symbol (const char *str, size_t len) void scm_print_symbol_name (const char *str, size_t len, SCM port) { + scm_i_print_symbol_name (scm_from_locale_stringn (str, len), port); +} + +void +scm_i_print_symbol_name (SCM sym, SCM port) +{ /* This points to the first character that has not yet been written to the * port. */ size_t pos = 0; @@ -330,18 +358,23 @@ scm_print_symbol_name (const char *str, size_t len, SCM port) * simpler and faster. */ int maybe_weird = 0; size_t mw_pos = 0; - - if (len == 0 || str[0] == '\'' || str[0] == '`' || str[0] == ',' - || quote_keywordish_symbol (str, len) - || (str[0] == '.' && len == 1) - || scm_is_true (scm_c_locale_stringn_to_number (str, len, 10))) + size_t len = scm_i_symbol_length (sym); + + if (len == 0 + || scm_i_symbol_ref_eq_char (sym, 0, '\'') + || scm_i_symbol_ref_eq_char (sym, 0, '`') + || scm_i_symbol_ref_eq_char (sym, 0, ',') + || quote_keywordish_symbol (sym) + || (scm_i_symbol_ref_eq_char (sym, 0, '.') && len == 1) + || scm_is_true (scm_string_to_number (scm_symbol_to_string (sym), + scm_from_int (10)))) { scm_lfwrite ("#{", 2, port); weird = 1; } for (end = pos; end < len; ++end) - switch (str[end]) + switch (scm_i_symbol_ref_to_char (sym, end, SCM_SUB)) { #ifdef BRACKETS_AS_PARENS case '[': @@ -366,11 +399,12 @@ scm_print_symbol_name (const char *str, size_t len, SCM port) weird = 1; } if (pos < end) - scm_lfwrite (str + pos, end - pos, port); + lfwrite_subsymbol (sym, pos, end, port); { char buf[2]; + buf[0] = '\\'; - buf[1] = str[end]; + buf[1] = scm_i_symbol_ref_to_char (sym, end, '?'); scm_lfwrite (buf, 2, port); } pos = end + 1; @@ -388,7 +422,7 @@ scm_print_symbol_name (const char *str, size_t len, SCM port) break; } if (pos < end) - scm_lfwrite (str + pos, end - pos, port); + lfwrite_subsymbol (sym, pos, end, port); if (weird) scm_lfwrite ("}#", 2, port); } @@ -548,60 +582,30 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_string: if (SCM_WRITINGP (pstate)) { - size_t i, j, len; - const char *data; - - scm_putc ('"', port); - len = scm_i_string_length (exp); - data = scm_i_string_chars (exp); - for (i = 0, j = 0; i < len; ++i) - { - unsigned char ch = data[i]; - if ((ch < 32 && ch != '\n') || (127 <= ch && ch < 148)) - { - static char const hex[]="0123456789abcdef"; - char buf[4]; - - scm_lfwrite (data+j, i-j, port); - buf[0] = '\\'; - buf[1] = 'x'; - buf[2] = hex [ch / 16]; - buf[3] = hex [ch % 16]; - scm_lfwrite (buf, 4, port); - data = scm_i_string_chars (exp); - j = i+1; - } - else if (ch == '"' || ch == '\\') - { - scm_lfwrite (data+j, i-j, port); - scm_putc ('\\', port); - data = scm_i_string_chars (exp); - j = i; - } - } - scm_lfwrite (data+j, i-j, port); - scm_putc ('"', port); + char *str = scm_i_string_to_write_sz (exp); + scm_lfwrite (str, strlen(str), port); + free (str); scm_remember_upto_here_1 (exp); } else - scm_lfwrite (scm_i_string_chars (exp), scm_i_string_length (exp), - port); - scm_remember_upto_here_1 (exp); + { + char *str; + size_t len; + str = scm_to_locale_stringn (exp, &len); + scm_lfwrite (str, len, port); + free (str); + scm_remember_upto_here_1 (exp); + } break; case scm_tc7_symbol: if (scm_i_symbol_is_interned (exp)) { - scm_print_symbol_name (scm_i_symbol_chars (exp), - scm_i_symbol_length (exp), - port); - scm_remember_upto_here_1 (exp); + scm_i_print_symbol_name (exp, port); } else { scm_puts ("#', port); @@ -654,7 +658,13 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) ? "#', port); break; @@ -963,9 +973,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1, SCM port, answer = SCM_UNSPECIFIED; int fReturnString = 0; int writingp; - const char *start; - const char *end; - const char *p; + size_t start, p, len; if (scm_is_eq (destination, SCM_BOOL_T)) { @@ -988,57 +996,64 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1, SCM_VALIDATE_STRING (2, message); SCM_VALIDATE_REST_ARGUMENT (args); - start = scm_i_string_chars (message); - end = start + scm_i_string_length (message); - for (p = start; p != end; ++p) - if (*p == '~') - { - if (++p == end) - break; - - switch (*p) - { - case 'A': case 'a': - writingp = 0; - break; - case 'S': case 's': - writingp = 1; + p = 0; + start = 0; + len = scm_i_string_length (message); + while (p < len) + { + if (scm_i_string_ref_eq_char (message, p++, '~')) + { + if (p >= len) break; - case '~': - scm_lfwrite (start, p - start, port); - start = p + 1; - continue; - case '%': - scm_lfwrite (start, p - start - 1, port); - scm_newline (port); - start = p + 1; - continue; - default: - SCM_MISC_ERROR ("FORMAT: Unsupported format option ~~~A - use (ice-9 format) instead", - scm_list_1 (SCM_MAKE_CHAR (*p))); - - } - - - if (!scm_is_pair (args)) - SCM_MISC_ERROR ("FORMAT: Missing argument for ~~~A", - scm_list_1 (SCM_MAKE_CHAR (*p))); - - scm_lfwrite (start, p - start - 1, port); - /* we pass destination here */ - scm_prin1 (SCM_CAR (args), destination, writingp); - args = SCM_CDR (args); - start = p + 1; - } - - scm_lfwrite (start, p - start, port); + + switch (scm_i_string_ref_to_char (message, p++, '?')) + { + case 'A': case 'a': + writingp = 0; + break; + case 'S': case 's': + writingp = 1; + break; + case '~': + /* Two tildes in sequence. Write everything up through + the first tilde. */ + lfwrite_substring (message, start, p-1, port); + start = p; + continue; + case '%': + /* Write everything except for "~%", and then a newline. */ + lfwrite_substring (message, start, p-2, port); + scm_newline (port); + start = p; + continue; + default: + SCM_MISC_ERROR ("FORMAT: Unsupported format option ~~~A - use (ice-9 format) instead", + scm_list_1 (scm_i_string_ref (message, p-1))); + } + + + if (!scm_is_pair (args)) + SCM_MISC_ERROR ("FORMAT: Missing argument for ~~~A", + scm_list_1 (scm_i_string_ref (message, p-1))); + /* Write everything except for the "~a" or "~s", and then + the argument. */ + lfwrite_substring (message, start, p-2, port); + /* we pass destination here */ + scm_prin1 (SCM_CAR (args), destination, writingp); + args = SCM_CDR (args); + start = p; + } + } + /* Write the remainder. */ + lfwrite_substring (message, start, p, port); + if (!scm_is_eq (args, SCM_EOL)) SCM_MISC_ERROR ("FORMAT: ~A superfluous arguments", scm_list_1 (scm_length (args))); - + if (fReturnString) answer = scm_strport_to_string (destination); - + return scm_return_first (answer, message); } #undef FUNC_NAME diff --git a/libguile/print.h b/libguile/print.h index 8974a75..470f82d 100644 --- a/libguile/print.h +++ b/libguile/print.h @@ -81,6 +81,7 @@ SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port); SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port); SCM_API void scm_iprlist (char *hdr, SCM exp, int tlr, SCM port, scm_print_state *pstate); SCM_API void scm_print_symbol_name (const char *str, size_t len, SCM port); +SCM_INTERNAL void scm_i_print_symbol_name (SCM sym, SCM port); SCM_API void scm_prin1 (SCM exp, SCM port, int writingp); SCM_API void scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate); SCM_API SCM scm_write (SCM obj, SCM port); diff --git a/libguile/random.c b/libguile/random.c index 8d2ff03..0432731 100644 --- a/libguile/random.c +++ b/libguile/random.c @@ -381,12 +381,16 @@ SCM_DEFINE (scm_seed_to_random_state, "seed->random-state", 1, 0, 0, #define FUNC_NAME s_scm_seed_to_random_state { SCM res; + char *str; + size_t len; if (SCM_NUMBERP (seed)) seed = scm_number_to_string (seed, SCM_UNDEFINED); SCM_VALIDATE_STRING (1, seed); - res = make_rstate (scm_c_make_rstate (scm_i_string_chars (seed), - scm_i_string_length (seed))); + str = scm_to_locale_stringn (seed, &len); scm_remember_upto_here_1 (seed); + + res = make_rstate (scm_c_make_rstate (str, len)); + free(str); return res; } diff --git a/libguile/rdelim.c b/libguile/rdelim.c index c9cc016..aba18bf 100644 --- a/libguile/rdelim.c +++ b/libguile/rdelim.c @@ -59,11 +59,9 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0, size_t cstart; size_t cend; int c; - const char *cdelims; size_t num_delims; SCM_VALIDATE_STRING (1, delims); - cdelims = scm_i_string_chars (delims); num_delims = scm_i_string_length (delims); SCM_VALIDATE_STRING (2, str); @@ -82,7 +80,7 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0, c = scm_getc (port); for (k = 0; k < num_delims; k++) { - if (cdelims[k] == c) + if (scm_i_string_ref_eq_int (delims, k, c)) { if (scm_is_false (gobble)) scm_ungetc (c, port); diff --git a/libguile/rw.c b/libguile/rw.c index 3e81474..887f197 100644 --- a/libguile/rw.c +++ b/libguile/rw.c @@ -206,6 +206,7 @@ SCM_DEFINE (scm_write_string_partial, "write-string/partial", 1, 3, 0, #define FUNC_NAME s_scm_write_string_partial { const char *src; + size_t srclen; long write_len; int fdes; @@ -214,7 +215,7 @@ SCM_DEFINE (scm_write_string_partial, "write-string/partial", 1, 3, 0, size_t last; SCM_VALIDATE_STRING (1, str); - src = scm_i_string_chars (str); + src = scm_to_locale_stringn (str, &srclen); scm_i_get_substring_spec (scm_i_string_length (str), start, &offset, end, &last); src += offset; diff --git a/libguile/socket.c b/libguile/socket.c index f34b6d4..de8690d 100644 --- a/libguile/socket.c +++ b/libguile/socket.c @@ -32,6 +32,7 @@ #include "libguile/strings.h" #include "libguile/vectors.h" #include "libguile/dynwind.h" +#include "libguile/srfi-13.h" #include "libguile/validate.h" #include "libguile/socket.h" @@ -1427,6 +1428,7 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0, int flg; char *dest; size_t len; + SCM msg; SCM_VALIDATE_OPFPORT (1, sock); SCM_VALIDATE_STRING (2, buf); @@ -1437,9 +1439,11 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0, fd = SCM_FPORT_FDES (sock); len = scm_i_string_length (buf); - dest = scm_i_string_writable_chars (buf); + dest = scm_malloc (len); SCM_SYSCALL (rv = recv (fd, dest, len, flg)); - scm_i_string_stop_writing (); + msg = scm_take_locale_stringn (dest, len); + scm_string_copy_x (buf, scm_from_int (0), + msg, scm_from_int (0), scm_from_size_t (len)); if (rv == -1) SCM_SYSERROR; @@ -1468,7 +1472,7 @@ SCM_DEFINE (scm_send, "send", 2, 1, 0, int rv; int fd; int flg; - const char *src; + char *src; size_t len; sock = SCM_COERCE_OUTPORT (sock); @@ -1480,10 +1484,9 @@ SCM_DEFINE (scm_send, "send", 2, 1, 0, flg = scm_to_int (flags); fd = SCM_FPORT_FDES (sock); - len = scm_i_string_length (message); - src = scm_i_string_writable_chars (message); + src = scm_to_locale_stringn (message, &len); SCM_SYSCALL (rv = send (fd, src, len, flg)); - scm_i_string_stop_writing (); + free (src); if (rv == -1) SCM_SYSERROR; @@ -1549,12 +1552,16 @@ SCM_DEFINE (scm_recvfrom, "recvfrom!", 2, 3, 0, /* recvfrom will not necessarily return an address. usually nothing is returned for stream sockets. */ - buf = scm_i_string_writable_chars (str); + buf = scm_malloc (cend - offset); ((struct sockaddr *) &addr)->sa_family = AF_UNSPEC; - SCM_SYSCALL (rv = recvfrom (fd, buf + offset, + SCM_SYSCALL (rv = recvfrom (fd, buf, cend - offset, flg, (struct sockaddr *) &addr, &addr_size)); - scm_i_string_stop_writing (); + { + SCM msg = scm_take_locale_stringn (buf, cend - offset); + scm_string_copy_x (str, scm_from_size_t (offset), + msg, scm_from_size_t (0), scm_from_size_t (cend - offset)); + } if (rv == -1) SCM_SYSERROR; @@ -1596,6 +1603,8 @@ SCM_DEFINE (scm_sendto, "sendto", 3, 1, 1, int flg; struct sockaddr *soka; size_t size; + char *localestr; + size_t len; sock = SCM_COERCE_OUTPORT (sock); SCM_VALIDATE_FPORT (1, sock); @@ -1622,10 +1631,14 @@ SCM_DEFINE (scm_sendto, "sendto", 3, 1, 1, SCM_VALIDATE_CONS (5, args_and_flags); flg = SCM_NUM2ULONG (5, SCM_CAR (args_and_flags)); } + + localestr = scm_to_locale_stringn (message, &len); SCM_SYSCALL (rv = sendto (fd, - scm_i_string_chars (message), - scm_i_string_length (message), + localestr, + len, flg, soka, size)); + free (localestr); + if (rv == -1) { int save_errno = errno; diff --git a/libguile/srfi-14.c b/libguile/srfi-14.c index 908e0c8..c85495c 100644 --- a/libguile/srfi-14.c +++ b/libguile/srfi-14.c @@ -533,7 +533,6 @@ SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0, { SCM cs; long * p; - const char * s; size_t k = 0, len; SCM_VALIDATE_STRING (1, str); @@ -545,12 +544,15 @@ SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0, cs = scm_char_set_copy (base_cs); } p = (long *) SCM_SMOB_DATA (cs); - s = scm_i_string_chars (str); len = scm_i_string_length (str); while (k < len) { - int c = s[k++]; - p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); + /* Look up the character and return SUB if the character is not + 8-bit. This kludge is only until character sets don't assume + 8-bit characters. */ + int c = scm_i_string_ref_to_char (str, k++, SCM_SUB); + if (c != (int) SCM_SUB) + p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); } scm_remember_upto_here_1 (str); return cs; @@ -566,18 +568,20 @@ SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0, #define FUNC_NAME s_scm_string_to_char_set_x { long * p; - const char * s; size_t k = 0, len; SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_SMOB (2, base_cs, charset); p = (long *) SCM_SMOB_DATA (base_cs); - s = scm_i_string_chars (str); len = scm_i_string_length (str); while (k < len) { - int c = s[k++]; - p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); + /* Look up the character and return SUB if the character is not + 8-bit. This kludge is only until character sets don't assume + 8-bit characters. */ + int c = scm_i_string_ref_to_char (str, k++, SCM_SUB); + if (c != (int) SCM_SUB) + p[c / SCM_BITS_PER_LONG] |= 1L << (c % SCM_BITS_PER_LONG); } scm_remember_upto_here_1 (str); return base_cs; diff --git a/libguile/stime.c b/libguile/stime.c index 34c8a98..343bd63 100644 --- a/libguile/stime.c +++ b/libguile/stime.c @@ -56,6 +56,8 @@ #include "libguile/validate.h" #include "libguile/stime.h" +#include "unistr.h" + #ifdef HAVE_UNISTD_H #include #endif @@ -636,18 +638,20 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, { struct tm t; - char *tbuf; + scm_t_uint8 *tbuf; int size = 50; - const char *fmt; - char *myfmt; + scm_t_uint8 *fmt; + scm_t_uint8 *myfmt; int len; SCM result; SCM_VALIDATE_STRING (1, format); bdtime2c (stime, &t, SCM_ARG2, FUNC_NAME); - fmt = scm_i_string_chars (format); - len = scm_i_string_length (format); + /* Convert string to UTF-8 so that non-ASCII characters in the + format are passed through unchanged. */ + fmt = scm_i_string_to_u8sz (format); + len = strlen ((const char *)fmt); /* Ugly hack: strftime can return 0 if its buffer is too small, but some valid time strings (e.g. "%p") can sometimes produce @@ -655,10 +659,12 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, character to the format string, so that valid returns are always nonzero. */ myfmt = scm_malloc (len+2); - *myfmt = 'x'; - strncpy(myfmt+1, fmt, len); + *myfmt = (scm_t_uint8) 'x'; + strncpy((char *)myfmt+1, (const char *)fmt, len); myfmt[len+1] = 0; + free (fmt); + tbuf = scm_malloc (size); { #if !defined (HAVE_TM_ZONE) @@ -692,7 +698,8 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, /* Use `nstrftime ()' from Gnulib, which supports all GNU extensions supported by glibc. */ - while ((len = nstrftime (tbuf, size, myfmt, &t, 0, 0)) == 0) + while ((len = nstrftime ((char *)tbuf, size, + (const char *)myfmt, &t, 0, 0)) == 0) { free (tbuf); size *= 2; @@ -707,10 +714,11 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0, } #endif } + free (myfmt); - result = scm_from_locale_stringn (tbuf + 1, len - 1); + result = scm_i_string_from_u8sz ((const scm_t_uint8 *)(tbuf + 1)); free (tbuf); - free (myfmt); + #if HAVE_STRUCT_TM_TM_ZONE free ((char *) t.tm_zone); #endif @@ -734,14 +742,17 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0, #define FUNC_NAME s_scm_strptime { struct tm t; - const char *fmt, *str, *rest; + scm_t_uint8 *fmt, *str, *rest; + size_t used_len; long zoff; SCM_VALIDATE_STRING (1, format); SCM_VALIDATE_STRING (2, string); - fmt = scm_i_string_chars (format); - str = scm_i_string_chars (string); + /* Convert strings to UTF-8 so that non-ASCII characters are passed + through unchanged. */ + fmt = scm_i_string_to_u8sz (format); + str = scm_i_string_to_u8sz (string); /* initialize the struct tm */ #define tm_init(field) t.field = 0 @@ -763,7 +774,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0, fields, hence the use of SCM_CRITICAL_SECTION_START. */ t.tm_isdst = -1; SCM_CRITICAL_SECTION_START; - rest = strptime (str, fmt, &t); + rest = (scm_t_uint8 *)strptime ((const char *)str, (const char *)fmt, &t); SCM_CRITICAL_SECTION_END; if (rest == NULL) { @@ -782,8 +793,13 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0, zoff = 0; #endif + /* Compute the number of UTF-8 characters. */ + used_len = u8_strnlen (str, rest-str); + free (str); + free (fmt); + return scm_cons (filltime (&t, zoff, NULL), - scm_from_signed_integer (rest - str)); + scm_from_signed_integer (used_len)); } #undef FUNC_NAME #endif /* HAVE_STRPTIME */ diff --git a/libguile/strings.c b/libguile/strings.c index c138026..11b642b 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -32,6 +32,8 @@ #include "libguile/validate.h" #include "libguile/dynwind.h" +#include "uniconv.h" + /* {Strings} @@ -733,6 +735,264 @@ scm_c_string_set_x (SCM str, size_t p, SCM chr) } } + + +/* General-purpose string operators. */ + + +/* Return the Xth character of STR as a scheme character */ +SCM +scm_i_string_ref (SCM str, size_t x) +{ + SCM_ASSERT_TYPE (scm_is_string (str), str, SCM_ARG1, "string_ref", "string"); + return (SCM_MAKE_CHAR (scm_i_string_chars (str)[x])); +} + +SCM +scm_i_symbol_ref (SCM str, size_t x) +{ + /* Keep these assertions in until everything is right. */ + SCM_ASSERT_TYPE (scm_is_symbol (str), str, SCM_ARG1, "symbol_ref", "symbol"); + return (SCM_MAKE_CHAR (scm_i_symbol_chars (str)[x])); +} + +/* Return the Xth character in STR as a C char. If it cannot be + represented as a C char, return REPLACE. */ +char +scm_i_string_ref_to_char (SCM str, size_t x, char replace) +{ + SCM_ASSERT_TYPE (scm_is_string (str), str, SCM_ARG1, "string_ref", "string"); + if (x >= scm_i_string_length (str)) + scm_out_of_range (NULL, scm_from_size_t (x)); + return (scm_i_string_chars (str)[x]); +} + +char +scm_i_symbol_ref_to_char (SCM str, size_t x, char replace) +{ + SCM_ASSERT_TYPE (scm_is_symbol (str), str, SCM_ARG1, "symbol_ref", "symbol"); + if (x >= scm_i_string_length (str)) + scm_out_of_range (NULL, scm_from_size_t (x)); + return (scm_i_symbol_chars (str)[x]); +} + +/* Return the Xth character in STR as an unsigned long. */ +unsigned long +scm_i_string_ref_to_ulong (SCM str, size_t x) +{ + if (x >= scm_i_string_length (str)) + scm_out_of_range (NULL, scm_from_size_t (x)); + return ((unsigned long) scm_i_string_chars (str)[x]); + +} + +/* Return TRUE if the Xth character is C. */ +int +scm_i_string_ref_eq_char (SCM str, size_t x, char c) +{ + if (x >= scm_i_string_length (str)) + scm_out_of_range (NULL, scm_from_size_t (x)); + return (scm_i_string_chars (str)[x] == c); +} + +/* Return TRUE if the Xth character is C. */ +int +scm_i_symbol_ref_eq_char (SCM str, size_t x, char c) +{ + if (x >= scm_i_symbol_length (str)) + scm_out_of_range (NULL, scm_from_size_t (x)); + return (scm_i_symbol_chars (str)[x] == c); +} + +/* Return TRUE if the Xth character is not C. */ +int +scm_i_string_ref_neq_char (SCM str, size_t x, char c) +{ + if (x >= scm_i_string_length (str)) + scm_out_of_range (NULL, scm_from_size_t (x)); + return (scm_i_string_chars (str)[x] != c); +} + +/* Return TRUE if the Xth character is not C. */ +int +scm_i_symbol_ref_neq_char (SCM str, size_t x, char c) +{ + if (x >= scm_i_symbol_length (str)) + scm_out_of_range (NULL, scm_from_size_t (x)); + return (scm_i_symbol_chars (str)[x] != c); +} + +/* Return TRUE if the Xth character is C. */ +int +scm_i_string_ref_eq_int (SCM str, size_t x, int c) +{ + if (x >= scm_i_string_length (str)) + scm_out_of_range (NULL, scm_from_size_t (x)); + return ((int)(scm_i_string_chars (str)[x]) == c); +} + +size_t +scm_i_string_contains_char (SCM str, char ch) +{ + size_t i; + size_t len = scm_i_string_length (str); + + i = 0; + while (i < len) + { + if (scm_i_string_chars (str)[i] == ch) + return 1; + i++; + } + return 0; +} + +int +scm_i_symbol_strcmp (SCM sym, const char *str) +{ + return strcmp (scm_to_locale_string (scm_symbol_to_string (sym)), str); +} + +/* Special-purpose string operators for specific cases. */ + +char * +scm_i_string_to_write_sz (SCM str) +{ + size_t ilen, olen; + const char *idata; + char *odata; + unsigned char ch; + int i,j; + + /* Count the number of characters needed for output. */ + ilen = scm_i_string_length (str); + idata = scm_i_string_chars (str); + olen = 3; /* 2 quotation marks. 1 for '\0' */ + for (i=0; i < ilen; i++) + { + ch = (unsigned char) idata[i]; + if((ch < 32 && ch != '\n') || (127 <= ch && ch < 148)) + olen += 4; + else if (ch == '"' || ch == '\\') + olen += 2; + else + olen ++; + } + odata = scm_malloc (olen); + j = 0; + odata[j++] = '"'; + for (i=0; i < ilen; i++) + { + ch = (unsigned char) idata[i]; + if ((ch < 32 && ch != '\n') || (127 <= ch && ch < 148)) + { + static char const hex[]="0123456789abcdef"; + odata[j++] = '\\'; + odata[j++] = 'x'; + odata[j++] = hex [ch / 16]; + odata[j++] = hex [ch % 16]; + } + else if (ch == '"' || ch == '\\') + { + odata[j++] = '\\'; + odata[j++] = ch; + } + else + odata[j++] = ch; + } + odata[j++] = '"'; + odata[j] = '\0'; + scm_remember_upto_here_1 (str); + + return odata; +} + +/* Create a null-terminated utf-8 C string. */ +scm_t_uint8 * +scm_i_string_to_u8sz (SCM str) +{ + scm_t_uint8 *u8sz; + + /* This is temporary code. Until wide characters are properly + added, pretend that the string is encoded as Latin1. */ + u8sz = u8_strconv_from_encoding (scm_i_string_chars (str), + "ISO-8859-1", + iconveh_question_mark); + if (u8sz == NULL) + scm_memory_error ("unpacking strings"); + return (u8sz); +} + +/* Create a string from a null-terminated utf-8 C string. */ +SCM +scm_i_string_from_u8sz (const scm_t_uint8 *u8sz) +{ + char *buf, *dst; + SCM str; + + /* This is temporary code. Until wide characters are properly + added, pretend that the string is encoded as Latin1. */ + buf = u8_strconv_to_encoding (u8sz, "ISO-8859-1", + iconveh_question_mark); + if (buf == NULL) + scm_memory_error ("packing strings"); + + str = scm_i_make_string (strlen (buf), &dst); + strcpy (dst, buf); + free (buf); + + return str; +} + + +/* Return a zero-terminated ASCII string representation of STR in a + statically allocated buffer. Characters that can't be represented + in ASCII will be represented with '?'. */ +#define SCM_FAILSAFE_STRING_LEN (160) + +static const char * +make_failsafe_ascii_sz (const char *str, size_t len) +{ + static char buf[SCM_FAILSAFE_STRING_LEN]; + char *sz = buf; + size_t i; + + if (len > SCM_FAILSAFE_STRING_LEN - 1) + len = SCM_FAILSAFE_STRING_LEN - 1; + + for (i = 0; i < len; i++) + if (str[i] == 0 || str[i] > 127) + sz[i] = '?'; + else + sz[i] = str[i]; + sz[len] = '\0'; + return (const char *) sz; +} + +const char * +scm_i_string_to_failsafe_ascii_sz (SCM msg) +{ + size_t len; + const char *str = scm_i_string_chars (msg); + + len = scm_i_string_length (msg); + + return make_failsafe_ascii_sz (str, len); +} + +const char * +scm_i_symbol_to_failsafe_ascii_sz (SCM msg) +{ + const char *str = scm_i_symbol_chars (msg); + size_t len = scm_i_symbol_length (msg); + + return make_failsafe_ascii_sz (str, len); +} + + + +#undef SCM_FAILSAFE_STRING_LEN + SCM_DEFINE (scm_substring, "substring", 2, 1, 0, (SCM str, SCM start, SCM end), "Return a newly allocated string formed from the characters\n" diff --git a/libguile/strings.h b/libguile/strings.h index ca5f52c..df96f2b 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -153,6 +153,31 @@ SCM_INTERNAL void scm_i_get_substring_spec (size_t len, SCM end, size_t *cend); SCM_INTERNAL SCM scm_i_take_stringbufn (char *str, size_t len); +SCM_INTERNAL SCM scm_i_string_ref (SCM str, size_t x); +SCM_INTERNAL SCM scm_i_symbol_ref (SCM str, size_t x); +SCM_INTERNAL char scm_i_string_ref_to_char (SCM str, size_t x, char replace); +SCM_INTERNAL char scm_i_symbol_ref_to_char (SCM str, size_t x, char replace); +SCM_INTERNAL unsigned long scm_i_string_ref_to_ulong (SCM str, size_t x); + +SCM_INTERNAL int scm_i_string_ref_eq_char (SCM str, size_t x, char c); +SCM_INTERNAL int scm_i_symbol_ref_eq_char (SCM str, size_t x, char c); +SCM_INTERNAL int scm_i_string_ref_neq_char (SCM str, size_t x, char c); +SCM_INTERNAL int scm_i_symbol_ref_neq_char (SCM str, size_t x, char c); +SCM_INTERNAL int scm_i_string_ref_eq_int (SCM str, size_t x, int c); + +SCM_INTERNAL size_t scm_i_string_contains_char (SCM str, char ch); +SCM_INTERNAL int scm_i_symbol_strcmp (SCM sym, const char *str); + +SCM_INTERNAL char *scm_i_string_to_write_sz (SCM str); +SCM_INTERNAL scm_t_uint8 *scm_i_string_to_u8sz (SCM str); +SCM_INTERNAL SCM scm_i_string_from_u8sz (const scm_t_uint8 *str); +SCM_INTERNAL const char *scm_i_string_to_failsafe_ascii_sz (SCM str); +SCM_INTERNAL const char *scm_i_symbol_to_failsafe_ascii_sz (SCM str); + +/* For ASCII strings, SUB can be used to represent an invalid + character. */ +#define SCM_SUB ('\x1A') + /* deprecated stuff */ #if SCM_ENABLE_DEPRECATED diff --git a/libguile/struct.c b/libguile/struct.c index cae0f31..ac6822d 100644 --- a/libguile/struct.c +++ b/libguile/struct.c @@ -29,7 +29,7 @@ #include "libguile/hashtab.h" #include "libguile/ports.h" #include "libguile/strings.h" - +#include "libguile/srfi-13.h" #include "libguile/validate.h" #include "libguile/struct.h" @@ -62,7 +62,6 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0, SCM_VALIDATE_STRING (1, fields); { /* scope */ - const char * field_desc; size_t len; int x; @@ -71,11 +70,9 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0, SCM_MISC_ERROR ("odd length field specification: ~S", scm_list_1 (fields)); - field_desc = scm_i_string_chars (fields); - for (x = 0; x < len; x += 2) { - switch (field_desc[x]) + switch (scm_i_string_ref_to_char (fields, x, '?')) { case 'u': case 'p': @@ -87,13 +84,13 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0, break; default: SCM_MISC_ERROR ("unrecognized field type: ~S", - scm_list_1 (SCM_MAKE_CHAR (field_desc[x]))); + scm_list_1 (scm_i_string_ref (fields, x))); } - switch (field_desc[x + 1]) + switch (scm_i_string_ref_to_char (fields, x + 1, '?')) { case 'w': - if (field_desc[x] == 's') + if (scm_i_string_ref_eq_char (fields, x, 's')) SCM_MISC_ERROR ("self fields not writable", SCM_EOL); case 'r': case 'o': @@ -101,7 +98,7 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0, case 'R': case 'W': case 'O': - if (field_desc[x] == 's') + if (scm_i_string_ref_eq_char (fields, x, 's')) SCM_MISC_ERROR ("self fields not allowed in tail array", SCM_EOL); if (x != len - 2) @@ -110,12 +107,12 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0, break; default: SCM_MISC_ERROR ("unrecognized ref specification: ~S", - scm_list_1 (SCM_MAKE_CHAR (field_desc[x + 1]))); + scm_list_1 (scm_i_string_ref (fields, x+1))); } #if 0 - if (field_desc[x] == 'd') + if (scm_i_string_ref_eq_char (fields, x, 'd')) { - if (field_desc[x + 2] != '-') + if (scm_i_string_ref_neq_char (fields, x+2, '-')) SCM_MISC_ERROR ("missing dash field at position ~A", scm_list_1 (scm_from_int (x / 2))); x += 2; @@ -137,21 +134,22 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0, static void scm_struct_init (SCM handle, SCM layout, scm_t_bits * mem, int tail_elts, SCM inits) { - unsigned const char *fields_desc = - (unsigned const char *) scm_i_symbol_chars (layout) - 2; unsigned char prot = 0; int n_fields = scm_i_symbol_length (layout) / 2; int tailp = 0; + int i; + i = -2; while (n_fields) { if (!tailp) { - fields_desc += 2; - prot = fields_desc[1]; + i += 2; + prot = (unsigned char) scm_i_symbol_ref_to_char (layout, i+1, SCM_SUB); if (SCM_LAYOUT_TAILP (prot)) { tailp = 1; + prot = prot == 'R' ? 'r' : prot == 'W' ? 'w' : 'o'; *mem++ = tail_elts; n_fields += tail_elts - 1; @@ -159,8 +157,8 @@ scm_struct_init (SCM handle, SCM layout, scm_t_bits * mem, int tail_elts, SCM in break; } } - - switch (*fields_desc) + + switch (scm_i_symbol_ref_to_char (layout, i, SCM_SUB)) { #if 0 case 'i': @@ -204,7 +202,7 @@ scm_struct_init (SCM handle, SCM layout, scm_t_bits * mem, int tail_elts, SCM in *mem = scm_num2dbl (SCM_CAR (inits), "scm_struct_init"); inits = SCM_CDR (inits); } - fields_desc += 2; + i += 2; break; #endif @@ -236,7 +234,8 @@ SCM_DEFINE (scm_struct_vtable_p, "struct-vtable?", 1, 0, 0, { SCM layout; scm_t_bits * mem; - int tmp; + SCM tmp; + size_t len; if (!SCM_STRUCTP (x)) return SCM_BOOL_F; @@ -247,11 +246,14 @@ SCM_DEFINE (scm_struct_vtable_p, "struct-vtable?", 1, 0, 0, < scm_i_string_length (required_vtable_fields)) return SCM_BOOL_F; - tmp = strncmp (scm_i_symbol_chars (layout), - scm_i_string_chars (required_vtable_fields), - scm_i_string_length (required_vtable_fields)); - scm_remember_upto_here_1 (required_vtable_fields); - if (tmp) + len = scm_i_string_length (required_vtable_fields); + tmp = scm_string_eq (scm_symbol_to_string (layout), + required_vtable_fields, + scm_from_size_t (0), + scm_from_size_t (len), + scm_from_size_t (0), + scm_from_size_t (len)); + if (scm_is_false (tmp)) return SCM_BOOL_F; mem = SCM_STRUCT_DATA (x); @@ -645,7 +647,6 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0, size_t layout_len; size_t p; scm_t_bits n_fields; - const char *fields_desc; char field_type = 0; @@ -655,7 +656,6 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0, data = SCM_STRUCT_DATA (handle); p = scm_to_size_t (pos); - fields_desc = scm_i_symbol_chars (layout); layout_len = scm_i_symbol_length (layout); if (SCM_STRUCT_VTABLE_FLAGS (handle) & SCM_STRUCTF_LIGHT) /* no extra words */ @@ -668,8 +668,8 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0, if (p * 2 < layout_len) { char ref; - field_type = fields_desc[p * 2]; - ref = fields_desc[p * 2 + 1]; + field_type = scm_i_symbol_ref_to_char (layout, p * 2, '?'); + ref = scm_i_symbol_ref_to_char (layout, p * 2 + 1, '?'); if ((ref != 'r') && (ref != 'w')) { if ((ref == 'R') || (ref == 'W')) @@ -678,8 +678,8 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0, SCM_MISC_ERROR ("ref denied for field ~A", scm_list_1 (pos)); } } - else if (fields_desc[layout_len - 1] != 'O') - field_type = fields_desc[layout_len - 2]; + else if (scm_i_symbol_ref_neq_char (layout, layout_len - 1, 'O')) + field_type = scm_i_symbol_ref_to_char(layout, layout_len - 2, '?'); else SCM_MISC_ERROR ("ref denied for field ~A", scm_list_1 (pos)); @@ -707,7 +707,7 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0, default: SCM_MISC_ERROR ("unrecognized field type: ~S", - scm_list_1 (SCM_MAKE_CHAR (field_type))); + scm_list_1 (scm_i_symbol_ref (layout, layout_len - 2))); } return answer; @@ -727,8 +727,8 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0, size_t layout_len; size_t p; int n_fields; - const char *fields_desc; char field_type = 0; + SCM field_scm_type; SCM_VALIDATE_STRUCT (1, handle); @@ -736,7 +736,6 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0, data = SCM_STRUCT_DATA (handle); p = scm_to_size_t (pos); - fields_desc = scm_i_symbol_chars (layout); layout_len = scm_i_symbol_length (layout); if (SCM_STRUCT_VTABLE_FLAGS (handle) & SCM_STRUCTF_LIGHT) /* no extra words */ @@ -749,13 +748,17 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0, if (p * 2 < layout_len) { char set_x; - field_type = fields_desc[p * 2]; - set_x = fields_desc [p * 2 + 1]; + field_type = scm_i_symbol_ref_to_char (layout, p * 2, '?'); + field_scm_type = scm_i_symbol_ref (layout, p * 2); + set_x = scm_i_symbol_ref_to_char (layout, p * 2 + 1, '?'); if (set_x != 'w') SCM_MISC_ERROR ("set! denied for field ~A", scm_list_1 (pos)); } - else if (fields_desc[layout_len - 1] == 'W') - field_type = fields_desc[layout_len - 2]; + else if (scm_i_symbol_ref_eq_char (layout, layout_len - 1, 'W')) + { + field_type = scm_i_symbol_ref_to_char (layout, layout_len - 2, '?'); + field_scm_type = scm_i_symbol_ref (layout, layout_len - 2); + } else SCM_MISC_ERROR ("set! denied for field ~A", scm_list_1 (pos)); @@ -784,7 +787,7 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0, default: SCM_MISC_ERROR ("unrecognized field type: ~S", - scm_list_1 (SCM_MAKE_CHAR (field_type))); + scm_list_1 (field_scm_type)); } return val; diff --git a/libguile/symbols.c b/libguile/symbols.c index e208e5a..8940118 100644 --- a/libguile/symbols.c +++ b/libguile/symbols.c @@ -36,6 +36,7 @@ #include "libguile/modules.h" #include "libguile/read.h" #include "libguile/srfi-13.h" +#include "libguile/eq.h" #include "libguile/validate.h" #include "libguile/symbols.h" @@ -88,11 +89,11 @@ scm_i_hash_symbol (SCM obj, unsigned long n, void *closure) } static SCM -lookup_interned_symbol (const char *name, size_t len, - unsigned long raw_hash) +lookup_interned_symbol (SCM name, unsigned long raw_hash) { /* Try to find the symbol in the symbols table */ SCM l; + size_t len = scm_i_string_length (name); unsigned long hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols); for (l = SCM_HASHTABLE_BUCKET (symbols, hash); @@ -103,13 +104,13 @@ lookup_interned_symbol (const char *name, size_t len, if (scm_i_symbol_hash (sym) == raw_hash && scm_i_symbol_length (sym) == len) { - const char *chrs = scm_i_symbol_chars (sym); size_t i = len; while (i != 0) { --i; - if (name[i] != chrs[i]) + if (scm_is_false (scm_eqv_p (scm_i_string_ref (name, i), + scm_i_symbol_ref (sym, i)))) goto next_symbol; } @@ -141,32 +142,12 @@ intern_symbol (SCM symbol) } static SCM -scm_i_c_mem2symbol (const char *name, size_t len) -{ - SCM symbol; - size_t raw_hash = scm_string_hash ((const unsigned char *) name, len); - - symbol = lookup_interned_symbol (name, len, raw_hash); - if (scm_is_false (symbol)) - { - /* The symbol was not found, create it. */ - symbol = scm_i_c_make_symbol (name, len, 0, raw_hash, - scm_cons (SCM_BOOL_F, SCM_EOL)); - intern_symbol (symbol); - } - - return symbol; -} - -static SCM scm_i_mem2symbol (SCM str) { SCM symbol; - const char *name = scm_i_string_chars (str); - size_t len = scm_i_string_length (str); - size_t raw_hash = scm_string_hash ((const unsigned char *) name, len); + size_t raw_hash = scm_i_string_hash (str); - symbol = lookup_interned_symbol (name, len, raw_hash); + symbol = lookup_interned_symbol (str, raw_hash); if (scm_is_false (symbol)) { /* The symbol was not found, create it. */ @@ -182,9 +163,7 @@ scm_i_mem2symbol (SCM str) static SCM scm_i_mem2uninterned_symbol (SCM str) { - const char *name = scm_i_string_chars (str); - size_t len = scm_i_string_length (str); - size_t raw_hash = scm_string_hash ((const unsigned char *) name, len); + size_t raw_hash = scm_i_string_hash (str); return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL)); @@ -388,44 +367,23 @@ SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0, SCM scm_from_locale_symbol (const char *sym) { - return scm_i_c_mem2symbol (sym, strlen (sym)); + return scm_from_locale_symboln (sym, -1); } SCM scm_from_locale_symboln (const char *sym, size_t len) { - return scm_i_c_mem2symbol (sym, len); + SCM str = scm_from_locale_stringn (sym, len); + return scm_i_mem2symbol (str); } SCM scm_take_locale_symboln (char *sym, size_t len) { - SCM res; - unsigned long raw_hash; - - if (len == (size_t)-1) - len = strlen (sym); - else - { - /* Ensure STR is null terminated. A realloc for 1 extra byte should - often be satisfied from the alignment padding after the block, with - no actual data movement. */ - sym = scm_realloc (sym, len+1); - sym[len] = '\0'; - } - - raw_hash = scm_string_hash ((unsigned char *)sym, len); - res = lookup_interned_symbol (sym, len, raw_hash); - if (scm_is_false (res)) - { - res = scm_i_c_take_symbol (sym, len, 0, raw_hash, - scm_cons (SCM_BOOL_F, SCM_EOL)); - intern_symbol (res); - } - else - free (sym); + SCM str; - return res; + str = scm_take_locale_stringn (sym, len); + return scm_i_mem2symbol (str); } SCM diff --git a/libguile/throw.c b/libguile/throw.c index e0dda27..290158e 100644 --- a/libguile/throw.c +++ b/libguile/throw.c @@ -743,16 +743,17 @@ scm_ithrow (SCM key, SCM args, int noreturn SCM_UNUSED) */ fprintf (stderr, "throw from within critical section.\n"); if (scm_is_symbol (key)) - fprintf (stderr, "error key: %s\n", scm_i_symbol_chars (key)); + fprintf (stderr, "error key: %s\n", + scm_i_symbol_to_failsafe_ascii_sz (key)); for (; scm_is_pair (s); s = scm_cdr (s), i++) { char const *str = NULL; if (scm_is_string (scm_car (s))) - str = scm_i_string_chars (scm_car (s)); + str = scm_i_string_to_failsafe_ascii_sz (scm_car (s)); else if (scm_is_symbol (scm_car (s))) - str = scm_i_symbol_chars (scm_car (s)); + str = scm_i_symbol_to_failsafe_ascii_sz (scm_car (s)); if (str != NULL) fprintf (stderr, "argument %d: %s\n", i, str); diff --git a/libguile/unif.c b/libguile/unif.c index daf0850..29ca857 100644 --- a/libguile/unif.c +++ b/libguile/unif.c @@ -1072,7 +1072,6 @@ SCM_DEFINE (scm_enclose_array, "enclose-array", 1, 0, 1, #define FUNC_NAME s_scm_enclose_array { SCM axv, res, ra_inr; - const char *c_axv; scm_t_array_dim vdim, *s = &vdim; int ndim, j, k, ninr, noutr; @@ -1120,10 +1119,9 @@ SCM_DEFINE (scm_enclose_array, "enclose-array", 1, 0, 1, SCM_I_ARRAY_DIMS (ra_inr)[k].inc = s[j].inc; scm_c_string_set_x (axv, j, SCM_MAKE_CHAR (1)); } - c_axv = scm_i_string_chars (axv); for (j = 0, k = 0; k < noutr; k++, j++) { - while (c_axv[j]) + while (!scm_i_string_ref_eq_char (axv, j, '\0')) j++; SCM_I_ARRAY_DIMS (res)[k].lbnd = s[j].lbnd; SCM_I_ARRAY_DIMS (res)[k].ubnd = s[j].ubnd; @@ -2252,12 +2250,10 @@ scm_istr2bve (SCM str) SCM res = vec; scm_t_uint32 mask; - size_t k, j; - const char *c_str; + size_t k, j, i; scm_t_uint32 *data; data = scm_bitvector_writable_elements (vec, &handle, NULL, NULL, NULL); - c_str = scm_i_string_chars (str); for (k = 0; k < (len + 31) / 32; k++) { @@ -2266,7 +2262,7 @@ scm_istr2bve (SCM str) if (j > 32) j = 32; for (mask = 1L; j--; mask <<= 1) - switch (*c_str++) + switch (scm_i_string_ref_to_char (str, i++, '\0')) { case '0': break;