From 88126627a01185c7a88a01269ef46f00c1466106 Mon Sep 17 00:00:00 2001 From: Freja Nordsiek Date: Mon, 19 Jun 2017 01:00:01 +0200 Subject: [PATCH] Added read and print options for R7RS bytevector notation. * libguile/private-options.h: Added read and print options. * libguile/read.c: Added and implemented R7RS bytevector reading option. * libguile/print.c: Added R7RS bytevector print option. * libguile/bytevector.c (scm_i_print_bytevector): Implemented option to print bytevectors using R7RS notation. * test-suite/tests/reader.test: Added tests for the read option. * test-suite/tests/print.test: Added tests for the print option. * doc/ref/api-evaluation.texi (Scheme Read and Scheme Write): Updated to reflect added read and print options. * doc/ref/api-data.texi (Bytevectors): Updated to reflect added read and print options for bytevectors. * doc/ref/srfi-modules.texi (SRFI-4): Added warning about the added read and print options conflicting with unsigned 8-bit integers. --- doc/ref/api-data.texi | 11 +++++++++++ doc/ref/api-evaluation.texi | 3 +++ doc/ref/srfi-modules.texi | 7 +++++++ libguile/bytevectors.c | 9 ++++++++- libguile/print.c | 2 ++ libguile/private-options.h | 6 ++++-- libguile/read.c | 29 ++++++++++++++++++++++++----- test-suite/tests/print.test | 17 ++++++++++++++++- test-suite/tests/reader.test | 13 +++++++++++++ 9 files changed, 88 insertions(+), 9 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index acdf9ca..17f4c07 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -4572,6 +4572,17 @@ they do not need to be quoted: @result{} #vu8(1 53 204) @end lisp +R7RS uses a different syntax for bytevectors, which uses the prefix @code{#u8} +to make it more in line with SRFI-4 (@pxref{SRFI-4}). This syntax can be +enabled for reading and writing by enabling the @code{'r7rs-bytevectors} read +option with @code{(read-enable 'r7rs-bytevectors)} (@pxref{Scheme Read}) +and print option with @code{(print-enable 'r7rs-bytevectors)} +(@pxref{Scheme Write}) respectively. Note that enabling these read and +print options will mean that SRFI-4 unsigned 8-bit integers (which are a +separate type in Guile) cannot be created using the @code{#u8} prefix and it +will not be possible to distinguish bytevectors from SRFI-4 unsigned 8-bit +integers from their printed forms. + Bytevectors can be used with the binary input/output primitives of the R6RS (@pxref{R6RS I/O Ports}). diff --git a/doc/ref/api-evaluation.texi b/doc/ref/api-evaluation.texi index 565ccdb..a63a3dd 100644 --- a/doc/ref/api-evaluation.texi +++ b/doc/ref/api-evaluation.texi @@ -343,6 +343,7 @@ hungry-eol-escapes no In strings, consume leading whitespace after an escaped end-of-line. curly-infix no Support SRFI-105 curly infix expressions. r7rs-symbols no Support R7RS |...| symbol notation. +r7rs-bytevectors no Support R7RS #u8(...) bytevector notation in addition to R6RS #vu8(...). @end smalllisp Guile allows read options to be set on a per-port basis in one of two @@ -465,6 +466,8 @@ escape-newlines yes Render newlines as \n when printing using `write'. r7rs-symbols no Escape symbols using R7RS |...| symbol notation. +r7rs-bytevectors no Print bytevectors using R7RS #u8(...) notation + instead of R6RS #vu8(...) notation. @end smalllisp These options may be modified with the print-set! syntax. diff --git a/doc/ref/srfi-modules.texi b/doc/ref/srfi-modules.texi index b1776c6..2532ec6 100644 --- a/doc/ref/srfi-modules.texi +++ b/doc/ref/srfi-modules.texi @@ -1438,6 +1438,13 @@ for a three element list @code{(1 #f 3)}, but for Guile @code{(1 #f3)} is invalid. @code{(1 #f 3)} is almost certainly what one should write anyway to make the intention clear, so this is rarely a problem. +Note that the read syntax for unsigned 8-bit integers here conflicts +with the R7RS read syntax of bytevectors. When the @code{'r7rs-bytevectors} +read option is set with @code{(read-enable 'r7rs-bytevectors)}, the @code{#u8} +tag will make bytevectors instead of unsigned 8-bit integer vectors. And +similarly, the two types cannot be distinguished when printing when the +equivalent printing option is set with @code{(print-enable 'r7rs-bytevectors)}. address@hidden, for more information. @node SRFI-4 API @subsubsection SRFI-4 - API diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c index 5008d23..48a2dae 100644 --- a/libguile/bytevectors.c +++ b/libguile/bytevectors.c @@ -35,6 +35,7 @@ #include "libguile/array-handle.h" #include "libguile/uniform.h" #include "libguile/srfi-4.h" +#include "libguile/private-options.h" #include #include @@ -404,7 +405,13 @@ scm_i_print_bytevector (SCM bv, SCM port, scm_print_state *pstate SCM_UNUSED) scm_array_get_handle (bv, &h); scm_putc ('#', port); - scm_write (scm_array_handle_element_type (&h), port); + /* VU8 bytevectors are printed with u8 when r7rs-bytevectors print option is + enabled. Otherwise, they are printed the default way (vu8). */ + if (SCM_PRINT_R7RS_BYTEVECTORS_P + && SCM_BYTEVECTOR_ELEMENT_TYPE (bv) == SCM_ARRAY_ELEMENT_TYPE_VU8) + scm_puts ("u8", port); + else + scm_write (scm_array_handle_element_type (&h), port); scm_putc ('(', port); for (i = h.dims[0].lbnd, ubnd = h.dims[0].ubnd, inc = h.dims[0].inc; i <= ubnd; i += inc) diff --git a/libguile/print.c b/libguile/print.c index 8090c01..714fed0 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -119,6 +119,8 @@ scm_t_option scm_print_opts[] = { "Escape symbols using R7RS |...| symbol notation." }, { SCM_OPTION_BOOLEAN, "datum-labels", 0, "Print cyclic data using SRFI-38 datum label notation." }, + { SCM_OPTION_BOOLEAN, "r7rs-bytevectors", 0, + "Print bytevectors using R7RS #u8(...) notation instead of R6RS #vu8(...) notation."}, { 0 }, }; diff --git a/libguile/private-options.h b/libguile/private-options.h index 5205dfb..885a307 100644 --- a/libguile/private-options.h +++ b/libguile/private-options.h @@ -54,7 +54,8 @@ SCM_INTERNAL scm_t_option scm_print_opts[]; #define SCM_PRINT_ESCAPE_NEWLINES_P scm_print_opts[3].val #define SCM_PRINT_R7RS_SYMBOLS_P scm_print_opts[4].val #define SCM_PRINT_DATUM_LABELS_P scm_print_opts[5].val -#define SCM_N_PRINT_OPTIONS 6 +#define SCM_PRINT_R7RS_BYTEVECTORS_P scm_print_opts[6].val +#define SCM_N_PRINT_OPTIONS 7 /* @@ -71,7 +72,8 @@ SCM_INTERNAL scm_t_option scm_read_opts[]; #define SCM_HUNGRY_EOL_ESCAPES_P scm_read_opts[6].val #define SCM_CURLY_INFIX_P scm_read_opts[7].val #define SCM_R7RS_SYMBOLS_P scm_read_opts[8].val +#define SCM_R7RS_BYTEVECTORS_P scm_read_opts[9].val -#define SCM_N_READ_OPTIONS 9 +#define SCM_N_READ_OPTIONS 10 #endif /* PRIVATE_OPTIONS */ diff --git a/libguile/read.c b/libguile/read.c index f1adc8f..7dbf45b 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -90,6 +90,8 @@ scm_t_option scm_read_opts[] = "Support SRFI-105 curly infix expressions."}, { SCM_OPTION_BOOLEAN, "r7rs-symbols", 0, "Support R7RS |...| symbol notation."}, + { SCM_OPTION_BOOLEAN, "r7rs-bytevectors", 0, + "Support R7RS #u8(...) bytevector notation in addition to R6RS #vu8(...)."}, { 0, }, }; @@ -116,6 +118,7 @@ struct t_read_context unsigned int curly_infix_p : 1; unsigned int neoteric_p : 1; unsigned int r7rs_symbols_p : 1; + unsigned int r7rs_bytevectors_p : 1; SCM datum_label_table, datum_label_tag; }; @@ -1475,9 +1478,14 @@ static SCM scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_context *ctx, long line, int column) { - chr = scm_getc (port); - if (chr != 'u') - goto syntax; + /* If the bytevector style is R6RS, there is a 'u' to read. If it is R7RS + style, the 'u' was already read. */ + if (!ctx->r7rs_bytevectors_p) + { + chr = scm_getc (port); + if (chr != 'u') + goto syntax; + } chr = scm_getc (port); if (chr != '8') @@ -1796,13 +1804,19 @@ scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_context *ctx, case '(': return (scm_read_vector (chr, port, ctx, line, column)); case 's': - case 'u': case 'f': case 'c': /* This one may return either a boolean or an SRFI-4 vector. */ return (scm_read_srfi4_vector (chr, port, ctx, line, column)); case 'v': return (scm_read_bytevector (chr, port, ctx, line, column)); + case 'u': + /* Will be a bytevector if doing r7rs bytevectors, and an SRFI-4 vector + otherwise. */ + if (ctx->r7rs_bytevectors_p) + return (scm_read_bytevector (chr, port, ctx, line, column)); + else + return (scm_read_srfi4_vector (chr, port, ctx, line, column)); case '*': return (scm_read_guile_bit_vector (chr, port, ctx, line, column)); case 't': @@ -2383,9 +2397,10 @@ SCM_SYMBOL (sym_port_read_options, "port-read-options"); #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12 #define READ_OPTION_CURLY_INFIX_P 14 #define READ_OPTION_R7RS_SYMBOLS_P 16 +#define READ_OPTION_R7RS_BYTEVECTORS_P 18 /* The total width in bits of the per-port overrides */ -#define READ_OPTIONS_NUM_BITS 18 +#define READ_OPTIONS_NUM_BITS 20 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1) #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL @@ -2421,6 +2436,7 @@ SCM_SYMBOL (sym_square_brackets, "square-brackets"); SCM_SYMBOL (sym_hungry_eol_escapes, "hungry-eol-escapes"); SCM_SYMBOL (sym_curly_infix, "curly-infix"); SCM_SYMBOL (sym_r7rs_symbols, "r7rs-symbols"); +SCM_SYMBOL (sym_r7rs_bytevectors, "r7rs-bytevectors"); /* Special 'inherit' value for 'set-port-read-option!'. */ SCM_SYMBOL (sym_inherit, "inherit"); @@ -2469,6 +2485,8 @@ SCM_DEFINE (scm_set_port_read_option_x, "set-port-read-option!", 3, 0, 0, option_code = READ_OPTION_CURLY_INFIX_P; else if (scm_is_eq (option, sym_r7rs_symbols)) option_code = READ_OPTION_R7RS_SYMBOLS_P; + else if (scm_is_eq (option, sym_r7rs_bytevectors)) + option_code = READ_OPTION_R7RS_BYTEVECTORS_P; else scm_wrong_type_arg_msg ("set-port-read-option!", 2, option, "valid read option symbol"); @@ -2562,6 +2580,7 @@ init_read_context (SCM port, scm_t_read_context *ctx) RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p); RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p); RESOLVE_BOOLEAN_OPTION (R7RS_SYMBOLS_P, r7rs_symbols_p); + RESOLVE_BOOLEAN_OPTION (R7RS_BYTEVECTORS_P, r7rs_bytevectors_p); #undef RESOLVE_BOOLEAN_OPTION diff --git a/test-suite/tests/print.test b/test-suite/tests/print.test index 01bc994..5ced167 100644 --- a/test-suite/tests/print.test +++ b/test-suite/tests/print.test @@ -17,6 +17,7 @@ ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA (define-module (test-suite test-print) + #:use-module (rnrs bytevectors) #:use-module (ice-9 pretty-print) #:use-module (test-suite lib)) @@ -86,7 +87,21 @@ (pass-if-equal "ends with backslash" "|foo\\x5c;|" (write-with-options '(r7rs-symbols) - (string->symbol "foo\\"))))) + (string->symbol "foo\\")))) + + (with-test-prefix "r7rs-bytevectors" + + (pass-if-equal "off" + "#vu8(3 0 203 1)" + (write-with-options '() (u8-list->bytevector '(3 0 203 1)))) + + (pass-if-equal "on" + "#u8(0 6 255 103)" + (write-with-options '(r7rs-bytevectors) (u8-list->bytevector '(0 6 255 103)))) + + (pass-if-equal "on - doesn't affect other SRFI-4 types" + "#u16(0 6 255 103)" + (write-with-options '(r7rs-bytevectors) #u16(0 6 255 103))))) (with-test-prefix "pretty-print" diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index 18c0293..ae4fd5f 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -19,6 +19,7 @@ ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA (define-module (test-suite reader) + :use-module (rnrs bytevectors) :use-module (srfi srfi-1) :use-module (test-suite lib)) @@ -243,6 +244,18 @@ (with-read-options '(r7rs-symbols) (lambda () (read-string "(a |H\\x65;llo, this is \\| a \"test\"| b)")))) + (pass-if "r7rs-bytevectors off" + (let ((bv1 (u8-list->bytevector '(1 2 3 200))) + (bv2 (with-read-options '() + (lambda () + (read-string "#vu8(1 2 3 200)"))))) + (and (bytevector=? bv1 bv2) (not (u8vector? bv2))))) + (pass-if "r7rs-bytevectors on" + (let ((bv1 (u8-list->bytevector '(1 2 3 200))) + (bv2 (with-read-options '(r7rs-bytevectors) + (lambda () + (read-string "#u8(1 2 3 200)"))))) + (and (bytevector=? bv1 bv2) (not (u8vector? bv2))))) (pass-if "prefix keywords" (eq? #:keyword (with-read-options '(keywords prefix case-insensitive) -- 2.9.4