guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 03/03: Add R6RS bytevector->string, string->bytevector


From: Andy Wingo
Subject: [Guile-commits] 03/03: Add R6RS bytevector->string, string->bytevector
Date: Tue, 21 Jun 2016 09:31:29 +0000 (UTC)

wingo pushed a commit to branch master
in repository guile.

commit 4e27e3c054442189f05355f631176d94b4f5019f
Author: Andy Wingo <address@hidden>
Date:   Tue Jun 21 11:27:21 2016 +0200

    Add R6RS bytevector->string, string->bytevector
    
    * module/rnrs/io/ports.scm (string->bytevector):
      (bytevector->string): New procedures.
    * module/rnrs.scm: Export new procedures.
    * test-suite/tests/r6rs-ports.test: Add string->bytevector and
      bytevector->string tests.
---
 module/rnrs.scm                  |    1 +
 module/rnrs/io/ports.scm         |   31 +++++++++
 test-suite/tests/r6rs-ports.test |  134 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+)

diff --git a/module/rnrs.scm b/module/rnrs.scm
index 4368216..d2b4cb3 100644
--- a/module/rnrs.scm
+++ b/module/rnrs.scm
@@ -170,6 +170,7 @@
          make-transcoder transcoder-codec transcoder-eol-style
           transcoder-error-handling-mode native-transcoder
          latin-1-codec utf-8-codec utf-16-codec
+          string->bytevector bytevector->string
          
          eof-object? port? input-port? output-port? eof-object port-eof?
          port-transcoder
diff --git a/module/rnrs/io/ports.scm b/module/rnrs/io/ports.scm
index 8ff6748..5ddc3d5 100644
--- a/module/rnrs/io/ports.scm
+++ b/module/rnrs/io/ports.scm
@@ -36,6 +36,9 @@
           transcoder-error-handling-mode native-transcoder
           latin-1-codec utf-8-codec utf-16-codec
            
+          ;; transcoding bytevectors
+          bytevector->string string->bytevector
+
           ;; input & output ports
           port? input-port? output-port?
           port-eof?
@@ -110,6 +113,7 @@
           (only (ice-9 ports internal)
                 port-write-buffer port-buffer-bytevector port-line-buffered?)
           (only (rnrs bytevectors) bytevector-length)
+          (prefix (ice-9 iconv) iconv:)
           (rnrs enums)
           (rnrs records syntactic)
           (rnrs exceptions)
@@ -173,6 +177,33 @@
 
 
 ;;;
+;;; Transcoding bytevectors
+;;;
+
+(define (string->bytevector str transcoder)
+  "Encode @var{str} using @var{transcoder}, returning a bytevector."
+  (iconv:string->bytevector
+   str
+   (transcoder-codec transcoder)
+   (case (transcoder-error-handling-mode transcoder)
+     ((raise) 'error)
+     ((replace) 'substitute)
+     (else (error "unsupported error handling mode"
+                  (transcoder-error-handling-mode transcoder))))))
+
+(define (bytevector->string bv transcoder)
+  "Decode @var{bv} using @var{transcoder}, returning a string."
+  (iconv:bytevector->string
+   bv
+   (transcoder-codec transcoder)
+   (case (transcoder-error-handling-mode transcoder)
+     ((raise) 'error)
+     ((replace) 'substitute)
+     (else (error "unsupported error handling mode"
+                  (transcoder-error-handling-mode transcoder))))))
+
+
+;;;
 ;;; Internal helpers
 ;;;
 
diff --git a/test-suite/tests/r6rs-ports.test b/test-suite/tests/r6rs-ports.test
index 8c4ef57..b3f11bb 100644
--- a/test-suite/tests/r6rs-ports.test
+++ b/test-suite/tests/r6rs-ports.test
@@ -1065,6 +1065,140 @@ not `set-port-position!'"
   (with-test-prefix "open-file-input/output-port [input]"
     (test-input-file-opener open-file-input/output-port (test-file))))
 
+(define exception:encoding-error
+  '(encoding-error . ""))
+
+(define exception:decoding-error
+  '(decoding-error . ""))
+
+
+(with-test-prefix "ascii string"
+  (let ((s "Hello, World!"))
+    ;; For ASCII, all of these encodings should be the same.
+
+    (pass-if "to ascii bytevector"
+      (equal? (string->bytevector s (make-transcoder "ASCII"))
+              #vu8(72 101 108 108 111 44 32 87 111 114 108 100 33)))
+  
+    (pass-if "to ascii bytevector (length check)"
+      (equal? (string-length s)
+              (bytevector-length
+               (string->bytevector s (make-transcoder "ascii")))))
+  
+    (pass-if "from ascii bytevector"
+      (equal? s
+              (bytevector->string
+               (string->bytevector s (make-transcoder "ascii"))
+               (make-transcoder "ascii"))))
+  
+    (pass-if "to utf-8 bytevector"
+      (equal? (string->bytevector s (make-transcoder "ASCII"))
+              (string->bytevector s (make-transcoder "utf-8"))))
+  
+    (pass-if "to UTF-8 bytevector (testing encoding case sensitivity)"
+      (equal? (string->bytevector s (make-transcoder "ascii"))
+              (string->bytevector s (make-transcoder "UTF-8"))))
+  
+    (pass-if "from utf-8 bytevector"
+      (equal? s
+              (bytevector->string
+               (string->bytevector s (make-transcoder "utf-8"))
+               (make-transcoder "utf-8"))))
+  
+    (pass-if "to latin1 bytevector"
+      (equal? (string->bytevector s (make-transcoder "ASCII"))
+              (string->bytevector s (make-transcoder "latin1"))))
+
+    (pass-if "from latin1 bytevector"
+      (equal? s
+              (bytevector->string
+               (string->bytevector s (make-transcoder "utf-8"))
+               (make-transcoder "utf-8"))))))
+
+(with-test-prefix "narrow non-ascii string"
+  (let ((s "été"))
+    (pass-if "to latin1 bytevector"
+      (equal? (string->bytevector s (make-transcoder "latin1"))
+              #vu8(233 116 233)))
+  
+    (pass-if "to latin1 bytevector (length check)"
+      (equal? (string-length s)
+              (bytevector-length
+               (string->bytevector s (make-transcoder "latin1")))))
+  
+    (pass-if "from latin1 bytevector"
+      (equal? s
+              (bytevector->string
+               (string->bytevector s (make-transcoder "latin1"))
+               (make-transcoder "latin1"))))
+  
+    (pass-if "to utf-8 bytevector"
+      (equal? (string->bytevector s (make-transcoder "utf-8"))
+              #vu8(195 169 116 195 169)))
+
+    (pass-if "from utf-8 bytevector"
+      (equal? s
+              (bytevector->string
+               (string->bytevector s (make-transcoder "utf-8"))
+               (make-transcoder "utf-8"))))
+
+    (pass-if-exception "encode latin1 as ascii" exception:encoding-error
+      (string->bytevector s (make-transcoder "ascii"
+                                             (native-eol-style)
+                                             (error-handling-mode raise))))
+
+    (pass-if-exception "misparse latin1 as utf8" exception:decoding-error
+      (bytevector->string
+       (string->bytevector s (make-transcoder "latin1"))
+       (make-transcoder "utf-8"
+                        (native-eol-style)
+                        (error-handling-mode raise))))
+
+    (pass-if "misparse latin1 as utf8 with substitutions"
+      (equal? (bytevector->string
+               (string->bytevector s (make-transcoder "latin1"))
+               (make-transcoder "utf-8" (native-eol-style)
+                                (error-handling-mode replace)))
+              "\uFFFDt\uFFFD"))
+
+    (pass-if-exception "misparse latin1 as ascii" exception:decoding-error
+      (bytevector->string (string->bytevector s (make-transcoder "latin1"))
+                          (make-transcoder "ascii"
+                                           (native-eol-style)
+                                           (error-handling-mode raise))))))
+
+
+(with-test-prefix "wide non-ascii string"
+  (let ((s "ΧΑΟΣ"))
+    (pass-if "to utf-8 bytevector"
+      (equal? (string->bytevector s (make-transcoder "utf-8"))
+              #vu8(206 167 206 145 206 159 206 163) ))
+
+    (pass-if "from utf-8 bytevector"
+      (equal? s
+              (bytevector->string
+               (string->bytevector s (make-transcoder "utf-8"))
+               (make-transcoder "utf-8"))))
+
+    (pass-if-exception "encode as ascii" exception:encoding-error
+      (string->bytevector s (make-transcoder "ascii"
+                                             (native-eol-style)
+                                             (error-handling-mode raise))))
+
+    (pass-if-exception "encode as latin1" exception:encoding-error
+      (string->bytevector s (make-transcoder "latin1"
+                                             (native-eol-style)
+                                             (error-handling-mode raise))))
+
+    (pass-if "encode as ascii with substitutions"
+      (equal? (make-string (string-length s) #\?)
+              (bytevector->string
+               (string->bytevector s (make-transcoder
+                                      "ascii"
+                                      (native-eol-style)
+                                      (error-handling-mode replace)))
+               (make-transcoder "ascii"))))))
+
 ;;; Local Variables:
 ;;; mode: scheme
 ;;; eval: (put 'guard 'scheme-indent-function 1)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]