guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

02/06: status: Gracefully handle invalid UTF-8 in build logs.


From: Ludovic Courtès
Subject: 02/06: status: Gracefully handle invalid UTF-8 in build logs.
Date: Tue, 9 Oct 2018 12:54:28 -0400 (EDT)

civodul pushed a commit to branch master
in repository guix.

commit fe17037b387c6eca0c45f0526d2761e982a192bb
Author: Ludovic Courtès <address@hidden>
Date:   Tue Oct 9 09:53:02 2018 +0200

    status: Gracefully handle invalid UTF-8 in build logs.
    
    * guix/status.scm (maybe-utf8->string): New procedure.
    (build-event-output-port): Use it in lieu of 'utf8->string'.
    * tests/status.scm ("build-output-port, UTF-8")
    ("current-build-output-port, UTF-8 + garbage"): New tests.
---
 guix/status.scm  | 19 ++++++++++++++++++-
 tests/status.scm | 22 +++++++++++++++++++++-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/guix/status.scm b/guix/status.scm
index c695606..13537c7 100644
--- a/guix/status.scm
+++ b/guix/status.scm
@@ -34,6 +34,7 @@
   #:use-module (ice-9 match)
   #:use-module (ice-9 format)
   #:use-module (ice-9 binary-ports)
+  #:autoload   (ice-9 rdelim) (read-string)
   #:use-module (rnrs bytevectors)
   #:use-module ((system foreign)
                 #:select (bytevector->pointer pointer->bytevector))
@@ -429,6 +430,22 @@ ON-CHANGE can display the build status, build events, etc."
 (define %newline
   (char-set #\return #\newline))
 
+(define (maybe-utf8->string bv)
+  "Attempt to decode BV as UTF-8 string and return it.  Gracefully handle the
+case where BV does not contain only valid UTF-8."
+  (catch 'decoding-error
+    (lambda ()
+      (utf8->string bv))
+    (lambda _
+      ;; This is the sledgehammer but it's the only safe way we have to
+      ;; properly handle this.  It's expensive but it's rarely needed.
+      (let ((port (open-bytevector-input-port bv)))
+        (set-port-encoding! port "UTF-8")
+        (set-port-conversion-strategy! port 'substitute)
+        (let ((str (read-string port)))
+          (close-port port)
+          str)))))
+
 (define* (build-event-output-port proc #:optional (seed (build-status)))
   "Return an output port for use as 'current-build-output-port' that calls
 PROC with its current state value, initialized with SEED, on every build
@@ -464,7 +481,7 @@ The second return value is a thunk to retrieve the current 
state."
       (pointer->bytevector ptr count)))
 
   (define (write! bv offset count)
-    (let loop ((str (utf8->string (bytevector-range bv offset count))))
+    (let loop ((str (maybe-utf8->string (bytevector-range bv offset count))))
       (match (string-index str %newline)
         ((? integer? cr)
          (let ((tail (string-take str (+ 1 cr))))
diff --git a/tests/status.scm b/tests/status.scm
index 04dedb7..486ad04 100644
--- a/tests/status.scm
+++ b/tests/status.scm
@@ -20,7 +20,9 @@
   #:use-module (guix status)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-11)
-  #:use-module (srfi srfi-64))
+  #:use-module (srfi srfi-64)
+  #:use-module (rnrs bytevectors)
+  #:use-module (rnrs io ports))
 
 (test-begin "status")
 
@@ -112,4 +114,22 @@
       (display "@ substituter-succeeded baz\n" port)
       (list first (get-status)))))
 
+(test-equal "build-output-port, UTF-8"
+  '((build-log "lambda is λ!\n"))
+  (let-values (((port get-status) (build-event-output-port cons '()))
+               ((bv)              (string->utf8 "lambda is λ!\n")))
+    (put-bytevector port bv)
+    (force-output port)
+    (get-status)))
+
+(test-equal "current-build-output-port, UTF-8 + garbage"
+  ;; What about a mixture of UTF-8 + garbage?
+  '((build-log "garbage: �lambda: λ\n"))
+  (let-values (((port get-status) (build-event-output-port cons '())))
+    (display "garbage: " port)
+    (put-bytevector port #vu8(128))
+    (put-bytevector port (string->utf8 "lambda: λ\n"))
+    (force-output port)
+    (get-status)))
+
 (test-end "status")



reply via email to

[Prev in Thread] Current Thread [Next in Thread]