This will be useful for checking UTF-8 validity.
(define-ccl-program ccl-check-utf-8
  '(0
    ;; Register roles:
    ;;   r0 - result flag: 1 while everything read so far is valid,
    ;;        0 while in the middle of (or after rejecting) a sequence.
    ;;   r1 - lead byte of the sequence being checked.
    ;;   r2 - most recently read continuation byte.
    ((r0 = 1)
     (loop
      ;; A byte below #x80 is a complete one-byte sequence.
      (read-if (r1 < #x80) (repeat)
        (;; Pessimistically mark the input invalid; r0 goes back to 1
         ;; only once the whole multibyte sequence has been accepted.
         ;; Every `(end)' below therefore leaves r0 at 0.
         (r0 = 0)
         ;; #x80..#xBF are stray continuation bytes; #xC0 and #xC1
         ;; could only start an overlong two-byte form.
         (if (r1 < #xC2) (end))
         ;; First continuation byte: must look like 10xxxxxx.
         (read r2)
         (if ((r2 & #xC0) != #x80) (end))
         (if (r1 < #xE0) ((r0 = 1) (repeat)))
         ;; Reject overlong forms.  For the smallest lead byte of each
         ;; length the first continuation byte must be large enough
         ;; that the value could not fit in a shorter sequence.
         (if (r1 == #xE0) ((if (r2 < #xA0) (end))))
         (if (r1 == #xF0) ((if (r2 < #x90) (end))))
         (if (r1 == #xF8) ((if (r2 < #x88) (end))))
         ;; Second continuation byte.
         (read r2)
         (if ((r2 & #xC0) != #x80) (end))
         (if (r1 < #xF0) ((r0 = 1) (repeat)))
         ;; Third continuation byte.
         (read r2)
         (if ((r2 & #xC0) != #x80) (end))
         (if (r1 < #xF8) ((r0 = 1) (repeat)))
         ;; Fourth continuation byte; of the five-byte lead bytes only
         ;; #xF8 is accepted.
         (read r2)
         (if ((r2 & #xC0) != #x80) (end))
         (if (r1 == #xF8) ((r0 = 1) (repeat)))
         (end))))))
  "Check if the input unibyte string is a valid UTF-8 sequence or not.
If it is valid, set the register `r0' to 1, else set it to 0.

Sequences of one to four bytes are accepted, as are five-byte
sequences whose lead byte is #xF8.  Every continuation byte must
have the form 10xxxxxx, and overlong forms are rejected: lead
bytes #xC0 and #xC1, #xE0 followed by a byte below #xA0, #xF0
followed by a byte below #x90, and #xF8 followed by a byte below
#x88.  Surrogate code points and values above U+10FFFF are not
rejected.")
(defun string-utf-8-p (string)
  "Return non-nil if and only if unibyte STRING holds valid UTF-8.
Validity is decided by running the CCL program `ccl-check-utf-8'
over STRING and testing whether it left 1 in register r0.
Signal an error if STRING is not a string or is a multibyte string."
  (unless (and (stringp string)
               (not (multibyte-string-p string)))
    (error "Not a unibyte string: %s" string))
  ;; The nine-element vector is passed as the status argument of
  ;; `ccl-execute-on-string'; slot 0 is read back as register r0.
  (let ((registers (make-vector 9 0)))
    (ccl-execute-on-string ccl-check-utf-8 registers string)
    (= 1 (aref registers 0))))