emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/international/mule.el [lexbind]


From: Miles Bader
Subject: [Emacs-diffs] Changes to emacs/lisp/international/mule.el [lexbind]
Date: Tue, 14 Oct 2003 19:39:32 -0400

Index: emacs/lisp/international/mule.el
diff -c emacs/lisp/international/mule.el:1.147.2.2 emacs/lisp/international/mule.el:1.147.2.3
*** emacs/lisp/international/mule.el:1.147.2.2  Fri Apr  4 01:20:22 2003
--- emacs/lisp/international/mule.el    Tue Oct 14 19:39:23 2003
***************
*** 372,378 ****
                     (setq split (split-char trans)
                           charset (car split)))
                 (cond ((eq charset 'ascii)
!                       char)
                       ((eq charset 'latin-iso8859-1)
                        (+ (nth 1 split) 128))
                       ((eq charset 'mule-unicode-0100-24ff)
--- 372,378 ----
                     (setq split (split-char trans)
                           charset (car split)))
                 (cond ((eq charset 'ascii)
!                       (or trans char))
                       ((eq charset 'latin-iso8859-1)
                        (+ (nth 1 split) 128))
                       ((eq charset 'mule-unicode-0100-24ff)
***************
*** 817,822 ****
--- 817,827 ----
    The value is a symbol whose name is the `MIME-charset' parameter of
    the coding system.
  
+   o mime-text-unsuitable
+ 
+   A non-nil value means the `mime-charset' property names a charset
+   which is unsuitable for the top-level media type \"text\".
+ 
    o valid-codes (meaningful only for a coding system based on CCL)
  
    The value is a list to indicate valid byte ranges of the encoded
***************
*** 1116,1121 ****
--- 1121,1134 ----
                     first eol)))
      first))
  
+ (defun autoload-coding-system (symbol form)
+   "Define SYMBOL as a coding-system that is defined on demand.
+ 
+ FORM is a form to evaluate to define the coding-system."
+   (put symbol 'coding-system-define-form form)
+   (setq coding-system-alist (cons (list (symbol-name symbol))
+                                 coding-system-alist)))
+ 
  (defun set-buffer-file-coding-system (coding-system &optional force)
    "Set the file coding-system of the current buffer to CODING-SYSTEM.
  This means that when you save the buffer, it will be converted
***************
*** 1126,1132 ****
  leaves the end-of-line conversion unspecified, FORCE controls what to
  do.  If FORCE is nil, get the unspecified aspect (or aspects) from the
  buffer's previous `buffer-file-coding-system' value (if it is
! specified there).  Otherwise, levae it unspecified.
  
  This marks the buffer modified so that the succeeding \\[save-buffer]
  surely saves the buffer with CODING-SYSTEM.  From a program, if you
--- 1139,1145 ----
  leaves the end-of-line conversion unspecified, FORCE controls what to
  do.  If FORCE is nil, get the unspecified aspect (or aspects) from the
  buffer's previous `buffer-file-coding-system' value (if it is
! specified there).  Otherwise, leave it unspecified.
  
  This marks the buffer modified so that the succeeding \\[save-buffer]
  surely saves the buffer with CODING-SYSTEM.  From a program, if you
***************
*** 1318,1425 ****
  
  ;;; X selections
  
! (defvar non-standard-icccm-encodings-alist
    '(("ISO8859-15" . latin-iso8859-15)
      ("ISO8859-14" . latin-iso8859-14)
      ("KOI8-R" . koi8-r)
      ("BIG5-0" . big5))
!   "Alist of font charset names defined by XLFD.
! The cdr of each element is the corresponding Emacs charset or coding system.")
  
  ;; Functions to support "Non-Standard Character Set Encodings" defined
  ;; by the COMPOUND-TEXT spec.
! ;; We support that by converting the leading sequence of the
! ;; ``extended segment'' to the corresponding ISO-2022 sequences (if
! ;; the leading sequence names an Emacs charset), or decode the segment
! ;; (if it names a coding system).  Encoding does the reverse.
  ;; This function also supports "The UTF-8 encoding" described in the
  ;; section 7 of the documentation fo COMPOUND-TEXT distributed with
  ;; XFree86.
  
  (defun ctext-post-read-conversion (len)
    "Decode LEN characters encoded as Compound Text with Extended Segments."
-   (buffer-disable-undo)       ; minimize consing due to insertions and deletions
-   (narrow-to-region (point) (+ (point) len))
    (save-match-data
!     (let ((pt (point-marker))
!         (oldpt (point-marker))
!         (newpt (make-marker))
!         (modified-p (buffer-modified-p))
!         (case-fold-search nil)
!         ;; We need multibyte conversion of "TO" type because the
!         ;; buffer may be multibyte, and, in that case, the pattern
!         ;; must contain eight-bit-control/graphic characters.
!         (pattern (string-to-multibyte "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002\\|\e%G[^\e]+\e%@"))
!         last-coding-system-used
!         encoding textlen chset)
!       (while (re-search-forward pattern nil 'move)
!       (set-marker newpt (point))
!       (set-marker pt (match-beginning 0))
!       (if (= (preceding-char) ?@)
!           ;; We found embedded utf-8 sequence.
!           (progn
!             (delete-char -3)          ; delete ESC % @ at the tail
!             (goto-char pt)
!             (delete-char 3)           ; delete ESC % G at the head
!             (if (> pt oldpt)
!                 (decode-coding-region oldpt pt 'ctext-no-compositions))
!             (decode-coding-region pt newpt 'mule-utf-8)
!             (goto-char newpt)
!             (set-marker oldpt newpt))
!         (setq encoding (match-string 3))
!         (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
!                             (- (aref (match-string 2) 1) 128))
!                          (1+ (length encoding))))
!         (setq
!          chset (cdr (assoc-ignore-case encoding
!                                        non-standard-icccm-encodings-alist)))
!         (cond ((null chset)
!                ;; This charset is not supported--leave this extended
!                ;; segment unaltered and skip over it.
!                (goto-char (+ (point) textlen)))
!               ((charsetp chset)
!                ;; If it's a charset, replace the leading escape sequence
!                ;; with a standard ISO-2022 sequence.  We will decode all
!                ;; such segments later, in one go, when we exit the loop
!                ;; or find an extended segment that names a coding
!                ;; system, not a charset.
!                (replace-match
!                 (concat "\\1"
!                         (if (= 0 (charset-iso-graphic-plane chset))
!                             ;; GL charsets
!                             (if (= 1 (charset-dimension chset)) "(" "$(")
!                           ;; GR charsets
!                           (if (= 96 (charset-chars chset))
!                               "-"
!                             (if (= 1 (charset-dimension chset)) ")" "$)")))
!                         (string (charset-iso-final-char chset)))
!                 t)
!                (goto-char (+ (point) textlen)))
!               ((coding-system-p chset)
!                ;; If it's a coding system, we need to decode the segment
!                ;; right away.  But first, decode what we've skipped
!                ;; across until now.
!                (when (> pt oldpt)
!                  (decode-coding-region oldpt pt 'ctext-no-compositions))
!                (delete-region pt newpt)
!                (set-marker newpt (+ newpt textlen))
!                (decode-coding-region pt newpt chset)
!                (goto-char newpt)
!                (set-marker oldpt newpt)))))
!       ;; Decode what's left.
!       (when (> (point) oldpt)
!       (decode-coding-region oldpt (point) 'ctext-no-compositions))
!       ;; This buffer started as unibyte, because the string we get from
!       ;; the X selection is a unibyte string.  We must now make it
!       ;; multibyte, so that the decoded text is inserted as multibyte
!       ;; into its buffer.
!       (set-buffer-multibyte t)
!       (set-buffer-modified-p modified-p)
!       (- (point-max) (point-min)))))
  
  ;; If you add charsets here, be sure to modify the regexp used by
  ;; ctext-pre-write-conversion to look up non-standard charsets.
! (defvar non-standard-designations-alist
    '(("$(0" . (big5 "big5-0" 2))
      ("$(1" . (big5 "big5-0" 2))
      ;; The following are actually standard; generating extended
--- 1331,1429 ----
  
  ;;; X selections
  
! (defvar ctext-non-standard-encodings-alist
    '(("ISO8859-15" . latin-iso8859-15)
      ("ISO8859-14" . latin-iso8859-14)
      ("KOI8-R" . koi8-r)
      ("BIG5-0" . big5))
!   "Alist of non-standard encoding names vs Emacs coding systems.
! This alist is used to decode an extended segment of a compound text.")
! 
! (defvar ctext-non-standard-encodings-regexp
!   (string-to-multibyte
!    (concat
!     ;; For non-standard encodings.
!     "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
!     "\\|"
!     ;; For UTF-8 encoding.
!     "\\(\e%G[^\e]+\e%@\\)")))
  
  ;; Functions to support "Non-Standard Character Set Encodings" defined
  ;; by the COMPOUND-TEXT spec.
! ;; We support that by decoding the whole data by `ctext' which just
! ;; pertains byte sequences belonging to ``extended segment'', then
! ;; decoding those byte sequences one by one in Lisp.
  ;; This function also supports "The UTF-8 encoding" described in the
  ;; section 7 of the documentation fo COMPOUND-TEXT distributed with
  ;; XFree86.
  
  (defun ctext-post-read-conversion (len)
    "Decode LEN characters encoded as Compound Text with Extended Segments."
    (save-match-data
!     (save-restriction
!       (let ((case-fold-search nil)
!           (in-workbuf (string= (buffer-name) " *code-converting-work*"))
!           last-coding-system-used
!           pos bytes)
!       (or in-workbuf
!           (narrow-to-region (point) (+ (point) len)))
!       (decode-coding-region (point-min) (point-max) 'ctext)
!       (if in-workbuf
!           (set-buffer-multibyte t))
!       (while (re-search-forward ctext-non-standard-encodings-regexp
!                                 nil 'move)
!         (setq pos (match-beginning 0))
!         (if (match-beginning 1)
!             ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
!             (let* ((M (char-after (+ pos 4)))
!                    (L (char-after (+ pos 5)))
!                    (encoding (match-string 2))
!                    (coding (or (cdr (assoc-ignore-case 
!                                      encoding
!                                      ctext-non-standard-encodings-alist))
!                                (coding-system-p
!                                 (intern (downcase encoding))))))
!               (setq bytes (- (+ (* (- M 128) 128) (- L 128))
!                              (- (point) (+ pos 6))))
!               (when coding
!                 (delete-region pos (point))
!                 (forward-char bytes)
!                 (decode-coding-region (- (point) bytes) (point) coding)))
!           ;; ESC % G --UTF-8-BYTES-- ESC % @
!           (setq bytes (- (point) pos))
!           (decode-coding-region (- (point) bytes) (point) 'utf-8))))
!       (goto-char (point-min))
!       (- (point-max) (point)))))
! 
! ;; From X registry 2001/06/01
! ;; 20. NON-STANDARD CHARACTER SET ENCODINGS
! 
! ;; See Section 6 of the Compound Text standard.
! 
! ;; Name                                               Reference
! ;; ----                                               ---------
! ;; "DEC.CNS11643.1986-2"                              [53]
! ;;    CNS11643 2-plane using the recommended
! ;;    internal representation scheme
! ;; "DEC.DTSCS.1990-2"                         [54]
! ;;    DEC Taiwan Supplemental Character Set
! ;; "fujitsu.u90x03"                           [87]
! ;; "ILA"                                              [62]
! ;;    registry prefix
! ;; "IPSYS"                                            [59]
! ;;    registry prefix
! ;; "omron_UDC"                                        [45]
! ;;         omron User Defined Charset
! ;; "omron_UDC_ja"                                     [45]
! ;;         omron User Defined Charset for Japanese
! ;; "omron_UDC_zh"                                     [45]
! ;;         omron User Defined Charset for Chinese(Main land)
! ;; "omron_UDC_tw"                                     [45]
! ;;         omron User Defined Charset for Chinese(Taiwan)
  
  ;; If you add charsets here, be sure to modify the regexp used by
  ;; ctext-pre-write-conversion to look up non-standard charsets.
! (defvar ctext-non-standard-designations-alist
    '(("$(0" . (big5 "big5-0" 2))
      ("$(1" . (big5 "big5-0" 2))
      ;; The following are actually standard; generating extended
***************
*** 1451,1494 ****
    "Encode characters between FROM and TO as Compound Text w/Extended Segments.
  
  If FROM is a string, or if the current buffer is not the one set up for us
! by run_pre_post_conversion_on_str, generate a new temp buffer, insert the
  text, and convert it in the temporary buffer.  Otherwise, convert in-place."
-   (cond ((and (string= (buffer-name) " *code-converting-work*")
-             (not (stringp from)))
-        ; Minimize consing due to subsequent insertions and deletions.
-        (buffer-disable-undo)
-        (narrow-to-region from to))
-       (t
-        (let ((buf (current-buffer)))
-          (set-buffer (generate-new-buffer " *temp"))
-          (buffer-disable-undo)
-          (if (stringp from)
-              (insert from)
-            (insert-buffer-substring buf from to))
-          (setq from (point-min) to (point-max)))))
-   (encode-coding-region from to 'ctext-no-compositions)
-   ;; Replace ISO-2022 charset designations with extended segments, for
-   ;; those charsets that are not part of the official X registry.
    (save-match-data
!     (goto-char (point-min))
!     (let ((newpt (make-marker))
!         (case-fold-search nil)
!         pt desig encode-info encoding chset noctets textlen)
!       (set-buffer-multibyte nil)
!       ;; The regexp below finds the leading sequences for big5.
        (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
!       (setq desig (match-string 1)
!             pt (point-marker)
!             encode-info (cdr (assoc desig non-standard-designations-alist))
              encoding (car encode-info)
              chset (cadr encode-info)
              noctets (car (cddr encode-info)))
        (skip-chars-forward "^\e")
-       (set-marker newpt (point))
        (cond
         ((eq encoding t)  ; only the leading sequence needs to be changed
!         (setq textlen (+ (- newpt pt) (length chset) 1))
!         ;; Generate the ICCCM control sequence for an extended segment.
          (replace-match (format "\e%%/%d%c%c%s"
                                 noctets
                                 (+ (/ textlen 128) 128)
--- 1455,1501 ----
    "Encode characters between FROM and TO as Compound Text w/Extended Segments.
  
  If FROM is a string, or if the current buffer is not the one set up for us
! by encode-coding-string, generate a new temp buffer, insert the
  text, and convert it in the temporary buffer.  Otherwise, convert in-place."
    (save-match-data
!     ;; Setup a working buffer if necessary.
!     (cond ((stringp from)
!          (let ((buf (current-buffer)))
!            (set-buffer (generate-new-buffer " *temp"))
!            (set-buffer-multibyte (multibyte-string-p from))
!            (insert from)))
!         ((not (string= (buffer-name) " *code-converting-work*"))
!          (let ((buf (current-buffer))
!                (multibyte enable-multibyte-characters))
!            (set-buffer (generate-new-buffer " *temp"))
!            (set-buffer-multibyte multibyte)
!            (insert-buffer-substring buf from to))))
! 
!     ;; Now we can encode the whole buffer.
!     (let ((case-fold-search nil)
!         last-coding-system-used
!         pos posend desig encode-info encoding chset noctets textlen)
!       (goto-char (point-min))
!       ;; At first encode the whole buffer.
!       (encode-coding-region (point-min) (point-max) 'ctext-no-compositions)
!       ;; Then replace ISO-2022 charset designations with extended
!       ;; segments, for those charsets that are not part of the
!       ;; official X registry.  The regexp below finds the leading
!       ;; sequences for big5.
        (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
!       (setq pos (match-beginning 0)
!             posend (point)
!             desig (match-string 1)
!             encode-info (cdr (assoc desig
!                                     ctext-non-standard-designations-alist))
              encoding (car encode-info)
              chset (cadr encode-info)
              noctets (car (cddr encode-info)))
        (skip-chars-forward "^\e")
        (cond
         ((eq encoding t)  ; only the leading sequence needs to be changed
!         (setq textlen (+ (- (point) posend) (length chset) 1))
!         ;; Generate the control sequence for an extended segment.
          (replace-match (format "\e%%/%d%c%c%s"
                                 noctets
                                 (+ (/ textlen 128) 128)
***************
*** 1496,1515 ****
                                 chset)
                         t t))
         ((coding-system-p encoding) ; need to recode the entire segment...
!         (set-marker pt (match-beginning 0))
!         (decode-coding-region pt newpt 'ctext-no-compositions)
!         (set-buffer-multibyte t)
!         (encode-coding-region pt newpt encoding)
!         (set-buffer-multibyte nil)
!         (setq textlen (+ (- newpt pt) (length chset) 1))
!         (goto-char pt)
!         (insert (format "\e%%/%d%c%c%s"
!                         noctets
!                         (+ (/ textlen 128) 128)
!                         (+ (% textlen 128) 128)
!                         chset))))
!       (goto-char newpt))))
!   (set-buffer-multibyte t)
    ;; Must return nil, as build_annotations_2 expects that.
    nil)
  
--- 1503,1519 ----
                                 chset)
                         t t))
         ((coding-system-p encoding) ; need to recode the entire segment...
!         (decode-coding-region pos (point) 'ctext-no-compositions)
!         (encode-coding-region pos (point) encoding)
!         (setq textlen (+ (- (point) pos) (length chset) 1))
!         (save-excursion
!           (goto-char pos)
!           (insert (format "\e%%/%d%c%c%s"
!                           noctets
!                           (+ (/ textlen 128) 128)
!                           (+ (% textlen 128) 128)
!                           chset))))))
!       (goto-char (point-min))))
    ;; Must return nil, as build_annotations_2 expects that.
    nil)
  
***************
*** 1550,1563 ****
                                   sgml-html-meta-auto-coding-function)
    "A list of functions which attempt to determine a coding system.
  
! Each function in this list should be written to operate on the current
! buffer, but should not modify it in any way.  It should take one
! argument SIZE, past which it should not search.  If a function
! succeeds in determining a coding system, it should return that coding
! system.  Otherwise, it should return nil.
  
! Any `coding:' tags present have a higher priority than the
! functions in this list."
    :group 'files
    :group 'mule
    :type '(repeat function))
--- 1554,1571 ----
                                   sgml-html-meta-auto-coding-function)
    "A list of functions which attempt to determine a coding system.
  
! Each function in this list should be written to operate on the
! current buffer, but should not modify it in any way.  The buffer
! will contain undecoded text of parts of the file.  Each function
! should take one argument, SIZE, which says how many
! characters (starting from point) it should look at.
! 
! If one of these functions succeeds in determining a coding
! system, it should return that coding system.  Otherwise, it
! should return nil.
  
! If a file has a `coding:' tag, that takes precedence over these
! functions, so they won't be called at all."
    :group 'files
    :group 'mule
    :type '(repeat function))
***************
*** 1696,1703 ****
  
  (setq set-auto-coding-function 'set-auto-coding)
  
! (defun after-insert-file-set-buffer-file-coding-system (inserted)
!   "Set `buffer-file-coding-system' of current buffer after text is inserted."
    (if last-coding-system-used
        (let ((coding-system
             (find-new-buffer-file-coding-system last-coding-system-used))
--- 1704,1715 ----
  
  (setq set-auto-coding-function 'set-auto-coding)
  
! (defun after-insert-file-set-coding (inserted)
!   "Set `buffer-file-coding-system' of current buffer after text is inserted.
! INSERTED is the number of characters that were inserted, as figured
! in the situation before this function.  Return the number of characters
! inserted, as figured in the situation after.  The two numbers can be
! different if the buffer has become unibyte."
    (if last-coding-system-used
        (let ((coding-system
             (find-new-buffer-file-coding-system last-coding-system-used))
***************
*** 1713,1727 ****
                   (= (buffer-size) inserted))
              ;; For coding systems no-conversion and raw-text...,
              ;; edit the buffer as unibyte.
!             (let ((pos-byte (position-bytes (+ (point) inserted))))
                (set-buffer-multibyte nil)
!               (setq inserted (- pos-byte (position-bytes (point))))))
          (set-buffer-modified-p modified-p))))
    inserted)
  
- (add-hook 'after-insert-file-functions
-         'after-insert-file-set-buffer-file-coding-system)
- 
  ;; The coding-spec and eol-type of coding-system returned is decided
  ;; independently in the following order.
  ;;    1. That of buffer-file-coding-system locally bound.
--- 1725,1736 ----
                   (= (buffer-size) inserted))
              ;; For coding systems no-conversion and raw-text...,
              ;; edit the buffer as unibyte.
!             (let ((pos-marker (copy-marker (+ (point) inserted))))
                (set-buffer-multibyte nil)
!               (setq inserted (- pos-marker (point)))))
          (set-buffer-modified-p modified-p))))
    inserted)
  
  ;; The coding-spec and eol-type of coding-system returned is decided
  ;; independently in the following order.
  ;;    1. That of buffer-file-coding-system locally bound.
***************
*** 1851,1859 ****
            (setq coding (funcall set-auto-coding-function
                                  filename (- (point-max) (point-min)))))
        (or coding
!           (setq coding (find-operation-coding-system
!                         'insert-file-contents
!                         filename visit beg end replace)))
        (if (coding-system-p coding)
            (or enable-multibyte-characters
                (setq coding
--- 1860,1868 ----
            (setq coding (funcall set-auto-coding-function
                                  filename (- (point-max) (point-min)))))
        (or coding
!           (setq coding (car (find-operation-coding-system
!                              'insert-file-contents
!                              filename visit beg end replace))))
        (if (coding-system-p coding)
            (or enable-multibyte-characters
                (setq coding
***************
*** 2045,2051 ****
  (defun sgml-xml-auto-coding-function (size)
    "Determine whether the buffer is XML, and if so, its encoding.
  This function is intended to be added to `auto-coding-functions'."
!   (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" nil t)
      (let ((end (save-excursion
                 ;; This is a hack.
                 (re-search-forward "\"\\s-*\\?>" size t))))
--- 2054,2061 ----
  (defun sgml-xml-auto-coding-function (size)
    "Determine whether the buffer is XML, and if so, its encoding.
  This function is intended to be added to `auto-coding-functions'."
!   (setq size (+ (point) size))
!   (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t)
      (let ((end (save-excursion
                 ;; This is a hack.
                 (re-search-forward "\"\\s-*\\?>" size t))))
***************
*** 2062,2068 ****
  (defun sgml-html-meta-auto-coding-function (size)
    "If the buffer has an HTML meta tag, use it to determine encoding.
  This function is intended to be added to `auto-coding-functions'."
!   (setq size (min size
                  ;; Only search forward 10 lines
                  (save-excursion
                    (forward-line 10)
--- 2072,2078 ----
  (defun sgml-html-meta-auto-coding-function (size)
    "If the buffer has an HTML meta tag, use it to determine encoding.
  This function is intended to be added to `auto-coding-functions'."
!   (setq size (min (+ (point) size)
                  ;; Only search forward 10 lines
                  (save-excursion
                    (forward-line 10)
***************
*** 2079,2082 ****
--- 2089,2093 ----
  ;;;
  (provide 'mule)
  
+ ;;; arch-tag: 9aebaa6e-0e8a-40a9-b857-cb5d04a39e7c
  ;;; mule.el ends here




reply via email to

[Prev in Thread] Current Thread [Next in Thread]