bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

gnus/mm-util.el changes


From: Dave Love
Subject: gnus/mm-util.el changes
Date: 28 Oct 2001 18:23:35 +0000
User-agent: Gnus/5.09 (Gnus v5.9.0) Emacs/21.0.107

The most important part of this is the change to
`mm-mule-charset-to-mime-charset' to do the right thing with arbitrary
coding systems, such as those with translation tables or CCL-based
ones like mac-roman.  Please at least install that unless handa can
see something wrong with it.  I once posted it to the Gnus list, but
received no comments.

Regarding `mm-charset-to-coding-system', I think the recent change to
add windows-1250 to `mm-charset-synonym-alist' was wrong.  I shouldn't
have added windows-1252 originally, and had removed it from my source
some time ago.  Contrary to the doc of that variable, these are valid
charsets, and putting them there screws you if you later load proper
coding systems for them.  The more-or-less correct way to treat them
otherwise is to make coding system aliases -- that will not affect
later proper definitions of the coding systems.  [I think handa said
that the MIME name should be the canonical one for coding systems.]  I
don't understand the other entries in that list, but have made them
conditional so that the list is null in Emacs 21.

The change to `mm-charset-to-coding-system' makes the above change
moot -- defined coding systems will get checked first -- but I think
mm-charset-synonym-alist should still be changed.

The ...-mule4 stuff isn't actually incorrect, but I never understood
why it was introduced, since it makes things less efficient in Mule 5;
I suggest undoing its effects as below, or changing its usage.

2001-10-27  Dave Love  <fx@gnu.org>

        * mm-util.el (mm-mime-mule-charset-alist): Make it correct by
        construction.
        (mm-charset-synonym-alist): Remove windows-125[02].  Make other
        entries conditional on not having a coding system defined for
        them.
        (mm-mule-charset-to-mime-charset): Use
        find-coding-systems-for-charsets if defined.
        (mm-charset-to-coding-system): Don't use
        mm-get-coding-system-list.  Look in mm-charset-synonym-alist
        later.  Add last resort search of coding systems.
        (mm-enable-multibyte-mule4, mm-disable-multibyte-mule4)
        (mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like
        Mule 4.
        (mm-find-mime-charset-region): Re-write.
        (mm-with-unibyte-current-buffer): Restore buffer as well as
        multibyteness.

Index: mm-util.el
===================================================================
RCS file: /cvs/emacs/lisp/gnus/mm-util.el,v
retrieving revision 1.18
diff -u -p -c -r1.18 mm-util.el
cvs server: conflicting specifications of output style
*** mm-util.el  2001/09/18 14:59:24     1.18
--- mm-util.el  2001/10/28 15:08:20
***************
*** 32,37 ****
--- 32,39 ----
    (or (and (fboundp 'coding-system-p) (coding-system-p sym))
        (memq sym (mm-get-coding-system-list))))
  
+ ;; Fixme: some of the cars here aren't valid MIME charsets.  That
+ ;; should only matter with XEmacs, though.
  (defvar mm-mime-mule-charset-alist
    `((us-ascii ascii)
      (iso-8859-1 latin-iso8859-1)
***************
*** 84,89 ****
--- 86,105 ----
                                      'safe-charsets))))))
    "Alist of MIME-charset/MULE-charsets.")
  
+ ;; Correct by construction, but should be unnecessary:
+ (when (fboundp 'coding-system-list)
+   (setq mm-mime-mule-charset-alist
+       (apply
+        'nconc
+        (mapcar
+         (lambda (cs)
+           (when (and (coding-system-get cs 'mime-charset)
+                      (not (eq t (coding-system-get cs 'safe-charsets))))
+             (list (cons (coding-system-get cs 'mime-charset)
+                         (delq 'ascii
+                               (coding-system-get cs 'safe-charsets))))))
+         (sort-coding-systems (coding-system-list 'base-only))))))
+ 
  (eval-and-compile
    (mapcar
     (lambda (elem)
***************
*** 137,154 ****
        (setq mm-coding-system-list (mm-coding-system-list))))
  
  (defvar mm-charset-synonym-alist
!   `((big5 . cn-big5)
!     (gb2312 . cn-gb-2312)
!     ;; Windows-1252 is actually a superset of Latin-1.  See also
!     ;; `gnus-article-dumbquotes-map'.
!     ,(unless (mm-coding-system-p 'windows-1252) ; should be defined eventually
!        '(windows-1252 . iso-8859-1))
!     ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
!     ;; Outlook users in Czech republic. Use this to allow reading of their
!     ;; e-mails. cp1250 should be defined by M-x codepage-setup.
!     ,(unless (mm-coding-system-p 'windows-1250)       ; should be defined eventually
!        '(windows-1250 . cp1250))
!     (x-ctext . ctext))
    "A mapping from invalid charset names to the real charset names.")
  
  (defvar mm-binary-coding-system
--- 153,168 ----
        (setq mm-coding-system-list (mm-coding-system-list))))
  
  (defvar mm-charset-synonym-alist
!   `(
!     ;; Perfectly fine?  A valid MIME name, anyhow.
!     ,@(unless (mm-coding-system-p 'big5)
!       '((big5 . cn-big5)))
!       ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
!     ,@(unless (mm-coding-system-p 'x-ctext)
!       '((x-ctext . ctext)))
!     ;; Apparently not defined in Emacs 20, but is a valid MIME name.
!     ,@(unless (mm-coding-system-p 'gb2312)
!       '((gb2312 . cn-gb-2312))))
    "A mapping from invalid charset names to the real charset names.")
  
  (defvar mm-binary-coding-system
***************
*** 185,198 ****
  
  (defun mm-mule-charset-to-mime-charset (charset)
    "Return the MIME charset corresponding to the given Mule CHARSET."
!   (let ((alist mm-mime-mule-charset-alist)
!       out)
!     (while alist
!       (when (memq charset (cdar alist))
!       (setq out (caar alist)
!             alist nil))
!       (pop alist))
!     out))
  
  (defun mm-charset-to-coding-system (charset &optional lbt)
    "Return coding-system corresponding to CHARSET.
--- 199,219 ----
  
  (defun mm-mule-charset-to-mime-charset (charset)
    "Return the MIME charset corresponding to the given Mule CHARSET."
!   (if (fboundp 'find-coding-systems-for-charsets)
!       (let (mime)
!       (dolist (cs (find-coding-systems-for-charsets (list charset)))
!         (unless mime
!           (when cs
!             (setq mime (coding-system-get cs 'mime-charset)))))
!       mime)
!     (let ((alist mm-mime-mule-charset-alist)
!         out)
!       (while alist
!       (when (memq charset (cdar alist))
!         (setq out (caar alist)
!               alist nil))
!       (pop alist))
!       out)))
  
  (defun mm-charset-to-coding-system (charset &optional lbt)
    "Return coding-system corresponding to CHARSET.
*************** If optional argument LBT (`unix', `dos' 
*** 201,209 ****
  used as the line break code type of the coding system."
    (when (stringp charset)
      (setq charset (intern (downcase charset))))
-   (setq charset
-       (or (cdr (assq charset mm-charset-synonym-alist))
-           charset))
    (when lbt
      (setq charset (intern (format "%s-%s" charset lbt))))
    (cond
--- 222,227 ----
*************** used as the line break code type of the 
*** 215,226 ****
      'ascii)
     ;; Check to see whether we can handle this charset.  (This depends
     ;; on there being some coding system matching each `mime-charset'
!    ;; coding sysytem property defined, as there should be.)
!    ((memq charset (mm-get-coding-system-list))
      charset)
!    ;; Nope.
!    (t
!     nil)))
  
  (if (fboundp 'subst-char-in-string)
      (defsubst mm-replace-chars-in-string (string from to)
--- 233,259 ----
      'ascii)
     ;; Check to see whether we can handle this charset.  (This depends
     ;; on there being some coding system matching each `mime-charset'
!    ;; property defined, as there should be.)
!    ((and (coding-system-p charset)
! ;;; Doing this would potentially weed out incorrect charsets.
! ;;;    charset
! ;;;    (eq charset (coding-system-get charset 'mime-charset))
!        )
!     charset)
!    ;; Translate invalid charsets.
!    ((coding-system-p (setq charset
!                          (cdr (assq charset
!                                     mm-charset-synonym-alist))))
      charset)
!    ;; Last resort: search the coding system list for entries which
!    ;; have the right mime-charset in case the canonical name isn't
!    ;; defined (though it should be).
!    ((let (cs)
!       (dolist (c coding-system-list)
!       (if (and (null cs)
!                (eq charset (coding-system-get c 'mime-charset)))
!           (setq cs c)))
!       cs))))
  
  (if (fboundp 'subst-char-in-string)
      (defsubst mm-replace-chars-in-string (string from to)
*************** This is a no-op in XEmacs."
*** 252,271 ****
    (when (fboundp 'set-buffer-multibyte)
      (set-buffer-multibyte nil)))
  
  (defsubst mm-enable-multibyte-mule4 ()
    "Enable multibyte in the current buffer.
  Only used in Emacs Mule 4."
    (when (and (fboundp 'set-buffer-multibyte)
               (boundp 'enable-multibyte-characters)
             (default-value 'enable-multibyte-characters)
!            (not (charsetp 'eight-bit-control)))
      (set-buffer-multibyte t)))
  
  (defsubst mm-disable-multibyte-mule4 ()
    "Disable multibyte in the current buffer.
  Only used in Emacs Mule 4."
    (when (and (fboundp 'set-buffer-multibyte)
!            (not (charsetp 'eight-bit-control)))
      (set-buffer-multibyte nil)))
  
  (defun mm-preferred-coding-system (charset)
--- 285,310 ----
    (when (fboundp 'set-buffer-multibyte)
      (set-buffer-multibyte nil)))
  
+ ;; The clauses in the -mule4 functions are commented-out, since they
+ ;; should only make things less speed and space efficient in Emacs 21
+ ;; -- the multibyte eight-bit characters have a leading byte.  -- fx
+ 
  (defsubst mm-enable-multibyte-mule4 ()
    "Enable multibyte in the current buffer.
  Only used in Emacs Mule 4."
    (when (and (fboundp 'set-buffer-multibyte)
               (boundp 'enable-multibyte-characters)
             (default-value 'enable-multibyte-characters)
!            ;; (not (charsetp 'eight-bit-control))
!            )
      (set-buffer-multibyte t)))
  
  (defsubst mm-disable-multibyte-mule4 ()
    "Disable multibyte in the current buffer.
  Only used in Emacs Mule 4."
    (when (and (fboundp 'set-buffer-multibyte)
!            ;; (not (charsetp 'eight-bit-control))
!            )
      (set-buffer-multibyte nil)))
  
  (defun mm-preferred-coding-system (charset)
*************** If the charset is `composition', return 
*** 329,350 ****
        (setq list (delete head list))
        (setq result (cons head result)))
      (nreverse result)))
- 
- (defun mm-find-mime-charset-region (b e)
-   "Return the MIME charsets needed to encode the region between B and E."
-   (let ((charsets (mapcar 'mm-mime-charset
-                         (delq 'ascii
-                               (mm-find-charset-region b e)))))
-     (when (memq 'iso-2022-jp-2 charsets)
-       (setq charsets (delq 'iso-2022-jp charsets)))
-     (setq charsets (mm-delete-duplicates charsets))
-     (if (and (> (length charsets) 1)
-            (fboundp 'find-coding-systems-region)
-            (let ((cs (find-coding-systems-region b e)))
-              (or (memq 'utf-8 cs) (memq 'mule-utf-8 cs))))
-       '(utf-8)
-       charsets)))
  
  (defsubst mm-multibyte-p ()
    "Say whether multibyte is enabled."
    (if (and (not (featurep 'xemacs))
--- 368,376 ----
        (setq list (delete head list))
        (setq result (cons head result)))
      (nreverse result)))
  
+ ;; It's not clear whether this is supposed to mean the global or local
+ ;; setting.  I think it's used inconsistently.  -- fx
  (defsubst mm-multibyte-p ()
    "Say whether multibyte is enabled."
    (if (and (not (featurep 'xemacs))
*************** If the charset is `composition', return 
*** 352,357 ****
--- 378,414 ----
        enable-multibyte-characters
      (featurep 'mule)))
  
+ (defun mm-find-mime-charset-region (b e)
+   "Return the MIME charsets needed to encode the region between B and E.
+ Nil means ASCII, a single-element list represents an appropriate MIME
+ charset, and a longer list means no appropriate charset."
+   ;; The return possibilities of this function are a mess...
+   (or (and
+        (mm-multibyte-p)
+        ;; How are you supposed to do this in XEmacs?
+        (fboundp 'find-coding-systems-region)
+        ;; Find the mime-charset of the most preferred coding
+        ;; system that has one.
+        (let ((systems (find-coding-systems-region b e))
+            result)
+        ;; Fixme: The `mime-charset' (`x-ctext') of `compound-text'
+        ;; is not in the IANA list.
+        (setq systems (delq 'compound-text systems))
+        (unless (equal systems '(undecided))
+          (while systems
+            (let ((cs (coding-system-get (pop systems) 'mime-charset)))
+              (if cs
+                  (setq systems nil
+                        result (list cs))))))
+        result))
+       ;; Otherwise we're not multibyte or a single coding system won't
+       ;; cover it.
+       (mm-delete-duplicates
+        (mapcar 'mm-mime-charset
+              (delq 'iso-2022-jp       ; ??
+                    (delq 'ascii
+                          (mm-find-charset-region b e)))))))
+ 
  (defmacro mm-with-unibyte-buffer (&rest forms)
    "Create a temporary buffer, and evaluate FORMS there like `progn'.
  Use unibyte mode for this."
*************** Use unibyte mode for this."
*** 361,376 ****
  (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
  
  (defmacro mm-with-unibyte-current-buffer (&rest forms)
!   "Evaluate FORMS with current current buffer temporarily made unibyte.
  Also bind `default-enable-multibyte-characters' to nil.
  Equivalent to `progn' in XEmacs"
!   (let ((multibyte (make-symbol "multibyte")))
      `(if (fboundp 'set-buffer-multibyte)
!        (let ((,multibyte enable-multibyte-characters))
           (unwind-protect
               (let (default-enable-multibyte-characters)
                 (set-buffer-multibyte nil)
                 ,@forms)
             (set-buffer-multibyte ,multibyte)))
         (progn
         ,@forms))))
--- 418,436 ----
  (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
  
  (defmacro mm-with-unibyte-current-buffer (&rest forms)
!   "Evaluate FORMS with current buffer temporarily made unibyte.
  Also bind `default-enable-multibyte-characters' to nil.
  Equivalent to `progn' in XEmacs"
!   (let ((multibyte (make-symbol "multibyte"))
!       (buffer (make-symbol "buffer")))
      `(if (fboundp 'set-buffer-multibyte)
!        (let ((,multibyte enable-multibyte-characters)
!              (,buffer (current-buffer)))
           (unwind-protect
               (let (default-enable-multibyte-characters)
                 (set-buffer-multibyte nil)
                 ,@forms)
+            (set-buffer ,buffer)
             (set-buffer-multibyte ,multibyte)))
         (progn
         ,@forms))))
*************** Equivalent to `progn' in XEmacs"
*** 381,389 ****
    "Evaluate FORMS there like `progn' in current buffer.
  Mule4 only."
    (let ((multibyte (make-symbol "multibyte")))
!     `(if (or (featurep 'xemacs)
!            (not (fboundp 'set-buffer-multibyte))
!            (charsetp 'eight-bit-control)) ;; For Emacs Mule 4 only.
         (progn
           ,@forms)
         (let ((,multibyte (default-value 'enable-multibyte-characters)))
--- 441,449 ----
    "Evaluate FORMS there like `progn' in current buffer.
  Mule4 only."
    (let ((multibyte (make-symbol "multibyte")))
!     `(if (or (not (fboundp 'set-buffer-multibyte))
!            ;; (charsetp 'eight-bit-control)
!            ) ;; For Emacs Mule 4 only.
         (progn
           ,@forms)
         (let ((,multibyte (default-value 'enable-multibyte-characters)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]