emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/gnus/deuglify.el [emacs-unicode-2]


From: Miles Bader
Subject: [Emacs-diffs] Changes to emacs/lisp/gnus/deuglify.el [emacs-unicode-2]
Date: Thu, 09 Sep 2004 09:44:17 -0400

Index: emacs/lisp/gnus/deuglify.el
diff -c /dev/null emacs/lisp/gnus/deuglify.el:1.2.2.1
*** /dev/null   Thu Sep  9 09:37:12 2004
--- emacs/lisp/gnus/deuglify.el Thu Sep  9 09:36:24 2004
***************
*** 0 ****
--- 1,472 ----
+ ;;; deuglify.el --- deuglify broken Outlook (Express) articles
+ 
+ ;; Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ ;; Copyright (C) 2001, 2002 Raymond Scholz
+ 
+ ;; Author: Raymond Scholz <address@hidden>
+ ;;         Thomas Steffen (unwrapping algorithm,
+ ;;                         based on an idea of Stefan Monnier)
+ ;; Keywords: mail, news
+ 
+ ;; This file is part of GNU Emacs.
+ 
+ ;; GNU Emacs is free software; you can redistribute it and/or modify
+ ;; it under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ 
+ ;; GNU Emacs is distributed in the hope that it will be useful,
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.        See the
+ ;; GNU General Public License for more details.
+ 
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GNU Emacs; see the file COPYING.  If not, write to the
+ ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ ;; Boston, MA 02111-1307, USA.
+ 
+ ;;; Commentary:
+ 
+ ;; This file enables Gnus to repair broken citations produced by
+ ;; common user agents like MS Outlook (Express).  It may repair
+ ;; articles of other user agents too.
+ ;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+ ;;
+ ;; Outlook sometimes wraps cited lines before sending a message as
+ ;; seen in this example:
+ ;;
+ ;; Example #1
+ ;; ----------
+ ;;
+ ;; John Doe wrote:
+ ;;
+ ;; > This sentence no verb.  This sentence no verb.  This sentence
+ ;; no
+ ;; > verb.  This sentence no verb.  This sentence no verb.  This
+ ;; > sentence no verb.
+ ;;
+ ;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those
+ ;; erroneously wrapped lines and will unwrap them.  I.e. putting the
+ ;; wrapped parts ("no" in this example) back where they belong (at the
+ ;; end of the cited line above).
+ ;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;
+ ;; Note that some people not only use broken user agents but also
+ ;; practice a bad citation style by omitting blank lines between the
+ ;; cited text and their own text.
+ ;:
+ ;; Example #2
+ ;; ----------
+ ;;
+ ;; John Doe wrote:
+ ;;
+ ;; > This sentence no verb.  This sentence no verb.  This sentence no
+ ;; You forgot in all your sentences.
+ ;; > verb.  This sentence no verb.  This sentence no verb.  This
+ ;; > sentence no verb.
+ ;;
+ ;; Unwrapping "You forgot in all your sentences." would be illegal as
+ ;; this part wasn't intended to be cited text.
+ ;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting
+ ;; citation line will be of a certain maximum length.  You can control
+ ;; this by adjusting `gnus-outlook-deuglify-unwrap-max'.  Also
+ ;; unwrapping will only be done if the line above the (possibly)
+ ;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'.
+ ;;
+ ;; Furthermore no unwrapping will be undertaken if the last character
+ ;; is one of the chars specified in
+ ;; `gnus-outlook-deuglify-unwrap-stop-chars'.  Setting this to ".?!"
+ ;; inhibits unwrapping if the cited line ends with a full stop,
+ ;; question mark or exclamation mark.  Note that this variable
+ ;; defaults to `nil', triggering a few false positives but generally
+ ;; giving you better results.
+ ;;
+ ;; Unwrapping works on every level of citation.  Thus you will be able
+ ;; repair broken citations of broken user agents citing broken
+ ;; citations of broken user agents citing broken citations...
+ ;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;
+ ;; Citations are commonly introduced with an attribution line
+ ;; indicating who wrote the cited text.  Outlook adds superfluous
+ ;; information that can be found in the header of the message to this
+ ;; line and often wraps it.
+ ;;
+ ;; If that weren't enough, lots of people write their own text above
+ ;; the cited text and cite the complete original article below.
+ ;;
+ ;; Example #3
+ ;; ----------
+ ;;
+ ;; Hey, John.  There's no in all your sentences!
+ ;;
+ ;; John Doe <address@hidden> wrote in message
+ ;; news:address@hidden
+ ;; > This sentence no verb.  This sentence no verb.  This sentence
+ ;; no
+ ;; > verb.  This sentence no verb.  This sentence no verb.  This
+ ;; > sentence no verb.
+ ;; >
+ ;; > Bye, John
+ ;;
+ ;; Repairing the attribution line will be done by function
+ ;; `gnus-article-outlook-repair-attribution which calls other function that
+ ;; try to recognize and repair broken attribution lines.  See variable
+ ;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be
+ ;; cut off from the beginning of an attribution line and variable
+ ;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are
+ ;; required to be found in an attribution line.  These function return
+ ;; the point where the repaired attribution line starts.
+ ;;
+ ;; Rearranging the article so that the cited text appears above the
+ ;; new text will be done by function
+ ;; `gnus-article-outlook-rearrange-citation'.  This function calls
+ ;; `gnus-article-outlook-repair-attribution to find and repair an attribution
+ ;; line.
+ ;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;
+ ;; Well, and that's what the message will look like after applying
+ ;; deuglification:
+ ;;
+ ;; Example #3 (deuglified)
+ ;; -----------------------
+ ;;
+ ;; John Doe <address@hidden> wrote:
+ ;;
+ ;; > This sentence no verb.  This sentence no verb.  This sentence no
+ ;; > verb.  This sentence no verb.  This sentence no verb.  This
+ ;; > sentence no verb.
+ ;; >
+ ;; > Bye, John
+ ;;
+ ;; Hey, John.  There's no in all your sentences!
+ ;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 
+ ;; Usage
+ ;; -----
+ ;;
+ ;; Press `W k' in the Summary Buffer.
+ ;;
+ ;; Non recommended usage :-)
+ ;; ---------------------
+ ;;
+ ;; To automatically invoke deuglification on every article you read,
+ ;; put something like that in your .gnus:
+ ;;
+ ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines)
+ ;;
+ ;; or _one_ of the following lines:
+ ;;
+ ;; ;; repair broken attribution lines
+ ;; (add-hook 'gnus-article-decode-hook 
'gnus-article-outlook-repair-attribution)
+ ;;
+ ;; ;; repair broken attribution lines and citations
+ ;; (add-hook 'gnus-article-decode-hook 
'gnus-article-outlook-rearrange-citation)
+ ;;
+ ;; Note that there always may be some false positives, so I suggest
+ ;; using the manual invocation.  After deuglification you may want to
+ ;; refill the whole article using `W w'.
+ ;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;
+ ;; Limitations
+ ;; -----------
+ ;;
+ ;; As I said before there may (or will) be a few false positives on
+ ;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'.
+ ;;
+ ;; `gnus-article-outlook-repair-attribution will only fix the first
+ ;; attribution line found in the article.  Furthermore it fixed to
+ ;; certain kinds of attributions.  And there may be horribly many
+ ;; false positives, vanishing lines and so on -- so don't trust your
+ ;; eyes.  Again I recommend manual invocation.
+ ;;
+ ;; `gnus-article-outlook-rearrange-citation' carries all the limitations of
+ ;; `gnus-article-outlook-repair-attribution.
+ ;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;
+ ;; See ChangeLog for other changes.
+ ;;
+ ;; Revision 1.5  2002/01/27 14:39:17  rscholz
+ ;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit
+ ;;   unwrapping if one these chars is first in the possibly wrapped line.
+ ;; * Improved rearranging of the article.
+ ;; * New function `gnus-outlook-repair-attribution-block' for repairing
+ ;;   those big "Original Message (following some headers)" attributions.
+ ;;
+ ;; Revision 1.4  2002/01/03 14:05:00  rscholz
+ ;; Renamed `gnus-outlook-deuglify-article' to
+ ;; `gnus-article-outlook-deuglify-article'.
+ ;; Made it easier to deuglify the article while being in Gnus' Article
+ ;; Edit Mode.  (suggested by Phil Nitschke)
+ ;;
+ ;;
+ ;; Revision 1.3  2002/01/02 23:35:54  rscholz
+ ;; Fix a bug that caused succeeding long attribution lines to be
+ ;; unwrapped.  Minor doc fixes and regular expression tuning.
+ ;;
+ ;; Revision 1.2  2001/12/30 20:14:34  rscholz
+ ;; Clean up source.
+ ;;
+ ;; Revision 1.1  2001/12/30 20:13:32  rscholz
+ ;; Initial revision
+ ;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+ ;;; Code:
+ 
+ (require 'gnus-art)
+ (require 'gnus-sum)
+ 
+ (defconst gnus-outlook-deuglify-version "1.5 Gnus version"
+   "Version of gnus-outlook-deuglify.")
+ 
+ ;;; User Customizable Variables:
+ 
+ (defgroup gnus-outlook-deuglify nil
+   "Deuglify articles generated by broken user agents like MS Outlook 
(Express).")
+ 
+ ;;;###autoload
+ (defcustom gnus-outlook-deuglify-unwrap-min 45
+   "Minimum length of the cited line above the (possibly) wrapped line."
+   :type 'integer
+   :group 'gnus-outlook-deuglify)
+ 
+ ;;;###autoload
+ (defcustom gnus-outlook-deuglify-unwrap-max 95
+   "Maximum length of the cited line after unwrapping."
+   :type 'integer
+   :group 'gnus-outlook-deuglify)
+ 
+ (defcustom gnus-outlook-deuglify-cite-marks ">|#%"
+   "Characters that indicate cited lines."
+   :type 'string
+   :group 'gnus-outlook-deuglify)
+ 
+ (defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil
+   "Characters that inhibit unwrapping if they are the last one on the cited 
line above the possible wrapped line."
+   :type '(radio (const :format "None  " nil)
+               (string :size 0 :value ".?!"))
+   :group 'gnus-outlook-deuglify)
+ 
+ (defcustom gnus-outlook-deuglify-no-wrap-chars "`"
+   "Characters that inhibit unwrapping if they are the first one in the 
possibly wrapped line."
+   :type 'string
+   :group 'gnus-outlook-deuglify)
+ 
+ (defcustom  gnus-outlook-deuglify-attrib-cut-regexp
+   "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, "
+   "Regular expression matching the beginning of an attribution line that 
should be cut off."
+   :type 'string
+   :group 'gnus-outlook-deuglify)
+ 
+ (defcustom gnus-outlook-deuglify-attrib-verb-regexp
+   "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a 
écrit\\|schreef\\|escribió"
+   "Regular expression matching the verb used in an attribution line."
+   :type 'string
+   :group 'gnus-outlook-deuglify)
+ 
+ (defcustom  gnus-outlook-deuglify-attrib-end-regexp
+   ": *\\|\\.\\.\\."
+   "Regular expression matching the end of an attribution line."
+   :type 'string
+   :group 'gnus-outlook-deuglify)
+ 
+ ;;;###autoload
+ (defcustom gnus-outlook-display-hook nil
+   "A hook called after an deuglified article has been prepared.
+ It is run after `gnus-article-prepare-hook'."
+   :type 'hook
+   :group 'gnus-outlook-deuglify)
+ 
+ ;; Functions
+ 
+ (defun gnus-outlook-display-article-buffer ()
+   "Redisplay current buffer or article buffer."
+   (with-current-buffer (or gnus-article-buffer (current-buffer))
+     ;; "Emulate" `gnus-article-prepare-display' without calling
+     ;; it. Calling `gnus-article-prepare-display' on an already
+     ;; prepared article removes all MIME parts.  I'm unsure whether
+     ;; this is a bug or not.
+     (gnus-article-highlight t)
+     (gnus-treat-article nil)
+     (gnus-run-hooks 'gnus-article-prepare-hook
+                   'gnus-outlook-display-hook)))
+ 
+ ;;;###autoload
+ (defun gnus-article-outlook-unwrap-lines (&optional nodisplay)
+   "Unwrap lines that appear to be wrapped citation lines.
+ You can control what lines will be unwrapped by frobbing
+ `gnus-outlook-deuglify-unwrap-min' and `gnus-outlook-deuglify-unwrap-max',
+ indicating the minimum and maximum length of an unwrapped citation line.  If
+ NODISPLAY is non-nil, don't redisplay the article buffer."
+   (interactive "P")
+   (save-excursion
+     (let ((case-fold-search nil)
+         (inhibit-read-only t)
+         (cite-marks gnus-outlook-deuglify-cite-marks)
+         (no-wrap gnus-outlook-deuglify-no-wrap-chars)
+         (stop-chars gnus-outlook-deuglify-unwrap-stop-chars))
+       (gnus-with-article-buffer
+       (article-goto-body)
+       (while (re-search-forward
+               (concat
+                "^\\([ \t" cite-marks "]*\\)"
+                "\\([" cite-marks "].*[^\n " stop-chars "]\\)[ \t]?\n"
+                "\\1\\([^\n " cite-marks no-wrap "]+.*\\)$")
+               nil t)
+         (let ((len12 (- (match-end 2) (match-beginning 1)))
+             (len3 (- (match-end 3) (match-beginning 3))))
+           (if (and (> len12 gnus-outlook-deuglify-unwrap-min)
+                    (< (+ len12 len3) gnus-outlook-deuglify-unwrap-max))
+               (progn
+                 (replace-match "\\1\\2 \\3")
+                 (goto-char (match-beginning 0)))))))))
+   (unless nodisplay (gnus-outlook-display-article-buffer)))
+ 
+ (defun gnus-outlook-rearrange-article (attr-start)
+   "Put the text from ATTR-START to the end of buffer at the top of the 
article buffer."
+   (save-excursion
+     (let ((inhibit-read-only t)
+         (cite-marks gnus-outlook-deuglify-cite-marks))
+       (gnus-with-article-buffer
+       (article-goto-body)
+       ;; article does not start with attribution
+       (unless (= (point) attr-start)
+         (gnus-kill-all-overlays)
+         (let ((cur (point))
+               ;; before signature or end of buffer
+               (to (if (gnus-article-search-signature)
+                       (point)
+                     (point-max))))
+           ;; handle the case where the full quote is below the
+           ;; signature
+           (if (< to attr-start)
+               (setq to (point-max)))
+           (transpose-regions cur attr-start attr-start to)))))))
+ 
+ ;; John Doe <address@hidden> wrote in message
+ ;; news:address@hidden
+ 
+ (defun gnus-outlook-repair-attribution-outlook ()
+   "Repair a broken attribution line (Outlook)."
+   (save-excursion
+     (let ((case-fold-search nil)
+         (inhibit-read-only t)
+         (cite-marks gnus-outlook-deuglify-cite-marks))
+       (gnus-with-article-buffer
+       (article-goto-body)
+       (if (re-search-forward
+            (concat "^\\([^" cite-marks "].+\\)"
+                    "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\)"
+                    "\\(.*\n?[^\n" cite-marks "].*\\)?"
+                    "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
+            nil t)
+           (progn
+             (gnus-kill-all-overlays)
+             (replace-match "\\1\\2\\4")
+             (match-beginning 0)))))))
+ 
+ 
+ ;; ----- Original Message -----
+ ;; From: "John Doe" <address@hidden>
+ ;; To: "Doe Foundation" <address@hidden>
+ ;; Sent: Monday, November 19, 2001 12:13 PM
+ ;; Subject: More Doenuts
+ 
+ (defun gnus-outlook-repair-attribution-block ()
+   "Repair a big broken attribution block."
+   (save-excursion
+     (let ((case-fold-search nil)
+         (inhibit-read-only t)
+         (cite-marks gnus-outlook-deuglify-cite-marks))
+       (gnus-with-article-buffer
+       (article-goto-body)
+       (if (re-search-forward
+             (concat "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n"
+                    "[^\n:]+:[ \t]*\\([^\n]+\\)\n"
+                    "\\([^\n:]+:[ \t]*[^\n]+\n\\)+")
+            nil t)
+           (progn
+             (gnus-kill-all-overlays)
+             (replace-match "\\1 wrote:\n")
+             (match-beginning 0)))))))
+ 
+ ;; On Wed, 16 Jan 2002 23:23:30 +0100, John Doe <address@hidden> wrote:
+ 
+ (defun gnus-outlook-repair-attribution-other ()
+   "Repair a broken attribution line (other user agents than Outlook)."
+   (save-excursion
+     (let ((case-fold-search nil)
+         (inhibit-read-only t)
+         (cite-marks gnus-outlook-deuglify-cite-marks))
+       (gnus-with-article-buffer
+       (article-goto-body)
+       (if (re-search-forward
+            (concat "^\\("gnus-outlook-deuglify-attrib-cut-regexp"\\)?"
+                    "\\([^" cite-marks "].+\\)\n\\([^\n" cite-marks "].*\\)?"
+                    "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\).*"
+                    "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
+            nil t)
+           (progn
+             (gnus-kill-all-overlays)
+             (replace-match "\\4 \\5\\6\\7")
+             (match-beginning 0)))))))
+ 
+ ;;;###autoload
+ (defun gnus-article-outlook-repair-attribution (&optional nodisplay)
+   "Repair a broken attribution line.
+ If NODISPLAY is non-nil, don't redisplay the article buffer."
+   (interactive "P")
+   (let ((attrib-start
+        (or
+         (gnus-outlook-repair-attribution-other)
+         (gnus-outlook-repair-attribution-block)
+         (gnus-outlook-repair-attribution-outlook))))
+     (unless nodisplay (gnus-outlook-display-article-buffer))
+     attrib-start))
+ 
+ (defun gnus-article-outlook-rearrange-citation (&optional nodisplay)
+   "Repair broken citations.
+ If NODISPLAY is non-nil, don't redisplay the article buffer."
+   (interactive "P")
+   (let ((attrib-start (gnus-article-outlook-repair-attribution 'nodisplay)))
+     ;; rearrange citations if an attribution line has been recognized
+     (if attrib-start
+       (gnus-outlook-rearrange-article attrib-start)))
+   (unless nodisplay (gnus-outlook-display-article-buffer)))
+ 
+ ;;;###autoload
+ (defun gnus-outlook-deuglify-article (&optional nodisplay)
+   "Full deuglify of broken Outlook (Express) articles.
+ Treat dumbquotes, unwrap lines, repair attribution and rearrange citation.  If
+ NODISPLAY is non-nil, don't redisplay the article buffer."
+   (interactive "P")
+   ;; apply treatment of dumb quotes
+   (gnus-article-treat-dumbquotes)
+   ;; repair wrapped cited lines
+   (gnus-article-outlook-unwrap-lines 'nodisplay)
+   ;; repair attribution line and rearrange citation.
+   (gnus-article-outlook-rearrange-citation 'nodisplay)
+   (unless nodisplay (gnus-outlook-display-article-buffer)))
+ 
+ ;;;###autoload
+ (defun gnus-article-outlook-deuglify-article ()
+   "Deuglify broken Outlook (Express) articles and redisplay."
+   (interactive)
+   (gnus-outlook-deuglify-article nil))
+ 
+ (provide 'deuglify)
+ 
+ ;; Local Variables:
+ ;; coding: iso-8859-1
+ ;; End:
+ 
+ ;;; arch-tag: 5f895cc9-51a9-487c-b42e-28844d79eb73
+ ;;; deuglify.el ends here




reply via email to

[Prev in Thread] Current Thread [Next in Thread]