[Top][All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

txutils.el - file conversion utility

From: Tim X
Subject: txutils.el - file conversion utility
Date: Sun, 24 Sep 2006 17:59:28 +1000
User-agent: Gnus/5.110006 (No Gnus v0.6) Emacs/22.0.50 (gnu/linux)

;;      Filename: /home/tcross/projects/emacs-convert/txutils.el
;; Creation Date: Wednesday, 20 September 2006 10:13 PM EST
;; Last Modified: Sunday, 24 September 2006 05:35 PM EST
;;        Author: Tim Cross <address@hidden>
;;   Description: Convert files from doc, ps, pdf, ppt to a format
;;                which can be viewed within emacs (i.e. text or html)

;;; Copyright (C) 2006. Tim Cross <address@hidden>
;;; All Rights Reserved.
;;; This file is not part of GNU Emacs, but the same permissions apply.
;;; GNU Emacs is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 2, or (at your option)
;;; any later version.
;;; GNU Emacs is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Emacs; see the file COPYING.  If not, write to
;;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
;;; Commentary
;;; ==========
;;; The very simple idea behind this basic utility is to make files
;;; in .doc, .pdf, .ps and .ppt format more easily accessible, without
;;; having to leave emacs or manually convert the file format before
;;; being able to view the contents in emacs.
;;; There are packages which will enable calls to external viewers
;;; for files of specific formats, such as xpdf for pdf etc. However,
;;; I wanted to have everything within emacs as this makes integration,
;;; cutting/pasting etc a lot easier, plus as a blind user, most
;;; external utilities are of little use because they don't also include
;;; speech support.
;;; The objective here is to have things setup so that when browsing
;;; a directory with dired, you can just hit 'v' for any file you want to
;;; view and you will be presented with a text or html version without
;;; needing to do any manual conversion - or even caring about what
;;; would need to be done.
;;; You need the following packages (or at least utilities which will
;;; do the same thing). Most of these are fairly standard with many Linux 
;;; distros these days. 
;;; The wv utilities which contain wvText for converting MS Word docs
;;; The xpdf utilities which include pdftotext for converting PDF to text
;;; The Ghostscript package which contains pstotext for converting PS to text
;;; The ppthtml utility for converting MS Power Point files to html
;;; A configured and working browse-url setup. I use w3m as my browser
;;; Customizing 
;;; ===========
;;; The variables used to hold the paths to programs used to convert
;;; various file formats have been defined using the custom package.
;;; These conversion programs are called with two arguments, the input 
;;; file and the output file for the converted text. Use the command
;;; M-x customize-group <ret> txutils <ret> to set these values if the
;;; default values are not sufficient.
;;; Installation 
;;; ============
;;; Pretty straightforward. Place this file somewhere in your load path
;;; and put a (require 'txutils) in your .emacs. You may want to byte
;;; compile this file.
;;; Reporting Bugs
;;; ==============
;;; This is the first bit of elisp I've allowed out into the world and
;;; while I am really learning to love both elisp and cl lisp, I'm still
;;; very much a novice. Therefore, there ARE bugs and probably some pretty
;;; poor style within this stuff. Feedback, bug reports and suggestions
;;; are always welcome. Send e-mail to address@hidden
;;; Emacspeak Users Note - I've not attempted to enhance this code to provide 
;;; better spoken or audio icon feedback. When the code matures a bit and
;;; once I get some feedback, I will see if a re-worked version can be 
;;; included in emacspeak. In the meantime, feel free to use 'advice' to 
;;; improve things. 

(require 'custom)
(require 'browse-url)

(defgroup txutils nil
  "Conversion of doc, pdf, ps and ppt files for viewing inside Emacs.
Holds the user options naming the external conversion programs."
  :prefix "txutils-"
  ;; The standard parent group for interfaces to external programs is
  ;; the lowercase symbol `external'; `External' names no existing group,
  ;; which would leave this group unreachable from the customize tree.
  :group 'external)

(defcustom txutils-msword2text-prog "/usr/bin/wvText"
  "Path of the program used to turn MS Word .doc files into text.
The program is run with the input file followed by the output file."
  :group 'txutils
  :type 'string)

(defcustom txutils-pdf2text-prog "/usr/bin/pdftotext"
  "Path of the program used to turn PDF files into text.
The program is run with the input file followed by the output file."
  :group 'txutils
  :type 'string)

(defcustom txutils-ps2text-prog "/usr/bin/pstotext"
  "Path of the program used to turn PostScript files into text."
  :group 'txutils
  :type 'string)

(defcustom txutils-ppt2html-prog "/usr/bin/ppthtml"
  "Path of the program used to turn MS PowerPoint slides into HTML."
  :group 'txutils
  :type 'string)

(defun txutils-run-command (cmd arg1 arg2 &optional output-buffer)
  "Run CMD with ARG1 and ARG2 through the shell; return t on success.
The command line is built as \"CMD ARG1 ARG2\" and passed to
`shell-command' unchanged, so callers must shell-quote the arguments
themselves.

When OUTPUT-BUFFER is non-nil, command output goes to that buffer and
error output goes to the buffer \"*txutils-output*\".  Otherwise all
output goes to \"*txutils-output*\".

Return t when the command exits with status 0, nil otherwise."
  ;; NOTE(review): the archived copy of this function was truncated after
  ;; the second `shell-command' call; the default buffer name used when
  ;; OUTPUT-BUFFER is nil is restored by analogy with the first branch --
  ;; confirm against an upstream copy.
  (let ((status (shell-command (format "%s %s %s" cmd arg1 arg2)
                               (or output-buffer "*txutils-output*")
                               (and output-buffer "*txutils-output*"))))
    ;; `eql' instead of `=': a non-numeric status (e.g. a signal
    ;; description string) must yield nil rather than signal an error.
    (eql 0 status)))

(defun txutils-quote-expand-file-name (file-name)
  "Return FILE-NAME as an absolute path, quoted for use in a shell command.
FILE-NAME is first expanded with `expand-file-name' and the result is
then passed through `shell-quote-argument'."
  (let ((absolute-name (expand-file-name file-name)))
    (shell-quote-argument absolute-name)))

(defun txutils-file-type (file-name)
  "Return a symbol naming the type of FILE-NAME, judged by its extension.
The result is one of `doc', `pdf', `ps', `ppt' or `html' (the latter
for both .htm and .html); any other name yields `plain'."
  ;; NOTE(review): the archived copy had lost the `cond' head and the
  ;; result of every branch but the last; the symbols restored here are
  ;; the ones the rest of this file compares against with `eq'.
  (cond
   ((string-match "\\.\\(?:DOC\\|doc\\)$" file-name) 'doc)
   ((string-match "\\.\\(?:PDF\\|pdf\\)$" file-name) 'pdf)
   ((string-match "\\.\\(?:PS\\|ps\\)$" file-name) 'ps)
   ((string-match "\\.\\(?:PPT\\|ppt\\)$" file-name) 'ppt)
   ((string-match "\\.\\(?:HTML?\\|html?\\)$" file-name) 'html)
   (t 'plain)))

(defun txutils-make-temp-name (orig-name type)
  "Create a temporary file for the converted form of ORIG-NAME; return its name.
The temporary file's name starts with the non-directory part of
ORIG-NAME.  When TYPE is the symbol `ppt' the name ends in \".html\";
for every other TYPE it ends in \".txt\".  The file is created (empty)
by `make-temp-file'."
  ;; NOTE(review): the archived copy had lost the `cond' head that
  ;; selected between the two suffixes; the selection is restored here as
  ;; a single call with the suffix chosen by TYPE.
  (let ((name-prefix (file-name-nondirectory orig-name)))
    (make-temp-file name-prefix nil
                    (if (eq 'ppt type) ".html" ".txt"))))

(defun txutils-do-file-conversion (file-name)
  "Convert FILE-NAME to text or HTML according to its extension.
The type is determined with `txutils-file-type'.  Files of type `doc',
`pdf' and `ps' are converted to text and `ppt' files to HTML, using the
programs named by the `txutils-*-prog' options.

Return the name of the temporary file holding the converted contents.
Return FILE-NAME unchanged when the file is of type `html' or `plain',
or when the conversion program reports failure."
  (interactive "fFile to convert: ")
  ;; NOTE(review): the archived copy of this function had lost its `cond'
  ;; head, the result forms of each branch and the output-file argument
  ;; of the `ps' and `ppt' branches.  They are restored here from the
  ;; surviving lines -- confirm against an upstream copy.
  (let ((file-type (txutils-file-type file-name)))
    (if (not (memq file-type '(doc pdf ps ppt)))
        ;; Nothing to convert: avoid creating a stray temporary file.
        file-name
      (let* ((output-file (txutils-make-temp-name file-name file-type))
             (input (txutils-quote-expand-file-name file-name))
             (output (txutils-quote-expand-file-name output-file))
             converted)
        (message "Performing file conversion for %s." file-name)
        (setq converted
              (cond
               ((eq 'doc file-type)
                (txutils-run-command txutils-msword2text-prog input output))
               ((eq 'pdf file-type)
                (txutils-run-command txutils-pdf2text-prog input output))
               ((eq 'ps file-type)
                ;; The output file is passed first, after "-output".
                (txutils-run-command txutils-ps2text-prog
                                     (concat "-output " output)
                                     input))
               ((eq 'ppt file-type)
                ;; Output is captured with a shell redirection.
                (txutils-run-command txutils-ppt2html-prog
                                     input
                                     (concat "> " output)))))
        (if converted
            output-file
          ;; Conversion failed: drop the unused temporary file and fall
          ;; back to the original file.
          (when (file-exists-p output-file)
            (delete-file output-file))
          file-name)))))

(defadvice view-file (around txutils pre act comp)
  "Perform file conversion or call web browser to view contents of file.
When the file argument is neither `plain' nor `html' according to
`txutils-file-type', it is first converted with
`txutils-do-file-conversion' and the converted file is viewed instead.
When the file finally viewed is HTML, the buffer is also handed to the
web browser with `browse-url-of-buffer'."
  ;; NOTE(review): the archived copy had lost the end of the `let'
  ;; binding list and the `ad-do-it' form; without `ad-do-it' an around
  ;; advice never runs the advised function.  Its placement before the
  ;; browser call is inferred from the use of `browse-url-of-buffer' on
  ;; the current buffer -- confirm against an upstream copy.
  (let ((file-type (txutils-file-type (ad-get-arg 0))))
    (when (and (not (eq 'plain file-type))
               (not (eq 'html file-type)))
      (ad-set-arg 0 (txutils-do-file-conversion (ad-get-arg 0))))
    ad-do-it
    ;; Re-check the type: a converted PowerPoint file is now an HTML file.
    (if (eq 'html (txutils-file-type (ad-get-arg 0)))
        (browse-url-of-buffer nil))))

(provide 'txutils)

tcross (at) rapttech dot com dot au

reply via email to

[Prev in Thread] Current Thread [Next in Thread]