(defun concat* (text) (let ((r "")) (dolist (i text) (and (equal i "-") (setq i "\n-")) (setq r (concat r (format "%s " i )))) r)) (defun html->read-structure-name (limit buffer offset back) (with-current-buffer (or buffer (current-buffer)) (skip-chars-forward " ") (forward-char offset) (let ((w (upcase (buffer-substring-no-properties (point) (prog2 (search-forward-regexp limit nil t) (match-beginning 0)))))) (backward-char back) (if (member (elt w 0) '(?#)) (format "\"%s\"" w) w)))) (defun html->add-return (&optional buffer) (with-current-buffer (or buffer (current-buffer)) (goto-char (point-min)) (search-forward "
" nil t) ;; (goto-char (match-beginning 0)) ;; (insert-char ?\n 1) ;; (goto-char (match-end 0)))) )) (defun html->extract-article (&optional buffer) (with-current-buffer (or buffer (current-buffer)) (goto-char (point-min)) (if (search-forward "
filter (&optional buffer) (with-current-buffer (or buffer (current-buffer)) (let ((lim (html->extract-article buffer))) (if lim (progn (setq other-definitions (html->extract-definitions buffer)) (delete-region (cadr lim) (point-max)) (delete-region 1 (car lim)) t) (switch-to-buffer buffer) ())))) (defun html->extract-definitions (&optional buffer) (with-current-buffer (or buffer (current-buffer)) (let (beg end a w list-definitions) (goto-char 1) (while (search-forward "return sendRequest(5," nil t) (search-forward "\'") (setq beg (match-end 0)) (search-forward "\'") (setq end (match-beginning 0)) (setq a (buffer-substring-no-properties beg end)) (search-forward "

") (setq beg (match-end 0)) (search-forward "

") (setq end (match-beginning 0)) (setq w (buffer-substring-no-properties beg end)) (search-forward "

") (setq beg (match-end 0)) (search-forward "

") (setq end (match-beginning 0)) (setq w (concat w "-" (buffer-substring-no-properties beg end))) (setq list-definitions (append list-definitions (list (cons w a))))) list-definitions ))) (defun html-step-forward (&optional buffer) (with-current-buffer (or buffer (current-buffer)) (and (eobp) (error "il y a une erreur dans le fichier (fin dy fichier)")) (skip-chars-forward " \f\t\n\r\v") (let ((p (point))) (if (equal (char-after) ?<) (prog1 nil (search-forward ">" nil t) (save-excursion (search-backward "<" nil t) (when (> (point) p) (error "Charactère défendu dans le texte (position %d - %d)" p (point))))) (search-forward "<" nil t) (backward-char) (skip-chars-backward " \f\t\n\r\v"))))) (defun html-step-backward (&optional buffer) (with-current-buffer (or buffer (current-buffer)) (skip-chars-backward " \f\t\n\r\v") (let ((p (point))) (if (equal (char-before) ?>) (prog1 nil (search-backward "<" nil t) (save-excursion (search-forward ">" nil t) (when (< (point) p) (error "Charactère défendu dans le texte (position %d - %d)" (point) p)))) (search-backward ">" nil t) (forward-char) (skip-chars-forward " \f\t\n\r\v"))))) (defun cnrtl-get (mot) (let* ((address (concat "http://www.cnrtl.fr/definition/" (symbol-name mot))) (coding-system-for-write 'utf-8) (coding-system-for-read 'utf-8) (temporary-file-directory "/tmp/cnrtl/") (f (prog2 (or (file-exists-p temporary-file-directory) (make-directory temporary-file-directory)) (make-temp-file (concat "cnrtl" "-" "mot")))) (buffer (url-retrieve-synchronously address)) other-definitions ) ;;(html-check-coding-system buffer) (when (html->filter buffer) (html->add-return buffer) (with-current-buffer (or buffer (current-buffer)) (write-region nil nil f)) (setq buffer (find-file-noselect f)) (with-current-buffer buffer (html->lisp) (goto-char (point-max)) (message "evaluating the html cpde...") (let ((r (eval-last-sexp t))) (erase-buffer) (insert r) )) (switch-to-buffer buffer) (goto-char (point-min))))) (defun html->read-tag nil (if (equal (char-after) ?<) (let ((tag-name (prog2 (forward-char) (html->read-structure-name "[ >]" (current-buffer) 0 1))) info name ) (catch 'END-TAG (while t (when (or (looking-at ">") (looking-back ">")) (throw 'END-TAG 0)) (setq name (html->read-structure-name "[ =>]" (current-buffer) 0 1)) (if (looking-at "=\"") (setq info (append info (list (cons name (html->read-structure-name "\"" (current-buffer) 2 0))))) (setq info (cons (list name) info))))) (and (looking-at ">") (forward-char)) (cons tag-name info)) nil)) (defun html->read-text nil (let ((w (buffer-substring-no-properties (point) (prog2 (html-step-forward) (point))))) (cond ((string-equal w (string 9830)) (setq w "\n\n §")) ((string-equal w (string 8722)) (setq w "\n\n #"))) w)) (defconst html-special-tags '(("BR" . "NEWLINE"))) (defun html->struct nil (skip-chars-forward " \f\t\n\r\v") (let ((p (point))) (setq s (or (html->read-tag) (html->read-text))) (delete-region p (point)) (setq type (cond ((listp s) (cond ((assoc (car s) html-special-tags) 'SPECIAL-TAG) ((member "/" (cadr s)) 'UNUSED-TAG) ((equal (elt (car s) 0) ?/) 'CLOSE-TAG) (t 'OPEN-TAG))) ('TEXT))) (setq expr (cond ((eq type 'SPECIAL-TAG) (format "(%s) " (cdr (assoc (car s) html-special-tags)))) ((eq type 'UNUSED-TAG) "") ((eq type 'CLOSE-TAG) (setq depth (1- depth)) ") ") ((eq type 'OPEN-TAG) (setq depth (1+ depth)) (format "(%s \'%s " (car s) (cdr s))) ((eq type 'TEXT) (format "\"%s\" " s)))))) (defun html->lisp () (message "html->lisp start") (catch 'END (let ((depth 0) s type expr) (while t (html->struct) (insert expr) (when (zerop depth) (throw 'END nil))))) (message "html->lisp end") ) (defun html->pretty-print (text indent) (let (r s) (setq s (split-string text)) (catch 'STOP (while t (let (nl) (while (or (and s (< (length nl) (- 100 indent))) (and s (null (string-match "[[:alpha:]]" (substring (car s) 0 1))) (not (member (elt (car s) 0) '(?\( ?\«) )))) (setq nl (concat nl (if (member (elt (car s) 0) '(?, ?.)) "" " ") (car s)) s (cdr s))) (setq r (concat r (concat "\n" (make-string indent ? ) nl)))) (and (null s) (throw 'STOP r)))))) ;; html definition (defun B (prop &rest text) (concat* text)) (defun SUP (prop &rest text) (concat* text)) (defun I (prop &rest text) (concat* text)) (defun CENTER (prop &rest text) (concat* text)) (defun INPUT (prop &rest text) "") (defconst div-forbidden-classes nil) (defun DIV (prop &rest text) (let* ((dclass (cdr (assoc 'CLASS prop))) (did (cdr (assoc 'ID prop))) indent (E "") ) (cond ((member dclass div-forbidden-classes) ) ((string-equal (caar prop) "ID") (setq indent 4)) ((string-equal "TLF_PARAPUTIR" dclass) (and text (setcar (cdr text) (substring (cadr text) (string-match "[[:graph:]]" (cadr text))))) (setq E (concat* text)) (add-text-properties 0 1 '(face (:foreground "green" )) E) (setq E (concat "\n\n " E))) ((and text (string-equal "TLF_PAROTHERS" dclass) (or (string-match "Prononc." (car text)) (string-match "BBG." (car text)) (string-match "STAT." (car text)) (string-match "Étymol." (car text)))) ) ((and text (string-equal "TLF_PAROTHERS" dclass) (string-match "Rem." (car text))) (setq E (concat* text)) (add-text-properties 0 (length E) '(face (:foreground "yellow" )) E) (setq E (html->pretty-print E 7)) (setq E (concat "\n" E)) ) ((string-equal "TLF_CVEDETTE" dclass) (let* (z (def (mapcar 'car other-definitions))) (setq header-line-format (concat "« " (concat* text) " »" (format " %s " def)))) (setq indent nil) ) ((string-equal "TLF_PARAH" dclass) (and text (setcar text (substring (car text) 0 (string-match " " (car text))))) (and text (setcar (cdr text) (substring (cadr text) (string-match "[[:graph:]]" (cadr text))))) (setq E (concat* text)) (setq E (concat "\n\n " E)) ) ((string-equal "TLF_TABULATION" dclass) (setq indent 10)) (t (setq indent 0) (setq text (append text '("\n"))))) (if indent (concat "\n" (make-string indent ? ) (concat* text)) E))) (defconst span-forbidden-classes nil) (defun SPAN (prop &rest text) (let ((sclass (cdar prop)) r i c forbid) (cond ((member sclass span-forbidden-classes) "" ) ((string-equal sclass "TLF_CMOT") (setq c '(face (:foreground "red" ))) (concat* text)) ((string-equal sclass "TLF_CCODE") (setq c '(face (:foreground "green" ))) (concat* text)) ((string-equal sclass "TLF_CPLAN") (setq c '(face (:foreground "DeepPink1" ))) (concat* text)) ((string-equal sclass "TLF_CEMPLOI") (setq c '(face (:foreground "cyan" )) i 3)) ((string-equal sclass "TLF_CDEFINITION") (setq c '(face (:foreground "DodgerBlue" )) i 5)) ((string-equal sclass "TLF_PARAH") (setq c '(face (:foreground "SlateBlue" ))) (concat* text)) ((string-equal sclass "TLF_CSYNONIME") (setq c '(face (:foreground "MediumSeaGreen" )) i 10)) ((string-equal sclass "TLF_CSYNTAGME") (setq c '(face (:foreground "DarkGreen" )) i 12)) ((string-equal sclass "TLF_CTITRE") (setq c '(face (:foreground "LawnGreen" )) forbid t)) ((string-equal sclass "TLF_CCROCHET") (setq c '(face (:foreground "IndianRed" )) i 5)) ((string-equal sclass "TLF_CAUTEUR") (setq c '(face (:foreground "DarkKhaki" )) forbid t)) ((string-equal sclass "TLF_SMALLCAPS") (when text (setq text (list (upcase (concat* text))) c '(face (:foreground "orange" )))) ) ((string-equal sclass "TLF_CEXEMPLE") (setq c '(face (:foreground "tomato1" )) i 15) ) ((string-equal sclass "TLF_CDATE") (setq c '(face (:foreground "BlueViolet" )) forbid t)) ((string-equal sclass "TLF_CCONSTRUCTION") (setq c '(face (:foreground "LemonChiffon3" )) i 14)) ((string-equal sclass "TLF_CSOURCE") (setq c '(face (:foreground "LavenderBlush3" ))) (concat* text)) ((string-equal sclass "TLF_CDOMAINE") (setq c '(face (:foreground "DeepSkyBlue4 " ))) (concat* text)) ((string-equal sclass "TLF_CPUBLICATION") (setq c '(face (:foreground "LemonChiffon3" ))) (concat* text)) (t (message "balise %s inconnue" sclass) (setq c '(face (:foreground "LemonChiffon3" ))))) (setq r (concat* text)) (and i (setq r (html->pretty-print r i))) (add-text-properties 0 (length r) c r) (if forbid "X" r))) (defun TR (prop &rest text) text) (defun TD (prop &rest text) text) (defun TABLE (prop &rest rows) rows) (defun NEWLINE (&rest x) "\n") ;; (cnrtl-get 'marmite)