;; hyphenate.el - build and manage pattern trie ;; Copyright Héctor Lahoz 2016 ;; ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with this program. If not, see . ;; ;; this program is based on the work of Franklin M. Liang ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (eval-when-compile (require 'cl)) ; (optimize (safety 0)) ;; uncomment for production (defstruct ptrie:node children ;; CAR - no match; CDR - match (next position) (char nil :read-only t) (final nil)) (defvar pattern-trie (make-ptrie:node :char ?\s :children '(nil . nil)) "Root of the patterns trie") (defun ptrie:print-trie (n path) "Print the tree recursively" (let ((path_ (concat path (make-string 1 (ptrie:node-char n))))) (if (null (cdr (ptrie:node-children n))) (progn (princ path_) (princ " - ") (princ (ptrie:node-final n)) (princ "\n")) (ptrie:print-trie (cdr (ptrie:node-children n)) path_)) (when (car (ptrie:node-children n)) (ptrie:print-trie (car (ptrie:node-children n)) path)))) (defun ptrie:print-node (n) "Print node N for debugging" (let ((ret1 "Node: :") (ret2 " - ")) ;; I don't understand why this is necessary ;; it seems the string referenced by ret2 is kept between calls and it is not initialised (aset ret2 0 ?\s) (aset ret2 2 ?\s) (aset ret1 6 (ptrie:node-char n)) (if (null (ptrie:node-children n)) (setq ret2 "no children") (when (car (ptrie:node-children n)) (aset ret2 0 (ptrie:node-char (car (ptrie:node-children n))))) (when (cdr (ptrie:node-children n)) (aset ret2 2 (ptrie:node-char (cdr (ptrie:node-children n)))))) (concat ret1 ret2))) (defun ptrie:find-next-char (node char &optional create) "Returns the node corresponding to CHAR. Add a new node when CREATE is t and requested node doesn't exist" (let ((prev node) n new (set-prev-link 'setcdr)) (setq n (cdr (ptrie:node-children prev))) (while (and n ;; works too when (null node-children) (> char (ptrie:node-char n))) (setq prev n) (setq set-prev-link 'setcar) (setq n (car (ptrie:node-children n)))) (when (or (null n) (/= char (ptrie:node-char n))) (if (null create) (setq n nil) (setq new (make-ptrie:node :char char :children (cons n nil))) (when (null (ptrie:node-children prev)) (setf (ptrie:node-children prev) '(nil . nil))) (funcall set-prev-link (ptrie:node-children prev) new) (setq n new))) n)) (defun find-pattern (trie p) "Return pattern indicated by P starting at TRIE or nil if not found" (let ((n trie)) (dotimes (i (length p) (ptrie:node-final n)) (when (null (setq n (ptrie:find-next-char n (aref p i)))) (return nil))))) (defun add-pattern (trie p) "Add pattern P to trie TRIE" (let ((pnw (pat-nw p)) (n trie) char) (dotimes (i (length pnw)) (setq char (aref pnw i)) (setq n (ptrie:find-next-char n char t))) (setf (ptrie:node-final n) p))) (defun pat-nw (str) "Reomve weight digits from STR" (let ((ret nil) (char nil) (char-str nil) (l (length str))) (do ((i (- l 1) (1- i))) ((< i 0)) (setq char (aref str i)) (setq char-str (substring-no-properties str i (1+ i))) (if (not (string-match "[[:digit:]]" char-str)) (push char ret))) (concat ret))) (defun read-pattern (buf) (let* ((pat)) (setq pat (buffer-substring (point) (progn (beginning-of-line 2) (- (point) 1)))) (if (or (equal pat "") (equal pat "\n")) nil pat))) (defun load-patterns (file) (let ((hyphen-patterns (find-file-read-only file)) (pat nil) (pat-nw nil) (n pattern-trie) (tmp) (i)) (while (setq pat (read-pattern hyphen-patterns)) (add-pattern pattern-trie pat)))) (defmacro digitp (c) "True if c is a digit" (if (and (< 47 (eval c)) (> 58 (eval c))) 't 'nil)) ;; TODO optimise (defun ly:hyphenate-word (word) "Returns WORD with hyphens added" (let* (s-word pat weight ret p-found (hpos 0) ;; add markers at beginning and end (delim-word (concat "." word ".")) (hyphen-weights (make-vector (length delim-word) 0))) (dotimes (anchor (length delim-word)) (setq s-word (substring delim-word anchor)) (do ((end 1 (1+ end))) ((> end (length s-word))) (when (setq pat (find-pattern pattern-trie (substring s-word 0 end))) ;; store weights (setq hpos 0) (dotimes (pos (length pat)) (if (not (digitp (aref pat pos))) (setq hpos (1+ hpos)) (setq weight (- (aref pat pos) ?0)) (when (> weight (aref hyphen-weights (+ anchor hpos))) (aset hyphen-weights (+ anchor hpos) weight))))))) (dotimes (i (length word)) ;; avoid hyphens before word (when i == 1) ;; e.g. pattern "1de" matches the word "de" so it produces " -- de" ;; perhaps we should modify the preceding algorithm, not to include ;; them in the first place (when (and (/= i 1) (= (% (aref hyphen-weights (1+ i)) 2) 1)) (push " -- " ret)) (push (aref word i) ret)) (mapconcat (lambda (s) (if (stringp s) s (string s))) (nreverse ret) ""))) (defun ly:hyphenate-region (beg end) "Add lilypond centered hyphens to every word in the region" (interactive "r") (save-excursion (goto-char beg) (search-forward "{" (line-beginning-position 2) t) (let ((end (copy-marker end)) word-beg) (while (< (point) end) (skip-chars-forward "^a-zA-Záéíóúñäëöüß") ;; find next word (setq word-beg (point)) (forward-word) (insert (ly:hyphenate-word (prog1 (buffer-substring-no-properties word-beg (point)) (delete-region word-beg (point)))))))))