[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[nongnu] elpa/subed 169a98497d 1/2: Make it easier to use aeneas for forced alignment
From: |
ELPA Syncer |
Subject: |
[nongnu] elpa/subed 169a98497d 1/2: Make it easier to use aeneas for forced alignment |
Date: |
Mon, 7 Nov 2022 06:59:27 -0500 (EST) |
branch: elpa/subed
commit 169a98497d95e8cd162545807895573dbdb79f0c
Author: Sacha Chua <sacha@sachachua.com>
Commit: Sacha Chua <sacha@sachachua.com>
Make it easier to use aeneas for forced alignment
Forced alignment lets you get the timestamps based on an audio file
and a text file. subed-align can use the aeneas forced alignment tool
to process the current text file or subtitle file.
* subed/subed-align.el: New file with the subed-align command.
---
subed/subed-align.el | 81 ++++++++++++++++++++++++++++++++++++++++++++
subed/subed-align.el.license | 3 ++
2 files changed, 84 insertions(+)
diff --git a/subed/subed-align.el b/subed/subed-align.el
new file mode 100644
index 0000000000..d1b7e873ca
--- /dev/null
+++ b/subed/subed-align.el
@@ -0,0 +1,81 @@
+;;; subed-align.el --- use forced alignment tools like aeneas -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2022 Sacha Chua
+
+;; Author: Sacha Chua <sacha@sachachua.com>
+;; Keywords: multimedia
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This has some extra support for using Aeneas for forced alignment
+;; in order to get VTT or SRT timestamps from a plain text file and an
+;; audio file.
+;;
+;; You will also need aeneas and its dependencies:
+;; https://github.com/readbeyond/aeneas
+;;
+;;; Code:
+
+(defvar subed-align-command '("python3" "-m" "aeneas.tools.execute_task")
+ "Command to run aeneas, as a list of strings.
+The first element is the program to run.  The remaining elements
+are passed as its leading arguments, before the audio file, text
+file, task configuration and output file that `subed-align' appends.")
+
+(defvar subed-align-language "eng"
+ "Language code that `subed-align' passes to aeneas as task_language.")
+
+;;;###autoload
+(defun subed-align (audio-file text-file format)
+  "Align AUDIO-FILE with TEXT-FILE using aeneas to get timestamps.
+FORMAT is an aeneas output format name such as \"VTT\" or \"SRT\".
+
+The output is written next to the current buffer's file (or next
+to TEXT-FILE when the buffer is not visiting a file), with the
+extension replaced by the lowercased FORMAT.  If that file
+already exists, ask before overwriting it.
+
+In a buffer derived from `subed-mode', the subtitle texts are
+written to a temporary file and aligned as aeneas \"subtitles\"
+text; otherwise TEXT-FILE is aligned as \"plain\" text.  Process
+output is collected in the buffer *subed-aeneas*.
+
+Visit the new file and return its buffer, or return nil if the
+user declined to overwrite an existing file.  Signal an error if
+the aeneas process does not exit successfully."
+  (interactive
+   (list
+    ;; `subed-mpv-video-file' is only defined once subed-mpv is loaded;
+    ;; this command is autoloaded, so do not assume it is bound.
+    (or (bound-and-true-p subed-mpv-video-file)
+        (read-file-name "Audio file: "))
+    (buffer-file-name)
+    (completing-read "Format: "
+                     '("AUD" "CSV" "EAF" "JSON" "SMIL" "SRT" "SSV" "SUB"
+                       "TEXTGRID" "TSV" "TTML" "TXT" "VTT" "XML"))))
+  (let* ((base-file
+          ;; The output name is derived from the visited file; fall back
+          ;; to TEXT-FILE so non-file buffers fail clearly, not with a
+          ;; type error from `file-exists-p'.
+          (or (buffer-file-name)
+              text-file
+              (user-error "Buffer is not visiting a file and no text file was given")))
+         (extension (downcase format))
+         (new-file (expand-file-name
+                    (concat (file-name-sans-extension base-file) "." extension)))
+         temp-file)
+    (when (or (not (file-exists-p new-file))
+              (yes-or-no-p (format "%s exists. Overwrite? "
+                                   (file-name-nondirectory new-file))))
+      (unwind-protect
+          (progn
+            (when (derived-mode-p 'subed-mode)
+              ;; Collect the subtitles from the current buffer before
+              ;; `with-temp-file' switches to its own buffer.
+              (let ((subtitles (subed-subtitle-list)))
+                (setq temp-file (make-temp-file "subed-align" nil ".txt"))
+                (with-temp-file temp-file
+                  (insert (mapconcat (lambda (o) (elt o 3)) subtitles "\n\n")))))
+            (let ((status
+                   (apply
+                    #'call-process
+                    (car subed-align-command)
+                    nil
+                    (get-buffer-create "*subed-aeneas*")
+                    t
+                    (append (cdr subed-align-command)
+                            (list (expand-file-name audio-file)
+                                  (or temp-file (expand-file-name text-file))
+                                  (format "task_language=%s|os_task_file_format=%s|is_text_type=%s"
+                                          subed-align-language
+                                          extension
+                                          (if temp-file "subtitles" "plain"))
+                                  new-file)))))
+              ;; `call-process' returns the exit code, or a string
+              ;; describing the signal that killed the process.
+              (unless (equal status 0)
+                (error "aeneas failed (%s); see buffer *subed-aeneas*" status))))
+        ;; TEMP-FILE is only set in `subed-mode' buffers; calling
+        ;; `delete-file' on nil would signal an error.
+        (when temp-file
+          (delete-file temp-file)))
+      (find-file new-file)
+      ;; Remove lines that consist only of digits from VTT output.
+      (when (string= extension "vtt")
+        (goto-char (point-min))
+        (flush-lines "^[0-9]+$"))
+      (current-buffer))))
+
+(provide 'subed-align)
+;;; subed-align.el ends here
diff --git a/subed/subed-align.el.license b/subed/subed-align.el.license
new file mode 100644
index 0000000000..1c67cfabee
--- /dev/null
+++ b/subed/subed-align.el.license
@@ -0,0 +1,3 @@
+;;;; SPDX-FileCopyrightText: 2022 Sacha Chua
+;;;;
+;;;; SPDX-License-Identifier: GPL-3.0-or-later