-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathextract-subs
More file actions
executable file
·150 lines (131 loc) · 4.26 KB
/
extract-subs
File metadata and controls
executable file
·150 lines (131 loc) · 4.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/bin/bash
# SPDX-FileCopyrightText: 2021 - 2024 sudorook <daemon@nullcodon.com>
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Helpers are loaded from the "globals" file located next to this script
# (presumably the source of check_command and the show_* functions used below;
# they are not defined in this file).
ROOT="$(dirname "${0}")"
source "${ROOT}"/globals

# Both ffmpeg and ffprobe are needed; exit with status 3 if the check fails.
if ! check_command ffmpeg ffprobe; then
  exit 3
fi
# Probe a media file with ffprobe and record each subtitle stream it contains.
#
# Globals:
#   LANG_DEFAULT (read)  - language recorded when a stream has no language tag
#   STREAMS (appended)   - stream index of each subtitle stream
#   TITLES (appended)    - language of each subtitle stream
#   FORMATS (appended)   - codec name of each subtitle stream; when the codec
#                          tag string is "tx3g" the tag string and tag value
#                          are appended as "name (tx3g / 0x...)"
# Arguments:
#   $1 - path of the media file to probe
function parse_streams {
  local count
  local res
  local codec_type
  local codec_tag
  local codec_tag_string
  local codec_name
  local lang
  local idx

  # The explicit "file:" protocol prefix keeps ffprobe from interpreting a
  # path that contains ':' as "<protocol>:<resource>". The per-stream query
  # below uses the same prefix.
  count="$(ffprobe -v error \
    -show_entries format=nb_streams \
    -of default=noprint_wrappers=1:nokey=1 file:"${1}")"

  # An empty count evaluates to 0 here, so the loop body is simply skipped.
  for ((idx = 0; idx < count; idx++)); do
    res="$(ffprobe -v error -show_entries stream -select_streams "${idx}" file:"${1}")"

    # Pull the individual key=value fields out of the ffprobe dump.
    codec_type="$(sed -n "s/^codec_type=\(.*\)/\1/p" <<< "${res}")"
    codec_name="$(sed -n "s/^codec_name=\(.*\)/\1/p" <<< "${res}")"
    codec_tag="$(sed -n "s/^codec_tag=\(.*\)/\1/p" <<< "${res}")"
    codec_tag_string="$(sed -n "s/^codec_tag_string=\(.*\)/\1/p" <<< "${res}")"
    lang="$(sed -n "s/^TAG:language=\(.*\)/\1/p" <<< "${res}")"

    if [[ -z "${lang}" ]]; then
      lang="${LANG_DEFAULT}"
    fi

    # Only subtitle streams are of interest.
    if [[ "${codec_type}" != subtitle ]]; then
      continue
    fi

    STREAMS+=("${idx}")
    TITLES+=("${lang}")
    if [[ "${codec_tag_string}" = tx3g ]]; then
      FORMATS+=("${codec_name} (${codec_tag_string} / ${codec_tag})")
    else
      FORMATS+=("${codec_name}")
    fi
  done
}
# Extract every subtitle stream recorded in STREAMS/TITLES/FORMATS into its
# own file, one ffmpeg invocation per stream.
#
# Globals:
#   IN (read)        - path of the input media file
#   NAME (read)      - prefix of every output path
#   EXTENSION (read) - extension of the input; selects mp4 vs. mkv as the
#                      container for dvd_subtitle streams
#   STREAMS, TITLES, FORMATS (read) - parallel arrays, one entry per stream
# Outputs:
#   Writes "${NAME}_<stream index>_<language>.<ext>" for each supported
#   format, reports each command through show_listitem, and reports skipped
#   formats through show_warning.
function extract_streams {
  local -a cmd
  local codec
  local encoder
  local ext
  local is_text
  local shown
  local tmp
  local out
  local i

  for ((i = 0; i < ${#TITLES[@]}; i++)); do
    codec="${FORMATS[i]}"
    is_text=false

    # Pick the ffmpeg subtitle codec and output extension for this stream.
    case "${codec}" in
      ass)
        encoder=copy
        ext=ass
        is_text=true
        ;;
      dvd_subtitle)
        encoder=copy
        if [[ "${EXTENSION}" = mp4 ]]; then
          ext=mp4
        else
          ext=mkv
        fi
        ;;
      "mov_text (tx3g / 0x67337874)" | mov_text | srt | subrip)
        encoder=subrip
        ext=srt
        is_text=true
        ;;
      hdmv_pgs_subtitle)
        encoder=copy
        ext=sup
        ;;
      webvtt)
        encoder=copy
        ext=vtt
        is_text=true
        ;;
      *)
        show_warning "Format ${FORMATS[${i}]} not supported. Skipping."
        continue
        ;;
    esac
    out="${NAME}_${STREAMS[i]}_${TITLES[i]}.${ext}"

    # The command is rebuilt from scratch for every stream so that neither an
    # earlier output nor the -map of a skipped stream leaks into this call.
    # An argv array is used instead of an eval'd string, so no re-quoting of
    # file names is needed.
    cmd=(ffmpeg -probesize 1G -analyzeduration 1G -v warning -y
      -i "file:${IN}" -map "0:${STREAMS[i]}" -c "${encoder}" "file:${out}")
    printf -v shown '%q ' "${cmd[@]}"
    show_listitem "${shown% }"
    "${cmd[@]}"

    # If the Aegisub Project Garbage is included in the header, the MIME type
    # is incorrectly set as 'application/x-wine-extension-ini' for some reason.
    # It's correctly interpreted as 'text/plain' once deleted.
    case "${codec}" in
      ass | ssa)
        # dos-format files require the '\r', otherwise everything below the
        # header is deleted.
        sed -i '/\[Aegisub Project Garbage\]/,/^\r*$/d' "${out}"
        ;;
    esac

    # Delete binary 0x00. It shows up sometimes and causes the file to be
    # interpreted as containing binary data. This is done for text outputs
    # only: in the binary outputs (.sup, .mkv, .mp4) NUL bytes are payload and
    # removing them would corrupt the file.
    if [[ "${is_text}" = true ]]; then
      tmp="$(mktemp)"
      if tr -d '\000' < "${out}" > "${tmp}"; then
        # Write back in place so the output keeps its own permissions rather
        # than inheriting those of the temporary file.
        cat "${tmp}" > "${out}"
      fi
      rm -f -- "${tmp}"
    fi
  done
}
# Main
#
# Expects the media file to process as the first argument. Its subtitle
# streams are listed with parse_streams and then written out by
# extract_streams.

# "${1:-}" keeps 'set -u' from aborting with a bare "unbound variable" message
# when the script is run without arguments.
if [[ -z "${1:-}" ]]; then
  show_error "ERROR: No input file given. Exiting."
  exit 3
fi

if ! [[ -f "${1}" ]]; then
  show_error "ERROR: ${1} does not exist. Exiting."
  exit 3
fi

LANG_DEFAULT=eng

IN="${1}"
# Split off the extension by looking at the last path component only, so that
# a dot in a directory name (e.g. "./season.1/episode") is not mistaken for
# the start of an extension.
case "${IN##*/}" in
  *.*)
    NAME="${IN%.*}"
    EXTENSION="${IN##*.}"
    ;;
  *)
    NAME="${IN}"
    EXTENSION=""
    ;;
esac

# Parallel arrays filled by parse_streams: stream index, language, format.
STREAMS=()
TITLES=()
FORMATS=()

parse_streams "${IN}"

if [[ "${#STREAMS[@]}" -eq 0 ]]; then
  show_warning "No subtitle streams found in ${1}. Skipping..."
  exit
fi

extract_streams