#!/bin/bash
# NOTE: bash is required.  Later parts of this script use `select' and
# `read -s -p -n', which a strict POSIX /bin/sh does not provide.

# Name the script was invoked as (basename of $0); used in messages.
appname="${0##*/}"
# Version string printed by _title.
ver="0.6.1.2006031907"
# Copyright years printed by _title.
copy="2005, 2006"
# Contact address printed by _title.
mail_feedback="dvd@PointedEars.de"
# ----------------------------------------------------------------------------
# DVD Subtitles 0.6.1 -- Extracts subtitles from Video DVD data to a text file
# Copyright (C) 2005, 2006 Thomas Lahn <PointedEars@gmx.de>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License (GPL) as published
# by the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU GPL along with this
# program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
## Standard shell script disclaimer blurb thing:
##
## This script is a hack. It's brute force. It's horrible.
## It doesn't use Artificial Intelligence. It doesn't use Virtual Reality.
## It's not perl. It's not python. It probably won't work unchanged on
## the "other" thousands of unices. But it worksforme. --ramiro
# (from /usr/local/mozilla/run-mozilla.sh)
#
# This is work in progress. If you have an improvement, patch,
# idea, whatever, on how to make this script better, please
# send it to <dvd@PointedEars.de>
# Print the program banner (name and version, copyright, licence pointer)
# followed by one empty line.
# Globals read: extd, norm, ver, copy, mail_feedback
_title ()
{
  printf '%s\n' \
    "${extd}DVD Subtitles $ver" \
    "Copyright (C) $copy Thomas Lahn <$mail_feedback>$norm" \
    'Distributed under the terms of the GNU General Public License (GPL), see' \
    'COPYING file or http://www.gnu.org/licenses/licenses.html#GPL for details.' \
    ''
}
# Print the usage text to standard output.
# Globals read: appname, extd, ital, norm
# Any arguments are ignored.
_help ()
{
  cat <<_HELP_EOF_
Extracts a subtitle stream from Video DVD data and converts it to a text file.
$extd$appname$norm [options] ${ital}SOURCE$norm
$extd$appname$norm [$extd-hVkl$norm] [${ital}SOURCE$norm [${ital}TITLE$norm\
[${ital}SUBTITLE$norm [${ital}TARGET$norm [${ital}GREY_LEVELS$norm]]]]]
Any option argument is overwritten by the respective additional program
argument. Options may be given in any order, and are also considered
options if located after the first program argument. Too many program
arguments are silently ignored.
$extd-c$norm, $extd--compile$norm ${ital}FILE$norm | ${ital}DIRECTORY$norm
Compile subtitle text files according to subtitle index\
${ital}FILE$norm
or to subtitle index files in ${ital}DIRECTORY$norm to\
${ital}TARGET$norm. Clean up if this is successful, then exit.
$extd-k$norm, $extd--keep$norm Keep subtitle stream file even if\
conversion is successful.
$extd-l$norm, $extd--list$norm List subtitles for TITLE using\
${extd}mplayer$norm(1) and exit.
If TITLE is not provided or \`$extd-$norm', list subtitles for
title #2 (as title #1 may be an intro without subtitles)
and exit.
SOURCE Video DVD data source, i.e. a device (usually /dev/dvd),
a directory (e.g. one containing content created via
${extd}dvdbackup$norm(1)) or a Video DVD image file.
If \`$extd-$norm', a previously created subtitle stream file\
named
${extd}subtitle_stream-$norm*$extd-${norm}TITLE${extd}-${norm}SUBTITLE\
in the current working
directory will be used for only the stream-to-graphics-
to-text conversion instead. Both TITLE and SUBTITLE
must not be \`$extd-$norm' in that case.
The default is \`$extd/dev/dvd$norm'.
$extd-t$norm, $extd--title$norm
TITLE Number of the title (1-n) which will be accessed for
subtitle stream extraction. If left out or \`$extd-$norm', the
program uses ${extd}mplayer$norm(1) to detect how many titles are
available on the DVD data source and asks for the
title to be accessed.
$extd-s$norm, $extd--subtitle-id$norm
SUBTITLE ID of the subtitle stream to be extracted (0-n).
If left out or \`$extd-$norm', the program uses its\
${extd}-l$norm option to detect
which subtitles are available for the given TITLE and asks
for the ID to be used.
$extd-o$norm, $extd--output-target$norm
TARGET Name of the resulting subtitles text file.
If not provided, the file is named after the subtitle stream
file. NOTE: Unlike previous versions, this version appends
the filename suffix $extd.srt$norm automagically ONLY in that\
case.
$extd-g$norm, $extd--grey-levels$norm
GREY_LEVELS Optional grey-levels value\
(\`c0$extd,${norm}c1$extd,${norm}c2$extd,${norm}c3' with 0 <= cN <= 255,
where 0 is black and 255 is white) to be used for converting
the subtitle stream graphics to text via OCR. The default is
\`${extd}255,255,0,255$norm'. Unfortunately, the\
${extd}subtitle2pgm$norm program,
which requires this value, appears to be poorly documented;
if you find a more detailed, working documentation, please
refer to it and inform this program's author about it.
$extd-h$norm, $extd--help$norm Display this help and exit.
$extd-v$norm, $extd--verbose$norm Be verbose. The number of ${extd}-v$norm\
options specify the level of
verbosity.
$extd-V$norm, $extd--version$norm Display version information and exit.
${extd}EXIT STATUS$norm
${extd} 0$norm Successful program execution
${extd} 1$norm Error detecting/extracting subtitle stream, or cancelled
without selecting a title number or subtitle ID
${extd} 2$norm Unable to convert subtitle stream to image files
${extd} 3$norm Cancelled due to ${extd}gocr$norm(1) error or without\
entering another
grey-levels value
${extd} 4$norm Unable to compile to text file
${extd} 5$norm Unable to clean up
${extd}127$norm Insufficient number of arguments / help was displayed
See the $extd$appname$norm(1) manpage for complete documentation.
_HELP_EOF_
}
# --- Terminal geometry ------------------------------------------------------
# Make sure LINES and COLUMNS are set: ask stty(1) when either is empty and
# fall back to 24x80 when stty reports nothing (e.g. stdin is no terminal).
if [ -z "$LINES" ] || [ -z "$COLUMNS" ]; then
  # `stty size' prints "ROWS COLUMNS"
  _tty_size=$(stty size 2>/dev/null)
  LINES=${_tty_size%% *}
  COLUMNS=${_tty_size##* }
  LINES=${LINES:-24}
  COLUMNS=${COLUMNS:-80}
  unset _tty_size
fi
# A reported size of 0 is replaced by the default as well.
[ "$LINES" -eq 0 ] && LINES=24
[ "$COLUMNS" -eq 0 ] && COLUMNS=80

# --- Text attributes --------------------------------------------------------
# extd = bold, ital = italics, norm = reset; all three are empty strings when
# TERM is `raw' or when stty cannot query a terminal, so messages stay plain.
if [ "$TERM" != "raw" ] && stty size >/dev/null 2>&1; then
  extd=$(tput bold 2>/dev/null)
  ital=$(tput sitm 2>/dev/null)
  norm=$(tput sgr0 2>/dev/null)
else
  esc=""
  extd=""
  # reset explicitly so that a value inherited from the environment cannot
  # leak into the help text
  ital=""
  norm=""
fi
# Run getopt(1) on the given command-line arguments.
# Arguments: the script's arguments ("$@")
# Sets:
#   getopt_type      `long' if the enhanced getopt is available, else `short'
#   tmp              getopt's output (normalized options, `--', operands)
#   getopt_exit_code getopt's exit status, captured immediately after the
#                    call so that no later command can overwrite it
_run_getopt ()
{
  # `getopt -T' exits with status 4 only if getopt is the enhanced version.
  if getopt -T >/dev/null 2>&1; [ $? -eq 4 ]; then
    getopt_type=long
    [ "$1" = "-vv" ] && echo "getopt(1) type: enhanced" >&2
    tmp=$(getopt -o c:klg:o:S::s:t:hVv \
      -l compile:,keep-stream,list,grey-levels:,output-target:,spell-check,subtitle-id:,title:,help,verbose,version \
      -n "$appname" -s sh \
      -- "$@")
    getopt_exit_code=$?
  else
    getopt_type=short
    [ "$1" = "-vv" ] && echo "getopt(1) type: old" >&2
    tmp=$(getopt c:klg:o:S:s:t:hVv "$@")
    getopt_exit_code=$?
  fi
}

# Note that we use `"$@"' to let each command-line parameter expand to a
# separate word. The quotes around `$@' are essential!
# We need `tmp' as the `eval set --' would nuke the return value of getopt.
[ "$1" = "-vv" ] && echo "$extd
Debug output for POSIX conform command-line parsing
Original arguments: $*" >&2
_run_getopt "$@"

# exit status
ESUCCESS=0
ECANTEXTRACT=1
ECANTCONVERT=2
EOCRERROR=3
ECANTCOMPILE=4
ECANTCLEANUP=5
EARGERROR=127

# option defaults; `-' means "not specified"
help=0
verbose=0
version=0
list=0
source='/dev/dvd'
title='-'
sid='-'
target='-'
args=''
keep=0
compile=0
# Apply the option words produced by getopt(1) to the option variables.
# Arguments: the normalized words: options, `--', then operands.
# Sets: help, verbose, version, compile, source, keep, list, grey_levels,
#       target, sid and title as requested by the options, and
#       _optwords = number of leading words consumed (the options plus the
#       `--' separator), so that the caller can `shift' them away.
# The loop ends when the words run out, so an unexpected word can never make
# it spin forever.
_apply_options ()
{
  _optwords=$#
  while [ $# -gt 0 ]; do
    case "$1" in
      -h | --help)
        help=1
        shift;;
      -v | --verbose)
        verbose=$((verbose + 1))
        shift;;
      -V | --version)
        version=1
        shift;;
      -c | --compile)
        compile=1
        source=$2
        shift 2;;
      # enhanced getopt expands the documented `--keep' to `--keep-stream'
      -k | --keep | --keep-stream)
        keep=1
        shift;;
      -l | --list)
        list=1
        shift;;
      -g | --grey-levels)
        grey_levels=$2
        shift 2;;
      -o | --output-target)
        target=$2
        shift 2;;
      -s | --subtitle-id)
        sid=$2
        shift 2;;
      -t | --title)
        title=$2
        shift 2;;
      -S)
        # accepted by both getopt specifications (always followed by an
        # argument word), but nothing in this script implements it
        echo "$appname: option $1 is not implemented; ignored" >&2
        shift 2;;
      --spell-check)
        echo "$appname: option $1 is not implemented; ignored" >&2
        shift;;
      --)
        shift
        break;;
      *)
        echo "$appname: unsupported option: $1" >&2
        help=1
        shift;;
    esac
  done
  _optwords=$((_optwords - $#))
}

if [ "${getopt_exit_code:-1}" -eq 0 ]; then
  ## getopt returns error code 0 for successful parsing, 1 if
  ## getopt(3) returns errors, 2 if it does not understand its
  ## own parameters, 3 if an internal error occurs like out-of-
  ## memory, and 4 if it is called with -T.
  #
  # Note the quotes around `$tmp': they are essential!
  eval set -- "$tmp"
  _apply_options "$@"
  # getopt splits `-vv' into `-v -v', so the debug request is recognized by
  # the resulting verbosity level instead of by the literal first word.
  [ "${verbose:-0}" -gt 1 ] && echo "New arguments: $*$norm
" >&2
  # Drop the options; the operands stay behind as separate, unsplit words.
  shift "$_optwords"
  [ $# -gt 0 ] && args="$args $*"
else
  [ "${verbose:-0}" -gt 1 ] && echo "getopt exited: $getopt_exit_code
" >&2
  if [ "${getopt_exit_code:-1}" -eq 1 ] || [ "${getopt_exit_code:-1}" -eq 2 ]; then
    help=1
  else
    exit "$getopt_exit_code"
  fi
fi
# The banner is printed unless a subtitle listing was requested.
if [ $list -eq 0 ]; then
  _title
fi
# For the version option the banner is all there is to show.
if [ $version -eq 1 ]; then
  exit $ESUCCESS
fi
# Help requested, or set because of an argument error.
if [ $help -eq 1 ]; then
  _help "$0"
  exit $EARGERROR
fi
result=$ESUCCESS
[ $compile -eq 0 ] &&
{
# Let the operands SOURCE, TITLE, SUBTITLE, TARGET and GREY_LEVELS override
# the corresponding option values, as the help text promises.  An empty or
# missing operand leaves the current value alone.
# (The option variables default to `-', not to the empty string, so testing
# them for emptiness would never let an operand through.)
_apply_operands ()
{
  [ -n "$1" ] && source=$1
  [ -n "$2" ] && title=$2
  [ -n "$3" ] && sid=$3
  [ -n "$4" ] && target=$4
  [ -n "$5" ] && grey_levels=$5
  return 0
}
_apply_operands "$@"
# List the subtitle entries mplayer(1) reports for a DVD title.
#   $1  DVD data source, handed to mplayer's -dvd-device option
#   $2  title number; 2 is used when it is empty or missing
# mplayer's stdout and stderr are both filtered: of the lines containing
# "sid", those starting with text up to a colon followed by one whitespace
# character are printed with that prefix removed.
# The body runs in a subshell, so the helper variables stay private.
getsubtitles ()
(
  dvd_device=$1
  dvd_url="dvd://${2:-2}"
  strip_prefix='/sid/ s/^[^:]\{1,\}:[[:space:]]//p'
  mplayer -dvd-device "$dvd_device" -vo null -ao null -frames 0 -v "$dvd_url" 2>&1 |
    sed -n "$strip_prefix"
)
# List mode: print the subtitles of the requested title and exit.
if [ "${list:-0}" -eq 1 ]; then
  # first title may be only an intro
  [ "$title" = '-' ] && title=2
  # quoted, so that a source path containing blanks stays one argument
  tmp=$(getsubtitles "$source" "$title")
  if [ -n "$tmp" ]; then
    echo "${extd}The following subtitles are available for title #$title:$norm
$tmp"
    exit $ESUCCESS
  else
    exit $ECANTEXTRACT
  fi
fi
# Title and subtitle ID are both known: fetch the listing and keep the text
# after the ID of the matching line as the stream description.
if [ "$sid" != '-' ] && [ "$title" != '-' ]; then
  subtitles=$(getsubtitles "$source" "$title")
  [ -n "$subtitles" ] &&
    st_descr=$(echo "$subtitles" | grep "^$sid[[:space:]]" |
      cut -f 2- -d ' ')
fi
if [ "$source" = '-' ]; then
# Print the name of the first regular file in the current directory that
# matches subtitle_stream-*-TITLE-SID ($1 = TITLE, $2 = SID).
# Returns 1 and prints nothing when there is no such file.
_find_stream_file ()
{
  for _candidate in subtitle_stream-*-"$1"-"$2"; do
    if [ -f "$_candidate" ]; then
      printf '%s\n' "$_candidate"
      return 0
    fi
  done
  return 1
}
stream_file=$(_find_stream_file "$title" "$sid")
# Test the result itself: the status of an `ls | head' pipeline is that of
# `head' and says nothing about whether a file was found.
if [ -n "$stream_file" ]; then
  read -r -s -p "Use '$stream_file' [Y/n]? " -n 1
  case $REPLY in
    [Nn])
      echo $REPLY
      exit 1;;
    *)
      echo Y
  esac
  echo
  # everything after the first `-' of the file name
  id=${stream_file#*-}
else
  echo "$appname: No such file: subtitle_stream-*-$title-$sid" >&2
  exit $ECANTEXTRACT
fi
else
# Report on stderr that nothing could be read from the DVD data source,
# then terminate the script with status $ECANTEXTRACT.
#   $1  0: no titles were found; anything else: no subtitles were found
read_error ()
{
  if [ "$1" = 0 ]; then
    set -- 'titles available on this DVD source.'
  else
    set -- 'subtitles available for this title.'
  fi
  printf >&2 '%s\n' \
    "Sorry, there are no $1" \
    'Please verify that the DVD data source is available and' \
    'that its filesystem is consistent.'
  exit $ECANTEXTRACT
}
# No title given: let mplayer report how many titles the source has and ask
# the user for one; empty input aborts.
if [ "$title" = '-' ]; then
  # `grep -E' replaces the obsolescent `egrep'; only the first matching line
  # is kept so that the count below is a single word.
  titles=$(mplayer -dvd-device "$source" -vo null -ao null -frames 0 \
    -v dvd:// 2>&1 | grep -E '[0-9]+ titles' | head -n 1)
  # the count is taken from the third word of that line
  num_titles=$(echo "$titles" | awk '{print $3}')
  [ $(($num_titles)) -lt 1 ] && read_error 0
  echo "$titles"
  while true
  do
    read -r -p "\
${extd}Enter title# (1-$num_titles), or nothing to abort: $norm"
    if [ -n "$REPLY" ]; then
      title=$(($REPLY))
      [ "$title" -ge 1 ] && [ "$title" -le "$num_titles" ] && break
    else
      exit $ECANTEXTRACT
    fi
  done
  echo
fi
# Succeed if $1 is an integer within 0..$2, both bounds included.
_sid_in_range ()
{
  [ "$1" -ge 0 ] 2>/dev/null && [ "$1" -le "$2" ] 2>/dev/null
}
# No subtitle ID given: show the subtitle listing and ask for an ID; empty
# input aborts.
if [ "$sid" = '-' ]; then
  # the list option of this very script produces the listing
  if [ -z "$subtitles" ]; then subtitles=$("$0" -lt "$title" "$source"); fi
  if [ $? -eq 0 ]; then
    # highest ID offered = first word of the last line of the listing
    sid_max=$(echo "$subtitles" | tail -n 1 | awk '{print $1}')
    echo "$subtitles"
    while true; do
      read -r -p "\
${extd}Enter ID of subtitle stream (0-$sid_max) to extract, or nothing to abort: $norm"
      if [ -n "$REPLY" ]; then
        sid=$(($REPLY))
        # the prompt offers 0-$sid_max, so $sid_max itself must be accepted
        _sid_in_range "$sid" "$sid_max" && break
      else
        exit $ECANTEXTRACT
      fi
    done
  else
    read_error 1
  fi
fi
# Print a label for the DVD data source $1: the output of volname(1) with
# runs of whitespace collapsed and leading/trailing blanks removed, or the
# last path component of $1 when volname fails or prints nothing.
# (Emptiness of the result is tested because the status of `echo' or of a
# pipeline does not tell whether volname succeeded.)
_volume_label ()
{
  _label=$(volname "$1" 2>/dev/null |
    sed -e 's/[[:space:]]\{1,\}/ /g' -e 's/^ //' -e 's/ $//')
  [ -n "$_label" ] || _label=${1##*/}
  printf '%s\n' "$_label"
}
# remove trailing /
source=${source%/}
vol=$(_volume_label "$source")
id=$vol-$title-$sid
stream_file=subtitle_stream-$id
# A stream file of that name already exists: ask whether to use it, to
# overwrite it, or (the default for the second question) to extract into a
# new file whose name carries this shell's process ID.
# REPLY is left unset when no file exists, is `y' when the existing file is
# to be used, and holds the user's N/n answer otherwise.
unset REPLY
if [ -f "$stream_file" ]; then
  read -r -s -p "${extd}Use existing '$stream_file' [Y/n]? $norm" -n 1
  case $REPLY in
    [Nn])
      echo $REPLY
      # the blank before the line continuation keeps `-n' a separate word
      read -r -s -p "${extd}Overwrite existing '$stream_file' [y/N]? $norm" \
        -n 1 REPLY2
      case $REPLY2 in
        [Yy]) echo $REPLY2;;
        *)
          echo N
          id=$vol-$title-$sid-$$
          stream_file=subtitle_stream-$id
          echo "Using '$stream_file'"
      esac
      echo
      unset REPLY2;;
    *)
      echo Y
      REPLY='y'
  esac
  echo
fi
# Print the track number for subtitle ID $1 as tcextract's -a option expects
# it: 0x20 plus the ID, in hexadecimal.  Pasting the decimal ID behind `0x2'
# gives the same result for IDs 0-9 only.
# NOTE(review): the 0x20 base is taken from the original `0x2$sid'; confirm
# against the tcextract documentation.
_subtitle_track ()
{
  printf '0x%x\n' $((0x20 + ${1:-0}))
}
# Without an explicit target, name the text file after the stream file.
[ "$target" = '-' ] && target="$stream_file.srt"
# Extract unless the user chose to reuse an existing stream file:
# REPLY is empty if there was none, and N or n if it is not to be used.
case $REPLY in
  '' | N | n)
    echo "\
${extd}Extracting subtitle stream $norm$sid${st_descr:+ ($st_descr)}$extd
of title $norm#$title$extd
on $norm$source$extd
to \"$norm$target$extd\" ...$norm
" >&2
    > "$stream_file"
    tccat -i "$source" -T "$title" -L |
      tcextract -x ps1 -t vob -a "$(_subtitle_track "$sid")" > "$stream_file"
    ;;
esac
fi
# The extraction counts as successful only if the stream file exists and is
# not empty; otherwise remove whatever file is there and give up.
if ! [ -f "$stream_file" -a -s "$stream_file" ]; then
  echo "${extd}... failed.$norm"
  [ -f "$stream_file" ] && rm ./"$stream_file"
  exit $ECANTEXTRACT
fi
echo "${extd}... done.$norm"
# Convert the subtitle stream to PGM images (subtitle2pgm) and those to text
# (pgm2txt).  When the OCR step fails, offer other grey-levels values and
# retry until it succeeds or the user aborts.
# Sets result to 0 on success, $ECANTCONVERT if subtitle2pgm fails and
# $EOCRERROR if the user aborts.
result=$ESUCCESS
while true
do
  echo -n "
${extd}Converting subtitle stream \"$norm$stream_file$extd\"
to Netpbm Portable Greymaps (PGMs) ... $norm" | fold -s >&2
  # -c is passed only if a grey-levels value was given
  subtitle2pgm -o "./${id}-" ${grey_levels:+-c "$grey_levels"} < "$stream_file"
  result=$?
  if [ $result -ne 0 ]; then
    echo "${extd}failed.$norm"
    result=$ECANTCONVERT
    break
  fi
  echo "${extd}done.$norm
${extd}Converting PGMs to text files (TXTs) using GNU OCR (gocr) ... $norm"
  # Two-letter language code from the stream description; empty (and thus
  # no -f option) when the description does not contain one.
  st_lang=$(echo "$st_descr" |
    sed -n 's/.*language: \([a-z]\{2\}\).*/\1/p')
  pgm2txt ${st_lang:+-f "$st_lang"} "./${id}-"
  result=$?
  if [ $result -eq 0 ]; then
    echo "${extd}done, using a grey-levels value of\
\`$norm${grey_levels:-255,255,0,255}$extd'.$norm"
    break
  fi
  echo "${extd}failed.
If the conversion was cancelled due to inappropriate grey-levels value
\`${grey_levels:-255,255,0,255}', you may try another value,\
else you should abort:
$norm"
  # Offer the preset values except the one just tried (the default preset
  # is offered only if some other value was set explicitly).
  _ocr_abort=0
  select grey_levels in \
    $([ "$grey_levels" != '0,255,255,255' ] && echo 0,255,255,255) \
    $([ "$grey_levels" != '255,0,255,255' ] && echo 255,0,255,255) \
    $([ -n "$grey_levels" -a "$grey_levels" != '255,255,0,255' ] &&
      echo 255,255,0,255) \
    $([ "$grey_levels" != '255,255,255,0' ] && echo 255,255,255,0) \
    Other \
    Abort
  do
    case $grey_levels in
      Other)
        read -r -p "
${extd}Enter new value (\`c0$extd,${norm}c1$extd,${norm}c2$extd,${norm}c3'\
with 0 <= cN <= 255), or nothing to select a value:
$norm" grey_levels
        [ -n "$grey_levels" ] && break;;
      Abort)
        _ocr_abort=1
        break;;
      *)
        break
    esac
  done
  # `break' above only leaves the menu; without this check the conversion
  # would be retried with `Abort' as the grey-levels value.
  if [ $_ocr_abort -eq 1 ]; then
    result=$EOCRERROR
    break
  fi
done
# Remove the intermediate PGM images.  ${id} is quoted because it contains
# the volume label, which may include blanks; the glob stays unquoted.
echo -n "${extd}Cleaning up PGMs ... $norm"
if rm ./"${id}"-*.pgm; then
  echo "${extd}done.$norm"
else
  echo "${extd}failed.$norm"
fi
}
[ $result -eq $ESUCCESS ] &&
{
# Make $source name an existing subtitle index file: if it is not a regular
# file as given, `.srtx' is appended.  Succeeds if the (possibly changed)
# $source is a regular file.
try_file ()
{
  [ ! -f "$source" ] && source="$source.srtx"
  [ -f "$source" ]
}
# Print the ID part of the subtitle index file name $1: its last path
# component without the last `-' and everything after it.
_srtx_id ()
{
  set -- "${1##*/}"
  printf '%s\n' "${1%-*}"
}
if [ "${compile:-0}" -eq 1 ]; then
  if [ -d "$source" ]; then
    # TODO: loop through all .srtx files in the directory
    if [ "$source" != '.' ]; then cd "$source"; fi
    [ $? -eq 0 ] &&
    {
      # we are inside the directory now, so list relative to it
      ls ./*.srtx 2>/dev/null
    }
    echo >&2 "$appname: $source: Directory compile is not yet supported."
    exit $ECANTCOMPILE
  elif try_file; then
    # Work inside the directory of the index file; from here on the ID, the
    # stream file name and $source are all relative to that directory.
    d=${source%/*}
    if [ -d "$d" ] && [ "$d" != '.' ]; then
      cd "$d" || exit $ECANTCOMPILE
    fi
    id=$(_srtx_id "$source")
    stream_file=subtitle_stream-$id
    source="./${source##*/}"
  else
    echo >&2 "$appname: $source: No such file or directory."
    exit $ECANTCOMPILE
  fi
else
  # not in compile mode: use the index file named after the current ID
  source="${id}-.srtx"
fi
# If the target file already exists, ask what to do with it: overwrite,
# append, write to a new file (the shell's process ID is inserted before the
# suffix), or abort on any other key.
unset REPLY
if [ -f "$target" ]; then
  # TODO: allow for diff
  read -r -s -p "${extd}'$target' exists.
Append, overwrite, create new file, or abort [a/o/n/Esc]? $norm" \
    -n 1
  case $REPLY in
    [OoAa])
      echo $REPLY;;
    [Nn])
      echo $REPLY
      target="${target%.*}-$$.${target##*.}";;
    *)
      REPLY=Abort
      echo $REPLY
      exit $ECANTCOMPILE;;
  esac
fi
echo "
${extd}Compiling TXTs into \"$norm$target$extd\" ... $norm"
# Prepare the target according to the answer: empty it for "overwrite",
# add a user@host/date marker for "append".
case "$REPLY" in
  [Oo])
    : > "$target";;
  [Aa])
    # append new content marker here
    printf '\n-- %s@%s -- %s --\n\n' "$(whoami)" "$(hostname)" "$(date)" \
      >> "$target"
esac
# Append the compiled subtitles to the target file.  srttool(1) does the
# work; if it exits with a status above 1 (e.g. it is not installed), two
# sed passes take over: the first turns every index line naming a
# `*.pgm.txt' file into a sed command that reads that file in place of the
# line, the second applies those commands to the index.
if (
  # sed: thx to Erkan Yanar <erkan.yanar@t-online.de>, see
  # message ID <lduohb.v74.ln@510002093148-0001.dialin.t-online.de>
  srttool -s -i "$source"
  # only if srttool is not available
  [ $? -le 1 ] ||
    sed -n 's,^\(\([^/]*\)\(/\)\(.*\.pgm\.txt\)\)$,/^\2\\\3\4$/ { \
r \1 \
d \
},gp' "$source" | sed -f - "$source"
) >> "$target"
then
  echo "${extd}... done.$norm"
else
  echo "${extd}... failed.$norm"
  exit $ECANTCOMPILE
fi
# Remove the intermediate files and leave with the overall result.  Each
# failed removal is reported and turns the result into $ECANTCLEANUP.

# OCR text files
echo -n "${extd}Cleaning up
${id}-*.pgm.txt files ... $norm"
if rm ./"${id}"-*.pgm.txt; then
  echo "${extd}done.$norm"
else
  echo "${extd}failed.$norm"
  result=$ECANTCLEANUP
fi

# subtitle index file
echo -n " ${extd}Subtitle index file '${id}-.srtx' ... $norm"
if rm ./"${id}-.srtx"; then # "$target.srtx"
  echo "${extd}done.$norm"
else
  echo "${extd}failed.$norm"
  result=$ECANTCLEANUP
fi

# subtitle stream file, unless it is to be kept
if [ $keep -eq 0 -a -f "./$stream_file" ]; then
  echo -n " ${extd}Subtitle stream file '${stream_file}' ... $norm"
  if rm ./"$stream_file"; then
    echo "${extd}done.$norm"
  else
    echo "${extd}failed.$norm"
    result=$ECANTCLEANUP
  fi
fi
exit $result
}