#!/bin/bash
# NOTE: bash is required: the script relies on `select', `read -s -n -p'
# and arrays, none of which are available in a plain POSIX /bin/sh.

appname="${0##*/}"
ver="0.6.1.2006031907"
copy="2005, 2006"
mail_feedback="dvd@PointedEars.de"
# ----------------------------------------------------------------------------
# DVD Subtitles 0.6.1 -- Extracts subtitles from Video DVD data to a text file
# Copyright (C) 2005, 2006 Thomas Lahn <PointedEars@gmx.de>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License (GPL) as published
# by the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU GPL along with this
# program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
## Standard shell script disclaimer blurb thing:
##
## This script is a hack.  It's brute force.  It's horrible.
## It doesn't use Artificial Intelligence.  It doesn't use Virtual Reality.
## It's not perl.  It's not python.  It probably won't work unchanged on
## the "other" thousands of unices.  But it worksforme.  --ramiro
# (from /usr/local/mozilla/run-mozilla.sh)
#
# This is work in progress.  If you have an improvement, patch,
# idea, whatever, on how to make this script better, please
# send it to <dvd@PointedEars.de>

# Exit status values (documented in the EXIT STATUS section of _help).
ESUCCESS=0
ECANTEXTRACT=1
ECANTCONVERT=2
EOCRERROR=3
ECANTCOMPILE=4
ECANTCLEANUP=5
EARGERROR=127

# Prints the program banner (name, version, copyright, license) to stdout.
# Reads the globals ver, copy, mail_feedback, extd and norm.
_title ()
{
  echo "\
${extd}DVD Subtitles $ver
Copyright (C) $copy Thomas Lahn <$mail_feedback>$norm
Distributed under the terms of the GNU General Public License (GPL), see
COPYING file or http://www.gnu.org/licenses/licenses.html#GPL for details.
"
}

# Prints the usage text to stdout.
# Reads the globals appname, extd, ital and norm.
_help ()
{
  echo "\
Extracts a subtitle stream from Video DVD data and converts it to a text file.

$extd$appname$norm [options] ${ital}SOURCE$norm
$extd$appname$norm [$extd-hVkl$norm] [${ital}SOURCE$norm [${ital}TITLE$norm \
[${ital}SUBTITLE$norm [${ital}TARGET$norm [${ital}GREY_LEVELS$norm]]]]]

Any option argument is overwritten by the respective additional program
argument.  Options may be given in any order, and are also considered
options if located after the first program argument.  Too many program
arguments are silently ignored.

  $extd-c$norm, $extd--compile$norm ${ital}FILE$norm | ${ital}DIRECTORY$norm
                Compile subtitle text files according to subtitle index \
${ital}FILE$norm
                or to subtitle index files in ${ital}DIRECTORY$norm to \
${ital}TARGET$norm.
                Clean up if this is successful, then exit.
  $extd-k$norm, $extd--keep$norm    Keep subtitle stream file even if \
conversion is successful.
  $extd-l$norm, $extd--list$norm    List subtitles for TITLE using \
${extd}mplayer$norm(1) and exit.
                If TITLE is not provided or \`$extd-$norm', list subtitles for
                title #2 (as title #1 may be an intro without subtitles)
                and exit.

  SOURCE        Video DVD data source, i.e. a device (usually /dev/dvd),
                a directory (e.g. one containing content created via
                ${extd}dvdbackup$norm(1)) or a Video DVD image file.
                If \`$extd-$norm', a previously created subtitle stream file \
named
                ${extd}subtitle_stream-$norm*$extd-${norm}TITLE${extd}-${norm}SUBTITLE \
in the current working
                directory will be used for only the stream-to-graphics-
                to-text conversion instead.  Both TITLE and SUBTITLE
                must not be \`$extd-$norm' in that case.
                The default is \`$extd/dev/dvd$norm'.
  $extd-t$norm, $extd--title$norm
  TITLE         Number of the title (1-n) which will be accessed for
                subtitle stream extraction.  If left out or \`$extd-$norm', the
                program uses ${extd}mplayer$norm(1) to detect how many titles are
                available on the DVD data source and asks for the
                title to be accessed.
  $extd-s$norm, $extd--subtitle-id$norm
  SUBTITLE      ID of the subtitle stream to be extracted (0-n).
                If left out or \`$extd-$norm', the program uses its \
${extd}-l$norm option to detect
                which subtitles are available for the given TITLE and asks
                for the ID to be used.
  $extd-o$norm, $extd--output-target$norm
  TARGET        Name of the resulting subtitles text file.
                If not provided, the file is named after the subtitle stream
                file.  NOTE: Unlike previous versions, this version appends
                the filename suffix $extd.srt$norm automagically ONLY in that \
case.
  $extd-g$norm, $extd--grey-levels$norm
  GREY_LEVELS   Optional grey-levels value \
(\`c0$extd,${norm}c1$extd,${norm}c2$extd,${norm}c3' with 0 <= cN <= 255,
                where 0 is black and 255 is white) to be used for converting
                the subtitle stream graphics to text via OCR.  The default is
                \`${extd}255,255,0,255$norm'.  Unfortunately, the \
${extd}subtitle2pgm$norm program,
                which requires this value, appears to be poorly documented;
                if you find a more detailed, working documentation, please
                refer to it and inform this program's author about it.

  $extd-h$norm, $extd--help$norm    Display this help and exit.
  $extd-v$norm, $extd--verbose$norm Be verbose.  The number of ${extd}-v$norm \
options specify the level of
                verbosity.
  $extd-V$norm, $extd--version$norm Display version information and exit.

${extd}EXIT STATUS$norm
  ${extd}  0$norm  Successful program execution
  ${extd}  1$norm  Error detecting/extracting subtitle stream, or cancelled
       without selecting a title number or subtitle ID
  ${extd}  2$norm  Unable to convert subtitle stream to image files
  ${extd}  3$norm  Cancelled due to ${extd}gocr$norm(1) error or without \
entering another
       grey-levels value
  ${extd}  4$norm  Unable to compile to text file
  ${extd}  5$norm  Unable to clean up
  ${extd}127$norm  Insufficient number of arguments / help was displayed

See the $extd$appname$norm(1) manpage for complete documentation."
}

# Lists the subtitle streams of a title, one "ID description" line each.
# Arguments: $1 - DVD data source passed to mplayer's -dvd-device
#            $2 - title number (defaults to 2)
# Outputs:   mplayer's "sid" lines with everything up to the first
#            colon-plus-whitespace removed
getsubtitles ()
{
  mplayer -dvd-device "$1" -vo null -ao null -frames 0 \
    -v "dvd://${2:-2}" 2>&1 |
    sed -n '/sid/ s/^[^:]\{1,\}:[[:space:]]//p'
}

# Reports that no titles ($1 = 0) or no subtitles (any other $1) could be
# read from the data source, then exits with ECANTEXTRACT.
read_error ()
{
  case $1 in
    0) subject='titles available on this DVD source.';;
    *) subject='subtitles available for this title.';;
  esac

  echo >&2 "\
Sorry, there are no $subject
Please verify that the DVD data source is available and
that its filesystem is consistent."
  unset subject
  exit $ECANTEXTRACT
}

# --- Terminal geometry and text attributes ----------------------------------

# Ask the terminal for its size when the environment does not provide it;
# anything missing or non-positive falls back to 24x80.
if [ -z "$LINES" ] || [ -z "$COLUMNS" ]; then
  term_size=$(stty size 2>/dev/null)
  LINES=${term_size%% *}
  COLUMNS=${term_size##* }
  unset term_size
fi
[ "${LINES:-0}" -gt 0 ] 2>/dev/null || LINES=24
[ "${COLUMNS:-0}" -gt 0 ] 2>/dev/null || COLUMNS=80

if [ "$TERM" != "raw" ] && stty size >/dev/null 2>&1; then
  extd=$(tput bold 2>/dev/null)
  ital=$(tput sitm 2>/dev/null)
  norm=$(tput sgr0 2>/dev/null)
else
  esc=""
  extd=""
  ital=""
  norm=""
fi

# --- Command-line parsing ---------------------------------------------------

# Remember the debug request now: after getopt has normalized the arguments,
# a leading `-vv' has become `-v -v' and can no longer be recognized.
debug_args=0
[ "$1" = "-vv" ] && debug_args=1

[ $debug_args -eq 1 ] && echo "$extd
Debug output for POSIX conform command-line parsing

Original arguments: $*" >&2

# Note that we use `"$@"' to let each command-line parameter expand to a
# separate word.  The quotes around `$@' are essential!
# We need `tmp' as the `eval set --' would nuke the return value of getopt.
# The exit status must be captured IMMEDIATELY after each getopt call; any
# intervening assignment would reset $? to 0.
getopt -T >/dev/null 2>&1
if [ $? -eq 4 ]; then
  getopt_type=long
  [ $debug_args -eq 1 ] && echo "getopt(1) type: enhanced" >&2
  # `keep' is the documented long option; `keep-stream' is retained for
  # backwards compatibility.
  tmp=$(getopt -o c:klg:o:S::s:t:hVv \
    -l compile:,keep,keep-stream,list,grey-levels:,output-target:\
,spell-check,subtitle-id:,title:,help,verbose,version \
    -n "$appname" -s sh \
    -- "$@")
  getopt_exit_code=$?
else
  getopt_type=short
  [ $debug_args -eq 1 ] && echo "getopt(1) type: old" >&2
  tmp=$(getopt c:klg:o:S:s:t:hVv "$@")
  getopt_exit_code=$?
fi

help=0
verbose=0
version=0
list=0
source='/dev/dvd'
title='-'
sid='-'
target='-'
keep=0
compile=0

if [ $getopt_exit_code -eq 0 ]; then
  ## getopt returns error code 0 for successful parsing, 1 if
  ## getopt(3) returns errors, 2 if it does not understand its
  ## own parameters, 3 if an internal error occurs like out-of-
  ## memory, and 4 if it is called with -T.
  ##
  ## Note the quotes around `$tmp': they are essential!
  eval set -- "$tmp"

  [ $debug_args -eq 1 ] && echo "New arguments: $*$norm" >&2

  while true; do
    case "$1" in
      -h | --help)
        help=1
        shift
        ;;
      -v | --verbose)
        verbose=$((verbose + 1))
        shift
        ;;
      -V | --version)
        version=1
        shift
        ;;
      -c | --compile)
        compile=1
        source=$2
        shift 2
        ;;
      -k | --keep | --keep-stream)
        keep=1
        shift
        ;;
      -l | --list)
        list=1
        shift
        ;;
      -g | --grey-levels)
        grey_levels=$2
        shift 2
        ;;
      -o | --output-target)
        target=$2
        shift 2
        ;;
      -s | --subtitle-id)
        sid=$2
        shift 2
        ;;
      -t | --title)
        title=$2
        shift 2
        ;;
      -S)
        # Accepted by getopt but not implemented; both getopt flavours
        # emit an argument word for -S, so consume option and argument.
        shift 2
        ;;
      --spell-check)
        # Accepted by getopt but not implemented.
        shift
        ;;
      --)
        shift
        break
        ;;
      *)
        # Never spin forever on something getopt let through unexpectedly.
        break
        ;;
    esac
  done
  # The remaining positional parameters are left untouched in "$@" so that
  # arguments containing whitespace survive.
else
  [ $verbose -gt 1 ] && echo "getopt exited: $getopt_exit_code" >&2
  if [ $getopt_exit_code -eq 1 ] || [ $getopt_exit_code -eq 2 ]; then
    help=1
  else
    exit $getopt_exit_code
  fi
fi

[ $list -eq 0 ] && _title
[ $version -eq 1 ] && exit $ESUCCESS
if [ $help -eq 1 ]; then
  _help "$0"
  exit $EARGERROR
fi

result=$ESUCCESS

# --- Extraction and OCR conversion (skipped with --compile) -----------------

if [ $compile -eq 0 ]; then
  # As documented in the help text, program arguments override the
  # respective option arguments.
  [ -n "$1" ] && source=$1
  [ -n "$2" ] && title=$2
  [ -n "$3" ] && sid=$3
  [ -n "$4" ] && target=$4
  [ -n "$5" ] && grey_levels=$5

  if [ $list -eq 1 ]; then
    # first title may be only an intro
    [ "$title" = '-' ] && title=2
    tmp=$(getsubtitles "$source" "$title")
    if [ -n "$tmp" ]; then
      echo "${extd}The following subtitles are available for title #$title:$norm
$tmp"
      exit $ESUCCESS
    else
      exit $ECANTEXTRACT
    fi
  fi

  # Look up the description of the requested stream (used in messages and
  # to derive the OCR language).
  if [ "$sid" != '-' ] && [ "$title" != '-' ]; then
    subtitles=$(getsubtitles "$source" "$title")
    [ -n "$subtitles" ] &&
      st_descr=$(echo "$subtitles" | grep "^$sid[[:space:]]" |
        cut -f 2- -d ' ')
  fi

  if [ "$source" = '-' ]; then
    # Re-use a previously extracted stream file.  Test for the file itself:
    # the exit status of an `ls | head' pipeline is that of head, which
    # succeeds even when nothing matched.
    stream_file=
    for candidate in subtitle_stream-*-"$title"-"$sid"; do
      if [ -f "$candidate" ]; then
        stream_file=$candidate
        break
      fi
    done
    unset candidate

    if [ -n "$stream_file" ]; then
      read -r -s -p "Use '$stream_file' [Y/n]? " -n 1
      case $REPLY in
        [Nn])
          echo "$REPLY"
          exit 1
          ;;
        *)
          echo Y
      esac
      echo
      id=${stream_file#*-}
    else
      echo "$appname: No such file: subtitle_stream-*-$title-$sid" >&2
      exit $ECANTEXTRACT
    fi
  else
    if [ "$title" = '-' ]; then
      titles=$(mplayer -dvd-device "$source" -vo null -ao null -frames 0 \
        -v dvd:// 2>&1 | grep -E '[0-9]+ titles')
      num_titles=$(echo "$titles" | awk '{print $3}')
      case $num_titles in
        '' | *[!0-9]*) num_titles=0;;
      esac
      [ "$num_titles" -lt 1 ] && read_error 0

      echo "$titles"
      while true; do
        read -r -p "\
${extd}Enter title# (1-$num_titles), or nothing to abort: $norm"
        case $REPLY in
          '') exit $ECANTEXTRACT;;
          *[!0-9]*) continue;;
        esac
        # force base 10 so that input like `08' is not rejected as octal
        title=$((10#$REPLY))
        [ "$title" -ge 1 ] && [ "$title" -le "$num_titles" ] && break
      done
      echo
    fi

    if [ "$sid" = '-' ]; then
      if [ -z "$subtitles" ]; then
        subtitles=$("$0" -lt "$title" "$source")
      fi

      if [ $? -eq 0 ]; then
        sid_max=$(echo "$subtitles" | tail -n 1 | awk '{print $1}')
        echo "$subtitles"
        while true; do
          read -r -p "\
${extd}Enter ID of subtitle stream (0-$sid_max) to extract, \
or nothing to abort: $norm"
          case $REPLY in
            '') exit $ECANTEXTRACT;;
            *[!0-9]*) continue;;
          esac
          sid=$((10#$REPLY))
          # the range is inclusive, as the prompt says
          [ "$sid" -ge 0 ] && [ "$sid" -le "$sid_max" ] && break
        done
      else
        read_error 1
      fi
    fi

    # remove trailing /
    source=${source%/}

    # Volume name, with the padding that volname(1) emits squeezed out by
    # the unquoted expansion; fall back to the last path component of the
    # source if volname fails or prints nothing.
    vol=$(volname "$source" 2>/dev/null) && vol=$(echo $vol)
    [ -n "$vol" ] || vol=${source##*/}

    id=$vol-$title-$sid
    stream_file=subtitle_stream-$id

    # Extract unless the user chooses to re-use an existing stream file.
    do_extract=1
    if [ -f "$stream_file" ]; then
      read -r -s -p "${extd}Use existing '$stream_file' [Y/n]? $norm" -n 1
      case $REPLY in
        [Nn])
          echo "$REPLY"
          read -r -s -p \
            "${extd}Overwrite existing '$stream_file' [y/N]? $norm" \
            -n 1 REPLY2
          case $REPLY2 in
            [Yy]) echo "$REPLY2";;
            *)
              echo N
              # keep the old file; write to a PID-suffixed name instead
              id=$vol-$title-$sid-$$
              stream_file=subtitle_stream-$id
              echo "Using '$stream_file'"
          esac
          echo
          unset REPLY2
          ;;
        *)
          echo Y
          do_extract=0
      esac
      echo
    fi

    [ "$target" = '-' ] && target="$stream_file.srt"

    if [ $do_extract -eq 1 ]; then
      echo "\
${extd}Extracting subtitle stream $norm$sid${st_descr:+ ($st_descr)}$extd
of title $norm#$title$extd
on $norm$source$extd
to \"$norm$target$extd\" ...$norm" >&2

      : > "$stream_file"
      # DVD subpicture streams are numbered 0x20 + ID; plain concatenation
      # (`0x2$sid') is only correct for IDs 0 to 9.
      tccat -i "$source" -T "$title" -L |
        tcextract -x ps1 -t vob -a "$(printf '0x%x' $((0x20 + sid)))" \
          > "$stream_file"
    fi
  fi

  if [ -f "$stream_file" ] && [ -s "$stream_file" ]; then
    echo "${extd}... done.$norm"
  else
    echo "${extd}... failed.$norm"
    [ -f "$stream_file" ] && rm ./"$stream_file"
    exit $ECANTEXTRACT
  fi

  # Also needed when SOURCE is `-': without it the compiled subtitles
  # would be written to a file literally named `-'.
  [ "$target" = '-' ] && target="$stream_file.srt"

  result=$ESUCCESS
  while true; do
    printf '%s' "${extd}Converting subtitle stream \"$norm$stream_file$extd\"
to Netpbm Portable Greymaps (PGMs) ... $norm" | fold -s >&2

    subtitle2pgm -o ./"${id}"- ${grey_levels:+-c "$grey_levels"} \
      < "$stream_file"
    result=$?
    if [ $result -ne 0 ]; then
      echo "${extd}failed.$norm"
      result=$ECANTCONVERT
      break
    fi

    echo "${extd}done.$norm

${extd}Converting PGMs to text files (TXTs) using GNU OCR (gocr) ... $norm"
    # -n/p: yield nothing (instead of the whole description) when the
    # description carries no language tag
    st_lang=$(echo "$st_descr" |
      sed -n 's/.*language: \([a-z]\{2\}\).*/\1/p')
    pgm2txt ${st_lang:+-f "$st_lang"} ./"${id}"-
    result=$?
    if [ $result -eq 0 ]; then
      echo "${extd}done, using a grey-levels value of \
\`$norm${grey_levels:-255,255,0,255}$extd'.$norm"
      break
    fi

    echo "${extd}failed.

If the conversion was cancelled due to inappropriate grey-levels value
\`${grey_levels:-255,255,0,255}', you may try another value, \
else you should abort:$norm"

    # Offer every preset except the one that has just failed.
    choices=()
    for candidate in 0,255,255,255 255,0,255,255 255,255,0,255 255,255,255,0
    do
      [ "${grey_levels:-255,255,0,255}" != "$candidate" ] &&
        choices+=("$candidate")
    done
    unset candidate

    # `retry' stays 0 on Abort and on end-of-input, so that neither of
    # them re-runs the conversion.
    retry=0
    select grey_levels in "${choices[@]}" Other Abort; do
      case $grey_levels in
        Other)
          read -r -p "${extd}Enter new value \
(\`c0$extd,${norm}c1$extd,${norm}c2$extd,${norm}c3' \
with 0 <= cN <= 255), or nothing to select a value:$norm" grey_levels
          if [ -n "$grey_levels" ]; then
            retry=1
            break
          fi
          ;;
        Abort)
          break
          ;;
        '')
          # invalid menu choice: ask again
          ;;
        *)
          retry=1
          break
      esac
    done

    if [ $retry -eq 0 ]; then
      result=$EOCRERROR
      break
    fi
  done

  printf '%s' "${extd}Cleaning up PGMs ... $norm"
  if rm ./"${id}"-*.pgm; then
    echo "${extd}done.$norm"
  else
    echo "${extd}failed.$norm"
  fi
fi

# --- Compilation of the OCR results into the target file --------------------

if [ $result -eq $ESUCCESS ]; then
  if [ $compile -eq 1 ]; then
    if [ -d "$source" ]; then
      # TODO: loop through all .srtx files in the directory
      ls "$source"/*.srtx 2>/dev/null
      echo >&2 "$appname: $source: Directory compile is not yet supported."
      exit $ECANTCOMPILE
    fi

    # accept the index file name with or without its .srtx suffix
    [ -f "$source" ] || source="$source.srtx"
    if [ ! -f "$source" ]; then
      echo >&2 "$appname: $source: No such file or directory."
      exit $ECANTCOMPILE
    fi

    # Work inside the directory of the index file, and derive all names
    # from its basename so that they stay valid after the cd.
    d=${source%/*}
    if [ "$d" != "$source" ] && [ -d "$d" ] && [ "$d" != '.' ]; then
      cd "$d" || exit $ECANTCOMPILE
    fi
    base=${source##*/}
    id=${base%-*}
    stream_file=subtitle_stream-$id
    source="./$base"
    unset d base
  else
    source="${id}-.srtx"
  fi

  # "If not provided, the file is named after the subtitle stream file."
  [ "$target" = '-' ] && target="$stream_file.srt"

  unset REPLY
  if [ -f "$target" ]; then
    # TODO: allow for diff
    read -r -s -p "${extd}'$target' exists.
Append, overwrite, create new file, or abort [a/o/n/Esc]? $norm" -n 1
    case $REPLY in
      [Oo]) echo "$REPLY";;
      [Aa]) echo "$REPLY";;
      [Nn])
        echo "$REPLY"
        target="${target%.*}-$$.${target##*.}"
        ;;
      *)
        REPLY=Abort
        echo "$REPLY"
        exit $ECANTCOMPILE
        ;;
    esac
  fi

  echo "${extd}Compiling TXTs into \"$norm$target$extd\" ... $norm"

  case "$REPLY" in
    [Oo]) : > "$target";;
    [Aa])
      # append new content marker here
      echo "-- $(whoami)@$(hostname) -- $(date) --" >> "$target"
  esac

  (
    # sed: thx to Erkan Yanar <erkan.yanar@t-online.de>, see
    # message ID <lduohb.v74.ln@510002093148-0001.dialin.t-online.de>
    srttool -s -i "$source"
    status=$?

    # only if srttool is not available: generate a sed script that replaces
    # each "*.pgm.txt" line of the index with the content of that file
    if [ $status -gt 1 ]; then
      sed -n 's,^\(\([^/]*\)\(/\)\(.*\.pgm\.txt\)\)$,/^\2\\\3\4$/ {\
r \1\
d\
},gp' "$source" | sed -f - "$source"
      status=$?
    fi

    # Propagate the real status; a bare `if' would report success even
    # when srttool itself had failed with status 1.
    exit $status
  ) >> "$target"

  if [ $? -eq 0 ]; then
    echo "${extd}... done.$norm"
  else
    echo "${extd}... failed.$norm"
    exit $ECANTCOMPILE
  fi

  printf '%s' "${extd}Cleaning up
  ${id}-*.pgm.txt files ... $norm"
  if rm ./"${id}"-*.pgm.txt; then
    echo "${extd}done.$norm"
  else
    echo "${extd}failed.$norm"
    result=$ECANTCLEANUP
  fi

  printf '%s' "  ${extd}Subtitle index file '${id}-.srtx' ... $norm"
  if rm ./"${id}-.srtx"; then # "$target.srtx"
    echo "${extd}done.$norm"
  else
    echo "${extd}failed.$norm"
    result=$ECANTCLEANUP
  fi

  if [ $keep -eq 0 ] && [ -f "./$stream_file" ]; then
    printf '%s' "  ${extd}Subtitle stream file '${stream_file}' ... $norm"
    if rm ./"$stream_file"; then
      echo "${extd}done.$norm"
    else
      echo "${extd}failed.$norm"
      result=$ECANTCLEANUP
    fi
  fi

  exit $result
fi

# Conversion failed or was aborted: report the documented status (2 or 3)
# rather than the status of the last test command.
exit $result