Subversion Repositories LCARS

Rev

Rev 20 | Blame | Compare with Previous | Last modification | View Log | RSS feed

1
#!/bin/sh
appname="${0##*/}"
ver="0.6.1.2007011316"
copy="2005-2007"
mail_feedback="dvd@PointedEars.de"
# ----------------------------------------------------------------------------
# DVD Subtitles 0.6.1 -- Extracts subtitles from DVD-Video data to a text file
# Copyright (C) 2005, 2006  Thomas Lahn <PointedEars@gmx.de>
#
#     This program is free software; you can redistribute it and/or modify it
#     under the terms of the GNU General Public License (GPL) as published
#     by the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU GPL along with this
#     program; if not, write to the Free Software Foundation, Inc.,
#     51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
#
## Standard shell script disclaimer blurb thing:
##
## This script is a hack.  It's brute force.  It's horrible.
## It doesn't use Artificial Intelligence.  It doesn't use Virtual Reality.
## It's not perl.  It's not python.  It probably won't work unchanged on
## the "other" thousands of unices.  But it worksforme.  --ramiro
# (from /usr/local/mozilla/run-mozilla.sh)
#
#     This is work in progress.  If you have an improvement, patch,
#     idea, whatever, on how to make this script better, please
#     send it to <dvd@PointedEars.de>

# Print the program banner (name and version, copyright, licence pointer)
# followed by one empty line.
# Reads: extd, norm, ver, copy, mail_feedback
_title ()
{
  printf '%s\n' \
    "${extd}DVD Subtitles $ver" \
    "Copyright (C) $copy  Thomas Lahn <$mail_feedback>$norm" \
    'Distributed under the terms of the GNU General Public License (GPL), see' \
    'COPYING file or http://www.gnu.org/licenses/licenses.html#GPL for details.' \
    ''
}

# Print the usage text to standard output.
# Reads appname and the terminal attribute strings extd, ital and norm
# (each of which may be empty).
# Inside the string a backslash at the end of a line joins that line with
# the next one, so the blank in front of such a backslash is significant.
_help ()
{
  echo "\
Extracts a subtitle stream from DVD-Video data and converts it to a text file.

$extd$appname$norm [options] ${ital}SOURCE$norm

Deprecated:
  $extd$appname$norm [$extd-hVkl$norm] [${ital}SOURCE$norm [${ital}TITLE$norm \
[${ital}SUBTITLE$norm [${ital}TARGET$norm [${ital}GREY_LEVELS$norm]]]]]

Any option argument is overwritten by the respective additional program
argument.  Options may be given in any order, and are also considered
options if located after the first program argument.  Too many program
arguments are silently ignored.

$extd-c$norm, $extd--compile$norm ${ital}FILE$norm | ${ital}DIRECTORY$norm
             Compile subtitle text files according to subtitle index \
${ital}FILE$norm
             or to subtitle index files in ${ital}DIRECTORY$norm to \
${ital}TARGET$norm.  Clean up
             if this is successful, then exit.

$extd-k$norm, $extd--keep$norm   Keep subtitle stream file even if \
conversion is successful.

$extd-l$norm, $extd--list$norm   List subtitles for TITLE using \
${extd}mplayer$norm(1) and exit.
               If TITLE is not provided or \`$extd-$norm', list subtitles for
               title #2 (as title #1 may be an intro without subtitles)
               and exit.

SOURCE       Video DVD data source, i.e. a device (usually /dev/dvd),
               a directory (e.g. one containing content created via
               ${extd}dvdbackup$norm(1)) or a Video DVD image file.
               If \`$extd-$norm', a previously created subtitle stream file \
named
               ${extd}subtitle_stream-$norm*$extd-${norm}TITLE${extd}-${norm}SUBTITLE \
in the current working
               directory will be used for only the stream-to-graphics-
               to-text conversion instead.  Both TITLE and SUBTITLE
               must not be \`$extd-$norm' in that case.
               The default is \`$extd/dev/dvd$norm'.

$extd-t$norm, $extd--title$norm
  TITLE        Number of the title (1-n) which will be accessed for
               subtitle stream extraction.  If left out or \`$extd-$norm', the
               program uses ${extd}mplayer$norm(1) to detect how many titles are
               available on the DVD data source and asks for the
               title to be accessed.

$extd-s$norm, $extd--subtitle-id$norm
  SUBTITLE     ID of the subtitle stream to be extracted (0-n).
               If left out or \`$extd-$norm', the program uses its \
${extd}-l$norm option to detect
               which subtitles are available for the given TITLE and asks
               for the ID to be used.

$extd-o$norm, $extd--output-target$norm
  TARGET       Name of the resulting subtitles text file.
               If not provided, the file is named after the subtitle stream
               file.  NOTE: Unlike previous versions, this version appends
               the filename suffix $extd.srt$norm automagically ONLY in that \
case.

$extd-g$norm, $extd--grey-levels$norm
  GREY_LEVELS  Optional grey-levels value \
(\`c0$extd,${norm}c1$extd,${norm}c2$extd,${norm}c3' with 0 <= cN <= 255,
               where 0 is black and 255 is white) to be used for converting
               the subtitle stream graphics to text via OCR.  The default is
               \`${extd}255,255,0,255$norm'.  Unfortunately, the \
${extd}subtitle2pgm$norm program,
               which requires this value, appears to be poorly documented;
               if you find a more detailed, working documentation, please
               refer to it and inform this program's author about it.

$extd-h$norm, $extd--help$norm     Display this help and exit.
$extd-v$norm, $extd--verbose$norm  Be verbose.  The number of ${extd}-v$norm \
options specify the level of
                 verbosity.
$extd-V$norm, $extd--version$norm  Display version information and exit.

${extd}EXIT STATUS$norm
  ${extd}  0$norm  Successful program execution
  ${extd}  1$norm  Error detecting/extracting subtitle stream, or cancelled
         without selecting a title number or subtitle ID
  ${extd}  2$norm  Unable to convert subtitle stream to image files
  ${extd}  3$norm  Cancelled due to ${extd}gocr$norm(1) error or without \
entering another
         grey-levels value
  ${extd}  4$norm  Unable to compile to text file
  ${extd}  5$norm  Unable to clean up
  ${extd}127$norm  Insufficient number of arguments / help was displayed

See the $extd$appname$norm(1) manpage for complete documentation."

}
                                                     
# Make sure LINES and COLUMNS hold usable values: ask stty(1) when either
# of them is empty and fall back to 24x80 when nothing sensible is reported.
if [ -z "$LINES" ] || [ -z "$COLUMNS" ]; then
  eval "$(stty size 2>/dev/null | {
    read rows cols
    echo "LINES=${rows:-24} COLUMNS=${cols:-80}"
  })"
fi
[ "$LINES" -eq 0 ] && LINES=24
[ "$COLUMNS" -eq 0 ] && COLUMNS=80

# Choose the strings used to emphasise text in messages: extd (bold),
# ital (italics) and norm (reset).  They are taken from tput(1) when TERM is
# not "raw" and stty(1) can report a size; otherwise all of them are
# cleared, including ital, so that no inherited value ends up in the output.
if test "$TERM" != "raw" && stty size >/dev/null 2>&1 ; then
  extd=$(tput bold 2>/dev/null)
  ital=$(tput sitm 2>/dev/null)
  norm=$(tput sgr0 2>/dev/null)
else
  esc=""
  extd=""
  ital=""
  norm=""
fi

# Note that we use `"$@"' to let each command-line parameter expand to a
# separate word. The quotes around `$@' are essential!
# We need `tmp' as the `eval set --' would nuke the return value of getopt.

# `-vv' as the very first argument additionally traces the option parsing.
# It has to be remembered here because getopt(1) rewrites the arguments.
getopt_debug=0
[ "$1" = "-vv" ] && getopt_debug=1

[ $getopt_debug -eq 1 ] && echo "$extd
Debug output for POSIX conform command-line parsing

Original arguments: $*" >&2

# getopt(1) exits with status 4 when called with -T (see the list of exit
# codes below); any other status selects the short-options-only call.
getopt -T >/dev/null 2>&1
if [ $? -eq 4 ]; then
  getopt_type=long
  [ $getopt_debug -eq 1 ] && echo "getopt(1) type:     enhanced" >&2
  tmp=$(getopt -o c:klg:o:S::s:t:hVv \
               -l compile:,keep-stream,list,grey-levels:,output-target:\
,spell-check,subtitle-id:,title:,help,verbose,version \
               -n "$appname" -s sh \
               -- "$@")
  # must be read right here: any later command overwrites $?
  getopt_exit_code=$?
else
  getopt_type=short
  [ $getopt_debug -eq 1 ] && echo "getopt(1) type:     old" >&2
  tmp=$(getopt c:klg:o:S:s:t:hVv "$@")
  getopt_exit_code=$?
fi

# exit status
ESUCCESS=0
ECANTEXTRACT=1
ECANTCONVERT=2
EOCRERROR=3
ECANTCOMPILE=4
ECANTCLEANUP=5
EARGERROR=127

help=0
verbose=0
version=0
list=0
source='/dev/dvd'
title='-'
sid='-'
target='-'
args=''
keep=0
compile=0
if [ $getopt_exit_code -eq 0 ]; then
##     getopt  returns  error  code 0 for successful parsing, 1 if
##     getopt(3) returns errors, 2 if it does not understand  its
##     own parameters, 3 if an internal error occurs like out-of-
##     memory, and 4 if it is called with -T.
#
# Note the quotes around `$tmp': they are essential!
  eval set -- "$tmp"
  [ $getopt_debug -eq 1 ] && echo "New arguments:      $*$norm
" >&2

  while [ $# -gt 0 ]; do
    case "$1" in
      -h | --help)
        help=1
        shift;;

      -v | --verbose)
        verbose=$((verbose + 1))
        shift;;

      -V | --version)
        version=1
        shift;;

      -c | --compile)
        compile=1
        source=$2
        shift 2;;

      # getopt(1) prints the full name `--keep-stream' for the long option
      -k | --keep | --keep-stream)
        keep=1
        shift;;

      -l | --list)
        list=1
        shift;;

      -g | --grey-levels)
        grey_levels=$2
        shift 2;;

      -o | --output-target)
        target=$2
        shift 2;;

      -s | --subtitle-id)
        sid=$2
        shift 2;;

      -t | --title)
        title=$2
        shift 2;;

      # Spell checking is declared in the option lists above but nothing
      # here acts on it; consume the option so that parsing can go on.
      # -S is always followed by a (possibly empty) argument word.
      -S)
        shift 2;;

      --spell-check)
        shift;;

      --)
        shift
        break;;

      # never leave a word unconsumed, or this loop would not terminate
      *)
        echo "$appname: unsupported option: $1" >&2
        help=1
        shift;;
    esac
  done

  # What is left are the operands.  They stay in the positional parameters
  # untouched, so operands containing whitespace remain single words.
  [ $# -gt 0 ] && args=" $*"
else
  [ $getopt_debug -eq 1 ] && echo "getopt exited: $getopt_exit_code" >&2
  if [ $getopt_exit_code -eq 1 -o $getopt_exit_code -eq 2 ]; then
    help=1
  else
    exit $getopt_exit_code
  fi
fi

# Print the banner unless a subtitle listing was requested, then serve the
# informational options: --version ends the program successfully, --help
# prints the usage text and ends it with the argument-error status.
if [ $list -eq 0 ]; then
  _title
fi

if [ $version -eq 1 ]; then
  exit $ESUCCESS
fi

if [ $help -eq 1 ]; then
  _help "$0"
  exit $EARGERROR
fi

result=$ESUCCESS
[ $compile -eq 0 ] &&
{
  [ -n "$1"                      ] && source=$1
  [ -z "$title"       -a -n "$2" ] && title=$2
  [ -z "$sid"         -a -n "$3" ] && sid=$3
  [ -z "$target"      -a -n "$4" ] && target=$4
  [ -z "$grey_levels" -a -n "$5" ] && grey_levels=$5
 
  getsubtitles ()
  {
    mplayer -dvd-device "$1" -vo null -ao null -frames 0 \
            -v "dvd://${2:-2}" 2>&1 |
      sed -n '/sid/ s/^[^:]\{1,\}:[[:space:]]//p'
  # echo "$subtitles"
  }
 
  [ $list -eq 1 ] &&
  {
    # first title may be only an intro
    [ "$title" = '-' ] && title=2
    tmp=`getsubtitles $source $title`
    if [ -n "$tmp" ]; then
      echo "${extd}The following subtitles are available for title #$title:$norm
$tmp"

      exit $ESUCCESS
    else
      exit $ECANTEXTRACT
    fi
  }

  [ "$sid" != '-' -a "$title" != '-' ] &&
  {
    subtitles=`getsubtitles $source $title`
    [ -n "$subtitles" ] &&
      st_descr=`echo "$subtitles" | grep "^$sid[[:space:]]" |
                  cut -f 2- -d ' '`
  }
 
  if [ "$source" = '-' ]; then
    stream_file=`ls subtitle_stream-*-$title-$sid 2>/dev/null | head -n 1`
    if [ $? -eq 0 ]; then
      read -r -s -p "Use '$stream_file' [Y/n]? " -n 1
      case $REPLY in
        [Nn])
          echo $REPLY
          exit 1;;
        *)
          echo Y
      esac
      echo
   
      id=${stream_file#*-}
    else
      echo "$appname: No such file: subtitle_stream-*-$title-$sid" >&2
      exit $ECANTEXTRACT
    fi
  else
    read_error ()
    {
      case $1 in
        0) subject='titles available on this DVD source.';;
        *) subject='subtitles available for this title.';;
      esac

      echo >&2 "\
Sorry, there are no $subject
Please verify that the DVD data source is available and
that its filesystem is consistent."


      unset subject
      exit $ECANTEXTRACT
    }
   
    if [ "$title" = '-' ]; then
      titles=`mplayer -dvd-device "$source" -vo null -ao null -frames 0 \
                      -v dvd:// 2>&1 | egrep '[0-9]+ titles'`
      num_titles=`echo "$titles" | awk '{print $3}'`
 
      [ $(($num_titles)) -lt 1 ] && read_error 0
 
      echo "$titles"
      while true
      do
        read -r -p "\
${extd}Enter title# (1-$num_titles), or nothing to abort: $norm"

        if [ -n "$REPLY" ]; then
          title=$(($REPLY))
          [ $title -ge 1 -a $title -le $num_titles ] && break
        else
          exit $ECANTEXTRACT
        fi
      done
      echo
    fi
 
    if [ "$sid" = '-' ]; then
      if [ -z "$subtitles" ]; then subtitles=`"$0" -lt "$title" "$source"`; fi
      if [ $? -eq 0 ]; then
        sid_max=`echo "$subtitles" | tail -n 1 | awk '{print $1}'`      
        echo "$subtitles"
        while true; do
          read -r -p "\
${extd}Enter ID of subtitle stream (0-$sid_max) to extract, or nothing to abort: $norm"

          if [ -n "$REPLY" ]; then
            sid=$(($REPLY))
            [ $sid -ge 0 -a $sid -lt $sid_max ] && break
          else
            exit $ECANTEXTRACT
          fi
        done
      else
        read_error 1
      fi
    fi
 
    # remove trailing /
    source=${source%/}
 
    vol=`(echo $(volname $source 2>/dev/null); exit $?;) || echo ${source##*/}`
    id=$vol-$title-$sid
    stream_file=subtitle_stream-$id
   
    unset REPLY
    [ -f "$stream_file" ] &&
    {
      read -r -s -p "${extd}Use existing '$stream_file' [Y/n]? $norm" -n 1
      case $REPLY in
        [Nn])
          echo $REPLY
          read -r -s -p "${extd}Overwrite existing '$stream_file' [y/N]? $norm"\
               -n 1 REPLY2
          case $REPLY2 in
            [Yy]) echo $REPLY2;;
            *)
              echo N
              id=$vol-$title-$sid-$$
              stream_file=subtitle_stream-$id
              echo "Using '$stream_file'"
          esac
          echo
          unset REPLY2;;
        *)
          echo Y
          REPLY='y'
      esac
      echo
    }
   
    [ "$target" = '-' ] && target="$stream_file.srt"
         
    [ -z "$REPLY" -o "$REPLY" == 'N' -o "$REPLY" == 'n' ] &&
    {
      echo "\
${extd}Extracting subtitle stream $norm$sid${st_descr:+ ($st_descr)}$extd
of title $norm#$title$extd
on $norm$source$extd
to \"$norm$target$extd\" ...$norm
"
>&2
     
      > "$stream_file"
      tccat -i "$source" -T "$title" -L |
        tcextract -x ps1 -t vob -a 0x2$sid > "$stream_file"
    }
  fi
 
  if [ -f "$stream_file" -a -s "$stream_file" ]; then
    echo "${extd}... done.$norm"
  else
    echo "${extd}... failed.$norm"
    [ -f "$stream_file" ] && rm ./"$stream_file"
    exit $ECANTEXTRACT
  fi
 
  result=$ESUCCESS
  while true
  do
    echo -n "
${extd}Converting subtitle stream \"$norm$stream_file$extd\"
to Netpbm Portable Greymaps (PGMs) ... $norm"
| fold -s >&2
    subtitle2pgm -o ./${id}- ${grey_levels:+-c "$grey_levels"} < $stream_file
    result=$?
    if [ $result -eq 0 ]; then
      echo "${extd}done.$norm
 
${extd}Converting PGMs to text files (TXTs) using GNU OCR (gocr) ... $norm"

      st_lang=$(echo "$st_descr" | sed 's/.*language: \([a-z]\{2\}\)/\1/')
      pgm2txt ${st_lang:+-f $st_lang} ./${id}-
      result=$?
      if [ $result -eq 0 ]; then
        echo "${extd}done, using a grey-levels value of\
\`$norm${grey_levels:-255,255,0,255}$extd'.$norm"
        break
      else
        echo "${extd}failed.
 
If the conversion was cancelled due to inappropriate grey-levels value
\`
${grey_levels:-255,255,0,255}', you may try another value,\
else you should abort:
$norm"

        select grey_levels in \
            `[ "$grey_levels" != '0,255,255,255' ] && echo 0,255,255,255` \
            `[ "$grey_levels" != '255,0,255,255' ] && echo 255,0,255,255` \
            `[ -n "$grey_levels" -a "$grey_levels" != '255,255,0,255' ] &&
              echo 255,255,0,255` \
            `[ "$grey_levels" != '255,255,255,0' ] && echo 255,255,255,0` \
            Other \
            Abort
        do
          case $grey_levels in
            Other)
              read -r -p "
${extd}Enter new value (\`c0$extd,${norm}c1$extd,${norm}c2$extd,${norm}c3'\
 with 0 <= cN <= 255), or nothing to select a value:
$norm" grey_levels
              [ -n "$grey_levels" ] && break;;
             
            Abort)
              result=$EOCRERROR
              break;;
 
            *)
              break
          esac
        done
      fi
    else
      echo "${extd}failed.$norm"
      result=$ECANTCONVERT
      break
    fi
  done
 
  echo -n "${extd}Cleaning up PGMs ... $norm"
  rm ./${id}-*.pgm
  if [ $? -eq 0 ]; then
    echo "${extd}done.$norm"
  else
    echo "${extd}failed.$norm"
  fi
}

[ $result -eq $ESUCCESS ] &&
{
  if [ $compile -eq 1 ]; then
    try_file ()
    {
      [ ! -f "$source" ] && source="$source.srtx"
      [ -f "$source" ]
    }
 
    if [ -d "$source" ]; then
# TODO: loop through all .srtx files in the directory
      if [ "$source" != '.' ]; then cd "$source"; fi
      [ $? -eq 0 ] &&
      {
        ls "$source"/*.srtx 2>/dev/null
      }
      echo >&2 "$appname: $source: Directory compile is not yet supported."
      exit $ECANTCOMPILE
    elif try_file; then
      d=${source%/*}
      if [ -a -d "$d" -a "$d" != '.' ]; then cd "$d"; fi
      id=${source%-*}
      stream_file=subtitle_stream-$id
      source="./${source##*/}"
    else
      echo >&2 "$appname: $source: No such file or directory."
      exit $ECANTCOMPILE
    fi
  else
    source="${id}-.srtx"
  fi
 
  unset REPLY
  [ -f "$target" ] &&
  {
# TODO: allow for diff
    read -r -s -p "${extd}'$target' exists.
Append, overwrite, create new file, or abort [a/o/n/Esc]? $norm" \
         -n 1
    case $REPLY in
      [Oo]) echo $REPLY;;
      [Aa]) echo $REPLY;;
      [Nn])
        echo $REPLY
        target="${target%.*}-$$.${target##*.}";;
      *)
        REPLY=Abort
        echo $REPLY
        exit $ECANTCOMPILE;;
    esac
  }
 
  echo "
${extd}Compiling TXTs into \"$norm$target$extd\" ... $norm"

  case "$REPLY" in
    [Oo]) > "$target";;
    [Aa])
      # append new content marker here
      echo "
-- `
whoami`@`hostname` -- `date` --
"
>> "$target"
  esac

  (
# sed: thx to Erkan Yanar <erkan.yanar@t-online.de>, see
# message ID <lduohb.v74.ln@510002093148-0001.dialin.t-online.de>
    srttool -s -i "$source"

    # only if srttool is not available
    if [ $? -gt 1 ]; then
      sed -n 's,^\(\([^/]*\)\(/\)\(.*\.pgm\.txt\)\)$,/^\2\\\3\4$/ { \
          r \1 \
          d \
        },gp'
"$source" | sed -f - "$source"
    fi
  ) >> "$target"

  if [ $? -eq 0 ]; then
    echo "${extd}... done.$norm"
  else
    echo "${extd}... failed.$norm"
    exit $ECANTCOMPILE
  fi

  echo -n "${extd}Cleaning up
  ${id}-*.pgm.txt files ... $norm"

  rm ./"${id}"-*.pgm.txt
  if [ $? -eq 0 ]; then
    echo "${extd}done.$norm"
  else
    echo "${extd}failed.$norm"
    result=$ECANTCLEANUP
  fi
 
  echo -n "  ${extd}Subtitle index file '${id}-.srtx' ... $norm"
  rm ./"${id}-.srtx" # "$target.srtx"
  if [ $? -eq 0 ]; then
    echo "${extd}done.$norm"
  else
    echo "${extd}failed.$norm"
    result=$ECANTCLEANUP
  fi
   
  [ $keep -eq 0 -a -f "./$stream_file" ] &&
  {
    echo -n "  ${extd}Subtitle stream file '${stream_file}' ... $norm"
    rm ./"$stream_file"
   
    if [ $? -eq 0 ]; then
      echo "${extd}done.$norm"
    else
      echo "${extd}failed.$norm"
      result=$ECANTCLEANUP
    fi
  }

  exit $result
}