Subversion Repositories LCARS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
20 PointedEar 1
#!/usr/bin/env bash
# NOTE: this script relies on bash features (`select', `read -s -p -n'),
# therefore it must not be run by a plain POSIX sh.

# Program metadata used by the version banner and the help text
appname="${0##*/}"
ver="0.6.1.2006031907"
copy="2005, 2006"
mail_feedback="dvd@PointedEars.de"
6
# ----------------------------------------------------------------------------
7
# DVD Subtitles 0.6.1 -- Extracts subtitles from Video DVD data to a text file
8
# Copyright (C) 2005, 2006  Thomas Lahn <PointedEars@gmx.de>
9
#
10
#     This program is free software; you can redistribute it and/or modify it
11
#     under the terms of the GNU General Public License (GPL) as published
12
#     by the Free Software Foundation; either version 2 of the License, or
13
#     (at your option) any later version.
14
#
15
#     This program is distributed in the hope that it will be useful,
16
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
17
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
#     GNU General Public License for more details.
19
#
20
#     You should have received a copy of the GNU GPL along with this
21
#     program; if not, write to the Free Software Foundation, Inc.,
22
#     51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
23
#
24
## Standard shell script disclaimer blurb thing:
25
##
26
## This script is a hack.  It's brute force.  It's horrible.
27
## It doesn't use Artificial Intelligence.  It doesn't use Virtual Reality.
28
## It's not perl.  It's not python.  It probably won't work unchanged on
29
## the "other" thousands of unices.  But it worksforme.  --ramiro
30
# (from /usr/local/mozilla/run-mozilla.sh)
31
#
32
#     This is work in progress.  If you have an improvement, patch,
33
#     idea, whatever, on how to make this script better, please
34
#     send it to <dvd@PointedEars.de>
35
 
36
# Prints the program banner (name and version, copyright, licence pointer)
# followed by one empty line to standard output.
#
# Reads: ver, copy, mail_feedback, extd, norm
_title ()
{
  echo "\
${extd}DVD Subtitles $ver
Copyright (C) $copy  Thomas Lahn <$mail_feedback>$norm
Distributed under the terms of the GNU General Public License (GPL), see
COPYING file or http://www.gnu.org/licenses/licenses.html#GPL for details.
"
}
45
 
46
# Prints the usage text (synopsis, options, operands, exit status values)
# to standard output.
#
# Reads: appname, extd, ital, norm
# Lines ending in a backslash are continued on the next source line, i.e.
# they are printed as one line.
_help ()
{
  echo "\
Extracts a subtitle stream from Video DVD data and converts it to a text file.

$extd$appname$norm [options] ${ital}SOURCE$norm
$extd$appname$norm [$extd-hVkl$norm] [${ital}SOURCE$norm [${ital}TITLE$norm\
 [${ital}SUBTITLE$norm [${ital}TARGET$norm [${ital}GREY_LEVELS$norm]]]]]

Any option argument is overwritten by the respective additional program
argument.  Options may be given in any order, and are also considered
options if located after the first program argument.  Too many program
arguments are silently ignored.

$extd-c$norm, $extd--compile$norm ${ital}FILE$norm | ${ital}DIRECTORY$norm
             Compile subtitle text files according to subtitle index\
 ${ital}FILE$norm
             or to subtitle index files in ${ital}DIRECTORY$norm to\
 ${ital}TARGET$norm.  Clean up if this is successful, then exit.

$extd-k$norm, $extd--keep$norm   Keep subtitle stream file even if\
 conversion is successful.

$extd-l$norm, $extd--list$norm   List subtitles for TITLE using\
 ${extd}mplayer$norm(1) and exit.
               If TITLE is not provided or \`$extd-$norm', list subtitles for
               title #2 (as title #1 may be an intro without subtitles)
               and exit.

SOURCE       Video DVD data source, i.e. a device (usually /dev/dvd),
               a directory (e.g. one containing content created via
               ${extd}dvdbackup$norm(1)) or a Video DVD image file.
               If \`$extd-$norm', a previously created subtitle stream file\
 named
               ${extd}subtitle_stream-$norm*$extd-${norm}TITLE${extd}-${norm}SUBTITLE\
 in the current working
               directory will be used for only the stream-to-graphics-
               to-text conversion instead.  Both TITLE and SUBTITLE
               must not be \`$extd-$norm' in that case.
               The default is \`$extd/dev/dvd$norm'.

$extd-t$norm, $extd--title$norm
  TITLE        Number of the title (1-n) which will be accessed for
               subtitle stream extraction.  If left out or \`$extd-$norm', the
               program uses ${extd}mplayer$norm(1) to detect how many titles are
               available on the DVD data source and asks for the
               title to be accessed.

$extd-s$norm, $extd--subtitle-id$norm
  SUBTITLE     ID of the subtitle stream to be extracted (0-n).
               If left out or \`$extd-$norm', the program uses its\
 ${extd}-l$norm option to detect
               which subtitles are available for the given TITLE and asks
               for the ID to be used.

$extd-o$norm, $extd--output-target$norm
  TARGET       Name of the resulting subtitles text file.
               If not provided, the file is named after the subtitle stream
               file.  NOTE: Unlike previous versions, this version appends
               the filename suffix $extd.srt$norm automagically ONLY in that\
 case.

$extd-g$norm, $extd--grey-levels$norm
  GREY_LEVELS  Optional grey-levels value\
 (\`c0$extd,${norm}c1$extd,${norm}c2$extd,${norm}c3' with 0 <= cN <= 255,
               where 0 is black and 255 is white) to be used for converting
               the subtitle stream graphics to text via OCR.  The default is
               \`${extd}255,255,0,255$norm'.  Unfortunately, the\
 ${extd}subtitle2pgm$norm program,
               which requires this value, appears to be poorly documented;
               if you find a more detailed, working documentation, please
               refer to it and inform this program's author about it.

$extd-h$norm, $extd--help$norm     Display this help and exit.
$extd-v$norm, $extd--verbose$norm  Be verbose.  The number of ${extd}-v$norm\
 options specify the level of
                 verbosity.
$extd-V$norm, $extd--version$norm  Display version information and exit.

${extd}EXIT STATUS$norm
  ${extd}  0$norm  Successful program execution
  ${extd}  1$norm  Error detecting/extracting subtitle stream, or cancelled
         without selecting a title number or subtitle ID
  ${extd}  2$norm  Unable to convert subtitle stream to image files
  ${extd}  3$norm  Cancelled due to ${extd}gocr$norm(1) error or without\
 entering another
         grey-levels value
  ${extd}  4$norm  Unable to compile to text file
  ${extd}  5$norm  Unable to clean up
  ${extd}127$norm  Insufficient number of arguments / help was displayed

See the $extd$appname$norm(1) manpage for complete documentation."
}
139
 
140
# Makes sure that LINES and COLUMNS hold positive numbers.
#
# If either variable is empty, both are taken from `stty size' (which prints
# "ROWS COLUMNS").  Missing, non-numeric and zero values are replaced with
# the defaults of 24 lines and 80 columns.
_init_term_size ()
{
  if [ -z "$LINES" ] || [ -z "$COLUMNS" ]; then
    # empty if standard input is not a terminal or stty(1) fails
    _term_size=$(stty size 2>/dev/null)
    LINES=${_term_size%% *}
    COLUMNS=${_term_size##* }
    unset _term_size
  fi

  case $LINES in
    '' | *[!0-9]*) LINES=24;;
  esac
  case $COLUMNS in
    '' | *[!0-9]*) COLUMNS=80;;
  esac

  if [ "$LINES" -eq 0 ]; then LINES=24; fi
  if [ "$COLUMNS" -eq 0 ]; then COLUMNS=80; fi
}

_init_term_size
146
 
147
# Sets the text attribute variables used in all messages:
#   extd  bold, ital  italics, norm  reset all attributes
#
# The sequences are obtained via tput(1) if TERM is not "raw" and
# `stty size' succeeds; otherwise all of them are set to the empty string
# so that no escape sequences end up in the output.
# (The raw sequences ESC[1m and ESC[m\017 were used before tput.)
_init_term_attrs ()
{
  if [ "$TERM" != "raw" ] && stty size >/dev/null 2>&1; then
    extd=$(tput bold 2>/dev/null)
    ital=$(tput sitm 2>/dev/null)
    norm=$(tput sgr0 2>/dev/null)
  else
    esc=""
    extd=""
    # reset as well, or a value inherited from the environment is printed
    ital=""
    norm=""
  fi
}

_init_term_attrs
159
 
160
# exit status
ESUCCESS=0
ECANTEXTRACT=1
ECANTCONVERT=2
EOCRERROR=3
ECANTCOMPILE=4
ECANTCLEANUP=5
EARGERROR=127

# Normalizes the command line with getopt(1).
#
# Arguments: the program arguments; pass them as "$@" so that each one
#            expands to a separate word (the quotes are essential).
# Sets:
#   getopt_type       `long' for the enhanced getopt(1), else `short'
#   tmp               the output of getopt(1); it is stored because a
#                     direct `eval set --' would lose getopt's exit status
#   getopt_exit_code  the exit status of getopt(1).  It is read immediately
#                     after the call, as any command in between (even a
#                     plain assignment) would reset `$?' to 0.
# If the first argument is exactly `-vv', debug output is written to
# standard error.
_run_getopt ()
{
  [ "$1" = "-vv" ] && echo "$extd
Debug output for POSIX conform command-line parsing

Original arguments: $*" >&2

  # only the enhanced getopt(1) exits with status 4 when called with -T
  getopt -T >/dev/null 2>&1
  if [ $? -eq 4 ]; then
    getopt_type=long
    [ "$1" = "-vv" ] && echo "getopt(1) type:     enhanced" >&2
    tmp=$(getopt -o c:klg:o:S::s:t:hVv \
                 -l compile:,keep-stream,list,grey-levels:,output-target:\
,spell-check,subtitle-id:,title:,help,verbose,version \
                 -n "$appname" -s sh \
                 -- "$@")
    getopt_exit_code=$?
  else
    getopt_type=short
    [ "$1" = "-vv" ] && echo "getopt(1) type:     old" >&2
    tmp=$(getopt c:klg:o:S:s:t:hVv "$@")
    getopt_exit_code=$?
  fi
}

_run_getopt "$@"
192
# Defaults; `-' means "not specified" for title, sid and target
help=0
verbose=0
version=0
list=0
source='/dev/dvd'
title='-'
sid='-'
target='-'
grey_levels=''
keep=0
compile=0

if [ "$getopt_exit_code" -eq 0 ]; then
##     getopt  returns  error  code 0 for successful parsing, 1 if
##     getopt(3) returns errors, 2 if it does not understand  its
##     own parameters, 3 if an internal error occurs like out-of-
##     memory, and 4 if it is called with -T.
#
# Note the quotes around `$tmp': they are essential!
  eval set -- "$tmp"
  [ "$1" = "-vv" ] && echo "New arguments:      $*$norm
" >&2

  # Consume the options; getopt(1) terminates them with `--'.  The loop
  # also ends when the arguments are exhausted, and every unhandled word
  # is shifted away, so it cannot spin forever.
  while [ $# -gt 0 ]; do
    case "$1" in
      -h | --help)
        help=1
        shift;;

      -v | --verbose)
        verbose=$((verbose + 1))
        shift;;

      -V | --version)
        version=1
        shift;;

      -c | --compile)
        compile=1
        source=$2
        shift 2;;

      # the enhanced getopt(1) expands `--keep' to `--keep-stream'
      -k | --keep | --keep-stream)
        keep=1
        shift;;

      -l | --list)
        list=1
        shift;;

      -g | --grey-levels)
        grey_levels=$2
        shift 2;;

      -o | --output-target)
        target=$2
        shift 2;;

      -s | --subtitle-id)
        sid=$2
        shift 2;;

      -t | --title)
        title=$2
        shift 2;;

      # Accepted by the getopt(1) calls, but nothing in this script acts
      # on it.  -S declares an argument (optional with the enhanced
      # getopt, then possibly empty); --spell-check declares none.
      -S)
        shift 2;;

      --spell-check)
        shift;;

      --)
        shift
        break;;

      *)
        echo "$appname: unsupported option: $1" >&2
        help=1
        shift;;
    esac
  done
  # "$@" now holds the operands only, each one still a separate word
else
  [ $verbose -gt 1 ] && echo "getopt exited: $getopt_exit_code
  " >&2
  if [ "$getopt_exit_code" -eq 1 ] || [ "$getopt_exit_code" -eq 2 ]; then
    help=1
  else
    exit $getopt_exit_code
  fi
fi
275
 
276
# The banner is suppressed in list mode so that only the list is printed.
if [ "$list" -eq 0 ]; then
  _title
fi

if [ "$version" -eq 1 ]; then
  exit $ESUCCESS
fi

# Help was requested, or getopt(1) reported a usage error
if [ "$help" -eq 1 ]; then
  _help "$0"
  exit $EARGERROR
fi
283
 
284
result=$ESUCCESS
285
[ $compile -eq 0 ] &&
286
{
287
  # Lets the non-empty operands SOURCE, TITLE, SUBTITLE, TARGET and
  # GREY_LEVELS ($1 to $5) override the respective option arguments, as
  # the help text states.  (The option variables default to `-', not to
  # the empty string, so a test for emptiness would never let an operand
  # through.)
  _apply_operands ()
  {
    if [ -n "$1" ]; then source=$1; fi
    if [ -n "$2" ]; then title=$2; fi
    if [ -n "$3" ]; then sid=$3; fi
    if [ -n "$4" ]; then target=$4; fi
    if [ -n "$5" ]; then grey_levels=$5; fi
  }

  _apply_operands "$@"
292
 
293
  # Lists the subtitle streams that mplayer(1) reports for a title.
  #
  # $1  DVD data source, passed to mplayer as -dvd-device
  # $2  title number; 2 is used if it is missing or empty
  #
  # Prints those lines of mplayer's combined output that contain `sid',
  # with everything up to and including the first colon and the
  # whitespace character following it removed.
  getsubtitles ()
  {
    mplayer -dvd-device "$1" -vo null -ao null -frames 0 \
            -v "dvd://${2:-2}" 2>&1 |
      sed -n '/sid/ s/^[^:]\{1,\}:[[:space:]]//p'
  }
300
 
301
  # List mode: print the subtitles of the title and exit
  if [ "$list" -eq 1 ]; then
    # first title may be only an intro
    if [ "$title" = '-' ]; then
      title=2
    fi

    # quoted, so that a source containing blanks stays one argument
    tmp=$(getsubtitles "$source" "$title")
    if [ -z "$tmp" ]; then
      exit $ECANTEXTRACT
    fi

    echo "${extd}The following subtitles are available for title #$title:$norm
$tmp"
    exit $ESUCCESS
  fi
314
 
315
  # If both the title and the subtitle ID are known, look up the
  # description of that subtitle (everything after the ID in its list
  # line) for use in messages and for the OCR language.
  if [ "$sid" != '-' ] && [ "$title" != '-' ]; then
    subtitles=$(getsubtitles "$source" "$title")
    if [ -n "$subtitles" ]; then
      st_descr=$(printf '%s\n' "$subtitles" | grep "^$sid[[:space:]]" |
                   cut -f 2- -d ' ')
    fi
  fi
322
 
323
  if [ "$source" = '-' ]; then
324
    # Use the first regular file in the current working directory that
    # matches subtitle_stream-*-TITLE-SUBTITLE.  The name itself is
    # checked, as the exit status of an `ls | head' pipeline would be that
    # of head(1) and thus never indicate a missing file.
    stream_file=
    for _candidate in subtitle_stream-*-"$title"-"$sid"; do
      if [ -f "$_candidate" ]; then
        stream_file=$_candidate
        break
      fi
    done
    unset _candidate

    if [ -z "$stream_file" ]; then
      echo "$appname: No such file: subtitle_stream-*-$title-$sid" >&2
      exit $ECANTEXTRACT
    fi

    read -r -s -p "Use '$stream_file' [Y/n]? " -n 1
    case $REPLY in
      [Nn])
        echo "$REPLY"
        exit $ECANTEXTRACT;;
      *)
        echo Y
    esac
    echo

    # everything after `subtitle_stream-'
    id=${stream_file#*-}

    # name the text file after the stream file if no target was given
    if [ "$target" = '-' ]; then
      target="$stream_file.srt"
    fi
341
  else
342
    # Reports to standard error that nothing was detected on the DVD data
    # source, then exits with status $ECANTEXTRACT.
    #
    # $1  0 if no titles were found; any other value if no subtitles
    #     were found
    read_error ()
    {
      if [ "$1" = 0 ]; then
        subject='titles available on this DVD source.'
      else
        subject='subtitles available for this title.'
      fi

      echo >&2 "\
Sorry, there are no $subject
Please verify that the DVD data source is available and
that its filesystem is consistent."

      unset subject
      exit $ECANTEXTRACT
    }
357
 
358
    # No title given: detect the number of titles and ask for one
    if [ "$title" = '-' ]; then
      titles=$(mplayer -dvd-device "$source" -vo null -ao null -frames 0 \
                       -v dvd:// 2>&1 | grep -E '[0-9]+ titles')
      # presumably the line reads "There are N titles ..." -- the number
      # is taken from the third field; verify against mplayer's output
      num_titles=$(printf '%s\n' "$titles" | awk '{print $3}')

      # anything but a positive number means that no titles were detected
      case $num_titles in
        '' | *[!0-9]*) read_error 0;;
      esac
      [ "$num_titles" -lt 1 ] && read_error 0

      echo "$titles"
      while true; do
        read -r -p "\
${extd}Enter title# (1-$num_titles), or nothing to abort: $norm"
        case $REPLY in
          '')
            exit $ECANTEXTRACT;;

          # Ask again for non-numbers, 0 and numbers with leading zeros;
          # the input is never evaluated as an arithmetic expression.
          *[!0-9]* | 0*)
            continue;;
        esac

        if [ "$REPLY" -le "$num_titles" ]; then
          title=$REPLY
          break
        fi
      done
      echo
    fi
379
 
380
    # No subtitle ID given: list the subtitles of the title and ask for one
    if [ "$sid" = '-' ]; then
      _list_status=0
      if [ -z "$subtitles" ]; then
        # run this program in list mode (-l) for the title (-t)
        subtitles=$("$0" -lt "$title" "$source")
        _list_status=$?
      fi
      [ "$_list_status" -eq 0 ] || read_error 1
      unset _list_status

      # the highest ID is the first field of the last list line
      sid_max=$(printf '%s\n' "$subtitles" | tail -n 1 | awk '{print $1}')
      case $sid_max in
        '' | *[!0-9]*) read_error 1;;
      esac

      echo "$subtitles"
      while true; do
        read -r -p "\
${extd}Enter ID of subtitle stream (0-$sid_max) to extract, or nothing to abort: $norm"
        case $REPLY in
          '')
            exit $ECANTEXTRACT;;

          # Ask again for non-numbers and numbers with leading zeros; the
          # input is never evaluated as an arithmetic expression.
          *[!0-9]* | 0?*)
            continue;;
        esac

        # the highest ID is valid, too, as the prompt says
        if [ "$REPLY" -le "$sid_max" ]; then
          sid=$REPLY
          break
        fi
      done
    fi
399
 
400
    # Prints the label to be used in file names for the data source $1:
    # the output of volname(1) with surrounding whitespace removed and
    # inner whitespace collapsed or, if volname fails or prints nothing,
    # the last path component of $1.
    _volume_label ()
    {
      _label=$(volname "$1" 2>/dev/null) || _label=
      # unquoted on purpose: word splitting removes the extra whitespace
      _label=$(echo $_label)
      echo "${_label:-${1##*/}}"
      unset _label
    }

    # remove trailing /
    source=${source%/}

    vol=$(_volume_label "$source")
    id=$vol-$title-$sid
    stream_file=subtitle_stream-$id
406
 
407
    # If the stream file exists already, ask whether to use it as it is,
    # to overwrite it, or to extract to a new file with the process ID
    # appended.  Afterwards REPLY is `y' if the existing file is to be
    # used, `n' or `N' if the stream is to be extracted (again), and unset
    # if there was no such file.
    unset REPLY
    if [ -f "$stream_file" ]; then
      read -r -s -p "${extd}Use existing '$stream_file' [Y/n]? $norm" -n 1
      case $REPLY in
        [Nn])
          echo "$REPLY"
          read -r -s -p "${extd}Overwrite existing '$stream_file' [y/N]? $norm"\
               -n 1 REPLY2
          case $REPLY2 in
            [Yy])
              echo "$REPLY2";;
            *)
              echo N
              id=$vol-$title-$sid-$$
              stream_file=subtitle_stream-$id
              echo "Using '$stream_file'"
          esac
          echo
          unset REPLY2;;
        *)
          echo Y
          REPLY='y'
      esac
      echo
    fi
432
 
433
    # Prints the track argument for tcextract(1) for the subtitle ID $1:
    # 0x20 plus the ID, in hexadecimal.  Simply prepending `0x2' is only
    # correct for the IDs 0 to 9; it is kept for values that are not plain
    # decimal numbers.
    _subtitle_track ()
    {
      case $1 in
        '' | *[!0-9]* | 0?*) echo "0x2$1";;
        *) printf '0x%x\n' $((0x20 + $1));;
      esac
    }

    # name the text file after the stream file if no target was given
    if [ "$target" = '-' ]; then
      target="$stream_file.srt"
    fi

    # Extract unless an existing stream file is to be used (REPLY is `y')
    if [ -z "$REPLY" ] || [ "$REPLY" = 'N' ] || [ "$REPLY" = 'n' ]; then
      echo "\
${extd}Extracting subtitle stream $norm$sid${st_descr:+ ($st_descr)}$extd
of title $norm#$title$extd
on $norm$source$extd
to \"$norm$target$extd\" ...$norm
" >&2

      : > "$stream_file"
      tccat -i "$source" -T "$title" -L |
        tcextract -x ps1 -t vob -a "$(_subtitle_track "$sid")" > "$stream_file"
    fi
448
  fi
449
 
450
  # The extraction was successful if there is a non-empty stream file;
  # an empty one is removed again.
  if [ -f "$stream_file" ] && [ -s "$stream_file" ]; then
    echo "${extd}... done.$norm"
  else
    echo "${extd}... failed.$norm"
    if [ -f "$stream_file" ]; then
      rm ./"$stream_file"
    fi
    exit $ECANTEXTRACT
  fi
457
 
458
  # Convert the stream to PGM images (subtitle2pgm) and those to text
  # files (pgm2txt).  If the latter fails, another grey-levels value can
  # be selected and the conversion is tried again.  Afterwards `result' is
  # $ESUCCESS, $ECANTCONVERT (subtitle2pgm failed) or $EOCRERROR (aborted).
  result=$ESUCCESS
  while true; do
    echo -n "
${extd}Converting subtitle stream \"$norm$stream_file$extd\"
to Netpbm Portable Greymaps (PGMs) ... $norm" | fold -s >&2
    subtitle2pgm -o "./${id}-" ${grey_levels:+-c "$grey_levels"} \
      < "$stream_file"
    result=$?
    if [ $result -ne 0 ]; then
      echo "${extd}failed.$norm"
      result=$ECANTCONVERT
      break
    fi

    echo "${extd}done.$norm

${extd}Converting PGMs to text files (TXTs) using GNU OCR (gocr) ... $norm"
    # two-letter language code of the description; empty if there is none
    st_lang=$(printf '%s\n' "$st_descr" |
                sed -n 's/.*language: \([a-z]\{2\}\)/\1/p')
    pgm2txt ${st_lang:+-f "$st_lang"} "./${id}-"
    result=$?
    if [ $result -eq 0 ]; then
      echo "${extd}done, using a grey-levels value of\
 \`$norm${grey_levels:-255,255,0,255}$extd'.$norm"
      break
    fi

    echo "${extd}failed.

If the conversion was cancelled due to inappropriate grey-levels value
\`${grey_levels:-255,255,0,255}', you may try another value,\
 else you should abort:
$norm"
    # offer the predefined values except the one that was just used
    select grey_levels in \
        $([ "$grey_levels" != '0,255,255,255' ] && echo 0,255,255,255) \
        $([ "$grey_levels" != '255,0,255,255' ] && echo 255,0,255,255) \
        $([ -n "$grey_levels" -a "$grey_levels" != '255,255,0,255' ] &&
           echo 255,255,0,255) \
        $([ "$grey_levels" != '255,255,255,0' ] && echo 255,255,255,0) \
        Other \
        Abort
    do
      case $grey_levels in
        Other)
          read -r -p "
${extd}Enter new value (\`c0$extd,${norm}c1$extd,${norm}c2$extd,${norm}c3'\
 with 0 <= cN <= 255), or nothing to select a value:
$norm" grey_levels
          [ -n "$grey_levels" ] && break;;

        Abort)
          result=$EOCRERROR
          # leave the conversion loop as well, not only the menu;
          # otherwise the conversion is tried again with `-c Abort'
          break 2;;

        *)
          break
      esac
    done
  done
516
 
517
  echo -n "${extd}Cleaning up PGMs ... $norm"
  if rm ./"${id}"-*.pgm; then
    echo "${extd}done.$norm"
  else
    echo "${extd}failed.$norm"
  fi

  # A failed or aborted conversion ends the program here with the
  # respective exit status listed in the help text; the compile step is
  # skipped in that case anyway.
  [ "$result" -eq "$ESUCCESS" ] || exit $result
524
}
525
 
526
[ $result -eq $ESUCCESS ] &&
527
{
528
  # Succeeds if $source or "$source.srtx" is a regular file; in the latter
  # case the suffix is appended to `source'.  `source' is left as it is
  # if neither exists, so that error messages show the name as given.
  try_file ()
  {
    if [ -f "$source" ]; then
      return 0
    fi

    if [ -f "$source.srtx" ]; then
      source="$source.srtx"
      return 0
    fi

    return 1
  }

  # Determine the subtitle index file (`source') and the ID it was
  # created for.
  if [ "$compile" -eq 1 ]; then
    if [ -d "$source" ]; then
      # TODO: loop through all .srtx files in the directory
      ls -- "$source"/*.srtx 2>/dev/null
      echo >&2 "$appname: $source: Directory compile is not yet supported."
      exit $ECANTCOMPILE
    elif try_file; then
      # Work in the directory of the index file.
      case $source in
        */*)
          d=${source%/*}
          # a file directly below the root directory
          [ -n "$d" ] || d=/
          if [ "$d" != '.' ]; then
            cd "$d" ||
            {
              echo >&2 "$appname: $d: Cannot change to directory."
              exit $ECANTCOMPILE
            }
          fi
          source=${source##*/};;
      esac

      # the ID is the file name up to its last `-', without the directory
      id=${source%-*}
      stream_file=subtitle_stream-$id
      source="./$source"
    else
      echo >&2 "$appname: $source: No such file or directory."
      exit $ECANTCOMPILE
    fi
  else
    source="${id}-.srtx"
  fi
557
 
558
  # Prints the file name $1 with `-PID' (the process ID of this shell)
  # inserted before its filename suffix, or appended if the last path
  # component has no suffix.
  _alternative_target ()
  {
    case ${1##*/} in
      ?*.*) echo "${1%.*}-$$.${1##*.}";;
      *)    echo "$1-$$";;
    esac
  }

  # If the target exists, ask what to do.  Afterwards REPLY is unset if
  # there was no such file; otherwise it holds the key pressed: `o'/`O'
  # (overwrite), `a'/`A' (append) or `n'/`N' (new file).  Any other key
  # aborts.
  unset REPLY
  if [ -f "$target" ]; then
    # TODO: allow for diff
    read -r -s -p "${extd}'$target' exists.
Append, overwrite, create new file, or abort [a/o/n/Esc]? $norm" \
         -n 1
    case $REPLY in
      [OoAa])
        echo "$REPLY";;
      [Nn])
        echo "$REPLY"
        target=$(_alternative_target "$target");;
      *)
        REPLY=Abort
        echo $REPLY
        exit $ECANTCOMPILE;;
    esac
  fi
577
 
578
  echo "
${extd}Compiling TXTs into \"$norm$target$extd\" ... $norm"

  # Prepare an existing target according to the answer given before
  case "$REPLY" in
    [Oo])
      : > "$target";;
    [Aa])
      # append new content marker here
      echo "
-- $(whoami)@$(hostname) -- $(date) --
" >> "$target"
  esac

  (
# sed: thx to Erkan Yanar <erkan.yanar@t-online.de>, see
# message ID <lduohb.v74.ln@510002093148-0001.dialin.t-online.de>
    srttool -s -i "$source"

    # only if srttool is not available
    # NOTE(review): any exit status greater than 1 triggers the fallback,
    # not just "command not found" -- confirm that srttool never exits
    # with such a status after having written output.
    if [ $? -gt 1 ]; then
      # The first sed turns each index line that names a .pgm.txt file
      # into a sed command that replaces that line with the contents of
      # the file; the second sed applies these commands to the index.
      sed -n 's,^\(\([^/]*\)\(/\)\(.*\.pgm\.txt\)\)$,/^\2\\\3\4$/ { \
          r \1 \
          d \
        },gp' "$source" | sed -f - "$source"
    fi
  ) >> "$target"

  if [ $? -eq 0 ]; then
    echo "${extd}... done.$norm"
  else
    echo "${extd}... failed.$norm"
    exit $ECANTCOMPILE
  fi
610
 
611
  # Reports the outcome of a clean-up step.
  #
  # $1  exit status of the step; if it is not 0, `result' is set to
  #     $ECANTCLEANUP
  _cleanup_status ()
  {
    if [ "$1" -eq 0 ]; then
      echo "${extd}done.$norm"
    else
      echo "${extd}failed.$norm"
      result=$ECANTCLEANUP
    fi
  }

  echo -n "${extd}Cleaning up
  ${id}-*.pgm.txt files ... $norm"
  rm ./"${id}"-*.pgm.txt
  _cleanup_status $?

  echo -n "  ${extd}Subtitle index file '${id}-.srtx' ... $norm"
  rm ./"${id}-.srtx" # "$target.srtx"
  _cleanup_status $?

  # The stream file is removed only if it was not requested to keep it
  if [ "$keep" -eq 0 ] && [ -f "./$stream_file" ]; then
    echo -n "  ${extd}Subtitle stream file '${stream_file}' ... $norm"
    rm ./"$stream_file"
    _cleanup_status $?
  fi
642
 
643
  exit $result
644
}