Rev 294 | Go to most recent revision | Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 293 | PointedEar | 1 | """ |
| 2 | Created on 2014-10-20 |
||
| 3 | |||
| 4 | @author: Thomas 'PointedEars' Lahn <mail@PointedEars.de> |
||
| 5 | |||
| 6 | """ |
||
| 7 | |||
| 8 | from os import chdir, stat |
||
| 9 | from sys import stderr |
||
| 10 | from os.path import dirname, realpath, basename |
||
| 11 | from pickle import dump, load |
||
| 12 | from re import match, DOTALL, search, sub, split, compile |
||
| 13 | from copy import deepcopy |
||
| 14 | |||
debug_level = 2


def dmsg(*args, **kwargs):
    """Print a debug message if the global debug level is high enough.

    Positional arguments and every keyword argument except ``min_level``
    are passed on to :func:`print`.

    Keyword arguments:
    min_level -- minimum value of ``debug_level`` at which the message
                 is printed; a missing or ``None`` value means 1.
    file      -- output stream; defaults to ``sys.stderr``.
    """
    # kwargs is a dict, so its entries must be looked up as keys.
    # hasattr() inspects attributes and would never find them, which
    # silently discarded the caller's 'min_level' and 'file' values.
    min_level = kwargs.pop('min_level', None)
    if min_level is None:
        min_level = 1

    kwargs.setdefault('file', stderr)

    if debug_level >= min_level:
        print(*args, **kwargs)
||
| 27 | |||
def sort_dict_alnum_english_key(phrase):
    """Build a case-insensitive comparison key from ``phrase[0]``.

    A brace-delimited part of the text (from the first ``{`` to the last
    ``}``) is replaced by its content, then the result is lowercased.
    Judging by its name, this is meant as a ``key`` function for sorting
    dictionary items.
    """
    text = phrase[0]
    braces = compile(r'\{(.+)\}')
    without_braces = braces.sub(r'\1', text)
    return without_braces.lower()
||
| 30 | |||
class Dictionary(dict):
    """
    Mapping of phrases to dicts of their properties.

    Entries are read by :meth:`load` from a text file (or from a pickle
    cache of it) and normalised by :meth:`clean`.
    """
    # Alternation of the property keys recognised in a dictionary file;
    # it is interpolated into a regular expression by the parser.
    _keys = "ipa|en|lit|pos|com|tag|ex"
    # Not used by the methods visible here.
    _expressions = {}

    def load(self, dictionary_file):
        """Fill this dictionary from ``dictionary_file`` or its cache.

        The working directory is changed to the directory of this
        module.  If ``<basename>.pickle`` exists there and is not older
        than ``dictionary_file``, the entries are read from the pickle;
        otherwise the text file is parsed and the pickle is (re)written.

        Raises ``FileNotFoundError`` (from ``stat``) if
        ``dictionary_file`` does not exist and has to be consulted.
        """
        dmsg('Loading dictionary ', end='', min_level=1)

        chdir(dirname(realpath(__file__)))

        pickle_file = basename(dictionary_file) + '.pickle'

        try:
            pickle_mtime = stat(pickle_file).st_mtime
        except FileNotFoundError:
            pickle_mtime = None

        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
            with open(dictionary_file) as f:
                self._parse_lines(f)

            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
            # TODO: Pickle should only contain strings to be small
            with open(pickle_file, mode='wb') as f:
                dump(self, f)
            dmsg(' done.', min_level=1)
        else:
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
            # NOTE: unpickling can execute arbitrary code; the cache file
            # must only ever be one written by this method.
            with open(pickle_file, mode='rb') as f:
                pickle = load(f)
            self.update(pickle)

        dmsg(' done ({0} entries).'.format(len(self)), min_level=1)

    def _parse_lines(self, lines):
        """Parse dictionary source ``lines`` into entries of this mapping.

        A ``vuh: <phrase>`` line starts a new entry.  A ``<key>: <value>``
        line (key being one of ``_keys``) sets a property of the current
        entry.  A line indented exactly two characters deeper than the
        preceding key line continues that key's value; the parts are
        joined with single spaces.
        """
        phrase_re = compile(r'^\s*vuh:\s*(?P<phrase>.+)')
        key_re = compile(
            r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys))
        continuation_re = compile(r'(?P<indent>\s+)(?P<continuation>\S.*)')

        phrase = None
        key = None
        value = None
        indent = None

        for line in lines:
            m = phrase_re.match(line)
            if m is not None:
                # Finish the previous entry first: a pending continuation
                # list must be joined under the phrase it belongs to,
                # not under the phrase that is about to start.
                if isinstance(value, list):
                    self[phrase][key] = ' '.join(value)

                phrase = m.group("phrase")
                self[phrase] = {}
                key = value = indent = None
                continue

            m = key_re.match(line)
            if m is not None:
                # join previous value if necessary
                if isinstance(value, list):
                    self[phrase][key] = ' '.join(value)

                indent = m.group("indent")
                key = m.group("key")
                value = m.group("value")
                # assign a string for memory efficiency
                self[phrase][key] = value
                continue

            if indent is None:
                # no key line seen yet in this entry: nothing to continue
                continue

            m = continuation_re.match(line)
            if m is not None and len(m.group("indent")) == len(indent) + 2:
                continuation = m.group("continuation")
                if isinstance(value, str):
                    # when a continuation is first found, convert to a
                    # list because there could be more continuations
                    value = self[phrase][key] = [value, continuation]
                else:
                    value.append(continuation)

        # join last value if necessary
        if isinstance(value, list):
            self[phrase][key] = ' '.join(value)

    def clean(self):
        """Normalise the phrases used as keys of this dictionary.

        A phrase containing ``;`` is replaced by one entry per
        semicolon-separated synonym, each with its own deep copy of the
        data.  Enclosing braces are stripped from each synonym and from
        any phrase that consists solely of a braced expression.
        """
        braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL)
        semicolon_re = compile(r'\s*;\s*')

        # Iterate over a snapshot because entries are added and removed.
        for orig_phrase, data in list(self.items()):
            # TODO alternation: parenthesised parts containing '|'
            # TODO optional parts: other parenthesised parts

            if ';' in orig_phrase:
                for part in semicolon_re.split(orig_phrase):
                    synonym = braces_re.sub(r'\1', part)
                    # A leading/trailing/doubled semicolon yields an empty
                    # synonym; do not create an entry for it.
                    if synonym:
                        self[synonym] = deepcopy(data)

                del self[orig_phrase]
            else:
                m = braces_re.match(orig_phrase)
                if m is not None:
                    # Move the entry to the brace-less key.
                    self[m.group(1)] = self.pop(orig_phrase)
||
| 133 | else: |
||
| 134 | m = match(braces_re, orig_phrase) |
||
| 135 | if m is not None: |
||
| 136 | dictionary[m.group(1)] = deepcopy(dictionary[orig_phrase]) |
||
| 137 | del dictionary[orig_phrase] |