| 10,7 → 10,6 |
| from os.path import dirname, realpath, basename |
| from pickle import dump, load |
| from re import match, DOTALL, search, sub, split, compile |
| from copy import deepcopy |
| |
| debug_level = 2 |
| |
| 37,8 → 36,6 |
| _expressions = {} |
| |
| def load (self, dictionary_file, language_key='en'): |
| dictionary = self |
| |
| dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
| |
| chdir(dirname(realpath(__file__))) |
| 62,7 → 59,7 |
| m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line) |
| if m is not None: |
| phrase = m.group("phrase") |
| dictionary[phrase] = {} |
| self[phrase] = {} |
| indent = None |
| else: |
| m = match(r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys), line) |
| 69,13 → 66,13 |
| if m is not None: |
| # join previous value if necessary |
| if type(value) == list: |
| dictionary[phrase][key] = ' '.join(value) |
| self[phrase][key] = ' '.join(value) |
| |
| indent = m.group("indent") |
| key = m.group("key") |
| value = m.group("value") |
| # assign a string for memory efficiency |
| dictionary[phrase][key] = value |
| self[phrase][key] = value |
| elif indent is not None: |
| m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line) |
| if m is not None: |
| 84,34 → 81,34 |
| if type(value) == str: |
| # when a continuation is first found, convert to a list |
| # because there could be more continuations |
| value = dictionary[phrase][key] = [value, continuation] |
| value = self[phrase][key] = [value, continuation] |
| else: |
| value.append(continuation) |
| |
| # join last value if necessary |
| if type(value) == list: |
| dictionary[phrase][key] = ' '.join(value) |
| self[phrase][key] = ' '.join(value) |
| |
| dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1) |
| # TODO: Pickle should only contain strings to be small |
| with open(pickle_file, mode='wb') as f: dump(dictionary, f) |
| with open(pickle_file, mode='wb') as f: dump(self, f) |
| dmsg(' done.', min_level=1) |
| else: |
| dmsg('from {0} ...'.format(pickle_file), end='', min_level=1) |
| with open(pickle_file, mode='rb') as f: pickle = load(f) |
| for key, value in pickle.items(): |
| dictionary[key] = value |
| self[key] = value |
| |
| dmsg(' done ({0} entries).'.format(len(dictionary)), min_level=1) |
| dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
| |
| def clean (self): |
| dictionary = self |
| |
| parens_re = compile(r'\(.+\)', DOTALL) |
| braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL) |
| braces_re = compile( |
| r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
| DOTALL) |
| semicolon_re = compile(r'\s*;\s*') |
| |
| for orig_phrase, data in list(dictionary.items()): |
| for orig_phrase, data in list(self.items()): |
| # if there are optional or alternating parts |
| if search(parens_re, orig_phrase): |
| if orig_phrase.find('|') > -1: |
| 127,11 → 124,17 |
| split(semicolon_re, orig_phrase)) |
| |
| for synonym in synonyms: |
| dictionary[synonym] = deepcopy(data) |
| self[synonym] = data |
| |
| del dictionary[orig_phrase] |
| del self[orig_phrase] |
| else: |
| m = match(braces_re, orig_phrase) |
| if m is not None: |
| dictionary[m.group(1)] = deepcopy(dictionary[orig_phrase]) |
| del dictionary[orig_phrase] |
| phrase = m.group("phrase") |
| m2 = match(parens_re, phrase) |
| if m2 is not None: |
| # TODO alternation and optional parts |
| pass |
| |
| self[phrase] = data |
| del self[orig_phrase] |