10,7 → 10,6 |
from os.path import dirname, realpath, basename |
from pickle import dump, load |
from re import match, DOTALL, search, sub, split, compile |
from copy import deepcopy |
|
debug_level = 2 |
|
37,8 → 36,6 |
_expressions = {} |
|
def load (self, dictionary_file, language_key='en'): |
dictionary = self |
|
dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
|
chdir(dirname(realpath(__file__))) |
62,7 → 59,7 |
m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line) |
if m is not None: |
phrase = m.group("phrase") |
dictionary[phrase] = {} |
self[phrase] = {} |
indent = None |
else: |
m = match(r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys), line) |
69,13 → 66,13 |
if m is not None: |
# join previous value if necessary |
if type(value) == list: |
dictionary[phrase][key] = ' '.join(value) |
self[phrase][key] = ' '.join(value) |
|
indent = m.group("indent") |
key = m.group("key") |
value = m.group("value") |
# assign a string for memory efficiency |
dictionary[phrase][key] = value |
self[phrase][key] = value |
elif indent is not None: |
m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line) |
if m is not None: |
84,34 → 81,34 |
if type(value) == str: |
# when a continuation is first found, convert to a list |
# because there could be more continuations |
value = dictionary[phrase][key] = [value, continuation] |
value = self[phrase][key] = [value, continuation] |
else: |
value.append(continuation) |
|
# join last value if necessary |
if type(value) == list: |
dictionary[phrase][key] = ' '.join(value) |
self[phrase][key] = ' '.join(value) |
|
dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1) |
# TODO: Pickle should only contain strings to be small |
with open(pickle_file, mode='wb') as f: dump(dictionary, f) |
with open(pickle_file, mode='wb') as f: dump(self, f) |
dmsg(' done.', min_level=1) |
else: |
dmsg('from {0} ...'.format(pickle_file), end='', min_level=1) |
with open(pickle_file, mode='rb') as f: pickle = load(f) |
for key, value in pickle.items(): |
dictionary[key] = value |
self[key] = value |
|
dmsg(' done ({0} entries).'.format(len(dictionary)), min_level=1) |
dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
|
def clean (self): |
dictionary = self |
|
parens_re = compile(r'\(.+\)', DOTALL) |
braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL) |
braces_re = compile( |
r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
DOTALL) |
semicolon_re = compile(r'\s*;\s*') |
|
for orig_phrase, data in list(dictionary.items()): |
for orig_phrase, data in list(self.items()): |
# if there are optional or alternating parts |
if search(parens_re, orig_phrase): |
if orig_phrase.find('|') > -1: |
127,11 → 124,17 |
split(semicolon_re, orig_phrase)) |
|
for synonym in synonyms: |
dictionary[synonym] = deepcopy(data) |
self[synonym] = data |
|
del dictionary[orig_phrase] |
del self[orig_phrase] |
else: |
m = match(braces_re, orig_phrase) |
if m is not None: |
dictionary[m.group(1)] = deepcopy(dictionary[orig_phrase]) |
del dictionary[orig_phrase] |
phrase = m.group("phrase") |
m2 = match(parens_re, phrase) |
if m2 is not None: |
# TODO alternation and optional parts |
pass |
|
self[phrase] = data |
del self[orig_phrase] |