29,7 → 29,8 |
|
class Dictionary(dict): |
""" |
classdocs |
A Dictionary (not to be confused with its ancestor, dict) |
represents a word dictionary stored in a file. |
|
""" |
_language_key = 'en' |
37,6 → 38,13 |
_expressions = {} |
|
def load (self, dictionary_file, language_key='en'): |
""" |
Loads a word dictionary from a file. |
:param dictionary_file: |
:type dictionary_file: |
:param language_key: |
:type language_key: |
""" |
self._language_key = language_key |
|
dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
105,8 → 113,11 |
dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
|
def clean (self): |
""" |
Cleans dictionary entries |
""" |
re_parens = compile(r'\(.+\)', DOTALL) |
re_parens_no_alt = compile(r'\(([^|]+)\)', DOTALL) |
re_parens_no_alt = compile(r'\(([^\|]+)\)', DOTALL) |
re_braces = compile( |
r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
DOTALL) |
134,7 → 145,11 |
else: |
m = match(re_braces, orig_phrase) |
if m is not None: |
phrase = m.group("phrase") |
phrase = m.group('phrase') |
|
if callable(getattr(self, 'clean_entry', None)): |
phrase = self.clean_entry(phrase) |
|
m_parens = search(re_parens, phrase) |
if m_parens is not None: |
# alternation and optional parts |
148,6 → 163,13 |
del self[orig_phrase] |
|
def translate (self, phrase): |
""" |
Translate a phrase according to this dictionary. |
For language-specific processing, this method should be |
called/overridden by inheriting classes. |
:param phrase: |
:type phrase: str |
""" |
translation = self.get(phrase.lower(), None) |
if translation is not None: |
translation[self._language_key] = phrase |
156,7 → 178,13 |
return None |
|
def translate_expression (self, phrase): |
for expression, data in list(self._expressions.items()): |
""" |
Translate a phrase according to entries in this dictionary |
based on regular expressions. |
:param phrase: |
:type phrase: |
""" |
for expression, data in sorted(self._expressions.items(), key=lambda item:-len(item[1])): |
expression_match = match(expression, phrase) |
if expression_match is not None: |
data[self._language_key] = expression_match.group(0) |