32,10 → 32,13 |
classdocs |
|
""" |
_language_key = 'en' |
_keys = "ipa|en|lit|pos|com|tag|ex" |
_expressions = {} |
|
def load (self, dictionary_file, language_key='en'): |
self._language_key = language_key |
|
dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
|
chdir(dirname(realpath(__file__))) |
56,7 → 59,7 |
indent = None |
|
for line in f: |
m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line) |
m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(self._language_key), line) |
if m is not None: |
phrase = m.group("phrase") |
self[phrase] = {} |
102,15 → 105,16 |
dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
|
def clean (self): |
parens_re = compile(r'\(.+\)', DOTALL) |
braces_re = compile( |
re_parens = compile(r'\(.+\)', DOTALL) |
re_parens_no_alt = compile(r'\(([^|]+)\)', DOTALL) |
re_braces = compile( |
r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
DOTALL) |
semicolon_re = compile(r'\s*;\s*') |
re_semicolon = compile(r'\s*;\s*') |
|
for orig_phrase, data in list(self.items()): |
# if there are optional or alternating parts |
if search(parens_re, orig_phrase): |
if search(re_parens, orig_phrase): |
if orig_phrase.find('|') > -1: |
# TODO alternation |
pass |
120,8 → 124,8 |
|
if orig_phrase.find(';') > -1: |
synonyms = map( |
lambda x: sub(braces_re, r'\1', x), |
split(semicolon_re, orig_phrase)) |
lambda x: sub(re_braces, r'\1', x), |
split(re_semicolon, orig_phrase)) |
|
for synonym in synonyms: |
self[synonym] = data |
128,13 → 132,34 |
|
del self[orig_phrase] |
else: |
m = match(braces_re, orig_phrase) |
m = match(re_braces, orig_phrase) |
if m is not None: |
phrase = m.group("phrase") |
m2 = match(parens_re, phrase) |
if m2 is not None: |
# TODO alternation and optional parts |
pass |
m_parens = search(re_parens, phrase) |
if m_parens is not None: |
# alternation and optional parts |
expr = sub(re_parens_no_alt, r'(?:\1)?', phrase) |
expr = sub('~', '(?=.)', expr) |
self._expressions[expr] = data |
else: |
# remove braces |
self[phrase] = data |
|
self[phrase] = data |
del self[orig_phrase] |
|
def translate(self, phrase):
    """Look up ``phrase`` case-insensitively and return its entry.

    The lookup key is ``phrase.lower()``.  On a hit, the result carries
    all fields of the stored entry plus ``self._language_key`` mapped to
    ``phrase`` exactly as the caller spelled it.

    A shallow copy is returned instead of the stored dict itself: the
    same entry object can be registered under several keys, so writing
    the caller's spelling into it would change results handed out by
    earlier lookups and alter the dictionary's own data.

    :param phrase: the phrase to look up (a string).
    :return: a new dict for the entry, or ``None`` if there is no entry.
    """
    entry = self.get(phrase.lower())
    if entry is None:
        return None

    # Copy before annotating so the stored entry stays untouched.
    result = dict(entry)
    result[self._language_key] = phrase
    return result
|
def translate_expression(self, phrase):
    """Return the entry of the first stored expression matching ``phrase``.

    Every key of ``self._expressions`` is applied to ``phrase`` as a
    regular expression with ``match``, i.e. anchored at the start of
    ``phrase`` only; trailing text after the match is allowed.
    Expressions are tried in the dict's iteration order and the first
    hit wins.

    The result carries all fields of the matching entry plus
    ``self._language_key`` mapped to the matched text (group 0).  It is
    a shallow copy, so the entry kept in ``self._expressions`` is not
    modified and earlier results are not rewritten by later calls.

    :param phrase: the text to match against the stored expressions.
    :return: a new dict for the matching entry, or ``None`` if no
        expression matches.
    """
    for expression, data in self._expressions.items():
        found = match(expression, phrase)
        if found is not None:
            # Copy before annotating so the stored entry stays untouched.
            result = dict(data)
            result[self._language_key] = found.group(0)
            return result

    return None