| 32,10 → 32,13 |
| classdocs |
| |
| """ |
| _language_key = 'en' |
| _keys = "ipa|en|lit|pos|com|tag|ex" |
| _expressions = {} |
| |
| def load (self, dictionary_file, language_key='en'): |
| self._language_key = language_key |
| |
| dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
| |
| chdir(dirname(realpath(__file__))) |
| 56,7 → 59,7 |
| indent = None |
| |
| for line in f: |
| m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line) |
| m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(self._language_key), line) |
| if m is not None: |
| phrase = m.group("phrase") |
| self[phrase] = {} |
| 102,15 → 105,16 |
| dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
| |
| def clean (self): |
| parens_re = compile(r'\(.+\)', DOTALL) |
| braces_re = compile( |
| re_parens = compile(r'\(.+\)', DOTALL) |
| re_parens_no_alt = compile(r'\(([^|]+)\)', DOTALL) |
| re_braces = compile( |
| r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
| DOTALL) |
| semicolon_re = compile(r'\s*;\s*') |
| re_semicolon = compile(r'\s*;\s*') |
| |
| for orig_phrase, data in list(self.items()): |
| # if there are optional or alternating parts |
| if search(parens_re, orig_phrase): |
| if search(re_parens, orig_phrase): |
| if orig_phrase.find('|') > -1: |
| # TODO alternation |
| pass |
| 120,8 → 124,8 |
| |
| if orig_phrase.find(';') > -1: |
| synonyms = map( |
| lambda x: sub(braces_re, r'\1', x), |
| split(semicolon_re, orig_phrase)) |
| lambda x: sub(re_braces, r'\1', x), |
| split(re_semicolon, orig_phrase)) |
| |
| for synonym in synonyms: |
| self[synonym] = data |
| 128,13 → 132,34 |
| |
| del self[orig_phrase] |
| else: |
| m = match(braces_re, orig_phrase) |
| m = match(re_braces, orig_phrase) |
| if m is not None: |
| phrase = m.group("phrase") |
| m2 = match(parens_re, phrase) |
| if m2 is not None: |
| # TODO alternation and optional parts |
| pass |
| m_parens = search(re_parens, phrase) |
| if m_parens is not None: |
| # alternation and optional parts |
| expr = sub(re_parens_no_alt, r'(?:\1)?', phrase) |
| expr = sub('~', '(?=.)', expr) |
| self._expressions[expr] = data |
| else: |
| # remove braces |
| self[phrase] = data |
| |
| self[phrase] = data |
| del self[orig_phrase] |
| |
def translate(self, phrase):
    """Look up ``phrase`` and return its dictionary entry.

    The lookup key is ``phrase.lower()``; the returned entry carries the
    phrase exactly as the caller spelled it under ``self._language_key``.

    A shallow copy of the stored entry is returned.  ``clean`` stores one
    and the same data object under several synonym keys, so writing the
    language key into the stored object would pollute the dictionary and
    silently rewrite results handed out by earlier calls.

    Returns ``None`` when there is no entry for the lower-cased phrase.
    """
    entry = self.get(phrase.lower())
    if entry is None:
        return None

    # Copy before annotating so the stored (possibly shared) entry
    # stays untouched.
    translation = dict(entry)
    translation[self._language_key] = phrase
    return translation
| |
def translate_expression(self, phrase):
    """Return the entry of the first stored expression matching ``phrase``.

    Every key of ``self._expressions`` is applied to ``phrase`` with
    ``match``, i.e. anchored at the start of ``phrase`` but not required
    to consume all of it.  For the first expression that matches, a
    shallow copy of its data is returned with the matched text (group 0)
    stored under ``self._language_key``.

    The copy keeps the data object held in ``self._expressions`` free of
    per-query state, so results returned by earlier calls are not
    rewritten by later ones.

    Returns ``None`` when no expression matches.
    """
    for expression, data in self._expressions.items():
        found = match(expression, phrase)
        if found is not None:
            result = dict(data)
            result[self._language_key] = found.group(0)
            return result

    return None