Rev 296 | Rev 298 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 296 | Rev 297 | ||
|---|---|---|---|
| Line 27... | Line 27... | ||
def sort_dict_alnum_english_key(phrase):
    """
    Build the sort key for a dictionary item.

    Takes the first element of ``phrase``, replaces every ``{...}`` span
    in it by the span's content (i.e. drops the braces), and lower-cases
    the result.  The pattern is greedy, so within one line a span runs
    from the first ``{`` to the last ``}``.

    :param phrase: indexable whose element ``0`` is the phrase string
                   (presumably a ``(key, value)`` item -- confirm
                   against the caller)
    :return: the lower-cased phrase with the braces removed
    :rtype: str
    """
    return sub(r'\{(.+)\}', r'\1', phrase[0]).lower()
| 29 | 29 | ||
| 30 | class Dictionary(dict): |
30 | class Dictionary(dict): |
| 31 | """
|
31 | """
|
| - | 32 | A Dictionary (not to be confused with its ancestor, dict)
|
|
| 32 | classdocs
|
33 | represents a word dictionary stored in a file.
|
| 33 |
|
34 |
|
| 34 | """
|
35 | """
|
| 35 | _language_key = 'en' |
36 | _language_key = 'en' |
| 36 | _keys = "ipa|en|lit|pos|com|tag|ex" |
37 | _keys = "ipa|en|lit|pos|com|tag|ex" |
| 37 | _expressions = {} |
38 | _expressions = {} |
| 38 | 39 | ||
| 39 | def load (self, dictionary_file, language_key='en'): |
40 | def load (self, dictionary_file, language_key='en'): |
| - | 41 | """
|
|
| - | 42 | Loads a word dictionary from a file.
|
|
| - | 43 | :param dictionary_file:
|
|
| - | 44 | :type dictionary_file:
|
|
| - | 45 | :param language_key:
|
|
| - | 46 | :type language_key:
|
|
| - | 47 | """
|
|
| 40 | self._language_key = language_key |
48 | self._language_key = language_key |
| 41 | 49 | ||
| 42 | dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
50 | dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
| 43 | 51 | ||
| 44 | chdir(dirname(realpath(__file__))) |
52 | chdir(dirname(realpath(__file__))) |
| Line 103... | Line 111... | ||
| 103 | self[key] = value |
111 | self[key] = value |
| 104 | 112 | ||
| 105 | dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
113 | dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
| 106 | 114 | ||
| 107 | def clean (self): |
115 | def clean (self): |
| - | 116 | """
|
|
| - | 117 | Cleans dictionary entries
|
|
| - | 118 | """
|
|
| 108 | re_parens = compile(r'\(.+\)', DOTALL) |
119 | re_parens = compile(r'\(.+\)', DOTALL) |
| 109 | re_parens_no_alt = compile(r'\(([^|]+)\)', DOTALL) |
120 | re_parens_no_alt = compile(r'\(([^\|]+)\)', DOTALL) |
| 110 | re_braces = compile( |
121 | re_braces = compile( |
| 111 | r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
122 | r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
| 112 | DOTALL)
|
123 | DOTALL)
|
| 113 | re_semicolon = compile(r'\s*;\s*') |
124 | re_semicolon = compile(r'\s*;\s*') |
| 114 | 125 | ||
| Line 132... | Line 143... | ||
| 132 | 143 | ||
| 133 | del self[orig_phrase] |
144 | del self[orig_phrase] |
| 134 | else:
|
145 | else:
|
| 135 | m = match(re_braces, orig_phrase) |
146 | m = match(re_braces, orig_phrase) |
| 136 | if m is not None: |
147 | if m is not None: |
| 137 | phrase = m.group("phrase") |
148 | phrase = m.group('phrase') |
| - | 149 | ||
| - | 150 | if callable(getattr(self, 'clean_entry', None)): |
|
| - | 151 | phrase = self.clean_entry(phrase) |
|
| - | 152 | ||
| 138 | m_parens = search(re_parens, phrase) |
153 | m_parens = search(re_parens, phrase) |
| 139 | if m_parens is not None: |
154 | if m_parens is not None: |
| 140 | # alternation and optional parts
|
155 | # alternation and optional parts
|
| 141 | expr = sub(re_parens_no_alt, r'(?:\1)?', phrase) |
156 | expr = sub(re_parens_no_alt, r'(?:\1)?', phrase) |
| 142 | expr = sub('~', '(?=.)', expr) |
157 | expr = sub('~', '(?=.)', expr) |
| Line 146... | Line 161... | ||
| 146 | self[phrase] = data |
161 | self[phrase] = data |
| 147 | 162 | ||
| 148 | del self[orig_phrase] |
163 | del self[orig_phrase] |
| 149 | 164 | ||
def translate(self, phrase):
    """
    Translate a phrase according to this dictionary.

    The entry is looked up under the lower-cased phrase.  For
    language-specific processing, this method should be
    called/overridden by inheriting classes.

    :param phrase: the phrase to look up
    :type phrase: str
    :return: the entry stored under ``phrase.lower()``, with the field
             named by ``self._language_key`` set to ``phrase`` exactly
             as it was passed in, or ``None`` if there is no such entry
    """
    entry = self.get(phrase.lower())
    if entry is None:
        return None

    # NOTE: this writes into the stored entry itself, not into a copy,
    # so the entry keeps the spelling of the most recent lookup.
    entry[self._language_key] = phrase
    return entry
| 157 | 179 | ||
def translate_expression(self, phrase):
    """
    Translate a phrase according to entries in this dictionary
    that are based on regular expressions.

    Each expression in ``self._expressions`` is matched against the
    start of ``phrase``; the data of the first expression that matches
    is returned.

    :param phrase: the phrase to match
    :type phrase: str
    :return: the data of the first matching expression, with the field
             named by ``self._language_key`` set to the matched text
    """
    # Candidates are tried in descending order of len(data).
    # NOTE(review): if longer *expressions* were meant to be tried
    # first, the key should look at item[0] instead -- confirm.
    candidates = sorted(
        self._expressions.items(),
        key=lambda item: -len(item[1]))

    for expression, data in candidates:
        expression_match = match(expression, phrase)
        if expression_match is not None:
            # NOTE: this writes into the stored data itself, not a copy.
            data[self._language_key] = expression_match.group(0)
            return data
|
| 164 | 192 | ||