Rev 295 | Rev 297 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 295 | Rev 296 | ||
|---|---|---|---|
| Line 30... | Line 30... | ||
| 30 | class Dictionary(dict): | 30 | class Dictionary(dict): | 
| 31 |     """ | 31 |     """ |
| 32 |     classdocs | 32 |     classdocs |
| 33 | | 33 | |
| 34 |     """ | 34 |     """ |
| - | 35 | _language_key = 'en' | |
| 35 | _keys = "ipa|en|lit|pos|com|tag|ex" | 36 | _keys = "ipa|en|lit|pos|com|tag|ex" | 
| 36 | _expressions = {} | 37 | _expressions = {} | 
| 37 | 38 | ||
| 38 | def load (self, dictionary_file, language_key='en'): | 39 | def load (self, dictionary_file, language_key='en'): | 
| - | 40 | self._language_key = language_key | |
| - | 41 | ||
| 39 | dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) | 42 | dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) | 
| 40 | 43 | ||
| 41 | chdir(dirname(realpath(__file__))) | 44 | chdir(dirname(realpath(__file__))) | 
| 42 | 45 | ||
| 43 | pickle_file = basename(dictionary_file) + '.pickle' | 46 | pickle_file = basename(dictionary_file) + '.pickle' | 
| Line 54... | Line 57... | ||
| 54 | value = None | 57 | value = None | 
| 55 | with open(dictionary_file) as f: | 58 | with open(dictionary_file) as f: | 
| 56 | indent = None | 59 | indent = None | 
| 57 | 60 | ||
| 58 | for line in f: | 61 | for line in f: | 
| 59 | m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line) | 62 | m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(self._language_key), line) | 
| 60 | if m is not None: | 63 | if m is not None: | 
| 61 | phrase = m.group("phrase") | 64 | phrase = m.group("phrase") | 
| 62 | self[phrase] = {} | 65 | self[phrase] = {} | 
| 63 | indent = None | 66 | indent = None | 
| 64 |                     else: | 67 |                     else: |
| Line 100... | Line 103... | ||
| 100 | self[key] = value | 103 | self[key] = value | 
| 101 | 104 | ||
| 102 | dmsg(' done ({0} entries).'.format(len(self)), min_level=1) | 105 | dmsg(' done ({0} entries).'.format(len(self)), min_level=1) | 
| 103 | 106 | ||
| 104 | def clean (self): | 107 | def clean (self): | 
| 105 | parens_re = compile(r'\(.+\)', DOTALL) | 108 | re_parens = compile(r'\(.+\)', DOTALL) | 
| - | 109 | re_parens_no_alt = compile(r'\(([^|]+)\)', DOTALL) | |
| 106 | braces_re = compile( | 110 | re_braces = compile( | 
| 107 | r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', | 111 | r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', | 
| 108 |             DOTALL) | 112 |             DOTALL) |
| 109 | semicolon_re = compile(r'\s*;\s*') | 113 | re_semicolon = compile(r'\s*;\s*') | 
| 110 | 114 | ||
| 111 | for orig_phrase, data in list(self.items()): | 115 | for orig_phrase, data in list(self.items()): | 
| 112 |             # if there are optional or alternating parts | 116 |             # if there are optional or alternating parts |
| 113 | if search(parens_re, orig_phrase): | 117 | if search(re_parens, orig_phrase): | 
| 114 | if orig_phrase.find('|') > -1: | 118 | if orig_phrase.find('|') > -1: | 
| 115 |                     # TODO alternation | 119 |                     # TODO alternation |
| 116 |                     pass | 120 |                     pass |
| 117 |                 else: | 121 |                 else: |
| 118 |                     # TODO optional parts | 122 |                     # TODO optional parts |
| 119 |                     pass | 123 |                     pass |
| 120 | 124 | ||
| 121 | if orig_phrase.find(';') > -1: | 125 | if orig_phrase.find(';') > -1: | 
| 122 | synonyms = map( | 126 | synonyms = map( | 
| 123 | lambda x: sub(braces_re, r'\1', x), | 127 | lambda x: sub(re_braces, r'\1', x), | 
| 124 | split(semicolon_re, orig_phrase)) | 128 | split(re_semicolon, orig_phrase)) | 
| 125 | 129 | ||
| 126 | for synonym in synonyms: | 130 | for synonym in synonyms: | 
| 127 | self[synonym] = data | 131 | self[synonym] = data | 
| 128 | 132 | ||
| 129 | del self[orig_phrase] | 133 | del self[orig_phrase] | 
| 130 |             else: | 134 |             else: |
| 131 | m = match(braces_re, orig_phrase) | 135 | m = match(re_braces, orig_phrase) | 
| 132 | if m is not None: | 136 | if m is not None: | 
| 133 | phrase = m.group("phrase") | 137 | phrase = m.group("phrase") | 
| 134 | m2 = match(parens_re, phrase) | 138 | m_parens = search(re_parens, phrase) | 
| 135 | if m2 is not None: | 139 | if m_parens is not None: | 
| 136 |                         # TODO alternation and optional parts | 140 |                         # alternation and optional parts |
| - | 141 | expr = sub(re_parens_no_alt, r'(?:\1)?', phrase) | |
| - | 142 | expr = sub('~', '(?=.)', expr) | |
| - | 143 | self._expressions[expr] = data | |
| 137 |                         pass | 144 |                     else: |
| - | 145 |                         # remove braces | |
| - | 146 | self[phrase] = data | |
| 138 | 147 | ||
| 139 | self[phrase] = data | - | |
| 140 | del self[orig_phrase] | 148 | del self[orig_phrase] | 
| - | 149 | ||
| - | 150 | def translate (self, phrase): | |
| - | 151 | translation = self.get(phrase.lower(), None) | |
| - | 152 | if translation is not None: | |
| - | 153 | translation[self._language_key] = phrase | |
| - | 154 |             return translation | |
| - | 155 | ||
| - | 156 | return None | |
| - | 157 | ||
| - | 158 | def translate_expression (self, phrase): | |
| - | 159 | for expression, data in list(self._expressions.items()): | |
| - | 160 | expression_match = match(expression, phrase) | |
| - | 161 | if expression_match is not None: | |
| - | 162 | data[self._language_key] = expression_match.group(0) | |
| - | 163 |                 return data | |
| - | 164 | ||
| - | 165 | return None | |