Rev 295 | Rev 297 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 295 | Rev 296 | ||
---|---|---|---|
Line 30... | Line 30... | ||
30 | class Dictionary(dict): |
30 | class Dictionary(dict): |
31 | """
|
31 | """
|
32 | classdocs
|
32 | classdocs
|
33 |
|
33 |
|
34 | """
|
34 | """
|
- | 35 | _language_key = 'en' |
|
35 | _keys = "ipa|en|lit|pos|com|tag|ex" |
36 | _keys = "ipa|en|lit|pos|com|tag|ex" |
36 | _expressions = {} |
37 | _expressions = {} |
37 | 38 | ||
38 | def load (self, dictionary_file, language_key='en'): |
39 | def load (self, dictionary_file, language_key='en'): |
- | 40 | self._language_key = language_key |
|
- | 41 | ||
39 | dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
42 | dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
40 | 43 | ||
41 | chdir(dirname(realpath(__file__))) |
44 | chdir(dirname(realpath(__file__))) |
42 | 45 | ||
43 | pickle_file = basename(dictionary_file) + '.pickle' |
46 | pickle_file = basename(dictionary_file) + '.pickle' |
Line 54... | Line 57... | ||
54 | value = None |
57 | value = None |
55 | with open(dictionary_file) as f: |
58 | with open(dictionary_file) as f: |
56 | indent = None |
59 | indent = None |
57 | 60 | ||
58 | for line in f: |
61 | for line in f: |
59 | m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line) |
62 | m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(self._language_key), line) |
60 | if m is not None: |
63 | if m is not None: |
61 | phrase = m.group("phrase") |
64 | phrase = m.group("phrase") |
62 | self[phrase] = {} |
65 | self[phrase] = {} |
63 | indent = None |
66 | indent = None |
64 | else:
|
67 | else:
|
Line 100... | Line 103... | ||
100 | self[key] = value |
103 | self[key] = value |
101 | 104 | ||
102 | dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
105 | dmsg(' done ({0} entries).'.format(len(self)), min_level=1) |
103 | 106 | ||
104 | def clean (self): |
107 | def clean (self): |
105 | parens_re = compile(r'\(.+\)', DOTALL) |
108 | re_parens = compile(r'\(.+\)', DOTALL) |
- | 109 | re_parens_no_alt = compile(r'\(([^|]+)\)', DOTALL) |
|
106 | braces_re = compile( |
110 | re_braces = compile( |
107 | r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
111 | r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$', |
108 | DOTALL)
|
112 | DOTALL)
|
109 | semicolon_re = compile(r'\s*;\s*') |
113 | re_semicolon = compile(r'\s*;\s*') |
110 | 114 | ||
111 | for orig_phrase, data in list(self.items()): |
115 | for orig_phrase, data in list(self.items()): |
112 | # if there are optional or alternating parts
|
116 | # if there are optional or alternating parts
|
113 | if search(parens_re, orig_phrase): |
117 | if search(re_parens, orig_phrase): |
114 | if orig_phrase.find('|') > -1: |
118 | if orig_phrase.find('|') > -1: |
115 | # TODO alternation
|
119 | # TODO alternation
|
116 | pass
|
120 | pass
|
117 | else:
|
121 | else:
|
118 | # TODO optional parts
|
122 | # TODO optional parts
|
119 | pass
|
123 | pass
|
120 | 124 | ||
121 | if orig_phrase.find(';') > -1: |
125 | if orig_phrase.find(';') > -1: |
122 | synonyms = map( |
126 | synonyms = map( |
123 | lambda x: sub(braces_re, r'\1', x), |
127 | lambda x: sub(re_braces, r'\1', x), |
124 | split(semicolon_re, orig_phrase)) |
128 | split(re_semicolon, orig_phrase)) |
125 | 129 | ||
126 | for synonym in synonyms: |
130 | for synonym in synonyms: |
127 | self[synonym] = data |
131 | self[synonym] = data |
128 | 132 | ||
129 | del self[orig_phrase] |
133 | del self[orig_phrase] |
130 | else:
|
134 | else:
|
131 | m = match(braces_re, orig_phrase) |
135 | m = match(re_braces, orig_phrase) |
132 | if m is not None: |
136 | if m is not None: |
133 | phrase = m.group("phrase") |
137 | phrase = m.group("phrase") |
134 | m2 = match(parens_re, phrase) |
138 | m_parens = search(re_parens, phrase) |
135 | if m2 is not None: |
139 | if m_parens is not None: |
136 | # TODO alternation and optional parts
|
140 | # alternation and optional parts
|
- | 141 | expr = sub(re_parens_no_alt, r'(?:\1)?', phrase) |
|
- | 142 | expr = sub('~', '(?=.)', expr) |
|
- | 143 | self._expressions[expr] = data |
|
137 | pass
|
144 | else:
|
- | 145 | # remove braces
|
|
- | 146 | self[phrase] = data |
|
138 | 147 | ||
139 | self[phrase] = data |
- | |
140 | del self[orig_phrase] |
148 | del self[orig_phrase] |
- | 149 | ||
- | 150 | def translate (self, phrase): |
|
- | 151 | translation = self.get(phrase.lower(), None) |
|
- | 152 | if translation is not None: |
|
- | 153 | translation[self._language_key] = phrase |
|
- | 154 | return translation
|
|
- | 155 | ||
- | 156 | return None |
|
- | 157 | ||
- | 158 | def translate_expression (self, phrase): |
|
- | 159 | for expression, data in list(self._expressions.items()): |
|
- | 160 | expression_match = match(expression, phrase) |
|
- | 161 | if expression_match is not None: |
|
- | 162 | data[self._language_key] = expression_match.group(0) |
|
- | 163 | return data
|
|
- | 164 | ||
- | 165 | return None |