WebSVN - LCARS - Diff - Rev 295 and 296 - /trunk/tools/eazytrans/Dictionary.py

 """
 Created on 2014-10-20
 @author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
 """
 from os import chdir, stat
 from sys import stderr
 from os.path import dirname, realpath, basename
 from pickle import dump, load
 from re import match, DOTALL, search, sub, split, compile
 debug_level = 2
 def dmsg(*args, **kwargs):
     """
     Print a debug message if the module's debug level permits it.

     All positional and keyword arguments are passed on to print(),
     except for the keyword argument 'min_level': it is the minimum
     value of debug_level at which the message is printed (default: 1,
     also used when None is passed).  The 'file' keyword argument
     defaults to stderr.
     """
     # kwargs is a dict, so its keys must be accessed with dict methods;
     # hasattr() looks for attributes and never finds a key.
     min_level = kwargs.pop('min_level', None)
     if min_level is None:
         min_level = 1
     kwargs.setdefault('file', stderr)
     if debug_level >= min_level:
         print(*args, **kwargs)
 def sort_dict_alnum_english_key(phrase):
     """
     Return the sort key for a dictionary item.

     phrase is a sequence whose first element is the phrase string.
     The key is that string in lower case, with a brace-delimited part
     replaced by the text between the braces.
     """
     text = phrase[0]
     unbraced = sub(r'\{(.+)\}', r'\1', text)
     return unbraced.lower()
 class Dictionary(dict):
     """
     classdocs
     """
     # A dict subclass: load() stores one entry per phrase, each entry
     # being a dict of field values read from the dictionary file.

     # Name of the field whose value is the phrase itself; load() replaces
     # it per instance with its language_key argument.
+    _language_key = 'en'
     # Field names, written as a regex alternation, that load() recognizes
     # at the start of a line.
     _keys = "ipa|en|lit|pos|com|tag|ex"
     # Regex source string -> entry data, filled by clean().
     # NOTE(review): this dict is created once on the class and is only
     # mutated, never rebound on an instance, so all instances share it --
     # confirm that this is intended.
     _expressions = {}
     # Fill this dictionary from dictionary_file.  A pickle named
     # "<basename of dictionary_file>.pickle" serves as a cache: the text
     # file is parsed (and the pickle rewritten) only if the pickle is
     # missing or older than the text file; otherwise the pickle is read.
     # language_key is the field name that starts a new phrase entry.
     # Side effect: changes the working directory to that of this module.
     def load (self, dictionary_file, language_key='en'):
+        self._language_key = language_key
         # NOTE(review): the format string has no placeholder, so the file
         # name is not part of this message.
         dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
         chdir(dirname(realpath(__file__)))
         pickle_file = basename(dictionary_file) + '.pickle'
         try:
             pickle_mtime = stat(pickle_file).st_mtime
         except FileNotFoundError:
             pickle_mtime = None
         if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
             dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
             phrase = None
             key = None
             value = None
             with open(dictionary_file) as f:
                 indent = None
                 for line in f:
                     # a line "<language key>: <phrase>" starts a new entry
-                    m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line)
+                    m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(self._language_key), line)
                     if m is not None:
                         phrase = m.group("phrase")
                         self[phrase] = {}
                         indent = None
                     else:
                         # a line "<key>: <value>" sets a field of the current entry
                         m = match(r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys), line)
                         if m is not None:
                             # join previous value if necessary
                             # NOTE(review): key and value are not reset when a
                             # new phrase starts, so a pending list may be joined
                             # into the new phrase under the old key -- verify.
                             if type(value) == list:
                                 self[phrase][key] = ' '.join(value)
                             indent = m.group("indent")
                             key = m.group("key")
                             value = m.group("value")
                             # assign a string for memory efficiency
                             self[phrase][key] = value
                         elif indent is not None:
                             # a line indented exactly two more characters than
                             # the field line continues that field's value
                             m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line)
                             if m is not None:
                                 if len(m.group("indent")) == len(indent) + 2:
                                     continuation = m.group("continuation")
                                     if type(value) == str:
                                         # when a continuation is first found, convert to a list
                                         # because there could be more continuations
                                         value = self[phrase][key] = [value, continuation]
                                     else:
                                         value.append(continuation)
             # join last value if necessary
             if type(value) == list:
                 self[phrase][key] = ' '.join(value)
             dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
             # TODO: Pickle should only contain strings to be small
             with open(pickle_file, mode='wb') as f: dump(self, f)
             dmsg(' done.', min_level=1)
         else:
             dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
             # NOTE(review): unpickling can execute code contained in the
             # file; the pickle file must come from a trusted location.
             with open(pickle_file, mode='rb') as f: pickle = load(f)
             for key, value in pickle.items():
                 self[key] = value
         dmsg(' done ({0} entries).'.format(len(self)), min_level=1)
     # Normalize the phrase keys of this dictionary:
     # - "a; b" is split at semicolons and each part becomes a key for the
     #   same data object;
     # - "{phrase}", optionally followed by "(variant)", is re-keyed
     #   without the braces;
     # - if the text inside the braces contains parentheses, it is turned
     #   into a regular expression stored in _expressions instead.
     # In each of these cases the original key is deleted.
     def clean (self):
-        parens_re = compile(r'\(.+\)', DOTALL)
+        re_parens = compile(r'\(.+\)', DOTALL)
+        re_parens_no_alt = compile(r'\(([^|]+)\)', DOTALL)
-        braces_re = compile(
+        re_braces = compile(
             r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$',
             DOTALL)
-        semicolon_re = compile(r'\s*;\s*')
+        re_semicolon = compile(r'\s*;\s*')
         # iterate over a copy because keys are added and deleted in the loop
         for orig_phrase, data in list(self.items()):
             # if there are optional or alternating parts
-            if search(parens_re, orig_phrase):
+            if search(re_parens, orig_phrase):
                 if orig_phrase.find('|') > -1:
                     # TODO alternation
                     pass
                 else:
                     # TODO optional parts
                     pass
             if orig_phrase.find(';') > -1:
                 synonyms = map(
-                    lambda x: sub(braces_re, r'\1', x),
+                    lambda x: sub(re_braces, r'\1', x),
-                    split(semicolon_re, orig_phrase))
+                    split(re_semicolon, orig_phrase))
                 for synonym in synonyms:
                     self[synonym] = data
                 del self[orig_phrase]
             else:
-                m = match(braces_re, orig_phrase)
+                m = match(re_braces, orig_phrase)
                 if m is not None:
                     phrase = m.group("phrase")
-                    m2 = match(parens_re, phrase)
+                    m_parens = search(re_parens, phrase)
-                    if m2 is not None:
+                    if m_parens is not None:
-                        # TODO alternation and optional parts
+                        # alternation and optional parts
                         # Parenthesized parts without "|" become optional
                         # non-capturing groups; "~" becomes a lookahead for
                         # one more character.
                         # NOTE(review): other regex metacharacters in the
                         # phrase are not escaped.
+                        expr = sub(re_parens_no_alt, r'(?:\1)?', phrase)
+                        expr = sub('~', '(?=.)', expr)
+                        self._expressions[expr] = data
-                        pass
+                    else:
+                        # remove braces
+                        self[phrase] = data
-                    self[phrase] = data
                     del self[orig_phrase]
     # Return the entry stored under the lower-cased phrase, or None if
     # there is none.  Before returning, the phrase as passed is written
     # into the entry under the language key.
     # NOTE(review): this modifies the stored entry itself, not a copy.
+    def translate (self, phrase):
+        translation = self.get(phrase.lower(), None)
+        if translation is not None:
+            translation[self._language_key] = phrase
+            return translation
+        return None
     # Return the data of the first expression in _expressions that matches
     # at the start of phrase, or None if none matches.  The matched text
     # is written into that data under the language key.
     # NOTE(review): match() does not require the expression to consume the
     # whole phrase, and the stored data is modified in place.
+    def translate_expression (self, phrase):
+        for expression, data in list(self._expressions.items()):
+            expression_match = match(expression, phrase)
+            if expression_match is not None:
+                data[self._language_key] = expression_match.group(0)
+                return data
+        return None

Subversion Repositories LCARS

(root)/trunk/tools/eazytrans/Dictionary.py - Rev 295 → 296