Subversion Repositories LCARS

Rev

Rev 294 | Rev 296 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
293 PointedEar 1
"""
2
Created on 2014-10-20
3
 
4
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
5
 
6
"""
7
 
8
from os import chdir, stat
9
from sys import stderr
10
from os.path import dirname, realpath, basename
11
from pickle import dump, load
12
from re import match, DOTALL, search, sub, split, compile
13
 
14
# Messages whose min_level is greater than this value are suppressed.
debug_level = 2


def dmsg(*args, **kwargs):
    """
    Print a debug message if the module-level debug_level permits it.

    Takes the same positional and keyword arguments as the built-in
    print(), plus:

    min_level -- the message is printed only if debug_level is greater
                 than or equal to this value; defaults to 1 when it is
                 omitted or None.
    file      -- output stream; defaults to sys.stderr when omitted.
    """
    # kwargs is a dict, so the options are items, not attributes:
    # hasattr(kwargs, 'min_level') would always be False and silently
    # discard whatever the caller passed.
    min_level = kwargs.pop('min_level', None)
    if min_level is None:
        min_level = 1

    kwargs.setdefault('file', stderr)

    if debug_level >= min_level:
        print(*args, **kwargs)
26
 
27
def sort_dict_alnum_english_key(phrase):
    """
    Return a case-insensitive sort key for a phrase item.

    phrase -- a sequence whose first element is the phrase string
              (presumably a (phrase, data) pair from dict.items();
              verify against the caller).

    The braces around a '{...}' part are removed before the phrase is
    lower-cased, so that '{Foo}' sorts like 'foo'.  The match is greedy:
    it spans from the first '{' to the last '}' on a line.
    """
    return sub(r'\{(.+)\}', r'\1', phrase[0]).lower()
29
 
30
class Dictionary(dict):
    """
    Mapping of phrases to their properties.

    As built by load(), each key is a phrase and each value is a dict
    that maps the field names listed in _keys to strings.
    """
    # Field names recognized in a dictionary file, written as a regular
    # expression alternation.
    _keys = "ipa|en|lit|pos|com|tag|ex"
    # Not used by any of the methods below.
    _expressions = {}

    def load(self, dictionary_file, language_key='en'):
        """
        Populate this dictionary from a dictionary file or its pickle.

        dictionary_file -- path of the dictionary text file
        language_key    -- name of the field that starts a new entry; the
                           value of that field becomes the phrase of the
                           entry.  It is inserted into a regular
                           expression verbatim.

        The file <basename of dictionary_file>.pickle in the directory of
        this module is loaded instead if it is at least as new as the
        dictionary file; otherwise the dictionary file is parsed and the
        pickle is (re)written.

        Side effect: the working directory of the process is changed to
        the directory of this module, so a relative dictionary_file is
        resolved against that directory.

        Raises FileNotFoundError if dictionary_file does not exist.
        """
        dmsg('Loading dictionary ', end='', min_level=1)

        chdir(dirname(realpath(__file__)))

        pickle_file = basename(dictionary_file) + '.pickle'

        try:
            pickle_mtime = stat(pickle_file).st_mtime
        except FileNotFoundError:
            pickle_mtime = None

        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
            with open(dictionary_file) as f:
                self._parse_lines(f, language_key)

            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
            # TODO: Pickle should only contain strings to be small
            with open(pickle_file, mode='wb') as f:
                dump(self, f)
            dmsg(' done.', min_level=1)
        else:
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
            # NOTE(security): unpickling can execute arbitrary code; the
            # pickle must only ever be a file written by this method.
            # "load" is the module-level pickle.load here, not this method.
            with open(pickle_file, mode='rb') as f:
                pickle = load(f)
            for phrase, data in pickle.items():
                self[phrase] = data

        dmsg(' done ({0} entries).'.format(len(self)), min_level=1)

    def _parse_lines(self, lines, language_key):
        """
        Add the entries described by the lines of a dictionary file.

        lines        -- iterable of lines (e.g. an open text file)
        language_key -- name of the field that starts a new entry

        A line "<language_key>: <phrase>" starts a new entry.  A line
        "<key>: <value>" with one of the keys in _keys sets a field of
        the current entry.  A line that is indented exactly two
        characters deeper than the preceding field line continues the
        value of that field; the parts are joined with a single space.
        """
        phrase_re = compile(
            r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key))
        field_re = compile(
            r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys))
        continuation_re = compile(r'(?P<indent>\s+)(?P<continuation>\S.*)')

        phrase = None
        key = None
        value = None
        indent = None

        for line in lines:
            m = phrase_re.match(line)
            if m is not None:
                # Join a pending multi-line value while "phrase" still
                # names the entry it belongs to; otherwise it would end
                # up in the entry that starts here.
                if isinstance(value, list):
                    self[phrase][key] = ' '.join(value)

                key = None
                value = None
                phrase = m.group("phrase")
                self[phrase] = {}
                indent = None
                continue

            m = field_re.match(line)
            if m is not None:
                if phrase is None:
                    # a field before the first entry belongs to nothing
                    continue

                # join previous value if necessary
                if isinstance(value, list):
                    self[phrase][key] = ' '.join(value)

                indent = m.group("indent")
                key = m.group("key")
                value = m.group("value")
                # assign a string for memory efficiency
                self[phrase][key] = value
                continue

            if indent is None:
                # no field seen in this entry yet, nothing to continue
                continue

            m = continuation_re.match(line)
            if m is not None and len(m.group("indent")) == len(indent) + 2:
                continuation = m.group("continuation")
                if isinstance(value, str):
                    # when a continuation is first found, convert to a
                    # list because there could be more continuations
                    value = self[phrase][key] = [value, continuation]
                else:
                    value.append(continuation)

        # join last value if necessary
        if isinstance(value, list):
            self[phrase][key] = ' '.join(value)

    def clean(self):
        """
        Normalize the phrases that are used as keys, in place.

        - A key of the form '{phrase}', optionally followed by a variant
          in parentheses, is replaced by the key 'phrase'.
        - A key that lists several synonyms separated by ';' is replaced
          by one key per synonym, with braces removed as above.  All of
          these keys refer to the same data object.  Empty synonyms, as
          produced by a leading or trailing ';', are skipped.

        Optional and alternating parts in parentheses are not expanded
        yet.
        """
        parens_re = compile(r'\(.+\)', DOTALL)
        braces_re = compile(
            r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$',
            DOTALL)
        semicolon_re = compile(r'\s*;\s*')

        # iterate over a snapshot because keys are added and removed
        for orig_phrase, data in list(self.items()):
            # if there are optional or alternating parts
            if parens_re.search(orig_phrase):
                if '|' in orig_phrase:
                    # TODO alternation
                    pass
                else:
                    # TODO optional parts
                    pass

            if ';' in orig_phrase:
                for part in semicolon_re.split(orig_phrase):
                    synonym = braces_re.sub(r'\1', part)
                    # do not create an entry for the empty string
                    if synonym:
                        self[synonym] = data

                del self[orig_phrase]
            else:
                m = braces_re.match(orig_phrase)
                if m is not None:
                    phrase = m.group("phrase")
                    if parens_re.match(phrase) is not None:
                        # TODO alternation and optional parts
                        pass

                    self[phrase] = data
                    del self[orig_phrase]