Subversion Repositories LCARS

Rev

Rev 294 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
293 PointedEar 1
"""
Created on 2014-10-20

@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>

"""
7
 
8
from os import chdir, stat
9
from sys import stderr
10
from os.path import dirname, realpath, basename
11
from pickle import dump, load
12
from re import match, DOTALL, search, sub, split, compile
13
from copy import deepcopy
14
 
15
# Messages whose min_level is greater than this value are suppressed by dmsg().
debug_level = 2


def dmsg(*args, **kwargs):
    """
    Print a debug message if the module-level debug_level is high enough.

    Takes the same positional and keyword arguments as print(), plus:

    min_level -- the message is printed only if debug_level >= min_level;
                 defaults to 1 when omitted or None
    file      -- stream to write to, as for print(); defaults to
                 sys.stderr when omitted or None

    """
    # kwargs is a dict, so its entries must be looked up as keys;
    # hasattr() only inspects attributes and would never find them.
    min_level = kwargs.pop('min_level', None)
    if min_level is None:
        min_level = 1

    if kwargs.get('file') is None:
        kwargs['file'] = stderr

    if debug_level >= min_level:
        print(*args, **kwargs)
27
 
28
def sort_dict_alnum_english_key(phrase):
    """
    Return a case-insensitive sort key for a dictionary item.

    phrase -- a sequence whose first element is the phrase string,
              e.g. a (phrase, data) pair from dict.items()

    The key is the phrase with a brace-delimited part unwrapped
    ("{foo} bar" becomes "foo bar") and converted to lowercase.

    """
    headword = phrase[0]
    unbraced = sub(r'\{(.+)\}', r'\1', headword)
    return unbraced.lower()
30
 
31
class Dictionary(dict):
    """
    A dict that maps phrases to dicts of their properties.

    Each value maps a property key (one of the alternatives in _keys)
    to a string.  Instances are filled by load() and normalized by
    clean().

    """
    # Property keys recognized in the dictionary file, written as a
    # regular-expression alternation.
    _keys = "ipa|en|lit|pos|com|tag|ex"
    _expressions = {}

    def load(self, dictionary_file):
        """
        Fill this dictionary from dictionary_file or from its pickle cache.

        dictionary_file -- path of the text dictionary file

        The working directory is first changed to the directory of this
        module.  The cache is named after the basename of
        dictionary_file plus '.pickle' and is looked up there.  If the
        cache is missing or older than dictionary_file, the text file is
        parsed and the cache is rewritten; otherwise the entries are
        read from the cache.  Progress is reported through dmsg().

        Raises FileNotFoundError if dictionary_file has to be read but
        does not exist.

        """
        dictionary = self

        dmsg('Loading dictionary ', end='', min_level=1)

        # NOTE: this changes the working directory of the whole process;
        # relative paths (including dictionary_file) are resolved
        # against the directory of this module from here on.
        chdir(dirname(realpath(__file__)))

        pickle_file = basename(dictionary_file) + '.pickle'

        try:
            pickle_mtime = stat(pickle_file).st_mtime
        except FileNotFoundError:
            pickle_mtime = None

        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
            with open(dictionary_file) as f:
                self._parse_lines(f)

            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
            # TODO: Pickle should only contain strings to be small
            with open(pickle_file, mode='wb') as f:
                dump(dictionary, f)
            dmsg(' done.', min_level=1)
        else:
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
            # NOTE(review): unpickling executes code embedded in the file;
            # the cache must only ever come from a trusted location.
            with open(pickle_file, mode='rb') as f:
                cached = load(f)
            for key, value in cached.items():
                dictionary[key] = value

        dmsg(' done ({0} entries).'.format(len(dictionary)), min_level=1)

    def _parse_lines(self, lines):
        """
        Add the entries described by the iterable of text lines.

        A line "vuh: PHRASE" starts a new entry.  A following line
        "KEY: VALUE", where KEY is one of _keys, sets a property of the
        current entry.  A line indented by exactly two more whitespace
        characters than the preceding "KEY:" line continues that
        property's value; continuations are joined with single spaces.
        All other lines are ignored.

        Raises KeyError if a "KEY: VALUE" line occurs before the first
        "vuh:" line.

        """
        phrase_re = compile(r'^\s*vuh:\s*(?P<phrase>.+)')
        key_re = compile(
            r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys))
        continuation_re = compile(r'(?P<indent>\s+)(?P<continuation>\S.*)')

        phrase = None
        key = None
        # str for a single-line value, list while continuations accumulate
        value = None
        indent = None

        for line in lines:
            m = phrase_re.match(line)
            if m is not None:
                # Finish the previous entry first; otherwise its pending
                # continuation list would later be stored under the new
                # phrase and the old entry would keep a list value.
                if isinstance(value, list):
                    self[phrase][key] = ' '.join(value)

                phrase = m.group("phrase")
                self[phrase] = {}
                key = None
                value = None
                indent = None
                continue

            m = key_re.match(line)
            if m is not None:
                # join previous value if necessary
                if isinstance(value, list):
                    self[phrase][key] = ' '.join(value)

                indent = m.group("indent")
                key = m.group("key")
                value = m.group("value")
                # assign a string for memory efficiency
                self[phrase][key] = value
                continue

            if indent is None:
                # no property is open, so this cannot be a continuation
                continue

            m = continuation_re.match(line)
            if m is not None and len(m.group("indent")) == len(indent) + 2:
                continuation = m.group("continuation")
                if isinstance(value, str):
                    # when a continuation is first found, convert to a
                    # list because there could be more continuations
                    value = self[phrase][key] = [value, continuation]
                else:
                    value.append(continuation)

        # join last value if necessary
        if isinstance(value, list):
            self[phrase][key] = ' '.join(value)

    def clean(self):
        """
        Normalize the phrases that are used as keys, in place.

        A phrase containing ';' is replaced by one entry per
        semicolon-separated synonym, each with its own deep copy of the
        data.  A phrase, or synonym, that is entirely wrapped in braces
        (ignoring surrounding whitespace) is replaced by its unwrapped
        content.  Parenthesized (optional or alternating) parts are
        detected but not processed yet.

        """
        dictionary = self

        parens_re = compile(r'\(.+\)', DOTALL)
        braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL)
        semicolon_re = compile(r'\s*;\s*')

        # Iterate over a snapshot because entries are added and removed.
        for orig_phrase, data in list(dictionary.items()):
            # if there are optional or alternating parts
            if parens_re.search(orig_phrase):
                if '|' in orig_phrase:
                    # TODO alternation
                    pass
                else:
                    # TODO optional parts
                    pass

            if ';' in orig_phrase:
                for synonym in semicolon_re.split(orig_phrase):
                    dictionary[braces_re.sub(r'\1', synonym)] = deepcopy(data)

                del dictionary[orig_phrase]
            else:
                m = braces_re.match(orig_phrase)
                if m is not None:
                    # The old entry goes away, so its data can be moved
                    # to the new key instead of being copied.
                    dictionary[m.group(1)] = dictionary.pop(orig_phrase)