Subversion Repositories LCARS

Rev

Rev 293 | Rev 295 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 293 Rev 294
1
"""
1
"""
2
Created on 2014-10-20
2
Created on 2014-10-20
3

3

4
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
4
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
5

5

6
"""
6
"""
7
7
8
from os import chdir, stat
8
from os import chdir, stat
9
from sys import stderr
9
from sys import stderr
10
from os.path import dirname, realpath, basename
10
from os.path import dirname, realpath, basename
11
from pickle import dump, load
11
from pickle import dump, load
12
from re import match, DOTALL, search, sub, split, compile
12
from re import match, DOTALL, search, sub, split, compile
13
from copy import deepcopy
13
from copy import deepcopy
14
14
15
# Messages whose min_level is greater than this value are suppressed by dmsg().
debug_level = 2


def dmsg(*args, **kwargs):
    """
    Print a debug message if the module-wide debug level permits it.

    Takes the same positional and keyword arguments as print(), plus:

    min_level -- the message is printed only if debug_level is greater
                 than or equal to this value.  Defaults to 1; passing
                 None also selects the default.
    file      -- output stream; defaults to sys.stderr (not sys.stdout
                 as with a plain print()).
    """
    # kwargs is a dict: its entries are keys, not attributes, so they must
    # be looked up with dict methods.  (hasattr() never finds them, which
    # would silently discard the caller's min_level and file.)
    min_level = kwargs.pop('min_level', None)
    if min_level is None:
        min_level = 1

    kwargs.setdefault('file', stderr)

    if debug_level >= min_level:
        print(*args, **kwargs)
27
27
28
def sort_dict_alnum_english_key(phrase):
    """
    Return a case-insensitive key derived from the first element of phrase.

    phrase -- an indexable whose first element is a string (presumably a
              (phrase, data) item of a dictionary; verify against callers).

    A brace-delimited part "{...}" of that string is replaced by its
    content (the braces are dropped), then the result is lowercased.
    """
    headword = phrase[0]
    without_braces = sub(r'\{(.+)\}', r'\1', headword)
    return without_braces.lower()
30
30
31
class Dictionary(dict):
    """
    A dict that maps phrases to dicts of their attributes.

    Entries are read from a line-based text file by load() and can be
    normalized afterwards with clean().  Each value is a plain dict whose
    keys are taken from _keys and whose values are strings.
    """
    # Regex alternation of the attribute names recognized in a dictionary file
    _keys = "ipa|en|lit|pos|com|tag|ex"
    _expressions = {}

    def load(self, dictionary_file, language_key='en'):
        """
        Fill this dictionary from dictionary_file or from its pickle cache.

        dictionary_file -- path of the text file to read.  NOTE: the
                           working directory is changed to the directory
                           of this module first, so a relative path is
                           resolved from there.
        language_key    -- name of the attribute that starts a new phrase
                           entry.  It is inserted into a regular
                           expression as-is, so regex metacharacters in
                           it are interpreted.

        If "<basename of dictionary_file>.pickle" exists in the module
        directory and is not older than dictionary_file, the entries are
        read from that pickle.  Otherwise dictionary_file is parsed and
        the pickle is (re)written.

        Raises FileNotFoundError if dictionary_file does not exist.
        """
        dmsg('Loading dictionary ', end='', min_level=1)

        chdir(dirname(realpath(__file__)))

        pickle_file = basename(dictionary_file) + '.pickle'

        try:
            pickle_mtime = stat(pickle_file).st_mtime
        except FileNotFoundError:
            pickle_mtime = None

        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
            with open(dictionary_file) as f:
                self._parse_lines(f, language_key)

            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
            # TODO: Pickle should only contain strings to be small
            with open(pickle_file, mode='wb') as f:
                dump(self, f)
            dmsg(' done.', min_level=1)
        else:
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
            # NOTE(review): unpickling executes arbitrary code from the
            # file; this is only safe as long as the cache file is one
            # that this program wrote itself.
            # NOTE(review): the cache file name does not depend on
            # language_key, so a cache written with another key is reused
            # unchanged -- confirm whether that is intended.
            with open(pickle_file, mode='rb') as f:
                cached = load(f)
            self.update(cached)

        dmsg(' done ({0} entries).'.format(len(self)), min_level=1)

    def _parse_lines(self, lines, language_key='en'):
        """
        Parse the lines of a dictionary file into this dictionary.

        lines        -- iterable of strings, one per line
        language_key -- see load()

        A line "<language_key>: <phrase>" starts a new entry.  Following
        lines "<key>: <value>" (key being one of _keys) set attributes of
        that entry.  A line indented exactly two characters deeper than
        the preceding attribute line continues that attribute's value;
        the parts are joined with single spaces.  Attribute lines that
        appear before the first phrase line are ignored.
        """
        phrase_re = compile(
            r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key))
        key_re = compile(
            r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys))
        continuation_re = compile(r'(?P<indent>\s+)(?P<continuation>\S.*)')

        entry = None    # attributes of the phrase currently being read
        key = None      # attribute currently being read
        value = None    # its value: str, or list of str once continued
        indent = None   # indentation of the current attribute line

        for line in lines:
            m = phrase_re.match(line)
            if m is not None:
                # Finish the previous entry before starting the next one,
                # so that a pending multi-line value cannot leak into it.
                self._store_joined(entry, key, value)
                entry = self[m.group("phrase")] = {}
                key = value = indent = None
                continue

            if entry is None:
                # nothing to attach attributes to yet
                continue

            m = key_re.match(line)
            if m is not None:
                # join previous value if necessary
                self._store_joined(entry, key, value)

                indent = m.group("indent")
                key = m.group("key")
                # assign a string for memory efficiency
                value = entry[key] = m.group("value")
                continue

            if indent is None:
                continue

            m = continuation_re.match(line)
            if m is not None and len(m.group("indent")) == len(indent) + 2:
                continuation = m.group("continuation")
                if isinstance(value, str):
                    # when a continuation is first found, convert to a list
                    # because there could be more continuations
                    value = entry[key] = [value, continuation]
                else:
                    value.append(continuation)

        # join last value if necessary
        self._store_joined(entry, key, value)

    @staticmethod
    def _store_joined(entry, key, value):
        """Replace a multi-line value (list of str) in entry by one string."""
        if isinstance(value, list):
            entry[key] = ' '.join(value)

    def clean(self):
        """
        Normalize the phrases that are used as keys of this dictionary.

        A phrase containing ";" is split there into synonyms; each synonym
        (with enclosing braces removed) gets its own deep copy of the
        attributes, and the combined phrase is removed.  Any other phrase
        that is entirely enclosed in braces is re-keyed without them.
        An already existing entry for a resulting phrase is overwritten.
        """
        parens_re = compile(r'\(.+\)', DOTALL)
        braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL)
        semicolon_re = compile(r'\s*;\s*')

        # iterate over a snapshot because entries are added and removed
        for orig_phrase, data in list(self.items()):
            # if there are optional or alternating parts
            if parens_re.search(orig_phrase):
                if '|' in orig_phrase:
                    # TODO alternation
                    pass
                else:
                    # TODO optional parts
                    pass

            if ';' in orig_phrase:
                for part in semicolon_re.split(orig_phrase):
                    synonym = braces_re.sub(r'\1', part)
                    self[synonym] = deepcopy(data)

                del self[orig_phrase]
            else:
                m = braces_re.match(orig_phrase)
                if m is not None:
                    # move the attributes to the brace-less key
                    self[m.group(1)] = self.pop(orig_phrase)
138
 
138