Subversion Repositories LCARS

Rev

Rev 294 | Rev 296 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 294 Rev 295
1
"""
1
"""
2
Created on 2014-10-20
2
Created on 2014-10-20
3

3

4
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
4
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
5

5

6
"""
6
"""
7
7
8
from os import chdir, stat
8
from os import chdir, stat
9
from sys import stderr
9
from sys import stderr
10
from os.path import dirname, realpath, basename
10
from os.path import dirname, realpath, basename
11
from pickle import dump, load
11
from pickle import dump, load
12
from re import match, DOTALL, search, sub, split, compile
12
from re import match, DOTALL, search, sub, split, compile
13
from copy import deepcopy
-
 
14
13
15
debug_level = 2
14
debug_level = 2
16
15
17
def dmsg(*args, **kwargs):
16
def dmsg(*args, **kwargs):
18
    if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
17
    if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
19
        kwargs['min_level'] = 1
18
        kwargs['min_level'] = 1
20
19
21
    if not hasattr(kwargs, 'file'):
20
    if not hasattr(kwargs, 'file'):
22
        kwargs['file'] = stderr
21
        kwargs['file'] = stderr
23
22
24
    if debug_level >= kwargs['min_level']:
23
    if debug_level >= kwargs['min_level']:
25
        del kwargs['min_level']
24
        del kwargs['min_level']
26
        print(*args, **kwargs)
25
        print(*args, **kwargs)
27
26
28
def sort_dict_alnum_english_key(phrase):
27
def sort_dict_alnum_english_key(phrase):
29
    return sub(r'\{(.+)\}', r'\1', phrase[0]).lower()
28
    return sub(r'\{(.+)\}', r'\1', phrase[0]).lower()
30
29
31
class Dictionary(dict):
30
class Dictionary(dict):
32
    """
31
    """
33
    classdocs
32
    classdocs
34
   
33
   
35
    """
34
    """
36
    _keys = "ipa|en|lit|pos|com|tag|ex"
35
    _keys = "ipa|en|lit|pos|com|tag|ex"
37
    _expressions = {}
36
    _expressions = {}
38
37
39
    def load (self, dictionary_file, language_key='en'):
38
    def load (self, dictionary_file, language_key='en'):
40
        dictionary = self
-
 
41
-
 
42
        dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
39
        dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
43
40
44
        chdir(dirname(realpath(__file__)))
41
        chdir(dirname(realpath(__file__)))
45
42
46
        pickle_file = basename(dictionary_file) + '.pickle'
43
        pickle_file = basename(dictionary_file) + '.pickle'
47
44
48
        try:
45
        try:
49
            pickle_mtime = stat(pickle_file).st_mtime
46
            pickle_mtime = stat(pickle_file).st_mtime
50
        except FileNotFoundError:
47
        except FileNotFoundError:
51
            pickle_mtime = None
48
            pickle_mtime = None
52
49
53
        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
50
        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
54
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
51
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
55
            phrase = None
52
            phrase = None
56
            key = None
53
            key = None
57
            value = None
54
            value = None
58
            with open(dictionary_file) as f:
55
            with open(dictionary_file) as f:
59
                indent = None
56
                indent = None
60
57
61
                for line in f:
58
                for line in f:
62
                    m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line)
59
                    m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line)
63
                    if m is not None:
60
                    if m is not None:
64
                        phrase = m.group("phrase")
61
                        phrase = m.group("phrase")
65
                        dictionary[phrase] = {}
62
                        self[phrase] = {}
66
                        indent = None
63
                        indent = None
67
                    else:
64
                    else:
68
                        m = match(r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys), line)
65
                        m = match(r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys), line)
69
                        if m is not None:
66
                        if m is not None:
70
                            # join previous value if necessary
67
                            # join previous value if necessary
71
                            if type(value) == list:
68
                            if type(value) == list:
72
                                dictionary[phrase][key] = ' '.join(value)
69
                                self[phrase][key] = ' '.join(value)
73
70
74
                            indent = m.group("indent")
71
                            indent = m.group("indent")
75
                            key = m.group("key")
72
                            key = m.group("key")
76
                            value = m.group("value")
73
                            value = m.group("value")
77
                            # assign a string for memory efficiency
74
                            # assign a string for memory efficiency
78
                            dictionary[phrase][key] = value
75
                            self[phrase][key] = value
79
                        elif indent is not None:
76
                        elif indent is not None:
80
                            m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line)
77
                            m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line)
81
                            if m is not None:
78
                            if m is not None:
82
                                if len(m.group("indent")) == len(indent) + 2:
79
                                if len(m.group("indent")) == len(indent) + 2:
83
                                    continuation = m.group("continuation")
80
                                    continuation = m.group("continuation")
84
                                    if type(value) == str:
81
                                    if type(value) == str:
85
                                        # when a continuation is first found, convert to a list
82
                                        # when a continuation is first found, convert to a list
86
                                        # because there could be more continuations
83
                                        # because there could be more continuations
87
                                        value = dictionary[phrase][key] = [value, continuation]
84
                                        value = self[phrase][key] = [value, continuation]
88
                                    else:
85
                                    else:
89
                                        value.append(continuation)
86
                                        value.append(continuation)
90
87
91
            # join last value if necessary
88
            # join last value if necessary
92
            if type(value) == list:
89
            if type(value) == list:
93
                dictionary[phrase][key] = ' '.join(value)
90
                self[phrase][key] = ' '.join(value)
94
91
95
            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
92
            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
96
            # TODO: Pickle should only contain strings to be small
93
            # TODO: Pickle should only contain strings to be small
97
            with open(pickle_file, mode='wb') as f: dump(dictionary, f)
94
            with open(pickle_file, mode='wb') as f: dump(self, f)
98
            dmsg(' done.', min_level=1)
95
            dmsg(' done.', min_level=1)
99
        else:
96
        else:
100
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
97
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
101
            with open(pickle_file, mode='rb') as f: pickle = load(f)
98
            with open(pickle_file, mode='rb') as f: pickle = load(f)
102
            for key, value in pickle.items():
99
            for key, value in pickle.items():
103
                dictionary[key] = value
100
                self[key] = value
104
101
105
        dmsg(' done ({0} entries).'.format(len(dictionary)), min_level=1)
102
        dmsg(' done ({0} entries).'.format(len(self)), min_level=1)
106
103
107
    def clean (self):
104
    def clean (self):
108
        dictionary = self
-
 
109
-
 
110
        parens_re = compile(r'\(.+\)', DOTALL)
105
        parens_re = compile(r'\(.+\)', DOTALL)
111
        braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL)
106
        braces_re = compile(
-
 
107
            r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$',
-
 
108
            DOTALL)
112
        semicolon_re = compile(r'\s*;\s*')
109
        semicolon_re = compile(r'\s*;\s*')
113
110
114
        for orig_phrase, data in list(dictionary.items()):
111
        for orig_phrase, data in list(self.items()):
115
            # if there are optional or alternating parts
112
            # if there are optional or alternating parts
116
            if search(parens_re, orig_phrase):
113
            if search(parens_re, orig_phrase):
117
                if orig_phrase.find('|') > -1:
114
                if orig_phrase.find('|') > -1:
118
                    # TODO alternation
115
                    # TODO alternation
119
                    pass
116
                    pass
120
                else:
117
                else:
121
                    # TODO optional parts
118
                    # TODO optional parts
122
                    pass
119
                    pass
123
120
124
            if orig_phrase.find(';') > -1:
121
            if orig_phrase.find(';') > -1:
125
                synonyms = map(
122
                synonyms = map(
126
                    lambda x: sub(braces_re, r'\1', x),
123
                    lambda x: sub(braces_re, r'\1', x),
127
                    split(semicolon_re, orig_phrase))
124
                    split(semicolon_re, orig_phrase))
128
125
129
                for synonym in synonyms:
126
                for synonym in synonyms:
130
                    dictionary[synonym] = deepcopy(data)
127
                    self[synonym] = data
131
128
132
                del dictionary[orig_phrase]
129
                del self[orig_phrase]
133
            else:
130
            else:
134
                m = match(braces_re, orig_phrase)
131
                m = match(braces_re, orig_phrase)
135
                if m is not None:
132
                if m is not None:
-
 
133
                    phrase = m.group("phrase")
-
 
134
                    m2 = match(parens_re, phrase)
-
 
135
                    if m2 is not None:
136
                    dictionary[m.group(1)] = deepcopy(dictionary[orig_phrase])
136
                        # TODO alternation and optional parts
-
 
137
                        pass
-
 
138
-
 
139
                    self[phrase] = data
137
                    del dictionary[orig_phrase]
140
                    del self[orig_phrase]
138
 
141