Subversion Repositories LCARS

Rev

Rev 295 | Rev 297 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 295 Rev 296
1
"""
1
"""
2
Created on 2014-10-20
2
Created on 2014-10-20
3

3

4
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
4
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
5

5

6
"""
6
"""
7
7
8
from os import chdir, stat
8
from os import chdir, stat
9
from sys import stderr
9
from sys import stderr
10
from os.path import dirname, realpath, basename
10
from os.path import dirname, realpath, basename
11
from pickle import dump, load
11
from pickle import dump, load
12
from re import match, DOTALL, search, sub, split, compile
12
from re import match, DOTALL, search, sub, split, compile
13
13
14
# Messages whose min_level is greater than this value are suppressed.
debug_level = 2


def dmsg(*args, **kwargs):
    """
    Print a debug message if the module's debug level permits it.

    Positional arguments and all keyword arguments other than
    ``min_level`` are passed through to print().

    Keyword arguments:
    min_level -- minimum value of ``debug_level`` at which the message
                 is printed; missing or None means 1
    file      -- output stream; defaults to sys.stderr
    """
    # kwargs is a dict: its entries must be looked up as keys, not with
    # hasattr(), which inspects attributes and never finds them.
    min_level = kwargs.pop('min_level', None)
    if min_level is None:
        min_level = 1

    kwargs.setdefault('file', stderr)

    if debug_level >= min_level:
        print(*args, **kwargs)
26
26
27
def sort_dict_alnum_english_key(phrase):
    """
    Return a case-insensitive sort key for a dictionary item.

    phrase -- sequence whose first element is the phrase string
              (presumably a ``(phrase, data)`` pair as produced by
              dict.items(); verify against the caller)

    A ``{...}`` span in the phrase is replaced by its content, so the
    braces do not influence the ordering, and the result is lowercased.
    """
    return sub(r'\{(.+)\}', r'\1', phrase[0]).lower()
29
29
30
class Dictionary(dict):
    """
    Mapping of phrases to dicts of their properties.

    Entries are read from a text dictionary file by load() and
    normalized by clean().  Phrases that contain parenthesized parts
    are kept separately as regular expressions and are looked up with
    translate_expression(); all other phrases are looked up with
    translate().
    """
    # Key under which lookups report the phrase; overwritten by load().
    _language_key = 'en'
    # Regex alternation of the property keys recognized in the file.
    _keys = "ipa|en|lit|pos|com|tag|ex"

    def __init__(self, *args, **kwargs):
        """Create the mapping; arguments are passed on to dict()."""
        super().__init__(*args, **kwargs)
        # Per-instance map of regular expression -> entry data.  A
        # class-level dict would be shared by all instances.
        self._expressions = {}

    def load(self, dictionary_file, language_key='en'):
        """
        Fill this mapping from dictionary_file or from its pickle cache.

        dictionary_file -- path of the text dictionary
        language_key    -- key that introduces a phrase line in the file

        The working directory is changed to the directory of this
        module.  The cache is named after the basename of
        dictionary_file plus '.pickle'; it is rewritten when it is
        missing or older than dictionary_file, otherwise the entries
        are read from it.
        """
        self._language_key = language_key

        dmsg('Loading dictionary ', end='', min_level=1)

        chdir(dirname(realpath(__file__)))

        pickle_file = basename(dictionary_file) + '.pickle'

        try:
            pickle_mtime = stat(pickle_file).st_mtime
        except FileNotFoundError:
            pickle_mtime = None

        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
            # NOTE(review): the file is decoded with the platform default
            # encoding -- confirm that this matches the dictionary file.
            with open(dictionary_file) as f:
                self._parse_lines(f)

            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
            # TODO: Pickle should only contain strings to be small
            with open(pickle_file, mode='wb') as f:
                dump(self, f)
            dmsg(' done.', min_level=1)
        else:
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
            # NOTE: unpickling executes code from the file; the cache
            # must not be writable by untrusted parties.
            # `load` here is the module-level pickle.load, not this method.
            with open(pickle_file, mode='rb') as f:
                cached = load(f)
            self.update(cached)

        dmsg(' done ({0} entries).'.format(len(self)), min_level=1)

    def _parse_lines(self, lines):
        """
        Add the entries described by an iterable of text lines.

        A line ``<language key>: <phrase>`` starts a new entry.  A line
        ``<key>: <value>`` with one of the keys in _keys sets a property
        of the current entry.  A following line indented by exactly two
        more whitespace characters than the property line continues its
        value; continued values are joined with single spaces.
        """
        re_phrase = compile(
            r'^\s*{0}:\s*(?P<phrase>.+)'.format(self._language_key))
        re_property = compile(
            r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys))
        re_continuation = compile(r'(?P<indent>\s+)(?P<continuation>\S.*)')

        phrase = key = indent = None
        # fragments of the current property value (first line plus
        # continuation lines)
        parts = []

        def flush():
            # Until a continuation is seen the entry already holds the
            # plain string; only a multi-line value has to be joined.
            if len(parts) > 1:
                self[phrase][key] = ' '.join(parts)

        for line in lines:
            m = re_phrase.match(line)
            if m is not None:
                # finish the previous entry before switching phrases,
                # otherwise its pending value would end up in the new one
                flush()
                phrase = m.group("phrase")
                self[phrase] = {}
                key = indent = None
                parts = []
                continue

            if phrase is None:
                # no entry yet that a property could belong to
                continue

            m = re_property.match(line)
            if m is not None:
                flush()
                indent = m.group("indent")
                key = m.group("key")
                parts = [m.group("value")]
                # assign a string for memory efficiency
                self[phrase][key] = parts[0]
                continue

            if indent is None:
                continue

            m = re_continuation.match(line)
            if m is not None and len(m.group("indent")) == len(indent) + 2:
                parts.append(m.group("continuation"))

        flush()

    def clean(self):
        """
        Normalize the phrases that are used as keys.

        * ``a; b`` is replaced by one entry per synonym, all referring
          to the same data; empty synonyms are skipped.
        * ``{phrase}``, optionally followed by ``(variant)``, is
          replaced by ``phrase``.
        * If such a phrase contains parentheses, it is stored in the
          expression table instead: each ``(...)`` group without ``|``
          becomes an optional non-capturing group and each ``~``
          becomes ``(?=.)``.
        """
        # TODO: alternation/optional parts in phrases without braces
        re_parens = compile(r'\(.+\)', DOTALL)
        # excluding parentheses keeps separate groups separate
        re_parens_no_alt = compile(r'\(([^|()]+)\)')
        re_braces = compile(
            r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$',
            DOTALL)
        re_semicolon = compile(r'\s*;\s*')

        # iterate over a snapshot because entries are added and removed
        for orig_phrase, data in list(self.items()):
            if ';' in orig_phrase:
                for synonym in re_semicolon.split(orig_phrase):
                    synonym = re_braces.sub(r'\1', synonym)
                    if synonym:
                        self[synonym] = data

                del self[orig_phrase]
                continue

            m = re_braces.match(orig_phrase)
            if m is None:
                continue

            phrase = m.group("phrase")
            if re_parens.search(phrase) is not None:
                # alternation and optional parts
                expr = re_parens_no_alt.sub(r'(?:\1)?', phrase)
                expr = expr.replace('~', '(?=.)')
                self._expressions[expr] = data
            else:
                # remove braces
                self[phrase] = data

            del self[orig_phrase]

    def translate(self, phrase):
        """
        Look up the lowercased phrase.

        Returns a copy of the entry's data with the phrase as passed
        stored under the language key, or None if there is no entry.
        A copy is returned because synonyms share one data dict.
        """
        translation = self.get(phrase.lower())
        if translation is None:
            return None

        result = dict(translation)
        result[self._language_key] = phrase
        return result

    def translate_expression(self, phrase):
        """
        Look up phrase in the expressions collected by clean().

        Returns a copy of the data of the first expression that matches
        at the start of phrase, with the matched text stored under the
        language key, or None if no expression matches.
        """
        for expression, data in self._expressions.items():
            expression_match = match(expression, phrase)
            if expression_match is not None:
                result = dict(data)
                result[self._language_key] = expression_match.group(0)
                return result

        return None
141
 
166