Subversion Repositories LCARS

Rev

Rev 294 | Rev 296 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 294 Rev 295
Line 8... Line 8...
8
from os import chdir, stat
8
from os import chdir, stat
9
from sys import stderr
9
from sys import stderr
10
from os.path import dirname, realpath, basename
10
from os.path import dirname, realpath, basename
11
from pickle import dump, load
11
from pickle import dump, load
12
from re import match, DOTALL, search, sub, split, compile
12
from re import match, DOTALL, search, sub, split, compile
13
from copy import deepcopy
-
 
14
13
15
debug_level = 2
14
debug_level = 2
16
15
17
def dmsg(*args, **kwargs):
16
def dmsg(*args, **kwargs):
18
    if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
17
    if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
Line 35... Line 34...
35
    """
34
    """
36
    _keys = "ipa|en|lit|pos|com|tag|ex"
35
    _keys = "ipa|en|lit|pos|com|tag|ex"
37
    _expressions = {}
36
    _expressions = {}
38
37
39
    def load (self, dictionary_file, language_key='en'):
38
    def load (self, dictionary_file, language_key='en'):
40
        dictionary = self
-
 
41
-
 
42
        dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
39
        dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
43
40
44
        chdir(dirname(realpath(__file__)))
41
        chdir(dirname(realpath(__file__)))
45
42
46
        pickle_file = basename(dictionary_file) + '.pickle'
43
        pickle_file = basename(dictionary_file) + '.pickle'
Line 60... Line 57...
60
57
61
                for line in f:
58
                for line in f:
62
                    m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line)
59
                    m = match(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key), line)
63
                    if m is not None:
60
                    if m is not None:
64
                        phrase = m.group("phrase")
61
                        phrase = m.group("phrase")
65
                        dictionary[phrase] = {}
62
                        self[phrase] = {}
66
                        indent = None
63
                        indent = None
67
                    else:
64
                    else:
68
                        m = match(r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys), line)
65
                        m = match(r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys), line)
69
                        if m is not None:
66
                        if m is not None:
70
                            # join previous value if necessary
67
                            # join previous value if necessary
71
                            if type(value) == list:
68
                            if type(value) == list:
72
                                dictionary[phrase][key] = ' '.join(value)
69
                                self[phrase][key] = ' '.join(value)
73
70
74
                            indent = m.group("indent")
71
                            indent = m.group("indent")
75
                            key = m.group("key")
72
                            key = m.group("key")
76
                            value = m.group("value")
73
                            value = m.group("value")
77
                            # assign a string for memory efficiency
74
                            # assign a string for memory efficiency
78
                            dictionary[phrase][key] = value
75
                            self[phrase][key] = value
79
                        elif indent is not None:
76
                        elif indent is not None:
80
                            m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line)
77
                            m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line)
81
                            if m is not None:
78
                            if m is not None:
82
                                if len(m.group("indent")) == len(indent) + 2:
79
                                if len(m.group("indent")) == len(indent) + 2:
83
                                    continuation = m.group("continuation")
80
                                    continuation = m.group("continuation")
84
                                    if type(value) == str:
81
                                    if type(value) == str:
85
                                        # when a continuation is first found, convert to a list
82
                                        # when a continuation is first found, convert to a list
86
                                        # because there could be more continuations
83
                                        # because there could be more continuations
87
                                        value = dictionary[phrase][key] = [value, continuation]
84
                                        value = self[phrase][key] = [value, continuation]
88
                                    else:
85
                                    else:
89
                                        value.append(continuation)
86
                                        value.append(continuation)
90
87
91
            # join last value if necessary
88
            # join last value if necessary
92
            if type(value) == list:
89
            if type(value) == list:
93
                dictionary[phrase][key] = ' '.join(value)
90
                self[phrase][key] = ' '.join(value)
94
91
95
            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
92
            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
96
            # TODO: Pickle should only contain strings to be small
93
            # TODO: Pickle should only contain strings to be small
97
            with open(pickle_file, mode='wb') as f: dump(dictionary, f)
94
            with open(pickle_file, mode='wb') as f: dump(self, f)
98
            dmsg(' done.', min_level=1)
95
            dmsg(' done.', min_level=1)
99
        else:
96
        else:
100
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
97
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
101
            with open(pickle_file, mode='rb') as f: pickle = load(f)
98
            with open(pickle_file, mode='rb') as f: pickle = load(f)
102
            for key, value in pickle.items():
99
            for key, value in pickle.items():
103
                dictionary[key] = value
100
                self[key] = value
104
101
105
        dmsg(' done ({0} entries).'.format(len(dictionary)), min_level=1)
102
        dmsg(' done ({0} entries).'.format(len(self)), min_level=1)
106
103
107
    def clean (self):
104
    def clean (self):
108
        dictionary = self
-
 
109
-
 
110
        parens_re = compile(r'\(.+\)', DOTALL)
105
        parens_re = compile(r'\(.+\)', DOTALL)
111
        braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL)
106
        braces_re = compile(
-
 
107
            r'^\s*\{(?P<phrase>.+)\}(?:\s*\((?P<variant>.+?)\))?\s*$',
-
 
108
            DOTALL)
112
        semicolon_re = compile(r'\s*;\s*')
109
        semicolon_re = compile(r'\s*;\s*')
113
110
114
        for orig_phrase, data in list(dictionary.items()):
111
        for orig_phrase, data in list(self.items()):
115
            # if there are optional or alternating parts
112
            # if there are optional or alternating parts
116
            if search(parens_re, orig_phrase):
113
            if search(parens_re, orig_phrase):
117
                if orig_phrase.find('|') > -1:
114
                if orig_phrase.find('|') > -1:
118
                    # TODO alternation
115
                    # TODO alternation
119
                    pass
116
                    pass
Line 125... Line 122...
125
                synonyms = map(
122
                synonyms = map(
126
                    lambda x: sub(braces_re, r'\1', x),
123
                    lambda x: sub(braces_re, r'\1', x),
127
                    split(semicolon_re, orig_phrase))
124
                    split(semicolon_re, orig_phrase))
128
125
129
                for synonym in synonyms:
126
                for synonym in synonyms:
130
                    dictionary[synonym] = deepcopy(data)
127
                    self[synonym] = data
131
128
132
                del dictionary[orig_phrase]
129
                del self[orig_phrase]
133
            else:
130
            else:
134
                m = match(braces_re, orig_phrase)
131
                m = match(braces_re, orig_phrase)
135
                if m is not None:
132
                if m is not None:
-
 
133
                    phrase = m.group("phrase")
-
 
134
                    m2 = match(parens_re, phrase)
-
 
135
                    if m2 is not None:
136
                    dictionary[m.group(1)] = deepcopy(dictionary[orig_phrase])
136
                        # TODO alternation and optional parts
-
 
137
                        pass
-
 
138
-
 
139
                    self[phrase] = data
137
                    del dictionary[orig_phrase]
140
                    del self[orig_phrase]