Rev 293 | Rev 295 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 293 | Rev 294 | ||
---|---|---|---|
1 | """
|
1 | """
|
2 | Created on 2014-10-20
|
2 | Created on 2014-10-20
|
3 | 3 | ||
4 | @author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
|
4 | @author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
|
5 | 5 | ||
6 | """
|
6 | """
|
7 | 7 | ||
8 | from os import chdir, stat |
8 | from os import chdir, stat |
9 | from sys import stderr |
9 | from sys import stderr |
10 | from os.path import dirname, realpath, basename |
10 | from os.path import dirname, realpath, basename |
11 | from pickle import dump, load |
11 | from pickle import dump, load |
12 | from re import match, DOTALL, search, sub, split, compile |
12 | from re import match, DOTALL, search, sub, split, compile |
13 | from copy import deepcopy |
13 | from copy import deepcopy |
14 | 14 | ||
# Global verbosity threshold: a message is printed when its ``min_level``
# is less than or equal to this value.
debug_level = 2


def dmsg(*args, **kwargs):
    """
    Print a debug message if the global debug level is high enough.

    Accepts the same positional and keyword arguments as ``print()``,
    plus one extra keyword argument:

    :param min_level: minimum value of ``debug_level`` required for the
        message to be printed.  Missing or ``None`` means 1.

    If no ``file`` keyword argument is given, the message is written to
    ``sys.stderr``.
    """
    # ``kwargs`` is a dict, so its entries must be looked up as keys
    # (``hasattr`` would test for attributes and never find them).
    # Popping also keeps ``min_level`` from being passed on to print().
    min_level = kwargs.pop('min_level', None)
    if min_level is None:
        min_level = 1

    # Only fall back to stderr when the caller did not choose a stream.
    kwargs.setdefault('file', stderr)

    if debug_level >= min_level:
        print(*args, **kwargs)
27 | 27 | ||
def sort_dict_alnum_english_key(phrase):
    """
    Return the sort key for a dictionary item.

    :param phrase: a sequence whose first element is the phrase string
        (for example a ``(phrase, data)`` pair from ``dict.items()``).
    :return: the phrase with one pair of curly braces removed (the text
        between the first ``{`` and the last ``}`` is kept), lower-cased.
    """
    headword = phrase[0]
    without_braces = sub(r'\{(.+)\}', r'\1', headword)
    return without_braces.lower()
30 | 30 | ||
class Dictionary(dict):
    """
    A ``dict`` that maps phrases to dicts of phrase attributes.

    Entries are read from a text file by :meth:`load` and can be
    normalized afterwards with :meth:`clean`.
    """
    # Alternation of the attribute names recognized in a dictionary file;
    # inserted verbatim into a regular expression by the parser.
    _keys = "ipa|en|lit|pos|com|tag|ex"
    # Not referenced by the methods of this class.  NOTE(review): as a
    # mutable class attribute it is shared by all instances.
    _expressions = {}

    def load(self, dictionary_file, language_key='en'):
        """
        Fill this dictionary from a text file, using a pickle cache.

        The cache is named after the base name of ``dictionary_file``
        with ``.pickle`` appended.  If the cache is missing or older than
        ``dictionary_file``, the text file is parsed and the cache is
        (re)written; otherwise the entries are read from the cache.

        Side effect: the working directory of the process is changed to
        the directory that contains this module, so a relative
        ``dictionary_file`` and the cache file are resolved there.

        :param dictionary_file: path of the dictionary text file
        :param language_key: prefix (inserted verbatim into a regular
            expression) of the lines that start a new phrase, as in
            ``<language_key>: <phrase>``
        :raises FileNotFoundError: if ``dictionary_file`` does not exist
            and has to be read
        """
        dmsg('Loading dictionary ', end='', min_level=1)

        chdir(dirname(realpath(__file__)))

        pickle_file = basename(dictionary_file) + '.pickle'

        try:
            pickle_mtime = stat(pickle_file).st_mtime
        except FileNotFoundError:
            pickle_mtime = None

        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
            with open(dictionary_file) as f:
                self._parse_lines(f, language_key)

            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
            # TODO: Pickle should only contain strings to be small
            with open(pickle_file, mode='wb') as f:
                dump(self, f)
            dmsg(' done.', min_level=1)
        else:
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
            # NOTE(review): the cache does not record ``language_key``; a
            # cache written for another key is reused as-is.
            # NOTE(security): unpickling executes code from the file, so
            # the cache file must not be writable by untrusted parties.
            with open(pickle_file, mode='rb') as f:
                cached = load(f)
            self.update(cached)

        dmsg(' done ({0} entries).'.format(len(self)), min_level=1)

    def _parse_lines(self, lines, language_key):
        """
        Add the entries described by ``lines`` to this dictionary.

        Three kinds of lines are recognized, in this order:

        * ``<language_key>: <phrase>`` starts a new, empty entry;
        * ``<key>: <value>``, where ``<key>`` is one of ``_keys``, sets an
          attribute of the current entry;
        * a line indented by exactly two more whitespace characters than
          the preceding attribute line continues that attribute's value;
          the parts are joined with single spaces.

        All other lines are ignored.

        :param lines: iterable of text lines (for example an open file)
        :param language_key: see :meth:`load`
        :raises KeyError: if an attribute line precedes the first phrase
        """
        phrase_re = compile(
            r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key))
        key_re = compile(
            r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys))
        continuation_re = compile(r'(?P<indent>\s+)(?P<continuation>\S.*)')

        phrase = None
        key = None
        value = None
        indent = None

        for line in lines:
            m = phrase_re.match(line)
            if m is not None:
                # Finish the previous entry before switching phrases;
                # otherwise its pending multi-line value would stay a
                # list and later be stored under the new phrase.
                self._join_pending(phrase, key, value)
                phrase = m.group("phrase")
                self[phrase] = {}
                key = value = indent = None
                continue

            m = key_re.match(line)
            if m is not None:
                # join previous value if necessary
                self._join_pending(phrase, key, value)

                indent = m.group("indent")
                key = m.group("key")
                value = m.group("value")
                # assign a string for memory efficiency
                self[phrase][key] = value
                continue

            if indent is None:
                # no attribute line seen yet in this entry:
                # there is nothing that could be continued
                continue

            m = continuation_re.match(line)
            if m is not None and len(m.group("indent")) == len(indent) + 2:
                continuation = m.group("continuation")
                if isinstance(value, str):
                    # when a continuation is first found, convert to a
                    # list because there could be more continuations
                    value = self[phrase][key] = [value, continuation]
                else:
                    value.append(continuation)

        # join last value if necessary
        self._join_pending(phrase, key, value)

    def _join_pending(self, phrase, key, value):
        """
        Store a collected multi-line value as a single string.

        Does nothing unless ``value`` is a list of continuation parts.
        """
        if isinstance(value, list):
            self[phrase][key] = ' '.join(value)

    def clean(self):
        """
        Normalize the phrases that are used as keys, in place.

        * A phrase containing ``;`` is replaced by one entry per
          semicolon-separated synonym, each with its own deep copy of the
          data.  Curly braces that enclose a whole synonym are removed;
          empty synonyms (for example from a trailing ``;``) are skipped.
        * Any other phrase that is entirely enclosed in curly braces is
          re-keyed without the braces.

        An existing entry with the same key as a resulting phrase is
        overwritten.
        """
        braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL)
        semicolon_re = compile(r'\s*;\s*')

        # Iterate over a snapshot because entries are added and removed.
        for orig_phrase, data in list(self.items()):
            # TODO alternation: phrases with "(...)" that contain "|"
            # TODO optional parts: other phrases with "(...)"

            if ';' in orig_phrase:
                synonyms = [
                    braces_re.sub(r'\1', part)
                    for part in semicolon_re.split(orig_phrase)]

                for synonym in synonyms:
                    if synonym:
                        self[synonym] = deepcopy(data)

                del self[orig_phrase]
            else:
                m = braces_re.match(orig_phrase)
                if m is not None:
                    # Move the entry; a copy is unnecessary because the
                    # original key is removed.
                    self[m.group(1)] = self.pop(orig_phrase)
138 | 138 |