Subversion Repositories LCARS

Rev

Rev 296 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 296 Rev 297
1
#!/usr/bin/env python3
1
#!/usr/bin/env python3
2
2
3
'''
3
'''
4
Created on 2014-10-20
4
Created on 2014-10-20
5

5

6
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
6
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
7
'''
7
'''
8
from sys import argv, stderr
8
from sys import argv, stderr
9
from re import findall, DOTALL, match, sub, compile, \
9
from re import findall, DOTALL, match, sub, compile, \
10
    escape, search
10
    escape, search
11
from os.path import basename
11
from os.path import basename
12
from functools import cmp_to_key
12
from functools import cmp_to_key
13
from Dictionary import Dictionary, dmsg, \
13
from Dictionary import Dictionary, dmsg, \
14
    sort_dict_alnum_english_key
14
    sort_dict_alnum_english_key
15
15
# Raw entry store; replaced by a VulcanDictionary wrapping it when this
# module is run as a script.
dictionary = {}

# Vulcan prepositional prefixes that VulcanDictionary.translate() tries to
# split off the front of a phrase, mapped to English glosses.
# NOTE(review): "|" presumably separates alternative glosses -- confirm.
prepositions = {
    "fi'": 'on',
    "na'": 'at|to',
    "t'": 'of'
}
23
23
def cli_help():
    """
    Print a one-line usage summary to standard output.

    The program name shown is the base name of ``argv[0]``.
    """
    print('Usage: {0} TEXT...'.format(basename(argv[0])))
26
26
def get_sort_dict_alnum_vulcan_key ():
    """
    Build a sort key that orders entries by the Vulcan alphabet.

    Each entry is first passed through ``sort_dict_alnum_english_key``,
    and the result is split into Vulcan letters (digraphs such as "sh"
    or "kh" count as one letter).  Entries are then compared letter by
    letter using the position of each letter in the alphabet below; if
    one entry is a prefix of the other, the shorter one sorts first.
    Characters that are not part of the alphabet are ignored.

    :return: key object suitable for ``sorted(..., key=...)``
    """
    letters = [letter.lower() for letter in (
        " ", 'S', 'T', 'P', 'K', 'R', 'L', 'A', 'Sh', 'O', 'U', 'D',
        'V', 'Kh', 'E', 'H', 'G', 'Ch', 'I', 'N', 'Zh', 'M', 'Y', 'F', 'Z',
        'Th', 'W', 'B', "'", '-')]

    # rank of each letter = its position in the alphabet
    letter_values = {letter: rank for rank, letter in enumerate(letters)}

    # Longest alternatives first so that a digraph is matched as one
    # letter rather than as its first character; escape() keeps the
    # pattern literal even if the alphabet gains regex metacharacters.
    letters_re = compile(r'(?:{0})'.format(
        '|'.join(map(escape, sorted(letters, key=len, reverse=True)))))

    def to_ranks (entry):
        # split into Vulcan letters and map each one to its rank
        return [letter_values[letter] for letter
                in letters_re.findall(sort_dict_alnum_english_key(entry))]

    def sort_dict_alnum_vulcan (a, b):
        a = to_ranks(a)
        b = to_ranks(b)

        # the first differing letter decides
        for rank_a, rank_b in zip(a, b):
            if rank_a != rank_b:
                return rank_a - rank_b

        # one is a prefix of the other (or both are equal):
        # the shorter entry sorts first
        return (len(a) > len(b)) - (len(a) < len(b))

    return cmp_to_key(sort_dict_alnum_vulcan)
56
56
class VulcanDictionary (Dictionary):
    """
    Dictionary that knows about Vulcan prepositional prefixes and the
    plural suffix "lar" in addition to the lookups of the base class.
    """
    def translate (self, phrase, search_prefix=True, search_plural=True):
        """
        Translate a phrase, falling back to prefix and plural analysis.

        Lookups are tried in this order; the first hit is returned:

        1. the base class lookup,
        2. ``self.translate_expression()``,
        3. if ``search_prefix``: a known preposition at the start of the
           phrase, translated separately from the remainder,
        4. if ``search_plural``: the phrase without a trailing "lar".

        :param phrase: text to translate
        :type phrase: str
        :param search_prefix: try to split off a prepositional prefix
        :type search_prefix: bool
        :param search_plural: try to strip the plural suffix "lar"
        :type search_plural: bool
        :return: the translation found by the base class or by
            ``translate_expression()``; a two-item list
            ``[prefix_translation, tail_translation]`` for a prefixed
            phrase; a copy of the singular entry with " (pl.)" appended
            to its ``'en'`` value for a plural; ``None`` if nothing
            matched
        """
        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            # find prefix
            for preposition in prepositions:
                if not phrase.startswith(preposition):
                    continue

                # Look the preposition up without prefix/plural analysis:
                # analysing it again would match the same preposition and
                # recurse without end when it is not in the dictionary.
                prefix_translation = self.translate(
                    preposition, search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Strip only the leading occurrence of the preposition
                tail = phrase[len(preposition):]
                tail_translation = self.translate(
                    tail, search_prefix=False, search_plural=search_plural)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]

        # Checked independently of the prefix search so that the default
        # arguments actually enable plural lookup.
        if search_plural and phrase.endswith('lar'):
            # find plural
            head = phrase[:-len('lar')]
            head_translation = self.translate(
                head, search_prefix=False, search_plural=False)
            if head_translation is not None:
                # copy so that the stored singular entry is not modified
                head_translation = dict(head_translation)
                head_translation['en'] += ' (pl.)'
                return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Text delimited by pipes (``|text|``) is rewritten as ``(text)``.
        Existing parenthesised groups are matched first and returned
        unchanged, so pipes inside them are not converted.

        :param phrase: entry text to clean
        :type phrase: str
        :return: the phrase with pipe-delimited groups parenthesised
        :rtype: str
        """
        return sub(
            r'(\([^)]*\))|\|([^|)]+)\|',
            lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
            phrase)
-
 
113
if __name__ == '__main__':
    # Command-line entry point: translate the text given as arguments,
    # reporting every step through dmsg() debug messages.
    if len(argv) < 2:
        print('Nothing to translate.', end='\n\n', file=stderr)
        cli_help()
        # SystemExit directly: the exit() builtin is only provided by
        # the site module
        raise SystemExit(1)

    text = ' '.join(argv[1:])

    dictionary = VulcanDictionary(dictionary)
    dictionary.load('vuh-gol-en.dict.zdb.txt', 'vuh')
    dictionary.clean()

#     try:
#         for phrase, data in OrderedDict(sorted(
#             dictionary.items(),
#             key=get_sort_dict_alnum_vulcan_key()
#         )).items():
#             print(phrase, "=", data)
#     except BrokenPipeError:
#         pass

    dmsg("text:", text, min_level=2)
    # A sentence ends with one to three dots or at the end of the text
    sentences = findall(r'(?!\s+)(?:.+?\.{1,3}|.+$)', text, DOTALL)
    dmsg("sentences:", sentences, min_level=2)
    for sentence in sentences:
        dmsg("sentence:", sentence, min_level=2)

        # Clauses are additionally delimited by hyphens and dashes
        clauses = findall(r'(?!\s+)(?:.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|.+$))', sentence, DOTALL)
        dmsg("clauses:", clauses, min_level=2)
        for clause in clauses:
            dmsg("clause:", clause, min_level=2)

            words = findall(r'[^\s.]+', clause)
            dmsg("words:", words, min_level=2)

            offset = 0
            while offset < len(words):
                translation = None

                # Greedy match: try the longest phrase starting at offset
                # first, then drop one word from the end at a time
                for i in range(len(words), offset, -1):
                    dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)
                    phrase = ' '.join(words[offset:i])

                    dmsg("phrase:", phrase, min_level=2)

                    translation = dictionary.translate(phrase)

                    if translation is not None:
                        dmsg("phrase-translation:", translation, min_level=2)
                        dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)
                        # the matched words collapse into a single item
                        words[offset:i] = [translation]
                        break

                if translation is None:
                    dmsg("phrase-translation:", translation, min_level=2)

                # Translated or not, exactly one list item has been dealt
                # with: a translated phrase now occupies one slot, so the
                # next untranslated word is at offset + 1 (not at i).
                offset += 1

            dmsg("words-translation:", words, min_level=2)
            dmsg("words-translation-reduced:",
                list(map(
                    lambda word:
                        word['en']
                        if (hasattr(word, "get") and word.get('en', None) is not None)
                        else word,
                    words)),
                min_level=2)
#             for key, value in dictionary._expressions.items():
#                 dmsg(key, value, min_level=3)