Subversion Repositories LCARS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
300 PointedEar 1
"""
2
Created on 15.01.2015
3
 
4
@author: pelinux
5
 
6
"""
7
from Dictionary import Dictionary, dmsg
8
from re import escape, match, sub, search, findall, DOTALL
9
 
10
# Prefixes that VulcanDictionary.translate() tries to split off the start of
# a phrase.  Only the keys are read by the code visible in this file.
# NOTE(review): the values look like English equivalents, with '|' separating
# alternatives -- confirm against the dictionary data before relying on them.
prepositions = {
    "fi'": 'on',
    "na'": 'at|to',
    "t'": 'of',
}
15
 
16
class VulcanDictionary(Dictionary):
    """
    Dictionary that also resolves prefixed and plural forms of a phrase.

    Lookups that the base class cannot answer directly are retried after
    splitting off one of the module-level ``prepositions`` prefixes or the
    plural suffix ``lar``.
    """

    def translate(self, phrase, search_prefix=True, search_plural=True):
        """
        Translate ``phrase``, falling back to prefix and plural analysis.

        The lookup order is: the base class lookup, then
        ``self.translate_expression()`` (defined outside this class body),
        then prefix splitting, then plural stripping.

        :param phrase: the phrase to look up
        :type phrase: str
        :param search_prefix: if true, try to split off a leading entry of
            ``prepositions`` and translate prefix and remainder separately
        :type search_prefix: bool
        :param search_plural: if true, try to strip a trailing ``lar`` and
            translate the remaining head
        :type search_plural: bool
        :return: the direct or expression translation; or a two-item list
            ``[prefix_translation, tail_translation]``; or a copy of the
            head's translation with ``' (pl.)'`` appended to its ``'en'``
            value; or ``None`` if nothing matched
        """
        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            for preposition in prepositions:
                if not phrase.startswith(preposition):
                    continue

                # Look the prefix up without further analysis: re-entering
                # the prefix search with the bare prefix would match the
                # same preposition again and recurse without end whenever
                # the prefix itself is not in the dictionary.
                prefix_translation = self.translate(
                    preposition, search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Remove only the leading prefix, not later occurrences of
                # the same character sequence inside the phrase.
                tail = phrase[len(preposition):]
                tail_translation = self.translate(
                    tail, search_prefix=False, search_plural=search_plural)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]

        # Tried independently of the prefix search, so that a bare plural
        # is still resolved when called with the default arguments.
        if search_plural and search(r'lar$', phrase) is not None:
            head = sub(r'lar$', '', phrase)
            head_translation = self.translate(
                head, search_prefix=False, search_plural=False)
            if head_translation is not None:
                # Copy first so the stored dictionary entry is not modified.
                head_translation = dict(head_translation)
                head_translation['en'] += ' (pl.)'
                return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Text delimited by pipes (``|text|``) is rewritten as ``(text)``;
        text that is already parenthesized is matched first and returned
        unchanged, so pipes inside parentheses are left alone.

        :param phrase: the dictionary entry to clean
        :type phrase: str
        :return: the entry with pipe-delimited parts parenthesized
        :rtype: str
        """
        def to_parens(m):
            # Group 2 is the pipe-delimited text; group 1 an existing
            # parenthesized part, which is kept as it is.
            piped = m.group(2)
            return '({0})'.format(piped) if piped else m.group(1)

        return sub(r'(\([^)]*\))|\|([^|)]+)\|', to_parens, phrase)
72
 
73
class Clause(object):
    """
    A part of a sentence that is translated phrase by phrase.

    ``pattern`` matches one clause: text that does not start with
    whitespace and ends with a hyphen preceded by whitespace, an en or em
    dash, one to three dots, or the end of the text.
    """
    pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]+$))'

    def __init__(self, text):
        """
        :param text: the text of this clause
        :type text: str
        """
        self._text = text
        self._phrases = []

    def __str__(self):
        """Return the original text of this clause."""
        return self._text

    def translate(self, dictionary):
        """
        Split this clause into phrases by attempting to translate them.

        Starting at each word, the longest run of following words that
        ``dictionary`` can translate is replaced by its translation; a
        word for which no run translates is kept as it is.  The result is
        stored in ``self._phrases``, replacing the result of any earlier
        call.

        :param dictionary: object whose ``translate(phrase)`` returns a
            translation or ``None``
        """
        words = findall(r'[^\s.]+', self._text)

        dmsg("words:", words, min_level=2)

        # Collected locally and assigned at the end, so that translating
        # the same clause again does not append duplicates.
        phrases = []
        offset = 0
        while offset < len(words):
            # Longest candidate first, shrinking down to a single word.
            for end in range(len(words), offset, -1):
                dmsg("words[{0}:{1}] = {2}".format(offset, end, words[offset:end]), min_level=2)

                phrase = ' '.join(words[offset:end])

                dmsg("phrase:", phrase, min_level=2)

                translation = dictionary.translate(phrase)

                if translation is not None:
                    dmsg("phrase-translation:", translation, min_level=2)
                    dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, end, translation), min_level=2)

                    phrases.append(translation)
                    offset = end
                    break
            else:
                # No run starting here could be translated: keep the word.
                dmsg("phrase-translation:", None, min_level=2)
                phrases.append(words[offset])
                offset += 1

        self._phrases = phrases

        dmsg("phrases:", self._phrases, min_level=2)
116
 
117
class Sentence(object):
    """
    A sentence, split into clauses on construction.

    ``pattern`` matches one sentence: text that does not start with
    whitespace and either ends with one to three dots or runs to the end
    of the text.
    """
    # Raw string: '\s' and '\.' are not valid string escape sequences.
    pattern = r'(?!\s+)(.+?\.{1,3}|.+$)'

    def __init__(self, text):
        """
        :param text: the text of this sentence
        :type text: str
        """
        self._text = text
        self.find_clauses(text)

    def __str__(self):
        """Return the original text of this sentence."""
        return self._text

    def find_clauses(self, text):
        """
        Store one ``Clause`` per match of ``Clause.pattern`` in ``text``.

        :param text: the text to split into clauses
        :type text: str
        """
        self._clauses = [
            Clause(clause_text)
            for clause_text in findall(Clause.pattern, text, DOTALL)]

    def translate(self, dictionary):
        """
        Translate every clause of this sentence using ``dictionary``.

        :param dictionary: passed on to each clause's ``translate()``
        """
        for clause in self._clauses:
            clause.translate(dictionary)
135
 
136
class Paragraph(object):
    """
    A paragraph, split into sentences on construction.

    ``pattern`` matches one paragraph: the shortest run of text that is
    followed by two or more line breaks or by the end of the text; the
    line breaks are part of the match.
    """
    pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)'

    def __init__(self, text):
        """
        :param text: the text of this paragraph
        :type text: str
        """
        self._text = text
        self.find_sentences(text)

    def __str__(self):
        """Return the original text of this paragraph."""
        return self._text

    def find_sentences(self, text):
        """
        Store one ``Sentence`` per match of ``Sentence.pattern`` in ``text``.

        :param text: the text to split into sentences
        :type text: str
        """
        matches = findall(Sentence.pattern, text, DOTALL)
        self._sentences = [Sentence(found) for found in matches]

    def translate(self, dictionary):
        """
        Translate every sentence of this paragraph using ``dictionary``.

        :param dictionary: passed on to each sentence's ``translate()``
        """
        for item in self._sentences:
            item.translate(dictionary)
155
 
156
class Text(object):
    """A whole text, split into paragraphs on construction."""

    def __init__(self, text):
        """
        :param text: the complete text
        :type text: str
        """
        self._text = text
        self.find_paragraphs(text)

    def __repr__(self):
        """Return the concatenated ``__repr__()`` results of all paragraphs."""
        return ''.join(part.__repr__() for part in self._paragraphs)

    def __str__(self):
        """Return the original text."""
        return self._text

    def find_paragraphs(self, text):
        """
        Store one ``Paragraph`` per match of ``Paragraph.pattern`` in ``text``.

        :param text: the text to split into paragraphs
        :type text: str
        """
        chunks = findall(Paragraph.pattern, text, DOTALL)
        self._paragraphs = [Paragraph(chunk) for chunk in chunks]

    def translate(self, dictionary):
        """
        Translate every paragraph of this text using ``dictionary``.

        :param dictionary: passed on to each paragraph's ``translate()``
        """
        for part in self._paragraphs:
            part.translate(dictionary)