Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
300 | PointedEar | 1 | """ |
2 | Created on 15.01.2015 |
||
3 | |||
4 | @author: pelinux |
||
5 | |||
6 | """ |
||
7 | from Dictionary import Dictionary, dmsg |
||
8 | from re import escape, match, sub, search, findall, DOTALL |
||
9 | |||
# Prepositional prefixes, keyed by the prefix exactly as it is written in
# front of a word (trailing apostrophe included) and mapped to an English
# gloss.  Presumably "|" separates alternative glosses -- TODO confirm.
prepositions = {
    "fi'": "on",
    "na'": "at|to",
    "t'": "of",
}
||
15 | |||
class VulcanDictionary(Dictionary):
    """
    Dictionary with fallbacks for prepositional prefixes and plurals.

    A phrase that the base class cannot translate directly is retried as
    an expression, then as a known prepositional prefix followed by a
    translatable tail, or as a plural formed with the suffix ``lar``.
    """

    def translate(self, phrase, search_prefix=True, search_plural=True):
        """
        Translate a phrase, decomposing it if there is no direct entry.

        The first step that yields a result wins:

        1. ``super().translate(phrase)``;
        2. ``self.translate_expression(phrase)`` (not defined in this
           class; presumably inherited from ``Dictionary``);
        3. if ``search_prefix`` is true: a key of ``prepositions`` at
           the start of the phrase, where both the preposition and the
           remaining tail can be translated;
        4. otherwise, if ``search_plural`` is true: the phrase without
           a trailing ``lar``.

        :param phrase: the phrase to translate
        :type phrase: str
        :param search_prefix: whether to try splitting off a
            prepositional prefix
        :type search_prefix: bool
        :param search_plural: whether to try removing the plural suffix;
            only consulted when ``search_prefix`` is false
        :type search_plural: bool
        :return: the direct or expression translation; or the list
            ``[prefix_translation, tail_translation]``; or a ``dict``
            copy of the singular's translation with ``' (pl.)'``
            appended to its ``'en'`` entry; or ``None`` if nothing
            matched
        """
        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            # find prefix
            for preposition in prepositions:
                if not phrase.startswith(preposition):
                    continue

                # Look the bare preposition up without any further
                # decomposition: with prefix search enabled it would
                # match itself as a prefix again and recurse without end
                # whenever it has no entry of its own.
                prefix_translation = self.translate(
                    preposition, search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Remove only the leading preposition.  A regex
                # substitution would also delete later occurrences of
                # the same character sequence inside the word.
                tail = phrase[len(preposition):]
                tail_translation = self.translate(tail, search_prefix=False)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]
        elif search_plural:
            # find plural
            # NOTE(review): this branch is only reached when
            # search_prefix is false, so a call with the default
            # arguments never tries plural analysis -- confirm that this
            # is intended.
            suffix = search(r'lar$', phrase)
            if suffix is not None:
                head = sub(r'lar$', '', phrase)
                head_translation = self.translate(
                    head, search_prefix=False, search_plural=False)
                if head_translation is not None:
                    # Copy before marking as plural so that the object
                    # returned by the lookup is not modified; assumes
                    # the translation is a mapping with an 'en' key.
                    head_translation = dict(head_translation)
                    head_translation['en'] += ' (pl.)'
                    return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Text enclosed in vertical bars (``|text|``) is rewritten as
        ``(text)``.  Text that is already enclosed in parentheses is
        matched first and returned unchanged, so bars inside
        parentheses are not converted.

        :param phrase: the dictionary entry text to clean
        :type phrase: str
        :return: the phrase with bar-delimited text parenthesised
        """
        def to_parens(m):
            bar_text = m.group(2)
            return '({0})'.format(bar_text) if bar_text else m.group(1)

        return sub(r'(\([^)]*\))|\|([^|)]+)\|', to_parens, phrase)
||
72 | |||
class Clause(object):
    """
    A clause of a sentence, translated phrase by phrase.

    After :meth:`translate` has run, ``_phrases`` holds, in text order,
    either the translation returned by the dictionary or, for a word
    that could not be translated, the word itself.
    """

    # A clause starts at a non-whitespace character and ends with a
    # spaced hyphen, an en or em dash, one to three full stops, or the
    # end of the text.  The final alternative uses "*" rather than "+"
    # so that a trailing clause consisting of a single character is
    # still matched instead of being dropped.
    pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]*$))'

    def __init__(self, text):
        """
        :param text: the text of the clause
        :type text: str
        """
        self._text = text
        self._phrases = []

    def __str__(self):
        """Return the original text of the clause."""
        return self._text

    def translate(self, dictionary):
        """
        Split the clause into phrases by attempting to translate them.

        Starting at each untranslated word, the longest run of words
        reaching to the end of the clause is tried first and shortened
        by one word at a time until ``dictionary.translate`` returns
        something other than ``None``.  If not even the single word can
        be translated, the word itself is recorded and the search
        continues with the next word.

        :param dictionary: object whose ``translate(phrase)`` method
            returns a translation, or ``None`` if there is none
        """
        # Find phrases by attempts to translate
        words = findall(r'[^\s.]+', self._text)

        dmsg("words:", words, min_level=2)

        # Start afresh so that translating the same clause again does
        # not append a second copy of the results.
        self._phrases = []

        offset = 0
        while offset < len(words):
            translation = None

            for i in range(len(words), offset, -1):
                dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)

                phrase = ' '.join(words[offset:i])

                dmsg("phrase:", phrase, min_level=2)

                translation = dictionary.translate(phrase)

                if translation is not None:
                    dmsg("phrase-translation:", translation, min_level=2)
                    dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)

                    self._phrases.append(translation)
                    # Continue after the last word of the matched phrase.
                    offset = i
                    break
            else:
                # No run starting at this word could be translated:
                # keep the single word untranslated and move on.
                dmsg("phrase-translation:", translation, min_level=2)
                self._phrases.append(words[offset])
                offset += 1

        dmsg("phrases:", self._phrases, min_level=2)
||
116 | |||
class Sentence(object):
    """
    A sentence, split into clauses when it is created.
    """

    # A sentence starts at a non-whitespace character and runs up to and
    # including one to three full stops, or to the end of the text.
    # Raw string: "\s" and "\." are not valid string escape sequences.
    pattern = r'(?!\s+)(.+?\.{1,3}|.+$)'

    def __init__(self, text):
        """
        :param text: the text of the sentence
        :type text: str
        """
        self._text = text
        self.find_clauses(text)

    def __str__(self):
        """Return the original text of the sentence."""
        return self._text

    def find_clauses(self, text):
        """
        Store one ``Clause`` per match of ``Clause.pattern`` in the text.

        :param text: the text to split into clauses
        :type text: str
        """
        self._clauses = [
            Clause(clause_text)
            for clause_text in findall(Clause.pattern, text, DOTALL)
        ]

    def translate(self, dictionary):
        """
        Translate every clause of the sentence, in order.

        :param dictionary: passed on to each clause's ``translate``
        """
        for clause in self._clauses:
            clause.translate(dictionary)
||
135 | |||
class Paragraph(object):
    """
    A paragraph, split into sentences when it is created.
    """

    # A paragraph runs up to and including two or more consecutive line
    # breaks (LF, CRLF or CR), or to the end of the text.
    pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)'

    def __init__(self, text):
        """
        :param text: the text of the paragraph
        :type text: str
        """
        self._text = text
        self.find_sentences(text)

    def __str__(self):
        """Return the original text of the paragraph."""
        return self._text

    def find_sentences(self, text):
        """
        Store one ``Sentence`` per match of ``Sentence.pattern`` in the text.

        :param text: the text to split into sentences
        :type text: str
        """
        self._sentences = [
            Sentence(sentence_text)
            for sentence_text in findall(Sentence.pattern, text, DOTALL)
        ]

    def translate(self, dictionary):
        """
        Translate every sentence of the paragraph, in order.

        :param dictionary: passed on to each sentence's ``translate``
        """
        for item in self._sentences:
            item.translate(dictionary)
||
155 | |||
class Text(object):
    """
    A whole text, split into paragraphs when it is created.
    """

    def __init__(self, text):
        """
        :param text: the complete text
        :type text: str
        """
        self._text = text
        self.find_paragraphs(text)

    def __repr__(self):
        """Return the concatenated ``__repr__()`` results of all paragraphs."""
        return ''.join(paragraph.__repr__() for paragraph in self._paragraphs)

    def __str__(self):
        """Return the original text."""
        return self._text

    def find_paragraphs(self, text):
        """
        Store one ``Paragraph`` per match of ``Paragraph.pattern`` in the text.

        :param text: the text to split into paragraphs
        :type text: str
        """
        self._paragraphs = [
            Paragraph(paragraph_text)
            for paragraph_text in findall(Paragraph.pattern, text, DOTALL)
        ]

    def translate(self, dictionary):
        """
        Translate every paragraph of the text, in order.

        :param dictionary: passed on to each paragraph's ``translate``
        """
        for item in self._paragraphs:
            item.translate(dictionary)
174 | paragraph.translate(dictionary) |