Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 300 | PointedEar | 1 | """ |
| 2 | Created on 15.01.2015 |
||
| 3 | |||
| 4 | @author: pelinux |
||
| 5 | |||
| 6 | """ |
||
| 7 | from Dictionary import Dictionary, dmsg |
||
| 8 | from re import escape, match, sub, search, findall, DOTALL |
||
| 9 | |||
# Prepositional prefixes that may be attached to the front of a word,
# mapped to their English meaning ('|' separates alternatives).
prepositions = {
    "fi'": 'on',
    "na'": 'at|to',
    "t'": 'of',
}
||
| 15 | |||
class VulcanDictionary(Dictionary):
    """
    Dictionary that can also resolve phrases carrying a prepositional
    prefix or the plural suffix ``lar``.
    """

    def translate(self, phrase, search_prefix=True, search_plural=True):
        """
        Translate a phrase, falling back to prefix or plural analysis.

        The inherited ``translate()`` is tried first, then
        ``translate_expression()``.  If both return ``None``, either the
        prefix search or the plural search is attempted, depending on
        the flags.

        :param phrase: phrase to translate
        :type phrase: str
        :param search_prefix: try to split off one of the prefixes
            listed in the module-level ``prepositions``
        :type search_prefix: bool
        :param search_plural: try to strip a trailing ``lar``; only
            consulted when ``search_prefix`` is false
        :type search_plural: bool
        :return: the translation found by the inherited lookups; or a
            two-item list ``[prefix_translation, tail_translation]``;
            or a copy of the singular translation with ``' (pl.)'``
            appended to its ``'en'`` item; or ``None`` if nothing
            matched
        """
        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            # find prefix
            for preposition in prepositions:
                prefix = match(escape(preposition), phrase)
                if prefix is None:
                    continue

                # Look the bare prefix up with both fallbacks disabled:
                # with the defaults, a prefix that is not a dictionary
                # entry would match itself again and recurse forever.
                prefix_translation = self.translate(
                    prefix.group(0),
                    search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Cut off only the matched leading prefix; a regex
                # substitution would also remove later occurrences
                # inside the word.
                tail = phrase[prefix.end():]
                tail_translation = self.translate(tail, search_prefix=False)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]
        elif search_plural:
            # NOTE(review): because this is an ``elif``, the plural
            # search is skipped whenever search_prefix is true, i.e. on
            # every call that uses the defaults -- confirm that this is
            # intended.
            # find plural
            suffix = search(r'lar$', phrase)
            if suffix is not None:
                head = sub(r'lar$', '', phrase)
                head_translation = self.translate(
                    head, search_prefix=False, search_plural=False)
                if head_translation is not None:
                    # Work on a copy so that the entry returned for the
                    # singular is not modified.
                    head_translation = dict(head_translation)
                    head_translation['en'] += ' (pl.)'
                    return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Text enclosed in pipes (``|...|``) is rewritten as ``(...)``.
        Groups that are already parenthesized are matched first and
        returned unchanged, so pipes inside them are not converted.

        :param phrase: dictionary entry text
        :type phrase: str
        :return: the entry with pipe-delimited groups parenthesized
        :rtype: str
        """
        return sub(
            r'(\([^)]*\))|\|([^|)]+)\|',
            lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
            phrase)
||
| 72 | |||
class Clause:
    """
    Part of a sentence, translated phrase by phrase.
    """

    # A clause runs up to and including a hyphen preceded by whitespace,
    # an en/em dash, one to three dots, or the end of the text.
    # Sentence.find_clauses() applies it with findall() and DOTALL.
    pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]+$))'

    def __init__(self, text):
        """
        :param text: source text of the clause
        :type text: str
        """
        self._text = text
        self._phrases = []

    def __str__(self):
        """Return the source text of the clause."""
        return self._text

    def translate(self, dictionary):
        """
        Split the clause into phrases by attempting to translate them.

        Starting at each word, the longest run of words for which
        ``dictionary.translate()`` returns something other than ``None``
        is taken as one phrase, and its translation is stored.  A word
        that starts no translatable run is stored untranslated.  The
        result is kept in ``self._phrases``; nothing is returned.

        :param dictionary: object providing ``translate(phrase)``
        """
        # Find phrases by attempts to translate
        words = findall(r'[^\s.]+', self._text)

        dmsg("words:", words, min_level=2)

        # Start from scratch so that translating the same clause again
        # does not append a second copy of every phrase.
        self._phrases = []

        offset = 0
        while offset < len(words):
            # Try the longest candidate first, then shorten it word by
            # word from the right.
            for end in range(len(words), offset, -1):
                dmsg("words[{0}:{1}] = {2}".format(offset, end, words[offset:end]), min_level=2)

                phrase = ' '.join(words[offset:end])

                dmsg("phrase:", phrase, min_level=2)

                translation = dictionary.translate(phrase)

                if translation is not None:
                    dmsg("phrase-translation:", translation, min_level=2)
                    dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, end, translation), min_level=2)

                    self._phrases.append(translation)
                    offset = end
                    break
            else:
                # Not even the single word at offset could be
                # translated: keep it as it is and move on.
                dmsg("phrase-translation:", None, min_level=2)
                self._phrases.append(words[offset])
                offset += 1

        dmsg("phrases:", self._phrases, min_level=2)
||
| 116 | |||
class Sentence:
    """
    Sentence of a paragraph, made up of clauses.
    """

    # A sentence runs up to and including one to three dots, or to the
    # end of the text; a match never starts on whitespace.
    # Raw string: '\s' and '\.' are not valid escapes in a plain string
    # literal.
    pattern = r'(?!\s+)(.+?\.{1,3}|.+$)'

    def __init__(self, text):
        """
        :param text: source text of the sentence
        :type text: str
        """
        self._text = text
        self.find_clauses(text)

    def __str__(self):
        """Return the source text of the sentence."""
        return self._text

    def find_clauses(self, text):
        """
        Split text with ``Clause.pattern`` and store the resulting
        ``Clause`` objects in ``self._clauses``.

        :param text: text to split
        :type text: str
        """
        self._clauses = [
            Clause(clause_text)
            for clause_text in findall(Clause.pattern, text, DOTALL)]

    def translate(self, dictionary):
        """
        Translate every clause of the sentence.

        :param dictionary: passed on to ``Clause.translate()``
        """
        for clause in self._clauses:
            clause.translate(dictionary)
||
| 135 | |||
class Paragraph:
    """
    Paragraph of a text, made up of sentences.
    """

    # A paragraph runs up to and including two or more consecutive line
    # breaks (LF, CRLF or CR), or to the end of the text.
    pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)'

    def __init__(self, text):
        """
        :param text: source text of the paragraph
        :type text: str
        """
        self._text = text
        self.find_sentences(text)

    def __str__(self):
        """Return the source text of the paragraph."""
        return self._text

    def find_sentences(self, text):
        """
        Split text with ``Sentence.pattern`` and store the resulting
        ``Sentence`` objects in ``self._sentences``.

        :param text: text to split
        :type text: str
        """
        self._sentences = [
            Sentence(sentence_text)
            for sentence_text in findall(Sentence.pattern, text, DOTALL)]

    def translate(self, dictionary):
        """
        Translate every sentence of the paragraph.

        :param dictionary: passed on to ``Sentence.translate()``
        """
        for sentence in self._sentences:
            sentence.translate(dictionary)
||
| 155 | |||
class Text:
    """
    Whole text, made up of paragraphs.
    """

    def __init__(self, text):
        """
        :param text: source text
        :type text: str
        """
        self._text = text
        self.find_paragraphs(text)

    def __repr__(self):
        """Return the concatenated ``repr()`` of all paragraphs."""
        return ''.join(repr(paragraph) for paragraph in self._paragraphs)

    def __str__(self):
        """Return the source text."""
        return self._text

    def find_paragraphs(self, text):
        """
        Split text with ``Paragraph.pattern`` and store the resulting
        ``Paragraph`` objects in ``self._paragraphs``.

        :param text: text to split
        :type text: str
        """
        self._paragraphs = [
            Paragraph(paragraph_text)
            for paragraph_text in findall(Paragraph.pattern, text, DOTALL)]

    def translate(self, dictionary):
        """
        Translate every paragraph of the text.

        :param dictionary: passed on to ``Paragraph.translate()``
        """
        for paragraph in self._paragraphs:
            paragraph.translate(dictionary)