""" Created on 15.01.2015 @author: pelinux """ from Dictionary import Dictionary, dmsg from re import escape, match, sub, search, findall, DOTALL prepositions = { "fi'": 'on', "na'": 'at|to', "t'": 'of' } class VulcanDictionary (Dictionary): """ """ def translate (self, phrase, search_prefix=True, search_plural=True): """ :param phrase: :type phrase: :param search_prefix: :type search_prefix: :param search_plural: :type search_plural: """ translation = super().translate(phrase) if translation is not None: return translation else: expr_translation = self.translate_expression(phrase) if expr_translation is not None: return expr_translation if search_prefix: # find prefix for preposition in prepositions: prefix = match(escape(preposition), phrase) if prefix is not None: prefix_translation = self.translate(prefix.group(0)) if prefix_translation is not None: tail = sub(preposition, '', phrase) tail_translation = self.translate(tail, search_prefix=False) if tail_translation is not None: return [prefix_translation, tail_translation] elif search_plural: # find plural suffix = search(r'lar$', phrase) if suffix is not None: head = sub(r'lar$', '', phrase) head_translation = self.translate(head, search_prefix=False, search_plural=False) if head_translation is not None: head_translation = dict(head_translation) head_translation['en'] += ' (pl.)' return head_translation return None def clean_entry(self, phrase): """ Replace GV Media Script parens with FSE parens :param phrase: :type phrase: """ return sub( r'(\([^)]*\))|\|([^|)]+)\|', lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1), phrase) class Clause (object): pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]+$))' def __init__ (self, text): self._text = text self._phrases = [] def __str__(self): return self._text def translate (self, dictionary): # Find phrases by attempts to translate words = findall(r'[^\s.]+', self._text) dmsg("words:", words, min_level=2) offset = 0 while offset < len(words): translation = None for i in range(len(words), offset, -1): dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2) phrase = ' '.join(words[offset:i]) dmsg("phrase:", phrase, min_level=2) translation = dictionary.translate(phrase) if translation is not None: dmsg("phrase-translation:", translation, min_level=2) dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2) self._phrases.append(translation) offset += i - offset break if translation is None: dmsg("phrase-translation:", translation, min_level=2) self._phrases.append(phrase) offset += 1 dmsg("phrases:", self._phrases, min_level=2) class Sentence (object): pattern = '(?!\s+)(.+?\.{1,3}|.+$)' def __init__ (self, text): self._text = text self.find_clauses(text) def __str__(self): return self._text def find_clauses (self, text): self._clauses = list(map( lambda clause_text: Clause(clause_text), findall(Clause.pattern, text, DOTALL))) def translate (self, dictionary): for clause in self._clauses: clause.translate(dictionary) class Paragraph (object): pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)' def __init__ (self, text): self._text = text self.find_sentences(text) def __str__ (self): return self._text def find_sentences (self, text): sentences = findall(Sentence.pattern, text, DOTALL) self._sentences = list(map( lambda sentence_text: Sentence(sentence_text), sentences)) def translate (self, dictionary): for sentence in self._sentences: sentence.translate(dictionary) class Text (object): def __init__ (self, text): self._text = text self.find_paragraphs(text) def __repr__ (self): return ''.join(map(lambda p: p.__repr__(), self._paragraphs)) def __str__ (self): return self._text def find_paragraphs (self, text): self._paragraphs = list(map( lambda paragraph_text: Paragraph(paragraph_text), findall(Paragraph.pattern, text, DOTALL))) def translate (self, dictionary): for paragraph in self._paragraphs: paragraph.translate(dictionary)