View as "text/plain" |
Blame |
Last modification |
View Log
| RSS feed
1
"""
Created on 15.01.2015
@author: pelinux
"""
from Dictionary import Dictionary, dmsg
from re import escape, match, sub, search, findall, DOTALL
# Prefixes that VulcanDictionary.translate() tries to split off the start
# of a phrase, in this order.  Only the keys are read in this file; the
# values appear to be English glosses with '|' separating alternatives
# -- TODO confirm against the code that consumes them.
prepositions = {
    "fi'": 'on',
    "na'": 'at|to',
    "t'": 'of',
}
class VulcanDictionary(Dictionary):
    """
    Dictionary with extra lookup strategies for prefixed and plural words.

    A phrase that the base class cannot translate directly is retried as
    an expression, and after that either split into a prepositional
    prefix plus remainder or stripped of the plural suffix ``lar``.
    """

    def translate(self, phrase, search_prefix=True, search_plural=True):
        """
        Translate a phrase, falling back to prefix or plural analysis.

        :param phrase: Text to look up.
        :type phrase: str
        :param search_prefix: Try to split one of the ``prepositions``
            keys off the start of the phrase.
        :type search_prefix: bool
        :param search_plural: Try to strip a trailing ``lar``.  Only
            consulted when ``search_prefix`` is false.
        :type search_plural: bool
        :return: The direct or expression translation if there is one;
            ``[prefix_translation, tail_translation]`` for a prefixed
            phrase; a copy of the singular entry with ``' (pl.)'``
            appended to its ``'en'`` value for a plural; otherwise
            ``None``.
        """
        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            # find prefix
            for preposition in prepositions:
                prefix = match(escape(preposition), phrase)
                if prefix is None:
                    continue

                # Look the prefix up without the prefix/plural fallbacks:
                # with them enabled, a prefix that has no entry of its
                # own would match itself again and recurse without end.
                prefix_translation = self.translate(
                    prefix.group(0),
                    search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Cut off only the matched leading prefix; later
                # occurrences of the same text belong to the tail.
                tail = phrase[prefix.end():]
                tail_translation = self.translate(tail, search_prefix=False)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]
        elif search_plural:
            # find plural
            suffix = search(r'lar$', phrase)
            if suffix is not None:
                head = sub(r'lar$', '', phrase)
                head_translation = self.translate(
                    head, search_prefix=False, search_plural=False)
                if head_translation is not None:
                    # Work on a copy so that the looked-up entry itself
                    # is left untouched
                    head_translation = dict(head_translation)
                    head_translation['en'] += ' (pl.)'
                    return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Text delimited by pipes, ``|text|``, is rewritten as ``(text)``.
        Spans that are already parenthesized are matched first and copied
        through unchanged, so pipes inside them are not converted.

        :param phrase: Entry text to clean.
        :type phrase: str
        :return: The phrase with pipe-delimited spans parenthesized.
        :rtype: str
        """
        def to_parens(m):
            # Group 2 is the text between pipes, group 1 an existing
            # parenthesized span; exactly one of them takes part in a match
            piped = m.group(2)
            return '({0})'.format(piped) if piped else m.group(1)

        return sub(r'(\([^)]*\))|\|([^|)]+)\|', to_parens, phrase)
class Clause(object):
    """
    A clause of a sentence, split into phrases by dictionary lookup.
    """

    # Matches one clause: text that does not start with whitespace, up to
    # and including a hyphen preceded by whitespace, an en or em dash,
    # one to three dots, or the dash- and dot-free rest of the text.
    pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]+$))'

    def __init__(self, text):
        """
        :param text: Text of the clause.
        :type text: str
        """
        self._text = text
        self._phrases = []

    def __str__(self):
        return self._text

    def translate(self, dictionary):
        """
        Split the clause into phrases by greedy longest-match lookup.

        Starting at each word position, the longest run of words that
        ``dictionary.translate()`` can translate is taken as one phrase
        and its translation is stored; a word that starts no translatable
        run is stored untranslated.  The result replaces any phrases
        found by an earlier call.

        :param dictionary: Object whose ``translate(phrase)`` method
            returns ``None`` for a phrase that it cannot translate.
        """
        # Start afresh so that repeated calls do not pile up duplicates
        self._phrases = []

        # Find phrases by attempts to translate
        words = findall(r'[^\s.]+', self._text)
        dmsg("words:", words, min_level=2)

        offset = 0
        while offset < len(words):
            # Try the longest candidate first, then drop one word at a
            # time from the end until only the word at ``offset`` is left
            for i in range(len(words), offset, -1):
                dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)
                phrase = ' '.join(words[offset:i])
                dmsg("phrase:", phrase, min_level=2)
                translation = dictionary.translate(phrase)
                if translation is not None:
                    dmsg("phrase-translation:", translation, min_level=2)
                    dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)
                    self._phrases.append(translation)
                    # Continue after the last word of the translated run
                    offset = i
                    break
            else:
                # Nothing starting here could be translated; ``phrase``
                # is now the single word at ``offset``, so keep it as is
                dmsg("phrase-translation:", translation, min_level=2)
                self._phrases.append(phrase)
                offset += 1

        dmsg("phrases:", self._phrases, min_level=2)
class Sentence(object):
    """
    A sentence, split into clauses on construction.
    """

    # Matches one sentence: text that does not start with whitespace, up
    # to and including one to three dots, or else the rest of the text.
    # Raw string, so that ``\s`` and ``\.`` reach the regex engine
    # without being invalid string escape sequences.
    pattern = r'(?!\s+)(.+?\.{1,3}|.+$)'

    def __init__(self, text):
        """
        :param text: Text of the sentence.
        :type text: str
        """
        self._text = text
        self.find_clauses(text)

    def __str__(self):
        return self._text

    def find_clauses(self, text):
        """
        Store a ``Clause`` for each match of ``Clause.pattern`` in text.

        :param text: Text to split into clauses.
        :type text: str
        """
        self._clauses = [
            Clause(clause_text)
            for clause_text in findall(Clause.pattern, text, DOTALL)]

    def translate(self, dictionary):
        """
        Translate each clause of the sentence in order.

        :param dictionary: Passed on to ``Clause.translate()``.
        """
        for clause in self._clauses:
            clause.translate(dictionary)
class Paragraph(object):
    """
    A paragraph, split into sentences on construction.
    """

    # Matches one paragraph: as little text as possible, up to and
    # including a run of two or more line breaks, or up to the end.
    pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)'

    def __init__(self, text):
        """
        :param text: Text of the paragraph.
        :type text: str
        """
        self._text = text
        self.find_sentences(text)

    def __str__(self):
        return self._text

    def find_sentences(self, text):
        """
        Store a ``Sentence`` for each match of ``Sentence.pattern`` in text.

        :param text: Text to split into sentences.
        :type text: str
        """
        sentence_texts = findall(Sentence.pattern, text, DOTALL)
        self._sentences = [Sentence(chunk) for chunk in sentence_texts]

    def translate(self, dictionary):
        """
        Translate each sentence of the paragraph in order.

        :param dictionary: Passed on to ``Sentence.translate()``.
        """
        for item in self._sentences:
            item.translate(dictionary)
class Text(object):
    """
    A whole text, split into paragraphs on construction.
    """

    def __init__(self, text):
        """
        :param text: The complete text.
        :type text: str
        """
        self._text = text
        self.find_paragraphs(text)

    def __repr__(self):
        # Concatenation of the paragraphs' own representations
        return ''.join(p.__repr__() for p in self._paragraphs)

    def __str__(self):
        return self._text

    def find_paragraphs(self, text):
        """
        Store a ``Paragraph`` for each match of ``Paragraph.pattern`` in text.

        :param text: Text to split into paragraphs.
        :type text: str
        """
        paragraph_texts = findall(Paragraph.pattern, text, DOTALL)
        self._paragraphs = [Paragraph(chunk) for chunk in paragraph_texts]

    def translate(self, dictionary):
        """
        Translate each paragraph of the text in order.

        :param dictionary: Passed on to ``Paragraph.translate()``.
        """
        for item in self._paragraphs:
            item.translate(dictionary)