Subversion Repositories LCARS

Rev

View as "text/plain" | Blame | Last modification | View Log | RSS feed

1
"""
Created on 15.01.2015

@author: pelinux

"""

from Dictionary import Dictionary, dmsg
from re import escape, match, sub, search, findall, DOTALL

# Prepositional prefixes recognised by VulcanDictionary.translate(), which
# tries the keys in insertion order.  Each key is the prefix as it appears at
# the start of a word; each value is an English gloss (presumably with "|"
# separating alternatives -- the values are not read by the code visible in
# this file).
prepositions = dict((
    ("fi'", 'on'),
    ("na'", 'at|to'),
    ("t'", 'of'),
))

class VulcanDictionary (Dictionary):
    """
    Dictionary that adds fallbacks for phrases without a direct entry:
    splitting off a prepositional prefix (see ``prepositions``) and
    stripping the plural suffix ``lar``.
    """

    def translate (self, phrase, search_prefix=True, search_plural=True):
        """
        Translate a phrase, falling back to prefix and plural analysis.

        Lookup order:

        1. the inherited ``translate()``;
        2. ``self.translate_expression()`` (not defined in this class;
           presumably inherited from ``Dictionary``);
        3. if ``search_prefix``: a leading preposition whose remainder
           can itself be translated;
        4. if ``search_plural``: the phrase with a trailing ``lar``
           removed.

        :param phrase: the phrase to translate
        :type phrase: str
        :param search_prefix: try to split off a prepositional prefix
        :type search_prefix: bool
        :param search_plural: try to strip the plural suffix ``lar``
        :type search_plural: bool
        :return: the translation found in step 1 or 2; or the list
            ``[prefix_translation, tail_translation]`` for step 3; or,
            for step 4, a ``dict`` copy of the singular's translation
            with ``' (pl.)'`` appended to its ``'en'`` value; or
            ``None`` if nothing matched
        """

        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            for preposition in prepositions:
                if not phrase.startswith(preposition):
                    continue

                # Look the bare preposition up with both fallbacks
                # disabled: with them enabled, a preposition that has no
                # dictionary entry would match itself as its own prefix
                # and recurse without end.
                prefix_translation = self.translate(
                    preposition, search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Remove only the leading prefix; the same character
                # sequence may legitimately occur again later in the
                # phrase and must stay part of the tail.
                tail = phrase[len(preposition):]
                tail_translation = self.translate(tail, search_prefix=False)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]

        # Deliberately a separate "if", not "elif": the plural fallback
        # must also be tried when prefix search was enabled but found
        # nothing.
        if search_plural and search(r'lar$', phrase) is not None:
            head = sub(r'lar$', '', phrase)
            head_translation = self.translate(
                head, search_prefix=False, search_plural=False)
            if head_translation is not None:
                # Copy before annotating so that the entry held by the
                # dictionary is not modified.
                # NOTE(review): assumes the translation is a mapping
                # with an 'en' key -- confirm against Dictionary.
                head_translation = dict(head_translation)
                head_translation['en'] += ' (pl.)'
                return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Text delimited by pipes (``|text|``) is rewritten as ``(text)``.
        Groups that are already parenthesized are matched first and
        returned unchanged, so pipes inside them are not touched.

        :param phrase: the dictionary entry text to clean
        :type phrase: str
        :return: the phrase with pipe-delimited groups parenthesized
        :rtype: str
        """

        def to_fse_parens(m):
            # group(2) is the text between pipes; group(1) is an
            # existing "(...)" group, which is kept as it is.
            piped = m.group(2)
            return '({0})'.format(piped) if piped else m.group(1)

        return sub(r'(\([^)]*\))|\|([^|)]+)\|', to_fse_parens, phrase)

class Clause (object):
    """
    A clause of a sentence, split into phrases by dictionary lookups.
    """

    # A clause starts at a non-whitespace character and runs lazily up to
    # and including a hyphen preceded by whitespace, an en/em dash, one to
    # three dots, or -- if no dash or dot follows -- the end of the text.
    # NOTE(review): every alternative needs at least one character after
    # the first, so a one-character remainder is not matched.
    pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]+$))'

    def __init__ (self, text):
        self._phrases = []
        self._text = text

    def __str__(self):
        return self._text

    def translate (self, dictionary):
        """
        Split the clause into phrases by greedy dictionary lookup.

        Starting at each word position, the longest run of words that
        ``dictionary.translate()`` can translate is taken as one phrase and
        its translation is appended to ``self._phrases``.  If not even the
        single word translates, the word itself is appended untranslated.

        :param dictionary: object providing ``translate(phrase)``, which
            returns ``None`` for phrases it cannot translate
        """
        # Words are runs of characters that are neither whitespace nor dots
        words = findall(r'[^\s.]+', self._text)

        dmsg("words:", words, min_level=2)

        word_count = len(words)
        start = 0
        while start < word_count:
            # Try the longest candidate first, dropping one word from the
            # end on every failed attempt.
            for end in range(word_count, start, -1):
                candidate_words = words[start:end]
                dmsg("words[{0}:{1}] = {2}".format(start, end, candidate_words), min_level=2)

                candidate = ' '.join(candidate_words)

                dmsg("phrase:", candidate, min_level=2)

                result = dictionary.translate(candidate)
                if result is None:
                    continue

                dmsg("phrase-translation:", result, min_level=2)
                dmsg('words[{0}:{1}] = ["{2}"]'.format(start, end, result), min_level=2)

                self._phrases.append(result)
                start = end
                break
            else:
                # Nothing translated; the last candidate tried was the
                # single word at ``start``, which is kept untranslated.
                dmsg("phrase-translation:", None, min_level=2)
                self._phrases.append(candidate)
                start += 1

        dmsg("phrases:", self._phrases, min_level=2)

class Sentence (object):
    """
    A sentence of a paragraph, split into clauses on construction.
    """

    # A sentence starts at a non-whitespace character and runs lazily up to
    # and including one to three dots, or to the end of the text if no dot
    # follows.  This must be a raw string: "\s" and "\." are invalid escape
    # sequences in a normal string literal.
    pattern = r'(?!\s+)(.+?\.{1,3}|.+$)'

    def __init__ (self, text):
        """
        :param text: the text of the sentence
        :type text: str
        """
        self._text = text
        self.find_clauses(text)

    def __str__(self):
        return self._text

    def find_clauses (self, text):
        """
        Split ``text`` using ``Clause.pattern`` and store the resulting
        ``Clause`` objects in ``self._clauses``.

        :param text: the text to split into clauses
        :type text: str
        """
        self._clauses = [
            Clause(clause_text)
            for clause_text in findall(Clause.pattern, text, DOTALL)]

    def translate (self, dictionary):
        """
        Translate every clause of this sentence in order.

        :param dictionary: passed through to ``Clause.translate()``
        """
        for clause in self._clauses:
            clause.translate(dictionary)

class Paragraph (object):
    """
    A paragraph of a text, split into sentences on construction.
    """

    # A paragraph runs lazily up to and including a run of two or more
    # line breaks (LF, CRLF or CR), or to the end of the text.
    pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)'

    def __init__ (self, text):
        """
        :param text: the text of the paragraph
        :type text: str
        """
        self._text = text
        self.find_sentences(text)

    def __str__ (self):
        return self._text

    def find_sentences (self, text):
        """
        Split ``text`` using ``Sentence.pattern`` and store the resulting
        ``Sentence`` objects in ``self._sentences``.

        :param text: the text to split into sentences
        :type text: str
        """
        self._sentences = [
            Sentence(sentence_text)
            for sentence_text in findall(Sentence.pattern, text, DOTALL)]

    def translate (self, dictionary):
        """
        Translate every sentence of this paragraph in order.

        :param dictionary: passed through to ``Sentence.translate()``
        """
        for item in self._sentences:
            item.translate(dictionary)

class Text (object):
    """
    A whole text, split into paragraphs on construction.
    """

    def __init__ (self, text):
        """
        :param text: the complete text
        :type text: str
        """
        self._text = text
        self.find_paragraphs(text)

    def __repr__ (self):
        # Concatenation of the paragraphs' own representations
        return ''.join(p.__repr__() for p in self._paragraphs)

    def __str__ (self):
        return self._text

    def find_paragraphs (self, text):
        """
        Split ``text`` using ``Paragraph.pattern`` and store the resulting
        ``Paragraph`` objects in ``self._paragraphs``.

        :param text: the text to split into paragraphs
        :type text: str
        """
        chunks = findall(Paragraph.pattern, text, DOTALL)
        self._paragraphs = [Paragraph(chunk) for chunk in chunks]

    def translate (self, dictionary):
        """
        Translate every paragraph of this text in order.

        :param dictionary: passed through to ``Paragraph.translate()``
        """
        for item in self._paragraphs:
            item.translate(dictionary)