Subversion Repositories LCARS

Compare Revisions

Last modification

Ignore whitespace Rev 297 → Rev 300

/trunk/tools/eazytrans/vuh.py
6,21 → 6,14
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
'''
from sys import argv, stderr
from re import findall, DOTALL, match, sub, compile, \
escape, search
from re import findall, compile
from os.path import basename
from functools import cmp_to_key
from Dictionary import Dictionary, dmsg, \
sort_dict_alnum_english_key
from Dictionary import dmsg, sort_dict_alnum_english_key
from VulcanDictionary import VulcanDictionary, Text
 
# Module-level dictionary object; it starts out as an empty mapping.
dictionary = dict()

# Prefix strings that VulcanDictionary.translate() tries to split off the
# start of a phrase, each mapped to an English string.
# NOTE(review): "|" in a value presumably separates alternative
# renderings -- confirm against the code that consumes the values.
prepositions = {
    "fi'": "on",
    "na'": "at|to",
    "t'": "of",
}
 
def cli_help():
    """
    Print a one-line usage synopsis to standard output.

    The program name shown is the base name of ``argv[0]``.
    """
    script_name = basename(argv[0])
    usage_line = 'Usage: {0} TEXT...'.format(script_name)
    print(usage_line)
 
54,63 → 47,7
 
return cmp_to_key(sort_dict_alnum_vulcan)
 
class VulcanDictionary (Dictionary):
    """
    Dictionary whose lookups fall back to expressions, prepositional
    prefixes and the ``lar`` plural suffix when a phrase is not found
    directly.
    """
    def translate (self, phrase, search_prefix=True, search_plural=True):
        """
        Translate a phrase, trying progressively looser lookups.

        The first lookup that yields a result wins:

        1. the inherited ``translate()`` for the phrase as given;
        2. ``self.translate_expression()`` (not defined in this class);
        3. if ``search_prefix`` is true, split a leading key of the
           module-level ``prepositions`` off the phrase and translate
           the prefix and the remainder separately;
        4. if ``search_plural`` is true, strip a trailing ``lar`` and
           translate the remaining head.

        :param phrase: text to translate
        :type phrase: str
        :param search_prefix: try to split off a leading preposition
        :type search_prefix: bool
        :param search_plural: try to strip the plural suffix ``lar``
        :type search_plural: bool
        :return: the result of the direct or expression lookup; or a
          list ``[prefix_translation, tail_translation]``; or a copy of
          the head's translation with ``' (pl.)'`` appended to its
          ``'en'`` entry; or ``None`` if nothing matched
        """
        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            # find prefix
            for preposition in prepositions:
                if not phrase.startswith(preposition):
                    continue

                # Look the preposition up without prefix/plural search:
                # it would otherwise match itself as a prefix and, when
                # missing from the dictionary, recurse without end.
                prefix_translation = self.translate(
                    preposition, search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Slice instead of substituting: only the leading
                # occurrence may be removed, and the preposition must
                # not be interpreted as a regular expression.
                tail = phrase[len(preposition):]
                tail_translation = self.translate(tail, search_prefix=False)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]

        # Checked independently of the prefix search so that an
        # unprefixed plural is also found on a default call.
        if search_plural:
            # find plural
            suffix = search(r'lar$', phrase)
            if suffix is not None:
                head = sub(r'lar$', '', phrase)
                head_translation = self.translate(
                    head, search_prefix=False, search_plural=False)
                if head_translation is not None:
                    # Copy first so the stored entry is not modified.
                    head_translation = dict(head_translation)
                    head_translation['en'] += ' (pl.)'
                    return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Every ``|text|`` span is rewritten as ``(text)``.  Spans that
        are already parenthesized are matched first and returned
        unchanged, so their content is never rewritten.

        :param phrase: entry text to clean
        :type phrase: str
        :return: the phrase with ``|...|`` spans turned into ``(...)``
        :rtype: str
        """
        return sub(
            r'(\([^)]*\))|\|([^|)]+)\|',
            lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
            phrase)
 
if __name__ == '__main__':
if len(argv) < 2:
print('Nothing to translate.', end='\n\n', file=stderr)
123,60 → 60,30
dictionary.load('vuh-gol-en.dict.zdb.txt', 'vuh')
dictionary.clean()
 
# try:
# for phrase, data in OrderedDict(sorted(
# dictionary.items(),
# key=get_sort_dict_alnum_vulcan_key()
# )).items():
# print(phrase, "=", data)
# except BrokenPipeError:
# pass
# try:
# for phrase, data in OrderedDict(sorted(
# dictionary.items(),
# key=get_sort_dict_alnum_vulcan_key()
# )).items():
# print(phrase, "=", data)
# except BrokenPipeError:
# pass
 
text = Text(text)
 
dmsg("text:", text, min_level=2)
sentences = findall(r'(?!\s+)(?:.+?\.{1,3}|.+$)', text, DOTALL)
dmsg("sentences:", sentences, min_level=2)
for sentence in sentences:
dmsg("sentence:", sentence, min_level=2)
dmsg("text:", text.__repr__(), min_level=2)
 
clauses = findall(r'(?!\s+)(?:.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|.+$))', sentence, DOTALL)
dmsg("clauses:", clauses, min_level=2)
for clause in clauses:
dmsg("clause:", clause, min_level=2)
text.translate(dictionary)
 
words = findall(r'[^\s.]+', clause)
dmsg("words:", words, min_level=2)
 
offset = 0
while offset < len(words):
translation = None
 
for i in range(len(words), offset, -1):
dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)
phrase = ' '.join(words[offset:i])
 
dmsg("phrase:", phrase, min_level=2)
 
translation = dictionary.translate(phrase)
 
if translation is not None:
dmsg("phrase-translation:", translation, min_level=2)
dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)
words[offset:i] = [translation]
offset += i - offset
break
 
if translation is None:
dmsg("phrase-translation:", translation, min_level=2)
offset += 1
 
dmsg("words-translation:", words, min_level=2)
dmsg("words-translation-reduced:",
list(map(
lambda word:
word['en']
if (hasattr(word, "get") and word.get('en', None) is not None)
else word,
words)),
min_level=2)
# for key, value in dictionary._expressions.items():
# dmsg(key, value, min_level=3)
# dmsg("words-translation:", words, min_level=2)
# dmsg("words-translation-reduced:",
# list(map(
# lambda word:
# word['en']
# if (hasattr(word, "get") and word.get('en', None) is not None)
# else word,
# words)),
# min_level=2)
# for key, value in dictionary._expressions.items():
# dmsg(key, value, min_level=3)