WebSVN - LCARS - Blame - Rev 291 - /trunk/tools/eazytrans/vuh.py

Rev	Author	Line No.	Line
291	PointedEar	1	'''
		2	Created on 2014-10-20
		3
		4	@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
		5	'''
		6	from sys import argv, stderr
		7	from re import findall, DOTALL, IGNORECASE, match, sub, compile, \
		8	split
		9	from os import chdir
		10	from os.path import dirname, realpath
		11	from collections import OrderedDict
		12	from functools import cmp_to_key
		13	from copy import deepcopy
		14	from collections.abc import MutableSequence
		15
		16	dictionary = {}
		17
		18	prepositions = {
		19	"fi'": 'on',
		20	"na'": 'at\|to',
		21	"t'": 'of'
		22	}
		23
		24	class MutableString2(MutableSequence):
		25	def __init__(self, value=None):
		26	self._values = [str(value)] if value is not None else []
		27
		28	def __add__(self, value):
		29	return ''.join([self, value])
		30
		31	def __delitem__(self):
		32	raise NotImplementedError
		33
		34	def __getitem__(self, index):
		35	return str(self)[index]
		36
		37	def __len__(self):
		38	return len(str(self))
		39
		40	def __repr__(self):
		41	return ''.join(self._values)
		42
		43	def __setitem__(self, index, value):
		44	raise NotImplementedError
		45
		46	def __str__(self):
		47	return self.__repr__()
		48
		49	def extend(self, values):
		50	self._values.append(values)
		51
		52	def insert(self, index, value):
		53	raise NotImplementedError
		54
		55	def load_dictionary(dictionary, dictionary_file):
		56	print('Loading dictionary {0} ...'.format(dictionary_file), end='', file=stderr)
		57
		58	chdir(dirname(realpath(__file__)))
		59	with open(dictionary_file) as f:
		60	keys = "ipa\|en\|lit\|pos\|com\|tag\|ex"
		61	indent = None
		62	value = None
		63
		64	for line in f:
		65	m = match(r'^\svuh:\s(?P<phrase>.+)', line)
		66	if m is not None:
		67	phrase = m.group("phrase")
		68	dictionary[phrase] = {}
		69	indent = None
		70	else:
		71	m = match(
		72	r'(?P<indent>\s)(?P<key>{0}):\s(?P<value>.+)'.format(keys),
		73	line)
		74	if m is not None:
		75	indent = m.group("indent")
		76	key = m.group("key")
		77	value = m.group("value")
		78	value = dictionary[phrase][key] = MutableString2(value)
		79	elif indent is not None:
		80	m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line)
		81	if m is not None:
		82	if len(m.group("indent")) == len(indent) + 2:
		83	dictionary[phrase][key] += (" " + m.group("continuation"))
		84
		85	print(' done ({0} entries).'.format(len(dictionary)), file=stderr)
		86
		87	def clean_dictionary(dictionary):
		88	braces_re = compile(r'^\s\{(.+)\}\s$')
		89	semicolon_re = compile(r'\s;\s')
		90
		91	for orig_phrase, data in list(dictionary.items()):
		92	if orig_phrase.find(";") > -1:
		93	synonyms = map(
		94	lambda x: sub(braces_re, r'\1', orig_phrase),
		95	split(semicolon_re, orig_phrase))
		96
		97	for synonym in synonyms:
		98	dictionary[synonym] = deepcopy(data)
		99
		100	del dictionary[orig_phrase]
		101	else:
		102	m = match(braces_re, orig_phrase)
		103	if m is not None:
		104	dictionary[m.group(1)] = deepcopy(dictionary[orig_phrase])
		105	del dictionary[orig_phrase]
		106
		107	def sort_dict_alnum_english_key(phrase):
		108	return sub(r'\{(.+)\}', r'\1', phrase[0]).lower()
		109
		110	def get_sort_dict_alnum_vulcan_key():
		111	letters = list(map(str.lower, [
		112	" ", 'S', 'T', 'P', 'K', 'R', 'L', 'A', 'Sh', 'O', 'U', 'D',
		113	'V', 'Kh', 'E', 'H', 'G', 'Ch', 'I', 'N', 'Zh', 'M', 'Y', 'F', 'Z',
		114	'Th', 'W', 'B', "'", '-']))
		115	letter_values = dict(map(lambda x: (x[1], x[0]), enumerate(letters)))
		116	letters_re = compile(r'(?:{0})'.format('\|'.join(sorted(letters, key=lambda char:-len(char)))))
		117
		118	def sort_dict_alnum_vulcan (a, b):
		119	# split into Vulcan letters
		120	a = findall(letters_re, sort_dict_alnum_english_key(a))
		121	b = findall(letters_re, sort_dict_alnum_english_key(b))
		122
		123	if len(a) < len(b):
		124	for index, char in enumerate(a):
		125	diff = letter_values[char] - letter_values[b[index]]
		126	if diff != 0:
		127	return diff
		128	return -1
		129
		130	# len(b) <= len(a)
		131	for index, char in enumerate(b):
		132	diff = letter_values[a[index]] - letter_values[char]
		133	if diff != 0:
		134	return diff
		135
		136	return 1 if len(b) < len(a) else 0
		137
		138	return cmp_to_key(sort_dict_alnum_vulcan)
		139
		140	def translate (word, recursion=False):
		141	translation = dictionary.get(word.lower(), None)
		142	if translation is not None:
		143	translation = translation["en"]
		144	if match('[A-Z]', word):
		145	return sub('[a-z]', lambda ch: ch.group(0).upper(), str(translation), count=1)
		146	return translation
		147
		148	if not recursion:
		149	# prepositions attached?
		150	for prep, prep_transl in prepositions.items():
		151	if (match(prep, word)):
		152	real_word = word.replace(r'^' + prep, '')
		153	real_word_transl = translate(real_word, recursion=True)
		154	if real_word_transl is not None:
		155	return prep_transl + ' ' + real_word_transl
		156
		157	if recursion:
		158	return None
		159	else:
		160	# Not in dictionary: proper name or missing for other reasons
		161	return '{{{0}}}'.format(word)
		162
		163	if __name__ == '__main__':
		164	load_dictionary(dictionary, 'vuh-gol-en.dict.zdb.txt')
		165	clean_dictionary(dictionary)
		166
		167	# try:
		168	# for phrase, data in OrderedDict(sorted(
		169	# dictionary.items(),
		170	# key=get_sort_dict_alnum_vulcan_key()
		171	# )).items():
		172	# print(phrase, "=", data)
		173	# except BrokenPipeError:
		174	# pass
		175
		176	text = argv[1]
		177	sentences = findall(r'(?!\s+)(?:.+?\.{1,3}\|.+$)', text, DOTALL)
		178	for sentence in sentences:
		179	print(sentence)
		180
		181	words = findall(r"(?!\s+)[a-z'-]{2,}", sentence, IGNORECASE)
		182	print(words)
		183
		184	translated_words = list(map(translate, words))
		185	print(translated_words)
		186
		187	for index, word in enumerate(words):
		188	sentence = sentence.replace(word, str(translated_words[index]))
		189	print(sentence)
		190
		191	# replace punctuation
		192	for symbol, replacement in ({" - ": ", "}).items():
		193	sentence = sentence.replace(symbol, replacement)
		194
		195	print(sentence)

Subversion Repositories LCARS

(root)/trunk/tools/eazytrans/vuh.py @ 292 - Rev 291