WebSVN - LCARS - Blame - Rev 298 - /trunk/tools/eazytrans/Dictionary.py

Rev	Author	Line No.	Line
293	PointedEar	1	"""
		2	Created on 2014-10-20
		3
		4	@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
		5
		6	"""
		7
		8	from os import chdir, stat
		9	from sys import stderr
		10	from os.path import dirname, realpath, basename
		11	from pickle import dump, load
		12	from re import match, DOTALL, search, sub, split, compile
		13
		14	debug_level = 2
		15
		16	def dmsg(args, *kwargs):
		17	if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
		18	kwargs['min_level'] = 1
		19
		20	if not hasattr(kwargs, 'file'):
		21	kwargs['file'] = stderr
		22
		23	if debug_level >= kwargs['min_level']:
		24	del kwargs['min_level']
		25	print(args, *kwargs)
		26
		27	def sort_dict_alnum_english_key(phrase):
		28	return sub(r'\{(.+)\}', r'\1', phrase[0]).lower()
		29
		30	class Dictionary(dict):
		31	"""
297	PointedEar	32	A Dictionary (not to be confused with its ancestor, dict)
		33	represents a word dictionary stored in a file.
293	PointedEar	34
		35	"""
296	PointedEar	36	_language_key = 'en'
293	PointedEar	37	_keys = "ipa\|en\|lit\|pos\|com\|tag\|ex"
		38	_expressions = {}
		39
294	PointedEar	40	def load (self, dictionary_file, language_key='en'):
297	PointedEar	41	"""
		42	Loads a word dictionary from a file.
		43	:param dictionary_file:
		44	:type dictionary_file:
		45	:param language_key:
		46	:type language_key:
		47	"""
296	PointedEar	48	self._language_key = language_key
		49
293	PointedEar	50	dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
		51
		52	chdir(dirname(realpath(__file__)))
		53
		54	pickle_file = basename(dictionary_file) + '.pickle'
		55
		56	try:
		57	pickle_mtime = stat(pickle_file).st_mtime
		58	except FileNotFoundError:
		59	pickle_mtime = None
		60
		61	if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
		62	dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
		63	phrase = None
		64	key = None
		65	value = None
		66	with open(dictionary_file) as f:
		67	indent = None
		68
		69	for line in f:
296	PointedEar	70	m = match(r'^\s{0}:\s(?P<phrase>.+)'.format(self._language_key), line)
293	PointedEar	71	if m is not None:
		72	phrase = m.group("phrase")
295	PointedEar	73	self[phrase] = {}
293	PointedEar	74	indent = None
		75	else:
		76	m = match(r'(?P<indent>\s)(?P<key>{0}):\s(?P<value>.+)'.format(self._keys), line)
		77	if m is not None:
		78	# join previous value if necessary
		79	if type(value) == list:
295	PointedEar	80	self[phrase][key] = ' '.join(value)
293	PointedEar	81
		82	indent = m.group("indent")
		83	key = m.group("key")
		84	value = m.group("value")
		85	# assign a string for memory efficiency
295	PointedEar	86	self[phrase][key] = value
293	PointedEar	87	elif indent is not None:
		88	m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line)
		89	if m is not None:
		90	if len(m.group("indent")) == len(indent) + 2:
		91	continuation = m.group("continuation")
		92	if type(value) == str:
		93	# when a continuation is first found, convert to a list
		94	# because there could be more continuations
295	PointedEar	95	value = self[phrase][key] = [value, continuation]
293	PointedEar	96	else:
		97	value.append(continuation)
		98
		99	# join last value if necessary
		100	if type(value) == list:
295	PointedEar	101	self[phrase][key] = ' '.join(value)
293	PointedEar	102
		103	dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
		104	# TODO: Pickle should only contain strings to be small
295	PointedEar	105	with open(pickle_file, mode='wb') as f: dump(self, f)
293	PointedEar	106	dmsg(' done.', min_level=1)
		107	else:
		108	dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
		109	with open(pickle_file, mode='rb') as f: pickle = load(f)
		110	for key, value in pickle.items():
295	PointedEar	111	self[key] = value
293	PointedEar	112
295	PointedEar	113	dmsg(' done ({0} entries).'.format(len(self)), min_level=1)
293	PointedEar	114
		115	def clean (self):
297	PointedEar	116	"""
		117	Cleans dictionary entries
		118	"""
296	PointedEar	119	re_parens = compile(r'$.+$', DOTALL)
297	PointedEar	120	re_parens_no_alt = compile(r'$([^\\|]+)$', DOTALL)
296	PointedEar	121	re_braces = compile(
295	PointedEar	122	r'^\s\{(?P<phrase>.+)\}(?:\s$(?P<variant>.+?)$)?\s*$',
		123	DOTALL)
296	PointedEar	124	re_semicolon = compile(r'\s;\s')
293	PointedEar	125
295	PointedEar	126	for orig_phrase, data in list(self.items()):
293	PointedEar	127	# if there are optional or alternating parts
296	PointedEar	128	if search(re_parens, orig_phrase):
293	PointedEar	129	if orig_phrase.find('\|') > -1:
		130	# TODO alternation
		131	pass
		132	else:
		133	# TODO optional parts
		134	pass
		135
		136	if orig_phrase.find(';') > -1:
		137	synonyms = map(
296	PointedEar	138	lambda x: sub(re_braces, r'\1', x),
		139	split(re_semicolon, orig_phrase))
293	PointedEar	140
		141	for synonym in synonyms:
295	PointedEar	142	self[synonym] = data
293	PointedEar	143
295	PointedEar	144	del self[orig_phrase]
293	PointedEar	145	else:
296	PointedEar	146	m = match(re_braces, orig_phrase)
293	PointedEar	147	if m is not None:
297	PointedEar	148	phrase = m.group('phrase')
		149
		150	if callable(getattr(self, 'clean_entry', None)):
		151	phrase = self.clean_entry(phrase)
		152
296	PointedEar	153	m_parens = search(re_parens, phrase)
		154	if m_parens is not None:
		155	# alternation and optional parts
		156	expr = sub(re_parens_no_alt, r'(?:\1)?', phrase)
		157	expr = sub('~', '(?=.)', expr)
		158	self._expressions[expr] = data
		159	else:
		160	# remove braces
		161	self[phrase] = data
295	PointedEar	162
		163	del self[orig_phrase]
296	PointedEar	164
		165	def translate (self, phrase):
297	PointedEar	166	"""
		167	Translate a phrase according to this dictionary.
		168	For language-specific processing, this method should be
		169	called/overridden by inheriting classes.
		170	:param phrase:
		171	:type phrase: str
		172	"""
296	PointedEar	173	translation = self.get(phrase.lower(), None)
		174	if translation is not None:
		175	translation[self._language_key] = phrase
		176	return translation
		177
		178	return None
		179
		180	def translate_expression (self, phrase):
297	PointedEar	181	"""
		182	Translate a phrase according entries in this dictionary
		183	based on regular expressions.
		184	:param phrase:
		185	:type phrase:
		186	"""
		187	for expression, data in sorted(self._expressions.items(), key=lambda item:-len(item[1])):
298	PointedEar	188	expression_match = match(r'{0}$'.format(expression), phrase)
296	PointedEar	189	if expression_match is not None:
		190	data[self._language_key] = expression_match.group(0)
		191	return data
		192
		193	return None

Subversion Repositories LCARS

(root)/trunk/tools/eazytrans/Dictionary.py @ 303 - Rev 298