WebSVN - LCARS - Blame - Rev 295 - /trunk/tools/eazytrans/Dictionary.py

Rev	Author	Line No.	Line
293	PointedEar	1	"""
		2	Created on 2014-10-20
		3
		4	@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
		5
		6	"""
		7
		8	from os import chdir, stat
		9	from sys import stderr
		10	from os.path import dirname, realpath, basename
		11	from pickle import dump, load
		12	from re import match, DOTALL, search, sub, split, compile
		13
		14	debug_level = 2
		15
		16	def dmsg(args, *kwargs):
		17	if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
		18	kwargs['min_level'] = 1
		19
		20	if not hasattr(kwargs, 'file'):
		21	kwargs['file'] = stderr
		22
		23	if debug_level >= kwargs['min_level']:
		24	del kwargs['min_level']
		25	print(args, *kwargs)
		26
		27	def sort_dict_alnum_english_key(phrase):
		28	return sub(r'\{(.+)\}', r'\1', phrase[0]).lower()
		29
		30	class Dictionary(dict):
		31	"""
		32	classdocs
		33
		34	"""
		35	_keys = "ipa\|en\|lit\|pos\|com\|tag\|ex"
		36	_expressions = {}
		37
294	PointedEar	38	def load (self, dictionary_file, language_key='en'):
293	PointedEar	39	dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
		40
		41	chdir(dirname(realpath(__file__)))
		42
		43	pickle_file = basename(dictionary_file) + '.pickle'
		44
		45	try:
		46	pickle_mtime = stat(pickle_file).st_mtime
		47	except FileNotFoundError:
		48	pickle_mtime = None
		49
		50	if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
		51	dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
		52	phrase = None
		53	key = None
		54	value = None
		55	with open(dictionary_file) as f:
		56	indent = None
		57
		58	for line in f:
294	PointedEar	59	m = match(r'^\s{0}:\s(?P<phrase>.+)'.format(language_key), line)
293	PointedEar	60	if m is not None:
		61	phrase = m.group("phrase")
295	PointedEar	62	self[phrase] = {}
293	PointedEar	63	indent = None
		64	else:
		65	m = match(r'(?P<indent>\s)(?P<key>{0}):\s(?P<value>.+)'.format(self._keys), line)
		66	if m is not None:
		67	# join previous value if necessary
		68	if type(value) == list:
295	PointedEar	69	self[phrase][key] = ' '.join(value)
293	PointedEar	70
		71	indent = m.group("indent")
		72	key = m.group("key")
		73	value = m.group("value")
		74	# assign a string for memory efficiency
295	PointedEar	75	self[phrase][key] = value
293	PointedEar	76	elif indent is not None:
		77	m = match(r'(?P<indent>\s+)(?P<continuation>\S.*)', line)
		78	if m is not None:
		79	if len(m.group("indent")) == len(indent) + 2:
		80	continuation = m.group("continuation")
		81	if type(value) == str:
		82	# when a continuation is first found, convert to a list
		83	# because there could be more continuations
295	PointedEar	84	value = self[phrase][key] = [value, continuation]
293	PointedEar	85	else:
		86	value.append(continuation)
		87
		88	# join last value if necessary
		89	if type(value) == list:
295	PointedEar	90	self[phrase][key] = ' '.join(value)
293	PointedEar	91
		92	dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
		93	# TODO: Pickle should only contain strings to be small
295	PointedEar	94	with open(pickle_file, mode='wb') as f: dump(self, f)
293	PointedEar	95	dmsg(' done.', min_level=1)
		96	else:
		97	dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
		98	with open(pickle_file, mode='rb') as f: pickle = load(f)
		99	for key, value in pickle.items():
295	PointedEar	100	self[key] = value
293	PointedEar	101
295	PointedEar	102	dmsg(' done ({0} entries).'.format(len(self)), min_level=1)
293	PointedEar	103
		104	def clean (self):
		105	parens_re = compile(r'$.+$', DOTALL)
295	PointedEar	106	braces_re = compile(
		107	r'^\s\{(?P<phrase>.+)\}(?:\s$(?P<variant>.+?)$)?\s*$',
		108	DOTALL)
293	PointedEar	109	semicolon_re = compile(r'\s;\s')
		110
295	PointedEar	111	for orig_phrase, data in list(self.items()):
293	PointedEar	112	# if there are optional or alternating parts
		113	if search(parens_re, orig_phrase):
		114	if orig_phrase.find('\|') > -1:
		115	# TODO alternation
		116	pass
		117	else:
		118	# TODO optional parts
		119	pass
		120
		121	if orig_phrase.find(';') > -1:
		122	synonyms = map(
		123	lambda x: sub(braces_re, r'\1', x),
		124	split(semicolon_re, orig_phrase))
		125
		126	for synonym in synonyms:
295	PointedEar	127	self[synonym] = data
293	PointedEar	128
295	PointedEar	129	del self[orig_phrase]
293	PointedEar	130	else:
		131	m = match(braces_re, orig_phrase)
		132	if m is not None:
295	PointedEar	133	phrase = m.group("phrase")
		134	m2 = match(parens_re, phrase)
		135	if m2 is not None:
		136	# TODO alternation and optional parts
		137	pass
		138
		139	self[phrase] = data
		140	del self[orig_phrase]

Subversion Repositories LCARS

(root)/trunk/tools/eazytrans/Dictionary.py @ 300 - Rev 295