# WebSVN - LCARS - Rev 294 - /trunk/tools/eazytrans/Dictionary.py

"""
Created on 2014-10-20

@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>

"""

from os import chdir, stat
from sys import stderr
from os.path import dirname, realpath, basename
from pickle import dump, load
from re import match, DOTALL, search, sub, split, compile
from copy import deepcopy

# Messages are printed by dmsg() only if this is >= their ``min_level``.
debug_level = 2


def dmsg(*args, **kwargs):
    """
    Print a debug message if the module-level ``debug_level`` permits it.

    All positional and keyword arguments are passed on to ``print()``,
    except for the following keyword arguments:

    :param min_level: Minimum value of ``debug_level`` required for the
        message to be printed.  Defaults to 1 if omitted or ``None``.
    :param file: Stream to print to.  Defaults to ``sys.stderr`` if omitted.
    """
    # kwargs is a dict: its entries are keys, not attributes, so they must
    # be looked up with dict methods (hasattr() would never find them).
    # min_level is removed here because print() does not accept it.
    min_level = kwargs.pop('min_level', None)
    if min_level is None:
        min_level = 1

    kwargs.setdefault('file', stderr)

    if debug_level >= min_level:
        print(*args, **kwargs)

def sort_dict_alnum_english_key(phrase):
    """
    Return a case-insensitive sort key for a dictionary item.

    :param phrase: Indexable whose first element is the phrase string
        (presumably a ``(phrase, data)`` pair from ``dict.items()`` --
        TODO confirm against the callers).
    :return: The phrase in lower case, in which the text from the first
        ``{`` to the last ``}`` on a line has been replaced by what is
        between those two braces.
    """
    # The match is greedy, so only the outermost pair of braces is removed.
    without_braces = sub(r'\{(.+)\}', r'\1', phrase[0])
    return without_braces.lower()

class Dictionary(dict):
    """
    Dictionary of phrases that can be loaded from a text file.

    Maps each phrase to a dict of its fields (field name -> string value).
    """
    # Alternation of the field names recognized in a dictionary file;
    # interpolated into a regular expression when a file is parsed.
    _keys = "ipa|en|lit|pos|com|tag|ex"
    # Not referenced by load() or clean().
    _expressions = {}

    def load(self, dictionary_file, language_key='en'):
        """
        Load the entries of a dictionary file into this dictionary.

        The parsed result is cached in a pickle file named after the
        basename of ``dictionary_file`` plus ``.pickle``.  The cache is used
        instead of the dictionary file unless it is missing or older than
        the dictionary file.

        NOTE: This changes the working directory of the process to the
        directory of this module; relative paths, including
        ``dictionary_file`` and the pickle file, are resolved against it.

        :param dictionary_file: Path of the dictionary file.
        :param language_key: Name of the field whose value starts a new
            entry and is used as its phrase.  It is inserted unescaped
            into a regular expression.
        """
        dmsg('Loading dictionary ', end='', min_level=1)

        chdir(dirname(realpath(__file__)))

        pickle_file = basename(dictionary_file) + '.pickle'

        try:
            pickle_mtime = stat(pickle_file).st_mtime
        except FileNotFoundError:
            pickle_mtime = None

        if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
            dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
            self._parse_file(dictionary_file, language_key)

            dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
            # TODO: Pickle should only contain strings to be small
            with open(pickle_file, mode='wb') as f:
                dump(self, f)
            dmsg(' done.', min_level=1)
        else:
            dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
            # NOTE(security): unpickling can execute arbitrary code.  This is
            # only acceptable as long as the cache file is written by this
            # method alone and is not writable by untrusted parties.
            with open(pickle_file, mode='rb') as f:
                self.update(load(f))

        dmsg(' done ({0} entries).'.format(len(self)), min_level=1)

    def _parse_file(self, dictionary_file, language_key):
        """
        Parse a dictionary file and add its entries to this dictionary.

        A line ``<language_key>: <phrase>`` starts a new entry.  A line
        ``<key>: <value>`` with one of the keys in ``_keys`` sets a field of
        the current entry; such lines before the first entry are ignored.
        A line indented by exactly two more whitespace characters than the
        preceding field line continues the value of that field; the pieces
        are joined with single spaces.
        """
        phrase_re = compile(r'^\s*{0}:\s*(?P<phrase>.+)'.format(language_key))
        field_re = compile(
            r'(?P<indent>\s*)(?P<key>{0}):\s*(?P<value>.+)'.format(self._keys))
        continuation_re = compile(r'(?P<indent>\s+)(?P<continuation>\S.*)')

        entry = None   # fields of the entry that is being read
        key = None     # name of the field that is being read
        parts = None   # pieces of the value of that field
        indent = None  # indentation of the line that started that field

        with open(dictionary_file) as f:
            for line in f:
                m = phrase_re.match(line)
                if m is not None:
                    # Finish the pending field *before* switching entries so
                    # that it is stored in the entry that it belongs to.
                    self._store_joined(entry, key, parts)
                    entry = self[m.group("phrase")] = {}
                    key = parts = indent = None
                    continue

                m = field_re.match(line)
                if m is not None:
                    self._store_joined(entry, key, parts)
                    if entry is None:
                        # There is no entry yet that the field could go to
                        key = parts = indent = None
                        continue

                    indent = m.group("indent")
                    key = m.group("key")
                    parts = [m.group("value")]
                    # assign a string for memory efficiency
                    entry[key] = parts[0]
                    continue

                if indent is None:
                    # No field is open, so there is nothing to continue
                    continue

                m = continuation_re.match(line)
                if m is not None and len(m.group("indent")) == len(indent) + 2:
                    parts.append(m.group("continuation"))

        # join last value if necessary
        self._store_joined(entry, key, parts)

    @staticmethod
    def _store_joined(entry, key, parts):
        """Store a value that spans several lines as one string in entry."""
        # Single-line values have already been stored as they were read.
        if parts is not None and len(parts) > 1:
            entry[key] = ' '.join(parts)

    def clean(self):
        """
        Normalize the phrases that are used as keys of this dictionary.

        A phrase containing ``;`` is replaced by one entry per non-empty
        synonym, each with its own deep copy of the data.  Braces that
        enclose a whole phrase or a whole synonym are removed.
        """
        parens_re = compile(r'\(.+\)', DOTALL)
        braces_re = compile(r'^\s*\{(.+)\}\s*$', DOTALL)
        semicolon_re = compile(r'\s*;\s*')

        # Iterate over a snapshot because entries are added and removed
        for orig_phrase, data in list(self.items()):
            # if there are optional or alternating parts
            if parens_re.search(orig_phrase):
                if '|' in orig_phrase:
                    # TODO alternation
                    pass
                else:
                    # TODO optional parts
                    pass

            if ';' in orig_phrase:
                for part in semicolon_re.split(orig_phrase):
                    synonym = braces_re.sub(r'\1', part)
                    # A leading or trailing semicolon yields an empty part,
                    # which must not become an entry of its own.
                    if synonym:
                        self[synonym] = deepcopy(data)

                del self[orig_phrase]
            else:
                m = braces_re.match(orig_phrase)
                if m is not None:
                    # The old key is removed, so the data can be moved
                    self[m.group(1)] = self.pop(orig_phrase)

# Subversion Repositories LCARS
#
# (root)/trunk/tools/eazytrans/Dictionary.py - Rev 294