Subversion Repositories LCARS

Compare Revisions

Last modification

Regard whitespace Rev 300 → Rev 298

/trunk/tools/eazytrans/VulcanDictionary.py
File deleted
Property changes:
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: Dictionary.py
===================================================================
--- Dictionary.py (revision 300)
+++ Dictionary.py (revision 298)
@@ -14,12 +14,14 @@
debug_level = 2
def dmsg (*args, **kwargs):
- if not kwargs.get('file'):
+ if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
+ kwargs['min_level'] = 1
+
+ if not hasattr(kwargs, 'file'):
kwargs['file'] = stderr
- min_level = kwargs.pop('min_level', 1)
-
- if debug_level >= min_level:
+ if debug_level >= kwargs['min_level']:
+ del kwargs['min_level']
print(*args, **kwargs)
def sort_dict_alnum_english_key (phrase):
@@ -35,7 +37,7 @@
_keys = "ipa|en|lit|pos|com|tag|ex"
_expressions = {}
- def load (self, dictionary_file, keys=None, language_key=None):
+ def load (self, dictionary_file, language_key='en'):
"""
Loads a word dictionary from a file.
:param dictionary_file:
@@ -43,10 +45,6 @@
:param language_key:
:type language_key:
"""
- if keys is not None:
- self._keys = keys
-
- if language_key is not None:
self._language_key = language_key
dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
@@ -62,7 +60,6 @@
if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
-
phrase = None
key = None
value = None
@@ -104,14 +101,11 @@
self[phrase][key] = ' '.join(value)
dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
-
# TODO: Pickle should only contain strings to be small
with open(pickle_file, mode='wb') as f: dump(self, f)
-
dmsg(' done.', min_level=1)
else:
dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
-
with open(pickle_file, mode='rb') as f: pickle = load(f)
for key, value in pickle.items():
self[key] = value
/trunk/tools/eazytrans/vuh.py
6,14 → 6,21
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
'''
from sys import argv, stderr
from re import findall, compile
from re import findall, DOTALL, match, sub, compile, \
escape, search
from os.path import basename
from functools import cmp_to_key
from Dictionary import dmsg, sort_dict_alnum_english_key
from VulcanDictionary import VulcanDictionary, Text
from Dictionary import Dictionary, dmsg, \
sort_dict_alnum_english_key
 
dictionary = {}
 
prepositions = {
"fi'": 'on',
"na'": 'at|to',
"t'": 'of'
}
 
def cli_help():
print('Usage: {0} TEXT...'.format(basename(argv[0])))
 
47,7 → 54,63
 
return cmp_to_key(sort_dict_alnum_vulcan)
 
class VulcanDictionary (Dictionary):
"""
Dictionary subclass that adds fallback lookups to translate():
splitting off a leading preposition and stripping a trailing ``lar``.
"""
# NOTE(review): this listing has lost all leading indentation; the nesting
# described in the comments below is read from the statement order only and
# must be confirmed against the real file.
def translate (self, phrase, search_prefix=True, search_plural=True):
"""
Translate a phrase, with prefix and plural fallbacks.

The inherited translate() is tried first, then translate_expression().
If both return None, the prefix and/or plural branches below may run.

:param phrase: text to look up; it is passed to regular-expression
  matching, so it has to be a string
:type phrase: str
:param search_prefix: if true, try to split off a leading key of the
  module-level ``prepositions`` mapping
:type search_prefix: bool
:param search_plural: if true, the ``lar`` suffix branch is permitted
:type search_plural: bool
:return: the inherited or expression translation; or a two-element list
  ``[prefix_translation, tail_translation]``; or a dict copy whose
  ``'en'`` value has ``' (pl.)'`` appended; or None if nothing matched
"""
translation = super().translate(phrase)
if translation is not None:
return translation
else:
expr_translation = self.translate_expression(phrase)
if expr_translation is not None:
return expr_translation
 
if search_prefix:
# find prefix
for preposition in prepositions:
# match() anchors at the start of the string and the key is escaped,
# so this is a literal "phrase starts with preposition" test.
prefix = match(escape(preposition), phrase)
if prefix is not None:
# NOTE(review): prefix.group(0) is the bare preposition and this call
# keeps search_prefix=True. If neither lookup above resolves the bare
# preposition, the call re-enters this same branch with the same
# argument -- confirm every key of ``prepositions`` has an entry.
prefix_translation = self.translate(prefix.group(0))
if prefix_translation is not None:
# NOTE(review): unlike the match() above, the pattern here is not
# escaped and sub() is not anchored, so every occurrence of the
# preposition in the phrase is removed, not only the leading one.
tail = sub(preposition, '', phrase)
# Prefix search is switched off for the remainder of the phrase.
tail_translation = self.translate(tail, search_prefix=False)
if tail_translation is not None:
return [prefix_translation, tail_translation]
# NOTE(review): with indentation lost it cannot be read off here which "if"
# this elif pairs with -- presumably ``if search_prefix:``; confirm.
elif search_plural:
# find plural
suffix = search(r'lar$', phrase)
if suffix is not None:
head = sub(r'lar$', '', phrase)
# Both fallbacks are disabled for the head lookup.
head_translation = self.translate(head, search_prefix=False, search_plural=False)
if head_translation is not None:
# Work on a dict() copy so the marker is appended to the copy's 'en'
# value rather than to the object the lookup returned.
head_translation = dict(head_translation)
head_translation['en'] += ' (pl.)'
return head_translation
 
return None
 
def clean_entry(self, phrase):
"""
Replace GV Media Script parens with FSE parens

Text enclosed in a pair of ``|`` characters is rewritten as ``(text)``;
text that is already enclosed in ``(`` ... ``)`` is left unchanged.

:param phrase: the string to rewrite
:type phrase: str
:return: the string with the substitutions applied
"""
return sub(
# Group 1: an existing parenthesised run. Group 2: the text between pipes.
r'(\([^)]*\))|\|([^|)]+)\|',
# A pipe match is wrapped in parentheses; a parenthesised match is kept.
lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
phrase)
 
if __name__ == '__main__':
if len(argv) < 2:
print('Nothing to translate.', end='\n\n', file=stderr)
69,21 → 132,51
# except BrokenPipeError:
# pass
 
text = Text(text)
 
dmsg("text:", text, min_level=2)
dmsg("text:", text.__repr__(), min_level=2)
sentences = findall(r'(?!\s+)(?:.+?\.{1,3}|.+$)', text, DOTALL)
dmsg("sentences:", sentences, min_level=2)
for sentence in sentences:
dmsg("sentence:", sentence, min_level=2)
 
text.translate(dictionary)
clauses = findall(r'(?!\s+)(?:.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|.+$))', sentence, DOTALL)
dmsg("clauses:", clauses, min_level=2)
for clause in clauses:
dmsg("clause:", clause, min_level=2)
 
# dmsg("words-translation:", words, min_level=2)
# dmsg("words-translation-reduced:",
# list(map(
# lambda word:
# word['en']
# if (hasattr(word, "get") and word.get('en', None) is not None)
# else word,
# words)),
# min_level=2)
words = findall(r'[^\s.]+', clause)
dmsg("words:", words, min_level=2)
 
offset = 0
while offset < len(words):
translation = None
 
for i in range(len(words), offset, -1):
dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)
phrase = ' '.join(words[offset:i])
 
dmsg("phrase:", phrase, min_level=2)
 
translation = dictionary.translate(phrase)
 
if translation is not None:
dmsg("phrase-translation:", translation, min_level=2)
dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)
words[offset:i] = [translation]
offset += i - offset
break
 
if translation is None:
dmsg("phrase-translation:", translation, min_level=2)
offset += 1
 
dmsg("words-translation:", words, min_level=2)
dmsg("words-translation-reduced:",
list(map(
lambda word:
word['en']
if (hasattr(word, "get") and word.get('en', None) is not None)
else word,
words)),
min_level=2)
# for key, value in dictionary._expressions.items():
# dmsg(key, value, min_level=3)
/trunk/tools/eazytrans/vuh-gol-en.dict.zdb.txt
145,7 → 145,7
# ("Close-follow entries of-this dictionary format established
# by-Klingonska Akademien [KA] for-online-version
# of-'Pocket-Dictionary Klingon' <http://klingonska.org/dict/>.
# See! [hon.] on-website of-them for-details.")
# See [hon. imp.] on-website of-them for-details.")
#
# The format of this dictionary differs in that it uses “vuh”,
# for Vulcan, instead of “tlh”, for Klingon, adds the “ipa” key,
158,7 → 158,7
# ("Have format of-this dictionary difference – use of->vuh< -
# for-language Vulcan – instead of->tlh< - for-Klingon – add
# key >ipa< - use abbreviations for-key >pos< for-small-keeping
# size of-file and different-abbreviations [below-see! (hon.)] :")
# size of-file and different-abbreviations [below-see (hon. imp.)] :")
#
# vuh: {original(required|alternative) (optional part)} (required)
# ipa: IPA transcription (optional)
210,17 → 210,14
#
# Different from the KA format, entries are indented to signify
# derivation, so that software can mark common word-roots
# automatically. Use two spaces per indentation level.
# The optional part of the original may contain
# automatically. The optional part of the original may contain
# affixes that can be omitted, or give an abbreviation.
# {Natyan na'yidor t'KA - pugas-dvun-tor svinlar glantokau
# sakazun - shatik kup-ulidau tumak ka-zhit-girlar.
# Is'voh dah-ret na'vok t'gas-dvun.
# Kup-tuhlau dvelik krus t'tvesh-vel tereklar kup-puyenik
# il tanilau zhipenaya.}
# ("Difference to-format of-KA - be-indented entries signify
# derivation - automatically can-mark program same-word-root.
# Use! two-space for-level of-indentation.
# derivation - automatically can-mark program same-word-roots.
# Can-contain optional part of-original affixes can-be-forgotten
# or give abbreviation.")
#
329,7 → 326,7
# Isha ri bolau zhit-dunap-gir nam-tor kim-krus t'terti-zhit.}
# ("Only-if all-criteria superordered same - be-sorted entries
# to-FSE-alphabet | not alphabet of-Modern-Golic-Vulcan
# to-help students. | Notice! [hon.] that verb-forms of-nouns -
# to-help students. | Notice [hon. imp.] that verb-forms of-nouns -
# although modifying e.g. with->~au<, and combining with->-tor< -
# be-sorted before-other-forms modifying and combining because have
# they more-strong root-relation.
341,7 → 338,7
# {Fayei t'ta - kuv ri kup tal-tor du mesukh t'terti-zhit
# na'svinlar palikaun k'zun t'zhit - viglazha'voh svin t'nel-gir.}
# ("Because of-that - if not can find you translation of-compound-word
# at-entries beginning with-letter of-word - into-look! [hon.]
# at-entries beginning with-letter of-word - into-look [hon. imp.]
# entry of-main-root.")
#
# The following abbreviations have been used:
500,7 → 497,7
# ka-tvahik iyi-zhit na'isan ek'gadik heh
# rivanuk | na'li-fal {wak} svi'rak t'{farr} -
# hi – {kari farr} - {pon farr} ∴}
# ("too-old [ word ] - instead-use! [hon.]
# ("too-old [ word ] - instead-use [hon. imp.]
# synonymous contemporary-word for-usage diurnal and
# non-ceremonial | for-example {wak} instead of-{farr} -
# but : {kari farr} - {pon farr} etc.")
714,7 → 711,7
com: not in GV-FSE
 
vuh: {abrash}
ipa: ɑ‿'brɑʃ
ipa: ɑ‿'braʃ
en: <flood>
pos: n.
 
772,7 → 769,7
def: FSE-GV
com: not in GV-FSE
 
vuh: {abu(')~}
vuh: {abu(')~)}
en: <up>
 
vuh: {abu-tor}
779,12 → 776,12
en: <erect>, <put up>
 
vuh: {abu'le}
ipa: ɑ‿'bu‿le
ipa: - ' -
en: <upward(s)>
com: from CLGV {apu'leh} and AGV {ápuullh}
 
vuh: {abulau}
ipa: ɑ‿bu‿'lau
ipa: - - '
en: <increase>
pos: v.
 
10696,7 → 10693,6
vuh: {gas-dvun-tor}
en: <indent>
lit: <right>-<move>
pos: v.
def: PE
 
vuh: {pugas-dvun-tor}
14841,11 → 14837,6
def: FSE-GV
com: not in GV-FSE
 
vuh: {huhrsauyik}
en: <practical>
pos: adj.
def: PE
 
vuh: {huhsh}
ipa: ,
en: <cough>
15682,7 → 15673,7
 
vuh: {ish}
en: <that>
pos: det.
pos: adj.
def: FSE-GV
com: not in GV-FSE
 
17813,11 → 17804,6
en: <bring>
pos: v.
 
vuh: {nukatau}
en: <bring down>, <inflict>
pos: v.
def: PE
 
vuh: {sakatau}
en: <enhance>, <bring out>, <implement>
def: GV-FSE; <implement> by PE
17828,7 → 17814,7
 
vuh: {sakatausu}
en: <implementor>
lit: <implement><person>
lit: implementperson
def: PE
 
vuh: {vikatau}
19077,11 → 19063,6
en: <direct>
pos: adj.
 
vuh: {rikhar(-~|ik)}
en: <indirect>
pos: adj.
def: PE
 
vuh: {khar-el'taranaya}
en: <direct manipulation>
def: FSE-GV
28055,26 → 28036,10
en: <reach>
pos: v.
 
vuh: {sapulau}
en: <reach out>
pos: v.
def: PE
 
vuh: {pulaya}
en: <reach>
pos: n.
def: PE
 
vuh: {pulayan}
en: <reaching>
pos: n.
def: PE
 
vuh: {pulayau}
ipa: - - '
en: <be present>
pos: v.
com: but <presence> {la'es}; cf. {pulayan}
 
vuh: {pulu-kur}
en: <lavender>
31653,7 → 31618,7
 
vuh: {ta}
en: <that>, <which>, <who> (not a question word)
pos: conj., pron.
pos: conj.
tag: MGV
 
vuh: {ta'a}
31895,11 → 31860,6
en: <ideal>
pos: n.
 
vuh: {tangu(-~|yik)}
en: <ideal>
pos: adj.
def: PE
 
vuh: {tanilau}
en: <provide>, <supply>
pos: v.
44788,6 → 44748,11
def: FSE-GV
com: not in GV-FSE
 
vuh: {papulau}
en: <roam>
def: FSE-GV
com: not in GV-FSE
 
vuh: {paresh-tor}
en: <occur>
def: FSE-GV
46938,13 → 46903,13
def: FSE-GV
com: not in GV-FSE
 
vuh: {plom (n., anc.); plomik (adj.)}
en: <vegetable>
vuh: {plomik shur}
en: <vegetable soup>
def: FSE-GV
com: not in GV-FSE
 
vuh: {plomik shur}
en: <vegetable soup>
vuh: {plom (n., anc.); plomik (adj.)}
en: <vegetable>
def: FSE-GV
com: not in GV-FSE
 
47865,11 → 47830,6
def: FSE-GV
com: not in GV-FSE
 
vuh: {papulau}
en: <roam>
def: FSE-GV
com: not in GV-FSE
 
vuh: {pulayau}
en: <be present>
def: FSE-GV
53085,6 → 53045,11
def: FSE-GV
com: not in GV-FSE
 
vuh: {sakatau}
en: <bring out>, <enhance>
def: FSE-GV
com: not in GV-FSE
 
vuh: {sakataya}
en: <enhancement>
def: FSE-GV
56551,7 → 56516,6
 
vuh: {sha'ves-yokulan}
en: <cannibalism>
lit: <own><kind>-<eating>
def: FSE-GV
com: not in GV-FSE
 
61977,6 → 61941,12
def: FSE-GV
com: not in GV-FSE
 
vuh: {ta}
en: <that>
pos: conj., pron.
def: FSE-GV
com: not in GV-FSE
 
vuh: {ta'bek}
tag: anc.
en: <drug>
62417,6 → 62387,12
def: FSE-GV
com: not in GV-FSE
 
vuh: {tangu}
en: <ideal>
pos: n.
def: FSE-GV
com: not in GV-FSE
 
vuh: {tangu-sfek}
en: <ideal point>
def: FSE-GV
63005,7 → 62981,6
 
vuh: {tehnekon-torsu}
en: <sinner>
lit: <against><god>-<do><person>
def: FSE-GV
com: not in GV-FSE
 
70711,6 → 70686,11
def: FSE-GV
com: not in GV-FSE
 
vuh: {vikatau}
en: <bring in>
def: FSE-GV
com: not in GV-FSE
 
vuh: {vikau}
en: <warn>
pos: v.
71214,10 → 71194,8
 
vuh: {vishasplotau}
en: <invade>
lit: <into><land><?>
def: FSE-GV
com: not in GV-FSE
see: {vi}, {shasol}, {plotau}
 
vuh: {vishasplotausu}
en: <invader>
71498,6 → 71476,11
def: FSE-GV
com: not in GV-FSE
 
vuh: {vi}
en: <who>
def: FSE-GV
com: not in GV-FSE
 
vuh: {viyatau}
en: <impregnate>
def: FSE-GV
77908,5 → 77891,3
en: <gadget>, <gizmo>
def: FSE-GV
com: not in GV-FSE
 
# vim: set tabstop=2 shiftwidth=2 expandtab :
/trunk/tools/eazytrans/.
Property changes:
Deleted: svn:ignore
## -1 +0,0 ##
-*.pickle