Subversion Repositories LCARS

Compare Revisions

Last modification

Regard whitespace Rev 298 → Rev 300

/trunk/tools/eazytrans/vuh-gol-en.dict.zdb.txt
145,7 → 145,7
# ("Close-follow entries of-this dictionary format established
# by-Klingonska Akademien [KA] for-online-version
# of-'Pocket-Dictionary Klingon' <http://klingonska.org/dict/>.
# See [hon. imp.] on-website of-them for-details.")
# See! [hon.] on-website of-them for-details.")
#
# The format of this dictionary differs in that it uses “vuh”,
# for Vulcan, instead of “tlh”, for Klingon, adds the “ipa” key,
158,7 → 158,7
# ("Have format of-this dictionary difference – use of->vuh< -
# for-language Vulcan – instead of->tlh< - for-Klingon – add
# key >ipa< - use abbreviations for-key >pos< for-small-keeping
# size of-file and different-abbreviations [below-see (hon. imp.)] :")
# size of-file and different-abbreviations [below-see! (hon.)] :")
#
# vuh: {original(required|alternative) (optional part)} (required)
# ipa: IPA transcription (optional)
210,14 → 210,17
#
# Different from the KA format, entries are indented to signify
# derivation, so that software can mark common word-roots
# automatically. The optional part of the original may contain
# automatically. Use two spaces per indentation level.
# The optional part of the original may contain
# affixes that can be omitted, or give an abbreviation.
# {Natyan na'yidor t'KA - pugas-dvun-tor svinlar glantokau
# sakazun - shatik kup-ulidau tumak ka-zhit-girlar.
# Is'voh dah-ret na'vok t'gas-dvun.
# Kup-tuhlau dvelik krus t'tvesh-vel tereklar kup-puyenik
# il tanilau zhipenaya.}
# ("Difference to-format of-KA - be-indented entries signify
# derivation - automatically can-mark program same-word-roots.
# derivation - automatically can-mark program same-word-root.
# Use! two-space for-level of-indentation.
# Can-contain optional part of-original affixes can-be-forgotten
# or give abbreviation.")
#
326,7 → 329,7
# Isha ri bolau zhit-dunap-gir nam-tor kim-krus t'terti-zhit.}
# ("Only-if all-criteria superordered same - be-sorted entries
# to-FSE-alphabet | not alphabet of-Modern-Golic-Vulcan
# to-help students. | Notice [hon. imp.] that verb-forms of-nouns -
# to-help students. | Notice! [hon.] that verb-forms of-nouns -
# although modifying e.g. with->~au<, and combining with->-tor< -
# be-sorted before-other-forms modifying and combining because have
# they more-strong root-relation.
338,7 → 341,7
# {Fayei t'ta - kuv ri kup tal-tor du mesukh t'terti-zhit
# na'svinlar palikaun k'zun t'zhit - viglazha'voh svin t'nel-gir.}
# ("Because of-that - if not can find you translation of-compound-word
# at-entries beginning with-letter of-word - into-look [hon. imp.]
# at-entries beginning with-letter of-word - into-look! [hon.]
# entry of-main-root.")
#
# The following abbreviations have been used:
497,7 → 500,7
# ka-tvahik iyi-zhit na'isan ek'gadik heh
# rivanuk | na'li-fal {wak} svi'rak t'{farr} -
# hi – {kari farr} - {pon farr} ∴}
# ("too-old [ word ] - instead-use [hon. imp.]
# ("too-old [ word ] - instead-use! [hon.]
# synonymous contemporary-word for-usage diurnal and
# non-ceremonial | for-example {wak} instead of-{farr} -
# but : {kari farr} - {pon farr} etc.")
711,7 → 714,7
com: not in GV-FSE
 
vuh: {abrash}
ipa: ɑ‿'braʃ
ipa: ɑ‿'brɑʃ
en: <flood>
pos: n.
 
769,7 → 772,7
def: FSE-GV
com: not in GV-FSE
 
vuh: {abu(')~)}
vuh: {abu(')~}
en: <up>
 
vuh: {abu-tor}
776,12 → 779,12
en: <erect>, <put up>
 
vuh: {abu'le}
ipa: - ' -
ipa: ɑ‿'bu‿le
en: <upward(s)>
com: from CLGV {apu'leh} and AGV {ápuullh}
 
vuh: {abulau}
ipa: - - '
ipa: ɑ‿bu‿'lau
en: <increase>
pos: v.
 
10693,6 → 10696,7
vuh: {gas-dvun-tor}
en: <indent>
lit: <right>-<move>
pos: v.
def: PE
 
vuh: {pugas-dvun-tor}
14837,6 → 14841,11
def: FSE-GV
com: not in GV-FSE
 
vuh: {huhrsauyik}
en: <practical>
pos: adj.
def: PE
 
vuh: {huhsh}
ipa: ,
en: <cough>
15673,7 → 15682,7
 
vuh: {ish}
en: <that>
pos: adj.
pos: det.
def: FSE-GV
com: not in GV-FSE
 
17804,6 → 17813,11
en: <bring>
pos: v.
 
vuh: {nukatau}
en: <bring down>, <inflict>
pos: v.
def: PE
 
vuh: {sakatau}
en: <enhance>, <bring out>, <implement>
def: GV-FSE; <implement> by PE
17814,7 → 17828,7
 
vuh: {sakatausu}
en: <implementor>
lit: implementperson
lit: <implement><person>
def: PE
 
vuh: {vikatau}
19063,6 → 19077,11
en: <direct>
pos: adj.
 
vuh: {rikhar(-~|ik)}
en: <indirect>
pos: adj.
def: PE
 
vuh: {khar-el'taranaya}
en: <direct manipulation>
def: FSE-GV
28036,10 → 28055,26
en: <reach>
pos: v.
 
vuh: {sapulau}
en: <reach out>
pos: v.
def: PE
 
vuh: {pulaya}
en: <reach>
pos: n.
def: PE
 
vuh: {pulayan}
en: <reaching>
pos: n.
def: PE
 
vuh: {pulayau}
ipa: - - '
en: <be present>
pos: v.
com: but <presence> {la'es}; cf. {pulayan}
 
vuh: {pulu-kur}
en: <lavender>
31618,7 → 31653,7
 
vuh: {ta}
en: <that>, <which>, <who> (not a question word)
pos: conj.
pos: conj., pron.
tag: MGV
 
vuh: {ta'a}
31860,6 → 31895,11
en: <ideal>
pos: n.
 
vuh: {tangu(-~|yik)}
en: <ideal>
pos: adj.
def: PE
 
vuh: {tanilau}
en: <provide>, <supply>
pos: v.
44748,11 → 44788,6
def: FSE-GV
com: not in GV-FSE
 
vuh: {papulau}
en: <roam>
def: FSE-GV
com: not in GV-FSE
 
vuh: {paresh-tor}
en: <occur>
def: FSE-GV
46903,13 → 46938,13
def: FSE-GV
com: not in GV-FSE
 
vuh: {plomik shur}
en: <vegetable soup>
vuh: {plom (n., anc.); plomik (adj.)}
en: <vegetable>
def: FSE-GV
com: not in GV-FSE
 
vuh: {plom (n., anc.); plomik (adj.)}
en: <vegetable>
vuh: {plomik shur}
en: <vegetable soup>
def: FSE-GV
com: not in GV-FSE
 
47830,6 → 47865,11
def: FSE-GV
com: not in GV-FSE
 
vuh: {papulau}
en: <roam>
def: FSE-GV
com: not in GV-FSE
 
vuh: {pulayau}
en: <be present>
def: FSE-GV
53045,11 → 53085,6
def: FSE-GV
com: not in GV-FSE
 
vuh: {sakatau}
en: <bring out>, <enhance>
def: FSE-GV
com: not in GV-FSE
 
vuh: {sakataya}
en: <enhancement>
def: FSE-GV
56516,6 → 56551,7
 
vuh: {sha'ves-yokulan}
en: <cannibalism>
lit: <own><kind>-<eating>
def: FSE-GV
com: not in GV-FSE
 
61941,12 → 61977,6
def: FSE-GV
com: not in GV-FSE
 
vuh: {ta}
en: <that>
pos: conj., pron.
def: FSE-GV
com: not in GV-FSE
 
vuh: {ta'bek}
tag: anc.
en: <drug>
62387,12 → 62417,6
def: FSE-GV
com: not in GV-FSE
 
vuh: {tangu}
en: <ideal>
pos: n.
def: FSE-GV
com: not in GV-FSE
 
vuh: {tangu-sfek}
en: <ideal point>
def: FSE-GV
62981,6 → 63005,7
 
vuh: {tehnekon-torsu}
en: <sinner>
lit: <against><god>-<do><person>
def: FSE-GV
com: not in GV-FSE
 
70686,11 → 70711,6
def: FSE-GV
com: not in GV-FSE
 
vuh: {vikatau}
en: <bring in>
def: FSE-GV
com: not in GV-FSE
 
vuh: {vikau}
en: <warn>
pos: v.
71194,8 → 71214,10
 
vuh: {vishasplotau}
en: <invade>
lit: <into><land><?>
def: FSE-GV
com: not in GV-FSE
see: {vi}, {shasol}, {plotau}
 
vuh: {vishasplotausu}
en: <invader>
71476,11 → 71498,6
def: FSE-GV
com: not in GV-FSE
 
vuh: {vi}
en: <who>
def: FSE-GV
com: not in GV-FSE
 
vuh: {viyatau}
en: <impregnate>
def: FSE-GV
77891,3 → 77908,5
en: <gadget>, <gizmo>
def: FSE-GV
com: not in GV-FSE
 
# vim: set tabstop=2 shiftwidth=2 expandtab :
/trunk/tools/eazytrans/VulcanDictionary.py
0,0 → 1,174
"""
Created on 15.01.2015
 
@author: pelinux
 
"""
from Dictionary import Dictionary, dmsg
from re import escape, match, sub, search, findall, DOTALL
 
# Vulcan prepositional prefixes and their English renderings.
# Alternative renderings are separated by "|".
prepositions = {
    "fi'": "on",
    "na'": "at|to",
    "t'": "of",
}
 
class VulcanDictionary (Dictionary):
    """
    Dictionary that adds simple morphological fallbacks for Vulcan.

    When the lookups of the base class fail, a phrase is additionally
    checked for a prepositional prefix (see ``prepositions``) and for
    the plural suffix ``lar``.
    """

    def translate (self, phrase, search_prefix=True, search_plural=True):
        """
        Translate a phrase, trying progressively looser lookups.

        The lookups are attempted in this order:

        1. the ``translate()`` method of the base class;
        2. ``self.translate_expression()``;
        3. if ``search_prefix`` is true: a known preposition at the
           start of the phrase, followed by a translatable tail;
        4. if ``search_plural`` is true: the phrase without a
           trailing ``lar``.

        :param phrase: Phrase to translate
        :type phrase: str
        :param search_prefix: Whether to try splitting off
            a prepositional prefix
        :type search_prefix: bool
        :param search_plural: Whether to try stripping
            the plural suffix
        :type search_plural: bool
        :return: The result of the first lookup that succeeds.
            For a prefixed phrase this is the list
            ``[prefix_translation, tail_translation]``; for a plural
            it is a copy of the translation of the singular whose
            ``'en'`` value has ``' (pl.)'`` appended.
            ``None`` if no lookup succeeds.
        """
        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            for preposition in prepositions:
                if not phrase.startswith(preposition):
                    continue

                # Look up the bare preposition with both fallbacks
                # disabled: with the prefix search enabled, a preposition
                # that has no entry of its own would match itself as
                # a prefix again and recurse without end.
                prefix_translation = self.translate(
                    preposition, search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Strip only the leading preposition; further occurrences
                # inside the phrase belong to the tail.
                tail = phrase[len(preposition):]
                tail_translation = self.translate(tail, search_prefix=False)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]

        # Deliberately not an "elif": the plural must also be tried when
        # the prefix search was enabled but did not produce a translation.
        if search_plural and search(r'lar$', phrase) is not None:
            head = sub(r'lar$', '', phrase)
            head_translation = self.translate(
                head, search_prefix=False, search_plural=False)
            if head_translation is not None:
                # Work on a copy so that the entry of the singular is not
                # modified.
                # NOTE(review): assumes the translation is a mapping with
                # an 'en' key -- confirm against Dictionary.translate().
                head_translation = dict(head_translation)
                head_translation['en'] += ' (pl.)'
                return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Text delimited by vertical bars (``|text|``) is rewritten as
        ``(text)``.  Text that is already enclosed in parentheses is
        matched first and returned unchanged, so vertical bars within
        parentheses are not converted.

        :param phrase: Phrase to clean
        :type phrase: str
        :return: The phrase with the replacements applied
        :rtype: str
        """
        return sub(
            r'(\([^)]*\))|\|([^|)]+)\|',
            lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
            phrase)
 
class Clause (object):
    """
    Part of a sentence that is translated phrase by phrase.
    """

    # A clause does not start with white-space and ends with a hyphen
    # preceded by white-space, an en/em dash, one to three periods,
    # or at the end of the text.
    pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]+$))'

    def __init__ (self, text):
        """
        :param text: Text of the clause
        :type text: str
        """
        self._text = text
        self._phrases = []

    def __str__(self):
        return self._text

    def translate (self, dictionary):
        """
        Find the phrases of this clause by attempts to translate them.

        Starting at each word, the longest run of words that
        ``dictionary.translate()`` can translate is taken as one phrase
        and its translation is stored.  A word with which no
        translatable phrase starts is stored untranslated.  The results
        are kept in ``self._phrases``; nothing is returned.

        :param dictionary: Object whose ``translate(phrase)`` method
            returns ``None`` for a phrase that it cannot translate
        """
        words = findall(r'[^\s.]+', self._text)

        dmsg("words:", words, min_level=2)

        # Start from scratch so that translating the same clause again
        # does not append a second copy of its phrases.
        self._phrases = []

        offset = 0
        while offset < len(words):
            translation = None

            # Try the longest candidate first, then drop words from the end
            for i in range(len(words), offset, -1):
                dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)

                phrase = ' '.join(words[offset:i])

                dmsg("phrase:", phrase, min_level=2)

                translation = dictionary.translate(phrase)

                if translation is not None:
                    dmsg("phrase-translation:", translation, min_level=2)
                    dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)

                    self._phrases.append(translation)
                    offset = i
                    break
            else:
                # Not even the single word is known: keep it as it is
                dmsg("phrase-translation:", translation, min_level=2)
                self._phrases.append(words[offset])
                offset += 1

        dmsg("phrases:", self._phrases, min_level=2)
 
class Sentence (object):
    """
    Part of a paragraph; consists of clauses.
    """

    # A sentence does not start with white-space and ends with one to
    # three periods, or at the end of the text.
    # Raw string: "\s" and "\." are not valid string escape sequences.
    pattern = r'(?!\s+)(.+?\.{1,3}|.+$)'

    def __init__ (self, text):
        """
        :param text: Text of the sentence
        :type text: str
        """
        self._text = text
        self.find_clauses(text)

    def __str__(self):
        return self._text

    def find_clauses (self, text):
        """
        Split the text into clauses and store them in ``self._clauses``.

        :param text: Text to split using ``Clause.pattern``
        :type text: str
        """
        self._clauses = [
            Clause(clause_text)
            for clause_text in findall(Clause.pattern, text, DOTALL)]

    def translate (self, dictionary):
        """
        Translate each clause of this sentence in order.

        :param dictionary: Passed on to ``Clause.translate()``
        """
        for clause in self._clauses:
            clause.translate(dictionary)
 
class Paragraph (object):
    """
    Part of a text; consists of sentences.
    """

    # A paragraph ends with two or more consecutive newlines,
    # or at the end of the text.
    pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)'

    def __init__ (self, text):
        """
        :param text: Text of the paragraph
        :type text: str
        """
        self._text = text
        self.find_sentences(text)

    def __str__ (self):
        return self._text

    def find_sentences (self, text):
        """
        Split the text into sentences and store them
        in ``self._sentences``.

        :param text: Text to split using ``Sentence.pattern``
        :type text: str
        """
        self._sentences = [
            Sentence(sentence_text)
            for sentence_text in findall(Sentence.pattern, text, DOTALL)]

    def translate (self, dictionary):
        """
        Translate each sentence of this paragraph in order.

        :param dictionary: Passed on to ``Sentence.translate()``
        """
        for item in self._sentences:
            item.translate(dictionary)
 
class Text (object):
    """
    A text to be translated; consists of paragraphs.
    """

    def __init__ (self, text):
        """
        :param text: The complete text
        :type text: str
        """
        self._text = text
        self.find_paragraphs(text)

    def __repr__ (self):
        # Concatenation of the representations of all paragraphs
        return ''.join(item.__repr__() for item in self._paragraphs)

    def __str__ (self):
        return self._text

    def find_paragraphs (self, text):
        """
        Split the text into paragraphs and store them
        in ``self._paragraphs``.

        :param text: Text to split using ``Paragraph.pattern``
        :type text: str
        """
        self._paragraphs = [
            Paragraph(paragraph_text)
            for paragraph_text in findall(Paragraph.pattern, text, DOTALL)]

    def translate (self, dictionary):
        """
        Translate each paragraph of this text in order.

        :param dictionary: Passed on to ``Paragraph.translate()``
        """
        for item in self._paragraphs:
            item.translate(dictionary)
Property changes:
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: eazytrans/vuh.py
===================================================================
--- eazytrans/vuh.py (revision 298)
+++ eazytrans/vuh.py (revision 300)
@@ -6,21 +6,14 @@
@author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
'''
from sys import argv, stderr
-from re import findall, DOTALL, match, sub, compile, \
- escape, search
+from re import findall, compile
from os.path import basename
from functools import cmp_to_key
-from Dictionary import Dictionary, dmsg, \
- sort_dict_alnum_english_key
+from Dictionary import dmsg, sort_dict_alnum_english_key
+from VulcanDictionary import VulcanDictionary, Text
dictionary = {}
-prepositions = {
- "fi'": 'on',
- "na'": 'at|to',
- "t'": 'of'
-}
-
def cli_help():
print('Usage: {0} TEXT...'.format(basename(argv[0])))
@@ -54,63 +47,7 @@
return cmp_to_key(sort_dict_alnum_vulcan)
-class VulcanDictionary (Dictionary):
- """
- """
- def translate (self, phrase, search_prefix=True, search_plural=True):
- """
-
- :param phrase:
- :type phrase:
- :param search_prefix:
- :type search_prefix:
- :param search_plural:
- :type search_plural:
- """
- translation = super().translate(phrase)
- if translation is not None:
- return translation
- else:
- expr_translation = self.translate_expression(phrase)
- if expr_translation is not None:
- return expr_translation
-
- if search_prefix:
- # find prefix
- for preposition in prepositions:
- prefix = match(escape(preposition), phrase)
- if prefix is not None:
- prefix_translation = self.translate(prefix.group(0))
- if prefix_translation is not None:
- tail = sub(preposition, '', phrase)
- tail_translation = self.translate(tail, search_prefix=False)
- if tail_translation is not None:
- return [prefix_translation, tail_translation]
- elif search_plural:
- # find plural
- suffix = search(r'lar$', phrase)
- if suffix is not None:
- head = sub(r'lar$', '', phrase)
- head_translation = self.translate(head, search_prefix=False, search_plural=False)
- if head_translation is not None:
- head_translation = dict(head_translation)
- head_translation['en'] += ' (pl.)'
- return head_translation
-
- return None
-
- def clean_entry(self, phrase):
- """
- Replace GV Media Script parens with FSE parens
- :param phrase:
- :type phrase:
- """
- return sub(
- r'(\([^)]*\))|\|([^|)]+)\|',
- lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
- phrase)
-
if __name__ == '__main__':
if len(argv) < 2:
print('Nothing to translate.', end='\n\n', file=stderr)
@@ -132,51 +69,21 @@
# except BrokenPipeError:
# pass
+ text = Text(text)
+
dmsg("text:", text, min_level=2)
- sentences = findall(r'(?!\s+)(?:.+?\.{1,3}|.+$)', text, DOTALL)
- dmsg("sentences:", sentences, min_level=2)
- for sentence in sentences:
- dmsg("sentence:", sentence, min_level=2)
+ dmsg("text:", text.__repr__(), min_level=2)
- clauses = findall(r'(?!\s+)(?:.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|.+$))', sentence, DOTALL)
- dmsg("clauses:", clauses, min_level=2)
- for clause in clauses:
- dmsg("clause:", clause, min_level=2)
+ text.translate(dictionary)
- words = findall(r'[^\s.]+', clause)
- dmsg("words:", words, min_level=2)
-
- offset = 0
- while offset < len(words):
- translation = None
-
- for i in range(len(words), offset, -1):
- dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)
- phrase = ' '.join(words[offset:i])
-
- dmsg("phrase:", phrase, min_level=2)
-
- translation = dictionary.translate(phrase)
-
- if translation is not None:
- dmsg("phrase-translation:", translation, min_level=2)
- dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)
- words[offset:i] = [translation]
- offset += i - offset
- break
-
- if translation is None:
- dmsg("phrase-translation:", translation, min_level=2)
- offset += 1
-
- dmsg("words-translation:", words, min_level=2)
- dmsg("words-translation-reduced:",
- list(map(
- lambda word:
- word['en']
- if (hasattr(word, "get") and word.get('en', None) is not None)
- else word,
- words)),
- min_level=2)
+# dmsg("words-translation:", words, min_level=2)
+# dmsg("words-translation-reduced:",
+# list(map(
+# lambda word:
+# word['en']
+# if (hasattr(word, "get") and word.get('en', None) is not None)
+# else word,
+# words)),
+# min_level=2)
# for key, value in dictionary._expressions.items():
# dmsg(key, value, min_level=3)
/trunk/tools/eazytrans/Dictionary.py
14,14 → 14,12
debug_level = 2
 
def dmsg(*args, **kwargs):
if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
kwargs['min_level'] = 1
 
if not hasattr(kwargs, 'file'):
if not kwargs.get('file'):
kwargs['file'] = stderr
 
if debug_level >= kwargs['min_level']:
del kwargs['min_level']
min_level = kwargs.pop('min_level', 1)
 
if debug_level >= min_level:
print(*args, **kwargs)
 
def sort_dict_alnum_english_key(phrase):
37,7 → 35,7
_keys = "ipa|en|lit|pos|com|tag|ex"
_expressions = {}
 
def load (self, dictionary_file, language_key='en'):
def load (self, dictionary_file, keys=None, language_key=None):
"""
Loads a word dictionary from a file.
:param dictionary_file:
45,6 → 43,10
:param language_key:
:type language_key:
"""
if keys is not None:
self._keys = keys
 
if language_key is not None:
self._language_key = language_key
 
dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
60,6 → 62,7
 
if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
 
phrase = None
key = None
value = None
101,11 → 104,14
self[phrase][key] = ' '.join(value)
 
dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
 
# TODO: Pickle should only contain strings to be small
with open(pickle_file, mode='wb') as f: dump(self, f)
 
dmsg(' done.', min_level=1)
else:
dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
 
with open(pickle_file, mode='rb') as f: pickle = load(f)
for key, value in pickle.items():
self[key] = value
/trunk/tools/eazytrans
Property changes:
Added: svn:ignore
## -0,0 +1 ##
+*.pickle