WebSVN - LCARS - Path Comparison - /trunk/tools/eazytrans/ Rev 299 and /trunk/tools/eazytrans/ Rev 300

Regard whitespace Rev 299 → Rev 300

 /trunk/tools/eazytrans/vuh-gol-en.dict.zdb.txt
 ,7 → 145,7
 # ("Close-follow entries of-this dictionary format established
 # by-Klingonska Akademien [KA] for-online-version
 # of-'Pocket-Dictionary Klingon' <http://klingonska.org/dict/>.
-# See [hon. imp.] on-website of-them for-details.")
+# See! [hon.] on-website of-them for-details.")
 #
 # The format of this dictionary differs in that it uses “vuh”,
 # for Vulcan, instead of “tlh”, for Klingon, adds the “ipa” key,
 ,7 → 158,7
 # ("Have format of-this dictionary difference – use of->vuh< -
 # for-language Vulcan – instead of->tlh< - for-Klingon – add
 # key >ipa< - use abbreviations for-key >pos< for-small-keeping
-# size of-file and different-abbreviations [below-see (hon. imp.)] :")
+# size of-file and different-abbreviations [below-see! (hon.)] :")
 #
 #   vuh: {original(required|alternative) (optional part)} (required)
 #   ipa: IPA transcription (optional)
 ,14 → 210,17
 #
 # Different from the KA format, entries are indented to signify
 # derivation, so that software can mark common word-roots
-# automatically.  The optional part of the original may contain
+# automatically.  Use two spaces per indentation level.
+# The optional part of the original may contain
 # affixes that can be omitted, or give an abbreviation.
 # {Natyan na'yidor t'KA - pugas-dvun-tor svinlar glantokau
 # sakazun - shatik kup-ulidau tumak ka-zhit-girlar.
+# Is'voh dah-ret na'vok t'gas-dvun.
 # Kup-tuhlau dvelik krus t'tvesh-vel tereklar kup-puyenik
 # il tanilau zhipenaya.}
 # ("Difference to-format of-KA - be-indented entries signify
-# derivation - automatically can-mark program same-word-roots.
+# derivation - automatically can-mark program same-word-root.
+# Use! two-space for-level of-indentation.
 # Can-contain optional part of-original affixes can-be-forgotten
 # or give abbreviation.")
 #
 ,7 → 329,7
 # Isha ri bolau zhit-dunap-gir nam-tor kim-krus t'terti-zhit.}
 # ("Only-if all-criteria superordered same - be-sorted entries
 # to-FSE-alphabet | not alphabet of-Modern-Golic-Vulcan
-# to-help students. |  Notice [hon. imp.] that verb-forms of-nouns -
+# to-help students. |  Notice! [hon.] that verb-forms of-nouns -
 # although modifying e.g. with->~au<, and combining with->-tor< -
 # be-sorted before-other-forms modifying and combining because have
 # they more-strong root-relation.
 ,7 → 341,7
 # {Fayei t'ta - kuv ri kup tal-tor du mesukh t'terti-zhit
 # na'svinlar palikaun k'zun t'zhit - viglazha'voh svin t'nel-gir.}
 # ("Because of-that - if not can find you translation of-compound-word
-# at-entries beginning with-letter of-word - into-look [hon. imp.]
+# at-entries beginning with-letter of-word - into-look! [hon.]
 # entry of-main-root.")
 #
 # The following abbreviations have been used:
 ,7 → 500,7
 #            ka-tvahik iyi-zhit na'isan ek'gadik heh
 #            rivanuk | na'li-fal {wak} svi'rak t'{farr} -
 #            hi – {kari farr} - {pon farr} ∴}
-#            ("too-old [ word ] - instead-use [hon. imp.]
+#            ("too-old [ word ] - instead-use! [hon.]
 #            synonymous contemporary-word for-usage diurnal and
 #            non-ceremonial | for-example {wak} instead of-{farr} -
 #            but : {kari farr} - {pon farr} etc.")
 ,7 → 714,7
   com: not in GV-FSE
 vuh: {abrash}
-ipa: ɑ‿'braʃ
+ipa: ɑ‿'brɑʃ
 en: <flood>
 pos: n.
 ,7 → 772,7
   def: FSE-GV
   com: not in GV-FSE
-vuh: {abu(')~)}
+vuh: {abu(')~}
 en: <up>
   vuh: {abu-tor}
 ,12 → 779,12
   en: <erect>, <put up>
   vuh: {abu'le}
-  ipa:    - '  -
+  ipa: ɑ‿'bu‿le
   en: <upward(s)>
   com: from CLGV {apu'leh} and AGV {ápuullh}
   vuh: {abulau}
-  ipa:   -  - '
+  ipa: ɑ‿bu‿'lau
   en: <increase>
   pos: v.
 ,6 → 10696,7
     vuh: {gas-dvun-tor}
     en: <indent>
     lit: <right>-<move>
+    pos: v.
     def: PE
       vuh: {pugas-dvun-tor}
 ,6 → 14841,11
   def: FSE-GV
   com: not in GV-FSE
+  vuh: {huhrsauyik}
+  en: <practical>
+  pos: adj.
+  def: PE
 vuh: {huhsh}
 ipa:   ,
 en: <cough>
 ,7 → 15682,7
 vuh: {ish}
 en: <that>
-pos: adj.
+pos: det.
 def: FSE-GV
 com: not in GV-FSE
 ,6 → 17813,11
 en: <bring>
 pos: v.
+  vuh: {nukatau}
+  en: <bring down>, <inflict>
+  pos: v.
+  def: PE
   vuh: {sakatau}
   en: <enhance>, <bring out>, <implement>
   def: GV-FSE; <implement> by PE
 ,7 → 17828,7
   vuh: {sakatausu}
   en: <implementor>
-  lit: implementperson
+  lit: <implement><person>
   def: PE
   vuh: {vikatau}
 ,6 → 19077,11
   en: <direct>
   pos: adj.
+    vuh: {rikhar(-~|ik)}
+    en: <indirect>
+    pos: adj.
+    def: PE
     vuh: {khar-el'taranaya}
     en: <direct manipulation>
     def: FSE-GV
 ,10 → 28055,26
 en: <reach>
 pos: v.
+  vuh: {sapulau}
+  en: <reach out>
+  pos: v.
+  def: PE
+  vuh: {pulaya}
+  en: <reach>
+  pos: n.
+  def: PE
+    vuh: {pulayan}
+    en: <reaching>
+    pos: n.
+    def: PE
 vuh: {pulayau}
 ipa:   -  - '
 en: <be present>
 pos: v.
+  com: but <presence> {la'es}; cf. {pulayan}
 vuh: {pulu-kur}
 en: <lavender>
 ,7 → 31653,7
 vuh: {ta}
 en: <that>, <which>, <who> (not a question word)
-pos: conj.
+pos: conj., pron.
 tag: MGV
 vuh: {ta'a}
 ,6 → 31895,11
   en: <ideal>
   pos: n.
+    vuh: {tangu(-~|yik)}
+    en: <ideal>
+    pos: adj.
+    def: PE
   vuh: {tanilau}
   en: <provide>, <supply>
   pos: v.
 ,11 → 44788,6
 def: FSE-GV
 com: not in GV-FSE
-vuh: {papulau}
-en: <roam>
-def: FSE-GV
-com: not in GV-FSE
 vuh: {paresh-tor}
 en: <occur>
 def: FSE-GV
 ,13 → 46938,13
 def: FSE-GV
 com: not in GV-FSE
-vuh: {plomik shur}
-en: <vegetable soup>
+vuh: {plom (n., anc.); plomik (adj.)}
+en: <vegetable>
 def: FSE-GV
 com: not in GV-FSE
-vuh: {plom (n., anc.); plomik (adj.)}
-en: <vegetable>
+  vuh: {plomik shur}
+  en: <vegetable soup>
 def: FSE-GV
 com: not in GV-FSE
 ,6 → 47865,11
 def: FSE-GV
 com: not in GV-FSE
+  vuh: {papulau}
+  en: <roam>
+  def: FSE-GV
+  com: not in GV-FSE
 vuh: {pulayau}
 en: <be present>
 def: FSE-GV
 ,11 → 53085,6
 def: FSE-GV
 com: not in GV-FSE
-vuh: {sakatau}
-en: <bring out>, <enhance>
-def: FSE-GV
-com: not in GV-FSE
 vuh: {sakataya}
 en: <enhancement>
 def: FSE-GV
 ,6 → 56551,7
 vuh: {sha'ves-yokulan}
 en: <cannibalism>
+lit: <own><kind>-<eating>
 def: FSE-GV
 com: not in GV-FSE
 ,12 → 61977,6
 def: FSE-GV
 com: not in GV-FSE
-vuh: {ta}
-en: <that>
-pos: conj., pron.
-def: FSE-GV
-com: not in GV-FSE
 vuh: {ta'bek}
 tag: anc.
 en: <drug>
 ,12 → 62417,6
 def: FSE-GV
 com: not in GV-FSE
-vuh: {tangu}
-en: <ideal>
-pos: n.
-def: FSE-GV
-com: not in GV-FSE
 vuh: {tangu-sfek}
 en: <ideal point>
 def: FSE-GV
 ,6 → 63005,7
 vuh: {tehnekon-torsu}
 en: <sinner>
+  lit: <against><god>-<do><person>
 def: FSE-GV
 com: not in GV-FSE
 ,11 → 70711,6
 def: FSE-GV
 com: not in GV-FSE
-vuh: {vikatau}
-en: <bring in>
-def: FSE-GV
-com: not in GV-FSE
 vuh: {vikau}
 en: <warn>
 pos: v.
 ,8 → 71214,10
 vuh: {vishasplotau}
 en: <invade>
+lit: <into><land><?>
 def: FSE-GV
 com: not in GV-FSE
+see: {vi}, {shasol}, {plotau}
 vuh: {vishasplotausu}
 en: <invader>
 ,11 → 71498,6
 def: FSE-GV
 com: not in GV-FSE
-vuh: {vi}
-en: <who>
-def: FSE-GV
-com: not in GV-FSE
 vuh: {viyatau}
 en: <impregnate>
 def: FSE-GV
 ,3 → 77908,5
 en: <gadget>, <gizmo>
 def: FSE-GV
 com: not in GV-FSE
+# vim: set tabstop=2 shiftwidth=2 expandtab :

 /trunk/tools/eazytrans/VulcanDictionary.py
 ,0 → 1,174
+"""
+Created on 15.01.2015
+@author: pelinux
+"""
+from Dictionary import Dictionary, dmsg
+from re import escape, match, sub, search, findall, DOTALL
+prepositions = {
+    "fi'": 'on',
+    "na'": 'at|to',
+    "t'": 'of'
+}
+class VulcanDictionary (Dictionary):
+    """
+    """
+    def translate (self, phrase, search_prefix=True, search_plural=True):
+        """
+        :param phrase:
+        :type phrase:
+        :param search_prefix:
+        :type search_prefix:
+        :param search_plural:
+        :type search_plural:
+        """
+        translation = super().translate(phrase)
+        if translation is not None:
+            return translation
+        else:
+            expr_translation = self.translate_expression(phrase)
+            if expr_translation is not None:
+                return expr_translation
+            if search_prefix:
+                # find prefix
+                for preposition in prepositions:
+                    prefix = match(escape(preposition), phrase)
+                    if prefix is not None:
+                        prefix_translation = self.translate(prefix.group(0))
+                        if prefix_translation is not None:
+                            tail = sub(preposition, '', phrase)
+                            tail_translation = self.translate(tail, search_prefix=False)
+                            if tail_translation is not None:
+                                return [prefix_translation, tail_translation]
+            elif search_plural:
+                # find plural
+                suffix = search(r'lar$', phrase)
+                if suffix is not None:
+                    head = sub(r'lar$', '', phrase)
+                    head_translation = self.translate(head, search_prefix=False, search_plural=False)
+                    if head_translation is not None:
+                        head_translation = dict(head_translation)
+                        head_translation['en'] += ' (pl.)'
+                        return head_translation
+        return None
+    def clean_entry(self, phrase):
+        """
+        Replace GV Media Script parens with FSE parens
+        :param phrase:
+        :type phrase:
+        """
+        return sub(
+            r'(\([^)]*\))|\|([^|)]+)\|',
+            lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
+            phrase)
+class Clause (object):
+    pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]+$))'
+    def __init__ (self, text):
+        self._text = text
+        self._phrases = []
+    def __str__(self):
+        return self._text
+    def translate (self, dictionary):
+        # Find phrases by attempts to translate
+        words = findall(r'[^\s.]+', self._text)
+        dmsg("words:", words, min_level=2)
+        offset = 0
+        while offset < len(words):
+            translation = None
+            for i in range(len(words), offset, -1):
+                dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)
+                phrase = ' '.join(words[offset:i])
+                dmsg("phrase:", phrase, min_level=2)
+                translation = dictionary.translate(phrase)
+                if translation is not None:
+                    dmsg("phrase-translation:", translation, min_level=2)
+                    dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)
+                    self._phrases.append(translation)
+                    offset += i - offset
+                    break
+            if translation is None:
+                dmsg("phrase-translation:", translation, min_level=2)
+                self._phrases.append(phrase)
+                offset += 1
+        dmsg("phrases:", self._phrases, min_level=2)
+class Sentence (object):
+    pattern = '(?!\s+)(.+?\.{1,3}|.+$)'
+    def __init__ (self, text):
+        self._text = text
+        self.find_clauses(text)
+    def __str__(self):
+        return self._text
+    def find_clauses (self, text):
+        self._clauses = list(map(
+            lambda clause_text: Clause(clause_text),
+            findall(Clause.pattern, text, DOTALL)))
+    def translate (self, dictionary):
+        for clause in self._clauses:
+            clause.translate(dictionary)
+class Paragraph (object):
+    pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)'
+    def __init__ (self, text):
+        self._text = text
+        self.find_sentences(text)
+    def __str__ (self):
+        return self._text
+    def find_sentences (self, text):
+        sentences = findall(Sentence.pattern, text, DOTALL)
+        self._sentences = list(map(
+            lambda sentence_text: Sentence(sentence_text),
+            sentences))
+    def translate (self, dictionary):
+        for sentence in self._sentences:
+            sentence.translate(dictionary)
+class Text (object):
+    def __init__ (self, text):
+        self._text = text
+        self.find_paragraphs(text)
+    def __repr__ (self):
+        return ''.join(map(lambda p: p.__repr__(), self._paragraphs))
+    def __str__ (self):
+        return self._text
+    def find_paragraphs (self, text):
+        self._paragraphs = list(map(
+            lambda paragraph_text: Paragraph(paragraph_text),
+            findall(Paragraph.pattern, text, DOTALL)))
+    def translate (self, dictionary):
+        for paragraph in self._paragraphs:
+            paragraph.translate(dictionary)
 Property changes:
 Added: svn:mime-type
 ## -0,0 +1 ##
 +text/plain
 \ No newline at end of property
 Index: vuh.py
 ===================================================================
 --- vuh.py	(revision 299)
 +++ vuh.py	(revision 300)
@@ -6,21 +6,14 @@
 @author: Thomas 'PointedEars' Lahn <mail@PointedEars.de>
 '''
 from sys import argv, stderr
 -from re import findall, DOTALL, match, sub, compile, \
 -    escape, search
 +from re import findall, compile
 from os.path import basename
 from functools import cmp_to_key
 -from Dictionary import Dictionary, dmsg, \
 -    sort_dict_alnum_english_key
 +from Dictionary import dmsg, sort_dict_alnum_english_key
 +from VulcanDictionary import VulcanDictionary, Text
  dictionary = {}
 -prepositions = {
 -    "fi'": 'on',
 -    "na'": 'at|to',
 -    "t'": 'of'
 -}
 -
  def cli_help():
      print('Usage: {0} TEXT...'.format(basename(argv[0])))
@@ -54,63 +47,7 @@
      return cmp_to_key(sort_dict_alnum_vulcan)
 -class VulcanDictionary (Dictionary):
 -    """
 -    """
 -    def translate (self, phrase, search_prefix=True, search_plural=True):
 -        """
 -
 -        :param phrase:
 -        :type phrase:
 -        :param search_prefix:
 -        :type search_prefix:
 -        :param search_plural:
 -        :type search_plural:
 -        """
 -        translation = super().translate(phrase)
 -        if translation is not None:
 -            return translation
 -        else:
 -            expr_translation = self.translate_expression(phrase)
 -            if expr_translation is not None:
 -                return expr_translation
 -
 -            if search_prefix:
 -                # find prefix
 -                for preposition in prepositions:
 -                    prefix = match(escape(preposition), phrase)
 -                    if prefix is not None:
 -                        prefix_translation = self.translate(prefix.group(0))
 -                        if prefix_translation is not None:
 -                            tail = sub(preposition, '', phrase)
 -                            tail_translation = self.translate(tail, search_prefix=False)
 -                            if tail_translation is not None:
 -                                return [prefix_translation, tail_translation]
 -            elif search_plural:
 -                # find plural
 -                suffix = search(r'lar$', phrase)
 -                if suffix is not None:
 -                    head = sub(r'lar$', '', phrase)
 -                    head_translation = self.translate(head, search_prefix=False, search_plural=False)
 -                    if head_translation is not None:
 -                        head_translation = dict(head_translation)
 -                        head_translation['en'] += ' (pl.)'
 -                        return head_translation
 -
 -        return None
 -
 -    def clean_entry(self, phrase):
 -        """
 -        Replace GV Media Script parens with FSE parens
 -        :param phrase:
 -        :type phrase:
 -        """
 -        return sub(
 -            r'(\([^)]*\))|\|([^|)]+)\|',
 -            lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
 -            phrase)
 -
  if __name__ == '__main__':
      if len(argv) < 2:
          print('Nothing to translate.', end='\n\n', file=stderr)
@@ -132,51 +69,21 @@
  #     except BrokenPipeError:
  #         pass
 +    text = Text(text)
 +
      dmsg("text:", text, min_level=2)
 -    sentences = findall(r'(?!\s+)(?:.+?\.{1,3}|.+$)', text, DOTALL)
 -    dmsg("sentences:", sentences, min_level=2)
 -    for sentence in sentences:
 -        dmsg("sentence:", sentence, min_level=2)
 +    dmsg("text:", text.__repr__(), min_level=2)
 -        clauses = findall(r'(?!\s+)(?:.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|.+$))', sentence, DOTALL)
 -        dmsg("clauses:", clauses, min_level=2)
 -        for clause in clauses:
 -            dmsg("clause:", clause, min_level=2)
 +    text.translate(dictionary)
 -            words = findall(r'[^\s.]+', clause)
 -            dmsg("words:", words, min_level=2)
 -
 -            offset = 0
 -            while offset < len(words):
 -                translation = None
 -
 -                for i in range(len(words), offset, -1):
 -                    dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)
 -                    phrase = ' '.join(words[offset:i])
 -
 -                    dmsg("phrase:", phrase, min_level=2)
 -
 -                    translation = dictionary.translate(phrase)
 -
 -                    if translation is not None:
 -                        dmsg("phrase-translation:", translation, min_level=2)
 -                        dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)
 -                        words[offset:i] = [translation]
 -                        offset += i - offset
 -                        break
 -
 -                if translation is None:
 -                    dmsg("phrase-translation:", translation, min_level=2)
 -                    offset += 1
 -
 -            dmsg("words-translation:", words, min_level=2)
 -            dmsg("words-translation-reduced:",
 -                list(map(
 -                    lambda word:
 -                        word['en']
 -                        if (hasattr(word, "get") and word.get('en', None) is not None)
 -                        else word,
 -                    words)),
 -                min_level=2)
 +#             dmsg("words-translation:", words, min_level=2)
 +#             dmsg("words-translation-reduced:",
 +#                 list(map(
 +#                     lambda word:
 +#                         word['en']
 +#                         if (hasattr(word, "get") and word.get('en', None) is not None)
 +#                         else word,
 +#                     words)),
 +#                 min_level=2)
  #             for key, value in dictionary._expressions.items():
  #                 dmsg(key, value, min_level=3)

 /trunk/tools/eazytrans/Dictionary.py
 ,14 → 14,12
 debug_level = 2
 def dmsg(*args, **kwargs):
-    if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None:
-        kwargs['min_level'] = 1
-    if not hasattr(kwargs, 'file'):
+    if not kwargs.get('file'):
         kwargs['file'] = stderr
-    if debug_level >= kwargs['min_level']:
-        del kwargs['min_level']
+    min_level = kwargs.pop('min_level', 1)
+    if debug_level >= min_level:
         print(*args, **kwargs)
 def sort_dict_alnum_english_key(phrase):
 ,7 → 35,7
     _keys = "ipa|en|lit|pos|com|tag|ex"
     _expressions = {}
-    def load (self, dictionary_file, language_key='en'):
+    def load (self, dictionary_file, keys=None, language_key=None):
         """
         Loads a word dictionary from a file.
         :param dictionary_file:
 ,6 → 43,10
         :param language_key:
         :type language_key:
         """
+        if keys is not None:
+            self._keys = keys
+        if language_key is not None:
         self._language_key = language_key
         dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1)
 ,6 → 62,7
         if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime:
             dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1)
             phrase = None
             key = None
             value = None
 ,11 → 104,14
                 self[phrase][key] = ' '.join(value)
             dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1)
             # TODO: Pickle should only contain strings to be small
             with open(pickle_file, mode='wb') as f: dump(self, f)
             dmsg(' done.', min_level=1)
         else:
             dmsg('from {0} ...'.format(pickle_file), end='', min_level=1)
             with open(pickle_file, mode='rb') as f: pickle = load(f)
             for key, value in pickle.items():
                 self[key] = value

Subversion Repositories LCARS

Compare Revisions

Last modification

Regard whitespace Rev 299 → Rev 300