eazytrans:
* Dictionary.py
- dmsg(): Fixed argument handling
- Dictionary.load(): Fixed/updated default values
* vuh-gol-en.dict.zdb.txt
- Added Vim modeline
- Clarified format description
- Simplified expression of imperatives in translation
- Updated IPA transcriptions
- Updated word order
- New words based on Surak's teachings in "Spock's World"
* vuh.py, VulcanDictionary.py
- Organized imports
- Moved VulcanDictionary class to own file
- Properly split into paragraphs, sentences, and clauses
- Find and translate phrases by looking up subsets of clauses
(TODO: compound handling)

| /trunk/tools/eazytrans/vuh-gol-en.dict.zdb.txt |
|---|
| 145,7 → 145,7 |
| # ("Close-follow entries of-this dictionary format established |
| # by-Klingonska Akademien [KA] for-online-version |
| # of-'Pocket-Dictionary Klingon' <http://klingonska.org/dict/>. |
| # See [hon. imp.] on-website of-them for-details.") |
| # See! [hon.] on-website of-them for-details.") |
| # |
| # The format of this dictionary differs in that it uses “vuh”, |
| # for Vulcan, instead of “tlh”, for Klingon, adds the “ipa” key, |
| 158,7 → 158,7 |
| # ("Have format of-this dictionary difference – use of->vuh< - |
| # for-language Vulcan – instead of->tlh< - for-Klingon – add |
| # key >ipa< - use abbrevations for-key >pos< for-small-keeping |
| # size of-file and different-abbrevations [below-see (hon. imp.)] :") |
| # size of-file and different-abbrevations [below-see! (hon.)] :") |
| # |
| # vuh: {original(required|alternative) (optional part)} (required) |
| # ipa: IPA transcription (optional) |
| 210,14 → 210,17 |
| # |
| # Different from the KA format, entries are indented to signify |
| # derivation, so that software can mark common word-roots |
| # automatically. The optional part of the original may contain |
| # automatically. Use two spaces per indentation level. |
| # The optional part of the original may contain |
| # affixes that can be omitted, or give an abbreviation. |
| # {Natyan na'yidor t'KA - pugas-dvun-tor svinlar glantokau |
| # sakazun - shatik kup-ulidau tumak ka-zhit-girlar. |
| # Is'voh dah-ret na'vok t'gas-dvun. |
| # Kup-tuhlau dvelik krus t'tvesh-vel tereklar kup-puyenik |
| # il tanilau zhipenaya.} |
| # ("Difference to-format of-KA - be-indented entries signify |
| # derivation - automatically can-mark program same-word-roots. |
| # derivation - automatically can-mark program same-word-root. |
| # Use! two-space for-level of-indentation. |
| # Can-contain optional part of-original affixes can-be-forgotten |
| # or give abbreviation.") |
| # |
| 326,7 → 329,7 |
| # Isha ri bolau zhit-dunap-gir nam-tor kim-krus t'terti-zhit.} |
| # ("Only-if all-criteria superordered same - be-sorted entries |
| # to-FSE-alphabet | not alphabet of-Modern-Golic-Vulcan |
| # to-help students. | Notice [hon. imp.] that verb-forms of-nouns - |
| # to-help students. | Notice! [hon.] that verb-forms of-nouns - |
| # although modifying e.g. with->~au<, and combining with->-tor< - |
| # be-sorted before-other-forms modifying and combining because have |
| # they more-strong root-relation. |
| 338,7 → 341,7 |
| # {Fayei t'ta - kuv ri kup tal-tor du mesukh t'terti-zhit |
| # na'svinlar palikaun k'zun t'zhit - viglazha'voh svin t'nel-gir.} |
| # ("Because of-that - if not can find you translation of-compound-word |
| # at-entries beginning with-letter of-word - into-look [hon. imp.] |
| # at-entries beginning with-letter of-word - into-look! [hon.] |
| # entry of-main-root.") |
| # |
| # The following abbreviations have been used: |
| 497,7 → 500,7 |
| # ka-tvahik iyi-zhit na'isan ek'gadik heh |
| # rivanuk | na'li-fal {wak} svi'rak t'{farr} - |
| # hi – {kari farr} - {pon farr} ∴} |
| # ("too-old [ word ] - instead-use [hon. imp.] |
| # ("too-old [ word ] - instead-use! [hon.] |
| # synonymous contemporary-word for-usage diurnal and |
| # non-ceremonial | for-example {wak} instead of-{farr} - |
| # but : {kari farr} - {pon farr} etc.") |
| 711,7 → 714,7 |
| com: not in GV-FSE |
| vuh: {abrash} |
| ipa: ɑ‿'braʃ |
| ipa: ɑ‿'brɑʃ |
| en: <flood> |
| pos: n. |
| 769,7 → 772,7 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {abu(')~)} |
| vuh: {abu(')~} |
| en: <up> |
| vuh: {abu-tor} |
| 776,12 → 779,12 |
| en: <erect>, <put up> |
| vuh: {abu'le} |
| ipa: - ' - |
| ipa: ɑ‿'bu‿le |
| en: <upward(s)> |
| com: from CLGV {apu'leh} and AGV {ápuullh} |
| vuh: {abulau} |
| ipa: - - ' |
| ipa: ɑ‿bu‿'lau |
| en: <increase> |
| pos: v. |
| 10693,6 → 10696,7 |
| vuh: {gas-dvun-tor} |
| en: <indent> |
| lit: <right>-<move> |
| pos: v. |
| def: PE |
| vuh: {pugas-dvun-tor} |
| 14837,6 → 14841,11 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {huhrsauyik} |
| en: <practical> |
| pos: adj. |
| def: PE |
| vuh: {huhsh} |
| ipa: , |
| en: <cough> |
| 15673,7 → 15682,7 |
| vuh: {ish} |
| en: <that> |
| pos: adj. |
| pos: det. |
| def: FSE-GV |
| com: not in GV-FSE |
| 17804,6 → 17813,11 |
| en: <bring> |
| pos: v. |
| vuh: {nukatau} |
| en: <bring down>, <inflict> |
| pos: v. |
| def: PE |
| vuh: {sakatau} |
| en: <enhance>, <bring out>, <implement> |
| def: GV-FSE; <implement> by PE |
| 17814,7 → 17828,7 |
| vuh: {sakatausu} |
| en: <implementor> |
| lit: implementperson |
| lit: <implement><person> |
| def: PE |
| vuh: {vikatau} |
| 19063,6 → 19077,11 |
| en: <direct> |
| pos: adj. |
| vuh: {rikhar(-~|ik)} |
| en: <indirect> |
| pos: adj. |
| def: PE |
| vuh: {khar-el'taranaya} |
| en: <direct manipulation> |
| def: FSE-GV |
| 28036,10 → 28055,26 |
| en: <reach> |
| pos: v. |
| vuh: {sapulau} |
| en: <reach out> |
| pos: v. |
| def: PE |
| vuh: {pulaya} |
| en: <reach> |
| pos: n. |
| def: PE |
| vuh: {pulayan} |
| en: <reaching> |
| pos: n. |
| def: PE |
| vuh: {pulayau} |
| ipa: - - ' |
| en: <be present> |
| pos: v. |
| com: but <presence> {la'es}; cf. {pulayan} |
| vuh: {pulu-kur} |
| en: <lavender> |
| 31618,7 → 31653,7 |
| vuh: {ta} |
| en: <that>, <which>, <who> (not a question word) |
| pos: conj. |
| pos: conj., pron. |
| tag: MGV |
| vuh: {ta'a} |
| 31860,6 → 31895,11 |
| en: <ideal> |
| pos: n. |
| vuh: {tangu(-~|yik)} |
| en: <ideal> |
| pos: adj. |
| def: PE |
| vuh: {tanilau} |
| en: <provide>, <supply> |
| pos: v. |
| 44748,11 → 44788,6 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {papulau} |
| en: <roam> |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {paresh-tor} |
| en: <occur> |
| def: FSE-GV |
| 46903,13 → 46938,13 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {plomik shur} |
| en: <vegetable soup> |
| vuh: {plom (n., anc.); plomik (adj.)} |
| en: <vegetable> |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {plom (n., anc.); plomik (adj.)} |
| en: <vegetable> |
| vuh: {plomik shur} |
| en: <vegetable soup> |
| def: FSE-GV |
| com: not in GV-FSE |
| 47830,6 → 47865,11 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {papulau} |
| en: <roam> |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {pulayau} |
| en: <be present> |
| def: FSE-GV |
| 53045,11 → 53085,6 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {sakatau} |
| en: <bring out>, <enhance> |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {sakataya} |
| en: <enhancement> |
| def: FSE-GV |
| 56516,6 → 56551,7 |
| vuh: {sha'ves-yokulan} |
| en: <cannibalism> |
| lit: <own><kind>-<eating> |
| def: FSE-GV |
| com: not in GV-FSE |
| 61941,12 → 61977,6 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {ta} |
| en: <that> |
| pos: conj., pron. |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {ta'bek} |
| tag: anc. |
| en: <drug> |
| 62387,12 → 62417,6 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {tangu} |
| en: <ideal> |
| pos: n. |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {tangu-sfek} |
| en: <ideal point> |
| def: FSE-GV |
| 62981,6 → 63005,7 |
| vuh: {tehnekon-torsu} |
| en: <sinner> |
| lit: <against><god>-<do><person> |
| def: FSE-GV |
| com: not in GV-FSE |
| 70686,11 → 70711,6 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {vikatau} |
| en: <bring in> |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {vikau} |
| en: <warn> |
| pos: v. |
| 71194,8 → 71214,10 |
| vuh: {vishasplotau} |
| en: <invade> |
| lit: <into><land><?> |
| def: FSE-GV |
| com: not in GV-FSE |
| see: {vi}, {shasol}, {plotau} |
| vuh: {vishasplotausu} |
| en: <invader> |
| 71476,11 → 71498,6 |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {vi} |
| en: <who> |
| def: FSE-GV |
| com: not in GV-FSE |
| vuh: {viyatau} |
| en: <impregnate> |
| def: FSE-GV |
| 77891,3 → 77908,5 |
| en: <gadget>, <gizmo> |
| def: FSE-GV |
| com: not in GV-FSE |
| # vim: set tabstop=2 shiftwidth=2 expandtab : |
| /trunk/tools/eazytrans/VulcanDictionary.py |
|---|
| 0,0 → 1,174 |
| """ |
| Created on 15.01.2015 |
| @author: pelinux |
| """ |
| from Dictionary import Dictionary, dmsg |
| from re import escape, match, sub, search, findall, DOTALL |
# Prefixes that translate() tries to split off the front of a phrase.
# Only the keys are read by the code below; the values are presumably
# English glosses kept for reference -- TODO confirm.
prepositions = {
    "fi'": 'on',
    "na'": 'at|to',
    "t'": 'of'
}


class VulcanDictionary (Dictionary):
    """
    Dictionary with fallback lookups for prefixed and plural phrases.

    When a phrase has no direct entry, translate() tries to split off one
    of the prefixes listed in ``prepositions`` or the plural suffix
    ``lar`` and translates the remaining part on its own.
    """

    def translate (self, phrase, search_prefix=True, search_plural=True):
        """
        Translate a phrase, falling back to prefix or plural analysis.

        Lookups are attempted in this order; the first hit is returned:

        1. the base-class lookup of the whole phrase,
        2. ``self.translate_expression(phrase)``,
        3. if ``search_prefix``: split off a leading preposition and
           translate prefix and remainder separately,
        4. otherwise, if ``search_plural``: strip a trailing ``lar`` and
           translate the remainder.

        :param phrase: the phrase to translate
        :type phrase: str
        :param search_prefix: try to split off a leading preposition
        :type search_prefix: bool
        :param search_plural: try to strip the plural suffix; only
          consulted when ``search_prefix`` is false
        :type search_plural: bool
        :return: the result of the base lookup or of
          translate_expression(); or a two-item list
          ``[prefix_translation, tail_translation]``; or a copy of the
          singular entry with ``' (pl.)'`` appended to its ``'en'``
          value; or ``None`` if nothing matched
        """
        translation = super().translate(phrase)
        if translation is not None:
            return translation

        expr_translation = self.translate_expression(phrase)
        if expr_translation is not None:
            return expr_translation

        if search_prefix:
            for preposition in prepositions:
                if not phrase.startswith(preposition):
                    continue

                # Look the prefix up without further analysis: if it has
                # no entry of its own, a full translate() would match the
                # same prefix again and recurse without end.
                prefix_translation = self.translate(
                    preposition, search_prefix=False, search_plural=False)
                if prefix_translation is None:
                    continue

                # Cut off only the leading prefix; a substitution over the
                # whole phrase would also delete later occurrences of the
                # same character sequence inside the word.
                tail = phrase[len(preposition):]
                tail_translation = self.translate(tail, search_prefix=False)
                if tail_translation is not None:
                    return [prefix_translation, tail_translation]
        # NOTE(review): the plural branch is the alternative to the prefix
        # branch, so a top-level call (search_prefix=True) never strips
        # "lar"; it is reached through the tail lookup above.  The nesting
        # was reconstructed from a listing without indentation -- confirm.
        elif search_plural and phrase.endswith('lar'):
            head = phrase[:-len('lar')]
            head_translation = self.translate(
                head, search_prefix=False, search_plural=False)
            if head_translation is not None:
                # Work on a copy so that the stored singular entry is not
                # modified by the plural marker.
                head_translation = dict(head_translation)
                head_translation['en'] += ' (pl.)'
                return head_translation

        return None

    def clean_entry(self, phrase):
        """
        Replace GV Media Script parens with FSE parens.

        Text delimited by vertical bars (``|text|``) is rewritten as
        ``(text)``.  Groups that are already parenthesized are matched
        first and returned unchanged, so bars inside them are left alone.

        :param phrase: the entry text to clean
        :type phrase: str
        :return: the phrase with bar-delimited groups parenthesized
        :rtype: str
        """
        return sub(
            r'(\([^)]*\))|\|([^|)]+)\|',
            lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1),
            phrase)
class Clause (object):
    """
    A clause of a sentence; the unit within which phrases are looked up.
    """

    # Matches one clause: it does not start with whitespace and ends at a
    # hyphen preceded by whitespace, at an en/em dash, at one to three
    # dots, or at the end of the text.
    pattern = r'(?!\s+)(.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|[^–—.]+$))'

    def __init__ (self, text):
        """
        :param text: the source text of the clause
        :type text: str
        """
        self._text = text
        self._phrases = []

    def __str__(self):
        return self._text

    def translate (self, dictionary):
        """
        Split the clause into phrases by attempting to translate them.

        Starting at each word, the longest run of words that
        ``dictionary.translate()`` can translate is taken as one phrase
        and its translation is stored; a word that cannot be translated
        even on its own is stored unchanged.  The result replaces the
        contents of ``self._phrases``.

        :param dictionary: object providing ``translate(phrase)``, which
          returns ``None`` for unknown phrases
        """
        words = findall(r'[^\s.]+', self._text)
        dmsg("words:", words, min_level=2)

        # Start over on every call; otherwise translating the same clause
        # twice would append a second copy of every phrase.
        self._phrases = []

        offset = 0
        while offset < len(words):
            translation = None

            # Try the longest candidate first and shorten it word by word.
            for i in range(len(words), offset, -1):
                dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2)
                phrase = ' '.join(words[offset:i])

                dmsg("phrase:", phrase, min_level=2)

                translation = dictionary.translate(phrase)

                if translation is not None:
                    dmsg("phrase-translation:", translation, min_level=2)
                    dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2)
                    self._phrases.append(translation)
                    offset = i
                    break

            if translation is None:
                # Not even the single word at this offset is known: keep
                # it untranslated and continue with the next word.
                dmsg("phrase-translation:", translation, min_level=2)
                self._phrases.append(words[offset])
                offset += 1

        dmsg("phrases:", self._phrases, min_level=2)
class Sentence (object):
    """
    A sentence of a paragraph, split into clauses on construction.
    """

    # Matches one sentence: it does not start with whitespace and either
    # ends with one to three dots or runs to the end of the text.
    # Raw string, so that "\s" and "\." reach the regex engine as written
    # instead of being treated as (invalid) string escape sequences.
    pattern = r'(?!\s+)(.+?\.{1,3}|.+$)'

    def __init__ (self, text):
        """
        :param text: the source text of the sentence
        :type text: str
        """
        self._text = text
        self.find_clauses(text)

    def __str__(self):
        return self._text

    def find_clauses (self, text):
        """
        Split ``text`` with ``Clause.pattern`` and store the resulting
        Clause objects in ``self._clauses``.

        :param text: the text to split
        :type text: str
        """
        self._clauses = [
            Clause(clause_text)
            for clause_text in findall(Clause.pattern, text, DOTALL)]

    def translate (self, dictionary):
        """
        Translate every clause of this sentence.

        :param dictionary: passed on to ``Clause.translate()``
        """
        for clause in self._clauses:
            clause.translate(dictionary)
class Paragraph (object):
    """
    A paragraph of a text, split into sentences on construction.
    """

    # Matches one paragraph: as little text as possible up to and
    # including a run of two or more line breaks, or up to the end of
    # the text.
    pattern = r'.+?(?:(?:\r?\n|\r){2,}|$)'

    def __init__ (self, text):
        """
        :param text: the source text of the paragraph
        :type text: str
        """
        self._text = text
        self.find_sentences(text)

    def __str__ (self):
        return self._text

    def find_sentences (self, text):
        """
        Split ``text`` with ``Sentence.pattern`` and store the resulting
        Sentence objects in ``self._sentences``.

        :param text: the text to split
        :type text: str
        """
        self._sentences = [
            Sentence(sentence_text)
            for sentence_text in findall(Sentence.pattern, text, DOTALL)]

    def translate (self, dictionary):
        """
        Translate every sentence of this paragraph.

        :param dictionary: passed on to ``Sentence.translate()``
        """
        for sentence in self._sentences:
            sentence.translate(dictionary)
class Text (object):
    """
    A complete text, split into paragraphs on construction.
    """

    def __init__ (self, text):
        """
        :param text: the source text
        :type text: str
        """
        self._text = text
        self.find_paragraphs(text)

    def __repr__ (self):
        # Concatenation of the representations of all paragraphs.
        return ''.join(repr(paragraph) for paragraph in self._paragraphs)

    def __str__ (self):
        return self._text

    def find_paragraphs (self, text):
        """
        Split ``text`` with ``Paragraph.pattern`` and store the resulting
        Paragraph objects in ``self._paragraphs``.

        :param text: the text to split
        :type text: str
        """
        self._paragraphs = [
            Paragraph(paragraph_text)
            for paragraph_text in findall(Paragraph.pattern, text, DOTALL)]

    def translate (self, dictionary):
        """
        Translate every paragraph of this text.

        :param dictionary: passed on to ``Paragraph.translate()``
        """
        for paragraph in self._paragraphs:
            paragraph.translate(dictionary)
| Property changes: |
| Added: svn:mime-type |
| ## -0,0 +1 ## |
| +text/plain |
| \ No newline at end of property |
| Index: vuh.py |
| =================================================================== |
| --- vuh.py (revision 299) |
| +++ vuh.py (revision 300) |
| @@ -6,21 +6,14 @@ |
| @author: Thomas 'PointedEars' Lahn <mail@PointedEars.de> |
| ''' |
| from sys import argv, stderr |
| -from re import findall, DOTALL, match, sub, compile, \ |
| - escape, search |
| +from re import findall, compile |
| from os.path import basename |
| from functools import cmp_to_key |
| -from Dictionary import Dictionary, dmsg, \ |
| - sort_dict_alnum_english_key |
| +from Dictionary import dmsg, sort_dict_alnum_english_key |
| +from VulcanDictionary import VulcanDictionary, Text |
| dictionary = {} |
| -prepositions = { |
| - "fi'": 'on', |
| - "na'": 'at|to', |
| - "t'": 'of' |
| -} |
| - |
| def cli_help(): |
| print('Usage: {0} TEXT...'.format(basename(argv[0]))) |
| @@ -54,63 +47,7 @@ |
| return cmp_to_key(sort_dict_alnum_vulcan) |
| -class VulcanDictionary (Dictionary): |
| - """ |
| - """ |
| - def translate (self, phrase, search_prefix=True, search_plural=True): |
| - """ |
| - |
| - :param phrase: |
| - :type phrase: |
| - :param search_prefix: |
| - :type search_prefix: |
| - :param search_plural: |
| - :type search_plural: |
| - """ |
| - translation = super().translate(phrase) |
| - if translation is not None: |
| - return translation |
| - else: |
| - expr_translation = self.translate_expression(phrase) |
| - if expr_translation is not None: |
| - return expr_translation |
| - |
| - if search_prefix: |
| - # find prefix |
| - for preposition in prepositions: |
| - prefix = match(escape(preposition), phrase) |
| - if prefix is not None: |
| - prefix_translation = self.translate(prefix.group(0)) |
| - if prefix_translation is not None: |
| - tail = sub(preposition, '', phrase) |
| - tail_translation = self.translate(tail, search_prefix=False) |
| - if tail_translation is not None: |
| - return [prefix_translation, tail_translation] |
| - elif search_plural: |
| - # find plural |
| - suffix = search(r'lar$', phrase) |
| - if suffix is not None: |
| - head = sub(r'lar$', '', phrase) |
| - head_translation = self.translate(head, search_prefix=False, search_plural=False) |
| - if head_translation is not None: |
| - head_translation = dict(head_translation) |
| - head_translation['en'] += ' (pl.)' |
| - return head_translation |
| - |
| - return None |
| - |
| - def clean_entry(self, phrase): |
| - """ |
| - Replace GV Media Script parens with FSE parens |
| - :param phrase: |
| - :type phrase: |
| - """ |
| - return sub( |
| - r'(\([^)]*\))|\|([^|)]+)\|', |
| - lambda m: '({0})'.format(m.group(2)) if m.group(2) else m.group(1), |
| - phrase) |
| - |
| if __name__ == '__main__': |
| if len(argv) < 2: |
| print('Nothing to translate.', end='\n\n', file=stderr) |
| @@ -132,51 +69,21 @@ |
| # except BrokenPipeError: |
| # pass |
| + text = Text(text) |
| + |
| dmsg("text:", text, min_level=2) |
| - sentences = findall(r'(?!\s+)(?:.+?\.{1,3}|.+$)', text, DOTALL) |
| - dmsg("sentences:", sentences, min_level=2) |
| - for sentence in sentences: |
| - dmsg("sentence:", sentence, min_level=2) |
| + dmsg("text:", text.__repr__(), min_level=2) |
| - clauses = findall(r'(?!\s+)(?:.+?(?:\s+-\s*|\s*[–—]\s*|\.{1,3}|.+$))', sentence, DOTALL) |
| - dmsg("clauses:", clauses, min_level=2) |
| - for clause in clauses: |
| - dmsg("clause:", clause, min_level=2) |
| + text.translate(dictionary) |
| - words = findall(r'[^\s.]+', clause) |
| - dmsg("words:", words, min_level=2) |
| - |
| - offset = 0 |
| - while offset < len(words): |
| - translation = None |
| - |
| - for i in range(len(words), offset, -1): |
| - dmsg("words[{0}:{1}] = {2}".format(offset, i, words[offset:i]), min_level=2) |
| - phrase = ' '.join(words[offset:i]) |
| - |
| - dmsg("phrase:", phrase, min_level=2) |
| - |
| - translation = dictionary.translate(phrase) |
| - |
| - if translation is not None: |
| - dmsg("phrase-translation:", translation, min_level=2) |
| - dmsg("words[{0}:{1}] = [\"{2}\"]".format(offset, i, translation), min_level=2) |
| - words[offset:i] = [translation] |
| - offset += i - offset |
| - break |
| - |
| - if translation is None: |
| - dmsg("phrase-translation:", translation, min_level=2) |
| - offset += 1 |
| - |
| - dmsg("words-translation:", words, min_level=2) |
| - dmsg("words-translation-reduced:", |
| - list(map( |
| - lambda word: |
| - word['en'] |
| - if (hasattr(word, "get") and word.get('en', None) is not None) |
| - else word, |
| - words)), |
| - min_level=2) |
| +# dmsg("words-translation:", words, min_level=2) |
| +# dmsg("words-translation-reduced:", |
| +# list(map( |
| +# lambda word: |
| +# word['en'] |
| +# if (hasattr(word, "get") and word.get('en', None) is not None) |
| +# else word, |
| +# words)), |
| +# min_level=2) |
| # for key, value in dictionary._expressions.items(): |
| # dmsg(key, value, min_level=3) |
| /trunk/tools/eazytrans/Dictionary.py |
|---|
| 14,14 → 14,12 |
| debug_level = 2 |
| def dmsg(*args, **kwargs): |
| if not hasattr(kwargs, 'min_level') or kwargs['min_level'] is None: |
| kwargs['min_level'] = 1 |
| if not hasattr(kwargs, 'file'): |
| if not kwargs.get('file'): |
| kwargs['file'] = stderr |
| if debug_level >= kwargs['min_level']: |
| del kwargs['min_level'] |
| min_level = kwargs.pop('min_level', 1) |
| if debug_level >= min_level: |
| print(*args, **kwargs) |
| def sort_dict_alnum_english_key(phrase): |
| 37,7 → 35,7 |
| _keys = "ipa|en|lit|pos|com|tag|ex" |
| _expressions = {} |
| def load (self, dictionary_file, language_key='en'): |
| def load (self, dictionary_file, keys=None, language_key=None): |
| """ |
| Loads a word dictionary from a file. |
| :param dictionary_file: |
| 45,6 → 43,10 |
| :param language_key: |
| :type language_key: |
| """ |
| if keys is not None: |
| self._keys = keys |
| if language_key is not None: |
| self._language_key = language_key |
| dmsg('Loading dictionary '.format(dictionary_file), end='', min_level=1) |
| 60,6 → 62,7 |
| if pickle_mtime is None or stat(dictionary_file).st_mtime > pickle_mtime: |
| dmsg('from {0} ...'.format(dictionary_file), end='', min_level=1) |
| phrase = None |
| key = None |
| value = None |
| 101,11 → 104,14 |
| self[phrase][key] = ' '.join(value) |
| dmsg('\nSaving pickle {0} ...'.format(pickle_file), end='', min_level=1) |
| # TODO: Pickle should only contain strings to be small |
| with open(pickle_file, mode='wb') as f: dump(self, f) |
| dmsg(' done.', min_level=1) |
| else: |
| dmsg('from {0} ...'.format(pickle_file), end='', min_level=1) |
| with open(pickle_file, mode='rb') as f: pickle = load(f) |
| for key, value in pickle.items(): |
| self[key] = value |
| /trunk/tools/eazytrans/. |
|---|
| Property changes: |
| Added: svn:ignore |
| ## -0,0 +1 ## |
| +*.pickle |