Difference between revisions of "Talk:Using GIZA++"
Jump to navigation
Jump to search
(3 intermediate revisions by the same user not shown) | |||
Line 10: | Line 10: | ||
apertium-tagger -g ~/local/share/apertium/apertium-es-it/it-es.prob | apertium-retxt > it-text.tagged.txt & |
apertium-tagger -g ~/local/share/apertium/apertium-es-it/it-es.prob | apertium-retxt > it-text.tagged.txt & |
||
550 ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.tagged.txt it/it-text.tagged.txt |
550 ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.tagged.txt it/it-text.tagged.txt |
||
551 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.txt -c50 -Ves/es-text.vcb.classes opt >& es/mkcls1.log |
551 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.tagged.txt -c50 -Ves/es-text.tagged.vcb.classes opt >& es/mkcls1.log |
||
555 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.txt -c50 -Vit/it-text.vcb.classes opt >& it/mkcls1.log |
555 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.tagged.txt -c50 -Vit/it-text.tagged.vcb.classes opt >& it/mkcls1.log |
||
564 ~fsanchez/GIZA++-v2/GIZA++ -S es/es-text.tagged.vcb -T it/it-text.tagged.vcb -C es/es-text.tagged_it-text.tagged.snt -p0 0.98 -o es-it.aligned >& alignment.log |
564 ~fsanchez/GIZA++-v2/GIZA++ -S es/es-text.tagged.vcb -T it/it-text.tagged.vcb -C es/es-text.tagged_it-text.tagged.snt -p0 0.98 -o es-it.aligned >& alignment.log |
||
Line 23: | Line 23: | ||
930 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.txt -c50 -Ves/es-text.vcb.classes opt >& es/mkcls1.log |
930 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.txt -c50 -Ves/es-text.vcb.classes opt >& es/mkcls1.log |
||
932 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.txt -c50 -Vit/it-text.vcb.classes opt >& it/mkcls1.log |
932 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.txt -c50 -Vit/it-text.vcb.classes opt >& it/mkcls1.log |
||
==Welsh to English (classic)== |
|||
<pre> |
|||
774 cat cy-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > cy.crp.txt & |
|||
775 cat en-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > en.crp.txt & |
|||
781 plain2snt.out cy.crp.txt en.crp.txt |
|||
783 snt2cooc.out cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt > cy-en.model.cooc |
|||
784 trainGIZA++.sh cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt |
|||
</pre> |
Latest revision as of 21:04, 9 August 2008
First try[edit]
629 cat es-text.txt | grep -v '^<' > es-text.t
630 mv es-text.t es-text.txt
633 cat it-text.txt | grep -v '<' > it
634 mv it it-text.txt
721 cat es-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/es-it.automorf.bin | apertium-tagger -g ~/local/share/apertium/apertium-es-it/es-it.prob | apertium-retxt > es-text.tagged.txt &
525 cat it-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/it-es.automorf.bin | apertium-tagger -g ~/local/share/apertium/apertium-es-it/it-es.prob | apertium-retxt > it-text.tagged.txt &
550 ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.tagged.txt it/it-text.tagged.txt
551 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.tagged.txt -c50 -Ves/es-text.tagged.vcb.classes opt >& es/mkcls1.log
555 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.tagged.txt -c50 -Vit/it-text.tagged.vcb.classes opt >& it/mkcls1.log
564 ~fsanchez/GIZA++-v2/GIZA++ -S es/es-text.tagged.vcb -T it/it-text.tagged.vcb -C es/es-text.tagged_it-text.tagged.snt -p0 0.98 -o es-it.aligned >& alignment.log
Second try[edit]
629 cat es-text.txt | grep -v '^<' > es-text.t
630 mv es-text.t es-text.txt
633 cat it-text.txt | grep -v '<' > it
634 mv it it-text.txt
550 ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.txt it/it-text.txt
930 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.txt -c50 -Ves/es-text.vcb.classes opt >& es/mkcls1.log
932 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.txt -c50 -Vit/it-text.vcb.classes opt >& it/mkcls1.log
Welsh to English (classic)[edit]
774 cat cy-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > cy.crp.txt &
775 cat en-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > en.crp.txt &
781 plain2snt.out cy.crp.txt en.crp.txt
783 snt2cooc.out cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt > cy-en.model.cooc
784 trainGIZA++.sh cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt