Difference between revisions of "Talk:Using GIZA++"
Jump to navigation
Jump to search
Line 30: | Line 30: | ||
775 cat en-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > en.crp.txt & |
775 cat en-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > en.crp.txt & |
||
781 plain2snt.out cy.crp.txt en.crp.txt |
781 plain2snt.out cy.crp.txt en.crp.txt |
||
783 snt2cooc.out cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt |
|||
784 trainGIZA++.sh cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt |
784 trainGIZA++.sh cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt |
||
Revision as of 20:30, 9 August 2008
First try
629 cat es-text.txt | grep -v '^<' > es-text.t
630 mv es-text.t es-text.txt
633 cat it-text.txt | grep -v '<' > it
634 mv it it-text.txt
721 cat es-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/es-it.automorf.bin | apertium-tagger -g ~/local/share/apertium/apertium-es-it/es-it.prob | apertium-retxt > es-text.tagged.txt &
525 cat it-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/it-es.automorf.bin | apertium-tagger -g ~/local/share/apertium/apertium-es-it/it-es.prob | apertium-retxt > it-text.tagged.txt &
550 ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.tagged.txt it/it-text.tagged.txt
551 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.tagged.txt -c50 -Ves/es-text.tagged.vcb.classes opt >& es/mkcls1.log
555 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.tagged.txt -c50 -Vit/it-text.tagged.vcb.classes opt >& it/mkcls1.log
564 ~fsanchez/GIZA++-v2/GIZA++ -S es/es-text.tagged.vcb -T it/it-text.tagged.vcb -C es/es-text.tagged_it-text.tagged.snt -p0 0.98 -o es-it.aligned >& alignment.log
Second try
629 cat es-text.txt | grep -v '^<' > es-text.t
630 mv es-text.t es-text.txt
633 cat it-text.txt | grep -v '<' > it
634 mv it it-text.txt
550 ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.txt it/it-text.txt
930 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.txt -c50 -Ves/es-text.vcb.classes opt >& es/mkcls1.log
932 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.txt -c50 -Vit/it-text.vcb.classes opt >& it/mkcls1.log
Welsh to English (classic)
774 cat cy-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > cy.crp.txt &
775 cat en-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > en.crp.txt &
781 plain2snt.out cy.crp.txt en.crp.txt
783 snt2cooc.out cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt
784 trainGIZA++.sh cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt