Difference between revisions of "Talk:Using GIZA++"

From Apertium
Jump to navigation · Jump to search
 
(9 intermediate revisions by the same user not shown)
Line 1: Line 1:
  +
==First try==
  +
 
629 cat es-text.txt | grep -v '^<' > es-text.t
 
629 cat es-text.txt | grep -v '^<' > es-text.t
 
630 mv es-text.t es-text.txt
 
630 mv es-text.t es-text.txt
Line 5: Line 7:
 
721 cat es-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/es-it.automorf.bin |
 
721 cat es-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/es-it.automorf.bin |
 
apertium-tagger -g ~/local/share/apertium/apertium-es-it/es-it.prob | apertium-retxt > es-text.tagged.txt &
 
apertium-tagger -g ~/local/share/apertium/apertium-es-it/es-it.prob | apertium-retxt > es-text.tagged.txt &
  +
525 cat it-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/it-es.automorf.bin |
  +
apertium-tagger -g ~/local/share/apertium/apertium-es-it/it-es.prob | apertium-retxt > it-text.tagged.txt &
  +
550 ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.tagged.txt it/it-text.tagged.txt
  +
551 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.tagged.txt -c50 -Ves/es-text.tagged.vcb.classes opt >& es/mkcls1.log
  +
555 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.tagged.txt -c50 -Vit/it-text.tagged.vcb.classes opt >& it/mkcls1.log
  +
564 ~fsanchez/GIZA++-v2/GIZA++ -S es/es-text.tagged.vcb -T it/it-text.tagged.vcb -C es/es-text.tagged_it-text.tagged.snt -p0 0.98 -o es-it.aligned >& alignment.log
  +
  +
==Second try==
  +
  +
629 cat es-text.txt | grep -v '^<' > es-text.t
  +
630 mv es-text.t es-text.txt
  +
633 cat it-text.txt | grep -v '<' > it
  +
634 mv it it-text.txt
  +
550 ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.txt it/it-text.txt
  +
930 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.txt -c50 -Ves/es-text.vcb.classes opt >& es/mkcls1.log
  +
932 ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.txt -c50 -Vit/it-text.vcb.classes opt >& it/mkcls1.log
  +
  +
==Welsh to English (classic)==
  +
  +
<pre>
  +
774 cat cy-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > cy.crp.txt &
  +
775 cat en-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > en.crp.txt &
  +
781 plain2snt.out cy.crp.txt en.crp.txt
  +
783 snt2cooc.out cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt > cy-en.model.cooc
  +
784 trainGIZA++.sh cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt
  +
  +
  +
</pre>

Latest revision as of 21:04, 9 August 2008

First try[edit]

 629  cat es-text.txt | grep -v '^<' > es-text.t
 630  mv es-text.t es-text.txt 
 633  cat it-text.txt | grep -v '<' > it
 634  mv it it-text.txt 
 721  cat es-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/es-it.automorf.bin | 
      apertium-tagger -g ~/local/share/apertium/apertium-es-it/es-it.prob | apertium-retxt > es-text.tagged.txt &
 525  cat it-text.txt | apertium-destxt | lt-proc ~/local/share/apertium/apertium-es-it/it-es.automorf.bin |
      apertium-tagger -g ~/local/share/apertium/apertium-es-it/it-es.prob | apertium-retxt > it-text.tagged.txt &
 550  ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.tagged.txt it/it-text.tagged.txt
 551  ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.tagged.txt -c50 -Ves/es-text.tagged.vcb.classes opt >& es/mkcls1.log
 555  ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.tagged.txt -c50 -Vit/it-text.tagged.vcb.classes opt >& it/mkcls1.log
 564  ~fsanchez/GIZA++-v2/GIZA++ -S es/es-text.tagged.vcb -T it/it-text.tagged.vcb -C es/es-text.tagged_it-text.tagged.snt -p0 0.98 -o es-it.aligned >& alignment.log

Second try[edit]

 629  cat es-text.txt | grep -v '^<' > es-text.t
 630  mv es-text.t es-text.txt 
 633  cat it-text.txt | grep -v '<' > it
 634  mv it it-text.txt 
 550  ~fsanchez/GIZA++-v2/plain2snt.out es/es-text.txt it/it-text.txt
 930  ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pes/es-text.txt -c50 -Ves/es-text.vcb.classes opt >& es/mkcls1.log
 932  ~fsanchez/GIZA++-v2/mkcls-v2/mkcls -m2 -pit/it-text.txt -c50 -Vit/it-text.vcb.classes opt >& it/mkcls1.log

Welsh to English (classic)[edit]

  774  cat cy-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > cy.crp.txt &
  775  cat en-clean.txt | python -c "import sys, codecs; sys.stdout = codecs.getwriter('utf-8')(sys.stdout); sys.stdin = codecs.getreader('utf-8')(sys.stdin); print sys.stdin.read().lower();" > en.crp.txt &
  781  plain2snt.out cy.crp.txt en.crp.txt 
  783  snt2cooc.out cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt > cy-en.model.cooc
  784  trainGIZA++.sh cy.crp.vcb en.crp.vcb cy.crp_en.crp.snt