Difference between revisions of "User:Francis Tyers/Experiments"

From Apertium
Jump to navigation Jump to search
Line 10: Line 10:
 
2094 cat europako_testuak_memoria_2010.txt | grep '^es' | cut -f2- > europako_testuak_memoria_2010.es.txt
 
2094 cat europako_testuak_memoria_2010.txt | grep '^es' | cut -f2- > europako_testuak_memoria_2010.es.txt
 
2095 cat europako_testuak_memoria_2010.txt | grep '^eu' | cut -f2- > europako_testuak_memoria_2010.eu.txt
 
2095 cat europako_testuak_memoria_2010.txt | grep '^eu' | cut -f2- > europako_testuak_memoria_2010.eu.txt
2099 cat europako_testuak_memoria_2010.es.txt 2010_memo_orokorra.es.txt > opendata.es.txt
+
2099 cat europako_testuak_memoria_2010.es.txt 2010_memo_orokorra.es.txt > opendata.es
2100 cat europako_testuak_memoria_2010.eu.txt 2010_memo_orokorra.eu.txt > opendata.eu.txt
+
2100 cat europako_testuak_memoria_2010.eu.txt 2010_memo_orokorra.eu.txt > opendata.eu
   
   
 
$ wc -l opendata.e*
 
$ wc -l opendata.e*
782325 opendata.es.txt
+
782325 opendata.es
782325 opendata.eu.txt
+
782325 opendata.eu
  +
  +
  +
2114 perl /home/fran/local/bin/scripts-20120109-1229/training/clean-corpus-n.perl opendata eu es opendata.clean 1 80
  +
  +
2117 cat opendata.clean.eu |apertium-destxt | apertium -f none -d ~/source/apertium-eu-es/ eu-es-pretransfer > opendata.tagged.eu
  +
2126 cat opendata.clean.es |apertium-destxt | apertium -f none -d ~/source/apertium-eu-es/ es-eu-pretransfer > opendata.tagged.es &
  +
  +
  +
2132 seq 1 771238 > opendata.lines
  +
2133 paste opendata.lines opendata.tagged.eu opendata.tagged.es | grep '<' | cut -f1 > opendata.lines.new
  +
2134 paste opendata.lines opendata.tagged.eu opendata.tagged.es | grep '<' | cut -f2 > opendata.tagged.eu.new
  +
2135 paste opendata.lines opendata.tagged.eu opendata.tagged.es | grep '<' | cut -f3 > opendata.tagged.es.new
  +
  +
2137 mv opendata.lines.new opendata.lines
  +
2138 mv opendata.tagged.es.new opendata.tagged.es
  +
2139 mv opendata.tagged.eu.new opendata.tagged.eu
  +
  +
2146 cat opendata.tagged.eu | lt-proc -b ~/source/apertium-eu-es/eu-es.autobil.bin >/tmp/eu-es.bil1
  +
  +
2148 cat opendata.tagged.eu | lt-proc -b ~/source/apertium-eu-es/eu-es.autobil-noRL.bin >/tmp/eu-es.bil2
  +
  +
$ tail -n 1 /tmp/*.poly
  +
==> /tmp/eu-es.bil1.poly <==
  +
1.00240014637
  +
  +
==> /tmp/eu-es.bil2.poly <==
  +
1.3015831681
  +
  +
2191 mv /tmp/eu-es.bil2 opendata.biltrans.eu-es
  +
  +
2258 cat opendata.tagged.es | python /home/fran/source/apertium-lex-tools/scripts/process-tagger-output.py es > opendata.token.es
  +
2007 cat opendata.tagged.eu | python /home/fran/source/apertium-lex-tools/scripts/process-tagger-output.py eu > opendata.token.eu
  +
2014 cat opendata.biltrans.eu-es | python /home/fran/source/apertium-lex-tools/scripts/process-biltrans-output.py > opendata.token.eu-es &
  +
  +
   
 
</pre>
 
</pre>

Revision as of 12:52, 21 June 2012

Basque→Spanish

 2081  cat europako_testuak_memoria_2010.tmx | iconv -f utf-16 -t utf-8 > europako_testuak_memoria_2010.tmx.u8
 2082  cat 2010_memo_orokorra.tmx | iconv -f utf-16 -t utf-8 > 2010_memo_orokorra.tmx.u8
 2088  python3 process-tmx.py europako_testuak_memoria_2010.tmx.u8 > europako_testuak_memoria_2010.txt
 2090  python3 process-tmx.py 2010_memo_orokorra.tmx.u8 > 2010_memo_orokorra.txt
 2091  cat 2010_memo_orokorra.txt | grep '^es' | cut -f2- > 2010_memo_orokorra.es.txt
 2092  cat 2010_memo_orokorra.txt | grep '^eu' | cut -f2- > 2010_memo_orokorra.eu.txt
 2094  cat europako_testuak_memoria_2010.txt | grep '^es' | cut -f2- > europako_testuak_memoria_2010.es.txt
 2095  cat europako_testuak_memoria_2010.txt | grep '^eu' | cut -f2- > europako_testuak_memoria_2010.eu.txt
 2099  cat europako_testuak_memoria_2010.es.txt 2010_memo_orokorra.es.txt > opendata.es
 2100  cat europako_testuak_memoria_2010.eu.txt 2010_memo_orokorra.eu.txt > opendata.eu


$ wc -l opendata.e*
   782325 opendata.es
   782325 opendata.eu


 2114  perl /home/fran/local/bin/scripts-20120109-1229/training/clean-corpus-n.perl opendata eu es opendata.clean 1 80

 2117  cat opendata.clean.eu | apertium-destxt | apertium -f none -d ~/source/apertium-eu-es/ eu-es-pretransfer > opendata.tagged.eu
 2126  cat opendata.clean.es | apertium-destxt | apertium -f none -d ~/source/apertium-eu-es/ es-eu-pretransfer > opendata.tagged.es &


 2132  seq 1 771238 > opendata.lines
 2133  paste opendata.lines opendata.tagged.eu opendata.tagged.es | grep '<' | cut -f1 > opendata.lines.new
 2134  paste opendata.lines opendata.tagged.eu opendata.tagged.es | grep '<' | cut -f2 > opendata.tagged.eu.new
 2135  paste opendata.lines opendata.tagged.eu opendata.tagged.es | grep '<' | cut -f3 > opendata.tagged.es.new

 2137  mv opendata.lines.new opendata.lines
 2138  mv opendata.tagged.es.new opendata.tagged.es
 2139  mv opendata.tagged.eu.new opendata.tagged.eu

 2146  cat opendata.tagged.eu | lt-proc -b ~/source/apertium-eu-es/eu-es.autobil.bin > /tmp/eu-es.bil1

 2148  cat opendata.tagged.eu | lt-proc -b ~/source/apertium-eu-es/eu-es.autobil-noRL.bin > /tmp/eu-es.bil2

$ tail -n 1 /tmp/*.poly
==> /tmp/eu-es.bil1.poly <==
1.00240014637

==> /tmp/eu-es.bil2.poly <==
1.3015831681

 2191  mv /tmp/eu-es.bil2 opendata.biltrans.eu-es

 2258  cat opendata.tagged.es | python /home/fran/source/apertium-lex-tools/scripts/process-tagger-output.py es > opendata.token.es
 2007  cat opendata.tagged.eu | python /home/fran/source/apertium-lex-tools/scripts/process-tagger-output.py eu > opendata.token.eu
 2014  cat opendata.biltrans.eu-es | python /home/fran/source/apertium-lex-tools/scripts/process-biltrans-output.py > opendata.token.eu-es &