Difference between revisions of "Talk:Scandinavian MT project"
Jump to navigation
Jump to search
(→script) |
|||
Line 128: | Line 128: | ||
===script=== |
===script=== |
||
<pre> |
<pre> |
||
$ while sleep 30; do |
|||
$ while sleep 30; do for d in trunk/apertium-{swe-nor,dan-nor,nno-nob,swe-dan}; do ( cd $d && up && make langs && rev=$(svn info|grep ^Revision: |grep -o '[0-9]*') && for m in modes/???-???.mode; do p=${m##modes/} && p=${p%%.mode} && echo $p && ( test -f nobackup/${rev}-$p || ( dev/testvoc/generation.sh --hfst $p > nobackup/${rev}-$p.tmp; mv nobackup/${rev}-$p.tmp nobackup/${rev}-$p ) ); done );done;done |
|||
for d in trunk/apertium-{swe-nor,dan-nor,nno-nob,swe-dan}; do |
|||
( cd $d && up && make langs && rev=$(svn info|grep ^Revision: |grep -o '[0-9]*') && |
|||
for m in modes/???-???.mode; do |
|||
p=${m##modes/} && p=${p%%.mode} && echo $p && |
|||
( test -f nobackup/${rev}-$p || |
|||
( dev/testvoc/generation.sh --hfst $p > nobackup/${rev}-$p.tmp |
|||
mv nobackup/${rev}-$p.tmp nobackup/${rev}-$p |
|||
) |
|||
) |
|||
done |
|||
) |
|||
done |
|||
done |
|||
</pre> |
</pre> |
Revision as of 12:28, 6 June 2016
Coverage
Coverage on Wikipedia dumps. "no decomp" means decompounding is turned off, i.e. lt-proc is run without the -e switch; "ex-upper" means that anything containing uppercase characters is excluded from all counts. The script that produced these numbers is listed under "script" below.
Results
$ ./scandicov.sh
nno-swe
unk known tot cov %
876166 5251500 6127666 85.7015 no decomp ex-upper: 87.6831
773219 5354447 6127666 87.3815 with decomp ex-upper: 91.0789

nob-swe
unk known tot cov %
744696 5369943 6114639 87.8211 no decomp ex-upper: 89.4771
632117 5482522 6114639 89.6622 with decomp ex-upper: 92.8601

swe-nno
unk known tot cov %
992577 5282677 6275254 84.1827 no decomp ex-upper: 87.128
813461 5461794 6275255 87.037 with decomp ex-upper: 92.1041

swe-nob
unk known tot cov %
979754 5295424 6275178 84.3868 no decomp ex-upper: 87.3452
795866 5479313 6275179 87.3172 with decomp ex-upper: 92.4016

dan-swe
unk known tot cov %
1168922 4733019 5901941 80.1943 with decomp ex-upper: 87.5534
1301693 4600248 5901941 77.9447 no decomp ex-upper: 82.9197

swe-dan
unk known tot cov %
1337817 4539056 5876873 77.2359 no decomp ex-upper: 81.2688
1136015 4740858 5876873 80.6697 with decomp ex-upper: 88.3335

dan-nno
unk known tot cov %
869064 5033026 5902090 85.2753 no decomp ex-upper: 87.573
750810 5151280 5902090 87.2789 with decomp ex-upper: 91.5197

dan-nob
unk known tot cov %
831676 5065778 5897454 85.8977 no decomp ex-upper: 88.2809
702310 5195144 5897454 88.0913 with decomp ex-upper: 92.3478

nno-dan
unk known tot cov %
708015 5304680 6012695 88.2247 no decomp ex-upper: 90.2486
614142 5398553 6012695 89.7859 with decomp ex-upper: 93.4224

nob-dan
unk known tot cov %
594853 5397002 5991855 90.0723 no decomp ex-upper: 91.7318
492242 5499613 5991855 91.7848 with decomp ex-upper: 94.8782

nno-nob
unk known tot cov %
706329 5367221 6073550 88.3704 no decomp ex-upper: 91.2201
614665 5458885 6073550 89.8796 with decomp ex-upper: 94.2577

nob-nno
unk known tot cov %
603978 5459984 6063962 90.0399 no decomp ex-upper: 92.6016
503705 5560257 6063962 91.6935 with decomp ex-upper: 95.5487
script
#!/bin/bash
# scandicov.sh -- for every analyser (*.automorf.bin) of the Scandinavian
# language pairs, print how much of the matching Wikipedia corpus it covers,
# once without and once with decompounding (lt-proc -e).

# kill process group:
# (on exit, signal every process in this script's process group so that a
# pipeline still running in the background does not outlive the script)
trap "kill -- -0" EXIT

# An analyser glob that matches nothing expands to nothing, so the loop below
# is skipped instead of being run once on the literal pattern.
shopt -s nullglob

#######################################
# Summarise a stream of analysed tokens, one per line, read from stdin.
# A line starting with "^" counts as a token, a line containing "/*" counts
# as unknown. Lines containing an uppercase character are tallied separately
# so that an "ex-upper" coverage (those lines left out) can be reported.
# Arguments: $1 - note copied into the output row
# Outputs:   one tab-separated row on stdout:
#            unknown, known, total, coverage %, note, "ex-upper:", coverage %
#            A percentage is "nan" when its denominator is zero.
#######################################
sum () {
  awk -v note="$1" '
    # Percentage num/den, or "nan" rather than a fatal division by zero.
    function pct(num, den) { return den ? 100 * num / den : "nan" }
    BEGIN { OFS = FS = "\t" }
    /^\^/               { w++ }
    /\/\*/              { u++ }
    /\/\*.*[[:upper:]]/ { Uu++ }
    /[[:upper:]]/       { Uw++ }
    END {
      lw = w - Uw
      lu = u - Uu
      # "+ 0" forces a counter that was never incremented to print as 0
      # instead of an empty field.
      print u + 0, w - u, w + 0, pct(w - u, w), note, "ex-upper:", pct(lw - lu, lw)
    }'
}

for ana in /l/a/*/apertium-{swe-nor,swe-dan,dan-nor,nno-nob}/*.automorf.bin; do
  f="$(basename "${ana}")"
  echo "${f%%.automorf.bin}"
  # Everything before the first "-" of the file name selects the corpus file.
  src="${f%%-*}"
  printf "unk\tknown\ttot\tcov %%\n"
  # Start the decompounding run concurrently, but hold its single result row
  # on fd 3 until the plain run has printed. Both runs still overlap in time,
  # yet the rows always appear as "no decomp" followed by "with decomp"
  # (writing both straight to stdout let them appear in either order).
  exec 3< <(
    xzcat ~/corpora/"${src}".wikicov.xz | lt-proc -e "${ana}" \
      | apertium-cleanstream -n \
      | sum "with decomp"
  )
  xzcat ~/corpora/"${src}".wikicov.xz | lt-proc "${ana}" \
    | apertium-cleanstream -n \
    | sum " no decomp"
  # Reading fd 3 to EOF also waits for the concurrent run to finish.
  cat <&3
  exec 3<&-
  echo
done
Testvoc
Results
   1 ../apertium-dan-nor/nobackup/69403-dan-nno
  30 ../apertium-dan-nor/nobackup/69403-dan-nob
  37 ../apertium-dan-nor/nobackup/69403-nno-dan
  26 ../apertium-dan-nor/nobackup/69403-nob-dan
  94 total
 136 ../apertium-nno-nob/nobackup/69407-nno-nob
  71 ../apertium-nno-nob/nobackup/69407-nob-nno
 207 total
 377 ../apertium-swe-dan/nobackup/69408-dan-swe
 276 ../apertium-swe-dan/nobackup/69408-swe-dan
 653 total
  10 ../apertium-swe-nor/nobackup/69408-nno-swe
  14 ../apertium-swe-nor/nobackup/69408-nob-swe
   0 ../apertium-swe-nor/nobackup/69408-swe-nno
   0 ../apertium-swe-nor/nobackup/69408-swe-nob.tmp
  24 total
script
# Poll every 30 seconds: update and rebuild each language-pair checkout, then
# run the generation testvoc once per translation direction and revision.
# A finished result is stored as nobackup/<revision>-<pair>; output is first
# written to nobackup/<revision>-<pair>.tmp and renamed when the run ends, so
# a direction that already has a result for the current revision is skipped.
while sleep 30; do
  for d in trunk/apertium-{swe-nor,dan-nor,nno-nob,swe-dan}; do
    (
      # Subshell: the cd and the variables below stay local to this checkout.
      # NOTE(review): `up` is not defined in this snippet -- presumably a local
      # alias or function that updates the working copy; confirm before reuse.
      cd "$d" && up && make langs || exit
      # Revision number of the working copy; skip this checkout if none found.
      rev=$(svn info | grep '^Revision:' | grep -o '[0-9]*') || exit
      for m in modes/???-???.mode; do
        # modes/xxx-yyy.mode -> xxx-yyy
        p=${m##modes/}
        p=${p%%.mode}
        echo "$p"
        result="nobackup/${rev}-${p}"
        [[ -f "$result" ]] && continue
        dev/testvoc/generation.sh --hfst "$p" > "${result}.tmp"
        mv "${result}.tmp" "$result"
      done
    )
  done
done