Difference between revisions of "User:Francis Tyers/TLH"
Jump to navigation
Jump to search
(One intermediate revision by the same user not shown) | |||
Line 2: | Line 2: | ||
<pre> |
<pre> |
||
for x in `seq 0 9`; do |
for x in `seq 0 9`; do |
||
cooked2lex.pl < LexEsp-train-$x.cooked > train-$x.lex |
cooked2lex.pl < LexEsp-train-$x.cooked > train-$x.lex |
||
cooked2ngram.pl < LexEsp-train-$x.cooked > train-$x.ngrams |
cooked2ngram.pl < LexEsp-train-$x.cooked > train-$x.ngrams |
||
Line 8: | Line 7: | ||
t3 train-$x.ngrams train-$x.lex < LexEsp-$x.raw > LexEsp-$x.t3 |
t3 train-$x.ngrams train-$x.lex < LexEsp-$x.raw > LexEsp-$x.t3 |
||
evaluate.pl LexEsp-$x.cooked LexEsp-$x.t3 >> output |
evaluate.pl LexEsp-$x.cooked LexEsp-$x.t3 >> output |
||
⚫ | |||
done |
done |
||
Line 123: | Line 120: | ||
Entropy H(p)=5.488119 |
Entropy H(p)=5.488119 |
||
$ for x in `seq 0 9`; do |
|||
⚫ | |||
⚫ | |||
254 tags 15431 types 87094 tokens |
|||
1 14742 95.535% 62581 71.855% |
|||
2 637 4.128% 20044 23.014% |
|||
3 47 0.305% 3620 4.156% |
|||
4 2 0.013% 96 0.110% |
|||
5 3 0.019% 753 0.865% |
|||
Mean ambiguity A=1.351161 |
|||
⚫ | |||
Entropy H(p)=5.485330 |
|||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
done |
|||
⚫ | |||
$ cooked2raw.pl LexEsp_Etq_Larga-0.cooked > LexEsp_Etq_Larga-0.raw |
|||
⚫ | |||
⚫ | |||
[ 4 ms::1] |
|||
[ 4 ms::1] Trigram POS Tagger (c) Ingo Schröder, schroeder@informatik.uni-hamburg.de |
|||
[ 4 ms::1] |
|||
[ 2064 ms::1] model generated from 3761 sentences (thereof 43 one-word) |
|||
[ 2064 ms::1] found 11283 uni-, 15044 bi-, and 18762 trigram counts for the boundary tag |
|||
[ 12724 ms::1] computed smoothed transition probabilities |
|||
[ 13512 ms::1] built suffix tries with 29924 lowercase and 6743 uppercase nodes |
|||
[ 13532 ms::1] leaves/single/total LC: 7672 18878 29925 |
|||
[ 13536 ms::1] leaves/single/total UC: 1320 4874 6744 |
|||
[ 16329 ms::1] suffix probabilities smoothing done [theta 1.281e-02] |
|||
[ 12249377 ms::1] done |
|||
⚫ | |||
⚫ | |||
418 sentences |
418 sentences |
||
LexEsp_Etq_Larga-0.t3 9412 455 95.389% |
LexEsp_Etq_Larga-0.t3 9412 455 95.389% |
||
⚫ | |||
LexEsp_Etq_Larga-1.t3 9206 494 94.907% |
|||
418 sentences |
|||
LexEsp_Etq_Larga-2.t3 9123 506 94.745% |
|||
418 sentences |
|||
LexEsp_Etq_Larga-3.t3 9208 496 94.889% |
|||
418 sentences |
|||
LexEsp_Etq_Larga-4.t3 9105 507 94.725% |
|||
418 sentences |
|||
LexEsp_Etq_Larga-5.t3 8846 459 95.067% |
|||
418 sentences |
|||
LexEsp_Etq_Larga-6.t3 8893 493 94.747% |
|||
418 sentences |
|||
LexEsp_Etq_Larga-7.t3 9258 490 94.973% |
|||
418 sentences |
|||
LexEsp_Etq_Larga-8.t3 9645 526 94.828% |
|||
417 sentences |
|||
LexEsp_Etq_Larga-9.t3 9355 484 95.081% |
|||
</pre> |
</pre> |
Latest revision as of 15:09, 6 April 2008
Tarea 1[edit]
for x in `seq 0 9`; do
cooked2lex.pl < LexEsp-train-$x.cooked > train-$x.lex
cooked2ngram.pl < LexEsp-train-$x.cooked > train-$x.ngrams
cooked2raw.pl < LexEsp-$x.cooked > LexEsp-$x.raw
t3 train-$x.ngrams train-$x.lex < LexEsp-$x.raw > LexEsp-$x.t3
evaluate.pl LexEsp-$x.cooked LexEsp-$x.t3 >> output
done
$ cat output
418 sentences
LexEsp-0.t3 9470 397 95.976%
418 sentences
LexEsp-1.t3 9290 410 95.773%
418 sentences
LexEsp-2.t3 9199 430 95.534%
418 sentences
LexEsp-3.t3 9264 440 95.466%
418 sentences
LexEsp-4.t3 9164 448 95.339%
418 sentences
LexEsp-5.t3 8908 397 95.733%
418 sentences
LexEsp-6.t3 8968 418 95.547%
418 sentences
LexEsp-7.t3 9334 414 95.753%
418 sentences
LexEsp-8.t3 9693 478 95.300%
417 sentences
LexEsp-9.t3 9434 405 95.884%
Tarea 2[edit]
$ for i in `seq 1 9`; do cat LexEsp-[1-$i].cooked > LexEsp-ejecucion$i.cooked; cooked2lex.pl < LexEsp-ejecucion$i.cooked > train.$i.lex; cooked2ngram.pl < LexEsp-ejecucion$i.cooked > train.$i.ngrams; t3 train.$i.ngrams train.$i.lex < LexEsp-0.raw > LexEsp-0.$i.t3; evaluate.pl LexEsp-0.cooked LexEsp-0.$i.t3 >> output ; done
$ wc -l LexEsp-ejecucion*.cooked
418 LexEsp-ejecucion1.cooked
836 LexEsp-ejecucion2.cooked
1254 LexEsp-ejecucion3.cooked
1672 LexEsp-ejecucion4.cooked
2090 LexEsp-ejecucion5.cooked
2508 LexEsp-ejecucion6.cooked
2926 LexEsp-ejecucion7.cooked
3344 LexEsp-ejecucion8.cooked
3761 LexEsp-ejecucion9.cooked
$ cat output
418 sentences
LexEsp-0.1.t3 8948 919 90.686%
418 sentences
LexEsp-0.2.t3 9155 712 92.784%
418 sentences
LexEsp-0.3.t3 9275 592 94.000%
418 sentences
LexEsp-0.4.t3 9313 554 94.385%
418 sentences
LexEsp-0.5.t3 9366 501 94.922%
418 sentences
LexEsp-0.6.t3 9391 476 95.176%
418 sentences
LexEsp-0.7.t3 9419 448 95.460%
418 sentences
LexEsp-0.8.t3 9444 423 95.713%
418 sentences
LexEsp-0.9.t3 9470 397 95.976%
Tarea 3[edit]
$ for i in `seq 1 10`; do t3 -l $i train.ngrams train.lex < LexEsp-0.raw > LexEsp-0.l$i.t3; evaluate.pl LexEsp-0.cooked LexEsp-0.l$i.t3 >> output.l; done
$ cat output.l
418 sentences
LexEsp-0.l1.t3 9411 456 95.379%
418 sentences
LexEsp-0.l2.t3 9466 401 95.936%
418 sentences
LexEsp-0.l3.t3 9492 375 96.199%
418 sentences
LexEsp-0.l4.t3 9490 377 96.179%
418 sentences
LexEsp-0.l5.t3 9473 394 96.007%
418 sentences
LexEsp-0.l6.t3 9477 390 96.047%
418 sentences
LexEsp-0.l7.t3 9473 394 96.007%
418 sentences
LexEsp-0.l8.t3 9470 397 95.976%
418 sentences
LexEsp-0.l9.t3 9470 397 95.976%
418 sentences
LexEsp-0.l10.t3 9470 397 95.976%
Tarea 4[edit]
$ prepare-corpus.sh LexEsp_Etq_Larga.cooked
4179 sentences
256 tags 16481 types 96961 tokens
1 15735 95.474% 69045 71.209%
2 689 4.181% 22621 23.330%
3 51 0.309% 4315 4.450%
4 3 0.018% 151 0.156%
5 3 0.018% 829 0.855%
Mean ambiguity A=1.361176
Entropy H(p)=5.488119
$ for x in `seq 0 9`; do
cooked2lex.pl < LexEsp_Etq_Larga-train-$x.cooked > train-$x.lex
cooked2ngram.pl < LexEsp_Etq_Larga-train-$x.cooked > train-$x.ngrams
cooked2raw.pl < LexEsp_Etq_Larga-$x.cooked > LexEsp_Etq_Larga-$x.raw
t3 train-$x.ngrams train-$x.lex < LexEsp_Etq_Larga-$x.raw > LexEsp_Etq_Larga-$x.t3
evaluate.pl LexEsp_Etq_Larga-$x.cooked LexEsp_Etq_Larga-$x.t3 >> output
done
$ cat output
418 sentences
LexEsp_Etq_Larga-0.t3 9412 455 95.389%
418 sentences
LexEsp_Etq_Larga-1.t3 9206 494 94.907%
418 sentences
LexEsp_Etq_Larga-2.t3 9123 506 94.745%
418 sentences
LexEsp_Etq_Larga-3.t3 9208 496 94.889%
418 sentences
LexEsp_Etq_Larga-4.t3 9105 507 94.725%
418 sentences
LexEsp_Etq_Larga-5.t3 8846 459 95.067%
418 sentences
LexEsp_Etq_Larga-6.t3 8893 493 94.747%
418 sentences
LexEsp_Etq_Larga-7.t3 9258 490 94.973%
418 sentences
LexEsp_Etq_Larga-8.t3 9645 526 94.828%
417 sentences
LexEsp_Etq_Larga-9.t3 9355 484 95.081%