Difference between revisions of "User:Firespeaker/GSoC2014/Progress"

From Apertium
Jump to navigation Jump to search
(→‎WER: r52183)
 
(34 intermediate revisions by the same user not shown)
Line 3: Line 3:
 
{|class="wikitable"
 
{|class="wikitable"
 
|-
 
|-
! week
+
!colspan=2| week
  +
! 0a !! 0b !! 0c !! 3 !! 4 !! 5 !! 6
! 1 !! 2
 
 
|-
 
|-
! date
+
!colspan=2| date
  +
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-06-07 || 2014-06-15 || 2014-06-23 || 2014-07-06
| 2014-××-×× || 2014-××-××
 
 
|-
 
|-
! revision
+
!colspan=2| revision
  +
| r51024 || r51765 || r52183 || r53965 || r54459 || r54896 || r55321
| r××××× || r×××××
 
 
|-
 
|-
 
! kaz(-kir)
 
! kaz(-kir)
Line 16: Line 16:
 
! kir(-kaz)
 
! kir(-kaz)
 
|-
 
|-
! tur(-kir)
+
! tur(-kir) || SETimes
  +
|align="right"| {{#expr: 100*(2869340/3752236) round 2}}
  +
|align="right"| {{#expr: 100*(3509382/4438857) round 2}}
  +
|align="right"| {{#expr: 100*(3538138/4418156) round 2}}
  +
|align="right"| {{#expr: 100*(3537968/4418062) round 2}}
  +
|align="right"| {{#expr: 100*(3620291/4408624) round 2}}
  +
|align="right"| {{#expr: 100*(3623017/4406402) round 2}}
  +
|align="right"| {{#expr: 100*(3628566/4406401) round 2}}
 
|-
 
|-
 
! kir(-tur)
 
! kir(-tur)
Line 29: Line 36:
 
|-
 
|-
 
! week
 
! week
! 0a !! 0b !! 0c !! 1 !! 2
+
! 0a !! 0b !! 0c !! 3 !! 4 !! 5 !! 6
 
|-
 
|-
 
! date
 
! date
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-××-×× || 2014-××-××
+
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-06-07 || 2014-06-15 || 2014-06-23 || 2014-07-06
 
|-
 
|-
 
! revision
 
! revision
| r51024 || r51765 || r52183 || r××××× || r×××××
+
| r51024 || r51765 || r52183 || r53965 || r54459 || r54896 || r55321
 
|-
 
|-
 
! kaz
 
! kaz
Line 41: Line 48:
 
|align="right"| 11336
 
|align="right"| 11336
 
|align="right"| 11337
 
|align="right"| 11337
  +
|align="right"| 11337
  +
|align="right"| 11633
  +
|align="right"| 11690
  +
|align="right"| 12619
 
|-
 
|-
 
! kir
 
! kir
Line 46: Line 57:
 
|align="right"| 13703
 
|align="right"| 13703
 
|align="right"| 13705
 
|align="right"| 13705
  +
|align="right"| 13715
  +
|align="right"| 13737
  +
|align="right"| 13776
  +
|align="right"| 13773
 
|-
 
|-
 
! tur
 
! tur
Line 51: Line 66:
 
|align="right"| 11186
 
|align="right"| 11186
 
|align="right"| 11172
 
|align="right"| 11172
  +
|align="right"| 11172
  +
|align="right"| 11416
  +
|align="right"| 11417
  +
|align="right"| 11417
 
|-
 
|-
 
! uzb
 
! uzb
 
|align="right"| 3922
 
|align="right"| 3922
  +
|align="right"| 3957
  +
|align="right"| 3957
  +
|align="right"| 3957
  +
|align="right"| 3957
 
|align="right"| 3957
 
|align="right"| 3957
 
|align="right"| 3957
 
|align="right"| 3957
Line 62: Line 85:
 
|-
 
|-
 
! week
 
! week
! 0a !! 0b !! 0c !! 1 !! 2
+
! 0a !! 0b !! 0c !! 3 !! 4 !! 5 !! 6
 
|-
 
|-
 
! date
 
! date
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-××-×× || 2014-××-××
+
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-06-07 || 2014-06-15 || 2014-06-23 || 2014-07-06
 
|-
 
|-
 
! revision
 
! revision
| r51024 || r51765 || r52183 || r××××× || r×××××
+
| r51024 || r51765 || r52183 || r53965 || r54459 || r54896 || r55321
 
|-
 
|-
 
! kaz-kir
 
! kaz-kir
Line 74: Line 97:
 
|align="right"| 7557
 
|align="right"| 7557
 
|align="right"| 7557
 
|align="right"| 7557
  +
|align="right"| 7557
  +
|align="right"| 7557
  +
|align="right"| 7557
  +
|align="right"| 7559
 
|-
 
|-
 
! tur-kir
 
! tur-kir
Line 79: Line 106:
 
|align="right"| 7249
 
|align="right"| 7249
 
|align="right"| 7107
 
|align="right"| 7107
  +
|align="right"| 7107
  +
|align="right"| 7110
  +
|align="right"| 7123
  +
|align="right"| 7123
 
|-
 
|-
 
! tur-uzb
 
! tur-uzb
  +
|align="right"| 2416
  +
|align="right"| 2416
  +
|align="right"| 2416
  +
|align="right"| 2416
 
|align="right"| 2416
 
|align="right"| 2416
 
|align="right"| 2416
 
|align="right"| 2416
Line 91: Line 126:
 
|-
 
|-
 
!colspan=2| week
 
!colspan=2| week
! 0a !! 0b !! 0c !! 1 !! 2
+
! 0a !! 0b !! 0c !! 3 !! 4 !! 5 !! 6
 
|-
 
|-
 
!colspan=2| date
 
!colspan=2| date
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-××-×× || 2014-××-××
+
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-06-07 || 2014-06-15 || 2014-06-23 || 2014-07-06
 
|-
 
|-
 
!colspan=2| revision
 
!colspan=2| revision
| r51024 || r51765 || r52183 || r××××× || r×××××
+
| r51024 || r51765 || r52183 || r53965 || r54459 || r54896 || r55321
 
|-
 
|-
 
! kaz ||
 
! kaz ||
Line 106: Line 141:
 
|align="right"| {{#expr: 6617625/3752236 round 2}} → {{#expr: 5646219/3752236 round 2}}
 
|align="right"| {{#expr: 6617625/3752236 round 2}} → {{#expr: 5646219/3752236 round 2}}
 
|align="right"| {{#expr: 8915256/4438857 round 2}} → {{#expr: 5686494/4438857 round 2}}
 
|align="right"| {{#expr: 8915256/4438857 round 2}} → {{#expr: 5686494/4438857 round 2}}
|align="right"| {{#expr: 10078222/4353306 round 2}} → {{#expr: 5619377/4353306 round 2}}
+
|align="right"| {{#expr: 9388399/4418156 round 2}} → {{#expr: 5418392/4418156 round 2}}
  +
|align="right"| {{#expr: 9383280/4418062 round 2}} → {{#expr: 5498977/4418062 round 2}}
  +
|align="right"| {{#expr: 12791196/4408624 round 2}} → {{#expr: 5508485/4408624 round 2}}
  +
|align="right"| {{#expr: 10344801/4406402 round 2}} → {{#expr: 5507130/4406402 round 2}}
  +
|align="right"| {{#expr: 10377368/4406401 round 2}} → {{#expr: 5521663/4406401 round 2}}
 
|-
 
|-
 
! uzb ||
 
! uzb ||
Line 116: Line 155:
 
|-
 
|-
 
!colspan=2| week
 
!colspan=2| week
! 0a !! 0b !! 1 !! 2
+
! 0a !! 0b !! 0c !! 3 !! 4 !! 5 !! 6
 
|-
 
|-
 
!colspan=2| date
 
!colspan=2| date
| 2014-03-21 || 2014-04-06 || 2014-××-×× || 2014-××-××
+
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-06-07 || 2014-06-15 || 2014-06-23 || 2014-07-06
 
|-
 
|-
 
!colspan=2| revision
 
!colspan=2| revision
| r51024 || r51765 || r××××× || r×××××
+
| r51024 || r51765 || r52183 || r53965 || r54459 || r54896 || r55321
 
|-
 
|-
 
! kaz(-kir)
 
! kaz(-kir)
Line 131: Line 170:
 
|align="right"| {{#expr: 4258568/3763466 round 5}} → {{#expr: 3833506/3763466 round 5}}
 
|align="right"| {{#expr: 4258568/3763466 round 5}} → {{#expr: 3833506/3763466 round 5}}
 
|align="right"| {{#expr: 5024056/4526576 round 5}} → {{#expr: 4618354/4526576 round 5}}
 
|align="right"| {{#expr: 5024056/4526576 round 5}} → {{#expr: 4618354/4526576 round 5}}
  +
|align="right"| {{#expr: 5096083/4590251 round 5}} → {{#expr: 4689952/4590251 round 5}}
  +
|align="right"| {{#expr: 5088551/4583077 round 5}} → {{#expr: 4682807/4583077 round 5}}
  +
|align="right"| {{#expr: 5076760/4556424 round 5}} → {{#expr: 4691423/4556424 round 5}}
  +
|align="right"| {{#expr: 5133354/4609140 round 5}} → {{#expr: 4744793/4609140 round 5}}
  +
|align="right"| {{#expr: 5134387/4609108 round 5}} → {{#expr: 4743478/4609108 round 5}}
 
|-
 
|-
 
! kir(-tur)
 
! kir(-tur)
Line 144: Line 188:
 
|-
 
|-
 
!colspan=2| week
 
!colspan=2| week
! 0a !! 0b !! 1 !! 2
+
! 0a !! 0b !! 0c !! 3 !! 4 !! 5 !! 6
 
|-
 
|-
 
!colspan=2| date
 
!colspan=2| date
| 2014-03-21 || 2014-04-06 || 2014-××-×× || 2014-××-××
+
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-06-07 || 2014-06-15 || 2014-06-23 || 2014-07-06
 
|-
 
|-
 
!colspan=2| revision
 
!colspan=2| revision
| r51024 || r51765 || r××××× || r×××××
+
| r51024 || r51765 || r52183 || r53965 || r54459 || r54896 || r55321
 
|-
 
|-
 
! kaz(-kir)
 
! kaz(-kir)
Line 159: Line 203:
 
|align="right"| {{#expr: 100*(740436/3763466) round 2}}%
 
|align="right"| {{#expr: 100*(740436/3763466) round 2}}%
 
|align="right"| {{#expr: 100*(304135/4526576) round 2}}%
 
|align="right"| {{#expr: 100*(304135/4526576) round 2}}%
  +
|align="right"| {{#expr: 100*(524711/4590251) round 2}}%
  +
|align="right"| {{#expr: 100*(476231/4583077) round 2}}%
  +
|align="right"| {{#expr: 100*(10052/4556424) round 2}}%
  +
|align="right"| {{#expr: 100*(1955/4609140) round 2}}%
  +
|align="right"| {{#expr: 100*(21001/4609108) round 2}}%
 
|-
 
|-
 
! kir(-tur)
 
! kir(-tur)
Line 173: Line 222:
 
!colspan="4"|texts
 
!colspan="4"|texts
 
! week
 
! week
! 0a !! 0b !! 0c !! 1 !! 2
+
! 0a !! 0b !! 0c !! 3 !! 4 !! 5 !! 6
 
|-
 
|-
 
!rowspan="2"| name
 
!rowspan="2"| name
Line 180: Line 229:
 
!rowspan="2"| direction
 
!rowspan="2"| direction
 
! date
 
! date
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-××-×× || 2014-××-××
+
| 2014-03-21 || 2014-04-06 || 2014-04-21 || 2014-06-07 || 2014-06-15 || 2014-06-23 || 2014-07-06
 
|-
 
|-
 
! revision
 
! revision
| r51024 || r51765 || r52183 || r××××× || r×××××
+
| r51024 || r51765 || r52183 || r53965 || r54459 || r54896 || r55321
 
|-
 
|-
 
| foo || kaz || ~200 || kaz-kir ||rowspan="6" align="center"|dev set 1
 
| foo || kaz || ~200 || kaz-kir ||rowspan="6" align="center"|dev set 1
Line 201: Line 250:
 
| 58.06% ~ 49.19%
 
| 58.06% ~ 49.19%
 
| 66.67% ~ 54.85%
 
| 66.67% ~ 54.85%
  +
| 63.44% ~ 51.88%
  +
| 59.95% ~ 49.46%
  +
| 60.48% ~ 50.00%
  +
| 60.22% ~ 49.73%
 
|-
 
|-
 
| tur-uzb
 
| tur-uzb

Latest revision as of 06:49, 13 July 2014

trimmed coverage[edit]

week 0a 0b 0c 3 4 5 6
date 2014-03-21 2014-04-06 2014-04-21 2014-06-07 2014-06-15 2014-06-23 2014-07-06
revision r51024 r51765 r52183 r53965 r54459 r54896 r55321
kaz(-kir)
kir(-kaz)
tur(-kir) SETimes 76.47 79.06 80.08 80.08 82.12 82.22 82.35
kir(-tur)
tur(-uzb)
uzb(-tur)

monodix stems[edit]

week 0a 0b 0c 3 4 5 6
date 2014-03-21 2014-04-06 2014-04-21 2014-06-07 2014-06-15 2014-06-23 2014-07-06
revision r51024 r51765 r52183 r53965 r54459 r54896 r55321
kaz 11332 11336 11337 11337 11633 11690 12619
kir 13637 13703 13705 13715 13737 13776 13773
tur 11128 11186 11172 11172 11416 11417 11417
uzb 3922 3957 3957 3957 3957 3957 3957

bidix stems[edit]

week 0a 0b 0c 3 4 5 6
date 2014-03-21 2014-04-06 2014-04-21 2014-06-07 2014-06-15 2014-06-23 2014-07-06
revision r51024 r51765 r52183 r53965 r54459 r54896 r55321
kaz-kir 7557 7557 7557 7557 7557 7557 7559
tur-kir 7163 7249 7107 7107 7110 7123 7123
tur-uzb 2416 2416 2416 2416 2416 2416 2416

CG per-token ambiguity[edit]

tokens( analyser | CG ) / tokens( analyser )

week 0a 0b 0c 3 4 5 6
date 2014-03-21 2014-04-06 2014-04-21 2014-06-07 2014-06-15 2014-06-23 2014-07-06
revision r51024 r51765 r52183 r53965 r54459 r54896 r55321
kaz
kir
tur SETimes 1.76 → 1.5 2.01 → 1.28 2.12 → 1.23 2.12 → 1.24 2.9 → 1.25 2.35 → 1.25 2.36 → 1.25
uzb

lrx per-token ambiguity[edit]

tokens( analyser | CG | biltrans | lrx ) / tokens( analyser | CG | biltrans )

week 0a 0b 0c 3 4 5 6
date 2014-03-21 2014-04-06 2014-04-21 2014-06-07 2014-06-15 2014-06-23 2014-07-06
revision r51024 r51765 r52183 r53965 r54459 r54896 r55321
kaz(-kir)
kir(-kaz)
tur(-kir) SETimes 1.13155 → 1.01861 1.1099 → 1.02028 1.1102 → 1.02172 1.11029 → 1.02176 1.1142 → 1.02963 1.11373 → 1.02943 1.11397 → 1.02915
kir(-tur)
tur(-uzb)
uzb(-tur)

corpus testvoc[edit]

week 0a 0b 0c 3 4 5 6
date 2014-03-21 2014-04-06 2014-04-21 2014-06-07 2014-06-15 2014-06-23 2014-07-06
revision r51024 r51765 r52183 r53965 r54459 r54896 r55321
kaz(-kir)
kir(-kaz)
tur(-kir) SETimes 19.67% 6.72% 11.43% 10.39% 0.22% 0.04% 0.46%
kir(-tur)
tur(-uzb)
uzb(-tur)

WER[edit]

texts week 0a 0b 0c 3 4 5 6
name language № words direction date 2014-03-21 2014-04-06 2014-04-21 2014-06-07 2014-06-15 2014-06-23 2014-07-06
revision r51024 r51765 r52183 r53965 r54459 r54896 r55321
foo kaz ~200 kaz-kir dev set 1
foo kir ~200 kir-kaz
kir-tur
küçükkuş tur 339 tur-kir 99.46% ~ 98.66% 58.06% ~ 49.19% 66.67% ~ 54.85% 63.44% ~ 51.88% 59.95% ~ 49.46% 60.48% ~ 50.00% 60.22% ~ 49.73%
tur-uzb
foo uzb ~200 uzb-tur
bar kaz ~200 kaz-kir dev set 2
bar kir ~200 kir-kaz
kir-tur
bar tur ~200 tur-kir
tur-uzb
bar uzb ~200 uzb-tur
baz kaz ~500 kaz-kir dev set 3
baz kir ~500 kir-kaz
kir-tur
baz tur ~500 tur-kir
tur-uzb
baz uzb ~500 uzb-tur
foo kaz ~200 kaz-kir eval set 1
foo kir ~200 kir-kaz
kir-tur
foo tur ~200 tur-kir
tur-uzb
foo uzb ~200 uzb-tur
bar kaz ~200 kaz-kir eval set 2
bar kir ~200 kir-kaz
kir-tur
bar tur ~200 tur-kir
tur-uzb
bar uzb ~200 uzb-tur
baz kaz ~500 kaz-kir eval set 3
baz kir ~500 kir-kaz
kir-tur
baz tur ~500 tur-kir
tur-uzb
baz uzb ~500 uzb-tur