Difference between revisions of "Talk:Apertium New Language Pair HOWTO"
Jump to navigation
Jump to search
| Line 227: | Line 227: | ||
<e lm="see"><i>s</i><par n="s/ee__vblex"/></e> |
<e lm="see"><i>s</i><par n="s/ee__vblex"/></e> |
||
<e lm="personal subject pronouns"><i/><par n="prsubj__prn"/></e> |
<e lm="personal subject pronouns"><i/><par n="prsubj__prn"/></e> |
||
<e lm="record player"><i>record<b/>player</i><par n=" |
<e lm="record player"><i>record<b/>player</i><par n="gramophone__n"/></e> |
||
</section> |
</section> |
||
</dictionary> |
</dictionary> |
||
</pre> |
|||
buildall.sh: |
|||
<pre> |
|||
lt-comp lr apertium-sh-en.sh.dix sh-en.automorf.bin |
|||
lt-comp rl apertium-sh-en.sh.dix sh-en.autogen.bin |
|||
lt-comp lr apertium-sh-en.en.dix en-sh.automorf.bin |
|||
lt-comp rl apertium-sh-en.en.dix en-sh.autogen.bin |
|||
lt-comp lr apertium-sh-en.sh-en.dix sh-en.autobil.bin |
|||
lt-comp rl apertium-sh-en.sh-en.dix en-sh.autobil.bin |
|||
apertium-preprocess-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin |
|||
</pre> |
</pre> |
||
| Line 237: | Line 252: | ||
<pre> |
<pre> |
||
echo "gramofoni" | lt-proc sh-en.automorf.bin | \ |
|||
> gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ |
> gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ |
||
> apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ |
> apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ |
||
> lt-proc -g sh-en.autogen.bin |
> lt-proc -g sh-en.autogen.bin |
||
#gramophone |
|||
#prperssee #see *gramofone |
|||
echo "vidim" | lt-proc sh-en.automorf.bin | \ |
|||
> gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ |
|||
> apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ |
|||
> lt-proc -g sh-en.autogen.bin |
|||
#prpers #see |
|||
echo "vidim gramofoni" | lt-proc sh-en.automorf.bin | \ |
|||
gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ |
|||
apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ |
|||
lt-proc -g sh-en.autogen.bin |
|||
#prpers #see |
|||
</pre> |
</pre> |
||
Revision as of 09:15, 30 April 2009
Possibly, at the very end, one could mention the possibility of typing
make sh-en.t1x.bin
etc. instead of all those different commands, for the language pairs privileged enough to have fancy makefiles. --Unhammer
Files
apertium-sh-en.sh-en.dix
<?xml version="1.0" encoding="UTF-8"?>
<!-- Bilingual dictionary for the sh-en pair: every entry pairs a left-side
     (sh) lemma with its right-side (en) lemma, both carrying the same
     category symbol. -->
<dictionary>
  <alphabet/>

  <!-- Symbols declared for this dictionary; the entries below only use
       "n" and "vblex". -->
  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
  </sdefs>

  <section id="main" type="standard">
    <!-- noun -->
    <e><p><l>gramofon<s n="n"/></l><r>gramophone<s n="n"/></r></p></e>
    <!-- verb -->
    <e><p><l>videti<s n="vblex"/></l><r>see<s n="vblex"/></r></p></e>
  </section>
</dictionary>
apertium-sh-en.sh.dix
<?xml version="1.0" encoding="UTF-8"?>
<!-- Monolingual dictionary for the "sh" side of the sh-en pair.
     Save this file as UTF-8 so that it matches the declaration above. -->
<dictionary>
  <!-- Includes the diacritic letters č, ć, đ, š, ž in both cases. -->
  <alphabet>ABCČĆDDžĐEFGHIJKLLjMNNjOPRSŠTUVZŽabcčćddžđefghijklljmnnjoprsštuvzž</alphabet>

  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
    <sdef n="p1"/>
    <sdef n="pri"/>
  </sdefs>

  <pardefs>
    <!-- Noun paradigm: bare stem is n.sg; the endings "i" and "e" are both
         tagged n.pl. -->
    <pardef n="gramofon__n">
      <e>
        <p>
          <l/>
          <r><s n="n"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>i</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
      <!-- NOTE(review): this entry yields the same analysis as the "i"
           entry above, so the n.pl form is ambiguous when this dictionary
           is compiled as a generator; consider restricting it with
           r="LR" or adding a distinguishing tag. TODO confirm intent. -->
      <e>
        <p>
          <l>e</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>

    <!-- Verb paradigm for stem "vid": endings "im" / "imo" map to lemma
         suffix "eti" with vblex.pri.p1 and sg / pl respectively. -->
    <pardef n="vid/eti__vblex">
      <e>
        <p>
          <l>im</l>
          <r>eti<s n="vblex"/><s n="pri"/><s n="p1"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>imo</l>
          <r>eti<s n="vblex"/><s n="pri"/><s n="p1"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>
  </pardefs>

  <section id="main" type="standard">
    <e lm="gramofon"><i>gramofon</i><par n="gramofon__n"/></e>
    <e lm="videti"><i>vid</i><par n="vid/eti__vblex"/></e>
  </section>
</dictionary>
apertium-sh-en.sh-en.t1x
<?xml version="1.0" encoding="UTF-8"?>
<!-- Structural transfer rules for the sh-en pair. -->
<transfer>

  <!-- Categories that rule patterns can match on. -->
  <section-def-cats>
    <def-cat n="nom">
      <cat-item tags="n.*"/>
    </def-cat>
    <def-cat n="vrb">
      <cat-item tags="vblex.*"/>
    </def-cat>
    <def-cat n="prpers">
      <cat-item lemma="prpers" tags="prn.*"/>
    </def-cat>
  </section-def-cats>

  <!-- Attributes that <clip> can extract from a lexical unit.
       "tipus_prn" is declared but not clipped by any rule in this file. -->
  <section-def-attrs>
    <def-attr n="nbr">
      <attr-item tags="sg"/>
      <attr-item tags="pl"/>
    </def-attr>
    <def-attr n="a_nom">
      <attr-item tags="n"/>
    </def-attr>
    <def-attr n="temps">
      <attr-item tags="pri"/>
    </def-attr>
    <def-attr n="pers">
      <attr-item tags="p1"/>
    </def-attr>
    <def-attr n="a_verb">
      <attr-item tags="vblex"/>
    </def-attr>
    <def-attr n="tipus_prn">
      <attr-item tags="prn.subj"/>
      <attr-item tags="prn.obj"/>
    </def-attr>
  </section-def-attrs>

  <!-- "number" is declared but not referenced by any rule in this file. -->
  <section-def-vars>
    <def-var n="number"/>
  </section-def-vars>

  <section-rules>

    <!-- Noun: output the target-side lemma followed by its noun tag and
         number. -->
    <rule>
      <pattern>
        <pattern-item n="nom"/>
      </pattern>
      <action>
        <out>
          <lu>
            <clip pos="1" side="tl" part="lem"/>
            <clip pos="1" side="tl" part="a_nom"/>
            <clip pos="1" side="tl" part="nbr"/>
          </lu>
        </out>
      </action>
    </rule>

    <!-- Verb: first output a literal "prpers" unit tagged prn.subj with the
         verb's person and number, then a blank, then the target-side verb
         lemma with its verb tag and tense. -->
    <rule>
      <pattern>
        <pattern-item n="vrb"/>
      </pattern>
      <action>
        <out>
          <lu>
            <lit v="prpers"/>
            <lit-tag v="prn"/>
            <lit-tag v="subj"/>
            <clip pos="1" side="tl" part="pers"/>
            <clip pos="1" side="tl" part="nbr"/>
          </lu>
          <b/>
          <lu>
            <clip pos="1" side="tl" part="lem"/>
            <clip pos="1" side="tl" part="a_verb"/>
            <clip pos="1" side="tl" part="temps"/>
          </lu>
        </out>
      </action>
    </rule>

  </section-rules>
</transfer>
apertium-sh-en.en.dix
<?xml version="1.0" encoding="UTF-8"?>
<!-- Monolingual dictionary for the "en" side of the sh-en pair. -->
<dictionary>
  <!-- English alphabet; it must cover every letter used by the entries
       below (e.g. the "y" in "player"). -->
  <alphabet>ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz</alphabet>

  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
    <sdef n="p1"/>
    <sdef n="pri"/>
    <sdef n="prn"/>
    <sdef n="subj"/>
  </sdefs>

  <pardefs>
    <!-- Regular noun: bare stem is n.sg, stem + "s" is n.pl. -->
    <pardef n="gramophone__n">
      <e>
        <p>
          <l/>
          <r><s n="n"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>s</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>

    <!-- Verb "see": only the vblex.pri form is listed. -->
    <pardef n="s/ee__vblex">
      <e>
        <p>
          <l>ee</l>
          <r>ee<s n="vblex"/><s n="pri"/></r>
        </p>
      </e>
    </pardef>

    <!-- Subject pronouns: surface form "I" corresponds to lemma "prpers"
         tagged prn.subj.p1.sg. -->
    <pardef n="prsubj__prn">
      <e>
        <p>
          <l>I</l>
          <r>prpers<s n="prn"/><s n="subj"/><s n="p1"/><s n="sg"/></r>
        </p>
      </e>
    </pardef>
  </pardefs>

  <section id="main" type="standard">
    <e lm="gramophone"><i>gramophone</i><par n="gramophone__n"/></e>
    <e lm="see"><i>s</i><par n="s/ee__vblex"/></e>
    <!-- Empty stem: the whole surface form comes from the paradigm. -->
    <e lm="personal subject pronouns"><i/><par n="prsubj__prn"/></e>
    <!-- Multiword entry; <b/> marks the blank between the two words. -->
    <e lm="record player"><i>record<b/>player</i><par n="gramophone__n"/></e>
  </section>
</dictionary>
buildall.sh:
# Compile every binary needed by the sh-en pair, in both directions.
# Analysers (automorf) are compiled left-to-right (lr) from the dictionary of
# the language being read; generators (autogen) are compiled right-to-left
# (rl) from the dictionary of the language being produced. Hence the sh-en
# generator comes from the en dictionary and the en-sh generator from the sh
# dictionary.

# sh -> en
lt-comp lr apertium-sh-en.sh.dix sh-en.automorf.bin
lt-comp rl apertium-sh-en.en.dix sh-en.autogen.bin

# en -> sh
lt-comp lr apertium-sh-en.en.dix en-sh.automorf.bin
lt-comp rl apertium-sh-en.sh.dix en-sh.autogen.bin

# Bilingual dictionary, one binary per direction
lt-comp lr apertium-sh-en.sh-en.dix sh-en.autobil.bin
lt-comp rl apertium-sh-en.sh-en.dix en-sh.autobil.bin

# Transfer rules
apertium-preprocess-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin
output:
echo "gramofoni" | lt-proc sh-en.automorf.bin | \
> gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \
> apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \
> lt-proc -g sh-en.autogen.bin
#gramophone
echo "vidim" | lt-proc sh-en.automorf.bin | \
> gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \
> apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \
> lt-proc -g sh-en.autogen.bin
#prpers #see
echo "vidim gramofoni" | lt-proc sh-en.automorf.bin | \
gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \
apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \
lt-proc -g sh-en.autogen.bin
#prpers #see