Difference between revisions of "Talk:Apertium New Language Pair HOWTO"
Jump to navigation
Jump to search
Line 227: | Line 227: | ||
<e lm="see"><i>s</i><par n="s/ee__vblex"/></e> |
<e lm="see"><i>s</i><par n="s/ee__vblex"/></e> |
||
<e lm="personal subject pronouns"><i/><par n="prsubj__prn"/></e> |
<e lm="personal subject pronouns"><i/><par n="prsubj__prn"/></e> |
||
<e lm="record player"><i>record<b/>player</i><par n=" |
<e lm="record player"><i>record<b/>player</i><par n="gramophone__n"/></e> |
||
</section> |
</section> |
||
</dictionary> |
</dictionary> |
||
</pre> |
|||
buildall.sh: |
|||
<pre> |
|||
lt-comp lr apertium-sh-en.sh.dix sh-en.automorf.bin |
|||
lt-comp rl apertium-sh-en.sh.dix sh-en.autogen.bin |
|||
lt-comp lr apertium-sh-en.en.dix en-sh.automorf.bin |
|||
lt-comp rl apertium-sh-en.en.dix en-sh.autogen.bin |
|||
lt-comp lr apertium-sh-en.sh-en.dix sh-en.autobil.bin |
|||
lt-comp rl apertium-sh-en.sh-en.dix en-sh.autobil.bin |
|||
apertium-preprocess-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin |
|||
</pre> |
</pre> |
||
Line 237: | Line 252: | ||
<pre> |
<pre> |
||
echo "gramofoni" | lt-proc sh-en.automorf.bin | \ |
|||
> gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ |
> gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ |
||
> apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ |
> apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ |
||
> lt-proc -g sh-en.autogen.bin |
> lt-proc -g sh-en.autogen.bin |
||
#gramophone |
|||
#prperssee #see *gramofone |
|||
echo "vidim" | lt-proc sh-en.automorf.bin | \ |
|||
> gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ |
|||
> apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ |
|||
> lt-proc -g sh-en.autogen.bin |
|||
#prpers #see |
|||
echo "vidim gramofoni" | lt-proc sh-en.automorf.bin | \ |
|||
gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ |
|||
apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ |
|||
lt-proc -g sh-en.autogen.bin |
|||
#prpers #see |
|||
</pre> |
</pre> |
Revision as of 09:15, 30 April 2009
Possibly, at the very end, one could mention the possibility of typing
make sh-en.t1x.bin
etc. instead of all those different commands, for the language pairs privileged enough to have fancy makefiles. --Unhammer
Files
apertium-sh-en.sh-en.dix
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Bilingual dictionary for the sh-en pair.
  Each entry pairs a left-side lemma (gramofon, videti) with a right-side
  lemma (gramophone, see); only the part-of-speech symbol is carried across.
-->
<dictionary>
  <!-- No alphabet is declared for this dictionary. -->
  <alphabet/>

  <!-- Symbols that may appear in <s n="..."/> tags. -->
  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
  </sdefs>

  <section id="main" type="standard">
    <!-- Noun: gramofon <-> gramophone -->
    <e>
      <p>
        <l>gramofon<s n="n"/></l>
        <r>gramophone<s n="n"/></r>
      </p>
    </e>
    <!-- Verb: videti <-> see -->
    <e>
      <p>
        <l>videti<s n="vblex"/></l>
        <r>see<s n="vblex"/></r>
      </p>
    </e>
  </section>
</dictionary>
apertium-sh-en.sh.dix
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Monolingual dictionary for the "sh" side of the sh-en pair.
  NOTE: the encoding declaration is UTF-8, matching the bilingual dictionary
  and the transfer file; save this file as UTF-8 so the declaration stays true.
-->
<dictionary>
  <!--
    Letters of the language. The previous value had been damaged by a
    mis-encoding (literal "?" characters and letters stripped of their
    diacritics); the accented letters and digraphs are restored here.
  -->
  <alphabet>ABCČĆDDžĐEFGHIJKLLjMNNjOPRSŠTUVZŽabcčćddžđefghijklljmnnjoprsštuvzž</alphabet>

  <!-- Symbols that may appear in <s n="..."/> tags. -->
  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
    <sdef n="p1"/>
    <sdef n="pri"/>
  </sdefs>

  <pardefs>
    <!-- Noun paradigm: bare stem = singular; endings "i" and "e" = plural. -->
    <pardef n="gramofon__n">
      <e>
        <p>
          <l/>
          <r><s n="n"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>i</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>e</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>

    <!-- Verb paradigm: endings "im" / "imo" map to lemma ending "eti", tagged pri.p1 with sg / pl. -->
    <pardef n="vid/eti__vblex">
      <e>
        <p>
          <l>im</l>
          <r>eti<s n="vblex"/><s n="pri"/><s n="p1"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>imo</l>
          <r>eti<s n="vblex"/><s n="pri"/><s n="p1"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>
  </pardefs>

  <section id="main" type="standard">
    <e lm="gramofon"><i>gramofon</i><par n="gramofon__n"/></e>
    <e lm="videti"><i>vid</i><par n="vid/eti__vblex"/></e>
  </section>
</dictionary>
apertium-sh-en.sh-en.t1x
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Structural transfer rules for the sh-en pair.
  Two single-word rules are defined: one for nouns and one for verbs.
-->
<transfer>

  <!-- Lexical categories that rule patterns can match on. -->
  <section-def-cats>
    <def-cat n="nom">
      <cat-item tags="n.*"/>
    </def-cat>
    <def-cat n="vrb">
      <cat-item tags="vblex.*"/>
    </def-cat>
    <def-cat n="prpers">
      <cat-item lemma="prpers" tags="prn.*"/>
    </def-cat>
  </section-def-cats>

  <!-- Named groups of tags that <clip part="..."/> can extract. -->
  <section-def-attrs>
    <def-attr n="nbr">
      <attr-item tags="sg"/>
      <attr-item tags="pl"/>
    </def-attr>
    <def-attr n="a_nom">
      <attr-item tags="n"/>
    </def-attr>
    <def-attr n="temps">
      <attr-item tags="pri"/>
    </def-attr>
    <def-attr n="pers">
      <attr-item tags="p1"/>
    </def-attr>
    <def-attr n="a_verb">
      <attr-item tags="vblex"/>
    </def-attr>
    <def-attr n="tipus_prn">
      <attr-item tags="prn.subj"/>
      <attr-item tags="prn.obj"/>
    </def-attr>
  </section-def-attrs>

  <!-- Declared variable; no rule below reads or writes it. -->
  <section-def-vars>
    <def-var n="number"/>
  </section-def-vars>

  <section-rules>

    <!-- Noun rule: emit lemma, noun tag and number of the matched word. -->
    <rule>
      <pattern>
        <pattern-item n="nom"/>
      </pattern>
      <action>
        <out>
          <lu>
            <clip pos="1" side="tl" part="lem"/>
            <clip pos="1" side="tl" part="a_nom"/>
            <clip pos="1" side="tl" part="nbr"/>
          </lu>
        </out>
      </action>
    </rule>

    <!--
      Verb rule: emit a "prpers" subject pronoun carrying the verb's person
      and number, a blank, then the verb's lemma, verb tag and tense.
    -->
    <rule>
      <pattern>
        <pattern-item n="vrb"/>
      </pattern>
      <action>
        <out>
          <lu>
            <lit v="prpers"/>
            <lit-tag v="prn"/>
            <lit-tag v="subj"/>
            <clip pos="1" side="tl" part="pers"/>
            <clip pos="1" side="tl" part="nbr"/>
          </lu>
          <b/>
          <lu>
            <clip pos="1" side="tl" part="lem"/>
            <clip pos="1" side="tl" part="a_verb"/>
            <clip pos="1" side="tl" part="temps"/>
          </lu>
        </out>
      </action>
    </rule>

  </section-rules>
</transfer>
apertium-sh-en.en.dix
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Monolingual dictionary for the English side of the sh-en pair.
  NOTE: the encoding declaration is UTF-8, matching the bilingual dictionary
  and the transfer file; save this file as UTF-8 so the declaration stays true.
-->
<dictionary>
  <!--
    English letters. The previous value was a damaged copy of the
    Serbo-Croatian alphabet and lacked q, w, x and y, although this
    dictionary itself contains "y" (in "record player").
  -->
  <alphabet>ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz</alphabet>

  <!-- Symbols that may appear in <s n="..."/> tags. -->
  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
    <sdef n="p1"/>
    <sdef n="pri"/>
    <sdef n="prn"/>
    <sdef n="subj"/>
  </sdefs>

  <pardefs>
    <!-- Noun paradigm: bare stem = singular; ending "s" = plural. -->
    <pardef n="gramophone__n">
      <e>
        <p>
          <l/>
          <r><s n="n"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>s</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>

    <!-- Verb paradigm: surface "ee" <-> lemma ending "ee", tagged vblex.pri. -->
    <pardef n="s/ee__vblex">
      <e>
        <p>
          <l>ee</l>
          <r>ee<s n="vblex"/><s n="pri"/></r>
        </p>
      </e>
    </pardef>

    <!-- Subject pronouns: surface "I" <-> prpers tagged prn.subj.p1.sg. -->
    <pardef n="prsubj__prn">
      <e>
        <p>
          <l>I</l>
          <r>prpers<s n="prn"/><s n="subj"/><s n="p1"/><s n="sg"/></r>
        </p>
      </e>
    </pardef>
  </pardefs>

  <section id="main" type="standard">
    <e lm="gramophone"><i>gramophone</i><par n="gramophone__n"/></e>
    <e lm="see"><i>s</i><par n="s/ee__vblex"/></e>
    <e lm="personal subject pronouns"><i/><par n="prsubj__prn"/></e>
    <e lm="record player"><i>record<b/>player</i><par n="gramophone__n"/></e>
  </section>
</dictionary>
buildall.sh:
# Compile every binary needed by the sh-en / en-sh pipelines.
#
# "lr" compiles a dictionary left-to-right (surface form -> analysis) and
# "rl" compiles it right-to-left (analysis -> surface form).
#
# A generator must be built from the dictionary of the language being
# produced: sh->en output is English, so sh-en.autogen.bin comes from the
# English dictionary, and en-sh.autogen.bin from the Serbo-Croatian one.
# Building them the other way round makes generation fail with
# '#'-prefixed lemmas such as "#gramophone" or "#prpers #see".

# sh -> en: analyse Serbo-Croatian, generate English.
lt-comp lr apertium-sh-en.sh.dix sh-en.automorf.bin
lt-comp rl apertium-sh-en.en.dix sh-en.autogen.bin

# en -> sh: analyse English, generate Serbo-Croatian.
lt-comp lr apertium-sh-en.en.dix en-sh.automorf.bin
lt-comp rl apertium-sh-en.sh.dix en-sh.autogen.bin

# Bilingual dictionary, one binary per translation direction.
lt-comp lr apertium-sh-en.sh-en.dix sh-en.autobil.bin
lt-comp rl apertium-sh-en.sh-en.dix en-sh.autobil.bin

# Precompile the sh -> en structural transfer rules.
apertium-preprocess-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin
output:
echo "gramofoni" | lt-proc sh-en.automorf.bin | \ > gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ > apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ > lt-proc -g sh-en.autogen.bin #gramophone echo "vidim" | lt-proc sh-en.automorf.bin | \ > gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ > apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ > lt-proc -g sh-en.autogen.bin #prpers #see echo "vidim gramofoni" | lt-proc sh-en.automorf.bin | \ gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \ apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \ lt-proc -g sh-en.autogen.bin #prpers #see