Talk:Apertium New Language Pair HOWTO

From Apertium
Revision as of 05:35, 30 April 2009 by Francis Tyers (talk | contribs)
Jump to navigation Jump to search

Possibly, at the very end, one could mention the possibility of typing

make sh-en.t1x.bin

etc. instead of all those different commands, for the language pairs privileged enough to have fancy makefiles. --Unhammer

Files

apertium-sh-en.sh-en.dix

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Bilingual dictionary: each entry pairs a left-side lemma (<l>) with a
  right-side lemma (<r>), both carrying only their part-of-speech symbol.
-->
<dictionary>
  <!-- No alphabet is declared in this file. -->
  <alphabet/>

  <!-- Symbols that may appear in <s n="..."/> elements below. -->
  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
  </sdefs>

  <section id="main" type="standard">
    <!-- Noun: gramofon <-> gramophone -->
    <e>
      <p>
        <l>gramofon<s n="n"/></l>
        <r>gramophone<s n="n"/></r>
      </p>
    </e>
    <!-- Verb: videti <-> see -->
    <e>
      <p>
        <l>videti<s n="vblex"/></l>
        <r>see<s n="vblex"/></r>
      </p>
    </e>
  </section>
</dictionary>

apertium-sh-en.sh.dix

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Monolingual dictionary for the sh side of the pair.
  Each paradigm entry maps a surface ending (<l>) to the lemma ending plus
  grammatical symbols (<r>).

  Fixes relative to the pasted version:
    * The alphabet had lost its diacritics (C/?/Z in place of the accented
      letters) and contained literal question marks; it is restored here.
    * The declared encoding is now UTF-8, matching the other files of the
      pair. Save this file as UTF-8.
-->
<dictionary>
  <alphabet>ABCČĆDDžĐEFGHIJKLLjMNNjOPRSŠTUVZŽabcčćddžđefghijklljmnnjoprsštuvzž</alphabet>

  <!-- Symbols that may appear in <s n="..."/> elements below. -->
  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
    <sdef n="p1"/>
    <sdef n="pri"/>
  </sdefs>

  <pardefs>
    <!-- Noun paradigm: bare stem is singular; endings -i and -e are plural. -->
    <pardef n="gramofon__n">
      <e>
        <p>
          <l/>
          <r><s n="n"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>i</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
      <!--
        NOTE(review): -e yields the same analysis as -i above. That is fine
        for analysis, but would make n.pl ambiguous if this dictionary were
        also compiled for generation. Confirm this is intended.
      -->
      <e>
        <p>
          <l>e</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>

    <!-- Verb paradigm: stem "vid" + ending; the lemma ending is "eti". -->
    <pardef n="vid/eti__vblex">
      <e>
        <p>
          <l>im</l>
          <r>eti<s n="vblex"/><s n="pri"/><s n="p1"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>imo</l>
          <r>eti<s n="vblex"/><s n="pri"/><s n="p1"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>
  </pardefs>

  <section id="main" type="standard">
    <e lm="gramofon"><i>gramofon</i><par n="gramofon__n"/></e>
    <e lm="videti"><i>vid</i><par n="vid/eti__vblex"/></e>
  </section>
</dictionary>

apertium-sh-en.sh-en.t1x

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Transfer rules for the pair. Two single-word rules are defined:
    1. a noun is output as lemma + noun tag + number;
    2. a verb is output as a subject pronoun (person and number clipped from
       the verb) followed by a blank and then lemma + verb tag + tense.
-->
<transfer>

  <!-- Categories used in rule patterns. -->
  <section-def-cats>
    <def-cat n="nom">
      <cat-item tags="n.*"/>
    </def-cat>
    <def-cat n="vrb">
      <cat-item tags="vblex.*"/>
    </def-cat>
    <!-- Not referenced by any rule in this file. -->
    <def-cat n="prpers">
      <cat-item lemma="prpers" tags="prn.*"/>
    </def-cat>
  </section-def-cats>

  <!-- Attributes that <clip part="..."/> can extract from a lexical unit. -->
  <section-def-attrs>
    <def-attr n="nbr">
      <attr-item tags="sg"/>
      <attr-item tags="pl"/>
    </def-attr>
    <def-attr n="a_nom">
      <attr-item tags="n"/>
    </def-attr>
    <def-attr n="temps">
      <attr-item tags="pri"/>
    </def-attr>
    <def-attr n="pers">
      <attr-item tags="p1"/>
    </def-attr>
    <def-attr n="a_verb">
      <attr-item tags="vblex"/>
    </def-attr>
    <!-- Not referenced by any rule in this file. -->
    <def-attr n="tipus_prn">
      <attr-item tags="prn.subj"/>
      <attr-item tags="prn.obj"/>
    </def-attr>
  </section-def-attrs>

  <!-- Declared but not read or written by any rule in this file. -->
  <section-def-vars>
    <def-var n="number"/>
  </section-def-vars>

  <section-rules>

    <!-- Rule 1: single noun. -->
    <rule>
      <pattern>
        <pattern-item n="nom"/>
      </pattern>
      <action>
        <out>
          <lu>
            <clip pos="1" side="tl" part="lem"/>
            <clip pos="1" side="tl" part="a_nom"/>
            <clip pos="1" side="tl" part="nbr"/>
          </lu>
        </out>
      </action>
    </rule>

    <!-- Rule 2: single verb; a subject pronoun is inserted before it. -->
    <rule>
      <pattern>
        <pattern-item n="vrb"/>
      </pattern>
      <action>
        <out>
          <!-- Inserted pronoun: literal lemma and tags, then the verb's person and number. -->
          <lu>
            <lit v="prpers"/>
            <lit-tag v="prn"/>
            <lit-tag v="subj"/>
            <clip pos="1" side="tl" part="pers"/>
            <clip pos="1" side="tl" part="nbr"/>
          </lu>
          <b/>
          <!-- The verb itself: lemma, verb tag and tense only. -->
          <lu>
            <clip pos="1" side="tl" part="lem"/>
            <clip pos="1" side="tl" part="a_verb"/>
            <clip pos="1" side="tl" part="temps"/>
          </lu>
        </out>
      </action>
    </rule>

  </section-rules>

</transfer>


apertium-sh-en.en.dix

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Monolingual dictionary for the en side of the pair.
  Each paradigm entry maps a surface ending (<l>) to the lemma ending plus
  grammatical symbols (<r>).

  Fixes relative to the pasted version:
    * "record player" referenced the paradigm gramofon__n, which is not
      defined in this file; it now uses gramophone__n, the noun paradigm
      that is defined here.
    * The alphabet was a garbled copy of the one from the sh dictionary
      (including literal question marks); it is now the English alphabet.
    * The declared encoding is now UTF-8, matching the other files of the
      pair.
-->
<dictionary>
  <alphabet>ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz</alphabet>

  <!-- Symbols that may appear in <s n="..."/> elements below. -->
  <sdefs>
    <sdef n="n"/>
    <sdef n="sg"/>
    <sdef n="pl"/>
    <sdef n="vblex"/>
    <sdef n="p1"/>
    <sdef n="pri"/>
    <sdef n="prn"/>
    <sdef n="subj"/>
  </sdefs>

  <pardefs>
    <!-- Regular noun: bare stem is singular, -s is plural. -->
    <pardef n="gramophone__n">
      <e>
        <p>
          <l/>
          <r><s n="n"/><s n="sg"/></r>
        </p>
      </e>
      <e>
        <p>
          <l>s</l>
          <r><s n="n"/><s n="pl"/></r>
        </p>
      </e>
    </pardef>

    <!-- Verb "see": only the form tagged vblex.pri is listed. -->
    <pardef n="s/ee__vblex">
      <e>
        <p>
          <l>ee</l>
          <r>ee<s n="vblex"/><s n="pri"/></r>
        </p>
      </e>
    </pardef>

    <!-- Subject pronouns: only first person singular ("I") is listed. -->
    <pardef n="prsubj__prn">
      <e>
        <p>
          <l>I</l>
          <r>prpers<s n="prn"/><s n="subj"/><s n="p1"/><s n="sg"/></r>
        </p>
      </e>
    </pardef>
  </pardefs>

  <section id="main" type="standard">
    <e lm="gramophone"><i>gramophone</i><par n="gramophone__n"/></e>
    <e lm="see"><i>s</i><par n="s/ee__vblex"/></e>
    <e lm="personal subject pronouns"><i/><par n="prsubj__prn"/></e>
    <e lm="record player"><i>record<b/>player</i><par n="gramophone__n"/></e>
  </section>
</dictionary>

output:

en@anonymous:~/tmp/download/forditas/apertium-en-es-0.fone" | lt-proc sh-en.automorf.bin  | \
>   gawk 'BEGIN{RS="$"; FS="/";}{nf=split($1,COMPONENTS,"^"); for(i = 1; i<nf; i++) printf COMPONENTS[i]; if($2 != "") printf("^%s$",$2);}' | \
>   apertium-transfer apertium-sh-en.sh-en.t1x sh-en.t1x.bin sh-en.autobil.bin | \
>   lt-proc -g sh-en.autogen.bin
#prperssee #see *gramofone