User talk:David Nemeskey/CG XML brainstorming

From Apertium
Jump to navigation Jump to search

An alternative (draft) proposal



<xmlcg>
  <!-- Sentence delimiters, mirroring the CG declaration
       DELIMITERS = "<.>" "<!>" "<?>" "<;>" "<...>" (sent);
       One <delim> per alternative. Judging by that declaration, word="X"
       stands for the CG wordform "<X>" and tags="sent" for the tag (sent). -->
  <section-delims>
    <delim word="."/>
    <delim word="!"/>
    <delim word="?"/>
    <delim word=";"/>
    <delim word="..."/>
    <delim tags="sent"/>
  </section-delims>
  <section-sets>
    <!-- Article sets. Per the CG originals quoted inline, a dotted tags value
         ("det.def") stands for a composite CG tag ((det def)). -->
    <def-set n="art"> <!-- LIST Art    = det ; -->
      <set-item tags="det"/>
    </def-set>
    <def-set n="def-art"> <!-- LIST DefArt = (det def) ; -->
      <set-item tags="det.def"/>
    </def-set>
    <def-set n="ind-art"> <!-- LIST IndArt = (det ind) ; -->
      <set-item tags="det.ind"/>
    </def-set>
    <!-- Demonstratives, listed by lemma: each quoted string of the CG LIST
         becomes one <set-item lemma="..."/>. -->
    <def-set n="dem"> <!-- LIST Dem    = "az" "ez" "amaz" "emez" ;-->
      <set-item lemma="az"/>
      <set-item lemma="ez"/>
      <set-item lemma="amaz"/>
      <set-item lemma="emez"/>
    </def-set>
    <!-- Part-of-speech sets: every tag of the CG LIST becomes one
         <set-item tags="..."/>. -->
    <def-set n="noun"><!--LIST Noun = n np ;-->
      <set-item tags="n"/>
      <set-item tags="np"/>
    </def-set>
    <!-- Same three tags as the CG LIST; only the item order differs
         (vbser is listed first here). -->
    <def-set n="verb"><!--LIST Verb = vblex vbser vbmod ;-->
      <set-item tags="vbser"/>
      <set-item tags="vblex"/>
      <set-item tags="vbmod"/>
    </def-set>
    <def-set n="verb-not-ser"><!--LIST VerbNotSer = vblex vbmod ;-->
      <!-- Set difference: the members of "verb" without the vbser tag, which
           leaves the vblex and vbmod of the CG LIST. The inline operand is
           wrapped in <set>, the same form the inline operands of <and> and
           <minus> take in section-rules. -->
      <minus>
        <set n="verb"/>
        <set>
          <set-item tags="vbser"/>
        </set>
      </minus>
    </def-set>
    <!-- Punctuation tags. The quoted CG LIST contains only cm, sent and quot;
         guio shows up there merely in the trailing "# guio, apos?" remark, yet
         it is included below while apos is not.
         NOTE(review): confirm that guio is meant to be a member. -->
    <def-set n="punct"><!--LIST Punct = cm sent quot ;  # guio, apos?-->
      <set-item tags="cm"/>
      <set-item tags="sent"/>
      <set-item tags="quot"/>
      <set-item tags="guio"/>
    </def-set>
    <def-set n="poss"><!--LIST Poss = px1ss px2ss px3ss px1ps px2ps px3ps px1sp px2sp px3sp px1pp px2pp px3pp ;-->
      <!-- One row per series (ss, ps, sp, pp), persons 1 to 3 from left to
           right, in the order of the CG LIST quoted above. -->
      <set-item tags="px1ss"/><set-item tags="px2ss"/><set-item tags="px3ss"/>
      <set-item tags="px1ps"/><set-item tags="px2ps"/><set-item tags="px3ps"/>
      <set-item tags="px1sp"/><set-item tags="px2sp"/><set-item tags="px3sp"/>
      <set-item tags="px1pp"/><set-item tags="px2pp"/><set-item tags="px3pp"/>
    </def-set>
    <!-- Lemma-based set mirroring the CG LIST VbSer. Its name coincides with the
         vbser tag used in the "verb" set, but this set matches by lemma. -->
    <def-set n="vbser"><!--LIST VbSer = "van" "lehet" "volt" "nincs" ;-->
      <set-item lemma="van"/>
      <set-item lemma="lehet"/>
      <set-item lemma="volt"/>
      <set-item lemma="nincs"/>
    </def-set>
    <!-- Lemma list referenced by the NOT 1 / NOT -1 PastAdv conditions of the
         "akkor" rule in section-rules. -->
    <def-set n="past-adv"><!--LIST PastAdv = "tegnap" "múltkor" "régen" ;-->
      <set-item lemma="tegnap"/>
      <set-item lemma="múltkor"/>
      <set-item lemma="régen"/>
    </def-set>
    <!-- Single-lemma set; a one-member CG LIST still becomes a full <def-set>. -->
    <def-set n="post-first"><!--LIST PostFirst = "alá" ;-->
      <set-item lemma="alá"/>
    </def-set>
    <!-- Tags that may precede the head inside a noun phrase, per the CG LIST. -->
    <def-set n="np-part"><!--LIST NpPart = det num adv adj ;-->
      <set-item tags="det"/>
      <set-item tags="num"/>
      <set-item tags="adv"/>
      <set-item tags="adj"/>
    </def-set>
    <!-- The adv/adj subset of np-part.
         NOTE(review): the quoted CG line reuses the name NpPart, which np-part
         already carries; presumably the CG original has a different name. -->
    <def-set n="np-part-from-adj"><!--LIST NpPart = adv adj ;-->
      <set-item tags="adv"/>
      <set-item tags="adj"/>
    </def-set>
    <!-- No CG original is quoted for the next two sets. Both mix lemma items
         with a tag item, and np-conj and conj have identical members. -->
    <def-set n="np-conj">
      <set-item lemma="és"/>
      <set-item lemma="vagy"/>
      <set-item tags="cm"/>
    </def-set>
    <def-set n="conj">
      <set-item lemma="és"/>
      <set-item lemma="vagy"/>
      <set-item tags="cm"/>
    </def-set>
    <!-- No CG original is quoted for the next two sets; neither is referenced
         by the rules in this document's section-rules. -->
    <!-- Matched by lemma. -->
    <def-set n="meta-verb">
      <set-item lemma="akar"/>
      <set-item lemma="szeret"/>
      <set-item lemma="imád"/>
    </def-set>
    <!-- Matched by tag. -->
    <def-set n="an-p">
      <set-item tags="pxs"/>
      <set-item tags="pxp"/>
    </def-set>
  </section-sets>
  <section-rules>

    <!-- No CG original is quoted for this rule. Read like the commented rules
         below, it selects the det reading when the cohort at relative
         position 1 matches adj or n.
         NOTE(review): target="az" presumably names the wordform the rule
         applies to; confirm against the intended schema. -->
    <rule target="az">
      <select>
        <set-item tags="det"/>
      </select>
      <match pos="1">
        <set>
          <set-item tags="adj"/>
          <set-item tags="n"/>
        </set>
      </match>
    </rule>

    <!--SELECT ("az" prn tem) IF (*1 Verb + (past) BARRIER Verb) (NOT 1 PastAdv) (NOT -1 PastAdv);-->
    <!-- Going by the CG line, pos="N" is a fixed relative position, from="N" is
         the scanning *N, <and> is the CG + operator and <not> is NOT. The
         conditions are listed in a different order than in the CG line: the
         NOT -1 test sits before <select>, the other two after it. -->
    <rule target="akkor">
      <not>
        <match pos="-1">
          <set n="past-adv"/>
        </match>
      </not>
      <select>
        <set-item lemma="az" tags="prn.*.tem"/>
      </select>
      <match from="1">
        <and>
          <set n="verb"/>
          <set>
            <set-item tags="past"/>
          </set>
        </and>
        <barrier>
          <set n="verb"/>
        </barrier>
      </match>
      <not>
        <match pos="1">
          <set n="past-adv"/>
        </match>
      </not>
    </rule>

    <!--SELECT ("ez" prn tem) IF (1C VerbNotSer);-->
    <!-- careful="true" encodes the C of the CG position 1C. The attribute
         carries an explicit value because XML does not permit valueless
         attributes. -->
    <rule target="ekkor">
      <select>
        <set-item lemma="ez" tags="prn.*.tem"/>
      </select>
      <match pos="1" careful="true">
        <set n="verb-not-ser"/>
      </match>
    </rule>


<!--SELECT ("válasz" n acc) IF (*1 ("kap") + VerbNotSer BARRIER Verb);-->
    <!-- Rightward scan (from="1"): the match must be in verb-not-ser and have
         the lemma "kap"; the scan is cut off by any member of "verb". -->
    <rule>
      <select>
        <set-item lemma="válasz" tags="n.*.acc"/>
      </select>
      <match from="1">
        <and>
          <set n="verb-not-ser"/>
          <set>
            <set-item lemma="kap"/>
          </set>
        </and>
        <barrier>
          <set n="verb"/>
        </barrier>
      </match>
    </rule>
 
<!--SELECT ("válasz" n acc) IF (*-1 ("kap") + VerbNotSer BARRIER Verb);-->
    <!-- Leftward counterpart of the *1 rule for "válasz": same condition with
         from="-1". Here <match> is written before <select>. -->
    <rule>
      <match from="-1">
        <and>
          <set n="verb-not-ser"/>
          <set>
            <set-item lemma="kap"/>
          </set>
        </and>
        <barrier>
          <set n="verb"/>
        </barrier>
      </match>
      <select>
        <set-item lemma="válasz" tags="n.*.acc"/>
      </select>
    </rule>
   



<!--SELECT (det) IF (*1 Noun BARRIER (*) - (adj) - (num));-->
    <!-- The barrier reads "anything except adj or num": the two CG subtractions
         are folded into a single <minus> whose second operand lists both tags.
         NOTE(review): no set named "*" is declared in section-sets; it
         presumably stands for the CG any-tag set (*). -->
    <rule>
      <select>
        <set-item tags="det"/>
      </select>
      <match from="1">
        <set n="noun"/>
        <barrier>
          <minus>
            <set n="*"/>
            <set>
              <set-item tags="adj"/>
              <set-item tags="num"/>
            </set>
          </minus>
        </barrier>
      </match>
    </rule>


<!--SELECT (np) IF (NOT -1 (>>>));-->
    <!-- <bos/> takes the place of the CG tag (>>>) from the line above, so the
         np reading is selected only when position -1 is not <bos/>. -->
    <rule>
      <select>
        <set-item tags="np"/>
      </select>
      <not>
        <match pos="-1">
          <bos/>
        </match>
      </not>
    </rule>

  </section-rules>
</xmlcg>