User talk:David Nemeskey/CG XML brainstorming
Revision as of 13:32, 24 July 2013 by Francis Tyers (talk | contribs)
An alternative (draft) proposal
<!--
  Draft XML encoding of a Constraint Grammar (CG) rule file.
  Each set and rule carries, where the draft provided one, a comment with
  the CG source line it is meant to correspond to.
  Sections, in document order:
    section-delims : sentence delimiters
    section-sets   : named sets of tags / lemmas
    section-rules  : disambiguation rules
-->
<xmlcg>
  <!-- DELIMITERS = "<.>" "<!>" "<?>" "<;>" "<...>" (sent);-->
  <section-delims>
    <delim word="."/>
    <delim word="!"/>
    <delim word="?"/>
    <delim word=";"/>
    <delim word="..."/>
    <delim tags="sent"/>
  </section-delims>

  <section-sets>
    <!-- LIST Art = det ; -->
    <def-set n="art">
      <set-item tags="det"/>
    </def-set>

    <!-- LIST DefArt = (det def) ; -->
    <def-set n="def-art">
      <set-item tags="det.def"/>
    </def-set>

    <!-- LIST IndArt = (det ind) ; -->
    <def-set n="ind-art">
      <set-item tags="det.ind"/>
    </def-set>

    <!-- LIST Dem = "az" "ez" "amaz" "emez" ;-->
    <def-set n="dem">
      <set-item lemma="az"/>
      <set-item lemma="ez"/>
      <set-item lemma="amaz"/>
      <set-item lemma="emez"/>
    </def-set>

    <!--LIST Noun = n np ;-->
    <def-set n="noun">
      <set-item tags="n"/>
      <set-item tags="np"/>
    </def-set>

    <!--LIST Verb = vblex vbser vbmod ;-->
    <def-set n="verb">
      <set-item tags="vbser"/>
      <set-item tags="vblex"/>
      <set-item tags="vbmod"/>
    </def-set>

    <!--LIST VerbNotSer = vblex vbmod ;-->
    <!-- Expressed here as a set difference: "verb" minus the vbser tag. -->
    <def-set n="verb-not-ser">
      <minus>
        <set n="verb"/>
        <set-item tags="vbser"/>
      </minus>
    </def-set>

    <!--LIST Punct = cm sent quot ; # guio, apos?-->
    <def-set n="punct">
      <set-item tags="cm"/>
      <set-item tags="sent"/>
      <set-item tags="quot"/>
      <set-item tags="guio"/>
    </def-set>

    <!--LIST Poss = px1ss px2ss px3ss px1ps px2ps px3ps px1sp px2sp px3sp px1pp px2pp px3pp ;-->
    <!-- One row per suffix group (ss, ps, sp, pp); each row lists persons 1, 2, 3. -->
    <def-set n="poss">
      <set-item tags="px1ss"/>
      <set-item tags="px2ss"/>
      <set-item tags="px3ss"/>
      <set-item tags="px1ps"/>
      <set-item tags="px2ps"/>
      <set-item tags="px3ps"/>
      <set-item tags="px1sp"/>
      <set-item tags="px2sp"/>
      <set-item tags="px3sp"/>
      <set-item tags="px1pp"/>
      <set-item tags="px2pp"/>
      <set-item tags="px3pp"/>
    </def-set>

    <!--LIST VbSer = "van" "lehet" "volt" "nincs" ;-->
    <def-set n="vbser">
      <set-item lemma="van"/>
      <set-item lemma="lehet"/>
      <set-item lemma="volt"/>
      <set-item lemma="nincs"/>
    </def-set>

    <!--LIST PastAdv = "tegnap" "múltkor" "régen" ;-->
    <def-set n="past-adv">
      <set-item lemma="tegnap"/>
      <set-item lemma="múltkor"/>
      <set-item lemma="régen"/>
    </def-set>

    <!--LIST PostFirst = "alá" ;-->
    <def-set n="post-first">
      <set-item lemma="alá"/>
    </def-set>

    <!--LIST NpPart = det num adv adj ;-->
    <def-set n="np-part">
      <set-item tags="det"/>
      <set-item tags="num"/>
      <set-item tags="adv"/>
      <set-item tags="adj"/>
    </def-set>

    <!--LIST NpPartFromAdj = adv adj ;-->
    <def-set n="np-part-from-adj">
      <set-item tags="adv"/>
      <set-item tags="adj"/>
    </def-set>

    <!-- No CG source line was given for the remaining sets. -->
    <def-set n="np-conj">
      <set-item lemma="és"/>
      <set-item lemma="vagy"/>
      <set-item tags="cm"/>
    </def-set>

    <!-- NOTE(review): currently identical in content to "np-conj"; confirm whether both are needed. -->
    <def-set n="conj">
      <set-item lemma="és"/>
      <set-item lemma="vagy"/>
      <set-item tags="cm"/>
    </def-set>

    <def-set n="meta-verb">
      <set-item lemma="akar"/>
      <set-item lemma="szeret"/>
      <set-item lemma="imád"/>
    </def-set>

    <def-set n="an-p">
      <set-item tags="pxs"/>
      <set-item tags="pxp"/>
    </def-set>
  </section-sets>

  <section-rules>
    <!-- Select the det reading when position 1 matches adj or n. -->
    <!-- NOTE(review): "target" presumably names the word form the rule applies to; confirm. -->
    <rule target="az">
      <select>
        <set-item tags="det"/>
      </select>
      <match pos="1">
        <set>
          <set-item tags="adj"/>
          <set-item tags="n"/>
        </set>
      </match>
    </rule>

    <!--SELECT ("az" prn tem) IF (*1 Verb + (past) BARRIER Verb) (NOT 1 PastAdv) (NOT -1 PastAdv);-->
    <rule target="akkor">
      <not>
        <match pos="-1">
          <set n="past-adv"/>
        </match>
      </not>
      <select>
        <set-item lemma="az" tags="prn.*.tem"/>
      </select>
      <match from="1">
        <and>
          <set n="verb"/>
          <set>
            <set-item tags="past"/>
          </set>
        </and>
        <barrier>
          <set n="verb"/>
        </barrier>
      </match>
      <not>
        <match pos="1">
          <set n="past-adv"/>
        </match>
      </not>
    </rule>

    <!--SELECT ("ez" prn tem) IF (1C VerbNotSer);-->
    <!-- The C ("careful") flag of 1C is carried by the careful attribute; XML requires it to have a value. -->
    <rule target="ekkor">
      <select>
        <set-item lemma="ez" tags="prn.*.tem"/>
      </select>
      <match pos="1" careful="true">
        <set n="verb-not-ser"/>
      </match>
    </rule>

    <!--SELECT ("válasz" n acc) IF (*1 ("kap") + VerbNotSer BARRIER Verb);-->
    <rule>
      <select>
        <set-item lemma="válasz" tags="n.*.acc"/>
      </select>
      <match from="1">
        <and>
          <set n="verb-not-ser"/>
          <set>
            <set-item lemma="kap"/>
          </set>
        </and>
        <barrier>
          <set n="verb"/>
        </barrier>
      </match>
    </rule>

    <!--SELECT ("válasz" n acc) IF (*-1 ("kap") + VerbNotSer BARRIER Verb);-->
    <rule>
      <match from="-1">
        <and>
          <set n="verb-not-ser"/>
          <set>
            <set-item lemma="kap"/>
          </set>
        </and>
        <barrier>
          <set n="verb"/>
        </barrier>
      </match>
      <select>
        <set-item lemma="válasz" tags="n.*.acc"/>
      </select>
    </rule>

    <!--SELECT (det) IF (*1 Noun BARRIER (*) - (adj) - (num));-->
    <!-- NOTE(review): set "*" is not defined in section-sets; presumably a built-in "any" set. Confirm. -->
    <rule>
      <select>
        <set-item tags="det"/>
      </select>
      <match from="1">
        <set n="noun"/>
        <barrier>
          <minus>
            <set n="*"/>
            <set>
              <set-item tags="adj"/>
              <set-item tags="num"/>
            </set>
          </minus>
        </barrier>
      </match>
    </rule>

    <!--SELECT (np) IF (NOT -1 (>>>));-->
    <rule>
      <select>
        <set-item tags="np"/>
      </select>
      <not>
        <match pos="-1">
          <bos/>
        </match>
      </not>
    </rule>
  </section-rules>
</xmlcg>