User:Krvoje/Foma script for testing finite-state disambiguation
Jump to navigation
Jump to search
# A foma script file, can be executed with: foma -l constraint.foma
#
# Proof of concept for finite-state disambiguation of text in the Apertium
# stream format. Readings inside a lexical unit (LU) are tagged with one of
# three "smiley" marks placed right after the "/" that opens the reading:
#   /:|  undecided (every reading starts out like this)
#   /:)  selected  (keep)
#   /:(  rejected  (to be removed)
# A rule rewrites marks, Remove deletes the rejected readings and CleanMarks
# strips whatever marks are left.
#
# NOTE: comments are kept on their own lines and outside of definitions;
# a definition runs until its terminating ";".

# Definitions for Apertium stream format
define Delimiter [ %^ | %/ | %< | %> | %$ | %+ | %# | %{ | %} | %\ ] ;
# ~$Delimiter is any string that contains no delimiter (not just a single
# symbol); Letter is only ever used as Letter*, so the resulting language is
# "any sequence of non-delimiters and backslash-escaped delimiters".
define Letter ~$Delimiter | %\ Delimiter ;
define Lemma Letter* ;
define Surface Letter* ;
define Tag %< Letter* %> ;
define Head %/ Lemma Tag* ;
define Morpheme %+ Lemma Tag* ;
define Queue [%# " " Letter*]^{0,1} ;
define Reading Head Morpheme* Queue ;
define ambLU %^ Surface Reading+ %$ ;
define unambLU %^ Lemma Tag* Morpheme* Queue %$ ;

# For Chunks:
define Name Letter* ;
define Chunk %^ Name Tag* %{ [unambLU " "]* unambLU %} %$ ;

####################################################
# The part used for disambiguation proof of concept

# LU or Reading delimiter
define End [ %$ | %/ ] ;
# Marks of readings that have not been rejected
define Alive [{/:)} | {/:|}];

# For our intents and purposes we need only the amb LU, from now on it signifies any LU.
# The marks ":|", ":)" and ":(" contain no Delimiter symbols, so a marked
# reading still matches Reading (the mark is read as the start of the Lemma).
define LU [ambLU " "*];

# A transducer that matches an LU containing a tag T, ignores LU's that have a ":("
# (in foma round parentheses denote optionality, so (~$End)* describes the
# same language as [~$End]*)
define LU(T) LU .o. [?* Alive (~$End)* T ?*];

# A valid input consists of exclusively LU's separated by " "*
define ValidInput [LU " "*]+ ;

# Assigns "undecided :|" marks to all readings
define PrepareInput %/ -> {/:|} ;

# Cleans all markings
define CleanMarks {/:|} -> %/, {/:)} -> %/, {/:(} -> %/ ;

# Removes every reading marked :( .
# Only the mark and the body of the reading are deleted; the End symbol that
# follows (the "/" opening the next reading, or the closing "$") is checked in
# the right context and is NOT consumed. Consuming it would make the matches
# for two adjacent :( readings overlap, so that only one of them could be
# rewritten and a rejected reading would survive.
define Remove {/:(} [~$End]* -> 0 || _ End ;

# Whether a reading contains a tag, for a replacement rule:
# the rest of the current reading up to and including its End delimiter,
# with Tag occurring somewhere inside it.
define Contains(Tag) [~$End]* Tag [~$End]* End ;

# Rejects (marks with :( ) every undecided reading that is followed by Condition
define MarkSuspicious(Condition) {/:|} -> {/:(} || _ Condition ;

# Selects (marks with :) ) every undecided reading that is followed by
# Condition, then rejects all readings that are still undecided
define MarkSelection(Condition) [{/:|} -> {/:)} || _ Condition] .o.
                                [{/:|} -> {/:(}] ;

# Rejects every reading that is still undecided
define MarkRemoval {/:|} -> {/:(} ;

# Building of transducers for sample rules:

# Definitions of LU's containing tags, for better legibility
define Dative LU({<dat>}) ;
define Locative LU({<loc>}) ;
define Preposition LU({<pr>}) ;
define Accusative LU({<acc>}) ;
define Noun LU({<n>}) ;

# The example rules.
# Each rule describes exactly two consecutive LUs: a preposition followed by a noun.

# Preposition ambiguous between accusative and locative, followed by a noun
# that has a locative reading: reject the accusative reading of the preposition.
define RuleRemoveAccusative [ [Preposition & Accusative & Locative] .o. MarkSuspicious(Contains({<acc>}))]
                            [ Noun & Locative ] ;

# Preposition with a locative reading, followed by a noun ambiguous between
# locative and dative: select the locative reading of the noun.
define RuleSelectLocative [ Preposition & Locative ]
                          [ [Noun & Locative & Dative] .o. MarkSelection(Contains({<loc>})) ] ;

# Both of the above applied in one pass.
define RuleCombine [ [Preposition & Locative & Accusative] .o. MarkSuspicious(Contains({<acc>}))]
                   [ [Noun & Locative & Dative] .o. MarkSelection(Contains({<loc>})) ] ;

# Sample input:
# ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$

# Full pipelines: validate, mark everything undecided, apply a rule,
# drop rejected readings, strip the remaining marks.
define ExampleRemove ValidInput .o. PrepareInput .o. RuleRemoveAccusative .o. Remove .o. CleanMarks ;
define ExampleSelect ValidInput .o. PrepareInput .o. RuleSelectLocative .o. Remove .o. CleanMarks ;
define ExampleCombined ValidInput .o. PrepareInput .o. RuleCombine .o. Remove .o. CleanMarks ;

# Push the three pipelines on the stack; the last one pushed (ExampleRemove)
# ends up on top and is therefore the first one applied below.
regex ExampleCombined ;
regex ExampleSelect ;
regex ExampleRemove ;

# Applying the rules to the test string:
# ExampleRemove
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
pop
# ExampleSelect
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
pop
# ExampleCombined
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$