User:Krvoje/Foma script for testing finite-state disambiguation
Jump to navigation
Jump to search
# A foma script file, can be executed with: foma -l constraint.foma
# Definitions for Apertium stream format
# The reserved characters of the stream: ^ / < > $ + # { } and the backslash.
define Delimiter [ %^ | %/ | %< | %> | %$ | %+ | %# | %{ | %} | %\] ;
# Either any string that contains no delimiter (note: ~$Delimiter is a set of
# strings, including the empty one, not a single symbol), or a backslash
# followed by one delimiter, i.e. an escaped delimiter.
define Letter ~$Delimiter | %\ Delimiter ;
# Lemma and Surface are the same language: any sequence of Letter.
define Lemma Letter* ;
define Surface Letter* ;
# A tag is Letter* enclosed in angle brackets.
define Tag %< Letter* %> ;
# First part of a reading: "/" followed by a lemma and any number of tags.
define Head %/ Lemma Tag* ;
# A joined part of a reading: "+" followed by a lemma and any number of tags.
define Morpheme %+ Lemma Tag* ;
# Optional trailing part of a reading: "#", one space, then Letter*.
define Queue [%# " " Letter*]^{0,1} ;
# One analysis: a head, any number of "+" parts, and the optional queue.
define Reading Head Morpheme* Queue ;
# Ambiguous lexical unit: ^surface followed by one or more "/"-introduced readings, closed by $.
define ambLU %^ Surface Reading+ %$ ;
# Unambiguous lexical unit: a single analysis between ^ and $, with no "/" separator.
define unambLU %^ Lemma Tag* Morpheme* Queue %$ ;
# Chunks: ^ name, tags, then one or more unambiguous LUs inside { },
# separated by single spaces, closed by $
define Name Letter* ;
define Chunk %^ Name Tag* %{ unambLU [" " unambLU]* %} %$ ;
####################################################
# Disambiguation proof of concept
# A reading ends either at the "/" that opens the next reading or at the "$" that closes the LU
define End [ %/ | %$ ] ;
# Marks of readings still in play: selected ":)" or undecided ":|"
define Alive {/:)} | {/:|} ;
# For our intents and purposes only the ambiguous LU is needed; from now on
# "LU" means an ambiguous LU together with any spaces that follow it
define LU ambLU " "* ;
# LU(T): restricts LU to units in which T occurs after an Alive mark with no
# End in between, i.e. inside a reading that is not marked ":("
define LU(T) LU .o. [?* Alive [~$End]* T ?*] ;
# A valid input is one or more LUs, optionally separated by spaces
define ValidInput [LU " "*]+ ;
# Gives every reading the undecided mark ":|" by rewriting its leading "/" to "/:|"
define PrepareInput %/ -> {/:|} ;
# Strips whichever of the three marks is present, restoring the plain "/" separator
define CleanMarks [ {/:|} | {/:)} | {/:(} ] -> %/ ;
# Removes every reading marked ":(".
# The marked reading (its "/:(" opener plus everything up to, but not
# including, the next End) is deleted; the End that follows is only used as
# right context and is left in place. Not consuming that delimiter matters
# when several ":(" readings are adjacent: each one is matched on its own, so
# all of them are removed, whereas a rule that swallows the following "/"
# leaves every second marked reading behind.
define Remove [ {/:(} [~$End]* ] -> 0 || _ End ;
# Contains(Feature): right-context pattern for a replacement rule. It matches
# the rest of a reading, up to and including the End that terminates it,
# provided Feature occurs somewhere before that End
define Contains(Feature) [ [~$End]* Feature [~$End]* ] End ;
# MarkSuspicious(Condition): rewrites the undecided mark ":|" to the removal
# mark ":(" wherever the text to the right of the mark matches Condition
define MarkSuspicious(Condition) [ {/:|} -> {/:(} || _ Condition ] ;
# MarkSelection(Condition): first rewrites ":|" to the selected mark ":)"
# wherever the text to the right matches Condition, then rewrites every ":|"
# that is still left to the removal mark ":("
define MarkSelection(Condition)
[ {/:|} -> {/:)} || _ Condition ]
.o. [ {/:|} -> {/:(} ] ;
# Rewrites every undecided mark ":|" to the removal mark ":("
define MarkRemoval {/:|} -> {/:(} ;
# Building blocks for the sample rules:
# named shorthands for "an LU with a live reading that carries this tag"
# -- parts of speech
define Preposition LU({<pr>}) ;
define Noun LU({<n>}) ;
# -- cases
define Accusative LU({<acc>}) ;
define Dative LU({<dat>}) ;
define Locative LU({<loc>}) ;
# The example rules; each one describes exactly two consecutive LUs
# RuleRemoveAccusative: in an LU that has live <pr>, <acc> and <loc> readings,
# mark the readings containing <acc> for removal when the next LU has live
# <n> and <loc> readings
define RuleRemoveAccusative
[ [Preposition & Locative & Accusative] .o. MarkSuspicious(Contains({<acc>})) ]
[ Locative & Noun ] ;
# RuleSelectLocative: after an LU with live <pr> and <loc> readings, in an LU
# that has live <n>, <loc> and <dat> readings, select the readings containing
# <loc> and mark its remaining readings for removal
define RuleSelectLocative
[ Locative & Preposition ]
[ [Noun & Dative & Locative] .o. MarkSelection(Contains({<loc>})) ] ;
# RuleCombine: both of the above at once, the removal on the first LU and the
# selection on the second
define RuleCombine
[ [Preposition & Accusative & Locative] .o. MarkSuspicious(Contains({<acc>})) ]
[ [Noun & Dative & Locative] .o. MarkSelection(Contains({<loc>})) ] ;
# Sample input:
# ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
# Each example is a full pipeline: accept only valid input, mark every reading
# ":|", apply one rule, delete the readings marked ":(", strip the remaining marks.
define ExampleRemove ValidInput .o. PrepareInput .o. RuleRemoveAccusative .o. Remove .o. CleanMarks ;
define ExampleSelect ValidInput .o. PrepareInput .o. RuleSelectLocative .o. Remove .o. CleanMarks ;
define ExampleCombined ValidInput .o. PrepareInput .o. RuleCombine .o. Remove .o. CleanMarks ;
# Each "regex" pushes its network on the stack, so after these three lines
# ExampleRemove is on top, ExampleSelect below it and ExampleCombined at the bottom.
regex ExampleCombined ;
regex ExampleSelect ;
regex ExampleRemove ;
# Applying the rules to the test string:
# "down" applies the network on top of the stack and "pop" discards it, so the
# three calls below run ExampleRemove, ExampleSelect and ExampleCombined, in that order.
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
pop
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
pop
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
When applied to the string "^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$" ("na kući" = "on the house"), the rules yield the following results:
ExampleRemove => ^na/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
ExampleSelect => ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>$
ExampleCombined => ^na/na<pr><loc>$ ^kući/kuća<n><sg><loc>$