User:Krvoje/Foma script for testing finite-state disambiguation

From Apertium
Jump to navigation Jump to search
# A foma script file, can be executed with: foma -l constraint.foma

#  Definitions for Apertium stream format

# Reserved characters of the Apertium stream format as handled by this script.
# The last alternative, %\, is an escaped backslash.
define Delimiter [ %^ | %/ | %< | %> | %$ | %+ | %# | %{ | %} | %\] ;

# One unit of text: either a single symbol that is not a delimiter, or a
# delimiter escaped by a preceding backslash.
# The term complement \Delimiter is used rather than ~$Delimiter, because the
# latter accepts whole strings (the empty string included) instead of exactly
# one symbol. Letter* denotes the same language either way.
define Letter [ \Delimiter | [ %\ Delimiter ] ] ;

# Lemmas and surface forms are (possibly empty) runs of letters.
define Lemma   [ Letter* ] ;
define Surface [ Letter* ] ;

# A tag is text enclosed in angle brackets.
define Tag [ %< Letter* %> ] ;

# First component of a reading: a slash, a lemma and the lemma's tags.
define Head [ %/ Lemma Tag* ] ;

# Further components of a reading: a plus sign, a lemma and the lemma's tags.
define Morpheme [ %+ Lemma Tag* ] ;

# Optional tail of a reading: a hash sign, one space, then letters.
define Queue [ ( %# " " Letter* ) ] ;

# A single reading of an ambiguous lexical unit.
define Reading [ Head Morpheme* Queue ] ;

# Ambiguous lexical unit: a surface form followed by one or more readings.
define ambLU [ %^ Surface Reading+ %$ ] ;

# Unambiguous lexical unit: a lemma with tags, morphemes and an optional queue.
define unambLU [ %^ Lemma Tag* Morpheme* Queue %$ ] ;

# Chunk support.
# The name of a chunk is a (possibly empty) run of letters.
define Name [ Letter* ] ;

# A chunk: name, tags, then one or more unambiguous LUs inside curly braces,
# separated from each other by exactly one space.
define Chunk [ %^ Name Tag* %{ unambLU [ " " unambLU ]* %} %$ ] ;

####################################################
# The part used for disambiguation proof of concept

# A reading is terminated either by the slash that opens the next reading or
# by the dollar sign that closes the LU.
define End [ %/ | %$ ] ;

# Marks carried by readings that are still in play: ":|" or ":)".
define Alive [ {/:|} | {/:)} ] ;

# Only the ambiguous LU is needed for this proof of concept, so from here on
# "LU" stands for any LU, together with the spaces that follow it.
define LU [ ambLU [" "]* ] ;

# LU(T): the LUs in which some reading marked ":)" or ":|" contains T.
# T has to occur in the same reading as the mark, so readings marked ":("
# do not count.
define LU(T) LU .o. $[ Alive [~$End]* T ] ;

# Valid input is made up exclusively of LUs, each optionally followed by spaces.
define ValidInput [ LU [" "]* ]+ ;

# Puts the "suspicious" mark ":|" on every reading by rewriting each slash
# as a slash followed by ":|".
define PrepareInput [ %/ -> {/:|} ] ;

# Strips all three marks (":|", ":)" and ":("), leaving only the bare slash.
define CleanMarks [ [ {/:|} | {/:)} | {/:(} ] -> %/ ] ;

# Removes every reading marked ":(".
# Only the reading itself is deleted: the "/:(" mark and everything up to, but
# not including, the next "/" or "$". The terminating delimiter serves as right
# context and stays in place. If it were consumed by the match, two adjacent
# ":(" readings would overlap on the slash they share and only one of them
# could be removed.
define Remove [ {/:(} [~$End]* -> 0 || _ End ] ;

# Contains(T): the rest of a reading, up to and including its terminating
# "/" or "$", in which T occurs. Intended as the right context of a
# replacement rule.
define Contains(T) [ [~$End]* T [~$End]* End ] ;

# MarkSuspicious(Cond): rewrites the mark ":|" as ":(" on every reading whose
# mark is immediately followed by Cond.
define MarkSuspicious(Cond) [ {/:|} -> {/:(} || _ Cond ] ;

# MarkSelection(Cond): first rewrites ":|" as ":)" on every reading whose mark
# is immediately followed by Cond, then rewrites every remaining ":|" as ":(".
define MarkSelection(Cond) [ {/:|} -> {/:)} || _ Cond ]
                           .o.
                           [ {/:|} -> {/:(} ] ;

# Unconditionally rewrites every ":|" mark as ":(", i.e. marks it for removal.
define MarkRemoval [ {/:|} -> {/:(} ] ;

# Building of transducers for sample rules:

# Shorthands, for legibility: LUs that have a live reading carrying a given tag.
define Noun        LU({<n>}) ;
define Preposition LU({<pr>}) ;
define Accusative  LU({<acc>}) ;
define Dative      LU({<dat>}) ;
define Locative    LU({<loc>}) ;

# The example rules. Each rule describes two consecutive LUs.

# A preposition with both an accusative and a locative reading, followed by a
# noun with a locative reading: the preposition's ":|" readings that contain
# <acc> are marked ":(".
define RuleRemoveAccusative
       [ [ Preposition & Locative & Accusative ] .o. MarkSuspicious(Contains({<acc>})) ]
       [ Locative & Noun ] ;

# A preposition with a locative reading, followed by a noun with both a
# locative and a dative reading: the noun's ":|" readings that contain <loc>
# are marked ":)", its remaining ":|" readings are marked ":(".
define RuleSelectLocative
       [ Locative & Preposition ]
       [ [ Noun & Dative & Locative ] .o. MarkSelection(Contains({<loc>})) ] ;

# Both of the above at once: removal on the preposition, selection on the noun.
define RuleCombine
       [ [ Preposition & Accusative & Locative ] .o. MarkSuspicious(Contains({<acc>})) ]
       [ [ Noun & Dative & Locative ] .o. MarkSelection(Contains({<loc>})) ] ;
		    
# Sample input:
# ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$

# Every example is the same pipeline with a different rule in the middle:
# accept valid input, mark all readings ":|", apply the rule, delete the
# readings marked ":(", and finally strip the remaining marks.
define ExampleRemove
       ValidInput
       .o. PrepareInput
       .o. RuleRemoveAccusative
       .o. Remove
       .o. CleanMarks ;

define ExampleSelect
       ValidInput
       .o. PrepareInput
       .o. RuleSelectLocative
       .o. Remove
       .o. CleanMarks ;

define ExampleCombined
       ValidInput
       .o. PrepareInput
       .o. RuleCombine
       .o. Remove
       .o. CleanMarks ;

# Push the three example networks onto the stack. ExampleRemove is pushed
# last, so it ends up on top.
regex ExampleCombined ;
regex ExampleSelect ;
regex ExampleRemove ;

# Applying the rules to the test string:
# "down" applies the network on top of the stack to the rest of the line, and
# "pop" discards that network to expose the next one. The string is therefore
# run through ExampleRemove, then ExampleSelect, then ExampleCombined.
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
pop
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
pop
down ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$

When applied to the string "^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$" ("na kući" = "on the house"), the rules yield the following results:

ExampleRemove => ^na/na<pr><loc>$ ^kući/kuća<n><sg><loc>/kuća<n><sg><dat>$
ExampleSelect => ^na/na<pr><acc>/na<pr><loc>$ ^kući/kuća<n><sg><loc>$
ExampleCombined => ^na/na<pr><loc>$ ^kući/kuća<n><sg><loc>$