<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://wiki.apertium.org/w/index.php?action=history&amp;feed=atom&amp;title=Measuring_coverage_of_HFST_transducer</id>
	<title>Measuring coverage of HFST transducer - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.apertium.org/w/index.php?action=history&amp;feed=atom&amp;title=Measuring_coverage_of_HFST_transducer"/>
	<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Measuring_coverage_of_HFST_transducer&amp;action=history"/>
	<updated>2026-05-15T14:48:39Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.34.1</generator>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Measuring_coverage_of_HFST_transducer&amp;diff=70451&amp;oldid=prev</id>
		<title>Firespeaker: Created page with &quot;Here&#039;s a script that measures coverage of an HFST transducer (and gives the top of the hitparade):  &lt;pre&gt; #!/bin/bash  LG=abc ANALYSERDIR=/path/to/analyser CORPUS=/path/to/cor...&quot;</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Measuring_coverage_of_HFST_transducer&amp;diff=70451&amp;oldid=prev"/>
		<updated>2019-09-06T02:14:37Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;Here&amp;#039;s a script that measures coverage of an HFST transducer (and gives the top of the hitparade):  &amp;lt;pre&amp;gt; #!/bin/bash  LG=abc ANALYSERDIR=/path/to/analyser CORPUS=/path/to/cor...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;Here&amp;#039;s a script that measures coverage of an HFST transducer (and gives the top of the hitparade):&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
&lt;br /&gt;
LG=abc&lt;br /&gt;
ANALYSERDIR=/path/to/analyser&lt;br /&gt;
CORPUS=/path/to/corpus/corpus.txt.bz2&lt;br /&gt;
&lt;br /&gt;
# Adjust LG, ANALYSERDIR and CORPUS above; everything below is derived from them.&lt;br /&gt;
ANALYSER=&amp;quot;$ANALYSERDIR/$LG.automorf.hfst&amp;quot;&lt;br /&gt;
&lt;br /&gt;
# Working files: the decompressed corpus and the analysed text, one word per line.&lt;br /&gt;
TMPCORPUS=&amp;quot;/tmp/$LG.corpus.txt&amp;quot;&lt;br /&gt;
PARADE=&amp;quot;/tmp/$LG.parade.txt&amp;quot;&lt;br /&gt;
&lt;br /&gt;
bzcat &amp;quot;$CORPUS&amp;quot; &amp;gt; &amp;quot;$TMPCORPUS&amp;quot;&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;Generating hitparade (might take a bit!)&amp;quot;&lt;br /&gt;
# The sed step puts a line break after every $ (which closes an analysed word in the&lt;br /&gt;
# Apertium stream format), so the counts below can simply work on lines.&lt;br /&gt;
apertium-destxt &amp;lt; &amp;quot;$TMPCORPUS&amp;quot; | hfst-proc -w &amp;quot;$ANALYSER&amp;quot; | apertium-retxt | sed &amp;#039;s/\$\s*/\$\n/g&amp;#039; &amp;gt; &amp;quot;$PARADE&amp;quot;&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;TOP UNKNOWN WORDS:&amp;quot;&lt;br /&gt;
&lt;br /&gt;
# Lines containing * are the words the analyser could not analyse.&lt;br /&gt;
grep &amp;#039;\*&amp;#039; &amp;quot;$PARADE&amp;quot; | sort | uniq -c | sort -rn | head -n20&lt;br /&gt;
&lt;br /&gt;
KNOWN=$(grep -cv &amp;#039;\*&amp;#039; &amp;quot;$PARADE&amp;quot;)&lt;br /&gt;
UNKNOWN=$(grep -c &amp;#039;\*&amp;#039; &amp;quot;$PARADE&amp;quot;)&lt;br /&gt;
# Every line either contains * or does not, so the two counts add up to the line total.&lt;br /&gt;
TOTAL=$((KNOWN + UNKNOWN))&lt;br /&gt;
&lt;br /&gt;
# An empty corpus would make calc divide by zero, so only compute the ratio when there is data.&lt;br /&gt;
if [ &amp;quot;$TOTAL&amp;quot; -gt 0 ]; then&lt;br /&gt;
    # calc pads its answer with whitespace; [[:space:]] strips it without touching other characters.&lt;br /&gt;
    PERCENTAGE=$(calc &amp;quot;$KNOWN/$TOTAL&amp;quot; | sed &amp;#039;s/[[:space:]]//g&amp;#039;)&lt;br /&gt;
else&lt;br /&gt;
    PERCENTAGE=n/a&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;coverage: $KNOWN / $TOTAL ($PERCENTAGE)&amp;quot;&lt;br /&gt;
echo &amp;quot;remaining unknown forms: $UNKNOWN&amp;quot;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;/div&gt;</summary>
		<author><name>Firespeaker</name></author>
		
	</entry>
</feed>