<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://wiki.apertium.org/w/index.php?action=history&amp;feed=atom&amp;title=N-grams</id>
	<title>N-grams - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.apertium.org/w/index.php?action=history&amp;feed=atom&amp;title=N-grams"/>
	<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=N-grams&amp;action=history"/>
	<updated>2026-05-09T10:36:23Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.34.1</generator>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=N-grams&amp;diff=52737&amp;oldid=prev</id>
		<title>Unhammer: Created page with &quot;Say you have a corpus and an analyser, how do you make a trigram frequency list in three shell commands?  Grab apertium-cleanstream, then do: &lt;pre&gt; bzcat corpus.bz2 | ape...&quot;</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=N-grams&amp;diff=52737&amp;oldid=prev"/>
		<updated>2015-02-10T13:53:27Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;Say you have a corpus and an analyser, how do you make a trigram frequency list in three shell commands?  Grab &lt;a href=&quot;/wiki/Apertium-cleanstream&quot; class=&quot;mw-redirect&quot; title=&quot;Apertium-cleanstream&quot;&gt;apertium-cleanstream&lt;/a&gt;, then do: &amp;lt;pre&amp;gt; bzcat corpus.bz2 | ape...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;Say you have a corpus and an analyser, how do you make a trigram frequency list in three shell commands?&lt;br /&gt;
&lt;br /&gt;
Grab [[apertium-cleanstream]], then do:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
bzcat corpus.bz2 | apertium-deshtml | lt-proc foo.bin | apertium-cleanstream -n &amp;gt;corpus.ana&lt;br /&gt;
paste corpus.ana &amp;lt;(tail -n+2 corpus.ana) &amp;lt;(tail -n+3 corpus.ana) &amp;gt;corpus.trigrams&lt;br /&gt;
sort corpus.trigrams | uniq -c | sort -nr &amp;gt; corpus.trigrams.hitparade&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;/div&gt;</summary>
		<author><name>Unhammer</name></author>
		
	</entry>
</feed>