<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://wiki.apertium.org/w/index.php?action=history&amp;feed=atom&amp;title=Aravot.am</id>
	<title>Aravot.am - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.apertium.org/w/index.php?action=history&amp;feed=atom&amp;title=Aravot.am"/>
	<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;action=history"/>
	<updated>2026-04-18T11:04:55Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.34.1</generator>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38220&amp;oldid=prev</id>
		<title>Dtvrij74: /* Sentence Aligning */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38220&amp;oldid=prev"/>
		<updated>2013-01-09T00:17:42Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Sentence Aligning&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 00:17, 9 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 259:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 259:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;(The above quote is from [http://mokk.bme.hu/resources/hunalign/ here].)&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;(The above quote is from [http://mokk.bme.hu/resources/hunalign/ here].)&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;A sample Armenian to English dictionary  based on Apertium&#039;s hye-eng bidix (converted to a Hunalign-compatible dictionary format via a simple Python script) can be found [http://sebsauvage.net/paste/?ea47c257cf471482#7RcEqbvdH1NwNHAlPrwUDPAVkgfxrkwbzpQkdPoI0Go= here].&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;A sample Armenian to English dictionary  based on Apertium&#039;s hye-eng bidix (converted to a Hunalign-compatible dictionary format via&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt; [http://danielhonline.com/2013/01/apertium-bidix-to-hunalign-dictionary-converter-script/&lt;/ins&gt; a simple Python script&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;]&lt;/ins&gt;) can be found [http://sebsauvage.net/paste/?ea47c257cf471482#7RcEqbvdH1NwNHAlPrwUDPAVkgfxrkwbzpQkdPoI0Go= here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Information about all Hunalign commands can be found [http://mokk.bme.hu/resources/hunalign/ here].&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Information about all Hunalign commands can be found [http://mokk.bme.hu/resources/hunalign/ here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38212&amp;oldid=prev</id>
		<title>Dtvrij74: /* Scraping */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38212&amp;oldid=prev"/>
		<updated>2013-01-08T23:45:17Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Scraping&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 23:45, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 225:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 225:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Loop through all the article URLs for each day and language and get the HTML tags that store article contents (e.g. &amp;lt;nowiki&amp;gt;&amp;lt;pre&amp;gt;, &amp;lt;p&amp;gt;, etc.&amp;lt;/nowiki&amp;gt;) by using Beautiful Soup. Append the contents to a string.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Loop through all the article URLs for each day and language and get the HTML tags that store article contents (e.g. &amp;lt;nowiki&amp;gt;&amp;lt;pre&amp;gt;, &amp;lt;p&amp;gt;, etc.&amp;lt;/nowiki&amp;gt;) by using Beautiful Soup. Append the contents to a string.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;The Python 3 scraper for Aravot.am can be found [http://&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;wiki&lt;/del&gt;.&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;apertium.org&lt;/del&gt;/&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;wiki&lt;/del&gt;/&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;Aravot.&lt;/del&gt;am here].&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;The Python 3 scraper for Aravot.am can be found [http://&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;danielhonline&lt;/ins&gt;.&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;com&lt;/ins&gt;/&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;2013&lt;/ins&gt;/&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;01/scrape-armenian-english-and-russian-articles-from-aravot-&lt;/ins&gt;am&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;/ &lt;/ins&gt; here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;===Sentence Segmentation===&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;===Sentence Segmentation===&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38211&amp;oldid=prev</id>
		<title>Dtvrij74: /* Scraping */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38211&amp;oldid=prev"/>
		<updated>2013-01-08T23:44:26Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Scraping&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 23:44, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 224:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 224:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Loop through all the article URLs for each day and language and get the HTML tags that store article contents (e.g. &amp;lt;nowiki&amp;gt;&amp;lt;pre&amp;gt;, &amp;lt;p&amp;gt;, etc.&amp;lt;/nowiki&amp;gt;) by using Beautiful Soup. Append the contents to a string.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Loop through all the article URLs for each day and language and get the HTML tags that store article contents (e.g. &amp;lt;nowiki&amp;gt;&amp;lt;pre&amp;gt;, &amp;lt;p&amp;gt;, etc.&amp;lt;/nowiki&amp;gt;) by using Beautiful Soup. Append the contents to a string.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;The Python 3 scraper for Aravot.am can be found [http://wiki.apertium.org/wiki/Aravot.am here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;===Sentence Segmentation===&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;===Sentence Segmentation===&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38194&amp;oldid=prev</id>
		<title>Dtvrij74: /* Sentence Aligning */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38194&amp;oldid=prev"/>
		<updated>2013-01-08T01:05:27Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Sentence Aligning&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 01:05, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 289:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 289:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;You need to loop through all the files that you generated after the sentence segmentation step. You can create a simple Python script to do so:&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;You need to loop through all the files that you generated after the sentence segmentation step. You can create a simple Python script to do so:&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;os.popen(&#039;~&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;/hun3&lt;/del&gt;/hunalign-1.1/src/hunalign/hunalign -utf -bisent -text dict.txt &#039;+en+&#039; &#039;+hy+&#039; &amp;gt; ~&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;/hun3&lt;/del&gt;/hunalign-1.1/align/&#039;+year+&#039;/&#039;+month+&#039;/hy-&#039;+lang+&#039;/&#039;+num).read()&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;os.popen(&#039;~/hunalign-1.1/src/hunalign/hunalign -utf -bisent -text dict.txt &#039;+en+&#039; &#039;+hy+&#039; &amp;gt; ~/hunalign-1.1/align/&#039;+year+&#039;/&#039;+month+&#039;/hy-&#039;+lang+&#039;/&#039;+num).read()&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* Hunalign is under this file path: &amp;lt;code&amp;gt;~&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;/hun3&lt;/del&gt;/hunalign-1.1/src/hunalign/hunalign&amp;lt;/code&amp;gt;.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* Hunalign is under this file path: &amp;lt;code&amp;gt;~/hunalign-1.1/src/hunalign/hunalign&amp;lt;/code&amp;gt;.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* &amp;lt;code&amp;gt;en&amp;lt;/code&amp;gt; is the variable with the file path to the source language article&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* &amp;lt;code&amp;gt;en&amp;lt;/code&amp;gt; is the variable with the file path to the source language article&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* &amp;lt;code&amp;gt;hy&amp;lt;/code&amp;gt; is the variable with the file path to the target language article&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* &amp;lt;code&amp;gt;hy&amp;lt;/code&amp;gt; is the variable with the file path to the target language article&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38193&amp;oldid=prev</id>
		<title>Dtvrij74: /* Sentence Aligning */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38193&amp;oldid=prev"/>
		<updated>2013-01-08T01:02:15Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Sentence Aligning&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 01:02, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 272:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 272:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Use the &amp;lt;code&amp;gt;-utf&amp;lt;/code&amp;gt; argument to input and output in &amp;lt;code&amp;gt;UTF-8&amp;lt;/code&amp;gt;.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Use the &amp;lt;code&amp;gt;-utf&amp;lt;/code&amp;gt; argument to input and output in &amp;lt;code&amp;gt;UTF-8&amp;lt;/code&amp;gt;.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;Use the &amp;lt;code&amp;gt;-bisent&amp;lt;code&amp;gt; argument to only print bisentences (one-to-one alignment segments).&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Use the &amp;lt;code&amp;gt;-bisent&amp;lt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;/&lt;/ins&gt;code&amp;gt; argument to only print bisentences (one-to-one alignment segments).&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;An example command would be:&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;An example command would be:&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38192&amp;oldid=prev</id>
		<title>Dtvrij74: /* Sentence Segmentation */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38192&amp;oldid=prev"/>
		<updated>2013-01-08T00:35:16Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Sentence Segmentation&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 00:35, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 239:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 239:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;After tokenisation, Article contents can be saved to a file. The file structure can be: year/month (the month folder contains the articles, in all three languages, for that month).&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;After tokenisation, Article contents can be saved to a file. The file structure can be: year/month (the month folder contains the articles, in all three languages, for that month).&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;You will have to loop through this process for all files for sentence segmentation.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;You will have to loop through this process for all files for sentence segmentation.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;===Sentence Aligning===&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;===Sentence Aligning===&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Hunalign will be used to align sentences from two languages. For example, hy-en and hy-ru pairs articles can be ran through. &lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Hunalign will be used to align sentences from two languages. For example, hy-en and hy-ru pairs articles can be ran through. &lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38191&amp;oldid=prev</id>
		<title>Dtvrij74: /* Sentence Aligning */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38191&amp;oldid=prev"/>
		<updated>2013-01-08T00:31:41Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Sentence Aligning&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 00:31, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 255:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 255:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;&amp;lt;/blockquote&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;&amp;lt;/blockquote&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;(The above quote is from [http://mokk.bme.hu/resources/hunalign/ here].)&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;(The above quote is from [http://mokk.bme.hu/resources/hunalign/ here].)&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;A sample Armenian to English dictionary  based on Apertium&#039;s hye-eng bidix (converted to a Hunalign-compatible dictionary format via a simple Python script) can be found [http://sebsauvage.net/paste/?ea47c257cf471482#7RcEqbvdH1NwNHAlPrwUDPAVkgfxrkwbzpQkdPoI0Go= here].&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;A sample Armenian to English dictionary  based on Apertium&#039;s hye-eng bidix (converted to a Hunalign-compatible dictionary format via a simple Python script) can be found [http://sebsauvage.net/paste/?ea47c257cf471482#7RcEqbvdH1NwNHAlPrwUDPAVkgfxrkwbzpQkdPoI0Go= here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Information about all Hunalign commands can be found [http://mokk.bme.hu/resources/hunalign/ here].&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Information about all Hunalign commands can be found [http://mokk.bme.hu/resources/hunalign/ here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;If you do not have a bilingual dictionary, you can use the -realign argument. &lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;If you do not have a bilingual dictionary, you can use the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;-realign&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; argument. &lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Hunalign will build its own dictionary based on the two text files (in two languages) that you input. You can input a 0 byte file as a placeholder for the dictionary argument.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Hunalign will build its own dictionary based on the two text files (in two languages) that you input. You can input a 0 byte file as a placeholder for the dictionary argument.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 268:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 269:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;Use the -utf argument to input and output in UTF-8.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Use the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;-utf&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; argument to input and output in &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;UTF-8&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;Use the -bisent argument to only print bisentences (one-to-one alignment segments).&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Use the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;-bisent&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt; argument to only print bisentences (one-to-one alignment segments).&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;An example command would be:&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;An example command would be:&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 279:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 280:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;In this command:&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;In this command:&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* The command is executed in Hunspell&#039;s root directory.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* The command is executed in Hunspell&#039;s root directory.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* The -utf argument (explained above) is used.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* The &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;-utf&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; argument (explained above) is used.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* The -bisent command (explained above) is used.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* The &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;-bisent&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; command (explained above) is used.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* The bilingual dictionary used is called &amp;lt;code&amp;gt;dict.txt&amp;lt;/code&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* The bilingual dictionary used is called &amp;lt;code&amp;gt;dict.txt&amp;lt;/code&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;*&amp;lt;code&amp;gt;en_2012_06_23_84517.txt&amp;lt;/code&amp;gt; is the file in the source language.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;*&amp;lt;code&amp;gt;en_2012_06_23_84517.txt&amp;lt;/code&amp;gt; is the file in the source language.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 304:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 305:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* The second column contains the (supposedly) corresponding sentence from the target language text.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* The second column contains the (supposedly) corresponding sentence from the target language text.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* The third column is a confidence score for the alignment. It averages all the scores to determine the quality of the alignment.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* The third column is a confidence score for the alignment. It averages all the scores to determine the quality of the alignment.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* At the bottom, there will be the quality score. &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;I&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;found&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;that&lt;/del&gt; the quality score increases with the amount of content in the input articles.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* At the bottom, there will be the quality score. &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;It&lt;/ins&gt; &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;seems&lt;/ins&gt; &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;like&lt;/ins&gt; the quality score increases with the amount of content in the input articles.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38190&amp;oldid=prev</id>
		<title>Dtvrij74: /* Scraping and aligning */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38190&amp;oldid=prev"/>
		<updated>2013-01-08T00:22:23Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Scraping and aligning&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 00:22, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 282:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 282:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* The -bisent command (explained above) is used.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* The -bisent command (explained above) is used.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* The bilingual dictionary used is called &amp;lt;code&amp;gt;dict.txt&amp;lt;/code&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* The bilingual dictionary used is called &amp;lt;code&amp;gt;dict.txt&amp;lt;/code&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;*&amp;lt;code&amp;gt;en_2012_06_23_84517.txt&lt;del class=&quot;diffchange diffchange-inline&quot;&gt; &lt;/del&gt;&amp;lt;/code&amp;gt; is the file in the source language.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;*&amp;lt;code&amp;gt;en_2012_06_23_84517.txt&amp;lt;/code&amp;gt; is the file in the source language.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;*&lt;del class=&quot;diffchange diffchange-inline&quot;&gt; &lt;/del&gt;&amp;lt;code&amp;gt;hy_2012_06_23_84517.txt&amp;lt;/code&amp;gt; is the file in the target language.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;*&amp;lt;code&amp;gt;hy_2012_06_23_84517.txt&amp;lt;/code&amp;gt; is the file in the target language.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;You need to loop through all the files that you generated after the sentence segmentation step. You can create a simple Python script to do so:&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;You need to loop through all the files that you generated after the sentence segmentation step. You can create a simple Python script to do so:&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 290:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 290:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* Hunalign is under this &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;filepath&lt;/del&gt;: &amp;lt;code&amp;gt;~/hun3/hunalign-1.1/src/hunalign/hunalign&amp;lt;/code&amp;gt;.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* Hunalign is under this &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;file path&lt;/ins&gt;: &amp;lt;code&amp;gt;~/hun3/hunalign-1.1/src/hunalign/hunalign&amp;lt;/code&amp;gt;.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* en is the variable with the &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;filepath&lt;/del&gt; to the source language article&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;en&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; is the variable with the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;file path&lt;/ins&gt; to the source language article&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* hy is the variable with the &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;filepath&lt;/del&gt; to the target language article&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;hy&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; is the variable with the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;file path&lt;/ins&gt; to the target language article&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* year is the variable with the year (i.e. &quot;2012&quot;)&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;year&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; is the variable with the year (i.e. &quot;2012&quot;)&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* lang is the variable with the source language ISO code (i.e. &quot;en&quot;)&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;lang&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; is the variable with the source language ISO code (i.e. &quot;en&quot;)&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;* num is the &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;varaible&lt;/del&gt; with the article number (i.e. &quot;1234&quot;)&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;* &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;code&amp;gt;&lt;/ins&gt;num&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/code&amp;gt;&lt;/ins&gt; is the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;variable&lt;/ins&gt; with the article number (i.e. &quot;1234&quot;)&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;* The output from Hunalign (for each article) is stored inside a directory with the target language and the source language (separated by a hyphen), which is inside a month directory inside a year directory. Their names are their article numbers. (i.e. align/2012/10/hy-en/12345)&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;* The output from Hunalign (for each article) is stored inside a directory with the target language and the source language (separated by a hyphen), which is inside a month directory inside a year directory. Their names are their article numbers. (i.e. align/2012/10/hy-en/12345)&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38189&amp;oldid=prev</id>
		<title>Dtvrij74: /* Scraping and aligning */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38189&amp;oldid=prev"/>
		<updated>2013-01-08T00:13:40Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Scraping and aligning&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 00:13, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 240:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 240:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;You will have to loop through this process for all files for sentence segmentation.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;You will have to loop through this process for all files for sentence segmentation.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;===Sentence Aligning===&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;===Sentence Aligning===&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;Hunalign will be used to align sentences from two languages. &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;In&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;my case&lt;/del&gt;,&lt;del class=&quot;diffchange diffchange-inline&quot;&gt; I ran it on&lt;/del&gt; hy-en and hy-ru articles&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;. You&lt;/del&gt; can &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;download&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;Hunalign&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;[http://mokk&lt;/del&gt;.&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;bme.hu/resources/hunalign/&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;here].&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Hunalign will be used to align sentences from two languages. &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;For&lt;/ins&gt; &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;example&lt;/ins&gt;, hy-en and hy-ru&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt; pairs&lt;/ins&gt; articles can &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;be&lt;/ins&gt; &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;ran&lt;/ins&gt; &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;through&lt;/ins&gt;. &lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;a class=&quot;mw-diff-movedpara-left&quot; title=&quot;Paragraph was moved. Click to jump to new location.&quot; href=&quot;#movedpara_4_1_rhs&quot;&gt;&amp;#x26AB;&lt;/a&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;&lt;a name=&quot;movedpara_3_0_lhs&quot;&gt;&lt;/a&gt;To build Hunalign on Linux/Unix/Mac OS X, run these commands in the directory where &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;you&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;downloaded&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;Hunalign&lt;/del&gt;:&lt;/div&gt;&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-added&quot;&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Hunalign can be downloaded from [http://mokk.bme.hu/resources/hunalign/ here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;a class=&quot;mw-diff-movedpara-right&quot; title=&quot;Paragraph was moved. Click to jump to old location.&quot; href=&quot;#movedpara_3_0_lhs&quot;&gt;&amp;#x26AB;&lt;/a&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;&lt;a name=&quot;movedpara_4_1_rhs&quot;&gt;&lt;/a&gt;To build Hunalign on Linux/Unix/Mac OS X, run these commands in the directory where &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;Hunalign&lt;/ins&gt; &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;was&lt;/ins&gt; &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;downloaded&lt;/ins&gt;:&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;tar zxvf hunalign-1.1.tgz&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;tar zxvf hunalign-1.1.tgz&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 252:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 254:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;The dictionary consists of newline-separated dictionary items. An item consists of a target languge phrase and a source language phrase, separated by the ” @ ” sequence. Multiword phrases are allowed. The words of a phrase are space-separated as usual. IMPORTANT NOTE: In the current version, for historical reasons, the target language phrases come first. Therefore the ordering is the opposite of the ordering of the command-line arguments or the results.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;The dictionary consists of newline-separated dictionary items. An item consists of a target languge phrase and a source language phrase, separated by the ” @ ” sequence. Multiword phrases are allowed. The words of a phrase are space-separated as usual. IMPORTANT NOTE: In the current version, for historical reasons, the target language phrases come first. Therefore the ordering is the opposite of the ordering of the command-line arguments or the results.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;&amp;lt;/blockquote&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;&amp;lt;/blockquote&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;(The above quote is from [http://mokk.bme.hu/resources/hunalign/ here].)&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;A sample Armenian to English dictionary  based on Apertium&#039;s hye-eng bidix (converted to a Hunalign-compatible dictionary format via a simple Python script) can be found [http://sebsauvage.net/paste/?ea47c257cf471482#7RcEqbvdH1NwNHAlPrwUDPAVkgfxrkwbzpQkdPoI0Go= here].&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;A sample Armenian to English dictionary  based on Apertium&#039;s hye-eng bidix (converted to a Hunalign-compatible dictionary format via a simple Python script) can be found [http://sebsauvage.net/paste/?ea47c257cf471482#7RcEqbvdH1NwNHAlPrwUDPAVkgfxrkwbzpQkdPoI0Go= here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;a class=&quot;mw-diff-movedpara-left&quot; title=&quot;Paragraph was moved. Click to jump to new location.&quot; href=&quot;#movedpara_10_4_rhs&quot;&gt;&amp;#x26AB;&lt;/a&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;&lt;a name=&quot;movedpara_9_0_lhs&quot;&gt;&lt;/a&gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;If you do not have a bilingual dictionary, you can use the -realign argument. &lt;/del&gt;Hunalign will build its own dictionary based on the two text files (in two languages) that you input. You can input a 0 byte file as a placeholder for the dictionary argument.&lt;/div&gt;&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-added&quot;&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Information about all Hunalign commands can be found [http://mokk.bme.hu/resources/hunalign/ here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;If you do not have a bilingual dictionary, you can use the -realign argument. &lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;a class=&quot;mw-diff-movedpara-right&quot; title=&quot;Paragraph was moved. Click to jump to old location.&quot; href=&quot;#movedpara_9_0_lhs&quot;&gt;&amp;#x26AB;&lt;/a&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;&lt;a name=&quot;movedpara_10_4_rhs&quot;&gt;&lt;/a&gt;Hunalign will build its own dictionary based on the two text files (in two languages) that you input. You can input a 0 byte file as a placeholder for the dictionary argument.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Hunalign is used like this:&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Hunalign is used like this:&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38187&amp;oldid=prev</id>
		<title>Dtvrij74: /* Scraping and aligning */</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Aravot.am&amp;diff=38187&amp;oldid=prev"/>
		<updated>2013-01-08T00:00:17Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Scraping and aligning&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 00:00, 8 January 2013&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 218:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 218:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;===Scraping===&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;===Scraping===&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;English, Russian, and Armenian articles&#039; content will be scraped from aravot.am.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;a class=&quot;mw-diff-movedpara-left&quot; title=&quot;Paragraph was moved. Click to jump to new location.&quot; href=&quot;#movedpara_4_0_rhs&quot;&gt;&amp;#x26AB;&lt;/a&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;&lt;a name=&quot;movedpara_2_0_lhs&quot;&gt;&lt;/a&gt;Write the Python script with Beautiful Soup, which aids in screen scraping.&lt;/div&gt;&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-added&quot;&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-empty diff-side-deleted&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;a class=&quot;mw-diff-movedpara-right&quot; title=&quot;Paragraph was moved. Click to jump to old location.&quot; href=&quot;#movedpara_2_0_lhs&quot;&gt;&amp;#x26AB;&lt;/a&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;&lt;a name=&quot;movedpara_4_0_rhs&quot;&gt;&lt;/a&gt;Write the Python script with Beautiful Soup, which aids in screen scraping.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;English, Russian, and Armenian will be scraped from aravot.am. &lt;/del&gt;Loop through the archive pages for each day (and month and year) of the given range and for each language to get all the article URLs.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Loop through the archive pages for each day (and month and year) of the given range and for each language to get all the article URLs.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;Loop through all the article URLs for each day and language and get the HTML tags that store &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;content&lt;/del&gt; (e.g. &amp;lt;nowiki&amp;gt;&amp;lt;pre&amp;gt;, &amp;lt;p&amp;gt;, etc.&amp;lt;/nowiki&amp;gt;) by using Beautiful Soup. Append the contents to a string.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Loop through all the article URLs for each day and language and get the HTML tags that store &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;article contents&lt;/ins&gt; (e.g. &amp;lt;nowiki&amp;gt;&amp;lt;pre&amp;gt;, &amp;lt;p&amp;gt;, etc.&amp;lt;/nowiki&amp;gt;) by using Beautiful Soup. Append the contents to a string.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;===Sentence Segmentation===&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;===Sentence Segmentation===&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;Some things need to be done with the &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;text&lt;/del&gt; before tokenising it. For English, smart quotes should be replaced:&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Some things need to be done with the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;content string&lt;/ins&gt; before tokenising it. For English, smart quotes should be replaced&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;. This is an example code snippet&lt;/ins&gt;:&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;output_en=output_en.replace(&#039;“&#039;,&#039;&quot;&#039;)&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;output_en=output_en.replace(&#039;“&#039;,&#039;&quot;&#039;)&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 234:&lt;/td&gt;
  &lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 235:&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;Please refer to the [http://wiki.apertium.org/wiki/Sentence_segmenting sentence segmentation] page for sentence tokenisation.&lt;del class=&quot;diffchange diffchange-inline&quot;&gt; I used&lt;/del&gt; NLTK Punkt.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;Please refer to the [http://wiki.apertium.org/wiki/Sentence_segmenting sentence segmentation] page for sentence tokenisation. NLTK Punkt&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt; can be used&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;br /&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;br /&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;After tokenisation, &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;you&lt;/del&gt; can &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;save&lt;/del&gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;the article text&lt;/del&gt; to a file. The file structure &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;should&lt;/del&gt; be: year/month.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;After tokenisation, &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;Article contents&lt;/ins&gt; can &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;be&lt;/ins&gt; &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;saved&lt;/ins&gt; to a file. The file structure &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;can&lt;/ins&gt; be: year/month&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt; (the month folder contains the articles, in all three languages, for that month)&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-deletedline diff-side-deleted&quot;&gt;&lt;div&gt;You will have to loop through this process for all files.&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-addedline diff-side-added&quot;&gt;&lt;div&gt;You will have to loop through this process for all files&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt; for sentence segmentation&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;===Sentence Aligning===&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;===Sentence Aligning===&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-deleted&quot;&gt;&lt;div&gt;Hunalign will be used to align sentences from two languages. In my case, I ran it on hy-en and hy-ru articles. You can download Hunalign [http://mokk.bme.hu/resources/hunalign/ here].&lt;/div&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;
  &lt;td class=&quot;diff-context diff-side-added&quot;&gt;&lt;div&gt;Hunalign will be used to align sentences from two languages. In my case, I ran it on hy-en and hy-ru articles. You can download Hunalign [http://mokk.bme.hu/resources/hunalign/ here].&lt;/div&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dtvrij74</name></author>
		
	</entry>
</feed>