<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://wiki.apertium.org/w/index.php?action=history&amp;feed=atom&amp;title=Talk%3ALexical_selection_in_target_language</id>
	<title>Talk:Lexical selection in target language - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.apertium.org/w/index.php?action=history&amp;feed=atom&amp;title=Talk%3ALexical_selection_in_target_language"/>
	<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Talk:Lexical_selection_in_target_language&amp;action=history"/>
	<updated>2026-05-09T10:17:45Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.34.1</generator>
	<entry>
		<id>https://wiki.apertium.org/w/index.php?title=Talk:Lexical_selection_in_target_language&amp;diff=8292&amp;oldid=prev</id>
		<title>Francis Tyers: New page: ==Ranker==  Presuming that you have a language model in the following format:  &lt;pre&gt; $ head test.ngrams  3086,1,last 1157,2,the last 1128,1,recent 703,1,recently 501,2,last year 301,2,in r...</title>
		<link rel="alternate" type="text/html" href="https://wiki.apertium.org/w/index.php?title=Talk:Lexical_selection_in_target_language&amp;diff=8292&amp;oldid=prev"/>
		<updated>2008-10-30T15:24:10Z</updated>

		<summary type="html">&lt;p&gt;New page: ==Ranker==  Presuming that you have a language model in the following format:  &amp;lt;pre&amp;gt; $ head test.ngrams  3086,1,last 1157,2,the last 1128,1,recent 703,1,recently 501,2,last year 301,2,in r...&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;==Ranker==&lt;br /&gt;
&lt;br /&gt;
Presuming that you have a language model in the following format:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
$ head test.ngrams &lt;br /&gt;
3086,1,last&lt;br /&gt;
1157,2,the last&lt;br /&gt;
1128,1,recent&lt;br /&gt;
703,1,recently&lt;br /&gt;
501,2,last year&lt;br /&gt;
301,2,in recent&lt;br /&gt;
277,2,recent years&lt;br /&gt;
250,2,the recent&lt;br /&gt;
231,1,constantly&lt;br /&gt;
225,3,in the last&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
This code should provide a very rudimentary ranker for input sentences of the form specified on the main page. This is far from efficient.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;div style=&amp;quot;padding: 1em;border: 1px dashed #2f6fab;color: black;background-color: #f9f9f9;line-height: 1.1em; font-size: 85%&amp;quot;&amp;gt;&lt;br /&gt;
&amp;lt;source lang=&amp;quot;python&amp;quot;&amp;gt;&lt;br /&gt;
#!/usr/bin/python2.5&lt;br /&gt;
# coding=utf-8&lt;br /&gt;
# -*- encoding: utf-8 -*-&lt;br /&gt;
&lt;br /&gt;
import sys, codecs;&lt;br /&gt;
&lt;br /&gt;
sys.stdout = codecs.getwriter(&amp;#039;utf-8&amp;#039;)(sys.stdout);&lt;br /&gt;
sys.stderr = codecs.getwriter(&amp;#039;utf-8&amp;#039;)(sys.stderr);&lt;br /&gt;
&lt;br /&gt;
def return_possibles(ambig): #{&lt;br /&gt;
	c = sys.stdin.read(1);&lt;br /&gt;
&lt;br /&gt;
	count = 1;&lt;br /&gt;
	unambig = ambig[1];&lt;br /&gt;
&lt;br /&gt;
	while c != &amp;#039;}&amp;#039;:  #{&lt;br /&gt;
		if c == &amp;#039;|&amp;#039;: #{&lt;br /&gt;
			count = count + 1;&lt;br /&gt;
			ambig[count] = unambig;&lt;br /&gt;
			c = sys.stdin.read(1);&lt;br /&gt;
			continue;&lt;br /&gt;
		#}&lt;br /&gt;
&lt;br /&gt;
		ambig[count] = ambig[count] + c;&lt;br /&gt;
		c = sys.stdin.read(1);&lt;br /&gt;
	#}	&lt;br /&gt;
	&lt;br /&gt;
	return ambig;&lt;br /&gt;
#}&lt;br /&gt;
&lt;br /&gt;
def rank(ambig, lm): #{&lt;br /&gt;
	ranked = [];&lt;br /&gt;
&lt;br /&gt;
	for i in ambig.keys(): #{&lt;br /&gt;
		x = ambig[i].replace(&amp;#039;  &amp;#039;, &amp;#039; &amp;#039;).split(&amp;#039; &amp;#039;);&lt;br /&gt;
		score = 0.0;	&lt;br /&gt;
		grams = &amp;#039;&amp;#039;;&lt;br /&gt;
&lt;br /&gt;
		for i in range(0, len(x) - 1): #{&lt;br /&gt;
			grams = grams + x[i] + &amp;#039; &amp;#039; + x[i+1] + &amp;#039;\n&amp;#039;;&lt;br /&gt;
		#}&lt;br /&gt;
		for i in range(0, len(x) - 2): #{&lt;br /&gt;
			grams = grams + x[i] + &amp;#039; &amp;#039; + x[i+1] + &amp;#039; &amp;#039; + x[i+2] + &amp;#039;\n&amp;#039;;&lt;br /&gt;
		#}&lt;br /&gt;
		for i in range(0, len(x) - 3): #{&lt;br /&gt;
			grams = grams + x[i] + &amp;#039; &amp;#039; + x[i+1] + &amp;#039; &amp;#039; + x[i+2] + &amp;#039; &amp;#039; + x[i+3] + &amp;#039;\n&amp;#039;;&lt;br /&gt;
		#} &lt;br /&gt;
		for i in range(0, len(x) - 4): #{&lt;br /&gt;
			grams = grams + x[i] + &amp;#039; &amp;#039; + x[i+1] + &amp;#039; &amp;#039; + x[i+2] + &amp;#039; &amp;#039; + x[i+3] + &amp;#039; &amp;#039; + x[i+4] + &amp;#039;\n&amp;#039;;&lt;br /&gt;
		#}&lt;br /&gt;
&lt;br /&gt;
		for g in grams.split(&amp;#039;\n&amp;#039;): #{&lt;br /&gt;
			g = g.strip();&lt;br /&gt;
			if g in lm: #{&lt;br /&gt;
				score = score + lm[g];&lt;br /&gt;
			#}&lt;br /&gt;
		#}&lt;br /&gt;
&lt;br /&gt;
		ranked.append((score, x));&lt;br /&gt;
	#}&lt;br /&gt;
		&lt;br /&gt;
	return ranked;&lt;br /&gt;
#}&lt;br /&gt;
&lt;br /&gt;
def load_lm(f): #{&lt;br /&gt;
	lmfile = file(f).read();&lt;br /&gt;
	lm = {};&lt;br /&gt;
&lt;br /&gt;
	for line in lmfile.split(&amp;#039;\n&amp;#039;): #{&lt;br /&gt;
		if len(line) &amp;lt; 1: #{&lt;br /&gt;
			continue;&lt;br /&gt;
		#}&lt;br /&gt;
		row = line.split(&amp;#039;,&amp;#039;);&lt;br /&gt;
		gram = row[2];&lt;br /&gt;
	&lt;br /&gt;
		lm[gram] = float(row[0]) * float(row[1]);&lt;br /&gt;
	#}&lt;br /&gt;
	return lm;&lt;br /&gt;
#}&lt;br /&gt;
&lt;br /&gt;
ambig = {};&lt;br /&gt;
ambig[1] = &amp;#039;&amp;#039;;&lt;br /&gt;
count = 1;&lt;br /&gt;
frame = 0;&lt;br /&gt;
unambig = &amp;#039;&amp;#039;;&lt;br /&gt;
output = &amp;#039;&amp;#039;;&lt;br /&gt;
&lt;br /&gt;
lm = load_lm(&amp;#039;test.ngrams&amp;#039;);&lt;br /&gt;
&lt;br /&gt;
c = sys.stdin.read(1);&lt;br /&gt;
&lt;br /&gt;
while c != &amp;#039;&amp;#039;: #{&lt;br /&gt;
&lt;br /&gt;
	if c == &amp;#039;{&amp;#039; and frame == 0: #{&lt;br /&gt;
		ambig = return_possibles(ambig);&lt;br /&gt;
		frame = 1;&lt;br /&gt;
	#}&lt;br /&gt;
&lt;br /&gt;
	if c == &amp;#039;{&amp;#039; and frame == 1: #{&lt;br /&gt;
		ranked = rank(ambig, lm);&lt;br /&gt;
		ranked.sort();&lt;br /&gt;
		ranked.reverse();&lt;br /&gt;
		output = output + &amp;#039; &amp;#039;.join(ranked[0][1]);&lt;br /&gt;
&lt;br /&gt;
		del ambig;&lt;br /&gt;
		unambig = &amp;#039;&amp;#039;;&lt;br /&gt;
		ambig = {};&lt;br /&gt;
		ambig[1] = &amp;#039;&amp;#039;;&lt;br /&gt;
		count = 1;&lt;br /&gt;
		frame = 0;&lt;br /&gt;
		c = sys.stdin.read(1);&lt;br /&gt;
	#}&lt;br /&gt;
&lt;br /&gt;
	for i in ambig.keys(): #{&lt;br /&gt;
		ambig[i] = ambig[i] + c;&lt;br /&gt;
	#}&lt;br /&gt;
&lt;br /&gt;
	c = sys.stdin.read(1);&lt;br /&gt;
#}&lt;br /&gt;
&lt;br /&gt;
for i in ambig.keys(): #{&lt;br /&gt;
	output = output + ambig[i];&lt;br /&gt;
#}&lt;br /&gt;
&lt;br /&gt;
sys.stdout.write(output);&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&amp;lt;/div&amp;gt;&lt;/div&gt;</summary>
		<author><name>Francis Tyers</name></author>
		
	</entry>
</feed>