Difference between revisions of "Talk:Constraint-based lexical selection module"
Jump to navigation
Jump to search
| Line 93: | Line 93: | ||
</pre> |
</pre> |
||
===Text=== |
|||
<pre> |
|||
s ("estació" n) ("season" n) (1 "plujós") |
|||
s ("estació" n) ("season" n) (2 "plujós") |
|||
s ("estació" n) ("season" n) (1 "de") (3 "any") |
|||
s ("estació" n) ("station" n) (1 "de") (3 "Línia") |
|||
s ("prova" n) ("evidence" n) (1 "arqueològic") |
|||
s ("prova" n) ("test" n) (1 "estadístic") |
|||
s ("prova" n) ("event" n) (-3 "guanyador") (-2 "de") |
|||
s ("prova" n) ("testing" n) (-2 "tècnica") (-1 "de") |
|||
s ("joc" n) ("game" n) (1 "olímpic") |
|||
s ("joc" n) ("set" n) (1 "de") (2 "caràcter") |
|||
r ("pista" n) ("hint" n) (1 "més") (2 "llarg") |
|||
r ("pista" n) ("clue" n) (1 "més") (2 "llarg") |
|||
r ("motiu" n) ("motif" n) (-1 "aquest") (-2 "per") |
|||
s ("carn" n) ("flesh" n) (1 "i") (2 "os") |
|||
s ("sobre" pr) ("over" n) (-1 "victòria") |
|||
s ("dona" n) ("wife" n) (-1 "*" det pos) |
|||
s ("dona" n) ("wife" n) (-1 "el") (1 "de") |
|||
s ("dona" n) ("woman" n) (1 "de") (2 "*" det pos) (3 "somni") |
|||
r ("patró" n) ("pattern" n) (1 "*" np ant) |
|||
</pre> |
|||
==Old application strategy== |
==Old application strategy== |
||
| Line 129: | Line 153: | ||
IF target IN rule.tl_patro: |
IF target IN rule.tl_patro: |
||
DELETE target |
DELETE target |
||
Revision as of 11:20, 1 December 2011
Rule formats
Note: Felipe doesn't like "skip".
- can't say I do either … sounds like a command rather than a constraint --unhammer 11:42, 18 November 2011 (UTC)
- also, in the <or>, should we read them as independent of each other? that's a bit confusing since otherwise they're all required and have a certain order --unhammer 11:42, 18 November 2011 (UTC)
The regular expression for the OR below is:
(nazi<adj>[0-9A-Za-z <>]*|totalitari<adj>[0-9A-Za-z <>]*|feixista<adj>[0-9A-Za-z <>]*|franquista<adj>[0-9A-Za-z <>]*|militar<adj>[0-9A-Za-z <>]*|fiscal<adj>[0-9A-Za-z <>]*)
- Francis Tyers 11:53, 18 November 2011 (UTC)
- 1
<rule>
<remove lemma="règim" tags="n.*">
<acception lemma="diet" tags="n.*"/>
</remove>
<or>
<skip lemma="nazi" tags="adj.*"/>
<skip lemma="totalitari" tags="adj.*"/>
<skip lemma="feixista" tags="adj.*"/>
<skip lemma="franquista" tags="adj.*"/>
<skip lemma="militar" tags="adj.*"/>
<skip lemma="fiscal" tags="adj.*"/>
</or>
</rule>
- 2
<rule>
<remove lemma="règim" tags="n.*">
<acception lemma="diet" tags="n.*"/>
</remove>
<or>
<pattern lemma="nazi" tags="adj.*"/>
<pattern lemma="totalitari" tags="adj.*"/>
<pattern lemma="feixista" tags="adj.*"/>
<pattern lemma="franquista" tags="adj.*"/>
<pattern lemma="militar" tags="adj.*"/>
<pattern lemma="fiscal" tags="adj.*"/>
</or>
</rule>
- 3
<rule>
<remove-from lemma="règim" tags="n.*">
<translation lemma="diet" tags="n.*"/>
</remove-from>
<pattern>
<or>
<pattern-item lemma="nazi" tags="adj.*"/>
<pattern-item lemma="totalitari" tags="adj.*"/>
<pattern-item lemma="feixista" tags="adj.*"/>
<pattern-item lemma="franquista" tags="adj.*"/>
<pattern-item lemma="militar" tags="adj.*"/>
<pattern-item lemma="fiscal" tags="adj.*"/>
</or>
</pattern>
</rule>
<rule c="la dona dels seus somnis">
<select-for lemma="dona" tags="n.*">
<translation lemma="wife" tags="n.*"/>
</select-for>
<pattern>
<pattern-item lemma="de" tags="pr.*"/>
<pattern-item lemma="*" tags="det.pos.*"/>
<pattern-item lemma="somni" tags="n.*"/>
</pattern>
</rule>
- 4
<rule>
<target lemma="règim" tags="n.*">
<remove lemma="diet" tags="n.*"/>
</target>
…
</rule>
<rule c="la dona dels seus somnis">
<target lemma="dona" tags="n.*">
<select lemma="wife" tags="n.*"/>
</target>
…
</rule>
Text
s ("estació" n) ("season" n) (1 "plujós")
s ("estació" n) ("season" n) (2 "plujós")
s ("estació" n) ("season" n) (1 "de") (3 "any")
s ("estació" n) ("station" n) (1 "de") (3 "Línia")
s ("prova" n) ("evidence" n) (1 "arqueològic")
s ("prova" n) ("test" n) (1 "estadístic")
s ("prova" n) ("event" n) (-3 "guanyador") (-2 "de")
s ("prova" n) ("testing" n) (-2 "tècnica") (-1 "de")
s ("joc" n) ("game" n) (1 "olímpic")
s ("joc" n) ("set" n) (1 "de") (2 "caràcter")
r ("pista" n) ("hint" n) (1 "més") (2 "llarg")
r ("pista" n) ("clue" n) (1 "més") (2 "llarg")
r ("motiu" n) ("motif" n) (-1 "aquest") (-2 "per")
s ("carn" n) ("flesh" n) (1 "i") (2 "os")
s ("sobre" pr) ("over" n) (-1 "victòria")
s ("dona" n) ("wife" n) (-1 "*" det pos)
s ("dona" n) ("wife" n) (-1 "el") (1 "de")
s ("dona" n) ("woman" n) (1 "de") (2 "*" det pos) (3 "somni")
r ("patró" n) ("pattern" n) (1 "*" np ant)
Old application strategy
The following is an inefficient implementation of the rule application process:
# s ("prova" n) ("event" n) (-3 "guanyador") (-2 "de")
#
# tipus = "select";
# centre = "^prova<n>.*"
# tl_patro = ["^event<n>.*"]
# sl_patro = {-3: "^guanyador<", -2: "^de<"}
CLASS Rule:
tipus = enum('select', 'remove')
centre = '';
tl_patro = [];
sl_patro = {};
rule_table = {}; # e.g. rule_table["estació"] = [rule1, rule2, rule3];
i = 0
DEFINE ApplyRule(rule, lu):
FOREACH target IN lu.tl:
SWITCH rule.tipus:
'select':
IF target NOT IN rule.tl_patro:
DELETE target
'remove':
IF target IN rule.tl_patro:
DELETE target
FOREACH pair(sl, tl) IN sentence:
FOREACH centre IN rule_table:
IF centre IN sl:
FOREACH rule IN rule_table[centre]:
matched = False
FOREACH context_item IN rule_table[centre][rule]:
IF context_item in sentence:
matched = True
ELSE:
matched = False
# If all of the context items have matched (i.e. none of them failed to match),
# apply the rule, then break and continue to the next pair.
IF matched == True:
sentence[i] = ApplyRule(rule_table[centre][rule], sentence[i])
break
i = i + 1