Difference between revisions of "VM for transfer"
Jump to navigation
Jump to search
Darthxaher (talk | contribs) |
(Updated the instruction set as of GSoC11) |
||
| Line 1: | Line 1: | ||
== Instruction |
== Instruction Set == |
||
{| class="wikitable" border="1" |
{| class="wikitable" border="1" |
||
! Mnemonic !! Opcode<br>''(in hex)'' !! Other operands !! Stack<br>[before]→[after] !! Description |
! Mnemonic !! Opcode<br>''(in hex)'' !! Other operands !! Stack<br>[before]→[after] (top, top<sub>-1</sub>, ...) !! Description |
||
|- |
|- |
||
| push || - || value || [empty] → value || Pushes a value |
| push || - || value || [empty] → value || Pushes a string or a variable value onto the stack. Strings go between quotes ("string"); variable names do not |
||
|- |
|- |
||
| |
| pushbl || - || N/A || [empty] → blank || Pushes a blank onto the stack |
||
|- |
|- |
||
| |
| pushsb || - || pos || [empty] → superblank || Pushes the superblank at 'pos' onto the stack |
||
|- |
|- |
||
| |
| append || - || N || value<sub>N</sub>, ..., value<sub>1</sub> → [empty] || Pops 'N' elements and appends them to a variable or clip |
||
|- |
|- |
||
| concat || - || N || value<sub>N</sub>, ..., value<sub>1</sub> → value<sub>N</sub>...value<sub>1</sub> || Pops 'N' elements and pushes them back concatenated |
|||
| pushsb || - || pos || [empty] → superblank || pushes the superblank at 'pos' in stack |
|||
|- |
|- |
||
| |
| clip || - || N/A || part → value || Obtains the part in the only language there is (inter/post-chunk) and pushes the value onto the stack |
||
|- |
|- |
||
| |
| clipsl || - || N/A || part, pos → value || Obtains the 'part' in source language in position 'pos' and pushes the 'value' onto the stack |
||
|- |
|- |
||
| cliptl || - || N/A || |
| cliptl || - || N/A || part, pos → value || Obtains the 'part' in target language in position 'pos' and pushes the 'value' onto the stack |
||
|- |
|- |
||
| |
| storecl || - || N/A || value, part → [empty] || Stores 'value' in the only language there is (inter/post-chunk) |
||
|- |
|- |
||
| |
| storesl || - || N/A || value, part, pos → [empty] || Stores 'value' as the 'part' of the source language in position 'pos' |
||
|- |
|- |
||
| |
| storetl || - || N/A || value, part, pos → [empty] || Stores 'value' as the 'part' of the target language in position 'pos' |
||
|- |
|- |
||
| |
| storev || - || N/A || value, varName → [empty] || Stores 'value' in the variable with name 'varName' |
||
|- |
|- |
||
| |
| addtrie || - || address || N, pattern<sub>N</sub>, ..., pattern<sub>1</sub> → [empty] || Pops 'N' patterns and creates a trie entry pointing to 'address' |
||
|- |
|- |
||
| |
| lu || - || N || value<sub>N</sub>, ..., value<sub>1</sub> → ^(lexical_unit)$ || Pops 'N' values from the stack, creates a lexical unit ^...$ with them and pushes the lu back onto the stack |
||
|- |
|- |
||
| |
| mlu || - || N || lu<sub>N</sub>, ..., lu<sub>1</sub> → multiword || Pops 'N' lu from the stack, creates a multiword with them and pushes the multiword back onto the stack |
||
|- |
|- |
||
| |
| lu-count || - || N/A || [empty] → number || Pushes the number of lexical units (words inside the chunk) in the rule onto the stack |
||
|- |
|- |
||
| |
| chunk || - || N || N, tag<sub>N-2</sub>, ... , tag<sub>1</sub>, ^...$, name → ^name<tag<sub>1</sub>>...<tag<sub>N-2</sub>>{^...$}$ || Pops 'N' items from the stack, creates the chunk and pushes it back onto the stack |
||
|- |
|- |
||
| out || - || |
| out || - || N || value<sub>N</sub>, ..., value<sub>1</sub> → [empty] || Pops 'N' values from the stack and outputs them |
||
|- |
|- |
||
| |
| cmp || - || N/A || value<sub>2</sub>, value<sub>1</sub> → result || Pops 'value<sub>1</sub>' and 'value<sub>2</sub>', compares them, if they are equal pushes a 1 (true), if they aren't pushes a 0 (false) |
||
|- |
|- |
||
| |
| cmpi || - || N/A || value<sub>2</sub>, value<sub>1</sub> → result || Pops 'value<sub>1</sub>' and 'value<sub>2</sub>', compares them (ignoring case for strings), if they are equal pushes a 1 (true), if they aren't pushes a 0 (false) |
||
|- |
|- |
||
| |
| not || - || N/A || value → result || Negates the value on top of the stack, 0 -> 1 or 1 -> 0 |
||
|- |
|- |
||
| |
| and || - || N || value<sub>N</sub>, ..., value<sub>1</sub> → result || And operation of 'N' values, result can be 1 (true) or 0 (false) |
||
|- |
|- |
||
| |
| or || - || N || value<sub>N</sub>, ..., value<sub>1</sub> → result || Or operation of 'N' values, result can be 1 (true) or 0 (false) |
||
|- |
|- |
||
| |
| in || - || N/A || list, value → result || Performs a search of a 'value' in a 'list' |
||
|- |
|- |
||
| |
| inig || - || N/A || list, value → result || Performs a search (ignoring case) of a 'value' in a 'list' |
||
|- |
|- |
||
| |
| jmp || - || label || [empty] → [empty] || Jumps to the label, unconditionally |
||
|- |
|- |
||
| |
| jz || - || label || top → [empty] || Jumps to the label if stack.top == 0 |
||
|- |
|- |
||
| |
| jnz || - || label || top → [empty] || Jumps to the label if stack.top == 1 |
||
|- |
|||
| call || - || label || N, arg<sub>N</sub>, ..., arg<sub>1</sub> → [empty] || Calls a macro with the arguments on the stack |
|||
|- |
|||
| ret || - || N/A || [empty] → [empty] || Returns from a macro, PC will be handled automatically by the VM. |
|||
|- |
|||
| nop || - || N/A || [empty] → [empty] || No operation |
|||
|- |
|||
| get-case-from || - || N/A || pos, container → case || Get the case from 'container' in 'pos' |
|||
|- |
|||
| modify-case || - || N/A || newCase, container → [empty] || Modifies the case of the 'container' to 'newCase' |
|||
|- |
|||
| begins-with || - || N/A || value<sub>2</sub>, value<sub>1</sub> → result || Checks if 'value<sub>1</sub>' begins with 'value<sub>2</sub>' and pushes 1 (true) or 0 (false), 'value<sub>2</sub>' can be a list |
|||
|- |
|||
| begins-with-ig || - || N/A || value<sub>2</sub>, value<sub>1</sub> → result || Checks if 'value<sub>1</sub>' begins with 'value<sub>2</sub>' (ignoring the case) and pushes 1 (true) or 0 (false), 'value<sub>2</sub>' can be a list |
|||
|- |
|||
| ends-with || - || N/A || value<sub>2</sub>, value<sub>1</sub> → result || Checks if 'value<sub>1</sub>' ends with 'value<sub>2</sub>' and pushes 1 (true) or 0 (false), 'value<sub>2</sub>' can be a list |
|||
|- |
|||
| ends-with-ig || - || N/A || value<sub>2</sub>, value<sub>1</sub> → result || Checks if 'value<sub>1</sub>' ends with 'value<sub>2</sub>' (ignoring the case) and pushes 1 (true) or 0 (false), 'value<sub>2</sub>' can be a list |
|||
|} |
|} |
||
* Lists are represented as a concatenation of items separated by '|', e.g. uno|otro|poco|cuánto|menos|mucho|tanto|demasiado |
|||
* The case is represented as "aa" (all lowercase), "Aa" (first letter uppercase) and "AA" (all uppercase). |
|||
== Sample compilation of XML code fragments == |
== Sample compilation of XML code fragments == |
||
Revision as of 18:21, 16 June 2011
Contents
Instruction Set
| Mnemonic | Opcode (in hex) |
Other operands | Stack [before]→[after] (top, top-1, ...) |
Description |
|---|---|---|---|---|
| push | - | value | [empty] → value | Pushes a string or a variable value onto the stack. Strings go between quotes ("string"); variable names do not |
| pushbl | - | N/A | [empty] → blank | Pushes a blank onto the stack |
| pushsb | - | pos | [empty] → superblank | Pushes the superblank at 'pos' onto the stack |
| append | - | N | valueN, ..., value1 → [empty] | Pops 'N' elements and appends them to a variable or clip |
| concat | - | N | valueN, ..., value1 → valueN...value1 | Pops 'N' elements and pushes them back concatenated |
| clip | - | N/A | part → value | Obtains the part in the only language there is (inter/post-chunk) and pushes the value onto the stack |
| clipsl | - | N/A | part, pos → value | Obtains the 'part' in source language in position 'pos' and pushes the 'value' onto the stack |
| cliptl | - | N/A | part, pos → value | Obtains the 'part' in target language in position 'pos' and pushes the 'value' onto the stack |
| storecl | - | N/A | value, part → [empty] | Stores 'value' in the only language there is (inter/post-chunk) |
| storesl | - | N/A | value, part, pos → [empty] | Stores 'value' as the 'part' of the source language in position 'pos' |
| storetl | - | N/A | value, part, pos → [empty] | Stores 'value' as the 'part' of the target language in position 'pos' |
| storev | - | N/A | value, varName → [empty] | Stores 'value' in the variable with name 'varName' |
| addtrie | - | address | N, patternN, ..., pattern1 → [empty] | Pops 'N' patterns and creates a trie entry pointing to 'address' |
| lu | - | N | valueN, ..., value1 → ^(lexical_unit)$ | Pops 'N' values from the stack, creates a lexical unit ^...$ with them and pushes the lu back onto the stack |
| mlu | - | N | luN, ..., lu1 → multiword | Pops 'N' lu from the stack, creates a multiword with them and pushes the multiword back onto the stack |
| lu-count | - | N/A | [empty] → number | Pushes the number of lexical units (words inside the chunk) in the rule onto the stack |
| chunk | - | N | N, tagN-2, ... , tag1, ^...$, name → ^name<tag1>...<tagN-2>{^...$}$ | Pops 'N' items from the stack, creates the chunk and pushes it back onto the stack |
| out | - | N | valueN, ..., value1 → [empty] | Pops 'N' values from the stack and outputs them |
| cmp | - | N/A | value2, value1 → result | Pops 'value1' and 'value2', compares them, if they are equal pushes a 1 (true), if they aren't pushes a 0 (false) |
| cmpi | - | N/A | value2, value1 → result | Pops 'value1' and 'value2', compares them (ignoring case for strings), if they are equal pushes a 1 (true), if they aren't pushes a 0 (false) |
| not | - | N/A | value → result | Negates the value on top of the stack, 0 -> 1 or 1 -> 0 |
| and | - | N | valueN, ..., value1 → result | And operation of 'N' values, result can be 1 (true) or 0 (false) |
| or | - | N | valueN, ..., value1 → result | Or operation of 'N' values, result can be 1 (true) or 0 (false) |
| in | - | N/A | list, value → result | Performs a search of a 'value' in a 'list' |
| inig | - | N/A | list, value → result | Performs a search (ignoring case) of a 'value' in a 'list' |
| jmp | - | label | [empty] → [empty] | Jumps to the label, unconditionally |
| jz | - | label | top → [empty] | Jumps to the label if stack.top == 0 |
| jnz | - | label | top → [empty] | Jumps to the label if stack.top == 1 |
| call | - | label | N, argN, ..., arg1 → [empty] | Calls a macro with the arguments on the stack |
| ret | - | N/A | [empty] → [empty] | Returns from a macro, PC will be handled automatically by the VM. |
| nop | - | N/A | [empty] → [empty] | No operation |
| get-case-from | - | N/A | pos, container → case | Get the case from 'container' in 'pos' |
| modify-case | - | N/A | newCase, container → [empty] | Modifies the case of the 'container' to 'newCase' |
| begins-with | - | N/A | value2, value1 → result | Checks if 'value1' begins with 'value2' and pushes 1 (true) or 0 (false), 'value2' can be a list |
| begins-with-ig | - | N/A | value2, value1 → result | Checks if 'value1' begins with 'value2' (ignoring the case) and pushes 1 (true) or 0 (false), 'value2' can be a list |
| ends-with | - | N/A | value2, value1 → result | Checks if 'value1' ends with 'value2' and pushes 1 (true) or 0 (false), 'value2' can be a list |
| ends-with-ig | - | N/A | value2, value1 → result | Checks if 'value1' ends with 'value2' (ignoring the case) and pushes 1 (true) or 0 (false), 'value2' can be a list |
- Lists are represented as a concatenation of items separated by '|', e.g. uno|otro|poco|cuánto|menos|mucho|tanto|demasiado
- The case is represented as "aa" (all lowercase), "Aa" (first letter uppercase) and "AA" (all uppercase).
Sample compilation of XML code fragments
Example 1
XML t1x Code: chunking
<out>
<chunk name="det_det_nom_adj" case="caseFirstWord">
<tags>
<tag><lit-tag v="SN"/></tag>
<tag></tag>
<tag></tag>
<tag></tag>
</tags>
<lu>
<clip pos="1" side="tl" part="lem"/>
<clip pos="1" side="tl" part="a_det"/>
<clip pos="1" side="tl" part="gen_sense_mf" link-to="3"/>
<clip pos="1" side="tl" part="gen_mf"/>
<clip pos="1" side="tl" part="nbr_sense_sp" link-to="4"/>
<clip pos="1" side="tl" part="nbr_sp"/>
</lu>
<lu>
<lit v="el"/>
<lit-tag v="det.def"/>
<clip pos="1" side="tl" part="gen_sense_mf" link-to="3"/>
<lit-tag v="pl"/>
</lu>
<lu>
<clip pos="3" side="tl" part="lemh"/>
<clip pos="3" side="tl" part="a_nom"/>
<clip pos="3" side="tl" part="gen_sense_mf" link-to="3"/>
<clip pos="3" side="tl" part="gen_mf"/>
<clip pos="3" side="tl" part="nbr_sense_sp" link-to="4"/>
<clip pos="3" side="tl" part="nbr_sp"/>
<clip pos="3" side="tl" part="lemq"/>
</lu>
<lu>
<clip pos="2" side="tl" part="lemh"/>
<clip pos="2" side="tl" part="a_adj"/>
<clip pos="2" side="tl" part="gen_sense_mf" link-to="3"/>
<clip pos="2" side="tl" part="gen_mf"/>
<clip pos="2" side="tl" part="nbr_sense_sp" link-to="4"/>
<clip pos="2" side="tl" part="nbr_sp" link-to="4"/>
<clip pos="2" side="tl" part="lemq"/>
</lu>
</chunk>
</out>
Compiled Code
push "det_det_nom_adj"
push "<SN>"
pusht tipus_det ; first evaluate the variable, append/prepend '<>', then push in the stack
pusht gen_chunk
pusht nbr_chunk
push 1
push "^\w+" ; lem
cliptl
push 1
push [regex] ; a_det
cliptl
push "<3>" ; since link-to overrides everything else, we do not need any dedicated instruction
; for that
push 1
push [regex] ; gen_mf
cliptl
push "<4>"
push 1
push [regex] ; nbr_sp
cliptl
lu 6 ; pop 6 items, concat, create lexical unit ^...$ and push back in stack
pushbl ; push a blank
push "el"
push "<det><def>"
push "<3>"
push "<pl>"
lu 4 ; pop 4 items from the stack, create a lexical unit ^...$ and then
; push in the stack
pushsb 1
push 3
push [regex] ; lemh
cliptl
push 3
push [regex] ; a_nom
cliptl
push "<3>"
push 3
push [regex] ; gen_mf
cliptl
push "<4>"
push 3
push [regex] ; nbr_sp
cliptl
push 3
push [regex] ; lemq
cliptl
lu 7
pushbl
pushsb 2
pushv adjectiu1 ; it's a var, so eval and push the value
push 3
push [regex] ; lemh
cliptl
push 3
push [regex] ; a_adj
cliptl
push "<3>"
push 3
push [regex] ; gen_mf
cliptl
push "<4>"
push "<4>" ; a bit confused, there are two link-to in the XML
push 3
push [regex] ; lemq
cliptl
lu 7
brace 7 ; no of blank + lexical unit = 7
; pop 7 items, concat, prepend and append {, } then push back
chunk 6 ; create the chunk, ^...{^...$}$, and push back in stack
out 1 ; give output (number of chunks = 1)
Example 2
XML t1x Code
<section-def-cats>
<def-cat n="nom">
<cat-item tags="n.*"/>
</def-cat>
<def-cat n="det">
<cat-item tags="det.*"/>
<cat-item tags="predet.*"/>
</def-cat>
</section-def-cats>
<section-rules>
<rule>
<pattern>
<pattern-item n="det"/>
</pattern>
</rule>
<rule>
<pattern>
<pattern-item n="nom"/>
</pattern>
<action/>
</rule>
<rule>
<pattern>
<pattern-item n="det"/>
<pattern-item n="nom"/>
</pattern>
<action/>
</rule>
</section-rules>
Compiled Code
;first rule: def-cat has two equivalent cat-items
push "\w<det>\t" ;load pattern into stack
push 1
addtrie [address1] ;define a trie pattern with value 1 (the first rule)
push "\w<predet>\t" ;same with the second cat-item
push 1
addtrie [address1]
;second rule (and so on) very simple, unique cat-item
push "\w<n>\t"
push 1
addtrie [address2]
;third rule (here is the trick: multiple cat-items in one of the words)
push "\w<det>\t"
push "\w<n>\t"
push 2 ; we have 'det' followed by a 'nom', so addtrie has to pop two elements
addtrie [address3]
push "\w<predet>\t"
push "\w<n>\t"
push 2
addtrie [address3]
Example 3
XML t1x Code
<def-macro n="f_coma" npar="1">
<choose>
<when>
<test>
<equal caseless="yes">
<clip pos="1" side="sl" part="lem"/>
<lit v="como"/>
</equal>
</test>
<let>
<clip pos="1" side="tl" part="lem"/>
<get-case-from pos="1">
<lit v="com a"/>
</get-case-from>
</let>
</when>
</choose>
</def-macro>
Compiled code
f_coma: push 1 ; "pos" of "clip"
push "^\w+" ; "lem"
clipsl ; clips the value and leaves it on the top of the stack.
; "sl" side is implied in the name of the instruction
push "como"
cmpi ; does the comparison and cleans the stack, it means caseless
jnz end ; if the comparison does not succeed, go to end
; semantics: j = jump n = not z = zero flag is activated
; zero flag is activated when a comparison succeeds
; or an arithmetical operation gives 0
push 1 ; "pos" of "clip"
push "^\w+"
push "com a"
storetl ; store the value provided in the top of the stack
; given position 1, "tl" side and "lem"
end: ...
Example 4
XML t1x Code
<test>
<or>
<not>
<equal>
<clip pos="1" side="sl" part="gen"/>
<clip pos="3" side="sl" part="gen"/>
</equal>
</not>
<not>
<equal>
<clip pos="2" side="sl" part="gen"/>
<clip pos="3" side="sl" part="gen"/>
</equal>
</not>
</or>
</test>
Compiled code
start: push 1
push [regex] ; part="gen"
clipsl
push 3
push [regex] ; part="gen"
clipsl
cmp ; compare (case sensitive)
pushnz ; NOT zero flag and push in stack
push 2
push [regex] ; part="gen"
clipsl
push 3
push [regex] ; part="gen"
clipsl
cmp ; compare (case sensitive)
pushnz
or ; pop 2 items and OR, push result in stack
jnz end ; jump if zero flag is 0 (we did not get ZERO as the result)
... ... ...
(code for successful test)
... ... ...
end: ...
Example 5
XML t1x Code
<def-list n="verbos_est">
<list-item v="actuar"/>
<list-item v="buscar"/>
<list-item v="estudiar"/>
<list-item v="existir"/>
<list-item v="ingressar"/>
<list-item v="introduir"/>
<list-item v="penetrar"/>
<list-item v="publicar"/>
<list-item v="treballar"/>
<list-item v="viure"/>
</def-list>
<rule>
<pattern>
<pattern-item n="verb"/>
<pattern-item n="a"/>
</pattern>
<action>
<choose>
<when>
<test>
<in caseless="yes">
<clip pos="1" side="sl" part="lem"/>
<list n="verbos_est"/>
</in>
</test>
<let>
<clip pos="2" side="tl" part="lem"/>
<lit v="en"/>
</let>
</when>
</choose>
</action>
</rule>
Compiled code
push "actuar"
push "buscar"
push "estudiar"
push "existir"
push "ingressar"
push "introduir"
push "penetrar"
push "publicar"
push "treballar"
push "viure"
push 10 ; number of elements in the list
mklist verbos_est ; make a list variable named 'verbos_est' and put the last 10 data
; from the stack in the list
rule1: push [regex_verb]
push [regex_a]
push 2
addtrie rule1_action
... ... ...
... ... ...
rule1_action: push 1
push "^\w+" ; lem
clipsl ; we have the lemma in the stack now
incini verbos_est ; if in verbos_est (ignore case), set ZF = 1, else ZF = 0
jnz rule1_end
push 2
push "^\w+"
push "en"
storetl
rule1_end: ...
Example 6
XML t1x Code
<def-macro n="firstWord" npar="1">
<choose>
<when>
<test>
<equal>
<clip pos="1" side="sl" part="a_np_acr"/>
<lit v=""/>
</equal>
</test>
<choose>
<when>
<test>
<equal>
<lit v="true"/>
</equal>
</test>
<modify-case>
<clip pos="1" side="tl" part="lem"/>
<lit v="aa"/>
</modify-case>
<let>
<lit v="Aa"/>
</let>
</when>
<otherwise>
<let>
<lit v="aa"/>
</let>
</otherwise>
</choose>
</when>
<otherwise>
<let>
<lit v="aa"/>
</let>
</otherwise>
</choose>
<let>
<lit v="false"/>
</let>
</def-macro>
<rule comment="REGLA: DET DET ADJ NOM (your many beautiful cats)">
... ...
<action>
<call-macro n="firstWord">
<with-param pos="1"/>
</call-macro>
<call-macro n="f_concord4">
<with-param pos="4"/>
<with-param pos="3"/>
<with-param pos="2"/>
<with-param pos="1"/>
</call-macro>
...
<out>
<chunk name="det_det_nom_adj" case="caseFirstWord">
... ...
</chunk>
</out>
</action>
</rule>
Compiled code
firstWord:
... ... ; normal translation of instructions, all the variables are assumed global
... ...
ret ; ret instruction does a number of things
; pops 'frame stack', current 'local variable frame' is reset with popped
; values (actually it's more of a pointer assignment), C++ version will also
; do the necessary deallocations
; pops global stack, update PC with the popped value
... ...
... ...
rule_ddan_action: push 1 ; pos = 1
push 1 ; number of parameters 1
call firstWord ; macro label
; call statement does a number of things
; 1. temppc = PC + 1, set PC = firstWord
; 2. pushes the current 'local variable frame' into 'frame stack'
; 3. create a new 'local variable frame'
; 4. pops the arguments from the stack and places them in the 'local
; variable frame'
; 5. pushes temppc in global stack (it will be used by the return
; statement)
; 6. continue (instruction at firstWord will be evaluated next)
push 1 ; notice that the arguments are pushed in reverse order
; when popped, they will be in the right order
push 2
push 3
push 4
push 4
call f_concord4
... ...
Development Notes
- None of the macros and actions need to return anything (unlike conventional functions), so provision for returning a value (using the stack) is unnecessary
- The local variable frame is actually a queue with a maximum length equal to the maximum pattern length in the trie.