Difference between revisions of "VM for transfer"
Jump to navigation
Jump to search
Line 88: | Line 88: | ||
* The case is represented as "aa" (all lowercase), "Aa" (first uppercase) and "AA" (all uppercase). |
* The case is represented as "aa" (all lowercase), "Aa" (first uppercase) and "AA" (all uppercase). |
||
+ | == Code generation == |
||
− | == Sample compilation of XML code fragments == |
||
− | |||
− | === Example 1 === |
||
− | ==== XML t1x Code: chunking ==== |
||
− | <code> |
||
− | <out> |
||
− | <chunk name="det_det_nom_adj" case="caseFirstWord"> |
||
− | <tags> |
||
− | <tag><lit-tag v="SN"/></tag> |
||
− | <tag><var n="tipus_det"/></tag> |
||
− | <tag><var n="gen_chunk"/></tag> |
||
− | <tag><var n="nbr_chunk"/></tag> |
||
− | </tags> |
||
− | <lu> |
||
− | <clip pos="1" side="tl" part="lem"/> |
||
− | <clip pos="1" side="tl" part="a_det"/> |
||
− | <clip pos="1" side="tl" part="gen_sense_mf" link-to="3"/> |
||
− | <clip pos="1" side="tl" part="gen_mf"/> |
||
− | <clip pos="1" side="tl" part="nbr_sense_sp" link-to="4"/> |
||
− | <clip pos="1" side="tl" part="nbr_sp"/> |
||
− | </lu> |
||
− | <b/> |
||
− | <lu> |
||
− | <lit v="el"/> |
||
− | <lit-tag v="det.def"/> |
||
− | <clip pos="1" side="tl" part="gen_sense_mf" link-to="3"/> |
||
− | <lit-tag v="pl"/> |
||
− | </lu> |
||
− | <b pos="1"/> |
||
− | <lu> |
||
− | <clip pos="3" side="tl" part="lemh"/> |
||
− | <clip pos="3" side="tl" part="a_nom"/> |
||
− | <clip pos="3" side="tl" part="gen_sense_mf" link-to="3"/> |
||
− | <clip pos="3" side="tl" part="gen_mf"/> |
||
− | <clip pos="3" side="tl" part="nbr_sense_sp" link-to="4"/> |
||
− | <clip pos="3" side="tl" part="nbr_sp"/> |
||
− | <clip pos="3" side="tl" part="lemq"/> |
||
− | </lu> |
||
− | <b/> |
||
− | <b pos="2"/> |
||
− | <lu> |
||
− | <var n="adjectiu1"/> |
||
− | <clip pos="2" side="tl" part="lemh"/> |
||
− | <clip pos="2" side="tl" part="a_adj"/> |
||
− | <clip pos="2" side="tl" part="gen_sense_mf" link-to="3"/> |
||
− | <clip pos="2" side="tl" part="gen_mf"/> |
||
− | <clip pos="2" side="tl" part="nbr_sense_sp" link-to="4"/> |
||
− | <clip pos="2" side="tl" part="nbr_sp" link-to="4"/> |
||
− | <clip pos="2" side="tl" part="lemq"/> |
||
− | </lu> |
||
− | </chunk> |
||
− | </out> |
||
− | </code> |
||
− | |||
− | ==== Compiled Code ==== |
||
− | |||
− | <code> |
||
− | push "det_det_nom_adj" |
||
− | push "<SN>" |
||
− | pusht tipus_det ; first evaluate the variable, append/prepend '<>', then push in the stack |
||
− | pusht gen_chunk |
||
− | pusht nbr_chunk |
||
− | |||
− | push 1 |
||
− | push "^\w+" ; lem |
||
− | cliptl |
||
− | push 1 |
||
− | push [regex] ; a_det |
||
− | cliptl |
||
− | push "<3>" ; since link-to overrides everything else, we do not need any dedicated instruction |
||
− | ; for that |
||
− | push 1 |
||
− | push [regex] ; gen_mf |
||
− | cliptl |
||
− | push "<4>" |
||
− | push 1 |
||
− | push [regex] ; nbr_sp |
||
− | cliptl |
||
− | lu 6 ; pop 6 items, concat, create lexical unit ^...$ and push back in stack |
||
− | |||
− | pushbl ; push a blank |
||
− | |||
− | push "el" |
||
− | push "<det><def>" |
||
− | push "<3>" |
||
− | push "<pl>" |
||
− | lu 4 ; pop 4 items from the stack, create a lexical unit ^...$ and then |
||
− | ; push in the stack |
||
− | |||
− | pushsb 1 |
||
− | |||
− | push 3 |
||
− | push [regex] ; lemh |
||
− | cliptl |
||
− | push 3 |
||
− | push [regex] ; a_nom |
||
− | cliptl |
||
− | push "<3>" |
||
− | push 3 |
||
− | push [regex] ; gen_mf |
||
− | cliptl |
||
− | push "<4>" |
||
− | push 3 |
||
− | push [regex] ; nbr_sp |
||
− | cliptl |
||
− | push 3 |
||
− | push [regex] ; lemq |
||
− | cliptl |
||
− | lu 7 |
||
− | |||
− | pushbl |
||
− | pushsb 2 |
||
− | |||
− | pushv adjectiu1 ; it's a var, so eval and push the value |
||
− | push 3 |
||
− | push [regex] ; lemh |
||
− | cliptl |
||
− | push 3 |
||
− | push [regex] ; a_adj |
||
− | cliptl |
||
− | push "<3>" |
||
− | push 3 |
||
− | push [regex] ; gen_mf |
||
− | cliptl |
||
− | push "<4>" |
||
− | push "<4>" ; a bit confused, there are two link-to in the XML |
||
− | push 3 |
||
− | push [regex] ; lemq |
||
− | cliptl |
||
− | lu 7 |
||
− | |||
− | brace 7 ; no of blank + lexical unit = 7 |
||
− | ; pop 7 items, concat, prepend and append {, } then push back |
||
− | |||
− | chunk 6 ; create the chunk, ^...{^...$}$, and push back in stack |
||
− | |||
− | out 1 ; give output (number of chunks = 1) |
||
− | </code> |
||
− | |||
− | === Example 2 === |
||
− | |||
− | ==== XML t1x Code ==== |
||
− | <code> |
||
− | <section-def-cats> |
||
− | <def-cat n="nom"> |
||
− | <cat-item tags="n.*"/> |
||
− | </def-cat> |
||
− | |||
− | <def-cat n="det"> |
||
− | <cat-item tags="det.*"/> |
||
− | <cat-item tags="predet.*"/> |
||
− | </def-cat> |
||
− | </section-def-cats> |
||
− | |||
− | <section-rules> |
||
− | <rule> |
||
− | <pattern> |
||
− | <pattern-item n="det"/> |
||
− | </pattern> |
||
− | </rule> |
||
− | <rule> |
||
− | <pattern> |
||
− | <pattern-item n="nom"/> |
||
− | </pattern> |
||
− | <action/> |
||
− | </rule> |
||
− | <rule> |
||
− | <pattern> |
||
− | <pattern-item n="det"/> |
||
− | <pattern-item n="nom"/> |
||
− | </pattern> |
||
− | <action/> |
||
− | </rule> |
||
− | </section-rules> |
||
− | </code> |
||
− | |||
− | ==== Compiled Code ==== |
||
− | |||
− | <code> |
||
− | ;first rule: def-cat has two equivalent cat-items |
||
− | push "\w<det>\t" ;load pattern into stack |
||
− | push 1 |
||
− | addtrie [address1] ;define a trie pattern with value 1 (the first rule) |
||
− | |||
− | push "\w<predet>\t" ;same with the second cat-item |
||
− | push 1 |
||
− | addtrie [address1] |
||
− | ;second rule (and so on) very simple, unique cat-item |
||
− | push "\w<n>\t" |
||
− | push 1 |
||
− | addtrie [address2] |
||
− | ;third rule (here is the trick: multiple cat-items in one of the words) |
||
− | push "\w<det>\t" |
||
− | push "\w<n>\t" |
||
− | push 2 ; we have 'det' followed by a 'nom', so addtrie has to pop two elements |
||
− | addtrie [address3] |
||
− | |||
− | push "\w<predet>\t" |
||
− | push "\w<n>\t" |
||
− | push 2 |
||
− | addtrie [address3] |
||
− | </code> |
||
− | |||
− | === Example 3 === |
||
− | ==== XML t1x Code ==== |
||
− | |||
− | <code> |
||
− | <def-macro n="f_coma" npar="1"> |
||
− | <choose> |
||
− | <when> |
||
− | <test> |
||
− | <equal caseless="yes"> |
||
− | <clip pos="1" side="sl" part="lem"/> |
||
− | <lit v="como"/> |
||
− | </equal> |
||
− | </test> |
||
− | <let> |
||
− | <clip pos="1" side="tl" part="lem"/> |
||
− | <get-case-from pos="1"> |
||
− | <lit v="com a"/> |
||
− | </get-case-from> |
||
− | </let> |
||
− | </when> |
||
− | </choose> |
||
− | </def-macro> |
||
− | </code> |
||
− | |||
− | ==== Compiled code ==== |
||
− | |||
− | <code> |
||
− | f_coma: push 1 ; "pos" of "clip" |
||
− | push "^\w+" ; "lem" |
||
− | clipsl ; gets the value clips on the top of the stack. |
||
− | ; "sl" side is implied in the name of the instruction |
||
− | push "como" |
||
− | cmpi ; does the comparison and cleans the stack, it means caseless |
||
− | jnz end ; if the comparison does not succeed, go to end |
||
− | ; semantics: j = jump n = not z = zero flag is activated |
||
− | ; zero flag is activated when a comparison succeeds |
||
− | ; or an arithmetical operation gives 0 |
||
− | push 1 ; "pos" of "clip" |
||
− | push "^\w+" |
||
− | push "com a" |
||
− | storetl ; store the value provided in the top of the stack |
||
− | ; given position 1, "tl" side and "lem" |
||
− | |||
− | end: ... |
||
− | </code> |
||
− | |||
− | === Example 4 === |
||
− | ==== XML t1x Code ==== |
||
− | <code> |
||
− | <test> |
||
− | <or> |
||
− | <not> |
||
− | <equal> |
||
− | <clip pos="1" side="sl" part="gen"/> |
||
− | <clip pos="3" side="sl" part="gen"/> |
||
− | </equal> |
||
− | </not> |
||
− | <not> |
||
− | <equal> |
||
− | <clip pos="2" side="sl" part="gen"/> |
||
− | <clip pos="3" side="sl" part="gen"/> |
||
− | </equal> |
||
− | </not> |
||
− | </or> |
||
− | </test> |
||
− | </code> |
||
− | ==== Compiled code ==== |
||
− | |||
− | <code> |
||
− | start: push 1 |
||
− | push [regex] ; part="gen" |
||
− | clipsl |
||
− | push 3 |
||
− | push [regex] ; part="gen" |
||
− | clipsl |
||
− | cmp ; compare (case sensitive) |
||
− | pushnz ; NOT zero flag and push in stack |
||
− | |||
− | push 2 |
||
− | push [regex] ; part="gen" |
||
− | clipsl |
||
− | push 3 |
||
− | push [regex] ; part="gen" |
||
− | clipsl |
||
− | cmp ; compare (case sensitive) |
||
− | pushnz |
||
− | |||
− | or ; pop 2 items and OR, push result in stack |
||
− | jnz end ; jump if zero flag is 0 (we did not get ZERO as the result) |
||
− | |||
− | ... ... ... |
||
− | (code for successful test) |
||
− | ... ... ... |
||
− | end: ... |
||
− | </code> |
||
− | |||
− | === Example 5 === |
||
− | ==== XML t1x Code ==== |
||
− | <def-list n="verbos_est"> |
||
− | <list-item v="actuar"/> |
||
− | <list-item v="buscar"/> |
||
− | <list-item v="estudiar"/> |
||
− | <list-item v="existir"/> |
||
− | <list-item v="ingressar"/> |
||
− | <list-item v="introduir"/> |
||
− | <list-item v="penetrar"/> |
||
− | <list-item v="publicar"/> |
||
− | <list-item v="treballar"/> |
||
− | <list-item v="viure"/> |
||
− | </def-list> |
||
− | |||
− | <rule> |
||
− | <pattern> |
||
− | <pattern-item n="verb"/> |
||
− | <pattern-item n="a"/> |
||
− | </pattern> |
||
− | <action> |
||
− | <choose> |
||
− | <when> |
||
− | <test> |
||
− | <in caseless="yes"> |
||
− | <clip pos="1" side="sl" part="lem"/> |
||
− | <list n="verbos_est"/> |
||
− | </in> |
||
− | </test> |
||
− | <let> |
||
− | <clip pos="2" side="tl" part="lem"/> |
||
− | <lit v="en"/> |
||
− | </let> |
||
− | </when> |
||
− | </choose> |
||
− | </rule> |
||
− | |||
− | ==== Compiled code ==== |
||
− | push "actuar" |
||
− | push "buscar" |
||
− | push "estudiar" |
||
− | push "existir" |
||
− | push "ingressar" |
||
− | push "introduir" |
||
− | push "penetrar" |
||
− | push "publicar" |
||
− | push "treballar" |
||
− | push "viure" |
||
− | push 10 ; number of elements in the list |
||
− | mklist verbos_est ; make a list variable named 'verbos_est' and put the last 10 data |
||
− | ; from the stack in the list |
||
− | |||
− | rule1: push [regex_verb] |
||
− | push [regex_a] |
||
− | push 2 |
||
− | addtrie rule1_action |
||
− | ... ... ... |
||
− | ... ... ... |
||
− | |||
− | rule1_action: push 1 |
||
− | push "^\w+" ; lem |
||
− | clipsl ; we have lemma in stack now |
||
− | incini verbos_est ; if in verbos_est (ignore case), set ZF = 1, else ZF = 0 |
||
− | jnz rule1_end |
||
− | |||
− | push 2 |
||
− | push "^\w+" |
||
− | push "en" |
||
− | storetl |
||
− | rule1_end: ... |
||
− | |||
− | === Example 6 === |
||
− | ==== XML t1x Code ==== |
||
− | <def-macro n="firstWord" npar="1"> |
||
− | <choose> |
||
− | <when> |
||
− | <test> |
||
− | <equal> |
||
− | <clip pos="1" side="sl" part="a_np_acr"/> |
||
− | <lit v=""/> |
||
− | </equal> |
||
− | </test> |
||
− | <choose> |
||
− | <when> |
||
− | <test> |
||
− | <equal> |
||
− | <var n="EOS"/> |
||
− | <lit v="true"/> |
||
− | </equal> |
||
− | </test> |
||
− | <modify-case> |
||
− | <clip pos="1" side="tl" part="lem"/> |
||
− | <lit v="aa"/> |
||
− | </modify-case> |
||
− | <let> |
||
− | <var n="caseFirstWord"/> |
||
− | <lit v="Aa"/> |
||
− | </let> |
||
− | </when> |
||
− | <otherwise> |
||
− | <let> |
||
− | <var n="caseFirstWord"/> |
||
− | <lit v="aa"/> |
||
− | </let> |
||
− | </otherwise> |
||
− | </choose> |
||
− | </when> |
||
− | <otherwise> |
||
− | <let> |
||
− | <var n="caseFirstWord"/> |
||
− | <lit v="aa"/> |
||
− | </let> |
||
− | </otherwise> |
||
− | </choose> |
||
− | <let> |
||
− | <var n="EOS"/> |
||
− | <lit v="false"/> |
||
− | </let> |
||
− | </def-macro> |
||
− | |||
− | |||
− | <rule comment="REGLA: DET DET ADJ NOM (your many beautiful cats)"> |
||
− | ... ... |
||
− | <action> |
||
− | <call-macro n="firstWord"> |
||
− | <with-param pos="1"/> |
||
− | </call-macro> |
||
− | <call-macro n="f_concord4"> |
||
− | <with-param pos="4"/> |
||
− | <with-param pos="3"/> |
||
− | <with-param pos="2"/> |
||
− | <with-param pos="1"/> |
||
− | </call-macro> |
||
− | ... |
||
− | <out> |
||
− | <chunk name="det_det_nom_adj" case="caseFirstWord"> |
||
− | ... ... |
||
− | </chunk> |
||
− | </out> |
||
− | </action> |
||
− | </rule> |
||
− | |||
− | ==== Compiled code ==== |
||
− | |||
− | firstWord: |
||
− | ... ... ; normal translation of instructions, all the variables are assumed global |
||
− | ... ... |
||
− | ret ; ret instruction does a number of things |
||
− | ; pops 'frame stack', current 'local variable frame' is reset with popped |
||
− | ; values (actually it's more of a pointer assignment); the C++ version will also |
||
− | ; do the necessary deallocations |
||
− | ; pops global stack, update PC with the popped value |
||
− | ... ... |
||
− | ... ... |
||
− | rule_ddan_action: push 1 ; pos = 1 |
||
− | push 1 ; number of parameters 1 |
||
− | call firstWord ; macro label |
||
− | ; call statement does a number of things |
||
− | ; 1. temppc = PC + 1, set PC = firstWord |
||
− | ; 2. pushes the current 'local variable frame' into 'frame stack' |
||
− | ; 3. create a new 'local variable frame' |
||
− | ; 4. pops the arguments from the stack and places them in the 'local |
||
− | ; variable frame' |
||
− | ; 5. pushes temppc in global stack (it will be used by the return |
||
− | ; statement) |
||
− | |||
− | ; 6. continue (instruction at firstWord will be evaluated next) |
||
− | push 1 ; notice that the arguments are pushed in reverse order |
||
− | ; when popped, they will be in the right order |
||
− | push 2 |
||
− | push 3 |
||
− | push 4 |
||
− | push 4 |
||
− | call f_concord4 |
||
− | ... ... |
||
− | |||
− | == Development Notes == |
||
− | |||
− | * None of the macros or actions needs to return anything (unlike conventional functions), so a provision for returning a value (using the stack) is unnecessary. |
||
− | |||
− | * The local variable frame is actually a queue with a maximum length equal to the maximum pattern length in the trie. |
Revision as of 08:38, 12 July 2011
Instruction Set
Mnemonic | Opcode (in hex) |
Other operands | Stack [before]→[after] (top, top-1, ...) |
Description |
---|---|---|---|---|
push | - | value | [empty] → value | Pushes a string or a variable value onto the stack. Strings go between quotes ("string"), but variable names do not |
pushbl | - | N/A | [empty] → blank | Pushes a blank onto the stack |
pushsb | - | pos | [empty] → superblank | Pushes the superblank at 'pos' onto the stack |
append | - | N | valueN, ..., value1, varName → [empty] | Pops 'N' elements and appends them to a variable or clip |
concat | - | N | valueN, ..., value1 → value1...valueN | Pops 'N' elements and pushes them back concatenated |
clip | - | N/A | part → value | Obtains the part in the only language there is (inter/post-chunk) and pushes the value onto the stack |
clipsl | - | N/A | part, pos → value | Obtains the 'part' in source language in position 'pos' and pushes the 'value' onto the stack |
cliptl | - | N/A | part, pos → value | Obtains the 'part' in target language in position 'pos' and pushes the 'value' onto the stack |
storecl | - | N/A | value, part → [empty] | Stores 'value' in the only language there is (inter/post-chunk) |
storesl | - | N/A | value, part, pos → [empty] | Stores 'value' as the 'part' of the source language in position 'pos' |
storetl | - | N/A | value, part, pos → [empty] | Stores 'value' as the 'part' of the target language in position 'pos' |
storev | - | N/A | value, varName → [empty] | Stores 'value' in the variable with name 'varName' |
addtrie | - | address | N, patternN, ..., pattern1 → [empty] | Pops 'N' patterns and creates a trie entry pointing to 'address' |
lu | - | N | valueN, ..., value1 → ^(lexical_unit)$ | Pops 'N' values from the stack, creates a lexical unit ^...$ with them and pushes the lu back onto the stack |
mlu | - | N | luN, ..., lu1 → multiword | Pops 'N' lu from the stack, creates a multiword with them and pushes the multiword back onto the stack |
lu-count | - | N/A | [empty] → number | Pushes the number of lexical units (words inside the chunk) in the rule onto the stack |
chunk | - | N | elemN-2, ... , elem1, <tags>, name → ^name<tags>{elem1...elemN-2}$ | Pops 'N' amount of data from the stack, creates the chunk and pushes it back onto the stack |
out | - | N | valueN, ..., value1 → [empty] | Pops 'N' values from the stack and outputs them |
cmp | - | N/A | value2, value1 → result | Pops 'value1' and 'value2', compares them, if they are equal pushes a 1 (true), if they aren't pushes a 0 (false) |
cmpi | - | N/A | value2, value1 → result | Pops 'value1' and 'value2', compares them (ignoring case for each string), if they are equal pushes a 1 (true), if they aren't pushes a 0 (false) |
cmp-substr | - | N/A | value2, value1 → result | Tests if 'value1' contains the substring 'value2', result can be 1 (true) or 0 (false). |
cmpi-substr | - | N/A | value2, value1 → result | Tests if 'value1' contains the substring 'value2' (ignoring case for each string), result can be 1 (true) or 0 (false). |
not | - | N | value → result | Negates the value on top of the stack, 0 -> 1 or 1 -> 0 |
and | - | N | valueN, ..., value1 → result | And operation of 'N' values, result can be 1 (true) or 0 (false) |
or | - | N | valueN, ..., value1 → result | Or operation of 'N' values, result can be 1 (true) or 0 (false) |
in | - | N/A | list, value → result | Performs a search of a 'value' in a 'list' |
inig | - | N/A | list, value → result | Performs a search (ignoring case) of a 'value' in a 'list' |
jmp | - | label | [empty] → [empty] | Jumps to the label, unconditionally |
jz | - | label | top → [empty] | Jumps to the label if stack.top == 0 |
jnz | - | label | top → [empty] | Jumps to the label if stack.top == 1 |
call | - | label | N, argN, ..., arg1 → [empty] | Calls a macro with the arguments on the stack |
ret | - | N/A | [empty] → [empty] | Returns from a macro, PC will be handled automatically by the VM. |
nop | - | N/A | [empty] → [empty] | No operation |
case-of | - | N/A | container → case | Gets the case from the container in the stack. The container would usually be the result of a clip instruction but can be any string. |
get-case-from | - | N/A | pos → case | Gets the case from the lexical unit in position 'pos' |
modify-case | - | N/A | case, container → modifiedContainer | Modifies the case of the 'container' to 'case' and leaves the modified container on the stack |
begins-with | - | N/A | value2, value1 → result | Checks if 'value1' begins with 'value2' and pushes 1 (true) or 0 (false), 'value2' can be a list |
begins-with-ig | - | N/A | value2, value1 → result | Checks if 'value1' begins with 'value2' (ignoring the case) and pushes 1 (true) or 0 (false), 'value2' can be a list |
ends-with | - | N/A | value2, value1 → result | Checks if 'value1' ends with 'value2' and pushes 1 (true) or 0 (false), 'value2' can be a list |
ends-with-ig | - | N/A | value2, value1 → result | Checks if 'value1' ends with 'value2' (ignoring the case) and pushes 1 (true) or 0 (false), 'value2' can be a list |
- Lists are represented as a concatenation of items separated by '|', e.g. uno|otro|poco|cuánto|menos|mucho|tanto|demasiado
- The case is represented as "aa" (all lowercase), "Aa" (first uppercase) and "AA" (all uppercase).