Difference between revisions of "Lttoolbox API examples"
Jump to navigation
Jump to search
(that version only works for 'beer') |
|||
(12 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
[[Exemple d'application Lttoolbox]] |
|||
{{TOCD}} |
|||
==Basic example== |
|||
This is a simple C++/lttoolbox version of the 'beer' program in [[Morphological dictionaries]] |
This is a simple C++/lttoolbox version of the 'beer' program in [[Morphological dictionaries]] |
||
Line 22: | Line 27: | ||
// Alphabet class |
// Alphabet class |
||
#include <lttoolbox/alphabet.h> |
#include <lttoolbox/alphabet.h> |
||
// Pool class |
|||
#include <lttoolbox/pool.h> |
|||
// State class |
// State class |
||
Line 80: | Line 82: | ||
//Rather than try to convert between Transducer and TransExe, we'll |
//Rather than try to convert between Transducer and TransExe, we'll |
||
//just write and read. |
//just write and read. |
||
FILE* fst=fopen("beer.fst", "w"); |
FILE* fst=fopen("beer.fst", "w"); |
||
Compression::wstring_write(L"ber", fst); //write letters |
|||
t.write(fst); |
|||
alphabet.write(fst); // write multichars |
|||
Compression::multibyte_write(1, fst); // write num. fsts |
|||
Compression::wstring_write(L"main", fst); // write name of first fst |
|||
t.write(fst); // write fst |
|||
fclose(fst); |
fclose(fst); |
||
fst=fopen("beer.fst", "r"); |
fst=fopen("beer.fst", "r"); |
||
Line 89: | Line 95: | ||
fclose(fst); |
fclose(fst); |
||
State *initial_state = new State(); |
|||
Pool<vector<int> > *pool = new Pool<vector<int> >(1, vector<int>(50)); |
|||
State *initial_state = new State(pool); |
|||
initial_state->init(te.getInitial()); |
initial_state->init(te.getInitial()); |
||
State current_state = *initial_state; |
State current_state = *initial_state; |
||
Line 126: | Line 131: | ||
} |
} |
||
// Not used, just don't want it to be empty... |
|||
if (current_state.isFinal(anfinals)) |
if (current_state.isFinal(anfinals)) |
||
{ |
{ |
||
// Not used, just don't want it to be empty... |
|||
set<wchar_t> escaped; |
set<wchar_t> escaped; |
||
escaped.insert(L'$'); |
escaped.insert(L'$'); |
||
Line 143: | Line 148: | ||
} |
} |
||
</pre> |
</pre> |
||
[[File:Ltbeer.png|center]] |
|||
we can simplify the building of the transducers like this: |
|||
<pre> |
|||
// build "beer" manually |
|||
int beer = initial; |
|||
// these are the transitions b:b e:e e:e r:r |
|||
beer = t.insertSingleTransduction(alphabet(L'b',L'b'), beer); |
|||
beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer); |
|||
beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer); |
|||
beer = t.insertSingleTransduction(alphabet(L'r',L'r'), beer); |
|||
int beersg = beer; |
|||
// these are 0:<n> 0:<sg> |
|||
beersg = t.insertSingleTransduction(alphabet(0, n_sym), beersg); |
|||
beersg = t.insertSingleTransduction(alphabet(0, sg_sym), beersg); |
|||
t.setFinal(beersg); |
|||
// build "beers" manually |
|||
int beerpl = beer; |
|||
beerpl = t.insertSingleTransduction(alphabet(L's', 0), beerpl); |
|||
beerpl = t.insertSingleTransduction(alphabet(0, n_sym), beerpl); |
|||
beerpl = t.insertSingleTransduction(alphabet(0, pl_sym), beerpl); |
|||
t.setFinal(beerpl); |
|||
</pre> |
|||
==Writing/reading several transducers to the same file== |
|||
<pre> |
|||
// g++ -o test test.cc -I/home/fran/local/include/lttoolbox-3.2 -L/home/fran/local/lib -llttoolbox3 |
|||
#include <cwchar> |
|||
#include <cstdio> |
|||
#include <iostream> |
|||
#include <set> |
|||
#include <lttoolbox/ltstr.h> |
|||
#include <lttoolbox/lt_locale.h> |
|||
#include <lttoolbox/transducer.h> |
|||
#include <lttoolbox/alphabet.h> |
|||
#include <lttoolbox/regexp_compiler.h> |
|||
#include <lttoolbox/compression.h> |
|||
int main (int argc, char** argv) |
|||
{ |
|||
map<int, Transducer> patterns; |
|||
Alphabet a; |
|||
RegexpCompiler re; |
|||
LtLocale::tryToSetLocale(); |
|||
FILE *output = stdout; |
|||
FILE *fst = fopen(argv[1], "w+"); |
|||
// Build transducers |
|||
a.includeSymbol(L"<n>"); |
|||
re.initialize(&a); |
|||
re.compile(L"foo"); |
|||
patterns[1] = re.getTransducer(); |
|||
re.initialize(&a); |
|||
re.compile(L"bar"); |
|||
patterns[2] = re.getTransducer(); |
|||
re.initialize(&a); |
|||
re.compile(L"baz"); |
|||
patterns[3] = re.getTransducer(); |
|||
// Write out the transducers |
|||
a.write(fst); |
|||
Compression::multibyte_write(patterns.size(), fst); |
|||
fwprintf(output, L"Patterns: %d, Alphabet: %d\n", patterns.size(), a.size()); |
|||
for(map<int, Transducer>::iterator it = patterns.begin(); it != patterns.end(); it++) |
|||
{ |
|||
wchar_t buf[50]; |
|||
memset(buf, '\0', sizeof(buf)); |
|||
swprintf(buf, 50, L"%d", it->first); |
|||
wstring id(buf); |
|||
fwprintf(output, L"= %S =============================\n", id.c_str()); |
|||
it->second.show(a, output); |
|||
Compression::wstring_write(id, fst); |
|||
it->second.write(fst); |
|||
} |
|||
fclose(fst); |
|||
fwprintf(output, L"\n\n"); |
|||
// Now read in the transducers that we have written out, one by one. |
|||
FILE *new_fst = fopen(argv[1], "r"); |
|||
Alphabet new_alphabet; |
|||
map<wstring, Transducer> transducers; |
|||
new_alphabet.read(new_fst); |
|||
int len = Compression::multibyte_read(new_fst); |
|||
while(len > 0) |
|||
{ |
|||
int len2 = Compression::multibyte_read(new_fst); |
|||
wstring name = L""; |
|||
while(len2 > 0) |
|||
{ |
|||
name += static_cast<wchar_t>(Compression::multibyte_read(new_fst)); |
|||
len2--; |
|||
} |
|||
transducers[name].read(new_fst); |
|||
len--; |
|||
} |
|||
fwprintf(output, L"Patterns: %d, Alphabet: %d\n", transducers.size(), new_alphabet.size()); |
|||
for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++) |
|||
{ |
|||
fwprintf(output, L"= %S =============================\n", it->first.c_str()); |
|||
it->second.minimize(); |
|||
it->second.show(a, output); |
|||
} |
|||
fclose(new_fst); |
|||
return 0; |
|||
} |
|||
</pre> |
|||
==Using regular expressions== |
|||
<pre> |
|||
/* |
|||
* g++ -o lt-regexp lt-regexp.cc -I/usr/include/libxml2 -I/home/fran/local/include/lttoolbox-3.2 -L/home/fran/local/lib -llttoolbox3 -llibxml2 |
|||
*/ |
|||
#include <cwchar> |
|||
#include <cstdio> |
|||
#include <cerrno> |
|||
#include <string> |
|||
#include <iostream> |
|||
#include <list> |
|||
#include <set> |
|||
#include <lttoolbox/ltstr.h> |
|||
#include <lttoolbox/lt_locale.h> |
|||
#include <lttoolbox/transducer.h> |
|||
#include <lttoolbox/alphabet.h> |
|||
#include <lttoolbox/state.h> |
|||
#include <lttoolbox/regexp_compiler.h> |
|||
#include <lttoolbox/match_exe.h> |
|||
#include <lttoolbox/match_state.h> |
|||
#include <lttoolbox/xml_parse_util.h> |
|||
wstring ws(char *arg) |
|||
{ |
|||
wchar_t buf[1024]; |
|||
memset(buf, '\0', 1024); |
|||
size_t num_chars = mbstowcs(buf, arg, strlen(arg)); |
|||
wstring ws(buf, num_chars); |
|||
return ws; |
|||
} |
|||
bool match(Transducer t, wstring str, Alphabet a) |
|||
{ |
|||
map<int, int> finals; |
|||
for(int i = 0; i < t.size(); i++) |
|||
{ |
|||
if(!t.isFinal(i)) |
|||
{ |
|||
continue; |
|||
} |
|||
finals[i] = i; |
|||
} |
|||
MatchExe me(t, finals); |
|||
MatchState ms; |
|||
ms.clear(); |
|||
ms.init(me.getInitial()); |
|||
for(wstring::iterator it = str.begin(); it != str.end(); it++) |
|||
{ |
|||
wcout << ms.size() << " " << *it << endl; |
|||
ms.step(a(*it, *it)); |
|||
} |
|||
int val = ms.classifyFinals(me.getFinals()); |
|||
fwprintf(stdout, L"%d\n", val); |
|||
if(val != -1) |
|||
{ |
|||
return true; |
|||
} |
|||
return false; |
|||
} |
|||
int main (int argc, char** argv) |
|||
{ |
|||
Alphabet alphabet; |
|||
Transducer t; |
|||
RegexpCompiler re; |
|||
bool matched; |
|||
LtLocale::tryToSetLocale(); |
|||
if(argc < 3) |
|||
{ |
|||
wcout << L"Usage: lt-regexp <pattern> <string to match>" << endl; |
|||
exit(-1); |
|||
} |
|||
FILE *output = stdout; |
|||
wstring pattern = ws(argv[1]); |
|||
wstring s = ws(argv[2]); |
|||
re.initialize(&alphabet); |
|||
re.compile(pattern); |
|||
t = re.getTransducer(); |
|||
t.minimize(); |
|||
t.show(alphabet, output); |
|||
matched = match(t, s, alphabet); |
|||
wcout << endl << pattern << " " << s << endl; |
|||
} |
|||
</pre> |
|||
[[Category:Lttoolbox]] |
|||
[[Category:Documentation in English]] |
Latest revision as of 07:59, 5 July 2017
Exemple d'application Lttoolbox
Basic example[edit]
This is a simple C++/lttoolbox version of the 'beer' program described in Morphological dictionaries.
// g++ -I/usr/local/include/lttoolbox-3.2 -I/usr/local/lib -llttoolbox3 lt_beer.cc -o lt-beer #include <cwchar> #include <cstdio> #include <cerrno> #include <string> #include <iostream> #include <list> #include <set> #include <lttoolbox/ltstr.h> // LtLocale::tryToSetLocale() #include <lttoolbox/lt_locale.h> // Transducer class #include <lttoolbox/transducer.h> // Alphabet class #include <lttoolbox/alphabet.h> // State class #include <lttoolbox/state.h> // TransExe class #include <lttoolbox/trans_exe.h> int main (int argc, char** argv) { Alphabet alphabet; Transducer t; // Set locale LtLocale::tryToSetLocale(); // Include symbols into alphabet, keeping the values alphabet.includeSymbol(L"<n>"); alphabet.includeSymbol(L"<sg>"); alphabet.includeSymbol(L"<pl>"); int n_sym = alphabet(L"<n>"); int sg_sym = alphabet(L"<sg>"); int pl_sym = alphabet(L"<pl>"); // Initial state int initial = t.getInitial(); // build "beer" manually int beer = initial; // these are the transitions b:b e:e e:e r:r beer = t.insertSingleTransduction(alphabet(L'b',L'b'), beer); beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer); beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer); beer = t.insertSingleTransduction(alphabet(L'r',L'r'), beer); // these are 0:<n> 0:<sg> beer = t.insertSingleTransduction(alphabet(0, n_sym), beer); beer = t.insertSingleTransduction(alphabet(0, sg_sym), beer); t.setFinal(beer); // build "beers" manually int beers = t.getInitial(); beers = t.insertSingleTransduction(alphabet(L'b',L'b'), beers); beers = t.insertSingleTransduction(alphabet(L'e',L'e'), beers); beers = t.insertSingleTransduction(alphabet(L'e',L'e'), beers); beers = t.insertSingleTransduction(alphabet(L'r',L'r'), beers); // this transition is s:0 beers = t.insertSingleTransduction(alphabet(L's', 0), beers); beers = t.insertSingleTransduction(alphabet(0, n_sym), beers); beers = t.insertSingleTransduction(alphabet(0, pl_sym), beers); t.setFinal(beers); t.minimize(); //Rather than try to 
convert between Transducer and TransExe, we'll //just write and read. FILE* fst=fopen("beer.fst", "w"); Compression::wstring_write(L"ber", fst); //write letters alphabet.write(fst); // write multichars Compression::multibyte_write(1, fst); // write num. fsts Compression::wstring_write(L"main", fst); // write name of first fst t.write(fst); // write fst fclose(fst); fst=fopen("beer.fst", "r"); TransExe te; te.read(fst, alphabet); fclose(fst); State *initial_state = new State(); initial_state->init(te.getInitial()); State current_state = *initial_state; wstring input, output=L""; set<Node *> anfinals; anfinals.insert(te.getFinals().begin(), te.getFinals().end()); FILE* in=stdin; bool reading=true; // This is our runtime: see if the input matches while (reading) { wchar_t val = (wchar_t)fgetwc(in); if(val==WEOF||iswspace(val)) { reading=false; } else { if (!reading) { // At the end. We don't need to do anything but // leave the loop in this simplistic example break; } else { current_state.step(val); alphabet.getSymbol(input, val); } } } if (current_state.isFinal(anfinals)) { // Not used, just don't want it to be empty... set<wchar_t> escaped; escaped.insert(L'$'); output = current_state.filterFinals(anfinals, alphabet, escaped); wcout << input << output << endl; } else { wcout << L"Unrecognised: " << input << endl; } return 0; }
We can simplify the construction of the transducer by building the common prefix "beer" only once and branching both entries off its last state, like this:
// build "beer" manually int beer = initial; // these are the transitions b:b e:e e:e r:r beer = t.insertSingleTransduction(alphabet(L'b',L'b'), beer); beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer); beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer); beer = t.insertSingleTransduction(alphabet(L'r',L'r'), beer); int beersg = beer; // these are 0:<n> 0:<sg> beersg = t.insertSingleTransduction(alphabet(0, n_sym), beersg); beersg = t.insertSingleTransduction(alphabet(0, sg_sym), beersg); t.setFinal(beersg); // build "beers" manually int beerpl = beer; beerpl = t.insertSingleTransduction(alphabet(L's', 0), beerpl); beerpl = t.insertSingleTransduction(alphabet(0, n_sym), beerpl); beerpl = t.insertSingleTransduction(alphabet(0, pl_sym), beerpl); t.setFinal(beerpl);
Writing/reading several transducers to the same file[edit]
// g++ -o test test.cc -I/home/fran/local/include/lttoolbox-3.2 -L/home/fran/local/lib -llttoolbox3 #include <cwchar> #include <cstdio> #include <iostream> #include <set> #include <lttoolbox/ltstr.h> #include <lttoolbox/lt_locale.h> #include <lttoolbox/transducer.h> #include <lttoolbox/alphabet.h> #include <lttoolbox/regexp_compiler.h> #include <lttoolbox/compression.h> int main (int argc, char** argv) { map<int, Transducer> patterns; Alphabet a; RegexpCompiler re; LtLocale::tryToSetLocale(); FILE *output = stdout; FILE *fst = fopen(argv[1], "w+"); // Build transducers a.includeSymbol(L"<n>"); re.initialize(&a); re.compile(L"foo"); patterns[1] = re.getTransducer(); re.initialize(&a); re.compile(L"bar"); patterns[2] = re.getTransducer(); re.initialize(&a); re.compile(L"baz"); patterns[3] = re.getTransducer(); // Write out the transducers a.write(fst); Compression::multibyte_write(patterns.size(), fst); fwprintf(output, L"Patterns: %d, Alphabet: %d\n", patterns.size(), a.size()); for(map<int, Transducer>::iterator it = patterns.begin(); it != patterns.end(); it++) { wchar_t buf[50]; memset(buf, '\0', sizeof(buf)); swprintf(buf, 50, L"%d", it->first); wstring id(buf); fwprintf(output, L"= %S =============================\n", id.c_str()); it->second.show(a, output); Compression::wstring_write(id, fst); it->second.write(fst); } fclose(fst); fwprintf(output, L"\n\n"); // Now read in the transducers that we have written out, one by one. 
FILE *new_fst = fopen(argv[1], "r"); Alphabet new_alphabet; map<wstring, Transducer> transducers; new_alphabet.read(new_fst); int len = Compression::multibyte_read(new_fst); while(len > 0) { int len2 = Compression::multibyte_read(new_fst); wstring name = L""; while(len2 > 0) { name += static_cast<wchar_t>(Compression::multibyte_read(new_fst)); len2--; } transducers[name].read(new_fst); len--; } fwprintf(output, L"Patterns: %d, Alphabet: %d\n", transducers.size(), new_alphabet.size()); for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++) { fwprintf(output, L"= %S =============================\n", it->first.c_str()); it->second.minimize(); it->second.show(a, output); } fclose(new_fst); return 0; }
Using regular expressions[edit]
/* * g++ -o lt-regexp lt-regexp.cc -I/usr/include/libxml2 -I/home/fran/local/include/lttoolbox-3.2 -L/home/fran/local/lib -llttoolbox3 -llibxml2 */ #include <cwchar> #include <cstdio> #include <cerrno> #include <string> #include <iostream> #include <list> #include <set> #include <lttoolbox/ltstr.h> #include <lttoolbox/lt_locale.h> #include <lttoolbox/transducer.h> #include <lttoolbox/alphabet.h> #include <lttoolbox/state.h> #include <lttoolbox/regexp_compiler.h> #include <lttoolbox/match_exe.h> #include <lttoolbox/match_state.h> #include <lttoolbox/xml_parse_util.h> wstring ws(char *arg) { wchar_t buf[1024]; memset(buf, '\0', 1024); size_t num_chars = mbstowcs(buf, arg, strlen(arg)); wstring ws(buf, num_chars); return ws; } bool match(Transducer t, wstring str, Alphabet a) { map<int, int> finals; for(int i = 0; i < t.size(); i++) { if(!t.isFinal(i)) { continue; } finals[i] = i; } MatchExe me(t, finals); MatchState ms; ms.clear(); ms.init(me.getInitial()); for(wstring::iterator it = str.begin(); it != str.end(); it++) { wcout << ms.size() << " " << *it << endl; ms.step(a(*it, *it)); } int val = ms.classifyFinals(me.getFinals()); fwprintf(stdout, L"%d\n", val); if(val != -1) { return true; } return false; } int main (int argc, char** argv) { Alphabet alphabet; Transducer t; RegexpCompiler re; bool matched; LtLocale::tryToSetLocale(); if(argc < 3) { wcout << L"Usage: lt-regexp <pattern> <string to match>" << endl; exit(-1); } FILE *output = stdout; wstring pattern = ws(argv[1]); wstring s = ws(argv[2]); re.initialize(&alphabet); re.compile(pattern); t = re.getTransducer(); t.minimize(); t.show(alphabet, output); matched = match(t, s, alphabet); wcout << endl << pattern << " " << s << endl; }