Difference between revisions of "Lttoolbox API examples"
Jump to navigation
Jump to search
m (moved Basic lttoolbox example to Lttoolbox API examples) |
|||
| Line 1: | Line 1: | ||
{{TOCD}} |
|||
==Basic example== |
|||
This is a simple C++/lttoolbox version of the 'beer' program in [[Morphological dictionaries]] |
This is a simple C++/lttoolbox version of the 'beer' program in [[Morphological dictionaries]] |
||
| Line 170: | Line 173: | ||
</pre> |
</pre> |
||
==Writing/reading several transducers to the same file== |
|||
<pre> |
|||
// g++ -o test test.cc -I/home/fran/local/include/lttoolbox-3.2 -L/home/fran/local/lib -llttoolbox3 |
|||
#include <cwchar> |
|||
#include <cstdio> |
|||
#include <iostream> |
|||
#include <set> |
|||
#include <lttoolbox/ltstr.h> |
|||
#include <lttoolbox/lt_locale.h> |
|||
#include <lttoolbox/transducer.h> |
|||
#include <lttoolbox/alphabet.h> |
|||
#include <lttoolbox/regexp_compiler.h> |
|||
#include <lttoolbox/compression.h> |
|||
int main (int argc, char** argv) |
|||
{ |
|||
map<int, Transducer> patterns; |
|||
Alphabet a; |
|||
RegexpCompiler re; |
|||
LtLocale::tryToSetLocale(); |
|||
FILE *output = stdout; |
|||
FILE *fst = fopen(argv[1], "w+"); |
|||
// Build transducers |
|||
a.includeSymbol(L"<n>"); |
|||
re.initialize(&a); |
|||
re.compile(L"foo"); |
|||
patterns[1] = re.getTransducer(); |
|||
re.initialize(&a); |
|||
re.compile(L"bar"); |
|||
patterns[2] = re.getTransducer(); |
|||
re.initialize(&a); |
|||
re.compile(L"baz"); |
|||
patterns[3] = re.getTransducer(); |
|||
// Write out the transducers |
|||
a.write(fst); |
|||
Compression::multibyte_write(patterns.size(), fst); |
|||
fwprintf(output, L"Patterns: %d, Alphabet: %d\n", patterns.size(), a.size()); |
|||
for(map<int, Transducer>::iterator it = patterns.begin(); it != patterns.end(); it++) |
|||
{ |
|||
wchar_t buf[50]; |
|||
memset(buf, '\0', sizeof(buf)); |
|||
swprintf(buf, 50, L"%d", it->first); |
|||
wstring id(buf); |
|||
fwprintf(output, L"= %S =============================\n", id.c_str()); |
|||
it->second.show(a, output); |
|||
Compression::wstring_write(id, fst); |
|||
it->second.write(fst); |
|||
} |
|||
fclose(fst); |
|||
fwprintf(output, L"\n\n"); |
|||
// Now read in the transducers that we have written out, one by one. |
|||
FILE *new_fst = fopen(argv[1], "r"); |
|||
Alphabet new_alphabet; |
|||
map<wstring, Transducer> transducers; |
|||
new_alphabet.read(new_fst); |
|||
int len = Compression::multibyte_read(new_fst); |
|||
while(len > 0) |
|||
{ |
|||
int len2 = Compression::multibyte_read(new_fst); |
|||
wstring name = L""; |
|||
while(len2 > 0) |
|||
{ |
|||
name += static_cast<wchar_t>(Compression::multibyte_read(new_fst)); |
|||
len2--; |
|||
} |
|||
transducers[name].read(new_fst); |
|||
len--; |
|||
} |
|||
fwprintf(output, L"Patterns: %d, Alphabet: %d\n", transducers.size(), new_alphabet.size()); |
|||
for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++) |
|||
{ |
|||
fwprintf(output, L"= %S =============================\n", it->first.c_str()); |
|||
it->second.minimize(); |
|||
it->second.show(a, output); |
|||
} |
|||
fclose(new_fst); |
|||
return 0; |
|||
} |
|||
</pre> |
|||
</pre> |
|||
[[Category:Lttoolbox]] |
[[Category:Lttoolbox]] |
||
Revision as of 09:39, 18 October 2011
Basic example
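This is a simple C++/lttoolbox version of the 'beer' program in Morphological dictionaries. It builds a transducer for "beer" and "beers" by hand, writes it to beer.fst, reads it back, and analyses a single word read from standard input.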
// g++ -I/usr/local/include/lttoolbox-3.2 lt_beer.cc -o lt-beer -L/usr/local/lib -llttoolbox3
#include <cwchar>
#include <cstdio>
#include <cerrno>
#include <string>
#include <iostream>
#include <list>
#include <set>
#include <lttoolbox/ltstr.h>
// LtLocale::tryToSetLocale()
#include <lttoolbox/lt_locale.h>
// Transducer class
#include <lttoolbox/transducer.h>
// Alphabet class
#include <lttoolbox/alphabet.h>
// Pool class
#include <lttoolbox/pool.h>
// State class
#include <lttoolbox/state.h>
// TransExe class
#include <lttoolbox/trans_exe.h>
int main (int argc, char** argv)
{
Alphabet alphabet;
Transducer t;
// Set locale
LtLocale::tryToSetLocale();
// Include symbols into alphabet, keeping the values
alphabet.includeSymbol(L"<n>");
alphabet.includeSymbol(L"<sg>");
alphabet.includeSymbol(L"<pl>");
int n_sym = alphabet(L"<n>");
int sg_sym = alphabet(L"<sg>");
int pl_sym = alphabet(L"<pl>");
// Initial state
int initial = t.getInitial();
// build "beer" manually
int beer = initial;
// these are the transitions b:b e:e e:e r:r
beer = t.insertSingleTransduction(alphabet(L'b',L'b'), beer);
beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer);
beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer);
beer = t.insertSingleTransduction(alphabet(L'r',L'r'), beer);
// these are 0:<n> 0:<sg>
beer = t.insertSingleTransduction(alphabet(0, n_sym), beer);
beer = t.insertSingleTransduction(alphabet(0, sg_sym), beer);
t.setFinal(beer);
// build "beers" manually
int beers = t.getInitial();
beers = t.insertSingleTransduction(alphabet(L'b',L'b'), beers);
beers = t.insertSingleTransduction(alphabet(L'e',L'e'), beers);
beers = t.insertSingleTransduction(alphabet(L'e',L'e'), beers);
beers = t.insertSingleTransduction(alphabet(L'r',L'r'), beers);
// this transition is s:0
beers = t.insertSingleTransduction(alphabet(L's', 0), beers);
beers = t.insertSingleTransduction(alphabet(0, n_sym), beers);
beers = t.insertSingleTransduction(alphabet(0, pl_sym), beers);
t.setFinal(beers);
t.minimize();
//Rather than try to convert between Transducer and TransExe, we'll
//just write and read.
FILE* fst=fopen("beer.fst", "w");
t.write(fst);
fclose(fst);
fst=fopen("beer.fst", "r");
TransExe te;
te.read(fst, alphabet);
fclose(fst);
Pool<vector<int> > *pool = new Pool<vector<int> >(1, vector<int>(50));
State *initial_state = new State(pool);
initial_state->init(te.getInitial());
State current_state = *initial_state;
wstring input, output=L"";
set<Node *> anfinals;
anfinals.insert(te.getFinals().begin(), te.getFinals().end());
FILE* in=stdin;
bool reading=true;
// This is our runtime: see if the input matches
while (reading)
{
wchar_t val = (wchar_t)fgetwc(in);
if(val==WEOF||iswspace(val))
{
reading=false;
}
else
{
if (!reading)
{
// At the end. We don't need to do anything but
// leave the loop in this simplistic example
break;
}
else
{
current_state.step(val);
alphabet.getSymbol(input, val);
}
}
}
if (current_state.isFinal(anfinals))
{
// Not used, just don't want it to be empty...
set<wchar_t> escaped;
escaped.insert(L'$');
output = current_state.filterFinals(anfinals, alphabet, escaped);
wcout << input << output << endl;
}
else
{
wcout << L"Unrecognised: " << input << endl;
}
return 0;
}
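We can simplify the building of the transducer by inserting the shared stem b-e-e-r only once and branching from its last state for the singular and the plural, like this: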
// build "beer" manually int beer = initial; // these are the transitions b:b e:e e:e r:r beer = t.insertSingleTransduction(alphabet(L'b',L'b'), beer); beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer); beer = t.insertSingleTransduction(alphabet(L'e',L'e'), beer); beer = t.insertSingleTransduction(alphabet(L'r',L'r'), beer); int beersg = beer; // these are 0:<n> 0:<sg> beersg = t.insertSingleTransduction(alphabet(0, n_sym), beersg); beersg = t.insertSingleTransduction(alphabet(0, sg_sym), beersg); t.setFinal(beersg); // build "beers" manually int beerpl = beer; beerpl = t.insertSingleTransduction(alphabet(L's', 0), beerpl); beerpl = t.insertSingleTransduction(alphabet(0, n_sym), beerpl); beerpl = t.insertSingleTransduction(alphabet(0, pl_sym), beerpl); t.setFinal(beerpl);
Writing/reading several transducers to the same file
// g++ -o test test.cc -I/home/fran/local/include/lttoolbox-3.2 -L/home/fran/local/lib -llttoolbox3
#include <cwchar>
#include <cstdio>
#include <iostream>
#include <set>
#include <lttoolbox/ltstr.h>
#include <lttoolbox/lt_locale.h>
#include <lttoolbox/transducer.h>
#include <lttoolbox/alphabet.h>
#include <lttoolbox/regexp_compiler.h>
#include <lttoolbox/compression.h>
/**
 * Demonstrates storing several transducers in a single file and reading
 * them back.
 *
 * Three transducers are compiled from the regular expressions "foo", "bar"
 * and "baz". The file named by argv[1] is then written as: the alphabet,
 * the number of transducers, and for each transducer its name (its numeric
 * key rendered as a wide string) followed by the transducer itself. The
 * same file is then reopened, everything is read back, and each transducer
 * is minimised and displayed.
 *
 * @param argc number of command-line arguments; must be at least 2
 * @param argv argv[1] is the path of the file to write and then read back
 * @return 0 on success, 1 on a usage error or if the file cannot be opened
 */
int main (int argc, char** argv)
{
  LtLocale::tryToSetLocale();

  if(argc < 2)
  {
    fwprintf(stderr, L"Usage: test <fst_file>\n");
    return 1;
  }

  map<int, Transducer> patterns;
  Alphabet a;
  RegexpCompiler re;

  FILE *output = stdout;

  // The file holds binary data, hence "wb" here and "rb" further down.
  FILE *fst = fopen(argv[1], "wb");
  if(fst == NULL)
  {
    fwprintf(stderr, L"Error: cannot open the output file for writing\n");
    return 1;
  }

  // Build transducers
  a.includeSymbol(L"<n>");

  re.initialize(&a);
  re.compile(L"foo");
  patterns[1] = re.getTransducer();

  re.initialize(&a);
  re.compile(L"bar");
  patterns[2] = re.getTransducer();

  re.initialize(&a);
  re.compile(L"baz");
  patterns[3] = re.getTransducer();

  // Write out the transducers
  a.write(fst);
  Compression::multibyte_write(patterns.size(), fst);

  // size() of a std::map is a size_t; convert explicitly to match %d.
  fwprintf(output, L"Patterns: %d, Alphabet: %d\n",
           static_cast<int>(patterns.size()), static_cast<int>(a.size()));

  for(map<int, Transducer>::iterator it = patterns.begin(); it != patterns.end(); ++it)
  {
    // Render the integer key as a wide string; swprintf terminates the
    // buffer itself, so no prior clearing is needed.
    wchar_t buf[50];
    swprintf(buf, sizeof(buf) / sizeof(buf[0]), L"%d", it->first);
    wstring id(buf);

    // %ls is the standard conversion for a wchar_t string.
    fwprintf(output, L"= %ls =============================\n", id.c_str());
    it->second.show(a, output);

    Compression::wstring_write(id, fst);
    it->second.write(fst);
  }

  fclose(fst);

  fwprintf(output, L"\n\n");

  // Now read in the transducers that we have written out, one by one.
  FILE *new_fst = fopen(argv[1], "rb");
  if(new_fst == NULL)
  {
    fwprintf(stderr, L"Error: cannot reopen the file for reading\n");
    return 1;
  }

  Alphabet new_alphabet;
  map<wstring, Transducer> transducers;

  new_alphabet.read(new_fst);

  for(int remaining = Compression::multibyte_read(new_fst); remaining > 0; --remaining)
  {
    // The name is read as a length followed by that many characters.
    wstring name = L"";
    for(int chars_left = Compression::multibyte_read(new_fst); chars_left > 0; --chars_left)
    {
      name += static_cast<wchar_t>(Compression::multibyte_read(new_fst));
    }

    transducers[name].read(new_fst);
  }

  fclose(new_fst);

  fwprintf(output, L"Patterns: %d, Alphabet: %d\n",
           static_cast<int>(transducers.size()), static_cast<int>(new_alphabet.size()));

  for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); ++it)
  {
    fwprintf(output, L"= %ls =============================\n", it->first.c_str());
    it->second.minimize();
    // Display with the alphabet that was read back from the file, so that
    // the round trip is what is actually being shown.
    it->second.show(new_alphabet, output);
  }

  return 0;
}
