Difference between revisions of "Lttoolbox API"
(Created page with 'You can use lttoolbox as a library in C++ by with <code>#include <lttoolbox/fst_processor.h></code>, and with a bit more work, you can also use it from Python. ==Using as a …') |
|||
| (19 intermediate revisions by 2 users not shown) | |||
| Line 1: | Line 1: | ||
[[Application lttoolbox|En français]] |
|||
{{TOCD}} |
|||
You can use [[lttoolbox]] as a library in C++ by with <code>#include <lttoolbox/fst_processor.h></code>, and with a bit more work, you can also use it from Python. |
You can use [[lttoolbox]] as a library in C++ by with <code>#include <lttoolbox/fst_processor.h></code>, and with a bit more work, you can also use it from Python. |
||
| Line 26: | Line 29: | ||
#include <lttoolbox/fst_processor.h> |
#include <lttoolbox/fst_processor.h> |
||
#include <lttoolbox/lt_locale.h> |
|||
#include <lttoolbox/ltstr.h> |
#include <lttoolbox/ltstr.h> |
||
| Line 39: | Line 43: | ||
} |
} |
||
LtLocale::tryToSetLocale(); |
|||
FILE *t_rl = fopen(argv[1], "r"); |
FILE *t_rl = fopen(argv[1], "r"); |
||
| Line 71: | Line 76: | ||
==Using as a module from Python== |
==Using as a module from Python== |
||
This example is based on how [[libvoikko]] uses lttoolbox for analysis. We make a C++ library that exposes whatever lttoolbox functions we need to Python, and a little Python class that makes calling those functions a bit more pythonic.<ref>An alternative might be to include Python.h in the C++ code, and define a full Python class from there, like how [https://launchpad.net/pylibtextcat pylibtextcat]/[https://github.com/bbqsrc/pylibtextcat py3libtextcat] creates a Python class for a C library.</ref> |
|||
(This example is based on how [[libvoikko]] uses lttoolbox for analysis) |
|||
Write the following files: |
|||
libltpy.hpp: |
|||
ltpy.h: |
|||
<pre> |
<pre> |
||
#include <lttoolbox/fst_processor.h> |
#include <lttoolbox/fst_processor.h> |
||
| Line 88: | Line 93: | ||
</pre> |
</pre> |
||
libltpy.cpp: |
|||
ltpy.cc: |
|||
<pre> |
<pre> |
||
#include " |
#include "libltpy.hpp" |
||
#include <lttoolbox/fst_processor.h> |
#include <lttoolbox/fst_processor.h> |
||
#include <lttoolbox/lt_locale.h> |
|||
using namespace std; |
using namespace std; |
||
Analyser::Analyser(const string & analyserpath) throw(exception) { |
Analyser::Analyser(const string & analyserpath) throw(exception) { |
||
cerr << "analyser: " << analyserpath << endl; |
|||
FILE * file = fopen(analyserpath.c_str(), "r"); |
FILE * file = fopen(analyserpath.c_str(), "r"); |
||
if (!file) { |
if (!file) { |
||
cerr << "Couldn't open analyser file " << analyserpath << endl; |
cerr << "Couldn't open analyser file " << analyserpath << endl; |
||
//throw LoadException("Couldn't open analyser file"); |
|||
throw exception(); |
throw exception(); |
||
} |
} |
||
| Line 112: | Line 116: | ||
wstring Analyser::analyse(wstring const &word) { |
wstring Analyser::analyse(wstring const &word) { |
||
pair <wstring,int> analysis = fst.biltransWithQueue(word, false); |
pair <wstring,int> analysis = fst.biltransWithQueue(word, false); |
||
| ⚫ | |||
// or fst.biltrans, which just returns the string. The 'false' |
|||
| ⚫ | |||
if (analysis.second == 0) { |
if (analysis.second == 0) { |
||
return analysis.first; |
return analysis.first; |
||
| Line 119: | Line 122: | ||
else { |
else { |
||
// a partial match: |
// a partial match: |
||
return L"@"+ |
return L"@"+word; |
||
} |
} |
||
} |
} |
||
extern "C" wstring * analyse(Analyser * a, const wchar_t * word) { |
extern "C" wstring * analyse(Analyser * a, const wchar_t * word) { |
||
// |
// It seems Python can only send wchar_t*, but we need a wstring |
||
size_t wlen = wcslen(word); |
size_t wlen = wcslen(word); |
||
if (wlen == 0) { |
|||
// avoid a bug in biltransWithQueue: |
|||
return 0; |
|||
} |
|||
wstring inputString = L""; |
wstring inputString = L""; |
||
for (size_t i = 0; i < wlen; i++) { |
for (size_t i = 0; i < wlen; i++) { |
||
| Line 132: | Line 139: | ||
wstring * out = new wstring(a->analyse(inputString)); |
wstring * out = new wstring(a->analyse(inputString)); |
||
return out; |
return out; |
||
} |
|||
extern "C" void free_analyses(wstring * analyses) { |
|||
delete analyses; |
|||
} |
} |
||
extern "C" Analyser * init(const char ** error, const char * path) { |
extern "C" Analyser * init(const char ** error, const char * path) { |
||
LtLocale::tryToSetLocale(); |
|||
Analyser * a = 0; |
Analyser * a = 0; |
||
try { |
try { |
||
| Line 177: | Line 189: | ||
if error.value != None: |
if error.value != None: |
||
self.__handle = 0 |
self.__handle = 0 |
||
raise Exception(u" |
raise Exception(u"Initialisation of fst failed: " + unicode(error.value, "UTF-8")) |
||
def __del__(self): |
def __del__(self): |
||
| Line 188: | Line 200: | ||
def analyse(self, word): |
def analyse(self, word): |
||
ana_p = self.__lib.analyse(self.__handle, word) |
|||
if ana_p: |
|||
analysis = ana_p.contents.value |
|||
else: |
|||
analysis = "" |
|||
self.__lib.free_analyses(ana_p) |
|||
return analysis |
|||
</pre> |
</pre> |
||
Compile the C++ files so you get a file like |
Compile the C++ files so you get a file like libltpy.so. The following commands worked for me: |
||
<pre> |
|||
g++ -DGCC_VISIBILITY=1 -g -O2 -fvisibility=hidden -I/usr/local/include/lttoolbox-3.2 -I/usr/local/lib/lttoolbox-3.2/include -c libltpy.cpp -fPIC -DPIC -o libltpy.o |
|||
g++ -fPIC -DPIC -shared libltpy.o -Wl,-rpath -Wl,/usr/local/lib -Wl,-rpath -Wl,/usr/local/lib -L/usr/local/lib /usr/local/lib/liblttoolbox3.so -Wl,-soname -Wl,libltpy.so.0 -o libltpy.so |
|||
</pre> |
|||
However, if you use autotools, you typically just need the following in your Makefile.am: |
|||
<pre> |
|||
lib_LTLIBRARIES = libltpy.la |
|||
libltpy_la_SOURCES = libltpy.cpp |
|||
libltpy_la_LDFLAGS = $(LTTOOLBOX_LIBS) |
|||
</pre> |
|||
then <code>autogen.sh</code> and <code>make libltpy.la</code> (see [[#Real-world examples]]). |
|||
Then use it like: |
|||
<pre> |
<pre> |
||
import lt |
import lt |
||
fst = lt.FST(" |
fst = lt.FST("libltpy.so", "analyser.bin") |
||
print fst.analyse(u"cars") # should print car<n><pl> |
print fst.analyse(u"cars") # should print car<n><pl> |
||
</pre> |
</pre> |
||
==Real-world examples== |
|||
* apertium-transfer uses lttoolbox from C++ to look up words in the bidix |
|||
** http://apertium.svn.sourceforge.net/viewvc/apertium/trunk/apertium/apertium/transfer.cc?view=markup |
|||
* libvoikko uses lttoolbox from C++ to analyse words, and also makes a Python module: |
|||
** http://sourceforge.net/apps/trac/voikko/browser/trunk/libvoikko/src/morphology/LttoolboxAnalyzer.cpp |
|||
** http://sourceforge.net/apps/trac/voikko/browser/trunk/libvoikko/src/morphology/interface.cpp |
|||
** http://sourceforge.net/apps/trac/voikko/browser/trunk/libvoikko/python/libvoikko.py |
|||
==Footnotes== |
|||
<references/> |
|||
[[Category:lttoolbox]] |
[[Category:lttoolbox]] |
||
[[Category:Morphological analysers]] |
|||
[[Category:Development]] |
|||
[[Category:Documentation in English]] |
|||
Latest revision as of 13:19, 10 March 2017
You can use lttoolbox as a library in C++ with #include <lttoolbox/fst_processor.h>, and with a bit more work, you can also use it from Python.
Using as a library in C++[edit]
For this example you will need two files, test.dix:
<dictionary>
<alphabet/>
<sdefs>
<sdef n="n"/>
<sdef n="pl"/>
</sdefs>
<section id="main" type="standard">
<e><p><l>cars</l><r>car<s n="n"/><s n="pl"/></r></p></e>
</section>
</dictionary>
And test.cc:
#include <stdio.h>
#include <string>
#include <iostream>
#include <lttoolbox/fst_processor.h>
#include <lttoolbox/lt_locale.h>
#include <lttoolbox/ltstr.h>
using namespace std;
FSTProcessor fstp;
int main(int argc, char **argv)
{
if(argc < 2) {
wcout << L"Please specify a transducer" << endl;
exit(-1);
}
LtLocale::tryToSetLocale();
FILE *t_rl = fopen(argv[1], "r");
fstp.load(t_rl);
fclose(t_rl);
fstp.initBiltrans();
wstring input = L"^car<n><pl>$";
wstring trad = fstp.biltrans(input);
wcout << input << L" --> " << trad << endl;
return 0;
}
Compile both of the files:
$ lt-comp rl test.dix test_rl.bin
main@standard 6 5
$ g++ -Wall test.cc -o test -llttoolbox3
And then test:
$ ./test test_rl.bin
^car<n><pl>$ --> ^cars$
Using as a module from Python[edit]
This example is based on how libvoikko uses lttoolbox for analysis. We make a C++ library that exposes whatever lttoolbox functions we need to Python, and a little Python class that makes calling those functions a bit more pythonic.[1]
Write the following files:
libltpy.hpp:
#include <lttoolbox/fst_processor.h>
/**
 * Wraps one lttoolbox FSTProcessor so that a compiled analyser can be
 * loaded once and then queried word by word.
 *
 * Standard-library names are spelled with std:: so that this header does
 * not depend on a using-directive leaking out of an earlier include.
 */
class Analyser {
public:
  /**
   * @param directoryName path of the compiled analyser file (despite the
   *        parameter name, the implementation opens it as a file).
   * @throws std::exception if the file cannot be opened.
   *
   * NOTE(review): the dynamic exception specification is kept because it
   * has to match the out-of-line definition; such specifications were
   * removed from the language in C++17.
   */
  Analyser(const std::string & directoryName) throw(std::exception);

  /**
   * @param word the surface form to look up.
   * @return the analysis string, or "@" followed by word when the lookup
   *         did not consume the whole input.
   */
  std::wstring analyse(std::wstring const &word);

private:
  FSTProcessor fst;
};
libltpy.cpp:
#include "libltpy.hpp"
#include <lttoolbox/fst_processor.h>
#include <lttoolbox/lt_locale.h>
using namespace std;
/**
 * Loads the compiled analyser at analyserpath into the wrapped
 * FSTProcessor and prepares it for biltrans lookups.
 *
 * @param analyserpath path of the compiled analyser file.
 * @throws exception if the file cannot be opened; a diagnostic is written
 *         to cerr first.
 */
Analyser::Analyser(const string & analyserpath) throw(exception) {
  // The compiled analyser is binary data: "rb" keeps platforms that
  // distinguish text and binary streams from translating bytes on read.
  FILE * file = fopen(analyserpath.c_str(), "rb");
  if (!file) {
    cerr << "Couldn't open analyser file " << analyserpath << endl;
    throw exception();
  }

  fst.load(file);
  fclose(file);

  fst.setCaseSensitiveMode(false);
  fst.setDictionaryCaseMode(true);
  fst.initBiltrans();
}
/**
 * Looks up word in the loaded transducer.
 *
 * @param word the form to look up, without ^ or $ delimiters.
 * @return the lookup result when the second member of the returned pair
 *         is 0; otherwise (a partial match) "@" followed by word.
 */
wstring Analyser::analyse(wstring const &word) {
  // The 'false' means we require no ^ or $ in input/output
  pair<wstring, int> lookup = fst.biltransWithQueue(word, false);

  const bool partialMatch = (lookup.second != 0);
  if (partialMatch) {
    return L"@" + word;
  }
  return lookup.first;
}
/**
 * C entry point used from Python: analyses one word.
 *
 * @param a    analyser handle obtained from init().
 * @param word null-terminated wide string to analyse.
 * @return a heap-allocated wstring that the caller must release with
 *         free_analyses(), or 0 when a or word is null, when word is
 *         empty, or when the analysis fails with an exception.
 */
extern "C" wstring * analyse(Analyser * a, const wchar_t * word) {
  if (a == 0 || word == 0) {
    return 0;
  }
  if (word[0] == L'\0') {
    // avoid a bug in biltransWithQueue:
    return 0;
  }

  try {
    // Python hands over a wchar_t*; the wstring constructor copies up to
    // the terminating null, which replaces the manual per-character loop.
    return new wstring(a->analyse(wstring(word)));
  }
  catch (...) {
    // A C++ exception must not unwind through the C ABI into the caller.
    return 0;
  }
}
// Releases a wstring previously returned by the C-level analyse(), which
// allocates its result with new. Passing a null pointer is harmless because
// delete on a null pointer does nothing.
extern "C" void free_analyses(wstring * analyses) {
delete analyses;
}
/**
 * C entry point used from Python: creates an Analyser for the file at path.
 *
 * @param error out-parameter; set to 0 on success, otherwise to a
 *              null-terminated message. May itself be null, in which case
 *              no message is reported.
 * @param path  path of the compiled analyser file.
 * @return the new Analyser (release it with terminate()), or 0 on failure.
 *
 * The message for a std::exception is kept in a function-local static
 * buffer, so it stays valid until the next failing call. This makes
 * concurrent calls to init() unsafe.
 */
extern "C" Analyser * init(const char ** error, const char * path) {
  // e.what() points into the exception object, which is destroyed when the
  // handler exits; copy the text somewhere that outlives this call.
  static std::string lastError;

  const char * message = 0;
  Analyser * a = 0;

  LtLocale::tryToSetLocale();

  if (path == 0) {
    message = "No analyser path given";
  }
  else {
    try {
      // If the constructor throws, the new-expression releases the memory
      // itself and a stays 0, so the handlers have nothing to delete.
      a = new Analyser(path);
    }
    catch (const std::exception & e) {
      lastError = e.what();
      message = lastError.c_str();
    }
    catch (...) {
      // Nothing may propagate through the C ABI into the caller.
      message = "Unknown error while loading analyser";
    }
  }

  if (error != 0) {
    *error = message;
  }
  return a;
}
// Destroys an Analyser created by init(). Passing a null pointer is
// harmless because delete on a null pointer does nothing.
extern "C" void terminate(Analyser * a) {
delete a;
}
lt.py:
# -*- coding: utf-8 -*-
from ctypes import byref, CDLL, c_char_p, c_wchar_p, c_void_p, POINTER
class FST(object):
    """Pythonic wrapper around the C interface exported by libltpy.

    An instance loads the shared library, asks it to create an analyser for
    a compiled transducer file, and releases that analyser again when
    terminate() is called or the instance is garbage-collected.
    """

    def __init__(self, libpath, fstpath):
        """Load the library at libpath and initialise it with fstpath.

        libpath -- path of the shared library (e.g. "libltpy.so").
        fstpath -- path of the compiled analyser; a byte string, or a text
                   string which is encoded with the filesystem encoding.

        Raises Exception if the library reports an initialisation error.
        """
        # Set first so that __del__ finds the attribute even if CDLL raises.
        self.__handle = 0
        self.__lib = CDLL(libpath)

        self.__lib.init.argtypes = [POINTER(c_char_p), c_char_p]
        self.__lib.init.restype = c_void_p

        self.__lib.terminate.argtypes = [c_void_p]
        self.__lib.terminate.restype = None

        self.__lib.analyse.argtypes = [c_void_p, c_wchar_p]
        # NOTE(review): the library returns a std::wstring*, which is read
        # here as a pointer to a wchar_t*. That only works while the first
        # word of a wstring is its data pointer -- confirm for your C++
        # standard library.
        self.__lib.analyse.restype = POINTER(c_wchar_p)

        self.__lib.free_analyses.argtypes = [POINTER(c_wchar_p)]
        self.__lib.free_analyses.restype = None

        # c_char_p needs bytes; accept text paths as well.
        if not isinstance(fstpath, bytes):
            import sys
            fstpath = fstpath.encode(sys.getfilesystemencoding() or "UTF-8")

        error = c_char_p()
        handle = self.__lib.init(byref(error), fstpath)
        if error.value is not None:
            raise Exception(u"Initialisation of fst failed: " + error.value.decode("UTF-8"))
        self.__handle = handle

    def __del__(self):
        self.terminate()

    def terminate(self):
        """Release the native analyser; safe to call more than once.

        Afterwards any attempt to use the library through this object
        raises an Exception.
        """
        if self.__handle:
            handle = self.__handle
            # Cleared before the call so a failure cannot cause a double free.
            self.__handle = 0
            self.__lib.terminate(handle)

            class DummyLib:
                def __getattr__(obj, name):
                    raise Exception("Attempt to use library after terminate() was called")
            self.__lib = DummyLib()

    def analyse(self, word):
        """Return the analysis of word as text, or u"" if the library
        returned no result."""
        ana_p = self.__lib.analyse(self.__handle, word)
        if not ana_p:
            return u""
        try:
            # .value copies the characters into a Python string, so the
            # native buffer can be released straight afterwards.
            return ana_p.contents.value
        finally:
            self.__lib.free_analyses(ana_p)
Compile the C++ files so you get a file like libltpy.so. The following commands worked for me:
g++ -DGCC_VISIBILITY=1 -g -O2 -fvisibility=hidden -I/usr/local/include/lttoolbox-3.2 -I/usr/local/lib/lttoolbox-3.2/include -c libltpy.cpp -fPIC -DPIC -o libltpy.o
g++ -fPIC -DPIC -shared libltpy.o -Wl,-rpath -Wl,/usr/local/lib -Wl,-rpath -Wl,/usr/local/lib -L/usr/local/lib /usr/local/lib/liblttoolbox3.so -Wl,-soname -Wl,libltpy.so.0 -o libltpy.so
However, if you use autotools, you typically just need the following in your Makefile.am:
lib_LTLIBRARIES = libltpy.la
libltpy_la_SOURCES = libltpy.cpp
libltpy_la_LDFLAGS = $(LTTOOLBOX_LIBS)
then autogen.sh and make libltpy.la (see #Real-world examples).
Then use it like:
import lt
fst = lt.FST("libltpy.so", "analyser.bin")
print fst.analyse(u"cars") # should print car<n><pl>
Real-world examples[edit]
- apertium-transfer uses lttoolbox from C++ to look up words in the bidix
- libvoikko uses lttoolbox from C++ to analyse words, and also makes a Python module:
Footnotes[edit]
- ↑ An alternative might be to include Python.h in the C++ code, and define a full Python class from there, like how pylibtextcat/py3libtextcat creates a Python class for a C library.