Difference between revisions of "Lttoolbox API"

From Apertium
Jump to navigation Jump to search
(unknowns should always return the input word)
 
(3 intermediate revisions by one other user not shown)
Line 1: Line 1:
[[Application lttoolbox|En français]]

{{TOCD}}
{{TOCD}}
You can use [[lttoolbox]] as a library in C++ by with <code>#include <lttoolbox/fst_processor.h></code>, and with a bit more work, you can also use it from Python.
You can use [[lttoolbox]] as a library in C++ by with <code>#include <lttoolbox/fst_processor.h></code>, and with a bit more work, you can also use it from Python.
Line 78: Line 80:
Write the following files:
Write the following files:


libltpy.h:
libltpy.hpp:
<pre>
<pre>
#include <lttoolbox/fst_processor.h>
#include <lttoolbox/fst_processor.h>
Line 91: Line 93:
</pre>
</pre>


libltpy.cc:
libltpy.cpp:
<pre>
<pre>
#include "libltpy.hpp"
#include "libltpy.hpp"
Line 139: Line 141:
}
}


EXTERN void free_analyses(wstring * analyses) {
extern "C" void free_analyses(wstring * analyses) {
delete analyses;
delete analyses;
}
}
Line 187: Line 189:
if error.value != None:
if error.value != None:
self.__handle = 0
self.__handle = 0
raise Exception(u"Initialization of fst failed: " + unicode(error.value, "UTF-8"))
raise Exception(u"Initialisation of fst failed: " + unicode(error.value, "UTF-8"))
def __del__(self):
def __del__(self):
Line 246: Line 248:
[[Category:Morphological analysers]]
[[Category:Morphological analysers]]
[[Category:Development]]
[[Category:Development]]
[[Category:Documentation in English]]

Latest revision as of 13:19, 10 March 2017

En français

You can use lttoolbox as a library in C++ with #include <lttoolbox/fst_processor.h>, and with a bit more work, you can also use it from Python.

Using as a library in C++[edit]

For this example you will need two files, test.dix:

<dictionary>
  <alphabet/>
  <sdefs>
    <sdef n="n"/>
    <sdef n="pl"/>
  </sdefs>
  <section id="main" type="standard">
    <e><p><l>cars</l><r>car<s n="n"/><s n="pl"/></r></p></e>
  </section>
</dictionary>

And test.cc:

#include <stdio.h>
#include <string>
#include <iostream>

#include <lttoolbox/fst_processor.h>
#include <lttoolbox/lt_locale.h>
#include <lttoolbox/ltstr.h>

using namespace std;

// Transducer processor used by main(); it is loaded from the file in argv[1].
FSTProcessor fstp;

/**
 * Loads the compiled transducer named by argv[1], passes one fixed lexical
 * unit through biltrans() and prints the input and the result to wcout.
 *
 * Returns 0 on success, or -1 when no transducer was given or the file
 * could not be opened.
 */
int main(int argc, char **argv)
{
	if(argc < 2) {
		wcout << L"Please specify a transducer" << endl;
		return -1;
	}

	LtLocale::tryToSetLocale();

	// The .bin file produced by lt-comp is not text, so request binary mode;
	// this only makes a difference on platforms that translate line endings.
	FILE *t_rl = fopen(argv[1], "rb");
	if(!t_rl) {
		// Without this check a bad path would pass a null FILE* to load().
		wcout << L"Could not open transducer " << argv[1] << endl;
		return -1;
	}

	fstp.load(t_rl);
	fclose(t_rl);
	fstp.initBiltrans();

	wstring input = L"^car<n><pl>$";
	wstring trad = fstp.biltrans(input);

	wcout << input << L" --> " << trad << endl;

	return 0;
}

Compile both of the files:

$ lt-comp rl test.dix test_rl.bin
main@standard 6 5

$ g++ -Wall test.cc -o test -llttoolbox3

And then test:

$ ./test test_rl.bin 
^car<n><pl>$ --> ^cars$

Using as a module from Python[edit]

This example is based on how libvoikko uses lttoolbox for analysis. We make a C++ library that exposes whatever lttoolbox functions we need to Python, and a little Python class that makes calling those functions a bit more pythonic.[1]

Write the following files:

libltpy.hpp:

#include <lttoolbox/fst_processor.h>

/**
 * Small wrapper that owns an FSTProcessor and exposes a single lookup
 * operation on it.
 */
class Analyser {
	public:
		/**
		 * Loads the compiled transducer stored in the file at analyserpath.
		 * Throws when that file cannot be opened.
		 *
		 * NOTE(review): dynamic exception specifications such as
		 * throw(exception) were removed in C++17. If it is dropped here it
		 * must be dropped from the definition in libltpy.cpp as well.
		 */
		Analyser(const std::string & analyserpath) throw(exception);

		/**
		 * Looks word up in the loaded transducer. Returns the lookup result,
		 * or "@" followed by word when only a partial match was found.
		 */
		wstring analyse(wstring const &word);
	private:
		FSTProcessor fst;
};

libltpy.cpp:

#include "libltpy.hpp"
#include <lttoolbox/fst_processor.h>
#include <lttoolbox/lt_locale.h>

using namespace std;

/**
 * Opens the compiled transducer at analyserpath, loads it into the
 * FSTProcessor and initialises it for biltrans lookups.
 *
 * Throws a plain std::exception (after reporting the path on cerr) when the
 * file cannot be opened.
 */
Analyser::Analyser(const string & analyserpath) throw(exception) {
	// The compiled transducer is not text, so open it in binary mode; this
	// only makes a difference on platforms that translate line endings.
	FILE * file = fopen(analyserpath.c_str(), "rb");
	if (!file) {
		cerr << "Couldn't open analyser file " << analyserpath << endl;
		throw exception();
	}
	try {
		fst.load(file);
	}
	catch (...) {
		// Do not leak the FILE* should load() throw.
		fclose(file);
		throw;
	}
	fclose(file);
	fst.setCaseSensitiveMode(false);
	fst.setDictionaryCaseMode(true);
	fst.initBiltrans();
}
    
/**
 * Runs word through biltransWithQueue() and returns the resulting string.
 * When the second element of the returned pair is non-zero the lookup was
 * only a partial match, and "@" followed by the original word is returned
 * instead.
 */
wstring Analyser::analyse(wstring const &word) {
	// Passing 'false' means no ^ or $ is required in input/output.
	pair<wstring, int> result = fst.biltransWithQueue(word, false);

	if (result.second != 0) {
		// Partial match: flag the word rather than return a truncated result.
		return L"@" + word;
	}
	return result.first;
}

/**
 * C entry point for looking up one word.
 *
 * a     Analyser handle.
 * word  null-terminated wide string to look up.
 *
 * Returns a newly allocated wstring holding the result of a->analyse(word);
 * the caller must release it with free_analyses(). Returns a null pointer
 * when a or word is null, or when word is empty.
 */
extern "C" wstring * analyse(Analyser * a, const wchar_t * word) {
	if (a == 0 || word == 0) {
		// Nothing sensible can be looked up; do not dereference null.
		return 0;
	}
	if (word[0] == L'\0') {
		// avoid a bug in biltransWithQueue:
		return 0;
	}
	// Python hands us a wchar_t*, but Analyser::analyse() wants a wstring.
	const wstring inputString(word);
	return new wstring(a->analyse(inputString));
}

/**
 * Destroys a wstring that was allocated with new and handed out to the
 * caller. A null pointer is accepted: delete on null does nothing.
 */
extern "C" void free_analyses(wstring * analyses)
{
	delete analyses;
}

/**
 * C entry point that creates an Analyser for the transducer file at path.
 *
 * error  out-parameter; set to a null pointer on success, otherwise to a
 *        message describing the failure. May itself be null, in which case
 *        no message is reported.
 * path   null-terminated path of the compiled transducer.
 *
 * Returns the new Analyser (release it with terminate()), or a null pointer
 * on failure.
 *
 * The message is kept in a function-local static so that the pointer stays
 * valid after this function returns; it is overwritten by the next failing
 * call, so this function is not safe to call from several threads at once.
 */
extern "C" Analyser * init(const char ** error, const char * path) {
	static string lastError;

	LtLocale::tryToSetLocale();

	if (path == 0) {
		// Constructing a std::string from a null pointer is undefined.
		lastError = "No analyser path given";
	}
	else {
		try {
			Analyser * a = new Analyser(path);
			if (error != 0) {
				*error = 0;
			}
			return a;
		}
		catch (const exception & e) {
			// Copy the text: what() belongs to the exception object, which
			// is destroyed when this handler exits.
			lastError = e.what();
		}
	}

	if (error != 0) {
		*error = lastError.c_str();
	}
	return 0;
}

/**
 * C entry point that destroys an Analyser handle. A null handle is
 * accepted, because delete on a null pointer does nothing.
 */
extern "C" void terminate(Analyser * a)
{
	delete a;
}

lt.py:

# -*- coding: utf-8 -*-
from ctypes import byref, CDLL, c_char_p, c_wchar_p, c_void_p, POINTER

class FST(object):
	"""Python wrapper around the C entry points of the libltpy shared library.

	An instance loads the shared library, creates one analyser handle through
	the library's init(), and releases that handle again when the object is
	garbage-collected.
	"""

	def __init__(self, libpath, fstpath):
		"""Load the library at libpath and open the transducer at fstpath.

		Raises Exception when the library's init() reports an error.
		"""
		# Set first, so __del__ is safe even if CDLL() or init() fails below.
		self.__handle = 0
		self.__lib = CDLL(libpath)

		self.__lib.init.argtypes = [POINTER(c_char_p), c_char_p]
		self.__lib.init.restype = c_void_p

		self.__lib.terminate.argtypes = [c_void_p]
		self.__lib.terminate.restype = None

		self.__lib.analyse.argtypes = [c_void_p, c_wchar_p]
		# NOTE(review): the C side returns a std::wstring*, not a wchar_t**.
		# Reading .contents.value below presumably relies on the wstring
		# object starting with its character-data pointer -- confirm for the
		# C++ standard library in use.
		self.__lib.analyse.restype = POINTER(c_wchar_p)

		self.__lib.free_analyses.argtypes = [POINTER(c_wchar_p)]
		self.__lib.free_analyses.restype = None

		error = c_char_p()
		handle = self.__lib.init(byref(error), fstpath)
		if error.value is not None:
			raise Exception(u"Initialisation of fst failed: " + error.value.decode("UTF-8"))
		self.__handle = handle

	def __del__(self):
		"""Release the analyser handle, if one is still held."""
		if self.__handle:
			# Hand the handle back to the library so the analyser is freed.
			self.__lib.terminate(self.__handle)
			self.__handle = 0
			class DummyLib:
				def __getattr__(obj, name):
					raise Exception("Attempt to use library after terminate() was called")
			self.__lib = DummyLib()

	def analyse(self, word):
		"""Return the library's analysis of word, or "" when it returns null."""
		ana_p = self.__lib.analyse(self.__handle, word)
		if not ana_p:
			return ""
		try:
			return ana_p.contents.value
		finally:
			# Always give the result back, even if reading it raised.
			self.__lib.free_analyses(ana_p)

Compile the C++ files so you get a file like libltpy.so. The following commands worked for me:

g++ -DGCC_VISIBILITY=1 -g -O2 -fvisibility=hidden -I/usr/local/include/lttoolbox-3.2 -I/usr/local/lib/lttoolbox-3.2/include -c libltpy.cpp -fPIC -DPIC -o libltpy.o

g++  -fPIC -DPIC -shared  libltpy.o -Wl,-rpath -Wl,/usr/local/lib -Wl,-rpath -Wl,/usr/local/lib -L/usr/local/lib /usr/local/lib/liblttoolbox3.so   -Wl,-soname -Wl,libltpy.so.0 -o libltpy.so

However, if you use autotools, you typically just need the following in your Makefile.am:

lib_LTLIBRARIES = libltpy.la
libltpy_la_SOURCES = libltpy.cpp
libltpy_la_LDFLAGS = $(LTTOOLBOX_LIBS)

Then run autogen.sh and make libltpy.la (see #Real-world examples).

Then use it like:

import lt
fst = lt.FST("libltpy.so", "analyser.bin")
print fst.analyse(u"cars") # should print car<n><pl>

Real-world examples[edit]

Footnotes[edit]

  1. An alternative might be to include Python.h in the C++ code, and define a full Python class from there, like how pylibtextcat/py3libtextcat creates a Python class for a C library.