Running the monolingual rule learning
Revision as of 21:32, 14 June 2013 by Fpetkovski (talk | contribs) (Created page with 'Place the following Makefile in the folder where you want to run your training process: <pre> CORPUS=setimes DIR=sh-mk DATA=/home/philip/Apertium/apertium-sh-mk/ AUTOBIL=sh-mk.a…')
Place the following Makefile in the folder where you want to run your training process:
# Makefile for the monolingual rule-learning run.
#
# Put this file in the directory that holds $(CORPUS).$(DIR).txt and run
# `make`. Every generated file is written under ./data/.
# NOTE: recipe lines must begin with a hard TAB, not spaces.

# --- Settings (edit these, or override on the command line) -----------------
CORPUS  := setimes
DIR     := sh-mk
DATA    := /home/philip/Apertium/apertium-sh-mk/
AUTOBIL := sh-mk.autobil.bin
SCRIPTS := /home/philip/Apertium/apertium-lex-tools/scripts
MODEL   := /home/philip/Apertium/corpora/language-models/mk/setimes.mk.5.blm

# --- Derived names -----------------------------------------------------------
# Input corpus file, and the common prefix shared by every generated file.
SRC := $(CORPUS).$(DIR).txt
OUT := data/$(CORPUS).$(DIR)

# `all` names a command, not a file.
.PHONY: all

# Every recipe writes its result with `> $@`, so the target file exists even
# when the command fails. Delete such half-written targets so that the next
# run does not mistake them for up-to-date files.
.DELETE_ON_ERROR:

#all: $(OUT).lrx $(OUT).freq.lrx
all: $(OUT).freq.lrx.bin

# One number per line of the input file (1..N), via seq.
$(OUT).lines: $(SRC)
	mkdir -p $(@D)
	seq $$(wc -l < $<) > $@

# Input -> apertium-destxt -> $(DIR)-pretransfer mode -> lt-proc -b with the
# $(AUTOBIL) dictionary.
$(OUT).biltrans: $(SRC)
	mkdir -p $(@D)
	apertium-destxt < $< \
	  | apertium -f none -d $(DATA) $(DIR)-pretransfer \
	  | lt-proc -b $(DATA)/$(AUTOBIL) > $@

# Number the biltrans lines (cat -n) and pass them through the three
# trimming/filtering scripts. The .lines file is a declared prerequisite but
# is not read by this recipe; only the first prerequisite ($<) is used.
$(OUT).ambig: $(OUT).biltrans $(OUT).lines
	cat -n $< \
	  | python3 $(SCRIPTS)/trim-fertile-lines.py \
	  | python3 $(SCRIPTS)/biltrans-line-only-pos-ambig.py \
	  | python3 $(SCRIPTS)/biltrans-trim-uncovered.py > $@

# NOTE(review): this is the only step invoked with `python` rather than
# `python3`; confirm which interpreter the script actually requires.
$(OUT).multi: $(OUT).ambig
	python $(SCRIPTS)/biltrans-to-multitrans-line-recursive.py < $< > $@

# Run the multi file through the $(DIR)-multi mode.
$(OUT).unranked: $(OUT).multi
	apertium -f none -d $(DATA) $(DIR)-multi < $< > $@

# Feed the unranked output to irstlm-ranker-frac together with $(MODEL).
$(OUT).ranked: $(OUT).unranked
	irstlm-ranker-frac $(MODEL) < $< > $@

# Paste .multi and .ranked side by side and keep tab-separated fields 1-4.
# $^ expands to the prerequisites in the order listed (multi, then ranked).
$(OUT).annotated: $(OUT).multi $(OUT).ranked
	paste $^ | cut -f1-4 > $@

# Script arguments are the prerequisites in order: ambig, annotated.
$(OUT).freq: $(OUT).ambig $(OUT).annotated
	python3 $(SCRIPTS)/biltrans-extract-frac-freq.py $^ > $@

# Script arguments are the prerequisites in order: freq, ambig, annotated.
$(OUT).ngrams: $(OUT).freq $(OUT).ambig $(OUT).annotated
	python3 $(SCRIPTS)/biltrans-count-patterns-ngrams.py $^ > $@

# Script arguments are the prerequisites in order: freq, ngrams.
# Not a prerequisite of `all`; build it explicitly with
# `make data/<corpus>.<dir>.patterns`.
$(OUT).patterns: $(OUT).freq $(OUT).ngrams
	python3 $(SCRIPTS)/ngram-pruning-frac.py $^ > $@

# Produce the .lrx file from the .freq file.
$(OUT).freq.lrx: $(OUT).freq
	python3 $(SCRIPTS)/extract-alig-lrx.py $< > $@

# Compile the .lrx file with apertium-lrx-comp (input file, output file).
$(OUT).freq.lrx.bin: $(OUT).freq.lrx
	apertium-lrx-comp $< $@