<?xml version="1.0" encoding="UTF-8"?>
<!-- preprocess.plan -->
<alvisnlp-plan id="preprocess">
	<!-- segment text to phrases and tokenize -->
	<import>resources/segmentation/segmentation.plan</import>
	<!-- set correct pos tags for some nouns -->
	<correct-plants class="Action">
		<!--
			Select every annotation of the "words" layer, in all sections of all
			documents, whose lower-cased form is exactly one of the nouns below.
			NOTE(review): the comparison is an exact match on the literal given,
			so the accented spelling of "celeri" and plural forms presumably do
			not match; confirm this is intended.
		-->
		<target>documents.sections.layer:words[
			str:lower(@form) == "fraise"
			or str:lower(@form) == "celeri"
			or str:lower(@form) == "aubergine"
			or str:lower(@form) == "blette"
			or str:lower(@form) == "kaki"
		]</target>
		<!-- Set the "pos" feature of each selected word to "NOM". -->
		<action>set:feat:pos("NOM")</action>
		<!-- NOTE(review): presumably this flag authorises the action to modify features; confirm against the Action module documentation. -->
		<setFeatures/>
	</correct-plants>
	<!-- lemmatize -->
	<tt class="TreeTagger">
		<!--
			Machine-specific locations of the TreeTagger binary and of the
			French parameter file. Replace both with the paths of your own
			installation before running this plan.
		-->
		<treeTaggerExecutable>/Users/belka/Documents/work/inrae/outils/tree-tagger/bin/tree-tagger</treeTaggerExecutable>
		<parFile>/Users/belka/Documents/work/inrae/outils/tree-tagger/lib/french.par</parFile>
		<!-- Character sets used for the tagger input and output. -->
		<inputCharset>UTF-8</inputCharset>
		<outputCharset>UTF-8</outputCharset>
		<!-- NOTE(review): presumably makes the module fall back to the surface form when the lemma is unknown; confirm against the TreeTagger module documentation. -->
		<noUnknownLemma/>
	</tt>
</alvisnlp-plan>