projectors.plan

<?xml version="1.0" encoding="UTF-8"?>
<alvisnlp-plan id="projectors">


<!--
///////////////////////////////////////////////////////////////////////////////////////
//
//  I. Baseline approach
//
///////////////////////////////////////////////////////////////////////////////////////
-->

<baseline>
	<fcu>
		<!-- Annotate the corpus with concepts from the French Crop Usage (FCU) thesaurus -->
		<project class="RDFProjector">
			<!-- RDF thesaurus whose labels are projected -->
			<source>resources/thesaurus/fcu/frenchCropUsage_20210817.rdf</source>
			<!-- labels are matched against the "lemma" feature of the "words" layer -->
			<subject layer="words" feature="lemma"/>
			<!-- keep only owl:NamedIndividual resources, using their French labels;
			     the IRI of the matched resource goes into the "uri" feature -->
			<resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs>
			<language>fr</language>
			<uriFeatureName>uri</uriFeatureName>
			<!-- matching options -->
			<allowJoined>true</allowJoined>
			<joinDash>true</joinDash>
			<caseInsensitive>true</caseInsensitive>
			<!-- matches are stored in the "fcu-baseline" layer, each tagged type=RDFProjector -->
			<targetLayerName>fcu-baseline</targetLayerName>
			<constantAnnotationFeatures>type=RDFProjector</constantAnnotationFeatures>
		</project>

		<filter>
			<!-- Drop projected annotations whose lemma is one of a fixed list of ambiguous words. -->
			<!-- NOTE(review): the test reads the "lemma" feature of the fcu-baseline annotations
			     themselves; confirm that the projector actually sets it on the annotations it creates. -->
			<ambiguous-words class="Action">
				<target>documents.sections.layer:fcu-baseline[
					@lemma == "orange"
					or @lemma == "marron"
					or @lemma == "fruit"
					or @lemma == "semence"
					or @lemma == "côte"
					or @lemma == "soleil"
					or @lemma == "gel"
					or @lemma == "fleur"]</target>
				<!-- take each selected annotation out of the fcu-baseline layer -->
				<action>remove:fcu-baseline</action>
				<removeFromLayer>true</removeFromLayer>
			</ambiguous-words>

			<!-- Clean up the fcu-baseline layer with all three removal modes switched on
			     (equal, included and overlapping annotations). -->
			<overlaps class="RemoveOverlaps">
				<layerName>fcu-baseline</layerName>
				<removeEqual>true</removeEqual>
				<removeIncluded>true</removeIncluded>
				<removeOverlapping>true</removeOverlapping>
			</overlaps>
		</filter>

		<export>
			<!-- Export step: delegates to the plan referenced by href, configured by the two parameters below -->
			<annotations href="modules/export.plan">
				<!-- export only the annotations of the fcu-baseline layer (filled by the RDF projector) -->
				<outLayerName>documents.sections.(layer:fcu-baseline)</outLayerName>
				<!-- name of the tabular (CSV) output file -->
				<outFile>fcu-baseline-annotations.csv</outFile>
			</annotations>

			<most-prominent-words>
				<!-- calculate tf-idf score with standard parameters -->
  				<tfidf-score class="KeywordsSelector">
  					<!-- input terms -->
  					<keywords>sections.layer:fcu-baseline</keywords>