diff --git a/main-annotation.plan b/main-annotation.plan index 32a6015f0b20a9c9d0c85c37762429b77e0baf25..8f2ccc5395dd00395788332ec9c05aaec7d931fa 100644 --- a/main-annotation.plan +++ b/main-annotation.plan @@ -13,12 +13,12 @@ <!-- Assign every page with corresponding names of each corpora --> <load-corpus> - <bsv class="XMLReader"> - <html/> + <bsv class="XMLReader"> + <html/> <xslTransform>resources/segmentation/html2alvisnlp.xslt</xslTransform> <source>resources/corpus/test_d2kab</source> <constantSectionFeatures>corpus=corpus bsv</constantSectionFeatures> - </bsv> + </bsv> </load-corpus> <make-xpath> @@ -75,6 +75,16 @@ <find-themes href="modules/patterns/find-themes.plan"/> +<!-- + /////////////////////////////////////////////////////////////////////////////////////// + // + // V. Visualize the annotations + // + /////////////////////////////////////////////////////////////////////////////////////// + --> + <quick-html href="modules/visualize.plan"/> + + </alvisnlp-plan> diff --git a/modules/patterns/bbch.plan b/modules/patterns/bbch.plan index 895204b5ad2a36206a3a39771195be9f8dd40b42..7e815822f6892b32106c7c1b1f33ceab5e64551a 100644 --- a/modules/patterns/bbch.plan +++ b/modules/patterns/bbch.plan @@ -5,38 +5,29 @@ <!-- On ne peut pas mettre deux actions, sinon ça crée deux annotations --> - <!-- ?? BBCH ?? 
--> - <!--_bbch_ class="PatternMatcher"> - <pattern> - [ @form =~ "BBCH" ] - </pattern> - <actions> - <createAnnotation layer = "bbch"/> - </actions> - <constantAnnotationFeatures>type=?BBCH?</constantAnnotationFeatures> - </_bbch_--> -<!-- BBCH00 --> - <bbch class="PatternMatcher"> + +<!-- BBCH9 ou BBCH09 BBCH00 --> + <BBCH_GLUED class="PatternMatcher"> <pattern> - [ @form ^= "BBCH" ] + [ @form =~ "^BBCH\\d{1,3}$" ] </pattern> <actions> <createAnnotation layer = "bbch"/> </actions> - <constantAnnotationFeatures>type=BBCH</constantAnnotationFeatures> + <constantAnnotationFeatures>type=BBCH_GLUED</constantAnnotationFeatures> <!--constantAnnotationFeatures>uri=http://ontology.inrae.fr/ppdo/ontology/bbch_globalScale</constantAnnotationFeatures--> - </bbch> + </BBCH_GLUED> - <insert-space class="Action"> - <target>documents.sections.layer:bbch[@type == "BBCH"]</target> - <action>set:feat:canonical-form(str:replace(@form, "BBCH", "BBCH "))</action> + <set-canonical class="Action"> + <target>documents.sections.layer:bbch[@type == "BBCH_GLUED"]</target> + <action>set:feat:canonical-form("BBCH " ^ str:replace(@form, "BBCH", ""))</action> <setFeatures/> - </insert-space> + </set-canonical> - <!-- BBCH-00 --> + <!-- BBCH-00 ou BBCH 00 ou (BBCH 00 --> <BBCH_DD class="PatternMatcher"> <pattern> [ @form =^ "BBCH" ] @@ -53,28 +44,23 @@ <!-- BBCH-00) --> <BBCH_DD_PAR class="PatternMatcher"> <pattern> - [ @form =^ "BBCH" ] + [ @form == "BBCH" ] [ @form == "-"]? 
- [ @form =~ "[0-9]{2}" and @form =^ ")" ] + (num: [ @form =~ "[0-9]{2}\\)"]) </pattern> <actions> - <createAnnotation layer="bbch"/> + <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ (group:num =~ "^.."))'/> <!-- createAnnotation layer="bbch" features='uri=("http://ontology.inrae.fr/ppdo/ontology/bbch_globalScale_secondaryStage_BBCH" ^ str:replace(group:number, ")", ""))'/ --> </actions> <constantAnnotationFeatures>type=BBCH-DD-PAR</constantAnnotationFeatures> </BBCH_DD_PAR> - <delete-par class="Action"> - <target>documents.sections.layer:bbch[@type == "BBCH-DD-PAR"]</target> - <action>set:feat:canonical-form(str:replace(@form, ")", " "))</action> - <setFeatures/> - </delete-par> <!-- Stade 00 BBCH --> <STADE_DD_BBCH class="PatternMatcher"> <pattern> - [ @form ^= "Stade" ]? + [ @lemma == "stade" ]? (number1:[ @form =~ "[0-9]{2}" ]) [ @form ^= "BBCH" ] </pattern> @@ -91,7 +77,7 @@ <pattern> [ @form ^= "BBCH" ] [ @form == ":" ] - [ @form ^= "Stade" ] + [ @lemma == "stade" ] (number2 : [ @form =~ "\\d{2}$" ]) </pattern> <actions> @@ -104,20 +90,22 @@ <!-- BBCH 17 à 19--> <!-- BBCH 17 - 19--> + <!-- BBCH 17 à BBCH 19 --> <BBCH_RANGE class="PatternMatcher"> <pattern> [ @form == "BBCH" ] [ @form == "-"]? (number3 : [ @form =~ "\\d{2}$" ]) - [@form == "à " or @form == "-"] + ([ @form == "BBCH" ] + [ @form == "-"]?)? 
(number4 : [ @form =~ "\\d{2}$" ]) </pattern> <actions> <createAnnotation layer="bbch" features='number=(group:number3 =~ "..$"),canonical-form=("BBCH " ^ (group:number3 =~ "..$"))'/> <createAnnotation layer="bbch" features='number=(group:number4 =~ "..$"),canonical-form=("BBCH " ^ (group:number4 =~ "..$"))'/> </actions> - <constantAnnotationFeatures>type=BBCH_RANGE</constantAnnotationFeatures> + <constantAnnotationFeatures>type=BBCH_RANGE,range=True</constantAnnotationFeatures> </BBCH_RANGE> @@ -133,7 +121,7 @@ <createAnnotation layer="bbch" features='number=(group:number3bis =~ "^.."),canonical-form=("BBCH " ^ (group:number3bis =~ "^.."))'/> <createAnnotation layer="bbch" features='number=(group:number3bis =~ "..$"),canonical-form=("BBCH " ^ (group:number3bis =~ "..$"))'/> </actions> - <constantAnnotationFeatures>type=BBCH_RANGE</constantAnnotationFeatures> + <constantAnnotationFeatures>type=BBCH_RANGE_GLUED,range=True</constantAnnotationFeatures> </BBCH_RANGE_GLUED> @@ -204,10 +192,35 @@ <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number11)'/> <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number12)'/> </actions> - <constantAnnotationFeatures>type=BBCH_TABLE</constantAnnotationFeatures> + <constantAnnotationFeatures>type=BBCH_TABLE_PHYSIO</constantAnnotationFeatures> </BBCH_TABLE_PHYSIO> + <bbch-inverse class="PatternMatcher"> + <pattern>(num: [ @form =~ "\\d\\d-BBCH"])</pattern> + <actions> + <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ (group:num =~ "^.."))'/> + </actions> + <constantAnnotationFeatures>type=BBCH_INVERSE</constantAnnotationFeatures> + </bbch-inverse> + + <!-- 2-3 feuilles étalées -> BBCH 102 BBCH 103 --> + <n-m-feuilles-etalees class="PatternMatcher"> + <pattern> + (num:[@form =~ "^\\d-\\d$"]) + [@lemma == "feuille"] + [@form =~ "étalées?"] + </pattern> + <actions> + <createAnnotation layer="bbch" features='canonical-form=("BBCH 10" ^ (group:num =~ "^."))'/> + 
<createAnnotation layer="bbch" features='canonical-form=("BBCH 10" ^ (group:num =~ ".$"))'/> + </actions> + <constantAnnotationFeatures>type=N-M-FEUILLES-ETALEES</constantAnnotationFeatures> + </n-m-feuilles-etalees> + + +<bbch-context> +<active>false</active> <bbch_in_context50 class="PatternMatcher"> <pattern> @@ -419,5 +432,6 @@ <constantAnnotationFeatures>type=BBCH_in_context500</constantAnnotationFeatures> </bbch_in_context500> +</bbch-context> </alvisnlp-plan> diff --git a/modules/patterns/find-themes.plan b/modules/patterns/find-themes.plan index bda31471bd94115fa48c70da368ab488bb270d56..dc2fd01420e2f665bbeb4309b2b05019a35c29ab 100644 --- a/modules/patterns/find-themes.plan +++ b/modules/patterns/find-themes.plan @@ -2,6 +2,38 @@ <alvisnlp-plan id="themes"> + <fcu> + <numeric-themes> + <most-prominent-words> + <active>false</active> + <!-- calculate tf-idf score with standard parameters --> + <tfidf-score class="KeywordsSelector"> + <!-- input terms --> + <keywords>sections.layer:fcu-baseline</keywords> + <keywordForm>@skos-prefLabel</keywordForm> + <!-- parameters --> + <scoreFunction>tfidf</scoreFunction> + <scoreThreshold>-1000</scoreThreshold> + <!-- save --> + <outFile>output/annotations/fcu-baseline-tfidf.csv</outFile> + </tfidf-score> + + <!-- calculate bm25 score with standard parameters --> + <bm25-score class="KeywordsSelector"> + <!-- input terms --> + <keywords>sections.layer:fcu-baseline</keywords> + <keywordForm>@skos-prefLabel</keywordForm> + <documentId>document.@id</documentId> + <!-- parameters --> + <scoreFunction type="bm25" k1="1.2" b="0.75"/> + <scoreThreshold>-1000</scoreThreshold> + <!-- save --> + <outFile>output/annotations/fcu-baseline-bm25.csv</outFile> + </bm25-score> + </most-prominent-words> + </numeric-themes> + </fcu> + <patterns> diff --git a/modules/projectors.plan b/modules/projectors.plan index 66544a2c8bfbcfadbc17fc4d8b7177d86829e7d5..217df5aff10f9701b455313360869dc74a57b023 100644 --- a/modules/projectors.plan +++ 
b/modules/projectors.plan @@ -14,11 +14,10 @@ <baseline> <fcu> - <!-- Project concepts on text --> - <project class="RDFProjector"> - <!-- project concepts on the lemmas from the corpus --> + <!-- project concepts on the text --> + <project class="RDFProjector"> <source>resources/thesaurus/fcu/frenchCropUsage_20210817.rdf</source> - <subject layer="words" feature="lemma"/> + <subject layer="words" feature="form"/> <!-- save only owl:individuals with their iri and french labels--> <resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs> <language>fr</language> @@ -26,81 +25,17 @@ <!-- parameters of mapping --> <allowJoined>true</allowJoined> <joinDash>true</joinDash> + <ignoreDiacritics/> <caseInsensitive>true</caseInsensitive> <!-- place annotations to a layer --> <targetLayer>fcu-baseline</targetLayer> - <constantAnnotationFeatures>type=RDFProjector</constantAnnotationFeatures> - </project> - - <filter> - <!-- filter some of ambiguous words --> - <ambiguous-words class="Action"> - <target>documents.sections.layer:fcu-baseline[ - @lemma == "orange" - or @lemma== "marron" - or @lemma == "fruit" - or @lemma == "semence" - or @lemma == "côte" - or @lemma == "soleil" - or @lemma == "gel" - or @lemma == "fleur"]</target> - <action>remove:fcu-baseline</action> - <removeFromLayer/> - </ambiguous-words> - - <!-- filter overlapping annotations --> - <overlaps class="RemoveOverlaps"> - <layer>fcu-baseline</layer> - <removeEqual/> - <removeIncluded>true</removeIncluded> - <removeOverlapping>true</removeOverlapping> - </overlaps> - </filter> - - <export> - <annotations href="modules/export.plan"> - <!-- focus on the annotations made by rdf-projector --> - <outLayer>documents.sections.(layer:fcu-baseline)</outLayer> - <!-- save to a tabular file--> - <outFile>fcu-baseline-annotations.csv</outFile> - </annotations> - - <most-prominent-words> - <!-- calculate tf-idf score with standard parameters --> - <tfidf-score class="KeywordsSelector"> - <!-- input terms --> - 
<keywords>sections.layer:fcu-baseline</keywords> - <keywordForm>@skos-prefLabel</keywordForm> - <!-- parameters --> - <scoreFunction>tfidf</scoreFunction> - <scoreThreshold>-1000</scoreThreshold> - <!-- save --> - <outFile>output/annotations/fcu-baseline-tfidf.csv</outFile> - </tfidf-score> + <constantAnnotationFeatures>type=RDFProjector,projectedOn=form</constantAnnotationFeatures> + </project> - <!-- calculate bm25 score with standard parameters --> - <bm25-score class="KeywordsSelector"> - <!-- input terms --> - <keywords>sections.layer:fcu-baseline</keywords> - <keywordForm>@skos-prefLabel</keywordForm> - <documentId>document.@id</documentId> - <!-- parameters --> - <scoreFunction type="bm25" k1="1.2" b="0.75"/> - <scoreThreshold>-1000</scoreThreshold> - <!-- save --> - <outFile>output/annotations/fcu-baseline-bm25.csv</outFile> - </bm25-score> - </most-prominent-words> - </export> - </fcu> - - - - <ppdo> - <!-- Project concepts on text --> - <project class="RDFProjector"> + <!-- Project concepts on lemmatized text --> + <project-lemma class="RDFProjector"> <!-- project concepts on the lemmas from the corpus --> - <source>resources/thesaurus/ppdo/ppdo_20210726.rdf</source> + <source>resources/thesaurus/fcu/frenchCropUsage_20210817.rdf</source> <subject layer="words" feature="lemma"/> <!-- save only owl:individuals with their iri and french labels--> <resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs> @@ -110,15 +45,166 @@ <allowJoined>true</allowJoined> <joinDash>true</joinDash> <caseInsensitive>true</caseInsensitive> + <ignoreDiacritics/> <!-- place annotations to a layer --> - <targetLayer>ppdo-baseline</targetLayer> - <constantAnnotationFeatures>type=RDFProjector</constantAnnotationFeatures> - </project> + <targetLayer>fcu-baseline</targetLayer> + <constantAnnotationFeatures>type=RDFProjector,projectedOn=lemma</constantAnnotationFeatures> + </project-lemma> + <dedup-double-projection> + <!-- Removing included mentions ex betterave if we also have 
betterave sucrière --> + <rm-included class="RemoveOverlaps"> + <layer>fcu-baseline</layer> + <removeEqual>false</removeEqual> + <removeIncluded>true</removeIncluded> + <removeOverlapping>false</removeOverlapping> + </rm-included> + + <!-- Removing equal mentions (same span), except if they have different uris (which can happen for example with jardin + which is linked to both zone non agricole and jardin) --> + <!-- This is needed because of the double projection of fcu on form and lemma --> + <dedup class="Action"> + <target>documents.sections.sort:nsval(layer:fcu-baseline, start ^ "---" ^ end ^ "---" ^ @uri)</target> + <action>set:feat:keep("yes")</action> + <setFeatures/> + </dedup> + + <remove class="Action"> + <target>documents.sections.layer:fcu-baseline[@keep != "yes"]</target> + <action>delete</action> + <deleteElements/> + </remove> + </dedup-double-projection> + + <!-- Because of ignoreDiacritics, maïs is projected on mais, which we don't want --> + <mais-filter class="Action"> + <target>documents.sections.layer:fcu-baseline[ str:lower(@form) == "mais"]</target> + <action>remove:fcu-baseline</action> + <removeFromLayer/> + </mais-filter> + + <!-- same for côté, which is projected on côte -> http://ontology.inrae.fr/frenchcropusage/Poirees --> + <cote-filter class="Action"> + <target>documents.sections.layer:fcu-baseline[ str:lower(@form) == "côté"]</target> + <action>remove:fcu-baseline</action> + <removeFromLayer/> + </cote-filter> + + + <filter> + <!-- filter out some ambiguous words --> + <!-- annotations in fcu-baseline do not have lemma features, we have to go in the words layer to use this feature --> + <!-- previous filter tried to target: orange/marron/fruit/semence/côte/soleil/gel/fleur --> + <!-- côte -> can refer to chard (bette) but is almost never used that way --> + <ambiguous-words class="Action"> + <target>documents.sections.layer:fcu-baseline[ outside:words[ + @lemma == "fruit" + or @lemma == "semence" + or @lemma == "côte" + or @lemma == "soleil" 
+ or @lemma == "gel" + or @lemma == "fleur"]]</target> + <action>remove:fcu-baseline</action> + <removeFromLayer/> + </ambiguous-words> + </filter> + + <export-fcu href="modules/export.plan"> + <!-- focus on the annotations made by rdf-projector --> + <outLayer>documents.sections.(layer:fcu-baseline)</outLayer> + <!-- save to a tabular file--> + <outFile>fcu-baseline-annotations.csv</outFile> + </export-fcu> + + + </fcu> + + <phenological-stages> + <ppdo> + <project class="RDFProjector"> + <!-- project concepts on the text --> + <!-- updated rdf with no beginning and ending whitespaces in the labels --> + <source>resources/thesaurus/ppdo/ppdo_20230127.rdf</source> + <subject layer="words" feature="form"/> + <!-- save only owl:individuals with their iri and french labels--> + <resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs> + <!-- <labelURIs>skos:prefLabel</labelURIs> --> + <language>fr</language> + <uriFeature>uri</uriFeature> + <!-- parameters of mapping --> + <allowJoined>true</allowJoined> + <joinDash>true</joinDash> + <caseInsensitive>true</caseInsensitive> + <!-- place annotations to a layer --> + <targetLayer>ppdo-baseline</targetLayer> + <constantAnnotationFeatures>type=RDFProjector,projectedOn=form</constantAnnotationFeatures> + </project> + <project-lemma class="RDFProjector"> + <!-- project concepts on the lemmas --> + <!-- updated rdf with no beginning and ending whitespaces in the labels --> + <source>resources/thesaurus/ppdo/ppdo_20230127.rdf</source> + <subject layer="words" feature="lemma"/> + <!-- save only owl:individuals with their iri and french labels--> + <resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs> + <language>fr</language> + <uriFeature>uri</uriFeature> + <!-- parameters of mapping --> + <allowJoined>true</allowJoined> + <joinDash>true</joinDash> + <caseInsensitive>true</caseInsensitive> + <!-- place annotations to a layer --> + <targetLayer>ppdo-baseline</targetLayer> + 
<constantAnnotationFeatures>type=RDFProjector,projectedOn=lemma</constantAnnotationFeatures> + </project-lemma> + + <dedup-double-projection> + <rm-included class="RemoveOverlaps"> + <layer>ppdo-baseline</layer> + <removeEqual>false</removeEqual> + <removeIncluded>true</removeIncluded> + <removeOverlapping>false</removeOverlapping> + </rm-included> + + <!-- Removing equal mentions (same span), except if they have different uris (which can happen for example with levée + which is linked to both bbch_generalScale_secondaryStage_BBCH09 and bbch_globalScale_secondaryStage_BBCH009) --> + <!-- This is needed because of the double projection of ppdo on form and lemma --> + <dedup class="Action"> + <target>documents.sections.sort:nsval(layer:ppdo-baseline, start ^ "---" ^ end ^ "---" ^ @uri)</target> + <action>set:feat:keep("yes")</action> + <setFeatures/> + </dedup> + + <remove class="Action"> + <target>documents.sections.layer:ppdo-baseline[@keep != "yes"]</target> + <action>delete</action> + <deleteElements/> + </remove> + </dedup-double-projection> + + </ppdo> + <pattern-based> + <bbch> + <find-patterns href="modules/patterns/bbch.plan"/> + + <rm-included class="RemoveOverlaps"> + <layer>bbch</layer> + <removeEqual>false</removeEqual> + <removeIncluded>true</removeIncluded> + <removeOverlapping>false</removeOverlapping> + </rm-included> + <!-- Removing equal mentions (same span), except if they have different canonical forms (@uris aren't available at this stage) --> + <find-duplicates class="Action"> + <target>documents.sections.sort:nsval(layer:bbch, start ^ "---" ^ end ^ "---" ^ @canonical-form)</target> + <action>set:feat:keep("yes")</action> + <setFeatures/> + </find-duplicates> + + <remove class="Action"> + <target>documents.sections.layer:bbch[@keep != "yes"]</target> + <action>delete</action> + <deleteElements/> + </remove> + <export href="modules/export.plan"> <!-- focus on the matches made with patterns--> 
<outLayer>documents.sections.(layer:bbch)</outLayer> @@ -126,22 +212,23 @@ <outFile>ppdo-baseline-bbch.csv</outFile> </export> <align class="RDFProjector"> - <!-- create a correspondance between matches and real concepts from ppdo--> - <source>resources/thesaurus/ppdo/ppdo_20210726.rdf</source> - <subject layer="bbch" feature="canonical-form"/> - <targetLayer>ppdo-baseline</targetLayer> - <!-- information that has to be saved and added to the baseline layer--> - <resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs> - <language>fr</language> - <uriFeature>uri</uriFeature> - <!-- parameters --> - <wordStartCaseInsensitive/> - <allowJoined/> - <constantAnnotationFeatures>type=PATTERN_BBCH - </constantAnnotationFeatures> + <!-- create a correspondance between matches and real concepts from ppdo--> + <source>resources/thesaurus/ppdo/ppdo_20230127.rdf</source> + <subject layer="bbch" feature="canonical-form"/> + <targetLayer>ppdo-combined</targetLayer> + <!-- information that has to be saved and added to the baseline layer--> + <resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs> + <language>fr</language> + <uriFeature>uri</uriFeature> + <!-- parameters --> + <wordStartCaseInsensitive/> + <allowJoined/> + <constantAnnotationFeatures>type=PATTERN_BBCH + </constantAnnotationFeatures> </align> + + </bbch> - <baggiolini> <active>false</active> <patterns href="modules/patterns/baggiolini.plan"/> @@ -153,7 +240,7 @@ </export> <align class="RDFProjector"> <!-- create a correspondance between matches and real concepts from ppdo--> - <source>resources/thesaurus/ppdo/ppdo_20210726.rdf</source> + <source>resources/thesaurus/ppdo/ppdo_20230127.rdf</source> <subject layer="baggiolini" feature="canonical-form"/> <targetLayer>ppdo-baseline</targetLayer> <!-- information that has to be saved and added to the baseline layer--> @@ -175,7 +262,7 @@ </export> <align class="RDFProjector"> <!-- create a correspondance between matches and real concepts from ppdo--> - 
<source>resources/thesaurus/ppdo/ppdo_20210726.rdf</source> + <source>resources/thesaurus/ppdo/ppdo_20230127.rdf</source> <subject layer="eicchorn-lorenz" feature="canonical-form"/> <targetLayer>ppdo-baseline</targetLayer> <!-- information that has to be saved and added to the baseline layer--> @@ -187,24 +274,37 @@ </constantAnnotationFeatures> </align> </eicchorn-lorenz> - </find-patterns> + </pattern-based> + + <merging-ppdo-bbch> + <merge class="MergeLayers"> + <sourceLayers>ppdo-baseline,ppdo-combined</sourceLayers> + <targetLayer>ppdo-combined</targetLayer> + </merge> + <dedup class="Action"> + <target>documents.sections.sort:nsval(layer:ppdo-combined, start ^ "---" ^ end ^ "---" ^ @uri)</target> + <action>set:feat:keep-combined("yes")</action> + <setFeatures/> + </dedup> + <!-- dedicated flag: annotations merged from ppdo-baseline already carry keep=yes from their own dedup pass, so testing keep here would never remove them --> + <remove class="Action"> + <target>documents.sections.layer:ppdo-combined[@keep-combined != "yes"]</target> + <action>delete</action> + <deleteElements/> + </remove> + <export href="modules/export.plan"> + <!-- export both the matches from ppdo and the bbch patterns that have been aligned--> + <outLayer>documents.sections.(layer:ppdo-combined)</outLayer> + <!-- save to a tabular file--> + <outFile>ppdo-baseline-annotations.csv</outFile> + </export> + </merging-ppdo-bbch> + + </phenological-stages> + + + - <!-- filter overlapping annotations form baseline layer --> - <filter> - <overlaps class="RemoveOverlaps"> - <layer>ppdo-baseline</layer> - <removeEqual>false</removeEqual> - <removeIncluded>false</removeIncluded> - <removeOverlapping>true</removeOverlapping> - </overlaps> - </filter> - <!-- save ppdo stages to a tabular file --> - <export href="modules/export.plan"> - <outLayer>documents.sections.(layer:ppdo-baseline)</outLayer> - <outFile>ppdo-baseline-annotations.csv</outFile> - </export> - </ppdo> <others> <wine-bioagressors class="TabularProjector"> @@ -233,7 +333,7 @@ <!-- parameters of mapping --> <valueFeatures>$</valueFeatures> <allowJoined/> - <caseInsensitive/> + 
<allUpperCaseInsensitive>true</allUpperCaseInsensitive> <ignoreDiacritics/> <skipWhitespace/> <!-- place annotations to a layer --> diff --git a/modules/visualize.plan b/modules/visualize.plan new file mode 100644 index 0000000000000000000000000000000000000000..357ea535bbaf4e8ebdeaa731a14ef153e45f6acc --- /dev/null +++ b/modules/visualize.plan @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<alvisnlp-plan id="visualize"> + + <!-- Adding a feature by layer so we can target them with quickhtml --> + <add-crops-feat class="Action"> + <target>documents.sections.layer:fcu-baseline</target> + <action>set:feat:layer("crops")</action> + <setFeatures/> + </add-crops-feat> + <add-ppdo-feat class="Action"> + <target>documents.sections.layer:ppdo-baseline</target> + <action>set:feat:layer("ppdo")</action> + <setFeatures/> + </add-ppdo-feat> + <add-bbch-pattern-feat class="Action"> + <target>documents.sections.layer:bbch</target> + <action>set:feat:layer("bbch-pattern")</action> + <setFeatures/> + </add-bbch-pattern-feat> + + + <html class="QuickHTML"> + <outDir>html</outDir> + <classFeature>layer</classFeature> + <layers>fcu-baseline,ppdo-baseline,bbch</layers> + </html> + +</alvisnlp-plan> \ No newline at end of file