From d9c13cf6a69b6cdc1e674eabc6d0da00e700ba3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phan=20Bernard?= <stephan.bernard@inrae.fr> Date: Thu, 6 Oct 2022 14:21:06 +0200 Subject: [PATCH] Corrections sur le plan BBCH. --- modules/export.plan | 5 ++-- modules/patterns/bbch.plan | 51 ++++++++++++++++++++++++-------------- modules/preprocess.plan | 8 +++--- modules/projectorsSB.plan | 6 ++--- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/modules/export.plan b/modules/export.plan index aed9542..c2a5047 100644 --- a/modules/export.plan +++ b/modules/export.plan @@ -37,9 +37,10 @@ @skos-prefLabel; @canonical-form; <!-- context before --> - str:normalizeSpace(str:sub(section.contents, m:max(0, start - 20), start)); + <!-- str:normalizeSpace(str:sub(section.contents, m:max(0, start - 20), start)); --> + str:seds(str:normalizeSpace(str:sub(section.contents, m:max(0, start - 20), start)), "\"", "'"); <!-- context after --> - str:normalizeSpace(str:sub(section.contents, end, m:min(end + 20, str:len(section.contents)))); + str:seds(str:normalizeSpace(str:sub(section.contents, end, m:min(end + 20, str:len(section.contents)))), "\"", "'"); <!-- location --> start ^ "-" ^ end; <!-- number of words --> diff --git a/modules/patterns/bbch.plan b/modules/patterns/bbch.plan index 5d4014d..895204b 100644 --- a/modules/patterns/bbch.plan +++ b/modules/patterns/bbch.plan @@ -55,7 +55,7 @@ <pattern> [ @form =^ "BBCH" ] [ @form == "-"]? - [ @form =~ "[0-9]{2}" and @form =^ ")" ] + [ @form =~ "[0-9]{2}" and @form =^ ")" ] </pattern> <actions> <createAnnotation layer="bbch"/> @@ -92,7 +92,7 @@ [ @form ^= "BBCH" ] [ @form == ":" ] [ @form ^= "Stade" ] - (number2 : [ @form =~ "[0-9]{2}" ]) + (number2 : [ @form =~ "\\d{2}$" ]) </pattern> <actions> <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number2)'/> @@ -108,19 +108,34 @@ <pattern> [ @form == "BBCH" ] [ @form == "-"]? - (number3 : [ @form =~ "[0-9]{2}" ]) + (number3 : [ @form =~ "\\d{2}$" ]) [@form == "à " or @form == "-"] - (number4 : [ @form =~ "[0-9]{2}" ]) + (number4 : [ @form =~ "\\d{2}$" ]) </pattern> <actions> - <createAnnotation layer="bbch" features='number=group:number3,canonical-form=("BBCH " ^ group:number3)'/> - <createAnnotation layer="bbch" features='number=group:number4,canonical-form=("BBCH " ^ group:number4)'/> + <createAnnotation layer="bbch" features='number=(group:number3 =~ "..$"),canonical-form=("BBCH " ^ (group:number3 =~ "..$"))'/> + <createAnnotation layer="bbch" features='number=(group:number4 =~ "..$"),canonical-form=("BBCH " ^ (group:number4 =~ "..$"))'/> </actions> <constantAnnotationFeatures>type=BBCH_RANGE</constantAnnotationFeatures> </BBCH_RANGE> + <!-- BBCH 13-17 --> + <!-- Sans espace contenu dans "13-17" le pattern est différent de BBCH_RANGE --> + <BBCH_RANGE_GLUED class="PatternMatcher"> + <pattern> + [ @form == "BBCH" ] + [ @form == "-"]? + (number3bis : [ @form =~ "\\d{2}-\\d{2}$" ]) + </pattern> + <actions> + <createAnnotation layer="bbch" features='number=(group:number3bis =~ "^.."),canonical-form=("BBCH " ^ (group:number3bis =~ "^.."))'/> + <createAnnotation layer="bbch" features='number=(group:number3bis =~ "..$"),canonical-form=("BBCH " ^ (group:number3bis =~ "..$"))'/> + </actions> + <constantAnnotationFeatures>type=BBCH_RANGE</constantAnnotationFeatures> + </BBCH_RANGE_GLUED> + <!-- BBCH-69 et 73 --> <!-- BBCH 13, 18 ou 32 --> @@ -128,18 +143,18 @@ <pattern> [ @form =^ "BBCH" ] [ @form == "-"]? - (number5 : [ @form =~ "[0-9]{2}" ]) + (number5 : [ @form =~ "\\d{2}$" ]) [@lemma == "et" or @lemma == "ou" or @lemma == ","] - (number6 : [ @form =~ "[0-9]{2}" ]) + (number6 : [ @form =~ "\\d{2}$" ]) [@lemma == "et" or @lemma == "ou" or @lemma == ","]? - (number7 : [ @form =~ "[0-9]{2}" ])? + (number7 : [ @form =~ "\\d{2}$" ])? </pattern> <actions> - <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number5)'/> - <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number6)'/> - <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number7)'/> + <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ (group:number5 =~ "..$"))'/> + <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ (group:number6 =~ "..$"))'/> + <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ (group:number7 =~ "..$"))'/> </actions> <constantAnnotationFeatures>type=BBCH_DD_KON_DD</constantAnnotationFeatures> </BBCH_DD_KON_DD> @@ -156,14 +171,14 @@ [ @form == "à " ] [ @form == "100" ] [ @form == ")" ] - (number8 : [ @form =~ "[0-9]{2}" ]) - (number9 : [ @form =~ "[0-9]{2}" ])? - (number10 : [ @form =~ "[0-9]{2}" ])? + (number8 : [ @form =~ "[0-9]{2}$" ]) + (number9 : [ @form =~ "[0-9]{2}$" ])? + (number10 : [ @form =~ "[0-9]{2}$" ])? </pattern> <actions> - <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number8)'/> - <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number9)'/> - <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ group:number10)'/> + <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ (group:number8 =~ "..$"))'/> + <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ (group:number9 =~ "..$"))'/> + <createAnnotation layer="bbch" features='canonical-form=("BBCH " ^ (group:number10 =~ "..$"))'/> </actions> <constantAnnotationFeatures>type=BBCH_TABLE</constantAnnotationFeatures> </BBCH_TABLE> diff --git a/modules/preprocess.plan b/modules/preprocess.plan index 6f9af0f..459ff22 100644 --- a/modules/preprocess.plan +++ b/modules/preprocess.plan @@ -1,10 +1,8 @@ <?xml version="1.0" encoding="UTF-8"?> <alvisnlp-plan id="preprocess"> - <!-- segment text to phrases and tokenize --> - <import>resources/segmentation/segmentation.plan</import> - + <import href ="resources/segmentation/segmentation.plan" /> <!-- set correct pos tags for some nouns --> <correct-plants class="Action"> @@ -24,8 +22,8 @@ <tt class="TreeTagger"> <!-- global configs for treetagger --> <!-- Change to your own local paths --> - <treeTaggerExecutable>/home/stef/opt/share/alvisnlp/treetagger/bin/tree-tagger</treeTaggerExecutable> - <parFile>/home/stef/opt/share/alvisnlp/treetagger/lib/french.par</parFile> + <treeTaggerExecutable>/home/stef/opt/share/alvisnlp.externalTools/treetagger/bin/tree-tagger</treeTaggerExecutable> + <parFile>/home/stef/opt/share/alvisnlp.externalTools/treetagger/lib/french.par</parFile> <!-- parameters --> <noUnknownLemma/> <inputCharset>UTF-8</inputCharset> diff --git a/modules/projectorsSB.plan b/modules/projectorsSB.plan index 9a7d559..527d862 100644 --- a/modules/projectorsSB.plan +++ b/modules/projectorsSB.plan @@ -154,7 +154,7 @@ <!-- create a correspondance between matches and real concepts from ppdo--> <source>resources/thesaurus/ppdo/ppdo_20210726.rdf</source> <subject layer="baggiolini" feature="canonical-form"/> - <targetLayerName>ppdo-baseline</targetLayerName> + <targetLayerName>ppdo-baseline-DISABLE</targetLayerName> <!-- information that has to be saved and added to the baseline layer--> <resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs> <language>fr</language> @@ -175,7 +175,7 @@ <!-- create a correspondance between matches and real concepts from ppdo--> <source>resources/thesaurus/ppdo/ppdo_20210726.rdf</source> <subject layer="eicchorn-lorenz" feature="canonical-form"/> - <targetLayerName>ppdo-baseline</targetLayerName> + <targetLayerName>ppdo-baseline-DISABLE</targetLayerName> <!-- information that has to be saved and added to the baseline layer--> <resourceTypeURIs>owl:NamedIndividual</resourceTypeURIs> <language>fr</language> @@ -191,7 +191,7 @@ <filter> <overlaps class="RemoveOverlaps"> <layerName>ppdo-baseline</layerName> - <removeEqual/> + <removeEqual>false</removeEqual> <removeIncluded>false</removeIncluded> <removeOverlapping>true</removeOverlapping> </overlaps> -- GitLab