Commit cf18a6ea authored by Decoupes Remy's avatar Decoupes Remy
Browse files

add timexy

parent 31946ee6
No related merge requests found
Showing with 105 additions and 47 deletions
+105 -47
%% Cell type:code id:collectible-oasis tags:
``` python
# Import Libraries
import spacy
from spacy import displacy # Visualize spacy outputs
import requests # for geocoding with photon and OSM data
import timexy
```
%% Cell type:code id:electronic-consensus tags:
%% Cell type:code id:written-gathering tags:
``` python
# Download the spaCy models used in this notebook (IPython cell: `!` runs a shell command).
# `sys.executable` makes the download target the same interpreter that runs this kernel.
import sys
!{sys.executable} -m spacy download xx_ent_wiki_sm
!{sys.executable} -m spacy download en_core_web_sm
```
%% Cell type:code id:hydraulic-spread tags:
``` python
# Load the spaCy model.
# NOTE: "xx_ent_wiki_sm" is deliberately not loaded here: it does not work with timexy,
# and loading it only to overwrite `nlp` on the next line was wasted work.
nlp = spacy.load("en_core_web_sm")
```
%% Cell type:code id:judicial-shepherd tags:
``` python
# Example tweet text fed to the NLP pipeline.
tweet_content_example = "The Avian influenza crisis has occured in South of France since 2019"
```
%% Cell type:code id:pleasant-father tags:
``` python
# Configure the timexy component.
config = {
    "kb_id_type": "timex3",  # possible values: 'timex3' (default), 'timestamp'
    "label": "timexy",  # default: 'timexy'
    "overwrite": False,  # default: False
}
# Use spaCy for NER, with timexy inserted ahead of the built-in "ner" component.
# The guard keeps this cell re-runnable: add_pipe raises if a component named
# "timexy" is already present in the pipeline.
if "timexy" not in nlp.pipe_names:
    nlp.add_pipe("timexy", config=config, before="ner")
doc = nlp(tweet_content_example)  # run the NLP pipeline on the example tweet
displacy.render(doc, style="ent", jupyter=True)  # visualize entities in the sentence
```
%% Output
%% Cell type:code id:animated-waterproof tags:
%% Cell type:code id:compressed-fitness tags:
``` python
# Get all spatial and temporal named entities found in `doc`.
SPATIAL_LABELS = {"GPE"}  # use {"LOC"} instead when working with xx_ent_wiki_sm
# "DATE" is checked as in the original filter; "timexy" is the label configured
# for the timexy component, whose entities would otherwise be skipped.
TEMPORAL_LABELS = {"DATE", "timexy"}


def is_SNE(x):
    """Return True if entity `x` has a spatial label."""
    return x.label_ in SPATIAL_LABELS


def is_TNE(x):
    """Return True if entity `x` has a temporal label."""
    return x.label_ in TEMPORAL_LABELS


list_of_SNE = [ent for ent in doc.ents if is_SNE(ent)]
list_of_TNE = [ent for ent in doc.ents if is_TNE(ent)]
print(list_of_SNE)
print(list_of_TNE)
```
%% Output
[South of France]
[France]
[2019]
%% Cell type:code id:hydraulic-commission tags:
%% Cell type:code id:advised-mailman tags:
``` python
# Geocode the first spatial entity with the Photon API.
from urllib.parse import quote_plus

url_photon = "https://photon.komoot.io/api/?q="
# Percent-encode the entity text so characters such as '&' or '#' cannot alter
# the query string, and set a timeout so the cell cannot hang on a stalled connection.
reponses = requests.get(url_photon + quote_plus(str(list_of_SNE[0])), timeout=10)
reponses.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error body
reponses.json()['features'][0]
```
%% Output
{'geometry': {'coordinates': [31.5856417, 4.8547574], 'type': 'Point'},
{'geometry': {'coordinates': [1.8883335, 46.603354], 'type': 'Point'},
'type': 'Feature',
'properties': {'osm_id': 3567141206,
'country': 'South Sudan',
'city': 'Juba',
'countrycode': 'SS',
'postcode': 'PRIVATE BAG',
'locality': 'Juba Na Bari',
'county': 'Juba',
'type': 'house',
'osm_type': 'N',
'osm_key': 'office',
'street': 'Kololo Road',
'osm_value': 'diplomatic',
'name': 'Embassy of France',
'state': 'Central Equatoria'}}
'properties': {'osm_id': 2202162,
'osm_type': 'R',
'extent': [-178.3873749, 51.3055721, 172.3057152, -50.2187169],
'country': 'France',
'osm_key': 'place',
'countrycode': 'FR',
'osm_value': 'country',
'name': 'France',
'type': 'country'}}
%% Cell type:code id:actual-table tags:
``` python
# Get the date normalization: text, label and kb_id_ of the first temporal entity.
tne = list_of_TNE[0]
print(tne.text, tne.label_, tne.kb_id_, sep="\t")
```
%% Output
2019 DATE
%% Cell type:code id:hispanic-lesson tags:
``` python
# Run the pipeline on a second example and print each entity as text, label, kb_id_.
doc = nlp("Today is the 10.10.2010. I was in Paris for six years.")
for e in doc.ents:
    print("\t".join((e.text, e.label_, e.kb_id_)))
```
%% Output
Today DATE
10.10.2010 timexy TIMEX3 type="DATE" value="2010-10-10T00:00:00"
Paris GPE
six years timexy TIMEX3 type="DURATION" value="P6Y"
%% Cell type:code id:registered-european tags:
%% Cell type:code id:cosmetic-basis tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment