``` python
# Import Libraries
import spacy
from spacy import displacy # Visualize spacy outputs
import requests # for geocoding with photon and OSM data
import timexy
``` python
# Download the spaCy models (multilingual xx_ent_wiki_sm and English en_core_web_sm)
import sys
!{sys.executable} -m spacy download xx_ent_wiki_sm
!{sys.executable} -m spacy download en_core_web_sm
``` python
# Load the spaCy model.
# The multilingual "xx_ent_wiki_sm" model is not loaded here because it does
# not work with timexy; the English "en_core_web_sm" model is used instead.
nlp = spacy.load("en_core_web_sm")
``` python
# Example tweet text that is run through the NLP pipeline.
tweet_content_example = "The Avian influenza crisis has occured in South of France since 2019"
``` python
# Named-entity recognition with spaCy, with timexy added for temporal expressions.
# timexy component settings:
config = {
    "kb_id_type": "timex3",  # possible values: 'timex3'(default), 'timestamp'
    "label": "timexy",  # default: 'timexy'
    "overwrite": False,  # default: False
}
# Insert timexy ahead of the built-in "ner" component. The guard keeps the cell
# re-runnable: adding a component whose name is already in the pipeline raises.
if "timexy" not in nlp.pipe_names:
    nlp.add_pipe("timexy", config=config, before="ner")
doc = nlp(tweet_content_example)  # run the NLP pipeline on the example tweet
displacy.render(doc, style="ent", jupyter=True)  # visualize the entities in the sentence
``` python
# Predicates that pick out spatial and temporal named entities by their label.
# "LOC" was the spatial label used with xx_ent_wiki_sm; "GPE" is the one used
# with the model that is currently loaded.
def is_SNE(x):
    """Return True when entity *x* is a spatial named entity (label "GPE")."""
    return x.label_ == "GPE"


def is_TNE(x):
    """Return True when entity *x* is a temporal named entity (label "DATE")."""
    return x.label_ == "DATE"


# Accumulators for the spatial and temporal entities found in the document.
list_of_SNE = []
list_of_TNE = []
for ner in doc.ents:
if is_SNE(ner):
if is_TNE(ner):
%% Output
[South of France]
``` python
# Geocode the first spatial named entity (photon / OSM data, per the import note).
# NOTE(review): url_photon is an empty string here, so the requested URL is only
# the entity text — presumably the photon endpoint belongs here; confirm.
url_photon = ""
# The entity text is appended to the base URL as-is (no explicit URL-encoding).
reponses = requests.get(url_photon + str(list_of_SNE[0]))
%% Output
{'geometry': {'coordinates': [31.5856417, 4.8547574], 'type': 'Point'},
{'geometry': {'coordinates': [1.8883335, 46.603354], 'type': 'Point'},
'type': 'Feature',
'properties': {'osm_id': 3567141206,
'country': 'South Sudan',
'city': 'Juba',
'countrycode': 'SS',
'postcode': 'PRIVATE BAG',
'locality': 'Juba Na Bari',
'county': 'Juba',
'type': 'house',
'osm_type': 'N',
'osm_key': 'office',
'street': 'Kololo Road',
'osm_value': 'diplomatic',
'name': 'Embassy of France',
'state': 'Central Equatoria'}}
'properties': {'osm_id': 2202162,
'osm_type': 'R',
'extent': [-178.3873749, 51.3055721, 172.3057152, -50.2187169],
'country': 'France',
'osm_key': 'place',
'countrycode': 'FR',
'osm_value': 'country',
'name': 'France',
'type': 'country'}}
``` python
# get date normalization
tne = list_of_TNE[0]
%% Output
2019 DATE
``` python
doc = nlp("Today is the 10.10.2010. I was in Paris for six years.")
for e in doc.ents:
%% Output
Today DATE
10.10.2010 timexy TIMEX3 type="DATE" value="2010-10-10T00:00:00"
Paris GPE
six years timexy TIMEX3 type="DURATION" value="P6Y"
``` python
