An error occurred while loading the file. Please try again.
-
Pokiros authored4ab22185
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import json
from gzip import GzipFile
import sys
from collections import defaultdict
import time
from config.configuration import Configuration
from wikidata.helpers import *
from wikidata.types_wd import *
from wikidata.entity_wd import *
from wikidata.property_wd import *
from wikidata.reader import Reader
from wikidata.process_wd import *
# Module-level configuration loaded from config/configuration.json when this
# file is imported; `config.lang_list` is read below when setting the labels
# and the aliases of each extracted entity.
config=Configuration("config/configuration.json")
class BasicExtraction(Process):
    """Collect basic data about Wikidata entries into ``self.dataframe``.

    An entry is kept when it has an "instance of" (P31) statement and at
    least one of: a GeoNames ID (P1566), an OSM ID (P402), a P131 statement
    or a P706 statement. Kept entries are stored in ``self.dataframe`` under
    their ``entry["id"]``.
    """

    def __init__(self, id, labels_fn, page_rank):
        """Create the property extractors and load the two JSON resources.

        Parameters
        ----------
        id :
            Identifier forwarded unchanged to ``Process.__init__``.
        labels_fn : str
            Path to a UTF-8 JSON file; its content is passed to ``setlabels``
            for every kept entry.
        page_rank : str
            Path to a UTF-8 JSON file loaded into ``self.scores``; it is
            looked up by entry id in ``processItem``.
        """
        # The original wrapped the base-class call inside a throw-away
        # ``super(...)`` object; call the parent initializer directly.
        super().__init__(id)

        # Extracted entities, keyed by Wikidata entry id.
        self.dataframe = {}

        # NOTE(review): the boolean passed to Property presumably flags
        # multi-valued properties -- confirm in wikidata.property_wd.
        self.instance_of_prop = Property("P31", True, EntityID())
        self.coord_prop = Property("P625", False, Coordinates())
        self.geoname_prop = Property("P1566", False, ExternalIdentifier())
        self.osm_prop = Property("P402", False, ExternalIdentifier())
        self.loc_prop = Property('P131', True, EntityID())
        self.terrain_prop = Property('P706', True, EntityID())
        self.continent_prop = Property("P30", False, EntityID())
        self.country_prop = Property("P17", False, EntityID())

        print("Loading the geonames labels...")
        # Context managers guarantee the files are closed even when the JSON
        # is malformed and json.load raises.
        with open(labels_fn, encoding='utf-8') as labels_file:
            self.labels_list = json.load(labels_file)
        with open(page_rank, encoding='utf-8') as scores_file:
            self.scores = json.load(scores_file)

    def processItem(self, entry):
        """Extract one entry into ``self.dataframe`` if it qualifies.

        Entries without a P31 statement, or with none of P1566 / P402 /
        P131 / P706, are ignored. When extracting P31, P131 or P706 fails,
        the entry id is printed and that field is left unset; the remaining
        fields are still extracted.
        """
        if not self.instance_of_prop.exists(entry):
            return

        # Same short-circuit order as the original ``or`` chain.
        spatial_props = (
            self.geoname_prop,
            self.osm_prop,
            self.loc_prop,
            self.terrain_prop,
        )
        if not any(prop.exists(entry) for prop in spatial_props):
            return

        entity = Entity()

        # Labels and aliases.
        setlabels(entity, entry, self.labels_list, config.lang_list)
        self.get_alias(entity, entry)

        # "Instance of" is known to exist thanks to the guard above.
        # Best effort: report the entry id and carry on when extraction fails
        # (``except Exception`` so Ctrl-C / SystemExit still propagate).
        try:
            entity.instance_of = self.instance_of_prop.extractData(entry)
        except Exception:
            print(entry["id"])

        # Fields whose extraction is allowed to fail (same best-effort policy).
        guarded_fields = (
            ('P131', self.loc_prop),
            ('P706', self.terrain_prop),
        )
        for key, prop in guarded_fields:
            if prop.exists(entry):
                try:
                    entity[key] = prop.extractData(entry)
                except Exception:
                    print(entry["id"])

        # Optional fields, copied only when the property is present.
        optional_fields = (
            ("coord", self.coord_prop),
            ("osmID", self.osm_prop),
            ("geonameID", self.geoname_prop),
            ("country", self.country_prop),
            ("continent", self.continent_prop),
        )
        for key, prop in optional_fields:
            if prop.exists(entry):
                entity[key] = prop.extractData(entry)

        entry_id = entry["id"]
        if entry_id in self.scores:
            entity['score'] = self.scores[entry_id]

        self.dataframe[entry_id] = entity

    def get_alias(self, entity, entry):
        """Store the aliases of ``entry`` in ``entity["aliases"]``.

        The stored value is a dict with one key per language of
        ``config.lang_list``; each value is the list of aliases found for
        that language (empty when the entry has none).
        """
        # Tolerate entries that carry no (or an empty) "aliases" field.
        raw_aliases = entry.get("aliases") or {}

        alias = {}
        for lang in config.lang_list:
            alias[lang] = []
            if lang in raw_aliases:
                # NOTE(review): the Wikibase JSON format stores the alias
                # text under "value"; the original code read "values".
                # Both are accepted here -- confirm against what the Reader
                # actually yields.
                alias[lang] = [
                    item["values"] if "values" in item else item["value"]
                    for item in raw_aliases[lang]
                ]
        entity["aliases"] = alias