An error occurred while loading the file. Please try again.
-
Pokiros authored6140bbb5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# coding=utf-8
import json
from gzip import GzipFile
import sys
from collections import defaultdict
import time
from wikidata.helpers import *
from wikidata.types_wd import *
from wikidata.entity_wd import *
from wikidata.property_wd import *
from wikidata.reader import Reader
from wikidata.process_wd import *
class PropertyExtract(Process):
    """Extract selected property values from entries into a preloaded mapping.

    On construction, a JSON file is loaded into ``self.dataframe``. Each entry
    handed to :meth:`processItem` whose ``'id'`` is a key of that mapping has
    the configured properties extracted and stored under
    ``self.dataframe[entry_id][property_id]``.
    """

    def __init__(self, id, properties, data):
        """Initialise the process and load the target mapping from disk.

        Args:
            id: Identifier forwarded unchanged to ``Process.__init__``.
            properties: Mapping with a ``'properties_to_extract'`` key holding
                an iterable of property descriptors. Each descriptor is
                indexed with ``'id'``, ``'isMultiple'`` and ``'type'``, where
                ``'type'`` must be one of the keys of ``self.isType``.
            data: Path of a UTF-8 encoded JSON file. Its content is expected
                to be an object keyed by entry id (presumably with dict
                values, since property values are assigned into them --
                verify against the file producer).

        Raises:
            OSError: If ``data`` cannot be opened.
            json.JSONDecodeError: If ``data`` does not contain valid JSON.
        """
        # The previous form, super(PropertyExtract, Process.__init__(self, id)),
        # called the parent initialiser and then built a throw-away unbound
        # super object from its None result. Call the parent directly instead.
        super().__init__(id)

        self.properties_to_extract = properties['properties_to_extract']

        # Maps the 'type' string of a property descriptor to the datatype
        # handler instance handed to Property.
        self.isType = {
            "EntityID": EntityID(),
            "String": String(),
            "Coordinates": Coordinates(),
            "URL": URL(),
            "Time": Time(),
            "Quantity": Quantity(),
            "ExternalIdentifier": ExternalIdentifier(),
        }

        print("Extracting property...")
        # Context manager guarantees the handle is closed even when parsing
        # fails part-way through.
        with open(data, encoding='utf-8') as f:
            self.dataframe = json.load(f)

    def processItem(self, entry):
        """Store the configured property values of ``entry`` in the mapping.

        Entries whose ``'id'`` is not a key of ``self.dataframe`` are ignored.
        Extraction is best-effort: if extracting or storing a property raises,
        the entry id is printed and processing continues with the next
        property.

        Args:
            entry: Mapping with at least an ``'id'`` key; it is passed as-is
                to ``Property.exists`` and ``Property.extractData``.
        """
        entry_id = entry['id']
        if entry_id not in self.dataframe:
            return

        for prop in self.properties_to_extract:
            temp_prop = Property(
                prop['id'], prop["isMultiple"], self.isType[prop['type']]
            )
            if not temp_prop.exists(entry):
                continue
            try:
                self.dataframe[entry_id][prop['id']] = temp_prop.extractData(entry)
            except Exception:
                # Deliberately tolerant: report the offending entry and keep
                # going. Unlike a bare except, this lets KeyboardInterrupt and
                # SystemExit propagate.
                print(entry["id"])