Newer
Older
from rasterio.windows import Window
from rasterio.features import shapes
import geopandas as gpd
import pandas as pd
from enum import Enum
from Common.otb_numpy_proc import to_otb_pipeline
from skimage.measure import regionprops, label
import otbApplication as otb
from tqdm import tqdm
from sklearn.impute import SimpleImputer
class OStats(Enum):
MEAN = 'mean'
STD = 'std'
MIN = 'min'
MAX = 'max'
def intensity_std(region, intensities):
# note the ddof arg to get the sample var if you so desire!
return np.std(intensities[region])
ref_data=None, ref_id_field='id', ref_class_field='class',
nominal_tile_size=5000, background_label=0):
with rio.open(self.object_layer, 'r') as src:
self.crs = src.crs
self.transform = src.transform
self.background_label = background_label
if type(nominal_tile_size) is tuple:
self.nominal_tile_size = nominal_tile_size
elif type(nominal_tile_size) is int:
self.nominal_tile_size = (nominal_tile_size, nominal_tile_size)
else:
sys.exit('Invalid nominal tile size')
self.tiles, self.obj_to_tile = self.generate_tile_info(self.nominal_tile_size, self.background_label)
# Init database
if ref_data is not None:
assert(ref_id_field is not None and ref_class_field is not None)
self.init_ref_db(ref_data, ref_id_field, ref_class_field)
# Computation rasters
self.raster_files = []
self.raster_stats = []
self.raster_var_names = []
self.raster_groups = []
self.output_maps = []
def init_ref_db(self, vector_file, id_field, class_field):
if isinstance(class_field, str):
class_field = [class_field]
ras_id = otb.Registry.CreateApplication('Rasterization')
ras_id.SetParameterString('in', vector_file)
ras_id.SetParameterString('im', self.object_layer)
ras_id.SetParameterString('mode', 'attribute')
ras_id.SetParameterString('mode.attribute.field', id_field)
ras_id.Execute()
ras_cl = []
for cf in class_field:
ras_cl.append(otb.Registry.CreateApplication('Rasterization'))
ras_cl[-1].SetParameterString('in', vector_file)
ras_cl[-1].SetParameterString('im', self.object_layer)
ras_cl[-1].SetParameterString('mode', 'attribute')
ras_cl[-1].SetParameterString('mode.attribute.field', cf)
ras_cl[-1].Execute()
in_seg = to_otb_pipeline(self.object_layer)
intensity_img = otb.Registry.CreateApplication('ConcatenateImages')
intensity_img.AddImageToParameterInputImageList('il', in_seg.GetParameterOutputImage('out'))
intensity_img.AddImageToParameterInputImageList('il', ras_id.GetParameterOutputImage('out'))
[intensity_img.AddImageToParameterInputImageList('il', rcl.GetParameterOutputImage('out')) for rcl in ras_cl]
intensity_img.Execute()
ref_ol = otb.Registry.CreateApplication('BandMath')
ref_ol.AddImageToParameterInputImageList('il', in_seg.GetParameterOutputImage('out'))
ref_ol.AddImageToParameterInputImageList('il', ras_id.GetParameterOutputImage('out'))
ref_ol.SetParameterString('exp', 'im1b1 * (im2b1 > 0)')
columns=['area', 'orig_label', 'polygon_id'] + class_field,
for tn, t in tqdm(self.tiles.items(), desc='Init. Ref. DB', total=len(self.tiles)):
r['index'][0], r['index'][1] = t[0], t[1]
r['size'][0], r['size'][1] = t[2], t[3]
ref_ol.PropagateRequestedRegion('out', r)
intensity_img.PropagateRequestedRegion('out', r)
tile_ref_ol = ref_ol.GetImageAsNumpyArray('out').astype(np.int32)
tile_int_img = intensity_img.GetVectorImageAsNumpyArray('out').astype(int)
rp = regionprops(tile_ref_ol, intensity_image=tile_int_img)
self.ref_db = pd.concat([
self.ref_db,
pd.DataFrame(
data=[np.insert(o.intensity_min, 0, o.area) for o in rp if self.obj_to_tile[o.label] == tn],
index=[o.label for o in rp if self.obj_to_tile[o.label] == tn]
def generate_tile_info(self, nominal_tile_size, background_label):
in_seg = to_otb_pipeline(self.object_layer)
self.W, self.H = in_seg.GetImageSize('out')
r = otb.itkRegion()
obj_bbox = {}
rw, rh = range(0, self.W, nominal_tile_size[0]), range(0, self.H, nominal_tile_size[1])
with tqdm(total=len(rw)*len(rh), desc='Gen. tile info') as pb:
for i in rw:
for j in rh:
r['index'][0], r['index'][1] = i, j
r['size'][0], r['size'][1] = min(nominal_tile_size[0], self.W-i), min(nominal_tile_size[1], self.H-j)
in_seg.PropagateRequestedRegion('out', r)
tile = in_seg.GetImageAsNumpyArray('out')
rp = regionprops(tile.astype(np.uint32))
for o in rp:
bbox = np.array([o.bbox[0]+j, o.bbox[1]+i, o.bbox[2]+j, o.bbox[3]+i])
if o.label in obj_bbox.keys():
obj_bbox[o.label].append(bbox)
else:
obj_bbox[o.label] = [bbox]
pb.update(1)
for o in obj_bbox.keys():
obj_bbox[o] = np.array(obj_bbox[o])
mn, mx = np.min(obj_bbox[o], axis=0), np.max(obj_bbox[o], axis=0)
obj_bbox[o] = [mn[0], mn[1], mx[2], mx[3]]
tx, ty = int(self.W / nominal_tile_size[0]) + 1, int(self.H / nominal_tile_size[1]) + 1
obj_to_tile = {}
tiles = dict.fromkeys(range(tx * ty))
for i in range(tx * ty):
tiles[i] = [np.inf, np.inf, 0, 0]
for o,bb in obj_bbox.items():
if o != background_label:
ix, iy = int(bb[1] / nominal_tile_size[0]), int(bb[0] / nominal_tile_size[1])
tiles[idx][0] = int(min(bb[1], tiles[idx][0]))
tiles[idx][1] = int(min(bb[0], tiles[idx][1]))
tiles[idx][2] = int(max(bb[3], tiles[idx][2]))
tiles[idx][3] = int(max(bb[2], tiles[idx][3]))
tiles = {k: [v[0],v[1],v[2]-v[0],v[3]-v[1]] for k,v in tiles.items() if not np.isinf(v[0])}
return tiles, obj_to_tile
def compute_stats_on_tile(self, tile_num, input_image, stats=[OStats.MEAN], on_ref=False):
r = otb.itkRegion()
r['index'][0], r['index'][1] = self.tiles[tile_num][0], self.tiles[tile_num][1]
r['size'][0], r['size'][1] = self.tiles[tile_num][2], self.tiles[tile_num][3]
if not on_ref:
obj = to_otb_pipeline(self.object_layer)
obj.PropagateRequestedRegion('out', r)
clip_obj = obj.ExportImage('out')
tile_obj = np.squeeze(clip_obj['array']).astype(np.uint32)
else:
tmp_er = otb.Registry.CreateApplication('ExtractROI')
tmp_er.SetParameterInputImage('in', self.ref_obj_layer_pipe[-1].GetParameterOutputImage('out'))
tmp_er.SetParameterInt('startx', r['index'][0])
tmp_er.SetParameterInt('starty', r['index'][1])
tmp_er.SetParameterInt('sizex', r['size'][0])
tmp_er.SetParameterInt('sizey', r['size'][1])
tmp_er.Execute()
tile_obj = tmp_er.GetImageAsNumpyArray('out').astype(np.uint32)
si = otb.Registry.CreateApplication('Superimpose')
si.SetParameterString('inm', input_image)
si.SetParameterString('inr', self.object_layer)
si.SetParameterString('interpolator', 'nn')
si.Execute()
si.PropagateRequestedRegion('out', r)
clip_img = si.ExportImage('out')
tile_img = np.squeeze(clip_img['array'])
if OStats.STD in stats:
op = regionprops(tile_obj, intensity_image=tile_img, extra_properties=[intensity_std])
else:
op = regionprops(tile_obj, intensity_image=tile_img)
out_stats = {}
for o in op:
if (not on_ref and self.obj_to_tile[o.label] == tile_num) or \
(on_ref and self.obj_to_tile[self.ref_db["orig_label"][o.label]] == tile_num):
out_stats[o.label] = []
if OStats.MEAN in stats:
out_stats[o.label].append(o.intensity_mean)
if OStats.STD in stats:
out_stats[o.label].append(o.intensity_std)
if OStats.MIN in stats:
out_stats[o.label].append(o.intensity_min)
if OStats.MAX in stats:
out_stats[o.label].append(o.intensity_max)
out_stats[o.label] = np.array(out_stats[o.label]).flatten(order='F')
if len(out_stats) > 0:
return out_stats.keys(), np.vstack(list(out_stats.values()))
else:
return None, None
def add_raster_for_stats(self, raster_file, raster_name=None, stats=[OStats.MEAN], as_group=True):
self.raster_files.append(raster_file)
self.raster_stats.append(stats)
if raster_name is None:
raster_name = os.path.splitext(os.path.basename(raster_file))[0]
with rio.open(raster_file) as ds:
n_bands = ds.count
to_append = [raster_name + '_{}_{}'.format(i+1,j.value) for i in range(n_bands) for j in stats]
if as_group:
self.raster_groups.extend(range(self.n_vars, self.n_vars+len(to_append)))
self.raster_var_names.append(to_append)
return
def add_raster_time_series_for_stats(self, raster_list, ts_name=None, stats=[OStats.MEAN]):
#Check
n_bands = []
for r in raster_list:
with rio.open(r) as ds:
n_bands.append(ds.count)
assert all(x == n_bands[0] for x in n_bands)
n_bands = n_bands[0]
for i,r in enumerate(raster_list):
if ts_name is not None:
raster_name = '{}_D{}'.format(ts_name,i)
self.add_raster_for_stats(r, stats=stats, raster_name=ts_name, as_group=False)
for i in range(n_bands):
for s in range(len(stats)):
self.raster_groups.append(list(range(len(stats)*i+s+pos,
len(stats)*i+s+pos+len(stats)*n_bands*len(raster_list),
len(stats)*n_bands)))
def get_full_stats_on_tile(self, tilenum):
out_db = pd.DataFrame(index=[o for o,t in self.obj_to_tile.items() if t == tilenum])
for rf,rs,rv in zip(self.raster_files, self.raster_stats, self.raster_var_names):
k,v = self.compute_stats_on_tile(tilenum, rf, rs)
if k is not None:
out_db.loc[k,rv] = v
return out_db
def populate_ref_db(self):
assert(self.ref_db is not None)
for t in tqdm(self.tiles.keys(), desc="Zonal stats on DB", total=len(self.tiles)):
for rf, rs, rv in zip(self.raster_files, self.raster_stats, self.raster_var_names):
k, v = self.compute_stats_on_tile(t, rf, rs, on_ref=True)
if k is not None:
self.ref_db.loc[k,rv] = v
def export_reference_db(self, out_vector): #TO BE CHANGED
with rio.open(self.object_layer) as ds:
feats = [{'properties': {'sample_id': v}, 'geometry': s} for s, v in
shapes(self.ref_obj_layer.astype(np.int32), mask=self.ref_obj_layer > 0, transform=ds.transform)]
crs = ds.crs.to_epsg()
vds = gpd.GeoDataFrame.from_features(feats)
vds = vds.set_crs(epsg=crs)
vds = vds.join(self.ref_db, on="sample_id")
vds.to_file(out_vector)
return
def get_vars(self):
return [item for sublist in self.raster_var_names for item in sublist]
def get_reference_db_as_training_base(self, class_field='class'):
if isinstance(class_field, str):
class_field = [class_field]
assert(self.ref_db is not None and len(self.raster_var_names)>0)
vars = self.get_vars()
out = {}
out['vars'] = vars
out['obj_id'] = self.ref_db['orig_label'].to_numpy(dtype=int)
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
out['X'] = imp.fit_transform(self.ref_db[vars].to_numpy())
out['perc2'] = np.zeros(out['X'].shape[1])
out['perc98'] = np.zeros(out['X'].shape[1])
m,M = np.percentile(tmp, [2, 98])
if isinstance(g, list):
for x in g:
out['perc2'][x] = m
out['perc98'][x] = M
out['perc2'][g] = m
out['perc98'][g] = M
out['X'][:,g] = (tmp - m)/(M - m)
for cf in class_field:
out[cf] = self.ref_db[cf].to_numpy(dtype=int)
out['groups'] = self.ref_db['polygon_id'].to_numpy(dtype=int)
return out
def tiled_data(self, normalize=None):
vars = [item for sublist in self.raster_var_names for item in sublist]
for tilenum in self.tiles.keys():
tile_data = self.get_full_stats_on_tile(tilenum)
L = tile_data.index.to_numpy(dtype=np.int32)
# Probably sub-optimal and error-prone, uses mean value from tile only
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
X = imp.fit_transform(tile_data[vars].to_numpy())
if normalize is not None:
X = (X - normalize[0]) / (normalize[1] - normalize[0])
def create_new_map(self):
self.output_maps.append(np.zeros((self.H, self.W), dtype=np.int))
return
def populate_map(self, tilenum, obj_id, classes, output_file=None, compress='NONE'):
r = otb.itkRegion()
r['index'][0], r['index'][1], r['size'][0], r['size'][1] = self.tiles[tilenum]
obj = to_otb_pipeline(self.object_layer)
obj.PropagateRequestedRegion('out', r)
clip_obj = obj.ExportImage('out')
tile_obj = np.squeeze(clip_obj['array']).astype(np.uint32)
tmp = np.zeros(np.max(tile_obj) + 1, dtype=int)
tmp[obj_id] = classes
if output_file is None:
self.output_map[-1][self.tiles[tilenum][1]:self.tiles[tilenum][1]+self.tiles[tilenum][3],
self.tiles[tilenum][0]:self.tiles[tilenum][0]+self.tiles[tilenum][2]] += tmp[tile_obj]
else:
if not os.path.exists(os.path.dirname(output_file)):
os.makedirs(os.path.dirname(output_file))
if tilenum == 0 and os.path.exists(output_file):
os.remove(output_file)
mode = 'w+' if not os.path.exists(output_file) else 'r+'
with rio.open(output_file, mode, driver='GTiff', width=self.W, height=self.H, crs=self.crs,
transform=self.transform, count=1, dtype=np.uint16) as dst:
# issue with compress option?
#transform=self.transform, count=1, dtype=np.uint16, compress=compress) as dst:
win = Window(self.tiles[tilenum][0], self.tiles[tilenum][1],
self.tiles[tilenum][2], self.tiles[tilenum][3])
x = dst.read(1, window=win)
dst.write(x + tmp[tile_obj], window=win, indexes=1)
def export_raster_map(self, output_file, compress=False):
assert (self.output_map is not None)
if not os.path.exists(os.path.dirname(output_file)):
os.makedirs(os.path.dirname(output_file))
obj = to_otb_pipeline(self.object_layer)
arr = obj.ExportImage('out')
arr['array'] = self.output_map.astype(np.float32)
obj.ImportImage('in', arr)
if compress:
output_file = output_file + "?gdal:co:compress=deflate"
obj.SetParameterString('out', output_file)
obj.SetParameterOutputImagePixelType('out', otb.ImagePixelType_uint16)
obj.ExecuteAndWriteOutput()
def populate_export_raster_map(self, obj_id, classes, output_file, compress='NONE'):
for tn in tqdm(self.tiles.keys(), desc="Writing output map"):
self.populate_map(tn,obj_id,classes,output_file,compress)
def true_pred_bypixel(self, labels, predicted_classes, class_field='class'):
pred_c = np.zeros(int(np.max(self.ref_db['orig_label']))+1)
pred_c[labels] = predicted_classes
support = []
for tn, t in self.tiled_objects(on_ref=True):
support.append(t[np.isin(t, labels)])
support = np.concatenate(support)
pred = pred_c[support]
true_c = np.zeros(int(np.max(self.ref_db['orig_label']))+1)
# ATTENTION: works if "labels" is sorted (as provided by get_reference_...)
true_c[labels] = self.ref_db.loc[self.ref_db['orig_label'].isin(labels),class_field].to_numpy(dtype=int)
return true[pred>0], pred[pred>0]
def tiled_objects(self, on_ref=False):
assert(self.tiles is not None)
idx = -1 if on_ref else 0
r = otb.itkRegion()
for tn, t in self.tiles.items():
r['index'][0], r['index'][1] = t[0], t[1]
r['size'][0], r['size'][1] = t[2], t[3]
self.ref_obj_layer_pipe[idx].PropagateRequestedRegion('out', r)
arr = self.ref_obj_layer_pipe[idx].GetImageAsNumpyArray('out').astype(np.uint32)
yield tn, arr