Commit a649440d authored by Gaetano Raffaele

ENH: Feature importances in report.

parent 33af9ebd
Showing with 78 additions and 39 deletions
@@ -66,8 +66,6 @@ class ObjectBasedClassifier:
             models[-1].fit(self.training_base['X'][tr_i], self.training_base[class_field][tr_i])
             l, c = self.training_base['obj_id'][ts_i], models[-1].predict(self.training_base['X'][ts_i])
             y_true, y_pred = self.obia_base.true_pred_bypixel(l, c, class_field)
-            if return_true_vs_pred:
-                yt_yp.append((y_true, y_pred))
             results.append(
                 {
                     'conf_matrix': confusion_matrix(y_true, y_pred),
@@ -77,6 +75,8 @@ class ObjectBasedClassifier:
                     'importances' : models[-1].feature_importances_
                 }
             )
+            if return_true_vs_pred:
+                results[-1]['true_vs_pred'] = (y_true, y_pred)
         all_imp = np.vstack([x['importances'] for x in results])
         summary = {
@@ -93,10 +93,7 @@ class ObjectBasedClassifier:
             'importance_mean': {k:v for k, v in zip(self.obia_base.get_vars(), np.mean(all_imp, axis=0))},
             'importance_std': {k:v for k, v in zip(self.obia_base.get_vars(), np.std(all_imp, axis=0))}
         }
-        if return_true_vs_pred:
-            return models, summary, results, yt_yp
-        else:
-            return models, summary, results
+        return models, summary, results

     def classify(self, model, output_file=None, compress='NONE'):
         prg = tqdm(desc='Classification', total=len(self.obia_base.tiles))
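For context, a minimal sketch (not part of this commit) of how a caller can consume the now-unified (models, summary, results) return value; the key names ('importance_mean', 'importance_std', 'true_vs_pred', 'accuracy', 'kappa') come from this diff, everything else is illustrative:

# Illustrative only: consuming the unified (models, summary, results) return value.
# 'obc' is assumed to be an already-configured ObjectBasedClassifier.
models, summary, results = obc.train_RF(100, return_true_vs_pred=True)

# Cross-fold feature ranking from the summary entries.
ranked = sorted(summary['importance_mean'].items(), key=lambda kv: kv[1], reverse=True)
for name, mean_imp in ranked[:10]:
    print('{:40s} {:.4f} +/- {:.4f}'.format(name, mean_imp, summary['importance_std'][name]))

# Per-fold reference/predicted labels now live inside each result dict.
for i, r in enumerate(results):
    y_true, y_pred = r['true_vs_pred']
    print('fold {}: accuracy={:.3f}, kappa={:.3f}'.format(i, r['accuracy'], r['kappa']))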
@@ -119,11 +116,11 @@
         return

 #TEST CODE
-def run_test():
-    obc = ObjectBasedClassifier('/DATA/Moringa_Sample/Parakou/output/segmentation/segmentation.tif',
-                                '/DATA/Moringa_Sample/Parakou/input/REF/ref_l2.shp',
-                                ['/DATA/Moringa_Sample/Parakou/output/S2_processed/T31PDL/*/*FEAT.tif'],
-                                ['/DATA/Moringa_Sample/Parakou/input/THR/THR_SPOT6.tif'],
+def run_test(sample_folder):
+    obc = ObjectBasedClassifier('{}/output/segmentation/segmentation.tif'.format(sample_folder),
+                                '{}/input/REF/ref_l2.shp'.format(sample_folder),
+                                ['{}/output/S2_processed/T31PDL/*/*FEAT.tif'.format(sample_folder)],
+                                ['{}/input/THR/THR_SPOT6.tif'.format(sample_folder)],
                                 ref_class_field=['class', 'Class_L1a'])
     '''
     obc = ObjectBasedClassifier('/DATA/Benin/OBSYDYA_data/MORINGA/SEGMENTATION/segmentation.tif',
@@ -131,11 +128,24 @@
                                 ['/DATA/Benin/OBSYDYA_data/MORINGA/basefolder/FEAT/S2_THEIA_FEAT/S2_THEIA_MOSAIC_*.tif'],
                                 glob.glob('/DATA/Benin/OBSYDYA_data/MORINGA/ext_features'))
     '''
-    obc.gen_k_folds(5, class_field='Class_L1a')
-    #obc.gen_hold_out(0.2, class_field='Class_L1a')
-    #m,yt_yp = obc.train_RF(100, return_true_vs_pred=True)
-    m1, s1, r1 = obc.train_RF(100, class_field='class')
-    m2, s2, r2 = obc.train_RF(100, class_field='Class_L1a')
-    obc.classify(m1, '/DATA/Moringa_Sample/Parakou/output/classification/firstmap_l1.tif')
-    obc.classify(m2, '/DATA/Moringa_Sample/Parakou/output/classification/firstmap_l2.tif')
-    return m1,s1,r1,m2,s2,r2
+    obc.gen_k_folds(5, class_field='class')
+    m, s, r = obc.train_RF(100, return_true_vs_pred=True)
+    obc.classify(m, '{}/output/classification/firstmap_l1.tif'.format(sample_folder))
+    d = {'model':m, 'results':r, 'summary':s}
+    import pickle
+    with open('{}/output/test_out.pkl'.format(sample_folder), 'wb') as f:
+        pickle.dump(d, f)
+    from Postprocessing import Report
+    of = Report.generate_report_figures(
+        '{}/output/classification/firstmap_l1.tif'.format(sample_folder),
+        '{}/input/txt/palette_L0a.clr'.format(sample_folder), d['results'], d['summary'],
+        '{}/output/reports'.format(sample_folder), 'Testou')
+    with open('{}/output/test_out_figs.pkl'.format(sample_folder), 'wb') as f:
+        pickle.dump(of, f)
+    Report.generate_pdf(of, '{}/output/reports/firstmap_l1_report.pdf'.format(sample_folder),
+                        'Testou')
+    return m, s, r
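A possible invocation of the parameterized test entry point, assuming the Parakou sample layout from the hard-coded paths removed above; the pickled outputs can then be reloaded without retraining:

# Illustrative only: run the test chain on a sample folder and reload its pickled outputs.
m, s, r = run_test('/DATA/Moringa_Sample/Parakou')

import pickle
with open('/DATA/Moringa_Sample/Parakou/output/test_out.pkl', 'rb') as f:
    d = pickle.load(f)   # {'model': ..., 'results': ..., 'summary': ...}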
@@ -30,22 +30,22 @@ def parse_colormap_file(fn):
     return labels, class_names, colors

-def generate_report_figures(map, yt_yp, palette_fn, out_dir, map_name=None):
+def generate_report_figures(map, palette_fn, results, summary, out_dir, map_name=None):
     labels, class_names, colors = parse_colormap_file(palette_fn)
     colors_norm = [(x[0]/255,x[1]/255,x[2]/255,x[3]/255) for x in colors]
     with plt.ioff():
-        font = {'family': 'Arial',
-                'weight': 'normal',
+        font = {'weight': 'normal',
                 'size': 8}
         plt.rc('font', **font)
         if not os.path.exists(out_dir):
             os.makedirs(out_dir)
-        if not isinstance(yt_yp, list):
-            yt_yp = [yt_yp]
+        if not isinstance(results, list):
+            results = [results]
         of = {}
         of['conf_matrices'] = []
-        for i,r in enumerate(yt_yp):
-            cm = ConfusionMatrixDisplay.from_predictions(r[0], r[1], normalize='true', include_values=True)
+        for i,r in enumerate(results):
+            cm = ConfusionMatrixDisplay.from_predictions(r['true_vs_pred'][0], r['true_vs_pred'][1],
+                                                         normalize='true', include_values=True)
             of['conf_matrices'].append('{}/conf_matrix_{}.png'.format(out_dir, str(i).zfill(2)))
             cm.ax_.set_xticklabels(class_names, rotation=45, ha='right')
             cm.ax_.set_yticklabels(class_names)
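For reference, a self-contained toy example (not project code) of the scikit-learn call used above to render one fold's normalized confusion matrix; labels and class names here are made up:

# Toy example of the ConfusionMatrixDisplay usage above (all data made up).
from sklearn.metrics import ConfusionMatrixDisplay

y_true = [0, 0, 1, 1, 2, 2, 2]
y_pred = [0, 1, 1, 1, 2, 0, 2]
cm = ConfusionMatrixDisplay.from_predictions(y_true, y_pred, normalize='true', include_values=True)
cm.ax_.set_xticklabels(['built-up', 'crops', 'water'], rotation=45, ha='right')
cm.ax_.set_yticklabels(['built-up', 'crops', 'water'])
cm.figure_.savefig('conf_matrix_toy.png', dpi=300)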
@@ -55,36 +55,65 @@ def generate_report_figures(map, yt_yp, palette_fn, out_dir, map_name=None):
             cm.figure_.savefig(of['conf_matrices'][-1], dpi=300)
         of['cl_rep'] = []
-        summary = []
-        for r in yt_yp:
-            of['cl_rep'].append(classification_report(r[0], r[1], output_dict=True, target_names=class_names))
+        for r in results:
+            of['cl_rep'].append(classification_report(r['true_vs_pred'][0], r['true_vs_pred'][1],
+                                                      output_dict=True, target_names=class_names))
         fsc = [np.array([x[c]['f1-score'] for x in of['cl_rep']]) for c in class_names]
         fsc_m = [np.mean(x) for x in fsc]
         fsc_s = [np.std(x) for x in fsc]
         fig, ax = plt.subplots()
-        ax.bar(range(len(class_names)), fsc_m, yerr=fsc_s, align="center", ecolor='black', capsize=10, color=colors_norm)
+        ax.bar(range(len(class_names)), fsc_m, yerr=fsc_s, align="center", width=0.3,
+               ecolor='black', capsize=10, color=colors_norm)
         ax.set_xticks(range(len(class_names)))
         ax.set_xticklabels(class_names, rotation=45, ha='right')
         ax.set_title('Per-class F1-scores', fontsize=12, fontweight='bold', pad=10)
         ax.yaxis.grid(True)
-        plt.tight_layout()
         of['summary'] = '{}/f1scores.png'.format(out_dir)
+        fig.set_figwidth(4)
+        plt.tight_layout()
         plt.savefig(of['summary'], dpi=300)
+        imp_m = list(summary['importance_mean'].values())
+        imp_s = list(summary['importance_std'].values())
+        imp_n = list(summary['importance_mean'].keys())
+        imp_n = [x for _, x in sorted(zip(imp_m, imp_n), reverse=True)]
+        imp_s = [x for _, x in sorted(zip(imp_m, imp_s), reverse=True)]
+        imp_m = sorted(imp_m, reverse=True)
+        c_imp = np.cumsum(imp_m)
+        idx = np.where(c_imp<0.75*c_imp[-1])[0][-1]
+        imp_m = imp_m[:idx]
+        imp_s = imp_s[:idx]
+        imp_n = imp_n[:idx]
+        fig, ax = plt.subplots()
+        ax.barh(range(len(imp_n)), imp_m, xerr=imp_s, align='center')
+        ax.set_yticks(range(len(imp_n)))
+        ax.set_yticklabels(imp_n, fontsize=6)
+        ax.invert_yaxis()
+        ax.set_title('Feature Importances')
+        ax.set_xlabel('Mean Decrease in Impurity')
+        ax.xaxis.grid(True)
+        of['importances'] = '{}/importances.png'.format(out_dir)
+        fig.set_figwidth(5)
+        plt.tight_layout()
+        plt.savefig(of['importances'], dpi=300)
         if map_name is None:
             map_name = 'output'
-        of['quicklook'] = create_map_quicklook_and_legend(map, labels, colors, class_names, yt_yp, out_dir, map_name)
+        of['quicklook'] = create_map_quicklook_and_legend(map, labels, colors, class_names, results, out_dir, map_name)
         return of

-def create_map_quicklook_and_legend(map, labels, colors, class_names, yt_yp, out_dir, name='', qkl_height=1024):
+def create_map_quicklook_and_legend(map, labels, colors, class_names, results, out_dir, name='', qkl_height=1024):
     if not os.path.exists(out_dir):
         os.makedirs(out_dir)
     oa,k = [],[]
-    for yt,yp in yt_yp:
-        oa.append(accuracy_score(yt, yp))
-        k.append(cohen_kappa_score(yt, yp))
+    if not isinstance(results, list):
+        results = [results]
+    for r in results:
+        oa.append(r['accuracy'])
+        k.append(r['kappa'])
     oam, oas = np.mean(np.array(oa)), np.std(np.array(oa))
     km, ks = np.mean(np.array(k)), np.std(np.array(k))
     with rasterio.open(map) as ds:
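The new importances panel keeps only the top-ranked variables whose cumulative mean importance stays below 75% of the total; a standalone sketch of that selection on made-up values (the [:idx] slice keeps the entries before index idx):

# Illustrative only: the cumulative-importance cut applied above, on made-up values.
import numpy as np

imp_m = [0.30, 0.20, 0.15, 0.12, 0.10, 0.08, 0.05]   # mean importances, already sorted descending
c_imp = np.cumsum(imp_m)                             # 0.30 0.50 0.65 0.77 0.87 0.95 1.00
idx = np.where(c_imp < 0.75 * c_imp[-1])[0][-1]      # -> 2, last position still below the 75% mark
print(imp_m[:idx])                                   # -> [0.3, 0.2], the bars that end up in the chart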
@@ -98,8 +127,7 @@ def create_map_quicklook_and_legend(map, labels, colors, class_names, yt_yp, out_dir, name='', qkl_height=1024):
     cmap[labels] = np.array([np.array(list(c)) for c in colors])
     img = cmap[smap].astype(int)
-    font = {'family': 'Arial',
-            'weight': 'normal',
+    font = {'weight': 'normal',
             'size': 6}
     plt.rc('font', **font)
     fig, ax = plt.subplots()
@@ -132,7 +160,8 @@ def generate_pdf(of, out_pdf, name='output'):
     txt = 'Moringa Final Report for Chain {}, {}'.format(name, datetime.now().strftime('%Y-%m-%d %Hh%M'))
     pdf.cell(0, txt=txt, align='C')
     pdf.image(of['quicklook'], 14, 24, h=140)
-    pdf.image(of['summary'], 22, 170, h=120)
+    pdf.image(of['summary'], 8, 170, w=86)
+    pdf.image(of['importances'], 100, 170, w=100)
     # Pages 2-end, Per-fold assessment
     for i,(cm,rep) in enumerate(zip(of['conf_matrices'], of['cl_rep'])):
         pdf.add_page()
......
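For orientation, a minimal fpdf sketch (placeholder file names, not project code) of the page-1 layout produced above: the quicklook on top, the F1 summary and the new importances chart side by side below it, using fpdf's default A4 portrait page and millimetre units:

# Illustrative only: page-1 image placement with fpdf, mirroring the coordinates above.
from fpdf import FPDF

pdf = FPDF(orientation='P', unit='mm', format='A4')
pdf.add_page()
pdf.image('quicklook.png', 14, 24, h=140)        # classified map quicklook
pdf.image('f1scores.png', 8, 170, w=86)          # per-class F1 bar chart (left)
pdf.image('importances.png', 100, 170, w=100)    # feature-importance chart (right)
pdf.output('report_layout_preview.pdf')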