From dc43c50f3f52c45a91c8e665fd79f591dc9fe1ea Mon Sep 17 00:00:00 2001
From: "raffaele.gaetano" <raffaele.gaetano@cirad.fr>
Date: Wed, 31 May 2023 22:30:07 +0200
Subject: [PATCH] ENH: storing training base in OBC instance.

---
 Learning/ObjectBased.py | 46 ++++++++++++++++++++++++++++++++---------
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/Learning/ObjectBased.py b/Learning/ObjectBased.py
index 3000c29..737b013 100644
--- a/Learning/ObjectBased.py
+++ b/Learning/ObjectBased.py
@@ -13,25 +13,51 @@ class ObjectBasedClassifier:
         for ras in user_feature_list:
             self.obia_base.add_raster_for_stats(ras)
         self.obia_base.populate_ref_db()
-
-    def train_RF(self, n_estimators, n_folds):
         L, X, Y, G, p2, p98 = self.obia_base.get_reference_db_as_training_base()
-        sgk = StratifiedGroupKFold(n_splits=n_folds, shuffle=True)
-        folds_indices = []
+        self.training_base = {
+            'obj_id': L,
+            'X': X,
+            'Y': Y,
+            'groups': G,
+            'perc2': p2,
+            'perc98': p98
+        }
+        return
+
+    def gen_k_folds(self, k):
+        self.training_base['folds'] = []
+        sgk = StratifiedGroupKFold(n_splits=k, shuffle=True)
+        for tr_i, ts_i in sgk.split(self.training_base['X'],
+                                    self.training_base['Y'],
+                                    self.training_base['groups']):
+            self.training_base['folds'].append((tr_i, ts_i))
+        return
+
+    def train_RF(self, n_estimators):
+        assert('folds' in self.training_base.keys())
         models = []
         results = []
-        for tr_i, ts_i in sgk.split(X, Y, G):
-            folds_indices.append((tr_i, ts_i))
+        for tr_i, ts_i in self.training_base['folds']:
             models.append(RandomForestClassifier(n_estimators=n_estimators))
-            models[-1].fit(X[tr_i], Y[tr_i])
-            l, c = L[ts_i], models[-1].predict(X[ts_i])
+            models[-1].fit(self.training_base['X'][tr_i], self.training_base['Y'][tr_i])
+            l, c = self.training_base['obj_id'][ts_i], models[-1].predict(self.training_base['X'][ts_i])
             y_true, y_pred = self.obia_base.true_pred_bypixel(l, c)
             results.append(
                 {
                     'conf_matrix': confusion_matrix(y_true, y_pred),
                     'accuracy': accuracy_score(y_true, y_pred),
                     'kappa' : cohen_kappa_score(y_true, y_pred),
-                    'p_r_f1': precision_recall_fscore_support(y_true, y_pred)
+                    'p_r_f1': precision_recall_fscore_support(y_true, y_pred, zero_division=0)
                 }
             )
-        return folds_indices, models, results
+            '''
+            summary = {
+                'accuracy_mean': np.mean([x['accuracy'] for x in results]),
+                'accuracy_std': np.std([x['accuracy'] for x in results]),
+                'kappa_mean': np.mean([x['kappa'] for x in results]),
+                'kappa_std': np.std([x['kappa'] for x in results]),
+                'f1_mean': np.mean([x['p_r_f1'][2] for x in results], axis=0),
+                'f1_std': np.std([x['p_r_f1'][2] for x in results], axis=0)
+            }
+            '''
+        return models, results
-- 
GitLab