diff --git a/Modules/Applications/AppClassification/app/otbKMeansClassification.cxx b/Modules/Applications/AppClassification/app/otbKMeansClassification.cxx index ee04d9cd0fb2ff3035ca2208a8cd64c735cd93d8..52e0d6b426d7defb6343113029c17abe6dcacd15 100644 --- a/Modules/Applications/AppClassification/app/otbKMeansClassification.cxx +++ b/Modules/Applications/AppClassification/app/otbKMeansClassification.cxx @@ -291,15 +291,12 @@ protected: itkExceptionMacro(<< "File : " << modelFileName << " couldn't be opened"); } - // get the line with the centroids (starts with "2 ") + // get the end line with the centroids std::string line, centroidLine; while(std::getline(infile,line)) { - if (line.size() > 2 && line[0] == '2' && line[1] == ' ') - { + if (!line.empty()) centroidLine = line; - break; - } } std::vector<std::string> centroidElm; diff --git a/Modules/Learning/DimensionalityReductionLearning/include/otbAutoencoderModel.h b/Modules/Learning/DimensionalityReductionLearning/include/otbAutoencoderModel.h index 5fe7ec2f27c9f91f21a2ce2417502f5d4beac54d..7bf3231a9b8d372473fd5ed0f598d3f2fd7c7c06 100644 --- a/Modules/Learning/DimensionalityReductionLearning/include/otbAutoencoderModel.h +++ b/Modules/Learning/DimensionalityReductionLearning/include/otbAutoencoderModel.h @@ -33,9 +33,8 @@ #endif #include "otb_shark.h" #include <shark/Algorithms/StoppingCriteria/AbstractStoppingCriterion.h> -#include <shark/Models/LinearModel.h> -#include <shark/Models/ConcatenatedModel.h> -#include <shark/Models/NeuronLayers.h> +#include <shark/Models/FFNet.h> +#include <shark/Models/Autoencoder.h> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif @@ -77,9 +76,9 @@ public: typedef typename Superclass::ConfidenceListSampleType ConfidenceListSampleType; /// Neural network related typedefs - typedef shark::ConcatenatedModel<shark::RealVector> ModelType; - typedef shark::LinearModel<shark::RealVector,NeuronType> LayerType; - typedef shark::LinearModel<shark::RealVector, 
shark::LinearNeuron> OutLayerType; + typedef shark::Autoencoder<NeuronType,shark::LinearNeuron> OutAutoencoderType; + typedef shark::Autoencoder<NeuronType,NeuronType> AutoencoderType; + typedef shark::FFNet<NeuronType,shark::LinearNeuron> NetworkType; itkNewMacro(Self); itkTypeMacro(AutoencoderModel, DimensionalityReductionModel); @@ -128,16 +127,18 @@ public: void Train() override; - template <class T> + template <class T, class Autoencoder> void TrainOneLayer( shark::AbstractStoppingCriterion<T> & criterion, + Autoencoder &, unsigned int, shark::Data<shark::RealVector> &, std::ostream&); - template <class T> + template <class T, class Autoencoder> void TrainOneSparseLayer( shark::AbstractStoppingCriterion<T> & criterion, + Autoencoder &, unsigned int, shark::Data<shark::RealVector> &, std::ostream&); @@ -165,9 +166,7 @@ protected: private: /** Internal Network */ - ModelType m_Encoder; - std::vector<LayerType> m_InLayers; - OutLayerType m_OutLayer; + NetworkType m_Net; itk::Array<unsigned int> m_NumberOfHiddenNeurons; /** Training parameters */ unsigned int m_NumberOfIterations; // stop the training after a fixed number of iterations diff --git a/Modules/Learning/DimensionalityReductionLearning/include/otbAutoencoderModel.txx b/Modules/Learning/DimensionalityReductionLearning/include/otbAutoencoderModel.txx index e5a26e9ee3dc8cbf4918222f4b7b45bc93e925cb..33f1c28e247c43f80ac28a1d608b1c15967c6a5e 100644 --- a/Modules/Learning/DimensionalityReductionLearning/include/otbAutoencoderModel.txx +++ b/Modules/Learning/DimensionalityReductionLearning/include/otbAutoencoderModel.txx @@ -34,17 +34,18 @@ #include "otbSharkUtils.h" //include train function #include <shark/ObjectiveFunctions/ErrorFunction.h> -//~ #include <shark/ObjectiveFunctions/SparseAutoencoderError.h>//the error function performing the regularisation of the hidden neurons +#include <shark/ObjectiveFunctions/SparseAutoencoderError.h>//the error function performing the regularisation of the hidden neurons 
#include <shark/Algorithms/GradientDescent/Rprop.h>// the RProp optimization algorithm #include <shark/ObjectiveFunctions/Loss/SquaredLoss.h> // squared loss used for regression #include <shark/ObjectiveFunctions/Regularizer.h> //L2 regulariziation -//~ #include <shark/Models/ImpulseNoiseModel.h> //noise source to corrupt the inputs +#include <shark/Models/ImpulseNoiseModel.h> //noise source to corrupt the inputs +#include <shark/Models/ConcatenatedModel.h>//to concatenate the noise with the model #include <shark/Algorithms/StoppingCriteria/MaxIterations.h> //A simple stopping criterion that stops after a fixed number of iterations #include <shark/Algorithms/StoppingCriteria/TrainingProgress.h> //Stops when the algorithm seems to converge, Tracks the progress of the training error over a period of time -#include <shark/Algorithms/GradientDescent/Adam.h> +#include <shark/Algorithms/GradientDescent/SteepestDescent.h> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif @@ -82,56 +83,96 @@ AutoencoderModel<TInputValue,NeuronType> } // Initialization of the feed forward neural network - m_Encoder = ModelType(); - m_InLayers.clear(); - size_t previousShape = shark::dataDimension(inputSamples); + std::vector<size_t> layers; + layers.push_back(shark::dataDimension(inputSamples)); for (unsigned int i = 0 ; i < m_NumberOfHiddenNeurons.Size(); ++i) { - m_InLayers.push_back( LayerType(previousShape, m_NumberOfHiddenNeurons[i]) ); - previousShape = m_NumberOfHiddenNeurons[i]; - m_Encoder.add(&(m_InLayers.back()), true); + layers.push_back(m_NumberOfHiddenNeurons[i]); } + for (unsigned int i = std::max(0,static_cast<int>(m_NumberOfHiddenNeurons.Size()-1)) ; i > 0; --i) { - m_InLayers.push_back( LayerType(previousShape, m_NumberOfHiddenNeurons[i-1]) ); - previousShape = m_NumberOfHiddenNeurons[i-1]; + layers.push_back(m_NumberOfHiddenNeurons[i-1]); } - m_OutLayer = OutLayerType(previousShape, shark::dataDimension(inputSamples)); - // Training of the 
autoencoders pairwise, starting from the first and last layers - for (unsigned int i = 0 ; i < m_NumberOfHiddenNeurons.Size(); ++i) + layers.push_back(shark::dataDimension(inputSamples)); + m_Net.setStructure(layers); + shark::initRandomNormal(m_Net,0.1); + + // Training of the first Autoencoder (first and last layer of the FF network) + if (m_Epsilon > 0) + { + shark::TrainingProgress<> criterion(5,m_Epsilon); + + OutAutoencoderType net; + // Shark doesn't allow to train a layer using a sparsity term AND a noisy input. + if (m_Noise[0] != 0) + { + TrainOneLayer(criterion, net, 0, inputSamples, ofs); + } + else + { + TrainOneSparseLayer(criterion, net, 0, inputSamples, ofs); + } + criterion.reset(); + } + else { - if (m_Epsilon > 0) + shark::MaxIterations<> criterion(m_NumberOfIterations); + + OutAutoencoderType net; + // Shark doesn't allow to train a layer using a sparsity term AND a noisy input. + if (m_Noise[0] != 0) { - shark::TrainingProgress<> criterion(5,m_Epsilon); + TrainOneLayer(criterion, net, 0, inputSamples, ofs); + otbMsgDevMacro(<< "m_Noise " << m_Noise[0]); + } + else + { + TrainOneSparseLayer(criterion, net, 0, inputSamples, ofs); + } + criterion.reset(); + } + + // Training of the other autoencoders + if (m_Epsilon > 0) + { + shark::TrainingProgress<> criterion(5,m_Epsilon); + + for (unsigned int i = 1 ; i < m_NumberOfHiddenNeurons.Size(); ++i) + { + AutoencoderType net; // Shark doesn't allow to train a layer using a sparsity term AND a noisy input. 
if (m_Noise[i] != 0) { - TrainOneLayer(criterion, i, inputSamples, ofs); + TrainOneLayer(criterion, net, i, inputSamples, ofs); } else { - TrainOneSparseLayer(criterion, i, inputSamples, ofs); + TrainOneSparseLayer(criterion, net, i, inputSamples, ofs); } criterion.reset(); } - else + } + else + { + shark::MaxIterations<> criterion(m_NumberOfIterations); + + for (unsigned int i = 1 ; i < m_NumberOfHiddenNeurons.Size(); ++i) { - shark::MaxIterations<> criterion(m_NumberOfIterations); + AutoencoderType net; // Shark doesn't allow to train a layer using a sparsity term AND a noisy input. if (m_Noise[i] != 0) { - TrainOneLayer(criterion, i, inputSamples, ofs); + TrainOneLayer(criterion, net, i, inputSamples, ofs); otbMsgDevMacro(<< "m_Noise " << m_Noise[0]); } else { - TrainOneSparseLayer( criterion, i, inputSamples, ofs); + TrainOneSparseLayer( criterion, net, i, inputSamples, ofs); } criterion.reset(); } - // encode the samples with the last encoder trained - inputSamples = m_InLayers[i](inputSamples); } if (m_NumberOfIterationsFineTuning > 0) { @@ -142,37 +183,31 @@ AutoencoderModel<TInputValue,NeuronType> } template <class TInputValue, class NeuronType> -template <class T> +template <class T, class Autoencoder> void AutoencoderModel<TInputValue,NeuronType> ::TrainOneLayer( shark::AbstractStoppingCriterion<T> & criterion, + Autoencoder & net, unsigned int layer_index, shark::Data<shark::RealVector> &samples, std::ostream& File) { - typedef shark::AbstractModel<shark::RealVector,shark::RealVector> BaseModelType; - ModelType net; - net.add(&(m_InLayers[layer_index]), true); - net.add( (layer_index ? 
- (BaseModelType*) &(m_InLayers[m_NumberOfHiddenNeurons.Size()*2 - 1 - layer_index]) : - (BaseModelType*) &m_OutLayer) , true); - otbMsgDevMacro(<< "Noise " << m_Noise[layer_index]); std::size_t inputs = dataDimension(samples); + net.setStructure(inputs, m_NumberOfHiddenNeurons[layer_index]); initRandomUniform(net,-m_InitFactor*std::sqrt(1.0/inputs),m_InitFactor*std::sqrt(1.0/inputs)); - //~ shark::ImpulseNoiseModel noise(inputs,m_Noise[layer_index],1.0); //set an input pixel with probability m_Noise to 0 - //~ shark::ConcatenatedModel<shark::RealVector,shark::RealVector> model = noise>> net; + shark::ImpulseNoiseModel noise(inputs,m_Noise[layer_index],1.0); //set an input pixel with probability m_Noise to 0 + shark::ConcatenatedModel<shark::RealVector,shark::RealVector> model = noise>> net; shark::LabeledData<shark::RealVector,shark::RealVector> trainSet(samples,samples);//labels identical to inputs shark::SquaredLoss<shark::RealVector> loss; - //~ shark::ErrorFunction error(trainSet, &model, &loss); - shark::ErrorFunction<> error(trainSet, &net, &loss); + shark::ErrorFunction error(trainSet, &model, &loss); - shark::TwoNormRegularizer<> regularizer(error.numberOfVariables()); + shark::TwoNormRegularizer regularizer(error.numberOfVariables()); error.setRegularizer(m_Regularization[layer_index],&regularizer); - shark::Adam<> optimizer; + shark::IRpropPlusFull optimizer; error.init(); optimizer.init(error); @@ -195,37 +230,35 @@ AutoencoderModel<TInputValue,NeuronType> } while( !criterion.stop( optimizer.solution() ) ); net.setParameterVector(optimizer.solution().point); + m_Net.setLayer(layer_index,net.encoderMatrix(),net.hiddenBias()); // Copy the encoder in the FF neural network + m_Net.setLayer( m_NumberOfHiddenNeurons.Size()*2 - 1 - layer_index,net.decoderMatrix(),net.outputBias()); // Copy the decoder in the FF neural network + samples = net.encode(samples); } template <class TInputValue, class NeuronType> -template <class T> +template <class T, class
Autoencoder> void AutoencoderModel<TInputValue,NeuronType>::TrainOneSparseLayer( shark::AbstractStoppingCriterion<T> & criterion, + Autoencoder & net, unsigned int layer_index, shark::Data<shark::RealVector> &samples, std::ostream& File) { - typedef shark::AbstractModel<shark::RealVector,shark::RealVector> BaseModelType; - ModelType net; - net.add(&(m_InLayers[layer_index]), true); - net.add( (layer_index ? - (BaseModelType*) &(m_InLayers[m_NumberOfHiddenNeurons.Size()*2 - 1 - layer_index]) : - (BaseModelType*) &m_OutLayer) , true); - + //AutoencoderType net; std::size_t inputs = dataDimension(samples); + net.setStructure(inputs, m_NumberOfHiddenNeurons[layer_index]); + shark::initRandomUniform(net,-m_InitFactor*std::sqrt(1.0/inputs),m_InitFactor*std::sqrt(1.0/inputs)); // Idea : set the initials value for the output weights higher than the input weights shark::LabeledData<shark::RealVector,shark::RealVector> trainSet(samples,samples);//labels identical to inputs shark::SquaredLoss<shark::RealVector> loss; - //~ shark::SparseAutoencoderError error(trainSet,&net, &loss, m_Rho[layer_index], m_Beta[layer_index]); - // SparseAutoencoderError doesn't exist anymore, for now use a plain ErrorFunction - shark::ErrorFunction<> error(trainSet, &net, &loss); - - shark::TwoNormRegularizer<> regularizer(error.numberOfVariables()); + shark::SparseAutoencoderError error(trainSet,&net, &loss, m_Rho[layer_index], m_Beta[layer_index]); + + shark::TwoNormRegularizer regularizer(error.numberOfVariables()); error.setRegularizer(m_Regularization[layer_index],&regularizer); - shark::Adam<> optimizer; + shark::IRpropPlusFull optimizer; error.init(); optimizer.init(error); @@ -246,6 +279,9 @@ void AutoencoderModel<TInputValue,NeuronType>::TrainOneSparseLayer( File << "end layer" << std::endl; } net.setParameterVector(optimizer.solution().point); + m_Net.setLayer(layer_index,net.encoderMatrix(),net.hiddenBias()); // Copy the encoder in the FF neural network + m_Net.setLayer(
m_NumberOfHiddenNeurons.Size()*2 - 1 - layer_index,net.decoderMatrix(),net.outputBias()); // Copy the decoder in the FF neural network + samples = net.encode(samples); } template <class TInputValue, class NeuronType> @@ -257,23 +293,15 @@ AutoencoderModel<TInputValue,NeuronType> shark::Data<shark::RealVector> &samples, std::ostream& File) { - // create full network - ModelType net; - for (auto &layer : m_InLayers) - { - net.add(&layer, true); - } - net.add(&m_OutLayer, true); - //labels identical to inputs shark::LabeledData<shark::RealVector,shark::RealVector> trainSet(samples,samples); shark::SquaredLoss<shark::RealVector> loss; - shark::ErrorFunction<> error(trainSet, &net, &loss); - shark::TwoNormRegularizer<> regularizer(error.numberOfVariables()); + shark::ErrorFunction error(trainSet, &m_Net, &loss); + shark::TwoNormRegularizer regularizer(error.numberOfVariables()); error.setRegularizer(m_Regularization[0],&regularizer); - shark::Adam<> optimizer; + shark::IRpropPlusFull optimizer; error.init(); optimizer.init(error); otbMsgDevMacro(<<"Error before training : " << optimizer.solution().value); @@ -298,6 +326,7 @@ AutoencoderModel<TInputValue,NeuronType> try { this->Load(filename); + m_Net.name(); } catch(...)
{ @@ -321,15 +350,22 @@ AutoencoderModel<TInputValue,NeuronType> { otbMsgDevMacro(<< "saving model ..."); std::ofstream ofs(filename); - ofs << "Autoencoder" << std::endl; // the first line of the model file contains a key - ofs << (m_InLayers.size() + 1) << std::endl; // second line is the number of encoders/decoders + ofs << m_Net.name() << std::endl; // the first line of the model file contains a key shark::TextOutArchive oa(ofs); - for (const auto &layer : m_InLayers) + oa << m_Net; + ofs.close(); + + if (this->m_WriteWeights == true) // output the map vectors in a txt file { - oa << layer; + std::ofstream otxt(filename+".txt"); + for (unsigned int i = 0 ; i < m_Net.layerMatrices().size(); ++i) + { + otxt << "layer " << i << std::endl; + otxt << m_Net.layerMatrix(i) << std::endl; + otxt << m_Net.bias(i) << std::endl; + otxt << std::endl; + } } - oa << m_OutLayer; - ofs.close(); } template <class TInputValue, class NeuronType> @@ -337,39 +373,23 @@ void AutoencoderModel<TInputValue,NeuronType> ::Load(const std::string & filename, const std::string & /*name*/) { + NetworkType net; std::ifstream ifs(filename); - char buffer[256]; - // check first line - ifs.getline(buffer,256); - std::string bufferStr(buffer); - if (bufferStr != "Autoencoder"){ + char autoencoder[256]; + ifs.getline(autoencoder,256); + std::string autoencoderstr(autoencoder); + + if (autoencoderstr != net.name()){ itkExceptionMacro(<< "Error opening " << filename.c_str() ); } - // check second line - ifs.getline(buffer,256); - int nbLevels = boost::lexical_cast<int>(buffer); - if (nbLevels < 2 || nbLevels%2 == 1) - { - itkExceptionMacro(<< "Unexpected number of levels : "<<buffer ); - } - m_InLayers.clear(); - m_Encoder = ModelType(); shark::TextInArchive ia(ifs); - for (int i=0 ; (i+1) < nbLevels ; i++) - { - LayerType layer; - ia >> layer; - m_InLayers.push_back(layer); - } - ia >> m_OutLayer; + ia >> m_Net; ifs.close(); - for (int i=0 ; i < nbLevels/2 ; i++) - { - m_Encoder.add(&(m_InLayers[i]) 
,true); - } - - this->SetDimension( m_Encoder.outputShape()[0] ); + // This gives us the dimension if we keep the encoder and decoder + size_t feature_layer_index = m_Net.layerMatrices().size()/2; + // number of neurons in the feature layer (second dimension of the first decoder weight matrix) + this->SetDimension(m_Net.layerMatrix(feature_layer_index).size2()); } template <class TInputValue, class NeuronType> @@ -389,7 +409,7 @@ AutoencoderModel<TInputValue,NeuronType> shark::Data<shark::RealVector> data = shark::createDataFromRange(features); // features layer for a network containing the encoder and decoder part - data = m_Encoder(data); + data = m_Net.evalLayer( m_Net.layerMatrices().size()/2-1 ,data); TargetSampleType target; target.SetSize(this->m_Dimension); @@ -415,7 +435,7 @@ AutoencoderModel<TInputValue,NeuronType> shark::Data<shark::RealVector> data = shark::createDataFromRange(features); TargetSampleType target; // features layer for a network containing the encoder and decoder part - data = m_Encoder(data); + data = m_Net.evalLayer( m_Net.layerMatrices().size()/2-1 ,data); unsigned int id = startIndex; target.SetSize(this->m_Dimension); diff --git a/Modules/Learning/DimensionalityReductionLearning/include/otbPCAModel.txx b/Modules/Learning/DimensionalityReductionLearning/include/otbPCAModel.txx index a387852fecc386d9c5f2a6c27c7bf39cd7a3649d..9f39326a21bc5f1980a49d80ecdaea55b42a450a 100644 --- a/Modules/Learning/DimensionalityReductionLearning/include/otbPCAModel.txx +++ b/Modules/Learning/DimensionalityReductionLearning/include/otbPCAModel.txx @@ -137,11 +137,11 @@ PCAModel<TInputValue>::Load(const std::string & filename, const std::string & /* ifs.close(); if (this->m_Dimension ==0) { - this->m_Dimension = m_Encoder.outputShape()[0]; + this->m_Dimension = m_Encoder.outputSize(); } auto eigenvectors = m_Encoder.matrix(); - eigenvectors.resize(this->m_Dimension,m_Encoder.inputShape()[0]); + eigenvectors.resize(this->m_Dimension,m_Encoder.inputSize()); 
m_Encoder.setStructure(eigenvectors, m_Encoder.offset() ); } diff --git a/Modules/Learning/LearningBase/otb-module.cmake b/Modules/Learning/LearningBase/otb-module.cmake index c0af985032de6d4a2acd11988be1b9a177cf8219..afa2a339a1813cf16e5f6ea3700f079a36180dcd 100644 --- a/Modules/Learning/LearningBase/otb-module.cmake +++ b/Modules/Learning/LearningBase/otb-module.cmake @@ -28,11 +28,7 @@ otb_module(OTBLearningBase OTBImageBase OTBITK - OPTIONAL_DEPENDS - OTBShark - - TEST_DEPENDS - OTBBoost + TEST_DEPENDS OTBTestKernel OTBImageIO diff --git a/Modules/Learning/LearningBase/test/CMakeLists.txt b/Modules/Learning/LearningBase/test/CMakeLists.txt index 48e28cc5cad320ffa41eee0659ff6979d0bf4457..d1d16c3e65801e606c6e6903538b65264a4483a6 100644 --- a/Modules/Learning/LearningBase/test/CMakeLists.txt +++ b/Modules/Learning/LearningBase/test/CMakeLists.txt @@ -32,10 +32,6 @@ otbKMeansImageClassificationFilterNew.cxx otbMachineLearningModelTemplates.cxx ) -if(OTB_USE_SHARK) - set(OTBLearningBaseTests ${OTBLearningBaseTests} otbSharkUtilsTests.cxx) -endif() - add_executable(otbLearningBaseTestDriver ${OTBLearningBaseTests}) target_link_libraries(otbLearningBaseTestDriver ${OTBLearningBase-Test_LIBRARIES}) otb_module_target_label(otbLearningBaseTestDriver) @@ -72,7 +68,3 @@ otb_add_test(NAME leTuDecisionTreeNew COMMAND otbLearningBaseTestDriver otb_add_test(NAME leTuKMeansImageClassificationFilterNew COMMAND otbLearningBaseTestDriver otbKMeansImageClassificationFilterNew) -if(OTB_USE_SHARK) - otb_add_test(NAME leTuSharkNormalizeLabels COMMAND otbLearningBaseTestDriver - otbSharkNormalizeLabels) -endif() diff --git a/Modules/Learning/LearningBase/test/otbLearningBaseTestDriver.cxx b/Modules/Learning/LearningBase/test/otbLearningBaseTestDriver.cxx index dc2d36b7943129ec6519ebbc4f194d1dd6078800..5b38bf300dd4520c18e198b6e6643848cbdc937c 100644 --- a/Modules/Learning/LearningBase/test/otbLearningBaseTestDriver.cxx +++ 
b/Modules/Learning/LearningBase/test/otbLearningBaseTestDriver.cxx @@ -29,7 +29,4 @@ void RegisterTests() REGISTER_TEST(otbSEMClassifierNew); REGISTER_TEST(otbDecisionTreeNew); REGISTER_TEST(otbKMeansImageClassificationFilterNew); -#ifdef OTB_USE_SHARK - REGISTER_TEST(otbSharkNormalizeLabels); -#endif } diff --git a/Modules/Learning/LearningBase/test/otbSharkUtilsTests.cxx b/Modules/Learning/LearningBase/test/otbSharkUtilsTests.cxx deleted file mode 100644 index bc3783cb728b0f5ad0f6b2d43620b18ba7939e30..0000000000000000000000000000000000000000 --- a/Modules/Learning/LearningBase/test/otbSharkUtilsTests.cxx +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2005-2017 Centre National d'Etudes Spatiales (CNES) - * - * This file is part of Orfeo Toolbox - * - * https://www.orfeo-toolbox.org/ - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "itkMacro.h" -#include "otbSharkUtils.h" - - -int otbSharkNormalizeLabels(int itkNotUsed(argc), char* itkNotUsed(argv) []) -{ - std::vector<unsigned int> inLabels = {2, 2, 3, 20, 1}; - std::vector<unsigned int> expectedDictionary = {2, 3, 20, 1}; - std::vector<unsigned int> expectedLabels = {0, 0, 1, 2, 3}; - - auto newLabels = inLabels; - std::vector<unsigned int> labelDict; - otb::Shark::NormalizeLabelsAndGetDictionary(newLabels, labelDict); - - if(newLabels != expectedLabels) - { - std::cout << "Wrong new labels\n"; - for(size_t i = 0; i<newLabels.size(); ++i) - std::cout << "Got " << newLabels[i] << " expected " << expectedLabels[i] << '\n'; - - return EXIT_FAILURE; - } - - if(labelDict != expectedDictionary) - { - std::cout << "Wrong dictionary\n"; - for(size_t i = 0; i<labelDict.size(); ++i) - std::cout << "Got " << labelDict[i] << " expected " << expectedDictionary[i] << '\n'; - - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} diff --git a/Modules/Learning/Supervised/include/otbSharkRandomForestsMachineLearningModel.h b/Modules/Learning/Supervised/include/otbSharkRandomForestsMachineLearningModel.h index 41015ee9dc7f5f6bb6c3d1defbfad5ccb1c0c47b..f2d4f003776b82a1ba2ae3897c2b54c17596a492 100644 --- a/Modules/Learning/Supervised/include/otbSharkRandomForestsMachineLearningModel.h +++ b/Modules/Learning/Supervised/include/otbSharkRandomForestsMachineLearningModel.h @@ -36,7 +36,6 @@ #pragma GCC diagnostic ignored "-Wheader-guard" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #endif -#include <shark/Models/Classifier.h> #include "otb_shark.h" #include "shark/Algorithms/Trainers/RFTrainer.h" #if defined(__GNUC__) || defined(__clang__) @@ -137,10 +136,6 @@ public: /** If true, margin confidence value will be computed */ itkSetMacro(ComputeMargin, bool); - /** If true, class labels will be normalised in [0 ... 
nbClasses] */ - itkGetMacro(NormalizeClassLabels, bool); - itkSetMacro(NormalizeClassLabels, bool); - protected: /** Constructor */ SharkRandomForestsMachineLearningModel(); @@ -161,10 +156,8 @@ private: SharkRandomForestsMachineLearningModel(const Self &); //purposely not implemented void operator =(const Self&); //purposely not implemented - shark::RFClassifier<unsigned int> m_RFModel; - shark::RFTrainer<unsigned int> m_RFTrainer; - std::vector<unsigned int> m_ClassDictionary; - bool m_NormalizeClassLabels; + shark::RFClassifier m_RFModel; + shark::RFTrainer m_RFTrainer; unsigned int m_NumberOfTrees; unsigned int m_MTry; diff --git a/Modules/Learning/Supervised/include/otbSharkRandomForestsMachineLearningModel.txx b/Modules/Learning/Supervised/include/otbSharkRandomForestsMachineLearningModel.txx index 72c816069bebddc048a0f8af48f24579a55fa38b..207f1abdd77e4b5cfffd9bc5d104c4b40232f853 100644 --- a/Modules/Learning/Supervised/include/otbSharkRandomForestsMachineLearningModel.txx +++ b/Modules/Learning/Supervised/include/otbSharkRandomForestsMachineLearningModel.txx @@ -32,6 +32,7 @@ #pragma GCC diagnostic ignored "-Woverloaded-virtual" #pragma GCC diagnostic ignored "-Wignored-qualifiers" #endif +#include <shark/Models/Converter.h> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif @@ -51,7 +52,6 @@ SharkRandomForestsMachineLearningModel<TInputValue,TOutputValue> this->m_ConfidenceIndex = true; this->m_IsRegressionSupported = false; this->m_IsDoPredictBatchMultiThreaded = true; - this->m_NormalizeClassLabels = true; } @@ -76,17 +76,13 @@ SharkRandomForestsMachineLearningModel<TInputValue,TOutputValue> Shark::ListSampleToSharkVector(this->GetInputListSample(), features); Shark::ListSampleToSharkVector(this->GetTargetListSample(), class_labels); - if(m_NormalizeClassLabels) - { - Shark::NormalizeLabelsAndGetDictionary(class_labels, m_ClassDictionary); - } shark::ClassificationDataset TrainSamples = 
shark::createLabeledDataFromRange(features,class_labels); //Set parameters m_RFTrainer.setMTry(m_MTry); m_RFTrainer.setNTrees(m_NumberOfTrees); m_RFTrainer.setNodeSize(m_NodeSize); - // m_RFTrainer.setOOBratio(m_OobRatio); + m_RFTrainer.setOOBratio(m_OobRatio); m_RFTrainer.train(m_RFModel, TrainSamples); } @@ -129,20 +125,15 @@ SharkRandomForestsMachineLearningModel<TInputValue,TOutputValue> } if (quality != ITK_NULLPTR) { - shark::RealVector probas = m_RFModel.decisionFunction()(samples); + shark::RealVector probas = m_RFModel(samples); (*quality) = ComputeConfidence(probas, m_ComputeMargin); } - unsigned int res{0}; - m_RFModel.eval(samples, res); + shark::ArgMaxConverter<shark::RFClassifier> amc; + amc.decisionFunction() = m_RFModel; + unsigned int res; + amc.eval(samples, res); TargetSampleType target; - if(m_NormalizeClassLabels) - { - target[0] = m_ClassDictionary[static_cast<TOutputValue>(res)]; - } - else - { - target[0] = static_cast<TOutputValue>(res); - } + target[0] = static_cast<TOutputValue>(res); return target; } @@ -166,13 +157,13 @@ SharkRandomForestsMachineLearningModel<TInputValue,TOutputValue> Shark::ListSampleRangeToSharkVector(input, features,startIndex,size); shark::Data<shark::RealVector> inputSamples = shark::createDataFromRange(features); -#ifdef _OPENMP + #ifdef _OPENMP omp_set_num_threads(itk::MultiThreader::GetGlobalDefaultNumberOfThreads()); -#endif + #endif if(quality != ITK_NULLPTR) { - shark::Data<shark::RealVector> probas = m_RFModel.decisionFunction()(inputSamples); + shark::Data<shark::RealVector> probas = m_RFModel(inputSamples); unsigned int id = startIndex; for(shark::RealVector && p : probas.elements()) { @@ -184,19 +175,14 @@ SharkRandomForestsMachineLearningModel<TInputValue,TOutputValue> } } - auto prediction = m_RFModel(inputSamples); + shark::ArgMaxConverter<shark::RFClassifier> amc; + amc.decisionFunction() = m_RFModel; + auto prediction = amc(inputSamples); unsigned int id = startIndex; for(const auto& p : 
prediction.elements()) { TargetSampleType target; - if(m_NormalizeClassLabels) - { - target[0] = m_ClassDictionary[static_cast<TOutputValue>(p)]; - } - else - { - target[0] = static_cast<TOutputValue>(p); - } + target[0] = static_cast<TOutputValue>(p); targets->SetMeasurementVector(id,target); ++id; } @@ -213,18 +199,7 @@ SharkRandomForestsMachineLearningModel<TInputValue,TOutputValue> itkExceptionMacro(<< "Error opening " << filename.c_str() ); } // Add comment with model file name - ofs << "#" << m_RFModel.name(); - if(m_NormalizeClassLabels) ofs << " with_dictionary"; - ofs << std::endl; - if(m_NormalizeClassLabels) - { - ofs << m_ClassDictionary.size() << " "; - for(const auto& l : m_ClassDictionary) - { - ofs << l << " "; - } - ofs << std::endl; - } + ofs << "#" << m_RFModel.name() << std::endl; shark::TextOutArchive oa(ofs); m_RFModel.save(oa,0); } @@ -244,10 +219,6 @@ SharkRandomForestsMachineLearningModel<TInputValue,TOutputValue> { if( line.find( m_RFModel.name() ) == std::string::npos ) itkExceptionMacro( "The model file : " + filename + " cannot be read." 
); - if( line.find( "with_dictionary" ) == std::string::npos ) - { - m_NormalizeClassLabels=false; - } } else { @@ -255,18 +226,6 @@ SharkRandomForestsMachineLearningModel<TInputValue,TOutputValue> ifs.clear(); ifs.seekg( 0, std::ios::beg ); } - if(m_NormalizeClassLabels) - { - size_t nbLabels{0}; - ifs >> nbLabels; - m_ClassDictionary.resize(nbLabels); - for(size_t i=0; i<nbLabels; ++i) - { - unsigned int label; - ifs >> label; - m_ClassDictionary[i]=label; - } - } shark::TextInArchive ia( ifs ); m_RFModel.load( ia, 0 ); } diff --git a/Modules/Learning/Unsupervised/include/otbSharkKMeansMachineLearningModel.txx b/Modules/Learning/Unsupervised/include/otbSharkKMeansMachineLearningModel.txx index 1b08d538c943001279d9401f314d51e21e8dbf88..9dd43948a719c9305dace0a6366ebfd40e4b3e24 100644 --- a/Modules/Learning/Unsupervised/include/otbSharkKMeansMachineLearningModel.txx +++ b/Modules/Learning/Unsupervised/include/otbSharkKMeansMachineLearningModel.txx @@ -55,7 +55,6 @@ SharkKMeansMachineLearningModel<TInputValue, TOutputValue> m_Normalized( false ), m_K(2), m_MaximumNumberOfIterations( 10 ) { // Default set HardClusteringModel - this->m_ConfidenceIndex = true; m_ClusteringModel = boost::make_shared<ClusteringModelType>( &m_Centroids ); } @@ -175,7 +174,7 @@ SharkKMeansMachineLearningModel<TInputValue, TOutputValue> // Change quality measurement only if SoftClustering or other clustering method is used. if( quality != ITK_NULLPTR ) { - for( unsigned int qid = startIndex; qid < startIndex+size; ++qid ) + for( unsigned int qid = startIndex; qid < size; ++qid ) { quality->SetMeasurementVector( qid, static_cast<ConfidenceValueType>(1.) 
); } diff --git a/Modules/ThirdParty/Shark/include/otbSharkUtils.h b/Modules/ThirdParty/Shark/include/otbSharkUtils.h index 04c57b6d4e7f5a022b0c4fafa86ac41b134f690c..de3adf77401d0f131d2bd7d447627829b3df64ff 100644 --- a/Modules/ThirdParty/Shark/include/otbSharkUtils.h +++ b/Modules/ThirdParty/Shark/include/otbSharkUtils.h @@ -23,7 +23,6 @@ #include <stdexcept> #include <string> -#include <unordered_map> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push @@ -128,27 +127,6 @@ template <class T> void ListSampleToSharkVector(const T * listSample, std::vecto assert(listSample != nullptr); ListSampleRangeToSharkVector(listSample,output,0, static_cast<unsigned int>(listSample->Size())); } - -/** Shark assumes that labels are 0 ... (nbClasses-1). This function modifies the labels contained in the input vector and returns a vector with size = nbClasses which allows the translation from the normalised labels to the new ones oldLabel = dictionary[newLabel]. -*/ -template <typename T> void NormalizeLabelsAndGetDictionary(std::vector<T>& labels, - std::vector<T>& dictionary) -{ - std::unordered_map<T, T> dictMap; - T labelCount{0}; - for(const auto& l : labels) - { - if(dictMap.find(l)==dictMap.end()) - dictMap.insert({l, labelCount++}); - } - dictionary.resize(labelCount); - for(auto& l : labels) - { - auto newLabel = dictMap[l]; - dictionary[newLabel] = l; - l = newLabel; - } -} } } diff --git a/SuperBuild/CMake/External_shark.cmake b/SuperBuild/CMake/External_shark.cmake index 0468dbd4017d25538b0b5a3b4176cd3d7ef0f6b6..75fa0d214fa309cec089aefba047fe91bbb1cb6c 100644 --- a/SuperBuild/CMake/External_shark.cmake +++ b/SuperBuild/CMake/External_shark.cmake @@ -30,8 +30,8 @@ ADD_SUPERBUILD_CMAKE_VAR(SHARK BOOST_LIBRARYDIR) ExternalProject_Add(SHARK PREFIX SHARK - URL "https://github.com/Shark-ML/Shark/archive/67990bcd2c4a90a27be97d933b3740931e9da141.zip" - URL_MD5 9ad7480a4f9832b63086b9a683566187 + URL "https://github.com/Shark-ML/Shark/archive/v3.1.4.zip" + 
URL_MD5 149e7d2e458cbe65c6373c2e89876b3e SOURCE_DIR ${SHARK_SB_SRC} BINARY_DIR ${SHARK_SB_BUILD_DIR} INSTALL_DIR ${SB_INSTALL_PREFIX} diff --git a/SuperBuild/patches/SHARK/shark-2-ext-num-literals-all.diff b/SuperBuild/patches/SHARK/shark-2-ext-num-literals-all.diff deleted file mode 100644 index 0b964c1b9ada7aa4409f0f032285a70723caacfe..0000000000000000000000000000000000000000 --- a/SuperBuild/patches/SHARK/shark-2-ext-num-literals-all.diff +++ /dev/null @@ -1,13 +0,0 @@ -diff -burN Shark.orig/CMakeLists.txt Shark/CMakeLists.txt ---- Shark.orig/CMakeLists.txt 2018-02-05 18:04:58.012612932 +0100 -+++ Shark/CMakeLists.txt 2018-02-05 18:20:50.032233165 +0100 -@@ -415,6 +415,9 @@ - ##################################################################### - # General Path settings - ##################################################################### -+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") -+ add_definitions(-fext-numeric-literals) -+endif() - include_directories( ${shark_SOURCE_DIR}/include ) - include_directories( ${shark_BINARY_DIR}/include ) - add_subdirectory( include ) diff --git a/SuperBuild/patches/SHARK/shark-2-find-boost-all.diff b/SuperBuild/patches/SHARK/shark-2-find-boost-all.diff new file mode 100644 index 0000000000000000000000000000000000000000..a97c1ac4afd1f56118fdba14cf7b993755bb5c00 --- /dev/null +++ b/SuperBuild/patches/SHARK/shark-2-find-boost-all.diff @@ -0,0 +1,16 @@ +diff -burN Shark-349f29bd71c370e0f88f7fc9aa66fa5c4768fcb0.orig/CMakeLists.txt Shark-349f29bd71c370e0f88f7fc9aa66fa5c4768fcb0/CMakeLists.txt +--- Shark-349f29bd71c370e0f88f7fc9aa66fa5c4768fcb0.orig/CMakeLists.txt 2017-08-22 11:31:50.472052695 +0200 ++++ Shark-349f29bd71c370e0f88f7fc9aa66fa5c4768fcb0/CMakeLists.txt 2017-08-22 11:32:36.448358789 +0200 +@@ -141,10 +141,8 @@ + + find_package( + Boost 1.48.0 REQUIRED COMPONENTS +- system date_time filesystem +- program_options serialization thread +- unit_test_framework +-) ++ serialization ++ ) + + if(NOT Boost_FOUND) + 
message(FATAL_ERROR "Please make sure Boost 1.48.0 is installed on your system")