From b6625040467f552c6563afe43f93688c36eef14f Mon Sep 17 00:00:00 2001 From: "raffaele.gaetano" <raffaele.gaetano@cirad.fr> Date: Mon, 22 Mar 2021 15:20:09 +0100 Subject: [PATCH] ENH: simplified resume option to recover interrupted segmentation. --- app/otbLSGRM.cxx | 8 +---- include/lsgrmController.h | 5 +-- include/lsgrmController.txx | 28 ++++++++------- include/lsgrmGraphOperations.h | 8 ++--- include/lsgrmGraphOperations.txx | 61 ++++++++++++++++++++++++-------- 5 files changed, 68 insertions(+), 42 deletions(-) diff --git a/app/otbLSGRM.cxx b/app/otbLSGRM.cxx index af745e7..ac71879 100644 --- a/app/otbLSGRM.cxx +++ b/app/otbLSGRM.cxx @@ -116,13 +116,7 @@ private: AddParameter(ParameterType_Bool, "resume", "Activate resume mode"); SetDefaultParameterInt("resume", 0); - AddParameter(ParameterType_Int, "resumetilerow", "Starting tile row in resume mode"); - AddParameter(ParameterType_Int, "resumetilecol", "Starting tile column in resume mode"); - SetDefaultParameterInt("resumetilerow", 0); - SetDefaultParameterInt("resumetilecol", 0); MandatoryOff("resume"); - MandatoryOff("resumetilerow"); - MandatoryOff("resumetilecol"); } void DoUpdateParameters() @@ -237,7 +231,7 @@ private: // Resume mode? if (GetParameterInt("resume")) { - controller->SetResumingMode(GetParameterInt("resumetilerow"),GetParameterInt("resumetilecol")); + controller->SetResumingMode(); } // Run the segmentation diff --git a/include/lsgrmController.h b/include/lsgrmController.h index 85bdec7..81e9d9f 100644 --- a/include/lsgrmController.h +++ b/include/lsgrmController.h @@ -45,7 +45,7 @@ public: void SetInputImage(ImageType * inputImage); void SetInternalMemoryAvailable(long long unsigned int v); // expecting a value in Mbytes. - void SetResumingMode(unsigned int rRow, unsigned int rCol); + void SetResumingMode(); void StopResumingMode(); /* Accessors */ @@ -118,9 +118,6 @@ private: unsigned int m_TileWidth; // regular tile width (i.e. not left tiles) unsigned int m_TileHeight; // regular tile height (i.e. not bottom tiles) bool m_Resuming; // True if in resuming mode - unsigned int m_ResumeTileRow; // X index of starting tile in resume mode - unsigned int m_ResumeTileCol; // Y index of starting tile in resume mode - /* read-only variables */ LSGRMTilingMode m_TilingMode; // tiling mode (none/user/auto) diff --git a/include/lsgrmController.txx b/include/lsgrmController.txx index e1c46ef..d3af8ef 100644 --- a/include/lsgrmController.txx +++ b/include/lsgrmController.txx @@ -19,8 +19,6 @@ Controller<TSegmenter>::Controller() m_Threshold = 75; m_Memory = 0; m_Resuming = false; - m_ResumeTileRow = 0; - m_ResumeTileCol = 0; } @@ -94,6 +92,7 @@ void Controller<TSegmenter>::RunSegmentation() // temp. patch, maybe calculate real current memory after resuming graphs. long long unsigned int accumulatedMemory = 2 * m_Memory; + unsigned int nextTile = m_NbTilesX*m_NbTilesY; accumulatedMemory = RunFirstPartialSegmentation<TSegmenter>( m_InputImage, @@ -107,8 +106,9 @@ void Controller<TSegmenter>::RunSegmentation() m_TileWidth, m_TileHeight, isFusion, - m_Resuming); - + m_Resuming, + nextTile); + #ifdef OTB_USE_MPI GatherUsefulVariables(accumulatedMemory, isFusion); #endif @@ -116,7 +116,16 @@ void Controller<TSegmenter>::RunSegmentation() // Time monitoring ShowTime(t); - while(accumulatedMemory > m_Memory && isFusion) + if (m_Resuming & nextTile < m_NbTilesX*m_NbTilesY) { + std::cout << "Detected resumable process from tile #" << nextTile << std::endl; + std::cout << "Forcing regular pass on the whole graph to update stability margins." << std::endl; + } + else { + std::cout << "Process resumed at first stage." << std::endl; + StopResumingMode(); + } + + while(m_Resuming || (accumulatedMemory > m_Memory && isFusion)) { isFusion = false; accumulatedMemory = RunPartialSegmentation<TSegmenter>( @@ -131,8 +140,7 @@ void Controller<TSegmenter>::RunSegmentation() m_InputImage->GetNumberOfComponentsPerPixel(), isFusion, m_Resuming, - m_ResumeTileRow, - m_ResumeTileCol); + nextTile); if (m_Resuming) StopResumingMode(); @@ -474,19 +482,15 @@ std::vector<std::string> Controller<TSegmenter>::GetTemporaryFilesList() } template<class TSegmenter> -void Controller<TSegmenter>::SetResumingMode(unsigned int rRow, unsigned int rCol) +void Controller<TSegmenter>::SetResumingMode() { m_Resuming = true; - m_ResumeTileRow = rRow; - m_ResumeTileCol = rCol; } template<class TSegmenter> void Controller<TSegmenter>::StopResumingMode() { m_Resuming = false; - m_ResumeTileRow = 0; - m_ResumeTileCol = 0; } } // end of namespace lsgrm diff --git a/include/lsgrmGraphOperations.h b/include/lsgrmGraphOperations.h index c8faa4d..3d8e4af 100644 --- a/include/lsgrmGraphOperations.h +++ b/include/lsgrmGraphOperations.h @@ -55,7 +55,8 @@ long long unsigned int RunFirstPartialSegmentation( const unsigned int nbTilesY, const unsigned int tileWidth, const unsigned int tileHeight, - bool& isFusion); + bool& isFusion, + unsigned int& nextTile); template<class TSegmenter> long long unsigned int RunPartialSegmentation( @@ -69,9 +70,8 @@ long long unsigned int RunPartialSegmentation( const unsigned int imageHeight, const unsigned int imageBands, bool& isFusion, - bool resume = false, - unsigned int rRow = 0, - unsigned int rCol = 0); + bool resume, + unsigned int& nextTile); template<class TSegmenter> void RemoveUselessNodes(ProcessingTile& tile, diff --git a/include/lsgrmGraphOperations.txx b/include/lsgrmGraphOperations.txx index 15e8afc..76d647b 100644 --- a/include/lsgrmGraphOperations.txx +++ b/include/lsgrmGraphOperations.txx @@ -4,6 +4,7 @@ //#include <unistd.h> #include <fstream> #include <cstdio> +#include <sys/stat.h> namespace lsgrm { @@ -14,6 +15,13 @@ bool file_exists(const std::string& fileName) return infile.good(); } +time_t last_mod_time(const std::string& fileName) +{ + struct stat result; + if (!stat(fileName.c_str(), &result)) return result.st_mtime; + else return -1; +} + template<class TSegmenter> typename TSegmenter::ImageType::Pointer ReadImageRegion( typename TSegmenter::ImageType * inputPtr, @@ -147,8 +155,7 @@ long long unsigned int RunPartialSegmentation(const typename TSegmenter::ParamTy const unsigned int imageBands, bool& isFusion, bool resume, - unsigned int rRow, - unsigned int rCol) + unsigned int& nextTile) { long long unsigned int accumulatedMemory = 0; isFusion = false; @@ -167,18 +174,19 @@ long long unsigned int RunPartialSegmentation(const typename TSegmenter::ParamTy { // Get the current tile std::cout << "Processing tile " << row << ", " << col << std::endl; - ProcessingTile currentTile = tiles[row*nbTilesX + col]; + unsigned int currTile = row*nbTilesX + col; + ProcessingTile currentTile = tiles[currTile]; // If resume mode, get accumulated memory on previous tiles - if (resume && (row*nbTilesX+col) < (rRow*nbTilesX+rCol) ) { - // Load the graph - std::cout << "\tResuming graph..." << std::endl; - TSegmenter segmenter; - ReadGraph<TSegmenter>(segmenter.m_Graph, currentTile.nodeFileName, currentTile.edgeFileName); - // Retrieve the amount of memory to store this graph - std::cout << "\tGet graph memory..." << std::endl; - accumulatedMemory += segmenter.GetGraphMemory(); - continue; + if (resume && currTile < nextTile ) { + // Load the graph + std::cout << "\tResuming graph..." << std::endl; + TSegmenter segmenter; + ReadGraph<TSegmenter>(segmenter.m_Graph, currentTile.nodeFileName, currentTile.edgeFileName); + // Retrieve the amount of memory to store this graph + std::cout << "\tGet graph memory..." << std::endl; + accumulatedMemory += segmenter.GetGraphMemory(); + continue; } // Load the graph @@ -237,9 +245,13 @@ long long unsigned int RunPartialSegmentation(const typename TSegmenter::ParamTy // Write graph to temporay directory std::cout << "\tWrite graph..." << std::endl; WriteGraph<TSegmenter>(segmenter.m_Graph, currentTile.nodeFileName, currentTile.edgeFileName); + } } } + + if (resume) + nextTile = 0; #ifdef OTB_USE_MPI otb::MPIConfig::Instance()->barrier(); @@ -280,7 +292,7 @@ long long unsigned int RunPartialSegmentation(const typename TSegmenter::ParamTy } } std::cout << std::endl; - + return accumulatedMemory; } @@ -648,7 +660,8 @@ long long unsigned int RunFirstPartialSegmentation( const unsigned int tileWidth, const unsigned int tileHeight, bool& isFusion, - bool resume) + bool resume, + unsigned int& nextTile) { using ImageType = typename TSegmenter::ImageType; @@ -657,11 +670,14 @@ long long unsigned int RunFirstPartialSegmentation( long long unsigned int accumulatedMemory = 0; isFusion = false; + bool accomplished = true; const unsigned int numberOfNeighborLayers = static_cast<unsigned int>(pow(2, niter2 + 1) - 2); std::cout << "--- Running fist partial segmentation...\nNumber of neighbor layers " << numberOfNeighborLayers << std::endl; + time_t lastTime = -1; + for(unsigned int row = 0; row < nbTilesY; ++row) { for(unsigned int col = 0; col < nbTilesX; col++) @@ -675,6 +691,11 @@ long long unsigned int RunFirstPartialSegmentation( if (resume && file_exists(currentTile.nodeFileName) && file_exists(currentTile.edgeFileName)) { + int lastMod = last_mod_time(currentTile.nodeFileName); + if (lastMod > lastTime) { + lastTime = lastMod; + nextTile = row*nbTilesX + col + 1; + } std::cout << "\tResuming graph..." << std::endl; TSegmenter segmenter; ReadGraph<TSegmenter>(segmenter.m_Graph, currentTile.nodeFileName, currentTile.edgeFileName); @@ -685,7 +706,9 @@ long long unsigned int RunFirstPartialSegmentation( isFusion = true; continue; } - + + accomplished = false; + std::cout << "Processing tile " << (row*nbTilesX + col) << " / " << (nbTilesX*nbTilesY) << " (" << col << ", " << row << ")" << " start: [" << currentTile.region.GetIndex()[0] << ", " << currentTile.region.GetIndex()[1] << @@ -740,10 +763,18 @@ long long unsigned int RunFirstPartialSegmentation( ExtractStabilityMargin<TSegmenter>(borderNodeMap, numberOfNeighborLayers); WriteStabilityMargin<TSegmenter>(borderNodeMap, currentTile.nodeMarginFileName, currentTile.edgeMarginFileName); + } } } // for each col } // for each row + + if (resume && !accomplished) { + std::cout << "\tSegmentation recovered during first partial segmentation." << std::endl; + nextTile = nbTilesX*nbTilesY; + } + if (resume && accomplished && nextTile == nbTilesX*nbTilesY) + nextTile = 0; return accumulatedMemory; } -- GitLab