// Forked from Cresson Remi / otbtf (source project has limited visibility).
// File: otbTensorflowMultisourceModelFilter.hxx (16.22 KiB)
/*=========================================================================
  Copyright (c) Remi Cresson (IRSTEA). All rights reserved.
     This software is distributed WITHOUT ANY WARRANTY; without even
     the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
     PURPOSE.  See the above copyright notices for more information.
=========================================================================*/
#ifndef otbTensorflowMultisourceModelFilter_txx
#define otbTensorflowMultisourceModelFilter_txx
#include "otbTensorflowMultisourceModelFilter.h"
namespace otb
template <class TInputImage, class TOutputImage>
TensorflowMultisourceModelFilter<TInputImage, TOutputImage>
::TensorflowMultisourceModelFilter()
  m_OutputGridSize.Fill(0);
  m_ForceOutputGridSize = false;
  m_FullyConvolutional = false;
  m_OutputSpacing.Fill(0);
  m_OutputOrigin.Fill(0);
  m_OutputSize.Fill(0);
  m_OutputSpacingScale = 1.0f;
  Superclass::SetCoordinateTolerance(itk::NumericTraits<double>::max() );
  Superclass::SetDirectionTolerance(itk::NumericTraits<double>::max() );
/**
 * Grow the given region by one patch width in every dimension:
 * the index moves back by the "left" half-radius and the size grows
 * by the full patch width.
 */
template <class TInputImage, class TOutputImage>
void
TensorflowMultisourceModelFilter<TInputImage, TOutputImage>
::SmartPad(RegionType& region, const SizeType &patchSize)
{
  for (unsigned int d = 0; d < OutputImageType::ImageDimension; ++d)
    {
    const SizeValueType patchLen = patchSize[d];
    // Integer halving: rightRadius = floor(patch/2), leftRadius = ceil(patch/2)
    const SizeValueType rightRadius = patchLen / 2;
    const SizeValueType leftRadius = patchLen - rightRadius;
    region.GetModifiableIndex()[d] -= leftRadius;
    region.GetModifiableSize()[d] += patchLen;
    }
}
/**
 * Shrink the given region by one patch width in every dimension:
 * the index moves forward by the "left" half-radius and the size
 * shrinks by the full patch width. Inverse of SmartPad.
 */
template <class TInputImage, class TOutputImage>
void
TensorflowMultisourceModelFilter<TInputImage, TOutputImage>
::SmartShrink(RegionType& region, const SizeType &patchSize)
{
  for (unsigned int d = 0; d < OutputImageType::ImageDimension; ++d)
    {
    const SizeValueType patchLen = patchSize[d];
    // Integer halving: rightRadius = floor(patch/2), leftRadius = ceil(patch/2)
    const SizeValueType rightRadius = patchLen / 2;
    const SizeValueType leftRadius = patchLen - rightRadius;
    region.GetModifiableIndex()[d] += leftRadius;
    region.GetModifiableSize()[d] -= patchLen;
    }
}
/**
  Compute the input image extent i.e. corners inf & sup
  Function taken from "Mosaic" and adapted
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
template <class TInputImage, class TOutputImage> void TensorflowMultisourceModelFilter<TInputImage, TOutputImage> ::ImageToExtent(ImageType* image, PointType &extentInf, PointType &extentSup, SizeType &patchSize) { // Get largest possible region RegionType largestPossibleRegion = image->GetLargestPossibleRegion(); // Shrink it a little with the FOV radius SmartShrink(largestPossibleRegion, patchSize); // Get index of first and last pixel IndexType imageFirstIndex = largestPossibleRegion.GetIndex(); IndexType imageLastIndex = largestPossibleRegion.GetUpperIndex(); // Compute extent PointType imageOrigin; PointType imageEnd; image->TransformIndexToPhysicalPoint(imageLastIndex, imageEnd); image->TransformIndexToPhysicalPoint(imageFirstIndex, imageOrigin); for(unsigned int dim = 0; dim<OutputImageType::ImageDimension; ++dim) { extentInf[dim] = vnl_math_min(imageOrigin[dim], imageEnd[dim]); extentSup[dim] = vnl_math_max(imageOrigin[dim], imageEnd[dim]); } } /** Compute the region of the input image which correspond to the given output requested region Return true if the region exists, false if not Function taken from "Mosaic" */ template <class TInputImage, class TOutputImage> bool TensorflowMultisourceModelFilter<TInputImage, TOutputImage> ::OutputRegionToInputRegion(const RegionType &outputRegion, RegionType &inputRegion, ImageType* &inputImage) { // Mosaic Region Start & End (mosaic image index) const IndexType outIndexStart = outputRegion.GetIndex(); const IndexType outIndexEnd = outputRegion.GetUpperIndex(); // Mosaic Region Start & End (geo) PointType outPointStart, outPointEnd; this->GetOutput()->TransformIndexToPhysicalPoint(outIndexStart, outPointStart); this->GetOutput()->TransformIndexToPhysicalPoint(outIndexEnd , outPointEnd ); // Add the half-width pixel size of the input image // and remove the half-width pixel size of the output image // (coordinates = pixel center) const SpacingType outputSpc = this->GetOutput()->GetSpacing(); const SpacingType inputSpc 
= inputImage->GetSpacing(); for(unsigned int dim = 0; dim<OutputImageType::ImageDimension; ++dim) { const typename SpacingType::ValueType border = 0.5 * (inputSpc[dim] - outputSpc[dim]); if (outPointStart[dim] < outPointEnd[dim]) { outPointStart[dim] += border; outPointEnd [dim] -= border; } else { outPointStart[dim] -= border; outPointEnd [dim] += border; } }
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
// Mosaic Region Start & End (input image index) IndexType defIndexStart, defIndexEnd; inputImage->TransformPhysicalPointToIndex(outPointStart, defIndexStart); inputImage->TransformPhysicalPointToIndex(outPointEnd , defIndexEnd); // Compute input image region for(unsigned int dim = 0; dim<OutputImageType::ImageDimension; ++dim) { inputRegion.SetIndex(dim, vnl_math_min(defIndexStart[dim], defIndexEnd[dim])); inputRegion.SetSize(dim, vnl_math_max(defIndexStart[dim], defIndexEnd[dim]) - inputRegion.GetIndex(dim) + 1); } // crop the input requested region at the input's largest possible region return inputRegion.Crop( inputImage->GetLargestPossibleRegion() ); } /* * Enlarge the given region to the nearest aligned region. * Aligned region = Index and UpperIndex+1 are on the output grid */ template <class TInputImage, class TOutputImage> void TensorflowMultisourceModelFilter<TInputImage, TOutputImage> ::EnlargeToAlignedRegion(RegionType& region) { for(unsigned int dim = 0; dim<OutputImageType::ImageDimension; ++dim) { // Get corners IndexValueType lower = region.GetIndex(dim); IndexValueType upper = lower + region.GetSize(dim); // Compute deltas between corners and the grid const IndexValueType deltaLo = lower % m_OutputGridSize[dim]; const IndexValueType deltaUp = upper % m_OutputGridSize[dim]; // Move corners to aligned positions lower -= deltaLo; if (deltaUp > 0) { upper += m_OutputGridSize[dim] - deltaUp; } // Update region region.SetIndex(dim, lower); region.SetSize(dim, upper - lower); } } template <class TInputImage, class TOutputImage> void TensorflowMultisourceModelFilter<TInputImage, TOutputImage> ::GenerateOutputInformation() { Superclass::GenerateOutputInformation(); ////////////////////////////////////////////////////////////////////////////////////////// // Compute the output image extent ////////////////////////////////////////////////////////////////////////////////////////// // If the output spacing is not specified, we use the first input image as grid 
reference m_OutputSpacing = this->GetInput(0)->GetSignedSpacing(); m_OutputSpacing[0] *= m_OutputSpacingScale; m_OutputSpacing[1] *= m_OutputSpacingScale; PointType extentInf, extentSup; extentSup.Fill(itk::NumericTraits<double>::max()); extentInf.Fill(itk::NumericTraits<double>::NonpositiveMin());
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
// Compute the extent of each input images and update the global extent for (unsigned int imageIndex = 0 ; imageIndex < this->GetNumberOfInputs() ; imageIndex++) { ImageType * currentImage = static_cast<ImageType *>( Superclass::ProcessObject::GetInput(imageIndex) ); // Update output image extent PointType currentInputImageExtentInf, currentInputImageExtentSup; ImageToExtent(currentImage, currentInputImageExtentInf, currentInputImageExtentSup, this->GetInputFOVSizes()[imageIndex]); for(unsigned int dim = 0; dim<ImageType::ImageDimension; ++dim) { extentInf[dim] = vnl_math_max(currentInputImageExtentInf[dim], extentInf[dim]); extentSup[dim] = vnl_math_min(currentInputImageExtentSup[dim], extentSup[dim]); } } // Set final size m_OutputSize[0] = vcl_floor( (extentSup[0] - extentInf[0]) / vcl_abs(m_OutputSpacing[0]) ) + 1; m_OutputSize[1] = vcl_floor( (extentSup[1] - extentInf[1]) / vcl_abs(m_OutputSpacing[1]) ) + 1; // Set final origin m_OutputOrigin[0] = extentInf[0]; m_OutputOrigin[1] = extentSup[1]; // Set output grid size if (!m_ForceOutputGridSize) { // Default is the output field of expression m_OutputGridSize = m_OutputFOESize; } // Resize the largestPossibleRegion to be a multiple of the grid size for(unsigned int dim = 0; dim<ImageType::ImageDimension; ++dim) { if (m_OutputGridSize[dim] > m_OutputSize[dim]) itkGenericExceptionMacro("Output grid size is larger than output image size !"); m_OutputSize[dim] -= m_OutputSize[dim] % m_OutputGridSize[dim]; } // Set the largest possible region RegionType largestPossibleRegion; largestPossibleRegion.SetSize(m_OutputSize); ////////////////////////////////////////////////////////////////////////////////////////// // Compute the output number of components per pixel ////////////////////////////////////////////////////////////////////////////////////////// unsigned int outputPixelSize = 0; for (auto& protoShape: this->GetOutputTensorsShapes()) { // The number of components per pixel is the last dimension of the tensor int 
dim_size = protoShape.dim_size(); unsigned int nComponents = 1; if (1 < dim_size && dim_size <= 4) { nComponents = protoShape.dim(dim_size-1).size(); } else { itkExceptionMacro("Dim_size=" << dim_size << " currently not supported."); } outputPixelSize += nComponents; } // Copy input image projection ImageType * inputImage = static_cast<ImageType * >( Superclass::ProcessObject::GetInput(0) ); const std::string projectionRef = inputImage->GetProjectionRef(); // Set output image origin/spacing/size/projection ImageType * outputPtr = this->GetOutput();
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
outputPtr->SetNumberOfComponentsPerPixel(outputPixelSize); outputPtr->SetProjectionRef ( projectionRef ); outputPtr->SetOrigin ( m_OutputOrigin ); outputPtr->SetSignedSpacing ( m_OutputSpacing ); outputPtr->SetLargestPossibleRegion( largestPossibleRegion); } template <class TInputImage, class TOutputImage> void TensorflowMultisourceModelFilter<TInputImage, TOutputImage> ::GenerateInputRequestedRegion() { Superclass::GenerateInputRequestedRegion(); // Output requested region RegionType requestedRegion = this->GetOutput()->GetRequestedRegion(); // First, align the output region EnlargeToAlignedRegion(requestedRegion); // For each image, get the requested region for(unsigned int i = 0; i < this->GetNumberOfInputs(); ++i) { ImageType * inputImage = static_cast<ImageType * >( Superclass::ProcessObject::GetInput(i) ); // Compute the requested region RegionType inRegion; if (!OutputRegionToInputRegion(requestedRegion, inRegion, inputImage) ) { // Image does not overlap requested region: set requested region to null itkDebugMacro( << "Image #" << i << " :\n" << inRegion << " is outside the requested region"); inRegion.GetModifiableIndex().Fill(0); inRegion.GetModifiableSize().Fill(0); } // Compute the FOV-scale*FOE radius to pad SizeType toPad(this->GetInputFOVSizes().at(i)); toPad[0] -= 1 + (m_OutputFOESize[0] - 1) * m_OutputSpacingScale; toPad[1] -= 1 + (m_OutputFOESize[1] - 1) * m_OutputSpacingScale; // Pad with radius SmartPad(inRegion, toPad); // We need to avoid some extrapolation when mode is patch-based. // The reason is that, when some input have a lower spacing than the // reference image, the requested region of this lower res input image // can be one pixel larger when the input image regions are not physicaly // aligned. 
if (!m_FullyConvolutional) { inRegion.PadByRadius(1); } inRegion.Crop(inputImage->GetLargestPossibleRegion()); // Update the requested region inputImage->SetRequestedRegion(inRegion); // std::cout << "Input #" << i << " region starts at " << inRegion.GetIndex() << " with size " << inRegion.GetSize() << std::endl; } // next image } /** * Compute the output image */ template <class TInputImage, class TOutputImage> void
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
TensorflowMultisourceModelFilter<TInputImage, TOutputImage> ::GenerateData() { // Output pointer and requested region typename TOutputImage::Pointer outputPtr = this->GetOutput(); const RegionType outputReqRegion = outputPtr->GetRequestedRegion(); // Get the aligned output requested region RegionType outputAlignedReqRegion(outputReqRegion); EnlargeToAlignedRegion(outputAlignedReqRegion); // Add a progress reporter itk::ProgressReporter progress(this, 0, outputReqRegion.GetNumberOfPixels()); const unsigned int nInputs = this->GetNumberOfInputs(); // Create input tensors list DictListType inputs; // Populate input tensors for (unsigned int i = 0 ; i < nInputs ; i++) { // Input image pointer const ImagePointerType inputPtr = const_cast<TInputImage*>(this->GetInput(i)); // Patch size of tensor #i const SizeType inputPatchSize = this->GetInputFOVSizes().at(i); // Input image requested region const RegionType reqRegion = inputPtr->GetRequestedRegion(); if (m_FullyConvolutional) { // Shape of input tensor #i tensorflow::int64 sz_n = 1; tensorflow::int64 sz_y = reqRegion.GetSize(1); tensorflow::int64 sz_x = reqRegion.GetSize(0); tensorflow::int64 sz_c = inputPtr->GetNumberOfComponentsPerPixel(); tensorflow::TensorShape inputTensorShape({sz_n, sz_y, sz_x, sz_c}); // Create the input tensor tensorflow::Tensor inputTensor(this->GetInputTensorsDataTypes()[i], inputTensorShape); // Recopy the whole input tf::RecopyImageRegionToTensorWithCast<TInputImage>(inputPtr, reqRegion, inputTensor, 0); // Input #1 : the tensor of patches (aka the batch) DictType input1 = { this->GetInputPlaceholdersNames()[i], inputTensor }; inputs.push_back(input1); } else { // Preparing patches (not very optimized ! 
) // It would be better to perform the loop inside the TF session using TF operators // Shape of input tensor #i tensorflow::int64 sz_n = outputReqRegion.GetNumberOfPixels(); tensorflow::int64 sz_y = inputPatchSize[1]; tensorflow::int64 sz_x = inputPatchSize[0]; tensorflow::int64 sz_c = inputPtr->GetNumberOfComponentsPerPixel(); tensorflow::TensorShape inputTensorShape({sz_n, sz_y, sz_x, sz_c}); // Create the input tensor tensorflow::Tensor inputTensor(this->GetInputTensorsDataTypes()[i], inputTensorShape); // Fill the input tensor. // We iterate over points which are located from the index iterator // moving through the output image requested region unsigned int elemIndex = 0; IndexIteratorType idxIt(outputPtr, outputReqRegion); for (idxIt.GoToBegin(); !idxIt.IsAtEnd(); ++idxIt)
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
{ // Get the coordinates of the current output pixel PointType point; outputPtr->TransformIndexToPhysicalPoint(idxIt.GetIndex(), point); // Sample the i-th input patch centered on the point tf::SampleCenteredPatch<TInputImage>(inputPtr, point, inputPatchSize, inputTensor, elemIndex); elemIndex++; } // Input #1 : the tensor of patches (aka the batch) DictType input1 = { this->GetInputPlaceholdersNames()[i], inputTensor }; inputs.push_back(input1); } // mode is not full convolutional } // next input tensor // Run session TensorListType outputs; this->RunSession(inputs, outputs); // Fill the output buffer with zero value outputPtr->SetBufferedRegion(outputReqRegion); outputPtr->Allocate(); OutputPixelType nullpix; nullpix.SetSize(outputPtr->GetNumberOfComponentsPerPixel()); nullpix.Fill(0); outputPtr->FillBuffer(nullpix); // Get output tensors int bandOffset = 0; for (unsigned int i = 0 ; i < outputs.size() ; i++) { // The offset (i.e. the starting index of the channel for the output tensor) is updated // during this call // TODO: implement a generic strategy enabling FOE copy in patch-based mode (see tf::CopyTensorToImageRegion) tf::CopyTensorToImageRegion<TOutputImage> (outputs[i], outputAlignedReqRegion, outputPtr, outputReqRegion, bandOffset); } } } // end namespace otb #endif