#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/SpatialDilatedMaxPooling.c" #else static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)( THTensor *input, THTensor *gradOutput, THIndexTensor *indices, int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW, bool ceil_mode) { THArgCheck(kW > 0 && kH > 0, 5, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); THArgCheck(dW > 0 && dH > 0, 8, "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); THArgCheck(dilationH > 0 && dilationW > 0, 12, "dilation should be greater than zero, but got dilationH: %d dilationW: %d", dilationH, dilationW); int ndim = input->nDimension; int dimf = 0; int dimh = 1; int dimw = 2; if (ndim == 4) { dimf++; dimh++; dimw++; } THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input, "3D or 4D input tensor expected but got: %s"); THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, "pad should be smaller than half of kernel size, but got " "padW = %d, padH = %d, kW = %d, kH = %d", padW, padH, kW, kH); long nInputPlane = input->size[dimh-1]; long inputHeight = input->size[dimh]; long inputWidth = input->size[dimw]; long outputHeight, outputWidth; long nOutputPlane = nInputPlane; if (ceil_mode) { outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1; outputWidth = (long)(ceil((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1; } else { outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1; outputWidth = (long)(floor((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1; } if (padW || padH) { // ensure that the last pooling starts inside the image // needed to avoid problems in ceil mode if ((outputHeight - 1)*dH >= inputHeight + padH) --outputHeight; if ((outputWidth - 1)*dW >= inputWidth + padW) --outputWidth; } if (outputWidth < 1 || outputHeight < 1) THError("Given input size: (%dx%dx%d). " "Calculated output size: (%dx%dx%d). Output size is too small", nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth); if (gradOutput != NULL) { THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); } if (indices != NULL) { THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane); THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight); THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth); } } static void THNN_(SpatialDilatedMaxPooling_updateOutput_frame)( real *input_p, real *output_p, THIndex_t *ind_p, long nslices, long iwidth, long iheight, long owidth, long oheight, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH ) { long k; #pragma omp parallel for private(k) for (k = 0; k < nslices; k++) { /* loop over output */ long i, j; real *ip = input_p + k*iwidth*iheight; for(i = 0; i < oheight; i++) { for(j = 0; j < owidth; j++) { long hstart = i * dH - padH; long wstart = j * dW - padW; long hend = fminf(hstart + (kH - 1) * dilationH + 1, iheight); long wend = fminf(wstart + (kW - 1) * dilationW + 1, iwidth); while(hstart < 0) hstart += dilationH; while(wstart < 0) wstart += dilationW; /* local pointers */ real *op = output_p + k*owidth*oheight + i*owidth + j; THIndex_t *indp = ind_p + k*owidth*oheight + i*owidth + j; /* compute local max: */ long maxindex = -1; real maxval = -THInf; long tcntr = 0; long x,y; for(y = hstart; y < hend; y += dilationH) { for(x = wstart; x < wend; x += dilationW) { tcntr = y*iwidth + x; real val = *(ip + tcntr); if (val > maxval) { maxval = val; maxindex = tcntr; } } } /* set output to local max */ *op = maxval; /* store location of max */ *indp = maxindex + TH_INDEX_BASE; } } } } void THNN_(SpatialDilatedMaxPooling_updateOutput)( THNNState *state, THTensor *input, THTensor *output, THIndexTensor *indices, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, bool ceil_mode) { int dimw = 2; int dimh = 1; long nbatch = 1; long nInputPlane; long inputHeight; long inputWidth; long outputHeight; long outputWidth; real *input_data; real *output_data; THIndex_t *indices_data; THNN_(SpatialDilatedMaxPooling_shapeCheck) (input, NULL, NULL, kH, kW, dH, dW, padH, padW, dilationH, dilationW, ceil_mode); if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } /* sizes */ nInputPlane = input->size[dimh-1]; inputHeight = input->size[dimh]; inputWidth = input->size[dimw]; if (ceil_mode) { outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1; outputWidth = (long)(ceil((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1; } else { outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1; outputWidth = (long)(floor((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1; } if (padW || padH) { // ensure that the last pooling starts inside the image // needed to avoid problems in ceil mode if ((outputHeight - 1)*dH >= inputHeight + padH) --outputHeight; if ((outputWidth - 1)*dW >= inputWidth + padW) --outputWidth; } /* get contiguous input */ input = THTensor_(newContiguous)(input); /* resize output */ if (input->nDimension == 3) { THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth); /* indices will contain the locations for each output point */ THIndexTensor_(resize3d)(indices, nInputPlane, outputHeight, outputWidth); input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); indices_data = THIndexTensor_(data)(indices); THNN_(SpatialDilatedMaxPooling_updateOutput_frame) (input_data, output_data, indices_data, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight, kW, kH, dW, dH, padW, padH, dilationW, dilationH ); } else { long p; THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth); /* indices will contain the locations for each output point */ THIndexTensor_(resize4d)(indices, nbatch, nInputPlane, outputHeight, outputWidth); input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); indices_data = THIndexTensor_(data)(indices); #pragma omp parallel for private(p) for (p = 0; p < nbatch; p++) { THNN_(SpatialDilatedMaxPooling_updateOutput_frame) (input_data+p*nInputPlane*inputWidth*inputHeight, output_data+p*nInputPlane*outputWidth*outputHeight, indices_data+p*nInputPlane*outputWidth*outputHeight, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight, kW, kH, dW, dH, padW, padH, dilationW, dilationH ); } } /* cleanup */ THTensor_(free)(input); } static void THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)( real *gradInput_p, real *gradOutput_p, THIndex_t *ind_p, long nInputPlane, long inputWidth, long inputHeight, long outputWidth, long outputHeight, int dW, int dH) { long k; #pragma omp parallel for private(k) for (k = 0; k < nInputPlane; k++) { real *gradInput_p_k = gradInput_p + k*inputWidth*inputHeight; real *gradOutput_p_k = gradOutput_p + k*outputWidth*outputHeight; THIndex_t *ind_p_k = ind_p + k*outputWidth*outputHeight; /* calculate max points */ long i, j; for(i = 0; i < outputHeight; i++) { for(j = 0; j < outputWidth; j++) { /* retrieve position of max */ long maxp = ind_p_k[i*outputWidth + j] - TH_INDEX_BASE; if (maxp != -1) { /* update gradient */ gradInput_p_k[maxp] += gradOutput_p_k[i*outputWidth + j]; } } } } } void THNN_(SpatialDilatedMaxPooling_updateGradInput)( THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THIndexTensor *indices, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, bool ceil_mode) { int dimw = 2; int dimh = 1; long nbatch = 1; int nInputPlane; int inputHeight; int inputWidth; int outputHeight; int outputWidth; real *gradInput_data; real *gradOutput_data; THIndex_t *indices_data; THNN_(SpatialDilatedMaxPooling_shapeCheck) (input, gradOutput, indices, kH, kW, dH, dW, padH, padW, dilationH, dilationW, ceil_mode); /* get contiguous gradOutput */ gradOutput = THTensor_(newContiguous)(gradOutput); /* resize */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } /* sizes */ nInputPlane = input->size[dimh-1]; inputHeight = input->size[dimh]; inputWidth = input->size[dimw]; outputHeight = gradOutput->size[dimh]; outputWidth = gradOutput->size[dimw]; /* get raw pointers */ gradInput_data = THTensor_(data)(gradInput); gradOutput_data = THTensor_(data)(gradOutput); indices_data = THIndexTensor_(data)(indices); /* backprop */ if (input->nDimension == 3) { THNN_(SpatialDilatedMaxPooling_updateGradInput_frame) (gradInput_data, gradOutput_data, indices_data, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight, dW, dH); } else { long p; #pragma omp parallel for private(p) for (p = 0; p < nbatch; p++) { THNN_(SpatialDilatedMaxPooling_updateGradInput_frame) (gradInput_data+p*nInputPlane*inputWidth*inputHeight, gradOutput_data+p*nInputPlane*outputWidth*outputHeight, indices_data+p*nInputPlane*outputWidth*outputHeight, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight, dW, dH); } } /* cleanup */ THTensor_(free)(gradOutput); } #endif