/*
 * Spatial sub-sampling kernels (forward pass, gradient w.r.t. input, and a
 * parameter-gradient accumulation loop).
 *
 * Include-guard pattern: when TH_GENERIC_FILE is not yet defined this file
 * only defines it to its own path; the definitions after #else are compiled
 * when the file is seen again with TH_GENERIC_FILE already defined.
 * NOTE(review): presumably `real`, THTensor, THTensor_() and THNN_() are
 * supplied per element type by whatever includes this file -- confirm
 * against the including driver; none of them is declared here.
 *
 * Each output pixel of plane k is
 *     bias[k] + weight[k] * (sum of a kH x kW input window of plane k),
 * with windows placed every dH rows / dW columns.
 */
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialSubSampling.c"
#else

/*
 * Argument validation shared by the routines below.
 *
 * Checks, in order:
 *   - input has 3 or 4 dimensions;
 *   - weight is contiguous;
 *   - the plane dimension of input (the one just before the height
 *     dimension) has the same extent as dimension 0 of weight;
 *   - input width >= kW and input height >= kH.
 *
 * NOTE(review): `gradOutput` is accepted but never referenced in this body,
 * and `ndims` is assigned but never read.
 */
static inline void THNN_(SpatialSubSampling_shapeCheck)(
    THTensor *input,
    THTensor *gradOutput,
    THTensor *weight,
    int kW, int kH) {
  int ndims = input->nDimension;

  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");
  THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");

  int nInputPlane = THTensor_(size)(weight, 0);

  /* Indices of the width / height dimensions for a 3D input. */
  int dimw = 2;
  int dimh = 1;

  long inputWidth;
  long inputHeight;

  /* A 4D input has one extra leading dimension, so both indices shift. */
  if (input->nDimension == 4) {
    dimw++;
    dimh++;
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];

  /* dimh-1 is the plane dimension in both the 3D and the 4D layout. */
  THArgCheck(input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes");
  THArgCheck(inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size");
}

/*
 * Forward pass.
 *
 * Resizes `output` to (nInputPlane, outputHeight, outputWidth) for a 3D
 * input, or (input->size[0], nInputPlane, outputHeight, outputWidth) for a
 * 4D input, where
 *     outputWidth  = (inputWidth  - kW) / dW + 1
 *     outputHeight = (inputHeight - kH) / dH + 1
 * (integer division), then fills every output pixel with
 *     bias[k] + weight[k] * sum(window).
 *
 * `state` is not referenced in this body.
 *
 * NOTE(review): the first check explicitly tolerates a NULL `bias`, yet
 * `bias` is handed to THTensor_(data) and bias_data[k] is read
 * unconditionally below -- confirm whether a NULL bias is really meant to
 * be supported.
 * NOTE(review): dW and dH are used as divisors and no zero check is visible
 * here -- confirm callers guarantee dW, dH > 0.
 */
void THNN_(SpatialSubSampling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THTensor *weight,
    THTensor *bias,
    int kW, int kH,
    int dW, int dH) {
  THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");

  real *weight_data = THTensor_(data)(weight);
  real *bias_data = THTensor_(data)(bias);
  real *output_data;
  real *input_data;

  /* Width / height dimension indices for a 3D input; shifted for 4D. */
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;

  long inputWidth;
  long inputHeight;
  long outputWidth;
  long outputHeight;

  int nInputPlane = THTensor_(size)(weight,0);

  long k;

  /* No gradOutput exists in the forward pass, hence the NULL. */
  THNN_(SpatialSubSampling_shapeCheck)(input, NULL, weight, kW, kH);

  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  outputWidth = (inputWidth - kW) / dW + 1;
  outputHeight = (inputHeight - kH) / dH + 1;

  if (input->nDimension == 3)
    THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
  else
    THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth);

  /* From here on `input` is the tensor returned by newContiguous; it is
     released with THTensor_(free) at the end of the function. */
  input = THTensor_(newContiguous)(input);
  input_data = THTensor_(data)(input);
  /* NOTE(review): output is indexed below as a dense
     [batch][plane][row][col] array -- presumably the resize above
     guarantees that layout; confirm. */
  output_data = THTensor_(data)(output);

  /* Iteration k only writes plane k of the output (for every batch item),
     so iterations touch disjoint output regions. */
#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane; k++) {
    long p;
    for(p = 0; p < nbatch; p++) {
      long xx, yy;
      /* Start of the (p, k) output plane. */
      real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
      /* One scalar weight per plane. */
      real the_weight = weight_data[k];
      /* Pre-fill the whole output plane with the per-plane bias. */
      real z = bias_data[k];
      long i;
      for(i = 0; i < outputWidth*outputHeight; i++)
        ptr_output[i] = z;

      for(yy = 0; yy < outputHeight; yy++) {
        for(xx = 0; xx < outputWidth; xx++) {
          /* Top-left corner of the window for output pixel (yy, xx) in the
             (p, k) input plane. */
          real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
          /* Plain sum over the kH x kW window (no division by its size). */
          real sum = 0;
          long kx, ky;

          for(ky = 0; ky < kH; ky++) {
            for(kx = 0; kx < kW; kx++)
              sum += ptr_input[kx];
            ptr_input += inputWidth; /* advance to the next input row */
          }
          /* Add the weighted sum on top of the bias and step to the next
             output pixel (row-major order). */
          *ptr_output++ += the_weight*sum;
        }
      }
    }
  }
  THTensor_(free)(input);
}

/*
 * Gradient with respect to the input.
 *
 * Visible set-up: validates shapes, derives the output extent with the same
 * formula as the forward pass, takes a contiguous gradOutput, resizes
 * gradInput to the shape of input, then starts a per-plane / per-batch loop.
 *
 * `state` is not referenced in the visible part of this body.
 *
 * NOTE(review): the remainder of this function is missing from the file
 * text (see the marker inside the loop), so its actual accumulation logic
 * and clean-up cannot be described from here.
 * NOTE(review): `input_data` is assigned but not used anywhere in the
 * visible portion.
 */
void THNN_(SpatialSubSampling_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THTensor *weight,
    int kW, int kH,
    int dW, int dH) {
  THNN_(SpatialSubSampling_shapeCheck)(input, gradOutput, weight, kW, kH);

  /* Width / height dimension indices for a 3D input; shifted for 4D. */
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;

  long inputWidth;
  long inputHeight;
  long outputWidth;
  long outputHeight;

  int nInputPlane = THTensor_(size)(weight,0);

  real *weight_data;
  real *gradOutput_data;
  real *input_data, *gradInput_data;

  long k;

  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  outputWidth = (inputWidth - kW) / dW + 1;
  outputHeight = (inputHeight - kH) / dH + 1;

  weight_data = THTensor_(data)(weight);
  /* `gradOutput` now refers to the tensor returned by newContiguous. */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  gradOutput_data = THTensor_(data)(gradOutput);

  input_data = THTensor_(data)(input);

  THTensor_(resizeAs)(gradInput, input);
  gradInput_data = THTensor_(data)(gradInput);

#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane; k++) {
    long p;
    for(p = 0; p < nbatch; p++) {
      real the_weight = weight_data[k];
      /* Start of the (p, k) plane of gradOutput. */
      real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
      long xx, yy;

      /* Start of the (p, k) plane of gradInput. */
      real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
      long i;
      /*
       * NOTE(review): SOURCE IS CORRUPTED HERE. The text jumps from the
       * middle of this loop header ("for(i=0; i") straight to
       * "nDimension == 4) {". Everything in between -- the rest of this
       * function and the header, signature and local declarations of the
       * next one -- is absent. The statements that follow use
       * gradWeight, gradBias, gradWeight_data, gradBias_data and scale,
       * none of which is declared anywhere in the visible text, so they
       * appear to belong to a separate routine (presumably the
       * parameter-gradient accumulation function -- confirm against the
       * pristine file). Restore this span from version control before
       * building; as written the braces do not balance.
       */
      for(i=0; inDimension == 4) {
    dimw++;
    dimh++;
    nbatch = input->size[0];
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  outputWidth = (inputWidth - kW) / dW + 1;
  outputHeight = (inputHeight - kH) / dH + 1;

  gradWeight_data = THTensor_(data)(gradWeight);
  gradBias_data = THTensor_(data)(gradBias);
  /* Both tensors are replaced by the results of newContiguous and released
     with THTensor_(free) at the end. */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  gradOutput_data = THTensor_(data)(gradOutput);

  input = THTensor_(newContiguous)(input);
  input_data = THTensor_(data)(input);

  /* Iteration k only updates element k of gradBias_data and
     gradWeight_data. */
#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane; k++) {
    long p;
    for(p = 0; p < nbatch; p++) {
      /* Start of the (p, k) plane of gradOutput. */
      real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
      real sum;
      long xx, yy;
      long i;

      /* Bias gradient: scale times the sum of every gradOutput value in
         this plane. Indexed access leaves ptr_gradOutput untouched for the
         second pass below. */
      sum = 0;
      for(i = 0; i < outputWidth*outputHeight; i++)
        sum += ptr_gradOutput[i];
      gradBias_data[k] += scale*sum;

      /* Weight gradient: scale times the sum, over all output pixels, of
         (gradOutput value) * (sum of the matching kH x kW input window). */
      sum = 0;
      for(yy = 0; yy < outputHeight; yy++) {
        for(xx = 0; xx < outputWidth; xx++) {
          /* Top-left corner of the input window for output pixel (yy, xx). */
          real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
          /* Consume the next gradOutput value in row-major order. */
          real z = *ptr_gradOutput++;
          long kx, ky;

          for(ky = 0; ky < kH; ky++) {
            for(kx = 0; kx < kW; kx++)
              sum += z * ptr_input[kx];
            ptr_input += inputWidth; /* advance to the next input row */
          }
        }
      }
      gradWeight_data[k] += scale*sum;
    }
  }

  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
}

#endif