Diffstat (limited to 'contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c')
-rw-r--r--  contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c  462
1 file changed, 462 insertions, 0 deletions
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c
new file mode 100644
index 000000000..2edc53b5a
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c
@@ -0,0 +1,462 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialFullConvolution.c"
+#else
+
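+// SpatialFullConvolution (a.k.a. transposed convolution or "deconvolution")
+// is implemented below as a GEMM against the flattened weights followed by a
+// col2im scatter, mirroring the familiar im2col + GEMM form of convolution.
+//
+// im2col lowers a (channels x height x width) image into a matrix with
+// channels*kernel_h*kernel_w rows and height_col*width_col columns: each
+// column holds one (zero-padded) kernel-sized patch, so convolving the image
+// reduces to a single matrix multiply against the flattened kernels.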
+static void THNN_(im2col)(const real* data_im, const int channels,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ real* data_col) {
+ const int height_col = (height + 2 * pad_h -
+ (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ const int width_col = (width + 2 * pad_w -
+ (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+ const int channels_col = channels * kernel_h * kernel_w;
+ for (int c_col = 0; c_col < channels_col; ++c_col) {
+ int w_offset = c_col % kernel_w;
+ int h_offset = (c_col / kernel_w) % kernel_h;
+ int c_im = c_col / kernel_h / kernel_w;
+ for (int h_col = 0; h_col < height_col; ++h_col) {
+ for (int w_col = 0; w_col < width_col; ++w_col) {
+ int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
+ int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
+ data_col[(c_col * height_col + h_col) * width_col + w_col] =
+ (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
+ data_im[(c_im * height + h_im) * width + w_im] : 0;
+ }
+ }
+ }
+}
+
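+// col2im is the adjoint of im2col: it scatters every column entry back onto
+// the pixel it came from, accumulating where neighbouring patches overlap.
+// This overlap-add is exactly what a transposed convolution needs.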
+static void THNN_(col2im)(const real* data_col, const int channels,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ real* data_im) {
+ memset(data_im, 0, sizeof(real) * height * width * channels);
+ const int height_col = (height + 2 * pad_h -
+ (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ const int width_col = (width + 2 * pad_w -
+ (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+ const int channels_col = channels * kernel_h * kernel_w;
+ for (int c_col = 0; c_col < channels_col; ++c_col) {
+ int w_offset = c_col % kernel_w;
+ int h_offset = (c_col / kernel_w) % kernel_h;
+ int c_im = c_col / kernel_h / kernel_w;
+ for (int h_col = 0; h_col < height_col; ++h_col) {
+ for (int w_col = 0; w_col < width_col; ++w_col) {
+ int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
+ int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
+ if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
+ data_im[(c_im * height + h_im) * width + w_im] +=
+ data_col[(c_col * height_col + h_col) * width_col + w_col];
+ }
+ }
+ }
+}
+
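+// Transposed convolution inverts the usual output-size formula:
+//   out = (in - 1) * stride - 2 * pad + kernel + adj
+// e.g. in = 4, stride = 2, pad = 1, kernel = 3, adj = 1 gives
+// (4 - 1) * 2 - 2 + 3 + 1 = 8. The adjustment term adj (checked below to be
+// smaller than the stride) picks between the input sizes that a forward
+// convolution would collapse onto the same output size.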
+static inline void THNN_(SpatialFullConvolution_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ THTensor *weight, THTensor *bias,
+ int kH, int kW, int dH, int dW, int padH, int padW, int adjH, int adjW) {
+
+ THArgCheck(kW > 0 && kH > 0, 9,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+ THArgCheck(dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+ THArgCheck(adjW < dW && adjH < dH, 15,
+ "output adjustment must be smaller than stride, but got adjH: %d adjW: %d dH: %d dW: %d",
+ adjH, adjW, dH, dW);
+ THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 4, 5, weight,
+ "2D or 4D weight tensor expected, but got: %s");
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]);
+ }
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+
+ long nInputPlane = weight->size[0];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long nOutputPlane = weight->size[1];
+ long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
+ long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
+
+ if (outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%d x %d x %d). "
+ "Calculated output size: (%d x %d x %d). Output size is too small",
+ nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+void THNN_(SpatialFullConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH)
+{
+ THNN_(SpatialFullConvolution_shapeCheck)
+ (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW, adjH, adjW);
+
+ int nInputPlane = THTensor_(size)(weight,0);
+ int nOutputPlane = THTensor_(size)(weight,1);
+
+ input = THTensor_(newContiguous)(input);
+ weight = THTensor_(newContiguous)(weight);
+ bias = bias ? THTensor_(newContiguous)(bias) : bias;
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ }
+
+ long inputHeight = input->size[2];
+ long inputWidth = input->size[3];
+ long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
+ long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
+ THTensor_(zero)(columns);
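+ // Note: the columns buffer has nOutputPlane*kH*kW rows but one column per
+ // *input* pixel; the GEMM below runs at input resolution and col2im then
+ // scatters its result onto the (larger) output plane.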
+
+ // Define a buffer of ones, for bias accumulation
+ // Note: this buffer can be shared with other modules, it only ever gets increased,
+ // and always contains ones.
+ if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize2d)(ones, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *output_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+ for (elt = 0; elt < batchSize; elt++) {
+ // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(output_n, output, 0, elt);
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long m = weight->size[1] * weight->size[2] * weight->size[3];
+ long n = columns->size[1];
+ long k = weight->size[0];
+
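+ // In row-major terms this computes columns(m x n) = weight^T(m x k) *
+ // input_n(k x n), with m = nOutputPlane*kH*kW, k = nInputPlane and
+ // n = inputHeight*inputWidth; the swapped m/n and the 'n','t' flags express
+ // that product for a column-major BLAS.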
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 't',
+ n, m, k,
+ 1,
+ THTensor_(data)(input_n), n,
+ THTensor_(data)(weight), m,
+ 0,
+ THTensor_(data)(columns), n
+ );
+
+ // Unpack columns back into output:
+ THNN_(col2im)(
+ THTensor_(data)(columns),
+ nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
+ 1, 1,
+ THTensor_(data)(output_n)
+ );
+
+ // Do Bias after:
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long m_ = nOutputPlane;
+ long n_ = outputHeight * outputWidth;
+ long k_ = 1;
+
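+ // In row-major terms: output_n(m_ x n_) += bias(m_ x 1) * ones(1 x n_),
+ // i.e. broadcast each bias value across its whole output plane.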
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ if (bias) {
+ THBlas_(gemm)(
+ 't', 'n',
+ n_, m_, k_,
+ 1,
+ THTensor_(data)(ones), k_,
+ THTensor_(data)(bias), k_,
+ 1,
+ THTensor_(data)(output_n), n_
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(output_n);
+
+ // Un-batch: restore the original 3D shapes
+ if (batch == 0) {
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(weight);
+ if (bias) THTensor_(free)(bias);
+}
+
+void THNN_(SpatialFullConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH)
+{
+ THNN_(SpatialFullConvolution_shapeCheck)
+ (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, adjH, adjW);
+
+ int nInputPlane = THTensor_(size)(weight,0);
+ int nOutputPlane = THTensor_(size)(weight,1);
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ weight = THTensor_(newContiguous)(weight);
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
+ }
+
+ long inputWidth = input->size[3];
+ long inputHeight = input->size[2];
+ long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
+ long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize gradInput
+ THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
+ THTensor_(zero)(gradInput);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);
+
+ // Helpers
+ THTensor *gradInput_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+ for (elt = 0; elt < batchSize; elt++) {
+ // Matrix multiply per sample:
+ THTensor_(select)(gradInput_n, gradInput, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(im2col)(
+ THTensor_(data)(gradOutput_n),
+ nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
+ 1, 1,
+ THTensor_(data)(gradColumns)
+ );
+
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long m = weight->size[0];
+ long n = gradColumns->size[1];
+ long k = weight->size[1] * weight->size[2] * weight->size[3];
+
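+ // In row-major terms: gradInput_n(m x n) = weight(m x k) * gradColumns(k x n),
+ // with m = nInputPlane, k = nOutputPlane*kH*kW, n = inputHeight*inputWidth.
+ // This is an ordinary forward convolution of gradOutput with the weights.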
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 'n',
+ n, m, k,
+ 1,
+ THTensor_(data)(gradColumns), n,
+ THTensor_(data)(weight), k,
+ 0,
+ THTensor_(data)(gradInput_n), n
+ );
+ }
+
+
+ // Free
+ THTensor_(free)(gradInput_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Un-batch: restore the original 3D shapes
+ if (batch == 0) {
+ THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+}
+
+
+void THNN_(SpatialFullConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THNN_(SpatialFullConvolution_shapeCheck)
+ (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, adjH, adjW);
+
+ int nInputPlane = THTensor_(size)(gradWeight,0);
+ int nOutputPlane = THTensor_(size)(gradWeight,1);
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
+ if (gradBias)
+ THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
+ }
+
+ long inputWidth = input->size[3];
+ long inputHeight = input->size[2];
+ long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
+ long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Define a buffer of ones, for bias accumulation
+ if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize2d)(ones, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+ for (elt = 0; elt < batchSize; elt++) {
+ // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(im2col)(
+ THTensor_(data)(gradOutput_n),
+ nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
+ 1, 1,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long n = columns->size[0]; // nOutputPlane * kh * kw
+ long m = input_n->size[0]; // nInputPlane
+ long k = columns->size[1]; // inputHeight * inputWidth
+
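+ // In row-major terms: gradWeight(m x n) += scale * input_n(m x k) *
+ // columns^T(k x n), with m = nInputPlane, n = nOutputPlane*kH*kW and
+ // k = inputHeight*inputWidth.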
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 't', 'n',
+ n, m, k,
+ scale,
+ THTensor_(data)(columns), k,
+ THTensor_(data)(input_n), k,
+ 1,
+ THTensor_(data)(gradWeight), n
+ );
+
+
+ // Do Bias:
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long m_ = nOutputPlane;
+ long k_ = outputHeight * outputWidth;
+
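+ // In row-major terms: gradBias(m_) += scale * gradOutput_n(m_ x k_) * ones(k_),
+ // i.e. each bias gradient accumulates the sum of its gradOutput plane.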
+ // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
+ if (gradBias) {
+ THBlas_(gemv)(
+ 't',
+ k_, m_,
+ scale,
+ THTensor_(data)(gradOutput_n), k_,
+ THTensor_(data)(ones), 1,
+ 1,
+ THTensor_(data)(gradBias), 1
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Un-batch: restore the original 3D shapes
+ if (batch == 0) {
+ THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif