aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c')
-rw-r--r--contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c472
1 files changed, 472 insertions, 0 deletions
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c
new file mode 100644
index 000000000..e3ae41e22
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c
@@ -0,0 +1,472 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalRowConvolution.c"
+#else
+
+static inline void THNN_(TemporalRowConvolution_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *bias,
+ int kW,
+ int dW,
+ int padW) {
+
+ THArgCheck(kW > 0, 5,
+ "kernel size should be greater than zero, but got kW: %d", kW);
+ THArgCheck(dW > 0, 6,
+ "stride should be greater than zero, but got dW: %d", dW);
+ THNN_ARGCHECK(weight->nDimension == 3, 3, weight,
+ "3D weight tensor expected, but got: %s");
+ THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+ THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
+ }
+
+ // we're always looking at (possibly batch) x feats x seq
+ int ndim = input->nDimension;
+ int dimF = 0;
+ int dimS = 1;
+
+ if (ndim == 3) {
+ ++dimS;
+ ++dimF;
+ }
+
+ THNN_ARGCHECK(ndim == 2 || ndim == 3, 1, input,
+ "2D or 3D (batch mode) input tensor expected, but got :%s");
+
+ long inputFrameSize = weight->size[0];
+ long nInputFrame = input->size[dimS];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ if (nOutputFrame < 1) {
+ THError("Given input size: (%d x %d). "
+ "Calculated output size: (%d x %d). Output size is too small",
+ inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame);
+ }
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame);
+ }
+}
+
/* Scatter-add the unfolded buffer `finput` back into `input` -- the
 * "col2row" counterpart of unfolded_copy_row, used to fold per-tap gradient
 * columns back into the input gradient.
 *
 * finput layout: inputFrameSize x kW x nOutputFrame (contiguous)
 * input  layout: inputFrameSize x nInputFrame       (contiguous)
 *
 * NOTE(review): padW is accepted but never used below, so padding does not
 * shift the accumulation window -- presumably only padW == 0 is supported;
 * confirm against the Lua-side module. */
static void THNN_(unfolded_acc_row)(
  THTensor *finput,
  THTensor *input,
  int kW,
  int dW,
  int padW,
  long inputFrameSize,
  long nInputFrame,
  long nOutputFrame) {

  size_t c;
  real *input_data = THTensor_(data)(input);
  real *finput_data = THTensor_(data)(finput);

// #pragma omp parallel for private(c)
  for (c = 0; c < inputFrameSize; c++) {
    size_t kw, x;
    long long ix = 0;

    for (kw = 0; kw < kW; kw++) {
      // src: the nOutputFrame-long column of tap `kw` for feature row `c`
      real *src = finput_data
        + c * (kW * nOutputFrame)
        + kw * (nOutputFrame);
      // dst: the full input row for feature `c`
      real *dst = input_data + c * (nInputFrame);

      ix = (long long)(kw);
      if (dW == 1) {
        // unit stride: the whole column maps onto one contiguous input
        // slice, so accumulate it with a single vectorized cadd
        real *dst_slice = dst + (size_t)(ix);
        THVector_(cadd)(dst_slice, dst_slice, src, 1, nOutputFrame);
      } else {
        // general stride: accumulate element by element at spacing dW
        for (x = 0; x < nOutputFrame; x++) {
          real *dst_slice = dst + (size_t)(ix + x * dW);
          THVector_(cadd)(dst_slice, dst_slice,
                          src + (size_t)(x), 1, 1);
        }
      }
    }
  }
}
+
+static void THNN_(unfolded_copy_row)(
+ THTensor *finput,
+ THTensor *input,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ long k;
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+
+// #pragma omp parallel for private(k)
+ for (k = 0; k < inputFrameSize * kW; k++) {
+ size_t c = k / kW;
+ size_t rest = k % kW;
+ size_t kw = rest % kW;
+ size_t x;
+ long long ix;
+ real *dst = finput_data + c * (kW * nOutputFrame) + kw * (nOutputFrame);
+ real *src = input_data + c * (nInputFrame);
+
+ ix = (long long)(kw);
+ if (dW == 1) {
+ memcpy(dst, src+(size_t)(ix), sizeof(real) * (nOutputFrame));
+ } else {
+ for (x = 0; x < nOutputFrame; x++) {
+ memcpy(dst + (size_t)(x), src + (size_t)(ix + x * dW),
+ sizeof(real) * 1);
+ }
+ }
+ }
+}
+
/* Forward pass for a single sample (no batch dimension).
 * input:  inputFrameSize x nInputFrame   (contiguous)
 * output: inputFrameSize x nOutputFrame  (pre-sized by caller)
 * weight: inputFrameSize x 1 x kW
 * finput: unfold scratch buffer, pre-sized by caller to
 *         inputFrameSize x kW x nOutputFrame */
static void THNN_(TemporalRowConvolution_updateOutput_frame)(
  THTensor *input,
  THTensor *output,
  THTensor *weight,
  THTensor *bias,
  THTensor *finput,
  int kW,
  int dW,
  int padW,
  long inputFrameSize,
  long nInputFrame,
  long nOutputFrame) {

  long i;

  // Zero-copy view of output as inputFrameSize x 1 x nOutputFrame so each
  // feature row becomes one "batch" of the batched matrix multiply below.
  THTensor *output3d = THTensor_(newWithStorage3d)(
    output->storage, output->storageOffset,
    inputFrameSize, -1,
    1, -1,
    nOutputFrame, -1);

  // Unfold input into finput: inputFrameSize x kW x nOutputFrame.
  THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
                           inputFrameSize, nInputFrame, nOutputFrame);

  THTensor_(zero)(output);

  if (bias != NULL) {
    // Pre-fill row i of output with bias[i]; baddbmm below accumulates on
    // top of it (beta = 1).
    for (i = 0; i < inputFrameSize; i++)
      THVector_(fill)
        (output->storage->data + output->storageOffset
         + output->stride[0] * i,
         THTensor_(get1d)(bias, i), nOutputFrame);
  }

  // output3d += weight bmm finput:
  // (inputFrameSize x 1 x kW) x (inputFrameSize x kW x nOutputFrame)
  THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput);

  // Frees only the view; output's storage stays alive.
  THTensor_(free)(output3d);
}
+
/* Public forward entry point.
 * Accepts input as (batch x) feat x seq when featFirst, otherwise as
 * (batch x) seq x feat and transposes internally.  Resizes output and the
 * finput scratch buffer, then runs the per-sample forward. */
void THNN_(TemporalRowConvolution_updateOutput)(
  THNNState *state,
  THTensor *input,
  THTensor *output,
  THTensor *weight,
  THTensor *bias,
  THTensor *finput,
  THTensor *fgradInput, // unused here but needed for Cuda
  int kW,
  int dW,
  int padW,
  bool featFirst) {

  int ndim = input->nDimension;

  // NOTE: tinput is only assigned (and later freed) in the !featFirst
  // branch; it is deliberately never touched otherwise.
  THTensor *tinput;
  if (!featFirst) {
    // Caller gave seq-last-but-one layout; bring features first and copy.
    tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
    input = THTensor_(newContiguous)(tinput);
  } else {
    input = THTensor_(newContiguous)(input);
  }

  THNN_(TemporalRowConvolution_shapeCheck)(
    state, input, NULL, weight, bias, kW, dW, padW);

  long inputFrameSize = weight->size[0];
  long nInputFrame = input->size[ndim - 1]; // seq length is last after transpose
  long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;

  if (ndim == 2) { /* non-batch mode */

    THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame);
    THTensor_(resize2d)(output, inputFrameSize, nOutputFrame);

    THTensor_(zero)(finput);
    THTensor_(zero)(output);

    THNN_(TemporalRowConvolution_updateOutput_frame)
      (input, output, weight, bias, finput,
       kW, dW, padW,
       inputFrameSize, nInputFrame, nOutputFrame);

  } else {
    long T = input->size[0]; // batch size
    long t;

    THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame);
    THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame);

    THTensor_(zero)(finput);
    THTensor_(zero)(output);

    // Each sample writes disjoint slices of output/finput, so the batch
    // loop parallelizes safely.
#pragma omp parallel for private(t)
    for (t = 0; t < T; t++) {
      THTensor *input_t = THTensor_(newSelect)(input, 0, t);
      THTensor *output_t = THTensor_(newSelect)(output, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(TemporalRowConvolution_updateOutput_frame)
        (input_t, output_t, weight, bias, finput_t,
         kW, dW, padW, inputFrameSize, nInputFrame, nOutputFrame);

      THTensor_(free)(input_t);
      THTensor_(free)(output_t);
      THTensor_(free)(finput_t);
    }
  }

  if (!featFirst) { // NOTE: output will NOT be contiguous in this case
    // Transpose output back to the caller's seq-major layout (view only).
    THTensor_(transpose)(output, output, ndim - 1, ndim - 2);
    THTensor_(free)(tinput);
  }

  // Frees the contiguous working copy made above, not the caller's tensor.
  THTensor_(free)(input);
}
+
/* Backward-data pass for a single sample.
 * gradOutput: inputFrameSize x nOutputFrame
 * weight:     transposed by the caller to inputFrameSize x kW x 1
 * fgradInput: scratch, inputFrameSize x kW x nOutputFrame
 * gradInput:  inputFrameSize x nInputFrame (overwritten) */
static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
  THTensor *gradInput,
  THTensor *gradOutput,
  THTensor *weight,
  THTensor *fgradInput,
  int kW,
  int dW,
  int padW,
  long inputFrameSize,
  long nInputFrame,
  long nOutputFrame) {

  // Zero-copy view of gradOutput as inputFrameSize x 1 x nOutputFrame.
  THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
    gradOutput->storage, gradOutput->storageOffset,
    inputFrameSize, -1,
    1, -1,
    nOutputFrame, -1);

  // weight: inputFrameSize x kW x 1
  // gradOutput3d: inputFrameSize x 1 x nOutputFrame
  // beta = 0: fgradInput is overwritten, not accumulated into.
  THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d);
  // fgradInput: inputFrameSize x kW x nOutputFrame
  THTensor_(free)(gradOutput3d);

  THTensor_(zero)(gradInput);

  // Fold the per-tap gradient columns back into gradInput.
  THNN_(unfolded_acc_row)(fgradInput, gradInput,
                          kW, dW, padW,
                          inputFrameSize, nInputFrame, nOutputFrame);
}
+
/* Public backward-data entry point: computes gradInput from gradOutput.
 * Layout handling mirrors updateOutput: tensors are transposed to
 * feature-first when featFirst is false. */
void THNN_(TemporalRowConvolution_updateGradInput)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *gradInput,
  THTensor *weight,
  THTensor *finput,
  THTensor *fgradInput,
  int kW,
  int dW,
  int padW,
  bool featFirst) {

  int ndim = input->nDimension;

  // NOTE: assigned (and later freed) only in the !featFirst branch.
  THTensor *tinput, *tgradOutput;

  if (!featFirst) {
    tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
    tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);

    input = THTensor_(newContiguous)(tinput);
    gradOutput = THTensor_(newContiguous)(tgradOutput);

  } else {
    input = THTensor_(newContiguous)(input);
    gradOutput = THTensor_(newContiguous)(gradOutput);
  }

  THNN_(TemporalRowConvolution_shapeCheck)(state, input, gradOutput, weight,
                                           NULL, kW, dW, padW);

  long inputFrameSize = weight->size[0];
  long nInputFrame = input->size[ndim - 1];
  long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;

  THTensor_(resizeAs)(fgradInput, finput);
  THTensor_(resizeAs)(gradInput, input);

  THTensor_(zero)(fgradInput);
  THTensor_(zero)(gradInput);

  // The per-frame kernel expects weight transposed to
  // inputFrameSize x kW x 1; tweight is a view, read-only below.
  THTensor *tweight = THTensor_(new)();
  THTensor_(transpose)(tweight, weight, 1, 2);

  if (ndim == 2) {
    THNN_(TemporalRowConvolution_updateGradInput_frame)
      (gradInput, gradOutput, tweight, fgradInput,
       kW, dW, padW,
       inputFrameSize, nInputFrame, nOutputFrame);
  } else {
    long T = input->size[0]; // batch size
    long t;

    // Each sample touches disjoint slices of gradInput/fgradInput, so the
    // batch loop parallelizes safely; tweight is shared read-only.
#pragma omp parallel for private(t)
    for (t = 0; t < T; t++) {

      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      THNN_(TemporalRowConvolution_updateGradInput_frame)
        (gradInput_t, gradOutput_t, tweight, fgradInput_t,
         kW, dW, padW,
         inputFrameSize, nInputFrame, nOutputFrame);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }
  }

  THTensor_(free)(tweight);

  if (!featFirst) { // NOTE: gradInput will NOT be contiguous in this case

    THTensor_(free)(tinput);
    THTensor_(free)(tgradOutput);

    // Transpose gradInput back to the caller's layout (view only).
    THTensor_(transpose)(gradInput, gradInput, ndim - 1, ndim - 2);
  }

  // Frees the contiguous working copies made above, not the caller's tensors.
  THTensor_(free)(input);
  THTensor_(free)(gradOutput);

}
+
/* Accumulate weight and bias gradients from a single sample.
 * gradOutput: inputFrameSize x nOutputFrame
 * finput:     inputFrameSize x kW x nOutputFrame (unfolded input saved by
 *             the forward pass)
 * gradWeight: inputFrameSize x 1 x kW (accumulated into, scaled by scale)
 * gradBias:   optional, length inputFrameSize */
static void THNN_(TemporalRowConvolution_accGradParameters_frame)(
  THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
  THTensor *finput, real scale) {

  long i;
  // Zero-copy view of gradOutput as inputFrameSize x 1 x nOutputFrame.
  THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
    gradOutput->storage, gradOutput->storageOffset,
    gradOutput->size[0], -1,
    1, -1,
    gradOutput->size[1], -1);

  THTensor *tfinput = THTensor_(new)();
  THTensor_(transpose)(tfinput, finput, 1, 2);
  // gradOutput3d: inputFrameSize x 1 x nOutputFrame
  // finput: inputFrameSize x nOutputFrame x kW
  // gradWeight += scale * (gradOutput3d bmm finput^T)
  THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, tfinput);
  // gradWeight: inputFrameSize x 1 x kW
  THTensor_(free)(tfinput);

  if (gradBias != NULL) {
    // gradBias[i] += scale * sum over output frames of gradOutput row i
    for (i = 0; i < gradBias->size[0]; i++) {
      long k;
      real sum = 0;
      real *data = gradOutput3d->storage->data
        + gradOutput3d->storageOffset
        + i * gradOutput3d->stride[0];
      for (k = 0; k < gradOutput3d->size[2]; k++) {
        sum += data[k];
      }
      (gradBias->storage->data + gradBias->storageOffset)[i]
        += scale * sum;
    }
  }

  THTensor_(free)(gradOutput3d);

}
+
+void THNN_(TemporalRowConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst,
+ accreal scale_) {
+
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ int ndim = input->nDimension;
+
+ THTensor *tinput, *tgradOutput;
+
+ if (!featFirst) {
+ tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+ tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
+
+ input = THTensor_(newContiguous)(tinput);
+ gradOutput = THTensor_(newContiguous)(tgradOutput);
+ } else {
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ }
+
+ THNN_(TemporalRowConvolution_shapeCheck)
+ (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW);
+
+ long inputFrameSize = gradWeight->size[0];
+ long nInputFrame = input->size[ndim - 1];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ if (ndim == 2) {
+ THNN_(TemporalRowConvolution_accGradParameters_frame)(
+ gradOutput, gradWeight, gradBias, finput, scale);
+ } else {
+ long T = input->size[0];
+ long t;
+
+ for (t = 0; t < T; t++) {
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(TemporalRowConvolution_accGradParameters_frame)(
+ gradOutput_t, gradWeight, gradBias, finput_t, scale);
+
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ if (!featFirst) {
+ THTensor_(free)(tinput);
+ THTensor_(free)(tgradOutput);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif