diff options
Diffstat (limited to 'contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c')
-rw-r--r-- | contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c | 472 |
1 files changed, 472 insertions, 0 deletions
/*
 * contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c
 *
 * CPU implementation of the TemporalRowConvolution THNN module. This is a TH
 * "generic" file: it is meant to be included with TH_GENERIC_FILE defined so
 * that `real`, THTensor_(), THVector_() and THNN_() expand per scalar type.
 *
 * Internally every routine works on feature-first data
 * ((batch x) feats x seq). Tensors handed to the raw-pointer helpers are
 * indexed as if they were contiguous.
 */
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/TemporalRowConvolution.c"
#else

/*
 * Validates the arguments shared by the public entry points.
 *
 * input      - 2D (feats x seq) or 3D (batch x feats x seq) tensor, already
 *              in feature-first layout.
 * gradOutput - may be NULL; when given, its feature and sequence dimensions
 *              are checked against inputFrameSize and nOutputFrame.
 * weight     - contiguous 3D tensor; weight->size[0] is taken as the number
 *              of feature rows (inputFrameSize).
 * bias       - may be NULL; otherwise contiguous, 1D, of size weight->size[0].
 * kW, dW     - kernel width and stride, both must be > 0.
 * padW       - padding applied on both ends of the sequence axis.
 *
 * Violations are reported through THArgCheck / THNN_ARGCHECK /
 * THNN_CHECK_DIM_SIZE / THError.
 */
static inline void THNN_(TemporalRowConvolution_shapeCheck)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *weight,
  THTensor *bias,
  int kW,
  int dW,
  int padW) {

  THArgCheck(kW > 0, 5,
             "kernel size should be greater than zero, but got kW: %d", kW);
  THArgCheck(dW > 0, 6,
             "stride should be greater than zero, but got dW: %d", dW);
  THNN_ARGCHECK(weight->nDimension == 3, 3, weight,
                "3D weight tensor expected, but got: %s");
  THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
  THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");

  if (bias != NULL) {
    THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
  }

  /* we're always looking at (possibly batch) x feats x seq */
  int ndim = input->nDimension;
  int dimF = 0;
  int dimS = 1;

  if (ndim == 3) {
    ++dimS;
    ++dimF;
  }

  THNN_ARGCHECK(ndim == 2 || ndim == 3, 1, input,
                "2D or 3D (batch mode) input tensor expected, but got :%s");

  long inputFrameSize = weight->size[0];
  long nInputFrame = input->size[dimS];
  long paddedFrame = nInputFrame + 2 * (long)padW;

  /* C division truncates toward zero, so for dW > 1 a kernel that is longer
   * than the padded input could still yield nOutputFrame == 1 below. Reject
   * that case explicitly before computing the output length. */
  if (paddedFrame < kW) {
    THError("Kernel size (kW: %d) can't be greater than the padded input "
            "length (%ld + 2 * %d)",
            kW, nInputFrame, padW);
  }

  long nOutputFrame = (paddedFrame - kW) / dW + 1;

  if (nOutputFrame < 1) {
    /* All four values are `long`, hence %ld. */
    THError("Given input size: (%ld x %ld). "
            "Calculated output size: (%ld x %ld). Output size is too small",
            inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame);
  }

  THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame);
  }
}

/*
 * Folds an unfolded buffer back into a row-major signal, accumulating:
 *
 *   input[c][x * dW - padW + kw] += finput[c][kw][x]
 *
 * for c in [0, inputFrameSize), kw in [0, kW), x in [0, nOutputFrame).
 * Source positions that map into the padding (index < 0 or >= nInputFrame)
 * are skipped. Note the naming: `finput` is the source here and `input` is
 * the destination. The destination is not cleared by this function.
 *
 * finput is indexed as inputFrameSize x kW x nOutputFrame and input as
 * inputFrameSize x nInputFrame, both through their raw data pointers.
 */
static void THNN_(unfolded_acc_row)(
  THTensor *finput,
  THTensor *input,
  int kW,
  int dW,
  int padW,
  long inputFrameSize,
  long nInputFrame,
  long nOutputFrame) {

  real *input_data = THTensor_(data)(input);
  real *finput_data = THTensor_(data)(finput);
  long c;

  for (c = 0; c < inputFrameSize; c++) {
    real *dst = input_data + c * nInputFrame;
    long kw;

    for (kw = 0; kw < kW; kw++) {
      real *src = finput_data
                  + c * (kW * nOutputFrame)
                  + kw * nOutputFrame;

      if (dW == 1) {
        /* With unit stride the destination index is x - padW + kw, so the
         * in-bounds x values form one contiguous span [lo, hi). */
        long lo = padW - kw;
        long hi = nInputFrame + padW - kw;

        if (lo < 0) {
          lo = 0;
        }
        if (hi > nOutputFrame) {
          hi = nOutputFrame;
        }

        if (hi > lo) {
          real *dst_slice = dst + (lo - padW + kw);
          THVector_(cadd)(dst_slice, dst_slice, src + lo, 1, hi - lo);
        }
      } else {
        long x;

        for (x = 0; x < nOutputFrame; x++) {
          long ix = x * dW - padW + kw;

          if (ix >= 0 && ix < nInputFrame) {
            dst[ix] += src[x];
          }
        }
      }
    }
  }
}

/*
 * Unfolds a row-major signal into sliding windows:
 *
 *   finput[c][kw][x] = input[c][x * dW - padW + kw]
 *
 * for c in [0, inputFrameSize), kw in [0, kW), x in [0, nOutputFrame).
 * Positions that map into the padding (index < 0 or >= nInputFrame) are
 * written as zero, so every element of finput is assigned.
 *
 * finput is indexed as inputFrameSize x kW x nOutputFrame and input as
 * inputFrameSize x nInputFrame, both through their raw data pointers.
 */
static void THNN_(unfolded_copy_row)(
  THTensor *finput,
  THTensor *input,
  int kW,
  int dW,
  int padW,
  long inputFrameSize,
  long nInputFrame,
  long nOutputFrame) {

  real *input_data = THTensor_(data)(input);
  real *finput_data = THTensor_(data)(finput);
  long k;

  /* One iteration per (feature row, kernel tap) pair. */
  for (k = 0; k < inputFrameSize * kW; k++) {
    long c = k / kW;
    long kw = k % kW;
    real *dst = finput_data + c * (kW * nOutputFrame) + kw * nOutputFrame;
    real *src = input_data + c * nInputFrame;
    long x;

    if (dW == 1) {
      /* With unit stride the source index is x - padW + kw, so the in-bounds
       * x values form one contiguous span [lo, hi); the rest is padding. */
      long lo = padW - kw;
      long hi = nInputFrame + padW - kw;

      if (lo < 0) {
        lo = 0;
      }
      if (lo > nOutputFrame) {
        lo = nOutputFrame;
      }
      if (hi > nOutputFrame) {
        hi = nOutputFrame;
      }
      if (hi < lo) {
        hi = lo;
      }

      for (x = 0; x < lo; x++) {
        dst[x] = 0;
      }
      if (hi > lo) {
        memcpy(dst + lo, src + (lo - padW + kw),
               sizeof(real) * (size_t)(hi - lo));
      }
      for (x = hi; x < nOutputFrame; x++) {
        dst[x] = 0;
      }
    } else {
      for (x = 0; x < nOutputFrame; x++) {
        long ix = x * dW - padW + kw;

        dst[x] = (ix >= 0 && ix < nInputFrame) ? src[ix] : 0;
      }
    }
  }
}

/*
 * Forward pass for a single (non-batched) sample.
 *
 * Unfolds `input` into `finput`, initialises `output` with the per-row bias
 * (or zero when bias is NULL) and then adds the batched product
 * weight * finput through THTensor_(baddbmm), where the "batch" dimension of
 * the product is the feature row.
 *
 * output is viewed as inputFrameSize x 1 x nOutputFrame on top of its own
 * storage, so it has to be laid out contiguously.
 */
static void THNN_(TemporalRowConvolution_updateOutput_frame)(
  THTensor *input,
  THTensor *output,
  THTensor *weight,
  THTensor *bias,
  THTensor *finput,
  int kW,
  int dW,
  int padW,
  long inputFrameSize,
  long nInputFrame,
  long nOutputFrame) {

  long i;

  /* 3D alias of output's storage; -1 strides request the default strides. */
  THTensor *output3d = THTensor_(newWithStorage3d)(
    output->storage, output->storageOffset,
    inputFrameSize, -1,
    1, -1,
    nOutputFrame, -1);

  THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
                           inputFrameSize, nInputFrame, nOutputFrame);

  THTensor_(zero)(output);

  if (bias != NULL) {
    /* Broadcast bias[i] over the whole i-th output row. */
    for (i = 0; i < inputFrameSize; i++) {
      THVector_(fill)
        (output->storage->data + output->storageOffset
         + output->stride[0] * i,
         THTensor_(get1d)(bias, i), nOutputFrame);
    }
  }

  /* output3d += weight * finput, one small matrix product per feature row. */
  THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput);

  THTensor_(free)(output3d);
}

/*
 * Forward pass.
 *
 * input      - 2D or 3D (batch mode) tensor. With featFirst the layout is
 *              (batch x) feats x seq; otherwise the last two dimensions are
 *              swapped and the input is transposed internally.
 * output     - resized to (batch x) inputFrameSize x nOutputFrame and filled.
 *              When featFirst is false it is transposed back in place at the
 *              end and is therefore NOT contiguous on return.
 * weight     - see TemporalRowConvolution_shapeCheck.
 * bias       - optional (may be NULL).
 * finput     - scratch buffer, resized to
 *              (batch x) inputFrameSize x kW x nOutputFrame and left holding
 *              the unfolded input.
 * fgradInput - unused here but needed for Cuda.
 * kW, dW, padW - kernel width, stride, padding along the sequence axis.
 */
void THNN_(TemporalRowConvolution_updateOutput)(
  THNNState *state,
  THTensor *input,
  THTensor *output,
  THTensor *weight,
  THTensor *bias,
  THTensor *finput,
  THTensor *fgradInput,     /* unused here but needed for Cuda */
  int kW,
  int dW,
  int padW,
  bool featFirst) {

  int ndim = input->nDimension;

  THTensor *tinput = NULL;
  if (!featFirst) {
    tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
    input = THTensor_(newContiguous)(tinput);
  } else {
    input = THTensor_(newContiguous)(input);
  }

  THNN_(TemporalRowConvolution_shapeCheck)(
    state, input, NULL, weight, bias, kW, dW, padW);

  long inputFrameSize = weight->size[0];
  long nInputFrame = input->size[ndim - 1];
  long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;

  if (ndim == 2) { /* non-batch mode */

    THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame);
    THTensor_(resize2d)(output, inputFrameSize, nOutputFrame);

    THTensor_(zero)(finput);
    THTensor_(zero)(output);

    THNN_(TemporalRowConvolution_updateOutput_frame)
      (input, output, weight, bias, finput,
       kW, dW, padW,
       inputFrameSize, nInputFrame, nOutputFrame);

  } else {
    long T = input->size[0];
    long t;

    THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame);
    THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame);

    THTensor_(zero)(finput);
    THTensor_(zero)(output);

    /* Each sample works on its own slices of input/output/finput. */
#pragma omp parallel for private(t)
    for (t = 0; t < T; t++) {
      THTensor *input_t = THTensor_(newSelect)(input, 0, t);
      THTensor *output_t = THTensor_(newSelect)(output, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(TemporalRowConvolution_updateOutput_frame)
        (input_t, output_t, weight, bias, finput_t,
         kW, dW, padW, inputFrameSize, nInputFrame, nOutputFrame);

      THTensor_(free)(input_t);
      THTensor_(free)(output_t);
      THTensor_(free)(finput_t);
    }
  }

  if (!featFirst) { /* NOTE: output will NOT be contiguous in this case */
    THTensor_(transpose)(output, output, ndim - 1, ndim - 2);
    THTensor_(free)(tinput);
  }

  THTensor_(free)(input);
}

/*
 * Backward pass w.r.t. the input for a single (non-batched) sample.
 *
 * `weight` must already be transposed by the caller (dims 1 and 2 swapped).
 * fgradInput is overwritten with weight * gradOutput3d, then folded into a
 * freshly zeroed gradInput.
 */
static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
  THTensor *gradInput,
  THTensor *gradOutput,
  THTensor *weight,
  THTensor *fgradInput,
  int kW,
  int dW,
  int padW,
  long inputFrameSize,
  long nInputFrame,
  long nOutputFrame) {

  /* 3D alias of gradOutput's storage; -1 strides request default strides. */
  THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
    gradOutput->storage, gradOutput->storageOffset,
    inputFrameSize, -1,
    1, -1,
    nOutputFrame, -1);

  /* weight:       inputFrameSize x kW x 1 */
  /* gradOutput3d: inputFrameSize x 1 x nOutputFrame */
  THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d);
  /* fgradInput:   inputFrameSize x kW x nOutputFrame */
  THTensor_(free)(gradOutput3d);

  THTensor_(zero)(gradInput);

  THNN_(unfolded_acc_row)(fgradInput, gradInput,
                          kW, dW, padW,
                          inputFrameSize, nInputFrame, nOutputFrame);
}

/*
 * Backward pass w.r.t. the input.
 *
 * input, gradOutput - same layout rules as in updateOutput; both are
 *                     transposed internally when featFirst is false.
 * gradInput  - resized like the (feature-first) input and filled. When
 *              featFirst is false it is transposed back in place at the end
 *              and is therefore NOT contiguous on return.
 * weight     - see TemporalRowConvolution_shapeCheck.
 * finput     - only its shape is used here (fgradInput is resized to match).
 * fgradInput - scratch buffer, overwritten.
 * kW, dW, padW - kernel width, stride, padding along the sequence axis.
 */
void THNN_(TemporalRowConvolution_updateGradInput)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *gradInput,
  THTensor *weight,
  THTensor *finput,
  THTensor *fgradInput,
  int kW,
  int dW,
  int padW,
  bool featFirst) {

  int ndim = input->nDimension;

  THTensor *tinput = NULL;
  THTensor *tgradOutput = NULL;

  if (!featFirst) {
    tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
    tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);

    input = THTensor_(newContiguous)(tinput);
    gradOutput = THTensor_(newContiguous)(tgradOutput);

  } else {
    input = THTensor_(newContiguous)(input);
    gradOutput = THTensor_(newContiguous)(gradOutput);
  }

  THNN_(TemporalRowConvolution_shapeCheck)(state, input, gradOutput, weight,
                                           NULL, kW, dW, padW);

  long inputFrameSize = weight->size[0];
  long nInputFrame = input->size[ndim - 1];
  long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;

  THTensor_(resizeAs)(fgradInput, finput);
  THTensor_(resizeAs)(gradInput, input);

  THTensor_(zero)(fgradInput);
  THTensor_(zero)(gradInput);

  /* Transposed view of weight (dims 1 and 2 swapped) for the frame helper. */
  THTensor *tweight = THTensor_(new)();
  THTensor_(transpose)(tweight, weight, 1, 2);

  if (ndim == 2) {
    THNN_(TemporalRowConvolution_updateGradInput_frame)
      (gradInput, gradOutput, tweight, fgradInput,
       kW, dW, padW,
       inputFrameSize, nInputFrame, nOutputFrame);
  } else {
    long T = input->size[0];
    long t;

    /* Each sample works on its own slices of the three tensors. */
#pragma omp parallel for private(t)
    for (t = 0; t < T; t++) {

      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      THNN_(TemporalRowConvolution_updateGradInput_frame)
        (gradInput_t, gradOutput_t, tweight, fgradInput_t,
         kW, dW, padW,
         inputFrameSize, nInputFrame, nOutputFrame);

      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
      THTensor_(free)(fgradInput_t);
    }
  }

  THTensor_(free)(tweight);

  if (!featFirst) { /* NOTE: gradInput will NOT be contiguous in this case */

    THTensor_(free)(tinput);
    THTensor_(free)(tgradOutput);

    THTensor_(transpose)(gradInput, gradInput, ndim - 1, ndim - 2);
  }

  THTensor_(free)(input);
  THTensor_(free)(gradOutput);

}

/*
 * Accumulates parameter gradients for a single (non-batched) sample.
 *
 * gradWeight += scale * gradOutput3d * transpose(finput), and, when gradBias
 * is not NULL, gradBias[i] += scale * sum_k gradOutput[i][k].
 *
 * gradOutput is viewed as size[0] x 1 x size[1] on top of its own storage,
 * and gradBias is written through its storage pointer, so both have to be
 * laid out contiguously.
 */
static void THNN_(TemporalRowConvolution_accGradParameters_frame)(
  THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
  THTensor *finput, real scale) {

  long i;
  THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
    gradOutput->storage, gradOutput->storageOffset,
    gradOutput->size[0], -1,
    1, -1,
    gradOutput->size[1], -1);

  THTensor *tfinput = THTensor_(new)();
  THTensor_(transpose)(tfinput, finput, 1, 2);
  /* gradOutput3d: inputFrameSize x 1 x nOutputFrame */
  /* tfinput:      inputFrameSize x nOutputFrame x kW */
  THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, tfinput);
  /* gradWeight:   inputFrameSize x 1 x kW */
  THTensor_(free)(tfinput);

  if (gradBias != NULL) {
    for (i = 0; i < gradBias->size[0]; i++) {
      long k;
      real sum = 0;
      real *data = gradOutput3d->storage->data
                   + gradOutput3d->storageOffset
                   + i * gradOutput3d->stride[0];
      for (k = 0; k < gradOutput3d->size[2]; k++) {
        sum += data[k];
      }
      (gradBias->storage->data + gradBias->storageOffset)[i]
        += scale * sum;
    }
  }

  THTensor_(free)(gradOutput3d);

}

/*
 * Accumulates gradients w.r.t. weight and bias, scaled by scale_.
 *
 * input, gradOutput - same layout rules as in updateOutput; both are
 *                     transposed internally when featFirst is false.
 * gradWeight - accumulated into; validated like `weight` in shapeCheck.
 * gradBias   - optional (may be NULL); accumulated into.
 * finput     - unfolded input, read per sample.
 *              NOTE(review): presumably the buffer filled by the preceding
 *              updateOutput call - confirm against the caller.
 * fgradInput - unused in this function.
 * kW, dW, padW - only used for shape validation here.
 *
 * The batch loop is sequential because every sample accumulates into the
 * same gradWeight / gradBias.
 */
void THNN_(TemporalRowConvolution_accGradParameters)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *gradWeight,
  THTensor *gradBias,
  THTensor *finput,
  THTensor *fgradInput,
  int kW,
  int dW,
  int padW,
  bool featFirst,
  accreal scale_) {

  real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
  int ndim = input->nDimension;

  THTensor *tinput = NULL;
  THTensor *tgradOutput = NULL;

  if (!featFirst) {
    tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
    tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);

    input = THTensor_(newContiguous)(tinput);
    gradOutput = THTensor_(newContiguous)(tgradOutput);
  } else {
    input = THTensor_(newContiguous)(input);
    gradOutput = THTensor_(newContiguous)(gradOutput);
  }

  THNN_(TemporalRowConvolution_shapeCheck)
    (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW);

  if (ndim == 2) {
    THNN_(TemporalRowConvolution_accGradParameters_frame)(
      gradOutput, gradWeight, gradBias, finput, scale);
  } else {
    long T = input->size[0];
    long t;

    for (t = 0; t < T; t++) {
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);

      THNN_(TemporalRowConvolution_accGradParameters_frame)(
        gradOutput_t, gradWeight, gradBias, finput_t, scale);

      THTensor_(free)(gradOutput_t);
      THTensor_(free)(finput_t);
    }
  }

  if (!featFirst) {
    THTensor_(free)(tinput);
    THTensor_(free)(tgradOutput);
  }

  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
}

#endif