Diffstat (limited to 'contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c')
-rw-r--r-- | contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c | 137
1 file changed, 137 insertions, 0 deletions
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c b/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c
new file mode 100644
index 000000000..a7280422b
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c
@@ -0,0 +1,137 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/LogSoftMax.c"
+#else
+
+void THNN_(LogSoftMax_updateOutput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *output)
+{
+  real *input_data, *output_data;
+  ptrdiff_t nframe = 0, dim = 0, stride = 0;
+  ptrdiff_t t, d;
+
+  if (input->nDimension == 1)
+  {
+    nframe = 1;
+    dim = input->size[0];
+    stride = 1;
+  }
+  else if (input->nDimension == 2)
+  {
+    nframe = input->size[0];
+    dim = input->size[1];
+    stride = 1;
+  }
+  else if (input->nDimension == 3)
+  {
+    nframe = 1;
+    dim = input->size[0];
+    stride = input->size[1]*input->size[2];
+  }
+  else if (input->nDimension == 4)
+  {
+    nframe = input->size[0];
+    dim = input->size[1];
+    stride = input->size[2]*input->size[3];
+  }
+  else
+    THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected");
+
+  input = THTensor_(newContiguous)(input);
+  THTensor_(resizeAs)(output, input);
+
+  real *input_data0 = THTensor_(data)(input);
+  real *output_data0 = THTensor_(data)(output);
+
+  accreal logsum;
+  real maxInput;
+  #pragma omp parallel for private(t, d, maxInput, logsum, input_data, output_data)
+  for (t = 0; t < stride*nframe; t++)
+  {
+    logsum = 0;
+    maxInput = -THInf;
+    input_data = input_data0 + (t/stride)*dim*stride + t % stride;
+    output_data = output_data0 + (t/stride)*dim*stride + t % stride;
+
+    for (d = 0; d < dim; d++)
+      maxInput = THMax(maxInput, input_data[d*stride]);
+
+    for (d = 0; d < dim; d++)
+      logsum += exp(input_data[d*stride] - maxInput);
+    logsum = maxInput + log(logsum);
+
+    for (d = 0; d < dim; d++)
+      output_data[d*stride] = input_data[d*stride] - logsum;
+  }
+
+  THTensor_(free)(input);
+}
+
+void THNN_(LogSoftMax_updateGradInput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradInput,
+          THTensor *output)
+{
+  THNN_CHECK_SHAPE(input, gradOutput);
+  real *gradInput_data, *gradOutput_data, *output_data;
+  ptrdiff_t nframe = 0, dim = 0, stride = 0;
+  ptrdiff_t t, d;
+
+  if (output->nDimension == 1)
+  {
+    nframe = 1;
+    dim = output->size[0];
+    stride = 1;
+  }
+  else if (output->nDimension == 2)
+  {
+    nframe = output->size[0];
+    dim = output->size[1];
+    stride = 1;
+  }
+  else if (output->nDimension == 3)
+  {
+    nframe = 1;
+    dim = output->size[0];
+    stride = output->size[1]*output->size[2];
+  }
+  else if (output->nDimension == 4)
+  {
+    nframe = output->size[0];
+    dim = output->size[1];
+    stride = output->size[2]*output->size[3];
+  }
+  else
+    THError("1D, 2D, 3D or 4D tensor expected");
+
+  output = THTensor_(newContiguous)(output);
+  gradOutput = THTensor_(newContiguous)(gradOutput);
+
+  THTensor_(resizeAs)(gradInput, output);
+  real *gradInput_data0 = THTensor_(data)(gradInput);
+  real *output_data0 = THTensor_(data)(output);
+  real *gradOutput_data0 = THTensor_(data)(gradOutput);
+  accreal sum;
+  #pragma omp parallel for private(t, sum, d, gradInput_data, output_data, gradOutput_data)
+  for (t = 0; t < stride*nframe; t++)
+  {
+    sum = 0;
+    gradInput_data = gradInput_data0 + (t/stride)*dim*stride + t % stride;
+    output_data = output_data0 + (t/stride)*dim*stride + t % stride;
+    gradOutput_data = gradOutput_data0 + (t/stride)*dim*stride + t % stride;
+
+    for (d = 0; d < dim; d++)
+      sum += gradOutput_data[d*stride];
+
+    for (d = 0; d < dim; d++)
+      gradInput_data[d*stride] = gradOutput_data[d*stride] - exp(output_data[d*stride])*sum;
+  }
+
+  THTensor_(free)(gradOutput);
+  THTensor_(free)(output);
+}
+
+#endif
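A brief sketch of the math the two OpenMP loops implement, using notation introduced here (x for one input slice, y for the saved forward output, g for gradOutput); both kernels view the tensor as stride*nframe independent slices of length dim, with element d of slice t at offset (t/stride)*dim*stride + t%stride + d*stride. The forward pass uses the numerically stable log-sum-exp form

\[ y_i = x_i - \Big(\max_j x_j + \log \sum_j e^{\,x_j - \max_j x_j}\Big) \]

and the backward pass applies the standard log-softmax gradient

\[ \text{gradInput}_i = g_i - e^{y_i} \sum_j g_j . \]

Subtracting the per-slice maximum before exponentiating keeps exp() from overflowing on large activations without changing the result, which is why maxInput is tracked in the first inner loop.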