#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SoftMax.c"
#else

/*
 * SoftMax forward pass.
 *
 * Writes exp(x - max) / sum(exp(x - max)) into `output`, where max and sum
 * are taken over the `dim` elements of one slice of `input`.
 *
 * The slice layout is picked from input->nDimension:
 *   1D: a single frame, dim = size[0], elements adjacent (stride 1)
 *   2D: size[0] frames, dim = size[1], elements adjacent (stride 1)
 *   3D: a single frame, dim = size[0], stride = size[1]*size[2]
 *   4D: size[0] frames, dim = size[1], stride = size[2]*size[3]
 * Any other dimensionality is reported through THArgCheck.
 *
 * state  - not referenced by this function
 * input  - source tensor; a contiguous version is obtained for the duration
 *          of the call and released before returning
 * output - resized to match input, then filled with the result
 */
void THNN_(SoftMax_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output)
{
  real *input_data, *output_data;
  ptrdiff_t nframe = 0, dim = 0, stride = 0;
  ptrdiff_t t;

  switch (input->nDimension)
  {
    case 1:
      nframe = 1;
      dim = input->size[0];
      stride = 1;
      break;
    case 2:
      nframe = input->size[0];
      dim = input->size[1];
      stride = 1;
      break;
    case 3:
      nframe = 1;
      dim = input->size[0];
      stride = input->size[1]*input->size[2];
      break;
    case 4:
      nframe = input->size[0];
      dim = input->size[1];
      stride = input->size[2]*input->size[3];
      break;
    default:
      THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected");
      break;
  }

  input = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, input);

  input_data = THTensor_(data)(input);
  output_data = THTensor_(data)(output);

  /* One iteration per (frame, inner position) pair. */
#pragma omp parallel for private(t)
  for (t = 0; t < stride*nframe; t++)
  {
    /* t/stride selects the frame, t % stride the position inside it. */
    const ptrdiff_t base = (t/stride)*dim*stride + t % stride;
    real *src = input_data + base;
    real *dst = output_data + base;
    real maxval = -THInf;
    accreal total = 0;
    ptrdiff_t k;

    /* Pass 1: largest element of the slice. */
    for (k = 0; k < dim; k++)
    {
      if (src[k*stride] >= maxval)
        maxval = src[k*stride];
    }

    /* Pass 2: shift by the maximum before exponentiating, and accumulate. */
    for (k = 0; k < dim; k++)
    {
      real e = exp(src[k*stride] - maxval);
      dst[k*stride] = e;
      total += e;
    }

    /* Pass 3: normalise so the slice sums to one. */
    for (k = 0; k < dim; k++)
    {
      dst[k*stride] *= 1/total;
    }
  }

  THTensor_(free)(input);
}

/*
 * SoftMax backward pass.
 *
 * For every slice, computes
 *   gradInput = output * (gradOutput - sum(gradOutput * output))
 * with the sum taken over the `dim` elements of that slice.
 *
 * The slice layout is chosen from output->nDimension using the same
 * 1D/2D/3D/4D rules as the forward pass; any other dimensionality is
 * reported through THError.
 *
 * state      - not referenced by this function
 * input      - only used for the shape check against gradOutput
 * gradOutput - gradient with respect to the forward result
 * gradInput  - resized to match output, then filled with the result
 * output     - result previously produced by the forward pass
 *
 * Contiguous versions of gradOutput and output are obtained for the
 * duration of the call and released before returning.
 */
void THNN_(SoftMax_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output)
{
  THNN_CHECK_SHAPE(input, gradOutput);
  real *gradInput_data, *gradOutput_data, *output_data;
  ptrdiff_t nframe = 0, dim = 0, stride = 0;
  ptrdiff_t t;

  switch (output->nDimension)
  {
    case 1:
      nframe = 1;
      dim = output->size[0];
      stride = 1;
      break;
    case 2:
      nframe = output->size[0];
      dim = output->size[1];
      stride = 1;
      break;
    case 3:
      nframe = 1;
      dim = output->size[0];
      stride = output->size[1]*output->size[2];
      break;
    case 4:
      nframe = output->size[0];
      dim = output->size[1];
      stride = output->size[2]*output->size[3];
      break;
    default:
      THError("1D, 2D, 3D or 4D tensor expected");
      break;
  }

  gradOutput = THTensor_(newContiguous)(gradOutput);
  output = THTensor_(newContiguous)(output);

  THTensor_(resizeAs)(gradInput, output);
  gradInput_data = THTensor_(data)(gradInput);
  output_data = THTensor_(data)(output);
  gradOutput_data = THTensor_(data)(gradOutput);

  /* One iteration per (frame, inner position) pair. */
#pragma omp parallel for private(t)
  for (t = 0; t < stride*nframe; t++)
  {
    /* t/stride selects the frame, t % stride the position inside it. */
    const ptrdiff_t base = (t/stride)*dim*stride + t % stride;
    real *gin = gradInput_data + base;
    real *out = output_data + base;
    real *gout = gradOutput_data + base;
    accreal dot = 0;
    ptrdiff_t k;

    /* Inner product of gradOutput and output along the slice. */
    for (k = 0; k < dim; k++)
    {
      dot += (accreal)gout[k*stride] * out[k*stride];
    }

    for (k = 0; k < dim; k++)
    {
      gin[k*stride] = out[k*stride] * (gout[k*stride] - dot);
    }
  }

  THTensor_(free)(gradOutput);
  THTensor_(free)(output);
}

#endif