summaryrefslogtreecommitdiffstats
path: root/contrib/lua-torch/nn/lib/THNN/generic/Sqrt.c
blob: 174884e34a8622310e04e310f24c10e7e354d582 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Sqrt.c"
#else

void THNN_(Sqrt_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          accreal eps_)
{
  real eps = TH_CONVERT_ACCREAL_TO_REAL(eps_);
  THTensor_(resizeAs)(output, input);
  THTensor_(sqrt)(output, input);
}

void THNN_(Sqrt_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output)
{
  THNN_CHECK_SHAPE(output, gradOutput);
  THTensor_(resizeAs)(gradInput, input);

  if (output->nDimension == 1 ||
      !THTensor_(isContiguous)(output) ||
      !THTensor_(isContiguous)(gradOutput) ||
      !THTensor_(isContiguous)(gradInput))
  {
    TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
      *gradInput_data = (*output_data == 0.0) ? 0.0 : (0.5 * (*gradOutput_data / *output_data));
    );
  }
  else
  {
    real *gradOutput_data = THTensor_(data)(gradOutput);
    real *gradInput_data  = THTensor_(data)(gradInput);
    real *output_data     = THTensor_(data)(output);
    long i;
#pragma omp parallel for private(i)
    for(i = 0; i < THTensor_(nElement)(output); i++)
    {
      if (output_data[i] == 0.0)
        gradInput_data[i] = 0.0;
      else
        gradInput_data[i] = 0.5 * (gradOutput_data[i] / output_data[i]);
    }
  }
}

#endif