1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Square.c"
#else
/*
 * Forward pass of the Square module: stores input[i] * input[i] into
 * output for every element.
 *
 * state  - THNN state handle; not referenced by this function.
 * input  - tensor whose elements are squared.
 * output - destination tensor; THTensor_(resizeAs)(output, input) is
 *          applied to it before any element is written.
 */
void THNN_(Square_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output)
{
  THTensor_(resizeAs)(output, input);

  if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
  {
    /* 1-D input, or either tensor not contiguous: go through the
       generic apply macro instead of raw buffer indexing. */
    TH_TENSOR_APPLY2(real, output, real, input,
      *output_data = (*input_data) * (*input_data);
    );
  }
  else
  {
    /* Both tensors are contiguous: index the raw buffers directly. */
    real *output_data = THTensor_(data)(output);
    real *input_data  = THTensor_(data)(input);
    /* Element count is loop-invariant; read it once rather than calling
       nElement in the loop condition. */
    const long n = THTensor_(nElement)(input);
    long i;
#pragma omp parallel for private(i)
    for (i = 0; i < n; i++)
      output_data[i] = input_data[i]*input_data[i];
  }
}
/*
 * Backward pass of the Square module: stores
 * 2.0 * gradOutput[i] * input[i] into gradInput for every element.
 *
 * state      - THNN state handle; not referenced by this function.
 * input      - the tensor that was squared in the forward pass.
 * gradOutput - gradient with respect to the module output; checked
 *              against input with THNN_CHECK_SHAPE.
 * gradInput  - destination tensor; THTensor_(resizeAs)(gradInput, input)
 *              is applied to it before any element is written.
 */
void THNN_(Square_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput)
{
  THNN_CHECK_SHAPE(input, gradOutput);
  THTensor_(resizeAs)(gradInput, input);

  if (input->nDimension == 1 ||
      !THTensor_(isContiguous)(input) ||
      !THTensor_(isContiguous)(gradOutput) ||
      !THTensor_(isContiguous)(gradInput))
  {
    /* 1-D input, or any of the three tensors not contiguous: go through
       the generic apply macro instead of raw buffer indexing. */
    TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
      *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data);
    );
  }
  else
  {
    /* All three tensors are contiguous: index the raw buffers directly. */
    real *gradOutput_data = THTensor_(data)(gradOutput);
    real *gradInput_data  = THTensor_(data)(gradInput);
    real *input_data      = THTensor_(data)(input);
    /* Element count is loop-invariant; read it once rather than calling
       nElement in the loop condition. */
    const long n = THTensor_(nElement)(gradInput);
    long i;
#pragma omp parallel for private(i)
    for (i = 0; i < n; i++)
      gradInput_data[i] = 2.0 * gradOutput_data[i] * input_data[i];
  }
}
#endif
|