Diffstat (limited to 'contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c')
-rw-r--r-- | contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c | 260
1 file changed, 260 insertions(+), 0 deletions(-)
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c
new file mode 100644
index 000000000..dcde660ea
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c
@@ -0,0 +1,260 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialReflectionPadding.c"
+#else
+
+static void THNN_(SpatialReflectionPadding_updateOutput_frame)(
+  real *input_p, real *output_p,
+  long nslices,
+  long iwidth, long iheight,
+  long owidth, long oheight,
+  int pad_l, int pad_r,
+  int pad_t, int pad_b)
+{
+  int iStartX = fmax(0, -pad_l);
+  int iStartY = fmax(0, -pad_t);
+  int oStartX = fmax(0, pad_l);
+  int oStartY = fmax(0, pad_t);
+
+  long k, ip_x, ip_y;
+#pragma omp parallel for private(k, ip_x, ip_y)
+
+  for (k = 0; k < nslices; k++)
+  {
+    long i, j;
+    for (i = 0; i < oheight; i++) {
+      for (j = 0; j < owidth; j++) {
+        if (j < pad_l) {
+          ip_x = pad_l * 2 - j;
+        } else if (j >= pad_l && j < iwidth + pad_l) {
+          ip_x = j;
+        } else {
+          ip_x = (iwidth + pad_l - 1) * 2 - j;
+        }
+        ip_x = ip_x - oStartX + iStartX;
+
+        if (i < pad_t) {
+          ip_y = pad_t * 2 - i;
+        } else if (i >= pad_t && i < iheight + pad_t) {
+          ip_y = i;
+        } else {
+          ip_y = (iheight + pad_t - 1) * 2 - i;
+        }
+        ip_y = ip_y - oStartY + iStartY;
+
+        real *dest_p = output_p + k*owidth*oheight + i * owidth + j;
+        real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
+        *dest_p = *src_p;
+      }
+    }
+  }
+}
+
+void THNN_(SpatialReflectionPadding_updateOutput)(THNNState *state,
+                                                  THTensor *input,
+                                                  THTensor *output,
+                                                  int pad_l, int pad_r,
+                                                  int pad_t, int pad_b)
+{
+  int dimw = 2;
+  int dimh = 1;
+  int dimslices = 0;
+  long nbatch = 1;
+  long nslices;
+  long iheight;
+  long iwidth;
+  long oheight;
+  long owidth;
+  real *input_data;
+  real *output_data;
+
+  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+                "3D or 4D (batch mode) tensor expected for input, but got: %s");
+
+  if (input->nDimension == 4)
+  {
+    nbatch = input->size[0];
+    dimw++;
+    dimh++;
+    dimslices++;
+  }
+
+  /* sizes */
+  nslices = input->size[dimslices];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  oheight = iheight + pad_t + pad_b;
+  owidth = iwidth + pad_l + pad_r;
+
+  THArgCheck(owidth >= 1 || oheight >= 1 , 2,
+             "input (H: %d, W: %d)is too small."
+             " Calculated output H: %d W: %d",
+             iheight, iwidth, oheight, owidth);
+
+  /* get contiguous input */
+  input = THTensor_(newContiguous)(input);
+
+  /* resize output */
+  if (input->nDimension == 3)
+  {
+    THTensor_(resize3d)(output, nslices, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+
+    THNN_(SpatialReflectionPadding_updateOutput_frame)(input_data, output_data,
+                                                       nslices,
+                                                       iwidth, iheight,
+                                                       owidth, oheight,
+                                                       pad_l, pad_r,
+                                                       pad_t, pad_b);
+  }
+  else
+  {
+    long p;
+
+    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+    for (p = 0; p < nbatch; p++)
+    {
+      THNN_(SpatialReflectionPadding_updateOutput_frame)(
+        input_data+p*nslices*iwidth*iheight,
+        output_data+p*nslices*owidth*oheight,
+        nslices,
+        iwidth, iheight,
+        owidth, oheight,
+        pad_l, pad_r,
+        pad_t, pad_b);
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(input);
+}
+
+static void THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+  real *ginput_p, real *goutput_p,
+  long nslices,
+  long iwidth, long iheight,
+  long owidth, long oheight,
+  int pad_l, int pad_r,
+  int pad_t, int pad_b)
+{
+  int iStartX = fmax(0, -pad_l);
+  int iStartY = fmax(0, -pad_t);
+  int oStartX = fmax(0, pad_l);
+  int oStartY = fmax(0, pad_t);
+
+  long k, ip_x, ip_y;
+#pragma omp parallel for private(k, ip_x, ip_y)
+
+  for (k = 0; k < nslices; k++)
+  {
+    long i, j;
+    for (i = 0; i < oheight; i++) {
+      for (j = 0; j < owidth; j++) {
+        if (j < pad_l) {
+          ip_x = pad_l * 2 - j;
+        } else if (j >= pad_l && j < iwidth + pad_l) {
+          ip_x = j;
+        } else {
+          ip_x = (iwidth + pad_l - 1) * 2 - j;
+        }
+        ip_x = ip_x - oStartX + iStartX;
+
+        if (i < pad_t) {
+          ip_y = pad_t * 2 - i;
+        } else if (i >= pad_t && i < iheight + pad_t) {
+          ip_y = i;
+        } else {
+          ip_y = (iheight + pad_t - 1) * 2 - i;
+        }
+        ip_y = ip_y - oStartY + iStartY;
+
+        real *src_p = goutput_p + k*owidth*oheight + i * owidth + j;
+        real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
+        *dest_p += *src_p;
+      }
+    }
+  }
+}
+
+void THNN_(SpatialReflectionPadding_updateGradInput)(THNNState *state,
+                                                     THTensor *input,
+                                                     THTensor *gradOutput,
+                                                     THTensor *gradInput,
+                                                     int pad_l, int pad_r,
+                                                     int pad_t, int pad_b)
+{
+  int dimw = 2;
+  int dimh = 1;
+  int dimslices = 0;
+  long nbatch = 1;
+  long nslices;
+  long iheight;
+  long iwidth;
+  long oheight;
+  long owidth;
+
+  if (input->nDimension == 4)
+  {
+    nbatch = input->size[0];
+    dimw++;
+    dimh++;
+    dimslices++;
+  }
+
+  /* sizes */
+  nslices = input->size[dimslices];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  oheight = iheight + pad_t + pad_b;
+  owidth = iwidth + pad_l + pad_r;
+
+  THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
+             "gradOutput width unexpected. Expected: %d, Got: %d",
+             owidth, THTensor_(size)(gradOutput, dimw));
+  THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
+             "gradOutput height unexpected. Expected: %d, Got: %d",
+             oheight, THTensor_(size)(gradOutput, dimh));
+
+  /* get contiguous gradOutput */
+  gradOutput = THTensor_(newContiguous)(gradOutput);
+
+  /* resize */
+  THTensor_(resizeAs)(gradInput, input);
+  THTensor_(zero)(gradInput);
+
+  /* backprop */
+  if (input->nDimension == 3) {
+    THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+      THTensor_(data)(gradInput),
+      THTensor_(data)(gradOutput),
+      nslices,
+      iwidth, iheight,
+      owidth, oheight,
+      pad_l, pad_r,
+      pad_t, pad_b);
+  } else {
+    long p;
+#pragma omp parallel for private(p)
+    for (p = 0; p < nbatch; p++) {
+      THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+        THTensor_(data)(gradInput) + p * nslices * iheight * iwidth,
+        THTensor_(data)(gradOutput) + p * nslices * oheight * owidth,
+        nslices,
+        iwidth, iheight,
+        owidth, oheight,
+        pad_l, pad_r,
+        pad_t, pad_b);
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(gradOutput);
+}
+
+#endif
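
For reference (not part of the diff above): a minimal standalone sketch of the per-pixel index-reflection rule the kernel applies, assuming non-negative padding so that iStartX/iStartY are 0 and oStartX/oStartY equal pad_l/pad_t. The helper name reflect_pad_2d and the small main() driver are illustrative only, not names from the commit.

/* Illustrative only: same reflection formulas as updateOutput_frame above,
 * simplified to one channel and non-negative padding. */
#include <stdio.h>

static void reflect_pad_2d(const float *in, float *out,
                           long iwidth, long iheight,
                           int pad_l, int pad_r, int pad_t, int pad_b)
{
  long owidth  = iwidth + pad_l + pad_r;
  long oheight = iheight + pad_t + pad_b;
  for (long i = 0; i < oheight; i++) {
    for (long j = 0; j < owidth; j++) {
      /* Reflect the column index across the left/right borders. */
      long ip_x;
      if (j < pad_l)               ip_x = pad_l * 2 - j;
      else if (j < iwidth + pad_l) ip_x = j;
      else                         ip_x = (iwidth + pad_l - 1) * 2 - j;
      ip_x -= pad_l;  /* shift back into input coordinates */

      /* Same reflection for the row index across top/bottom borders. */
      long ip_y;
      if (i < pad_t)                ip_y = pad_t * 2 - i;
      else if (i < iheight + pad_t) ip_y = i;
      else                          ip_y = (iheight + pad_t - 1) * 2 - i;
      ip_y -= pad_t;

      out[i * owidth + j] = in[ip_y * iwidth + ip_x];
    }
  }
}

int main(void)
{
  /* A 1x4 row padded by 2 on the left and right. */
  const float in[4] = {1, 2, 3, 4};
  float out[8];
  reflect_pad_2d(in, out, 4, 1, 2, 2, 0, 0);
  for (int j = 0; j < 8; j++) printf("%g ", out[j]);
  printf("\n");
  return 0;
}

Running this prints 3 2 1 2 3 4 3 2: the border element is mirrored but not repeated, which is what distinguishes reflection padding from replication padding. Because several output positions can map to the same input position, the backward kernel updateGradInput_frame in the diff accumulates gradients with += rather than assigning.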