#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/VolumetricReplicationPadding.c" #else static inline void THNN_(VolumetricReplicationPadding_shapeCheck)( THNNState *state, THTensor *input, THTensor *gradOutput, int pleft, int pright, int ptop, int pbottom, int pfront, int pback) { int dimw = 3; int dimh = 2; int dimd = 1; int dimslices = 0; long nslices; long idepth; long iheight; long iwidth; long odepth; long oheight; long owidth; THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, "4D or 5D (batch mode) tensor expected for input, but got: %s"); if (input->nDimension == 5) { dimw++; dimh++; dimd++; dimslices++; } /* sizes */ nslices = input->size[dimslices]; idepth = input->size[dimd]; iheight = input->size[dimh]; iwidth = input->size[dimw]; odepth = idepth + pfront + pback; oheight = iheight + ptop + pbottom; owidth = iwidth + pleft + pright; THArgCheck(owidth >= 1 || oheight >= 1 || odepth >= 1, 2, "input (D: %d H: %d, W: %d)is too small." " Calculated output D: %d H: %d W: %d", idepth, iheight, iwidth, odepth, oheight, owidth); if (gradOutput != NULL) { THArgCheck(nslices == THTensor_(size)(gradOutput, dimslices), 3, "gradOutput width unexpected. Expected: %d, Got: %d", nslices, THTensor_(size)(gradOutput, dimslices)); THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3, "gradOutput width unexpected. Expected: %d, Got: %d", owidth, THTensor_(size)(gradOutput, dimw)); THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3, "gradOutput height unexpected. Expected: %d, Got: %d", oheight, THTensor_(size)(gradOutput, dimh)); THArgCheck(odepth == THTensor_(size)(gradOutput, dimd), 3, "gradOutput depth unexpected. Expected: %d, Got: %d", odepth, THTensor_(size)(gradOutput, dimd)); } } static void THNN_(VolumetricReplicationPadding_updateOutput_frame)( real *input_p, real *output_p, long nslices, long iwidth, long iheight, long idepth, long owidth, long oheight, long odepth, int pleft, int pright, int ptop, int pbottom, int pfront, int pback) { int iStartX = fmax(0, -pleft); int iStartY = fmax(0, -ptop); int iStartZ = fmax(0, -pfront); int oStartX = fmax(0, pleft); int oStartY = fmax(0, ptop); int oStartZ = fmax(0, pfront); long k, ip_x, ip_y, ip_z; #pragma omp parallel for private(k, ip_x, ip_y, ip_z) for (k = 0; k < nslices; k++) { long i, j, z; for (z = 0; z < odepth; z++) { for (i = 0; i < oheight; i++) { for (j = 0; j < owidth; j++) { if (j < pleft) { ip_x = pleft; } else if (j >= pleft && j < iwidth + pleft) { ip_x = j; } else { ip_x = iwidth + pleft - 1; } ip_x = ip_x - oStartX + iStartX; if (i < ptop) { ip_y = ptop; } else if (i >= ptop && i < iheight + ptop) { ip_y = i; } else { ip_y = iheight + ptop - 1; } ip_y = ip_y - oStartY + iStartY; if (z < pfront) { ip_z = pfront; } else if (z >= pfront && z < idepth + pfront) { ip_z = z; } else { ip_z = idepth + pfront - 1; } ip_z = ip_z - oStartZ + iStartZ; real *dest_p = output_p + k * owidth * oheight * odepth + z * owidth * oheight + i * owidth + j; real *src_p = input_p + k * iwidth * iheight * idepth + ip_z * iwidth * iheight + ip_y * iwidth + ip_x; *dest_p = *src_p; } } } } } void THNN_(VolumetricReplicationPadding_updateOutput)(THNNState *state, THTensor *input, THTensor *output, int pleft, int pright, int ptop, int pbottom, int pfront, int pback) { int dimw = 3; int dimh = 2; int dimd = 1; int dimslices = 0; long nbatch = 1; long nslices; long idepth; long iheight; long iwidth; long odepth; long oheight; long owidth; real *input_data; real *output_data; THNN_(VolumetricReplicationPadding_shapeCheck)( state, input, NULL, pleft, pright, ptop, pbottom, pfront, pback); if (input->nDimension == 5) { nbatch = input->size[0]; dimw++; dimh++; dimd++; dimslices++; } /* sizes */ nslices = input->size[dimslices]; idepth = input->size[dimd]; iheight = input->size[dimh]; iwidth = input->size[dimw]; odepth = idepth + pfront + pback; oheight = iheight + ptop + pbottom; owidth = iwidth + pleft + pright; /* get contiguous input */ input = THTensor_(newContiguous)(input); /* resize output */ if (input->nDimension == 4) { THTensor_(resize4d)(output, nslices, odepth, oheight, owidth); input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); THNN_(VolumetricReplicationPadding_updateOutput_frame)( input_data, output_data, nslices, iwidth, iheight, idepth, owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront, pback); } else { long p; THTensor_(resize5d)(output, nbatch, nslices, odepth, oheight, owidth); input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); #pragma omp parallel for private(p) for (p = 0; p < nbatch; p++) { THNN_(VolumetricReplicationPadding_updateOutput_frame)( input_data + p * nslices * iwidth * iheight * idepth, output_data + p * nslices * owidth * oheight * odepth, nslices, iwidth, iheight, idepth, owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront, pback); } } /* cleanup */ THTensor_(free)(input); } static void THNN_(VolumetricReplicationPadding_updateGradInput_frame)( real *ginput_p, real *goutput_p, long nslices, long iwidth, long iheight, long idepth, long owidth, long oheight, long odepth, int pleft, int pright, int ptop, int pbottom, int pfront, int pback) { int iStartX = fmax(0, -pleft); int iStartY = fmax(0, -ptop); int iStartZ = fmax(0, -pfront); int oStartX = fmax(0, pleft); int oStartY = fmax(0, ptop); int oStartZ = fmax(0, pfront); long k, ip_x, ip_y, ip_z; #pragma omp parallel for private(k, ip_x, ip_y, ip_z) for (k = 0; k < nslices; k++) { long i, j, z; for (z = 0; z < odepth; z++) { for (i = 0; i < oheight; i++) { for (j = 0; j < owidth; j++) { if (j < pleft) { ip_x = pleft; } else if (j >= pleft && j < iwidth + pleft) { ip_x = j; } else { ip_x = iwidth + pleft - 1; } ip_x = ip_x - oStartX + iStartX; if (i < ptop) { ip_y = ptop; } else if (i >= ptop && i < iheight + ptop) { ip_y = i; } else { ip_y = iheight + ptop - 1; } ip_y = ip_y - oStartY + iStartY; if (z < pfront) { ip_z = pfront; } else if (z >= pfront && z < idepth + pfront) { ip_z = z; } else { ip_z = idepth + pfront - 1; } ip_z = ip_z - oStartZ + iStartZ; real *src_p = goutput_p + k * owidth * oheight * odepth + z * owidth * oheight + i * owidth + j; real *dest_p = ginput_p + k * iwidth * iheight * idepth + ip_z * iwidth * iheight + ip_y * iwidth + ip_x; *dest_p += *src_p; } } } } } void THNN_(VolumetricReplicationPadding_updateGradInput)(THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, int pleft, int pright, int ptop, int pbottom, int pfront, int pback) { int dimw = 3; int dimh = 2; int dimd = 1; int dimslices = 0; long nbatch = 1; long nslices; long idepth; long iheight; long iwidth; long odepth; long oheight; long owidth; if (input->nDimension == 5) { nbatch = input->size[0]; dimw++; dimh++; dimd++; dimslices++; } /* sizes */ nslices = input->size[dimslices]; idepth = input->size[dimd]; iheight = input->size[dimh]; iwidth = input->size[dimw]; odepth = idepth + pfront + pback; oheight = iheight + ptop + pbottom; owidth = iwidth + pleft + pright; THNN_(VolumetricReplicationPadding_shapeCheck)( state, input, NULL, pleft, pright, ptop, pbottom, pfront, pback); /* get contiguous gradOutput */ gradOutput = THTensor_(newContiguous)(gradOutput); /* resize */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); /* backprop */ if (input->nDimension == 4) { THNN_(VolumetricReplicationPadding_updateGradInput_frame)( THTensor_(data)(gradInput), THTensor_(data)(gradOutput), nslices, iwidth, iheight, idepth, owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront, pback); } else { long p; #pragma omp parallel for private(p) for (p = 0; p < nbatch; p++) { THNN_(VolumetricReplicationPadding_updateGradInput_frame)( THTensor_(data)(gradInput) + p * nslices * idepth * iheight * iwidth, THTensor_(data)(gradOutput) + p * nslices * odepth * oheight * owidth, nslices, iwidth, iheight, idepth, owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront, pback); } } /* cleanup */ THTensor_(free)(gradOutput); } #endif