#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/THTensorConv.c" #else /* 2D Input, 2D kernel : convolve given image with the given kernel. */ void THTensor_(validXCorr2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long or = (ir - kr) / sr + 1; long oc = (ic - kc) / sc + 1; long xx, yy, kx, ky; if ((sc != 1) || (oc < 4)) { /* regular convolution */ for(yy = 0; yy < or; yy++) { for(xx = 0; xx < oc; xx++) { /* Dot product in two dimensions... (between input image and the mask) */ real *pi_ = t_ + yy*sr*ic + xx*sc; real *pw_ = k_; real sum = 0; for(ky = 0; ky < kr; ky++) { for(kx = 0; kx < kc; kx++) { sum += pi_[kx]*pw_[kx]; } pi_ += ic; /* next input line */ pw_ += kc; /* next mask line */ } /* Update output */ *r_++ += alpha*sum; } } } else { /* SSE-based convolution */ for(yy = 0; yy < or; yy++) { real *pi_ = t_ + yy*sr*ic; real *pw_ = k_; for (ky = 0; ky < kr; ky++) { real *pis_ = pi_; for (kx = 0; kx < kc; kx++) { THVector_(cadd)(r_, r_, pis_, alpha*pw_[kx], oc); pis_++; } pi_ += ic; /* next input line */ pw_ += kc; /* next mask line */ } r_ += oc; } } } /* 2D Input, 2D kernel : convolve given image with the given kernel. */ void THTensor_(validConv2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long or = (ir - kr) / sr + 1; long oc = (ic - kc) / sc + 1; long xx, yy, kx, ky; if ((sc != 1) || (oc < 4)) { /* regular convolution */ for(yy = 0; yy < or; yy++) { for(xx = 0; xx < oc; xx++) { /* Dot product in two dimensions... (between input image and the mask) */ real *pi_ = t_ + yy*sr*ic + xx*sc; real *pw_ = k_ + kr*kc - 1; real sum = 0; for(ky = 0; ky < kr; ky++) { for(kx = 0; kx < kc; kx++) { sum += pi_[kx]*pw_[-kx]; } pi_ += ic; /* next input line */ pw_ -= kc; /* next mask line */ } /* Update output */ *r_++ += alpha*sum; } } } else { /* SSE-based convolution */ for(yy = 0; yy < or; yy++) { real *pw_ = k_ + kr*kc - 1; real *pi_ = t_ + yy*sr*ic; for (ky = 0; ky < kr; ky++) { real *pis_ = pi_; for (kx = 0; kx < kc; kx++) { THVector_(cadd)(r_, r_, pis_, alpha*pw_[-kx], oc); pis_++; } pi_ += ic; /* next input line */ pw_ -= kc; /* next mask line */ } r_ += oc; } } } /* 2D Input, 2D kernel : convolve given image with the given kernel, full convolution. */ void THTensor_(fullConv2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long oc = (ic - 1) * sc + kc; long xx, yy, kx, ky; if ((sc != 1) || (ic < 4)) { /* regular convolution */ for(yy = 0; yy < ir; yy++) { for(xx = 0; xx < ic; xx++) { /* Outer product in two dimensions... (between input image and the mask) */ real *po_ = r_ + yy*sr*oc + xx*sc; real *pw_ = k_; for(ky = 0; ky < kr; ky++) { real z = *t_ * alpha; for(kx = 0; kx < kc; kx++) { po_[kx] += z * pw_[kx]; } po_ += oc; /* next input line */ pw_ += kc; /* next mask line */ } t_++; } } } else { /* SSE-based convolution */ for(yy = 0; yy < ir; yy++) { real *po_ = r_ + yy*sr*oc; real *pw_ = k_; for (ky = 0; ky < kr; ky++) { real *pos_ = po_; for (kx = 0; kx < kc; kx++) { THVector_(cadd)(pos_, pos_, t_, alpha*pw_[kx], ic); pos_++; } po_ += oc; /* next input line */ pw_ += kc; /* next mask line */ } t_ += ic; } } } /* 2D Input, 2D kernel : convolve given image with the given kernel, full convolution. */ void THTensor_(fullXCorr2Dptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long oc = (ic - 1) * sc + kc; long xx, yy, kx, ky; if ((sc != 1) || (ic < 4)) { /* regular convolution */ for(yy = 0; yy < ir; yy++) { for(xx = 0; xx < ic; xx++) { /* Outer product in two dimensions... (between input image and the mask) */ real *po_ = r_ + yy*sr*oc + xx*sc; real *pw_ = k_ + kr*kc -1; long kx, ky; for(ky = 0; ky < kr; ky++) { real z = *t_ * alpha; for(kx = 0; kx < kc; kx++) { po_[kx] += z * pw_[-kx]; } po_ += oc; /* next input line */ pw_ -= kc; /* next mask line */ } t_++; } } } else { /* SSE-based convolution */ for(yy = 0; yy < ir; yy++) { real *po_ = r_ + yy*sr*oc; real *pw_ = k_ + kr*kc -1; for (ky = 0; ky < kr; ky++) { real *pos_ = po_; for (kx = 0; kx < kc; kx++) { THVector_(cadd)(pos_, pos_, t_, pw_[-kx]*alpha, ic); pos_++; } po_ += oc; /* next input line */ pw_ -= kc; /* next mask line */ } t_ += ic; } } } /* 2D Input, 2D kernel : convolve given image with the given kernel, valid convolution. for sr,sc=1 this is equivalent to validXCorr2Dptr, but otherwise it is useful for calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 */ void THTensor_(validXCorr2DRevptr)(real *r_, real alpha, real *t_, long ir, long ic, real *k_, long kr, long kc, long sr, long sc) { long or = ir - (kr - 1) * sr; long oc = ic - (kc - 1) * sc; long xx, yy, kx, ky; if ((sc != 1) || (kc < 4)) { /* regular convolution */ for(yy = 0; yy < kr; yy++) { for(xx = 0; xx < kc; xx++) { real *po_ = r_; real *pi_ = t_ + yy*sr*ic + xx*sc; real z = *k_++ * alpha; for(ky = 0; ky < or; ky++) { for(kx = 0; kx < oc; kx++) po_[kx] += z * pi_[kx]; pi_ += ic; po_ += oc; } } } } else { /* SSE-based convolution */ for(yy = 0; yy < kr; yy++) { for(xx = 0; xx < kc; xx++) { real *po_ = r_; real *pi_ = t_ + yy*sr*ic + xx*sc; real z = *k_++ * alpha; for(ky = 0; ky < or; ky++) { THVector_(cadd)(po_, po_, pi_, z, oc); pi_ += ic; po_ += oc; } } } } } /* 3D Input, 3D kernel : convolve given volume with the given kernel. */ void THTensor_(validXCorr3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc) { long ot = (it - kt) / st + 1; long or = (ir - kr) / sr + 1; long oc = (ic - kc) / sc + 1; long zz, xx, yy; for (zz = 0; zz < ot; zz++) { for(yy = 0; yy < or; yy++) { for(xx = 0; xx < oc; xx++) { /* Dot product in two dimensions... (between input image and the mask) */ real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; real *pw_ = k_; real sum = 0; long kz, kx, ky; for(kz = 0; kz < kt; kz++) { for(ky = 0; ky < kr; ky++) { for(kx = 0; kx < kc; kx++) { sum += pi_[kx]*pw_[kx]; } pi_ += ic; /* next input line */ pw_ += kc; /* next mask line */ } pi_ += (ir-kr)*ic; /* next input slice */ } /* Update output */ *r_++ += sum*alpha; } } } } /* 3D Input, 3D kernel : convolve given volume with the given kernel. */ void THTensor_(validConv3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc) { long ot = (it - kt) / st + 1; long or = (ir - kr) / sr + 1; long oc = (ic - kc) / sc + 1; long zz, xx, yy; for(zz = 0; zz < ot; zz++) { for(yy = 0; yy < or; yy++) { for(xx = 0; xx < oc; xx++) { /* Dot product in two dimensions... (between input image and the mask) */ real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; real *pw_ = k_ + kt*kr*kc - 1; real sum = 0; long kz, kx, ky; for(kz = 0; kz < kt; kz++) { for(ky = 0; ky < kr; ky++) { for(kx = 0; kx < kc; kx++) { sum += pi_[kx]*pw_[-kx]; } pi_ += ic; /* next input line */ pw_ -= kc; /* next mask line */ } pi_ += (ir-kr)*ic; /* next input slice */ } /* Update output */ *r_++ += alpha*sum; } } } } /* 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution. */ void THTensor_(fullConv3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc) { long or = (ir - 1) * sr + kr; long oc = (ic - 1) * sc + kc; long zz, xx, yy; for(zz = 0; zz < it; zz++) { for(yy = 0; yy < ir; yy++) { for(xx = 0; xx < ic; xx++) { /* Outer product in two dimensions... (between input image and the mask) */ real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc; real *pw_ = k_; long kz, kx, ky; /* printf("Output Plane : %ld,%ld,%ld, input val=%g\n",zz,yy,xx,*t_); */ for(kz = 0; kz < kt; kz++) { for(ky = 0; ky < kr; ky++) { real z = *t_ * alpha; for(kx = 0; kx < kc; kx++) { /* printf("o=%g,k=%g," , po_[kx],pw_[kx]); */ po_[kx] += z * pw_[kx]; /* printf("o=%g " , po_[kx]); */ } /* printf("\n"); */ po_ += oc; /* next input line */ pw_ += kc; /* next mask line */ } po_ += (or-kr)*oc; /* next output slice */ /* printf("\n"); */ } t_++; } } } } /* 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution. */ void THTensor_(fullXCorr3Dptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc) { long or = (ir - 1) * sr + kr; long oc = (ic - 1) * sc + kc; long zz, xx, yy; for(zz = 0; zz < it; zz++) { for(yy = 0; yy < ir; yy++) { for(xx = 0; xx < ic; xx++) { /* Outer product in two dimensions... (between input image and the mask) */ real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc; real *pw_ = k_ + kt*kr*kc -1; long kz, kx, ky; for(kz = 0; kz < kt; kz++) { for(ky = 0; ky < kr; ky++) { real z = *t_ * alpha; for(kx = 0; kx < kc; kx++) { po_[kx] += z * pw_[-kx]; } po_ += oc; /* next input line */ pw_ -= kc; /* next mask line */ } po_ += (or-kr)*oc; /* next output slice */ } t_++; } } } } /* 3D Input, 3D kernel : convolve given image with the given kernel, valid convolution. for sr,sc=1 this is equivalent to validXCorr3Dptr, but otherwise it is useful for calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 */ void THTensor_(validXCorr3DRevptr)(real *r_, real alpha, real *t_, long it, long ir, long ic, real *k_, long kt, long kr, long kc, long st, long sr, long sc) { long ot = it - (kt - 1) * st; long or = ir - (kr - 1) * sr; long oc = ic - (kc - 1) * sc; long zz, xx, yy; for(zz = 0; zz < kt; zz++) { for(yy = 0; yy < kr; yy++) { for(xx = 0; xx < kc; xx++) { real *po_ = r_; real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; real z = *k_++ * alpha; long kz, kx, ky; for(kz = 0; kz < ot; kz++) { for(ky = 0; ky < or; ky++) { for(kx = 0; kx < oc; kx++) po_[kx] += z * pi_[kx]; pi_ += ic; po_ += oc; } pi_ += (ir-or)*ic; /* next input slice */ } } } } } void THTensor_(conv2d)(real* output_data, real alpha, real* ptr_input, long nInputRows, long nInputCols, real* ptr_weight, long nKernelRows, long nKernelCols, long srow, long scol, const char *vf, const char *xc) { THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'"); if (*vf == 'F') if (*xc == 'X') THTensor_(fullXCorr2Dptr)(output_data, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(fullConv2Dptr)(output_data, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else if (*xc == 'X') THTensor_(validXCorr2Dptr)(output_data, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(validConv2Dptr)(output_data, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); } void THTensor_(conv3d)(real* output_data, real alpha, real* ptr_input, long nInputDepth, long nInputRows, long nInputCols, real* ptr_weight, long nKernelDepth, long nKernelRows, long nKernelCols, long sdepth, long srow, long scol, const char *vf, const char *xc) { THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'"); if (*vf == 'F') if (*xc == 'X') THTensor_(fullXCorr3Dptr)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol); else THTensor_(fullConv3Dptr)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol); else if (*xc == 'X') THTensor_(validXCorr3Dptr)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol); else THTensor_(validConv3Dptr)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol); } long THTensor_(convsize)(long x, long k, long s, const char* vf) { THArgCheck(*vf == 'V' || *vf == 'F', 1, "type of convolution can be 'V' or 'F'"); if (*vf == 'V') return (x-k)/s + 1; else return (x-1)*s + k; } /* 3D input, 3D kernel, 4D output like rank1 update A <- xx' + beta*A for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 */ void THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol) { long nInputPlane, nInputRows, nInputCols; long nKernelPlane, nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; nKernelPlane = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; nOutputPlane = nInputPlane * kernel->size[0]; THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "covn2DRevger : Input image is smaller than kernel"); nOutputRows = nInputRows - (nKernelRows - 1) * srow; nOutputCols = nInputCols - (nKernelCols - 1) * scol; nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } #pragma omp parallel for private(k) for(k = 0; k < nKernelPlane; k++) { long i; /* get kernel */ real *ptr_weight = weight_data+k*kstride0; for(i = 0; i < nInputPlane; i++) { /* get output */ real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; /* get input */ real *ptr_input = input_data+i*istride0; /* do image, kernel convolution */ THTensor_(validXCorr2DRevptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); /* Next output plane */ /* output_data += nOutputCols*nOutputRows; */ } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 4D output like rank1 update A <- xx' + beta*A for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 */ void THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol) { long nbatch, nInputPlane, nInputRows, nInputCols; long nKernelPlane, nKernelRows, nKernelCols; long nOutputRows, nOutputCols; long istride0, kstride0, istride1, kstride1; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; istride1 = input->stride[1]; nbatch = input->size[0]; nInputPlane = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; kstride1 = kernel->stride[1]; nKernelPlane = kernel->size[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv2DRevger : Input image is smaller than kernel"); THArgCheck(kernel->size[0] == input->size[0] , 2, "conv2DRevger : Input batch and kernel batch is not same size"); nOutputRows = nInputRows - (nKernelRows - 1) * srow; nOutputCols = nInputCols - (nKernelCols - 1) * scol; nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } #pragma omp parallel for private(k) for(k = 0; k < nKernelPlane; k++) { long i; for(i = 0; i < nInputPlane; i++) { long p; for(p = 0; p < nbatch; p++) { /* get kernel */ real *ptr_weight = weight_data + p*kstride0 + k*kstride1; /* get output */ real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; /* get input */ real *ptr_input = input_data + p*istride0 + i*istride1; /* do image, kernel convolution */ THTensor_(validXCorr2DRevptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); /* Next output plane */ /* output_data += nOutputCols*nOutputRows; */ } } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 4D output like rank1 update A <- xx' + beta*A */ void THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelPlane, nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; nKernelPlane = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; nOutputPlane = nInputPlane * kernel->size[0]; THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dger : Input image is smaller than kernel"); if (*vf == 'F') { nOutputRows = (nInputRows - 1) * srow + nKernelRows; nOutputCols = (nInputCols - 1) * scol + nKernelCols; } else { /* valid */ nOutputRows = (nInputRows - nKernelRows) / srow + 1; nOutputCols = (nInputCols - nKernelCols) / scol + 1; } nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nKernelPlane, nInputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]*r_->size[1]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } #pragma omp parallel for private(k) for(k = 0; k < nKernelPlane; k++) { long i; /* get kernel */ real *ptr_weight = weight_data+k*kstride0; for(i = 0; i < nInputPlane; i++) { /* get output */ real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; /* get input */ real *ptr_input = input_data+i*istride0; /* do image, kernel convolution */ if (*vf == 'F') if (*xc == 'X') THTensor_(fullXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(fullConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else if (*xc == 'X') THTensor_(validXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(validConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); /* Next output plane */ /* output_data += nOutputCols*nOutputRows; */ } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 4D kernel, 3D output matrix vector product like y <- Ax + beta*y */ void THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0, kstride1; THTensor *input; THTensor* kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) { kernel = THTensor_(newContiguous)(k_); } else { THTensor_(retain)(k_); kernel = k_; } nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; kstride1 = kernel->stride[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; nOutputPlane = kernel->size[0]; THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel"); if (*vf == 'F') { nOutputRows = (nInputRows - 1) * srow + nKernelRows; nOutputCols = (nInputCols - 1) * scol + nKernelCols; } else { /* valid */ nOutputRows = (nInputRows - nKernelRows) / srow + 1; nOutputCols = (nInputCols - nKernelCols) / scol + 1; } nelem = THTensor_(nElement)(r_); THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(k) for (k = 0; k < r_->size[0]; k++) { real* ptr_output = output_data + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } #pragma omp parallel for private(k) for(k = 0; k < nOutputPlane; k++) { long i; /* get output */ real *ptr_output = output_data + k*nOutputCols*nOutputRows; for(i = 0; i < nInputPlane; i++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0 + i*kstride1; /* get input */ real *ptr_input = input_data + i*istride0; /* do image, kernel convolution */ if (*vf == 'F') if (*xc == 'X') THTensor_(fullXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(fullConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else if (*xc == 'X') THTensor_(validXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(validConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); } /* Next output plane */ /* output_data += nOutputCols*nOutputRows;*/ } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 4D kernel, 3D output matrix vector product like y <- Ax + beta*y */ void THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long kstride0, kstride1; THTensor *input; THTensor* kernel; long nbatch; ptrdiff_t nelem; real *input_data; real *weight_data; real *output_data; long p; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) { kernel = THTensor_(newContiguous)(k_); } else { THTensor_(retain)(k_); kernel = k_; } nbatch = input->size[0]; nInputPlane = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; kstride1 = kernel->stride[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; nOutputPlane = kernel->size[0]; THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel"); if (*vf == 'F') { nOutputRows = (nInputRows - 1) * srow + nKernelRows; nOutputCols = (nInputCols - 1) * scol + nKernelCols; } else { /* valid */ nOutputRows = (nInputRows - nKernelRows) / srow + 1; nOutputCols = (nInputCols - nKernelCols) / scol + 1; } nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { /*THTensor_(zero)(r_);*/ #pragma omp parallel for private(p) for (p=0; p < r_->size[0]; p++) { long k; for (k = 0; k < r_->size[1]; k++) { real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] = 0.0; } } } else if (beta != 1) { /*THTensor_(mul)(r_, beta);*/ #pragma omp parallel for private(p) for(p=0; p < r_->size[0]; p++) { long k; for (k = 0; k < r_->size[1]; k++) { real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows; long l; for (l = 0; l < nOutputRows*nOutputCols; l++) ptr_output[l] *= beta; } } } #pragma omp parallel for private(p) for(p=0; p < nbatch; p++) { long k; for(k = 0; k < nOutputPlane; k++) { long i; /* get output */ real *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows; for(i = 0; i < nInputPlane; i++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0 + i*kstride1; /* get input */ real *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols; /* do image, kernel convolution */ if (*vf == 'F') if (*xc == 'X') THTensor_(fullXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(fullConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else if (*xc == 'X') THTensor_(validXCorr2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); else THTensor_(validConv2Dptr)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol); } /* Next output plane */ /* output_data += nOutputCols*nOutputRows;*/ } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 2D input, 2D kernel, 2D output scalar multiplication like y <- x*y + beta*y */ void THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { THTensor *input; THTensor* kernel; long nInputRows; long nInputCols; long nKernelRows; long nKernelCols; long nOutputRows, nOutputCols; real *ptr_input; real *ptr_weight; real *output_data; ptrdiff_t nelem; THArgCheck(t_->nDimension == 2 , 3, "input: 2D Tensor expected"); THArgCheck(k_->nDimension == 2 , 4, "kernel: 2D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputRows = input->size[0]; nInputCols = input->size[1]; nKernelRows = kernel->size[0]; nKernelCols = kernel->size[1]; THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmul : Input image is smaller than kernel"); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize2d)(r_, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) THTensor_(zero)(r_); else if (beta != 1) THTensor_(mul)(r_, r_, beta); ptr_input = THTensor_(data)(input); ptr_weight = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); /* do image, kernel convolution */ THTensor_(conv2d)(output_data, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol, vf, xc); THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 3D output component wise multiplication like y <- y.*x + beta*y */ void THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; nInputPlane = input->size[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; nOutputPlane = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dcmul : Input image is smaller than kernel"); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); for(k = 0; k < nOutputPlane; k++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0; /* get input */ real *ptr_input = input_data + k*istride0; /* do image, kernel convolution */ THTensor_(conv2d)(output_data, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol, vf, xc); /* Next output plane */ output_data += nOutputCols*nOutputRows; } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 3D output component wise multiplication like with a permutation map y <- y.*x + beta*y */ void THTensor_(conv2Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputRows, nInputCols; long nKernelRows, nKernelCols; long nOutputPlane, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor* kernel; real *input_data; real *weight_data; real *output_data; long nmaps; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected"); THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; nInputPlane = input->size[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; kstride0 = kernel->stride[0]; nOutputPlane = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmap : Input image is smaller than kernel"); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); nmaps = map->size[0]; for(k = 0; k < nmaps; k++) { /* get indices */ long from = (long)THTensor_(get2d)(map,k,0)-1; long to = (long)THTensor_(get2d)(map,k,1)-1; /* get kernel */ real *ptr_weight = weight_data + k*kstride0; /* get input */ real *ptr_input = input_data + from*istride0; /* get output */ real *ptr_output = output_data + to*nOutputRows*nOutputCols; /* do image, kernel convolution */ THTensor_(conv2d)(ptr_output, alpha, ptr_input, nInputRows, nInputCols, ptr_weight, nKernelRows, nKernelCols, srow, scol, vf, xc); } THTensor_(free)(input); THTensor_(free)(kernel); } /* 4D input, 4D kernel, 5D output like rank1 update A <- xx' + beta*A for sr,sc=1 this is equivalent to xcorr2Dger, but otherwise it is useful for calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 */ void THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol) { long nInputPlane, nInputDepth, nInputRows, nInputCols; long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols; long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k, i; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputDepth = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; nKernelPlane = kernel->size[0]; nKernelDepth= kernel->size[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; nOutputPlane = nInputPlane * kernel->size[0]; THArgCheck(nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv3DRevger : Input image is smaller than kernel"); nOutputDepth = nInputDepth - (nKernelDepth - 1) * sdepth; nOutputRows = nInputRows - (nKernelRows - 1) * srow; nOutputCols = nInputCols - (nKernelCols - 1) * scol; nelem = THTensor_(nElement)(r_); THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); for(k = 0; k < nKernelPlane; k++) { /* get kernel */ real *ptr_weight = weight_data+k*kstride0; for(i = 0; i < nInputPlane; i++) { /* get input */ real *ptr_input = input_data+i*istride0; /* do image, kernel convolution */ THTensor_(validXCorr3DRevptr)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol); /* Next output plane */ output_data += nOutputDepth*nOutputCols*nOutputRows; } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 4D input, 4D kernel, 5D output like rank1 update A <- xx' + beta*A */ void THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputDepth, nInputRows, nInputCols; long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols; long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k, i; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputDepth = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; nKernelPlane = kernel->size[0]; nKernelDepth = kernel->size[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; nOutputPlane = nInputPlane * kernel->size[0]; THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dger : Input image is smaller than kernel"); nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); for(k = 0; k < nKernelPlane; k++) { /* get kernel */ real *ptr_weight = weight_data+k*kstride0; for(i = 0; i < nInputPlane; i++) { /* get input */ real *ptr_input = input_data+i*istride0; /* do image, kernel convolution */ THTensor_(conv3d)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol, vf, xc); /* Next output plane */ output_data += nOutputDepth*nOutputCols*nOutputRows; } } THTensor_(free)(input); THTensor_(free)(kernel); } /* 4D input, 5D kernel, 4D output matrix vector product like y <- Ax + beta*y */ void THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputDepth, nInputRows, nInputCols; long nKernelDepth, nKernelRows, nKernelCols; long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; long istride0, kstride0, kstride1; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k, i; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 5 , 4, "kernel: 5D Tensor expected"); THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); if (!(k_->stride[4] == 1) || !(k_->stride[3] == k_->size[4])) { kernel = THTensor_(newContiguous)(k_); } else { THTensor_(retain)(k_); kernel = k_; } nInputPlane = input->size[0]; istride0 = input->stride[0]; nInputDepth = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; kstride1 = kernel->stride[1]; nKernelDepth = kernel->size[2]; nKernelRows = kernel->size[3]; nKernelCols = kernel->size[4]; nOutputPlane = kernel->size[0]; THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmv : Input image is smaller than kernel"); nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); for(k = 0; k < nOutputPlane; k++) { for(i = 0; i < nInputPlane; i++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0 + i*kstride1; /* get input */ real *ptr_input = input_data + i*istride0; /* do image, kernel convolution */ THTensor_(conv3d)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol, vf, xc); } /* Next output plane */ output_data += nOutputDepth*nOutputCols*nOutputRows; } THTensor_(free)(input); THTensor_(free)(kernel); } /* 3D input, 3D kernel, 3D output scalar multiplication like y <- x*y + beta*y */ void THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc) { THTensor *input; THTensor* kernel; long nInputDepth; long nInputRows; long nInputCols; long nKernelDepth; long nKernelRows; long nKernelCols; long nOutputDepth, nOutputRows, nOutputCols; real *ptr_input; real *ptr_weight; real *output_data; ptrdiff_t nelem; THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); nInputDepth = input->size[0]; nInputRows = input->size[1]; nInputCols = input->size[2]; nKernelDepth = kernel->size[0]; nKernelRows = kernel->size[1]; nKernelCols = kernel->size[2]; THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmul : Input image is smaller than kernel"); nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize3d)(r_, nOutputDepth, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) THTensor_(zero)(r_); else if (beta != 1) THTensor_(mul)(r_, r_, beta); ptr_input = THTensor_(data)(input); ptr_weight = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); /* do image, kernel convolution */ THTensor_(conv3d)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol, vf, xc); THTensor_(free)(input); THTensor_(free)(kernel); } /* 4D input, 4D kernel, 4D output component wise multiplication like y <- y.*x + beta*y */ void THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputDepth, nInputRows, nInputCols; long nKernelDepth, nKernelRows, nKernelCols; long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; real *input_data; real *weight_data; real *output_data; ptrdiff_t nelem; long k; THArgCheck(t_->nDimension == 4 , 3, "input: 3D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 3D Tensor expected"); THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; nInputPlane = input->size[0]; nInputDepth = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; nOutputPlane = kernel->size[0]; nKernelDepth = kernel->size[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dcmul : Input image is smaller than kernel"); nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); for(k = 0; k < nOutputPlane; k++) { /* get kernel */ real *ptr_weight = weight_data + k*kstride0; /* get input */ real *ptr_input = input_data + k*istride0; /* do image, kernel convolution */ THTensor_(conv3d)(output_data, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol, vf, xc); /* Next output plane */ output_data += nOutputDepth*nOutputCols*nOutputRows; } THTensor_(free)(input); THTensor_(free)(kernel); } /* 4D input, 4D kernel, 4D output component wise multiplication like with a permutation map y <- y.*x + beta*y */ void THTensor_(conv3Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, long sdepth, long srow, long scol, const char *vf, const char *xc) { long nInputPlane, nInputDepth, nInputRows, nInputCols; long nKernelDepth, nKernelRows, nKernelCols; long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; long istride0, kstride0; THTensor *input; THTensor *kernel; ptrdiff_t nelem; real *input_data; real *weight_data; real *output_data; long nmaps; long k; THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected"); THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); input = THTensor_(newContiguous)(t_); kernel = THTensor_(newContiguous)(k_); istride0 = input->stride[0]; nInputPlane = input->size[0]; nInputDepth = input->size[1]; nInputRows = input->size[2]; nInputCols = input->size[3]; kstride0 = kernel->stride[0]; nOutputPlane = kernel->size[0]; nKernelDepth = kernel->size[1]; nKernelRows = kernel->size[2]; nKernelCols = kernel->size[3]; THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmap : Input image is smaller than kernel"); nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); nelem = THTensor_(nElement)(r_); THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) { THTensor_(zero)(r_); } else if (beta != 1) THTensor_(mul)(r_, r_, beta); input_data = THTensor_(data)(input); weight_data = THTensor_(data)(kernel); output_data = THTensor_(data)(r_); nmaps = map->size[0]; for(k = 0; k < nmaps; k++) { /* get indices */ long from = (long)THTensor_(get2d)(map,k,0)-1; long to = (long)THTensor_(get2d)(map,k,1)-1; /* get kernel */ real *ptr_weight = weight_data + k*kstride0; /* get input */ real *ptr_input = input_data + from*istride0; /* get output */ real *ptr_output = output_data + to*nOutputDepth*nOutputRows*nOutputCols; /* do image, kernel convolution */ THTensor_(conv3d)(ptr_output, alpha, ptr_input, nInputDepth, nInputRows, nInputCols, ptr_weight, nKernelDepth, nKernelRows, nKernelCols, sdepth, srow, scol, vf, xc); } THTensor_(free)(input); THTensor_(free)(kernel); } #endif