author    Vsevolod Stakhov <vsevolod@highsecure.ru>   2018-05-23 18:14:15 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru>   2018-05-23 18:14:15 +0100
commit    714eb56e1760fdfb26afccde92664d3a2f1e8435 (patch)
tree      84d1399acbb92f852b4bd64f9ea5412680b0c6ab /contrib/lua-torch/nn
parent    220a51ff68013dd668a45b78c60a7b8bfc10f074 (diff)
[Minor] Move lua contrib libraries to lua- prefix
Diffstat (limited to 'contrib/lua-torch/nn')
282 files changed, 43109 insertions(+), 0 deletions(-)
diff --git a/contrib/lua-torch/nn/.gitignore b/contrib/lua-torch/nn/.gitignore
new file mode 100644
index 000000000..e0fa91eda
--- /dev/null
+++ b/contrib/lua-torch/nn/.gitignore
@@ -0,0 +1,2 @@
+build/
+THNN_h.lua
diff --git a/contrib/lua-torch/nn/.luacheckrc b/contrib/lua-torch/nn/.luacheckrc
new file mode 100644
index 000000000..3d358e9c0
--- /dev/null
+++ b/contrib/lua-torch/nn/.luacheckrc
@@ -0,0 +1,13 @@
+-- -*- mode: lua; -*-
+std = "luajit"
+
+globals = {
+   "torch",
+   "nn",
+   "include",
+}
+
+unused_args = false
+
+
+files['test.lua'].redefined = false
diff --git a/contrib/lua-torch/nn/.travis.yml b/contrib/lua-torch/nn/.travis.yml
new file mode 100644
index 000000000..1d10e0fb5
--- /dev/null
+++ b/contrib/lua-torch/nn/.travis.yml
@@ -0,0 +1,56 @@
+language: c
+compiler:
+  - gcc
+  - clang
+cache:
+  directories:
+  - $HOME/OpenBlasInstall
+sudo: false
+env:
+  - TORCH_LUA_VERSION=LUAJIT21
+  - TORCH_LUA_VERSION=LUA51
+  - TORCH_LUA_VERSION=LUA52
+addons:
+  apt:
+    packages:
+    - cmake
+    - gfortran
+    - gcc-multilib
+    - gfortran-multilib
+    - liblapack-dev
+    - build-essential
+    - gcc
+    - g++
+    - curl
+    - cmake
+    - libreadline-dev
+    - git-core
+    - libqt4-core
+    - libqt4-gui
+    - libqt4-dev
+    - libjpeg-dev
+    - libpng-dev
+    - ncurses-dev
+    - imagemagick
+    - libzmq3-dev
+    - gfortran
+    - unzip
+    - gnuplot
+    - gnuplot-x11
+before_script:
+- export ROOT_TRAVIS_DIR=$(pwd)
+- export INSTALL_PREFIX=~/torch/install
+- ls $HOME/OpenBlasInstall/lib || (cd /tmp/ && git clone https://github.com/xianyi/OpenBLAS.git -b master && cd OpenBLAS && (make NO_AFFINITY=1 -j$(getconf _NPROCESSORS_ONLN) 2>/dev/null >/dev/null) && make PREFIX=$HOME/OpenBlasInstall install)
+- git clone https://github.com/torch/distro.git ~/torch --recursive
+- cd ~/torch && git submodule update --init --recursive
+- mkdir build && cd build
+- export CMAKE_LIBRARY_PATH=$HOME/OpenBlasInstall/include:$HOME/OpenBlasInstall/lib:$CMAKE_LIBRARY_PATH
+- cmake .. -DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" -DCMAKE_BUILD_TYPE=Release -DWITH_${TORCH_LUA_VERSION}=ON
+- make && make install
+- cd $ROOT_TRAVIS_DIR
+- export LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH
+script:
+- ${INSTALL_PREFIX}/bin/luarocks make rocks/nn-scm-1.rockspec
+- export PATH=${INSTALL_PREFIX}/bin:$PATH
+- export TESTLUA=$(which luajit lua | head -n 1)
+- ${TESTLUA} -lnn -e "t=nn.test(); if t.errors[1] then os.exit(1) end"
diff --git a/contrib/lua-torch/nn/Abs.lua b/contrib/lua-torch/nn/Abs.lua
new file mode 100644
index 000000000..b32b64f79
--- /dev/null
+++ b/contrib/lua-torch/nn/Abs.lua
@@ -0,0 +1,22 @@
+local Abs, parent = torch.class('nn.Abs', 'nn.Module')
+
+function Abs:__init()
+   parent.__init(self)
+end
+
+function Abs:updateOutput(input)
+   input.THNN.Abs_updateOutput(
+      input:cdata(),
+      self.output:cdata()
+   )
+   return self.output
+end
+
+function Abs:updateGradInput(input, gradOutput)
+   input.THNN.Abs_updateGradInput(
+      input:cdata(),
+      gradOutput:cdata(),
+      self.gradInput:cdata()
+   )
+   return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/AbsCriterion.lua b/contrib/lua-torch/nn/AbsCriterion.lua
new file mode 100644
index 000000000..65e2f8ae1
--- /dev/null
+++ b/contrib/lua-torch/nn/AbsCriterion.lua
@@ -0,0 +1,32 @@
+local AbsCriterion, parent = torch.class('nn.AbsCriterion', 'nn.Criterion')
+
+function AbsCriterion:__init(sizeAverage)
+   parent.__init(self)
+   if sizeAverage ~= nil then
+      self.sizeAverage = sizeAverage
+   else
+      self.sizeAverage = true
+   end
+end
+
+function AbsCriterion:updateOutput(input, target)
+   self.output_tensor = self.output_tensor or input.new(1)
+   input.THNN.AbsCriterion_updateOutput(
+      input:cdata(),
+      target:cdata(),
+      self.output_tensor:cdata(),
+      self.sizeAverage
+   )
+   self.output = self.output_tensor[1]
+   return self.output
+end
+
+function AbsCriterion:updateGradInput(input, target)
+   input.THNN.AbsCriterion_updateGradInput(
+      input:cdata(),
+      target:cdata(),
+      self.gradInput:cdata(),
+      self.sizeAverage
+   )
+   return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Add.lua b/contrib/lua-torch/nn/Add.lua
new file mode 100644
index 000000000..d071a15b3
--- /dev/null
+++ b/contrib/lua-torch/nn/Add.lua
@@ -0,0 +1,66 @@
+local Add, parent = torch.class('nn.Add', 'nn.Module')
+
+function Add:__init(inputSize,scalar)
+   parent.__init(self)
+
+   local size = inputSize
+   if scalar then size=1 end
+   self.scalar = scalar
+   self.bias = torch.Tensor(size)
+   self.gradBias = torch.Tensor(size)
+
+   self._ones = torch.Tensor{1}
+
+   self:reset()
+end
+
+function Add:reset(stdv)
+   if stdv then
+      stdv = stdv * math.sqrt(3)
+   else
+      stdv = 1./math.sqrt(self.bias:size(1))
+   end
+
+   self.bias:uniform(-stdv, stdv)
+end
+
+function Add:updateOutput(input)
+   self.output:resizeAs(input):copy(input)
+   if self.scalar then
+      self.output:add(self.bias[1]);
+   else
+      if input:isSameSizeAs(self.bias) then
+         self.output:add(self.bias)
+      else
+         local batchSize = input:size(1)
+         if self._ones:size(1) ~= batchSize then
+            self._ones:resize(batchSize):fill(1)
+         end
+         local bias = self.bias:view(-1)
+         local output = self.output:view(batchSize, -1)
+         output:addr(1, self._ones, bias)
+      end
+   end
+   return self.output
+end
+
+function Add:updateGradInput(input, gradOutput)
+   if self.gradInput then
+      self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+      return self.gradInput
+   end
+end
+
+function Add:accGradParameters(input, gradOutput, scale)
+   scale = scale or 1
+   if self.gradBias:size(1) == 1 then
+      self.gradBias[1] = self.gradBias[1] + scale*gradOutput:sum();
+   else
+      if input:isSameSizeAs(self.bias) then
+         self.gradBias:add(scale, gradOutput)
+      else
+         local gradOutput = gradOutput:view(input:size(1), -1)
+         self.gradBias:view(-1):addmv(scale, gradOutput:t(), self._ones)
+      end
+   end
+end
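The three modules above follow the same THNN-backed nn.Module contract: `updateOutput` computes the forward pass, `updateGradInput` the backward pass. A minimal usage sketch (editorial, not part of the commit; the sizes are arbitrary and assume a working Torch install with `nn` available):

```lua
require 'nn'

-- nn.Abs applies |x| elementwise; nn.Add adds a learnable bias
local m = nn.Sequential()
m:add(nn.Abs())
m:add(nn.Add(5))                 -- learns a bias vector of size 5

local x = torch.randn(3, 5)      -- mini-batch of 3 samples
local y = m:forward(x)           -- 3x5 output: |x| plus broadcast bias
local gx = m:backward(x, torch.randn(3, 5))
```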
diff --git a/contrib/lua-torch/nn/AddConstant.lua b/contrib/lua-torch/nn/AddConstant.lua
new file mode 100644
index 000000000..b686d719c
--- /dev/null
+++ b/contrib/lua-torch/nn/AddConstant.lua
@@ -0,0 +1,50 @@
+local AddConstant, parent = torch.class('nn.AddConstant', 'nn.Module')
+
+function AddConstant:__init(constant_scalar,ip)
+   parent.__init(self)
+   self.constant_scalar = constant_scalar
+
+   -- default for inplace is false
+   self.inplace = ip or false
+   if (ip and type(ip) ~= 'boolean') then
+      error('in-place flag must be boolean')
+   end
+end
+
+function AddConstant:updateOutput(input)
+   assert(type(self.constant_scalar) == 'number' or
+      (torch.isTensor(self.constant_scalar) and input:nDimension() <= 2 and
+      input:size(input:nDimension()) == self.constant_scalar:size(1)),
+      'input is not scalar or doesn\'t match with the dimension of constant!')
+   local tmp
+   if torch.isTensor(self.constant_scalar) and input:nDimension() == 2 then
+      local nOutput = self.constant_scalar:size(1)
+      tmp = self.constant_scalar.new()
+      tmp:resize(1,nOutput)
+      tmp:copy(self.constant_scalar)
+      tmp = tmp:expand(input:size(1),nOutput)
+   else
+      tmp = self.constant_scalar
+   end
+   if self.inplace then
+      input:add(tmp)
+      self.output:set(input)
+   else
+      self.output:resizeAs(input)
+      self.output:copy(input)
+      self.output:add(tmp)
+   end
+   return self.output
+end
+
+function AddConstant:updateGradInput(input, gradOutput)
+   if self.inplace then
+      self.gradInput:set(gradOutput)
+      -- restore previous input value
+      input:add(-self.constant_scalar)
+   else
+      self.gradInput:resizeAs(gradOutput)
+      self.gradInput:copy(gradOutput)
+   end
+   return self.gradInput
+end
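As the `inplace` branch above shows, nn.AddConstant either copies its input or mutates it in place (and `updateGradInput` later subtracts the constant to restore it). A short illustrative sketch, not part of the diff:

```lua
require 'nn'

local m = nn.AddConstant(2.5)            -- out-of-place: output is a fresh copy
local y = m:forward(torch.zeros(4))      -- every element becomes 2.5

-- in-place variant: avoids the copy but overwrites its own input tensor
local m2 = nn.AddConstant(2.5, true)
```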
diff --git a/contrib/lua-torch/nn/BCECriterion.lua b/contrib/lua-torch/nn/BCECriterion.lua
new file mode 100644
index 000000000..8bb5f8178
--- /dev/null
+++ b/contrib/lua-torch/nn/BCECriterion.lua
@@ -0,0 +1,64 @@
+local THNN = require 'nn.THNN'
+local BCECriterion, parent = torch.class('nn.BCECriterion', 'nn.Criterion')
+
+function BCECriterion:__init(weights, sizeAverage)
+   parent.__init(self)
+   if sizeAverage ~= nil then
+      self.sizeAverage = sizeAverage
+   else
+      self.sizeAverage = true
+   end
+   if weights ~= nil then
+      assert(weights:dim() == 1, "weights input should be 1-D Tensor")
+      self.weights = weights
+   end
+end
+
+
+function BCECriterion:__len()
+   return self.weights and #self.weights or 0
+end
+
+function BCECriterion:updateOutput(input, target)
+   -- - log(input) * target - log(1 - input) * (1 - target)
+   assert( input:nElement() == target:nElement(),
+      "input and target size mismatch")
+   self.output_tensor = self.output_tensor or input.new(1)
+
+   local weights = self.weights
+   if weights ~= nil and target:dim() ~= 1 then
+      weights = self.weights:view(1, target:size(2)):expandAs(target)
+   end
+
+   input.THNN.BCECriterion_updateOutput(
+      input:cdata(),
+      target:cdata(),
+      self.output_tensor:cdata(),
+      self.sizeAverage,
+      THNN.optionalTensor(weights)
+   )
+
+   self.output = self.output_tensor[1]
+   return self.output
+end
+
+function BCECriterion:updateGradInput(input, target)
+   -- - (target - input) / ( input (1 - input) )
+   assert( input:nElement() == target:nElement(),
+      "input and target size mismatch")
+
+   local weights = self.weights
+   if weights ~= nil and target:dim() ~= 1 then
+      weights = self.weights:view(1, target:size(2)):expandAs(target)
+   end
+
+   input.THNN.BCECriterion_updateGradInput(
+      input:cdata(),
+      target:cdata(),
+      self.gradInput:cdata(),
+      self.sizeAverage,
+      THNN.optionalTensor(weights)
+   )
+
+   return self.gradInput
+end
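The comment in `updateOutput` gives the implemented loss, `-log(input)*target - log(1-input)*(1-target)`, so inputs are expected to be probabilities in (0,1), e.g. the output of nn.Sigmoid(). A hedged usage sketch (editorial, not part of the commit):

```lua
require 'nn'

local crit = nn.BCECriterion()
-- clamp away from 0/1 to keep log() finite in this toy example
local probs = torch.rand(8):clamp(1e-3, 1 - 1e-3)
local target = torch.Tensor(8):random(0, 1)   -- binary labels
local loss = crit:forward(probs, target)
local grad = crit:backward(probs, target)
```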
diff --git a/contrib/lua-torch/nn/BatchNormalization.lua b/contrib/lua-torch/nn/BatchNormalization.lua
new file mode 100644
index 000000000..8dfc576b3
--- /dev/null
+++ b/contrib/lua-torch/nn/BatchNormalization.lua
@@ -0,0 +1,213 @@
+--[[
+   This file implements Batch Normalization as described in the paper:
+   "Batch Normalization: Accelerating Deep Network Training
+   by Reducing Internal Covariate Shift"
+   by Sergey Ioffe, Christian Szegedy
+
+   This implementation is useful for inputs NOT coming from convolution layers.
+   For convolution layers, use nn.SpatialBatchNormalization.
+
+   The operation implemented is:
+   y =     ( x - mean(x) )
+       -------------------- * gamma + beta
+       standard-deviation(x)
+   where gamma and beta are learnable parameters.
+
+   The learning of gamma and beta is optional.
+
+   Usage:
+   with learnable parameters: nn.BatchNormalization(N [,eps] [,momentum])
+                              where N = dimensionality of input
+   without learnable parameters: nn.BatchNormalization(N [,eps] [,momentum], false)
+
+   eps is a small value added to the standard-deviation to avoid divide-by-zero.
+   Defaults to 1e-5.
+
+   During training, this layer keeps a running estimate of its computed mean and std.
+   The running estimate is kept with a default momentum of 0.1 (unless overridden).
+   At test time, this running mean/std is used to normalize.
+]]--
+local BN,parent = torch.class('nn.BatchNormalization', 'nn.Module')
+local THNN = require 'nn.THNN'
+
+BN.__version = 2
+
+-- expected dimension of input
+BN.nDim = 2
+
+function BN:__init(nOutput, eps, momentum, affine)
+   parent.__init(self)
+   assert(nOutput and type(nOutput) == 'number',
+      'Missing argument #1: dimensionality of input. ')
+   assert(nOutput ~= 0, 'To set affine=false call BatchNormalization'
+      .. '(nOutput, eps, momentum, false) ')
+   if affine ~= nil then
+      assert(type(affine) == 'boolean', 'affine has to be true/false')
+      self.affine = affine
+   else
+      self.affine = true
+   end
+   self.eps = eps or 1e-5
+   self.train = true
+   self.momentum = momentum or 0.1
+   self.running_mean = torch.zeros(nOutput)
+   self.running_var = torch.ones(nOutput)
+
+   if self.affine then
+      self.weight = torch.Tensor(nOutput)
+      self.bias = torch.Tensor(nOutput)
+      self.gradWeight = torch.Tensor(nOutput)
+      self.gradBias = torch.Tensor(nOutput)
+      self:reset()
+   end
+end
+
+function BN:reset()
+   if self.weight then
+      self.weight:uniform()
+   end
+   if self.bias then
+      self.bias:zero()
+   end
+   self.running_mean:zero()
+   self.running_var:fill(1)
+end
+
+function BN:checkInputDim(input)
+   local iDim = input:dim()
+   assert(iDim == self.nDim or
+      (iDim == self.nDim - 1 and self.train == false), string.format(
+      'only mini-batch supported (%dD tensor), got %dD tensor instead',
+      self.nDim, iDim))
+   local featDim = (iDim == self.nDim - 1) and 1 or 2
+   assert(input:size(featDim) == self.running_mean:nElement(), string.format(
+      'got %d-feature tensor, expected %d',
+      input:size(featDim), self.running_mean:nElement()))
+end
+
+local function makeContiguous(self, input, gradOutput)
+   if not input:isContiguous() then
+      self._input = self._input or input.new()
+      self._input:resizeAs(input):copy(input)
+      input = self._input
+   end
+   if gradOutput then
+      if not gradOutput:isContiguous() then
+         self._gradOutput = self._gradOutput or gradOutput.new()
+         self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+         gradOutput = self._gradOutput
+      end
+   end
+   return input, gradOutput
+end
+
+local function makeBatch(self, input)
+   local iDim = input:dim()
+   if self.train == false and iDim == self.nDim - 1 then
+      return nn.utils.addSingletonDimension(input, input, 1)
+   else
+      return input
+   end
+end
+
+function BN:updateOutput(input)
+   self:checkInputDim(input)
+
+   input = makeContiguous(self, input)
+   input = makeBatch(self, input)
+
+   self.save_mean = self.save_mean or input.new()
+   self.save_mean:resizeAs(self.running_mean)
+   self.save_std = self.save_std or input.new()
+   self.save_std:resizeAs(self.running_var)
+
+   input.THNN.BatchNormalization_updateOutput(
+      input:cdata(),
+      self.output:cdata(),
+      THNN.optionalTensor(self.weight),
+      THNN.optionalTensor(self.bias),
+      self.running_mean:cdata(),
+      self.running_var:cdata(),
+      self.save_mean:cdata(),
+      self.save_std:cdata(),
+      self.train,
+      self.momentum,
+      self.eps)
+
+   return self.output
+end
+
+local function backward(self, input, gradOutput, scale, gradInput, gradWeight, gradBias)
+   self:checkInputDim(input)
+   self:checkInputDim(gradOutput)
+   assert(self.save_mean and self.save_std, 'must call :updateOutput() first')
+
+   input, gradOutput = makeContiguous(self, input, gradOutput)
+   input = makeBatch(self, input)
+   gradOutput = makeBatch(self, gradOutput)
+
+   scale = scale or 1
+   if gradInput then
+      gradInput:resizeAs(gradOutput)
+   end
+
+   input.THNN.BatchNormalization_backward(
+      input:cdata(),
+      gradOutput:cdata(),
+      THNN.optionalTensor(gradInput),
+      THNN.optionalTensor(gradWeight),
+      THNN.optionalTensor(gradBias),
+      THNN.optionalTensor(self.weight),
+      self.running_mean:cdata(),
+      self.running_var:cdata(),
+      self.save_mean:cdata(),
+      self.save_std:cdata(),
+      self.train,
+      scale,
+      self.eps)
+
+   return self.gradInput
+end
+
+function BN:backward(input, gradOutput, scale)
+   return backward(self, input, gradOutput, scale, self.gradInput, self.gradWeight, self.gradBias)
+end
+function BN:updateGradInput(input, gradOutput)
+   return backward(self, input, gradOutput, 1, self.gradInput)
+end
+
+function BN:accGradParameters(input, gradOutput, scale)
+   return backward(self, input, gradOutput, scale, nil, self.gradWeight, self.gradBias)
+end
+
+function BN:read(file, version)
+   parent.read(self, file)
+   if version < 2 then
+      if self.running_std then
+         self.running_var = self.running_std:pow(-2):add(-self.eps)
+         self.running_std = nil
+      end
+   end
+end
+
+function BN:clearState()
+   -- first 5 buffers are not present in the current implementation,
+   -- but we keep them for cleaning old saved models
+   nn.utils.clear(self, {
+      'buffer',
+      'buffer2',
+      'centered',
+      'std',
+      'normalized',
+      '_input',
+      '_gradOutput',
+      'save_mean',
+      'save_std',
+   })
+   return parent.clearState(self)
+end
+
+function BN:__tostring__()
+   return string.format('%s (%dD) (%d)', torch.type(self), self.nDim, self.running_mean:nElement())
+end
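The header comment documents the constructor, and `checkInputDim` shows that non-batch inputs are only accepted once `self.train` is false. A sketch of the train/evaluate split (illustrative, not part of the commit):

```lua
require 'nn'

local bn = nn.BatchNormalization(10)        -- affine, eps = 1e-5, momentum = 0.1
local out = bn:forward(torch.randn(16, 10)) -- training mode: batch statistics
bn:evaluate()                               -- sets self.train = false
out = bn:forward(torch.randn(10))           -- test mode: running mean/var, 1D allowed
```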
diff --git a/contrib/lua-torch/nn/Bilinear.lua b/contrib/lua-torch/nn/Bilinear.lua
new file mode 100644
index 000000000..9350b03ec
--- /dev/null
+++ b/contrib/lua-torch/nn/Bilinear.lua
@@ -0,0 +1,163 @@
+local Bilinear, parent = torch.class('nn.Bilinear', 'nn.Module')
+
+local function isint(x) return type(x) == 'number' and x == math.floor(x) end
+function Bilinear:__assertInput(input)
+   assert(input and type(input) == 'table' and #input == 2,
+      'input should be a table containing two data Tensors')
+   assert(input[1]:nDimension() == 2 and input[2]:nDimension() == 2,
+      'input Tensors should be two-dimensional')
+   assert(input[1]:size(1) == input[2]:size(1),
+      'input Tensors should have the same number of rows (instances)')
+   assert(input[1]:size(2) == self.weight:size(2),
+      'dimensionality of first input is erroneous')
+   assert(input[2]:size(2) == self.weight:size(3),
+      'dimensionality of second input is erroneous')
+end
+function Bilinear:__assertInputGradOutput(input, gradOutput)
+   assert(input[1]:size(1) == gradOutput:size(1),
+      'number of rows in gradOutput does not match input')
+   assert(gradOutput:size(2) == self.weight:size(1),
+      'number of columns in gradOutput does not match output size of layer')
+end
+
+function Bilinear:__init(inputSize1, inputSize2, outputSize, bias)
+
+   -- assertions:
+   assert(self and inputSize1 and inputSize2 and outputSize,
+      'should specify inputSize1 and inputSize2 and outputSize')
+   assert(isint(inputSize1) and isint(inputSize2) and isint(outputSize),
+      'inputSize1 and inputSize2 and outputSize should be integer numbers')
+   assert(inputSize1 > 0 and inputSize2 > 0 and outputSize > 0,
+      'inputSize1 and inputSize2 and outputSize should be positive numbers')
+
+   -- set up model:
+   parent.__init(self)
+   local bias = ((bias == nil) and true) or bias
+   self.weight = torch.Tensor(outputSize, inputSize1, inputSize2)
+   self.gradWeight = torch.Tensor(outputSize, inputSize1, inputSize2)
+   if bias then
+      self.bias = torch.Tensor(outputSize)
+      self.gradBias = torch.Tensor(outputSize)
+   end
+   self.gradInput = {torch.Tensor(), torch.Tensor()}
+   self:reset()
+end
+
+function Bilinear:reset(stdv)
+   assert(self)
+   if stdv then
+      assert(stdv and type(stdv) == 'number' and stdv > 0,
+         'standard deviation should be a positive number')
+      stdv = stdv * math.sqrt(3)
+   else
+      stdv = 1 / math.sqrt(self.weight:size(2))
+   end
+   self.weight:uniform(-stdv, stdv)
+   if self.bias then self.bias:uniform(-stdv, stdv) end
+   return self
+end
+
+function Bilinear:updateOutput(input)
+   assert(self)
+   self:__assertInput(input)
+
+   -- set up buffer:
+   self.buff2 = self.buff2 or input[1].new()
+   self.buff2:resizeAs(input[2])
+
+   -- compute output scores:
+   self.output:resize(input[1]:size(1), self.weight:size(1))
+   for k = 1,self.weight:size(1) do
+      torch.mm(self.buff2, input[1], self.weight[k])
+      self.buff2:cmul(input[2])
+      torch.sum(self.output:narrow(2, k, 1), self.buff2, 2)
+   end
+   if self.bias then
+      self.output:add(
+         self.bias:reshape(1, self.bias:nElement()):expandAs(self.output)
+      )
+   end
+   return self.output
+end
+
+function Bilinear:updateGradInput(input, gradOutput)
+   assert(self)
+   if self.gradInput then
+      self:__assertInputGradOutput(input, gradOutput)
+
+      if #self.gradInput == 0 then
+         for i = 1, 2 do self.gradInput[i] = input[1].new() end
+      end
+
+      -- compute d output / d input:
+      self.gradInput[1]:resizeAs(input[1]):fill(0)
+      self.gradInput[2]:resizeAs(input[2]):fill(0)
+
+
+      -- do first slice of weight tensor (k = 1)
+      self.gradInput[1]:mm(input[2], self.weight[1]:t())
+      self.gradInput[1]:cmul(gradOutput:narrow(2,1,1):expand(self.gradInput[1]:size(1),
+         self.gradInput[1]:size(2)))
+      self.gradInput[2]:addmm(1, input[1], self.weight[1])
+      self.gradInput[2]:cmul(gradOutput:narrow(2,1,1):expand(self.gradInput[2]:size(1),
+         self.gradInput[2]:size(2)))
+
+      -- do remaining slices of weight tensor
+      if self.weight:size(1) > 1 then
+         self.buff1 = self.buff1 or input[1].new()
+         self.buff1:resizeAs(input[1])
+
+         for k = 2, self.weight:size(1) do
+            self.buff1:mm(input[2], self.weight[k]:t())
+            self.buff1:cmul(gradOutput:narrow(2,k,1):expand(self.gradInput[1]:size(1),
+               self.gradInput[1]:size(2)))
+            self.gradInput[1]:add(self.buff1)
+
+            self.buff2:mm(input[1], self.weight[k])
+            self.buff2:cmul(gradOutput:narrow(2,k,1):expand(self.gradInput[2]:size(1),
+               self.gradInput[2]:size(2)))
+            self.gradInput[2]:add(self.buff2)
+         end
+      end
+      return self.gradInput
+   end
+end
+function Bilinear:accGradParameters(input, gradOutput, scale)
+   local scale = scale or 1
+   self:__assertInputGradOutput(input, gradOutput)
+   assert(scale and type(scale) == 'number' and scale >= 0)
+
+   -- make sure we have buffer:
+   self.buff1 = self.buff1 or input[1].new()
+   self.buff1:resizeAs(input[1])
+
+   -- accumulate parameter gradients:
+   for k = 1,self.weight:size(1) do
+      torch.cmul(
+         self.buff1, input[1], gradOutput:narrow(2, k, 1):expandAs(input[1])
+      )
+      self.gradWeight[k]:addmm(self.buff1:t(), input[2])
+   end
+   if self.bias then self.gradBias:add(scale, gradOutput:sum(1)) end
+end
+
+function Bilinear:sharedAccUpdateGradParameters(input, gradOutput, lr)
+   -- we do not need to accumulate parameters when sharing:
+   self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+end
+
+function Bilinear:__tostring__()
+   return torch.type(self) ..
+      string.format(
+         '(%dx%d -> %d) %s',
+         self.weight:size(2), self.weight:size(3), self.weight:size(1),
+         (self.bias == nil and ' without bias' or '')
+      )
+end
+
+function Bilinear:clearState()
+   if self.buff2 then self.buff2:set() end
+   if self.buff1 then self.buff1:set() end
+   return parent.clearState(self)
+end
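The module computes one bilinear form per output unit, y_k = x1^T W_k x2 + b_k, as the slice loop in `updateOutput` shows. An illustrative sketch (editorial, not part of the commit):

```lua
require 'nn'

local m = nn.Bilinear(4, 6, 3)       -- 3 weight slices W_k, each 4x6
local x1, x2 = torch.randn(5, 4), torch.randn(5, 6)
local y = m:forward({x1, x2})        -- 5x3 output
local gx = m:backward({x1, x2}, torch.randn(5, 3))
```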
diff --git a/contrib/lua-torch/nn/Bottle.lua b/contrib/lua-torch/nn/Bottle.lua
new file mode 100644
index 000000000..6dee432f5
--- /dev/null
+++ b/contrib/lua-torch/nn/Bottle.lua
@@ -0,0 +1,71 @@
+local Bottle, parent = torch.class("nn.Bottle", "nn.Decorator")
+local unpack = unpack or table.unpack
+
+function Bottle:__init(module, nInputDim, nOutputDim)
+   parent.__init(self, module)
+   self.nInputDim = nInputDim or 2
+   self.nOutputDim = nOutputDim or self.nInputDim
+   self.dimDelta = self.nInputDim - self.nOutputDim
+   -- Used to reshape the gradients
+   self.inShape = torch.Tensor(self.nInputDim)
+   self.outShape = torch.Tensor(self.nOutputDim)
+end
+
+function Bottle:updateOutput(input)
+   -- first batchDims dimensions will be fused
+   local batchDims = input:dim() - self.nInputDim + 1
+   -- see if bottle is required
+   if batchDims > 1 then
+      -- bottle the first dims
+      local inSize = torch.LongTensor(input:size())
+      local squeezeSize = inSize[{{1, batchDims - 1}}]:prod()
+      self.inShape:copy(inSize[{{batchDims, input:dim()}}])
+      self.inShape[{{1}}]:mul(squeezeSize)
+      -- Forward with the module's dimension
+      local newInput = input:view(unpack(self.inShape:totable()))
+      local output = self.modules[1]:updateOutput(newInput)
+      assert(output:dim() == self.nOutputDim,
+         "Wrong number of output dims on module. Expected: " ..
+         self.nOutputDim .. ' but got ' ..
+         tostring(output and output:dim()))
+      self.outShape:copy(torch.LongTensor(output:size()))
+      if math.abs(self.dimDelta) > 0 then
+         inSize:resize(inSize:size(1) - self.dimDelta)
+      end
+      inSize[{{batchDims, inSize:size(1)}}]:copy(self.outShape)
+      inSize[{{batchDims}}]:div(squeezeSize)
+      -- unbottle
+      self.output:set(output:view(unpack(torch.totable(inSize))))
+   else
+      self.output:set(self.modules[1]:updateOutput(input))
+   end
+   return self.output
+end
+
+function Bottle:updateGradInput(input, gradOutput)
+   if input:dim() > self.nInputDim then
+      local input_ = input:view(unpack(self.inShape:totable()))
+      local gradOutput_ = gradOutput:view(unpack(self.outShape:totable()))
+      self.modules[1]:updateGradInput(input_, gradOutput_)
+      if self.modules[1].gradInput then
+         self.gradInput:set(self.modules[1].gradInput:viewAs(input))
+      else
+         self.gradInput = nil
+      end
+   else
+      if self.modules[1].gradInput then
+         self.gradInput:set(self.modules[1]:updateGradInput(input, gradOutput))
+      else
+         self.gradInput = nil
+      end
+   end
+   return self.gradInput
+end
+
+function Bottle:accGradParameters(input, gradOutput, scale)
+   if input:dim() > self.nInputDim then
+      input = input:view(unpack(self.inShape:totable()))
+      gradOutput = gradOutput:view(unpack(self.outShape:totable()))
+   end
+   self.modules[1]:accGradParameters(input, gradOutput, scale)
+end
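Bottle fuses the leading dimensions of a high-rank input into one batch dimension, applies the wrapped module, and unfolds the result. A minimal sketch (editorial, not part of the commit):

```lua
require 'nn'

-- a 4x5x10 input is viewed as a 20x10 batch for nn.Linear, then unfolded
local m = nn.Bottle(nn.Linear(10, 2))
local y = m:forward(torch.randn(4, 5, 10))
print(y:size())   -- 4x5x2
```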
diff --git a/contrib/lua-torch/nn/CAdd.lua b/contrib/lua-torch/nn/CAdd.lua
new file mode 100644
index 000000000..1d7b45726
--- /dev/null
+++ b/contrib/lua-torch/nn/CAdd.lua
@@ -0,0 +1,127 @@
+local CAdd, parent = torch.class("nn.CAdd", "nn.Module")
+
+function CAdd:__init(...)
+   parent.__init(self)
+
+   local arg = {...}
+
+   self.size = torch.LongStorage()
+   local n = #arg
+   if n == 1 and torch.type(arg[1]) == 'torch.LongStorage' then
+      self.size:resize(#arg[1]):copy(arg[1])
+   else
+      self.size:resize(n)
+      for i=1,n do
+         self.size[i] = arg[i]
+      end
+   end
+
+   self.bias = torch.Tensor(self.size)
+   self.gradBias = torch.Tensor(self.size)
+
+   self.output:resize(self.size)
+
+   self:reset()
+end
+
+function CAdd:reset(stdv)
+   if stdv then
+      --std of uniform distribution on interval [-a,a] = a/sqrt(3)
+      stdv = stdv * math.sqrt(3)
+   else
+      stdv = 1.0/math.sqrt(self.bias:nElement())
+   end
+   self.bias:uniform(-stdv,stdv)
+end
+
+function CAdd:updateOutput(input)
+   self._output = self._output or input.new()
+   self._bias = self._bias or input.new()
+   self._expand = self._expand or input.new()
+   self._repeat = self._repeat or input.new()
+
+   self.output:resizeAs(input):copy(input)
+   if input:nElement() == self.bias:nElement() then
+      self.output:add(self.bias)
+   else
+      if self.bias:dim() == input:dim() then
+         self._output:set(self.output)
+         self._bias:set(self.bias)
+      else
+         local batchSize = input:size(1)
+         self._output:view(self.output, batchSize, -1)
+         self._bias:view(self.bias, 1, -1)
+      end
+
+      self._expand:expandAs(self._bias, self._output)
+
+      --expandAs uses stride 0 and self._expand is not contiguous
+      --cuda ops may assume contiguous input
+      if torch.type(input) == 'torch.CudaTensor' then
+         self._repeat:resizeAs(self._expand):copy(self._expand)
+         self._output:add(self._repeat)
+      else
+         self._output:add(self._expand)
+      end
+   end
+
+   return self.output
+end
+
+function CAdd:updateGradInput(input, gradOutput)
+   self.gradInput = self.gradInput or input.new()
+   self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+
+   return self.gradInput
+end
+
+function CAdd:accGradParameters(input, gradOutput, scale)
+   scale = scale or 1
+
+   self._gradBias = self._gradBias or gradOutput.new()
+   self._gradOutput = self._gradOutput or gradOutput.new()
+   self._repeat = self._repeat or gradOutput.new()
+
+   if self.bias:nElement() == gradOutput:nElement() then
+      self.gradBias:add(scale, gradOutput)
+   else
+      if self.bias:dim() == gradOutput:dim() then
+         self._gradBias:set(self.gradBias)
+         self._gradOutput:set(gradOutput)
+      else
+         local batchSize = input:size(1)
+         self._gradBias:view(self.gradBias, 1, -1)
+         self._gradOutput:view(gradOutput, batchSize, -1)
+      end
+
+      self._gradBias:expandAs(self._gradBias, self._gradOutput)
+
+      --expandAs uses stride 0 and self._gradBias is not contiguous
+      --cuda ops may assume contiguous input
+      if torch.type(self._gradBias) == 'torch.CudaTensor' then
+         self._repeat:resizeAs(self._gradBias):copy(self._gradBias)
+         self._repeat:add(scale, self._gradOutput)
+         self._gradBias:copy(self._repeat)
+      else
+         self._gradBias:add(scale, self._gradOutput)
+      end
+   end
+end
+
+function CAdd:type(type, tensorCache)
+   if type then
+      self:clearState()
+   end
+   return parent.type(self, type, tensorCache)
+end
+
+function CAdd:clearState()
+   nn.utils.clear(self, {
+      '_gradBias',
+      '_expand',
+      '_output',
+      '_bias',
+      '_repeat'
+   })
+   return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/CAddTable.lua b/contrib/lua-torch/nn/CAddTable.lua
new file mode 100644
index 000000000..79deb7e9b
--- /dev/null
+++ b/contrib/lua-torch/nn/CAddTable.lua
@@ -0,0 +1,36 @@
+local CAddTable, parent = torch.class('nn.CAddTable', 'nn.Module')
+
+function CAddTable:__init(ip)
+   parent.__init(self)
+   self.inplace = ip
+   self.gradInput = {}
+end
+
+function CAddTable:updateOutput(input)
+   if self.inplace then
+      self.output:set(input[1])
+   else
+      self.output:resizeAs(input[1]):copy(input[1])
+   end
+   for i=2,#input do
+      self.output:add(input[i])
+   end
+   return self.output
+end
+
+function CAddTable:updateGradInput(input, gradOutput)
+   for i=1,#input do
+      self.gradInput[i] = self.gradInput[i] or input[1].new()
+      if self.inplace then
+         self.gradInput[i]:set(gradOutput)
+      else
+         self.gradInput[i]:resizeAs(input[i]):copy(gradOutput)
+      end
+   end
+
+   for i=#input+1, #self.gradInput do
+      self.gradInput[i] = nil
+   end
+
+   return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/CAddTensorTable.lua b/contrib/lua-torch/nn/CAddTensorTable.lua
new file mode 100644
index 000000000..16efe4450
--- /dev/null
+++ b/contrib/lua-torch/nn/CAddTensorTable.lua
@@ -0,0 +1,43 @@
+
+local CAddTensorTable, parent = torch.class('nn.CAddTensorTable', 'nn.Module')
+
+function CAddTensorTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+end
+
+-- input is a table with 2 entries. input[1] is the vector to be added.
+-- input[2] is the table to which we add the vector
+function CAddTensorTable:updateOutput(input)
+   local currentOutput = {}
+   for i=1,#input[2] do
+      currentOutput[i] = currentOutput[i] or input[1].new()
+      currentOutput[i]:resizeAs(input[1])
+      currentOutput[i]:copy(input[2][i])
+      currentOutput[i]:add(input[1])
+   end
+   for i = #input[2]+1, #currentOutput do
+      currentOutput[i] = nil
+   end
+   self.output = currentOutput
+   return self.output
+end
+
+function CAddTensorTable:updateGradInput(input, gradOutput)
+   self.gradInput[1] = self.gradInput[1] or input[1].new()
+   self.gradInput[1]:resizeAs(input[1])
+   self.gradInput[1]:copy(gradOutput[1])
+   for i=2, #input[2] do
+      self.gradInput[1]:add(gradOutput[i])
+   end
+   self.gradInput[2] = self.gradInput[2] or {}
+   for i=1,#input[2] do
+      self.gradInput[2][i] = self.gradInput[2][i] or input[1].new()
+      self.gradInput[2][i]:resizeAs(input[1])
+      self.gradInput[2][i]:copy(gradOutput[i])
+   end
+   for i=#input[2]+1, #self.gradInput[2] do
+      self.gradInput[2][i] = nil
+   end
+   return self.gradInput
+end
\ No newline at end of file
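Per the comment in CAddTensorTable, the input is a pair {vector, table-of-tensors} and the vector is added to every table entry. A short sketch (editorial, not part of the commit):

```lua
require 'nn'

local m = nn.CAddTensorTable()
local vec = torch.ones(3)
local tbl = {torch.zeros(3), torch.Tensor{1, 2, 3}}
local out = m:forward({vec, tbl})   -- {{1,1,1}, {2,3,4}}
```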
diff --git a/contrib/lua-torch/nn/CDivTable.lua b/contrib/lua-torch/nn/CDivTable.lua
new file mode 100644
index 000000000..bf044c9af
--- /dev/null
+++ b/contrib/lua-torch/nn/CDivTable.lua
@@ -0,0 +1,26 @@
+
+local CDivTable, parent = torch.class('nn.CDivTable', 'nn.Module')
+
+function CDivTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+end
+
+function CDivTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   self.output:cdiv(input[2])
+   return self.output
+end
+
+function CDivTable:updateGradInput(input, gradOutput)
+   self.gradInput[1] = self.gradInput[1] or input[1].new()
+   self.gradInput[2] = self.gradInput[2] or input[1].new()
+   self.gradInput[1]:resizeAs(input[1]):copy(gradOutput):cdiv(input[2])
+   self.gradInput[2]:resizeAs(input[2]):zero():addcdiv(-1,self.gradInput[1],input[2]):cmul(input[1])
+
+   for i=#input+1, #self.gradInput do
+      self.gradInput[i] = nil
+   end
+
+   return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/CMakeLists.txt b/contrib/lua-torch/nn/CMakeLists.txt
new file mode 100644
index 000000000..cebddfbfc
--- /dev/null
+++ b/contrib/lua-torch/nn/CMakeLists.txt
@@ -0,0 +1,14 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
+CMAKE_POLICY(VERSION 2.6)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../torch7/lib/TH)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/../torch7/lib/TH)
+ADD_SUBDIRECTORY(lib)
+
+FILE(STRINGS lib/THNN/generic/THNN.h THNN_headers NEWLINE_CONSUME)
+FILE(WRITE THNN_h.lua "return [[")
+FILE(APPEND THNN_h.lua ${THNN_headers})
+FILE(APPEND THNN_h.lua "]]")
+
+FILE(GLOB luasrc *.lua)
+
+ADD_TORCH_PACKAGE(nn "" "${luasrc}")
diff --git a/contrib/lua-torch/nn/CMaxTable.lua b/contrib/lua-torch/nn/CMaxTable.lua
new file mode 100644
index 000000000..845e38d23
--- /dev/null
+++ b/contrib/lua-torch/nn/CMaxTable.lua
@@ -0,0 +1,46 @@
+local CMaxTable, parent = torch.class('nn.CMaxTable', 'nn.Module')
+
+function CMaxTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+   self.maxIdx = torch.Tensor()
+   self.mask = torch.Tensor()
+   self.maxVals = torch.Tensor()
+   self.gradMaxVals = torch.Tensor()
+end
+
+function CMaxTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   self.maxIdx:resizeAs(input[1]):fill(1)
+   for i=2,#input do
+      self.maskByteTensor = self.maskByteTensor or
+         (torch.type(self.output) == 'torch.CudaTensor' and
+         torch.CudaByteTensor() or torch.ByteTensor())
+      self.mask:gt(input[i], self.output)
+      self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+      self.maxIdx:maskedFill(self.maskByteTensor, i)
+      self.maxVals:maskedSelect(input[i], self.maskByteTensor)
+      self.output:maskedCopy(self.maskByteTensor, self.maxVals)
+   end
+   return self.output
+end
+
+function CMaxTable:updateGradInput(input, gradOutput)
+   for i=1,#input do
+      self.gradInput[i] = self.gradInput[i] or input[i].new()
+      self.gradInput[i]:resizeAs(input[i]):fill(0.0)
+      self.maskByteTensor = self.maskByteTensor or
+         (torch.type(self.output) == 'torch.CudaTensor' and
+         torch.CudaByteTensor() or torch.ByteTensor())
+      self.mask:eq(self.maxIdx, i)
+      self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+      self.gradMaxVals:maskedSelect(gradOutput, self.maskByteTensor)
+      self.gradInput[i]:maskedCopy(self.maskByteTensor, self.gradMaxVals)
+   end
+
+   for i=#input+1, #self.gradInput do
+      self.gradInput[i] = nil
+   end
+
+   return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/CMinTable.lua b/contrib/lua-torch/nn/CMinTable.lua
new file mode 100644
index 000000000..25b9a19a2
--- /dev/null
+++ b/contrib/lua-torch/nn/CMinTable.lua
@@ -0,0 +1,46 @@
+local CMinTable, parent = torch.class('nn.CMinTable', 'nn.Module')
+
+function CMinTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+   self.minIdx = torch.Tensor()
+   self.mask = torch.Tensor()
+   self.minVals = torch.Tensor()
+   self.gradMaxVals = torch.Tensor()
+end
+
+function CMinTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   self.minIdx:resizeAs(input[1]):fill(1)
+   for i=2,#input do
+      self.maskByteTensor = self.maskByteTensor or
+         (torch.type(self.output) == 'torch.CudaTensor' and
+         torch.CudaByteTensor() or torch.ByteTensor())
+      self.mask:lt(input[i], self.output)
+      self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+      self.minIdx:maskedFill(self.maskByteTensor, i)
+      self.minVals:maskedSelect(input[i], self.maskByteTensor)
+      self.output:maskedCopy(self.maskByteTensor, self.minVals)
+   end
+   return self.output
+end
+
+function CMinTable:updateGradInput(input, gradOutput)
+   for i=1,#input do
+      self.gradInput[i] = self.gradInput[i] or input[i].new()
+      self.gradInput[i]:resizeAs(input[i]):fill(0.0)
+      self.maskByteTensor = self.maskByteTensor or
+         (torch.type(self.output) == 'torch.CudaTensor' and
+         torch.CudaByteTensor() or torch.ByteTensor())
+      self.mask:eq(self.minIdx, i)
+      self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+      self.gradMaxVals:maskedSelect(gradOutput, self.maskByteTensor)
+      self.gradInput[i]:maskedCopy(self.maskByteTensor, self.gradMaxVals)
+   end
+
+   for i=#input+1, #self.gradInput do
+      self.gradInput[i] = nil
+   end
+
+   return self.gradInput
+end
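CMaxTable and CMinTable take a table of same-sized tensors and compute the elementwise max/min, routing gradients back through the winning entries via the mask. A short sketch (editorial, not part of the commit):

```lua
require 'nn'

local a, b = torch.Tensor{1, 5}, torch.Tensor{4, 2}
local mx = nn.CMaxTable():forward({a, b})   -- {4, 5}
local mn = nn.CMinTable():forward({a, b})   -- {1, 2}
```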
diff --git a/contrib/lua-torch/nn/CMul.lua b/contrib/lua-torch/nn/CMul.lua
new file mode 100644
index 000000000..890169761
--- /dev/null
+++ b/contrib/lua-torch/nn/CMul.lua
@@ -0,0 +1,166 @@
+local CMul, parent = torch.class('nn.CMul', 'nn.Module')
+
+function CMul:__init(...)
+   parent.__init(self)
+
+   local arg = {...}
+
+   self.size = torch.LongStorage()
+   local n = #arg
+   if n == 1 and torch.type(arg[1]) == 'torch.LongStorage' then
+      self.size:resize(#arg[1]):copy(arg[1])
+   else
+      self.size:resize(n)
+      for i=1,n do
+         self.size[i] = arg[i]
+      end
+   end
+
+   self.weight = torch.Tensor(self.size)
+   self.gradWeight = torch.Tensor(self.size)
+
+   self.output:resize(self.size)
+
+   self:reset()
+end
+
+function CMul:reset(stdv)
+   if stdv then
+      stdv = stdv * math.sqrt(3)
+   else
+      stdv = 1./math.sqrt(self.weight:nElement())
+   end
+   self.weight:uniform(-stdv,stdv)
+end
+
+function CMul:updateOutput(input)
+   -- lazy-initialize
+   self._output = self._output or input.new()
+   self._weight = self._weight or input.new()
+   self._expand = self._expand or input.new()
+   self._repeat = self._repeat or input.new()
+
+   self.output:resizeAs(input):copy(input)
+   if input:nElement() == self.weight:nElement() then
+      self._output:view(self.output, -1)
+      self._weight:view(self.weight, -1)
+
+      self._output:cmul(self._weight)
+   else
+      if self.weight:dim() == input:dim() then
+         self._output:set(self.output)
+         self._weight:set(self.weight)
+      else
+         local batchSize = input:size(1)
+         self._output:view(self.output, batchSize, -1)
+         self._weight:view(self.weight, 1, -1)
+      end
+
+      self._expand:expandAs(self._weight, self._output)
+
+      if torch.type(input) == 'torch.CudaTensor' then
+         self._repeat:resizeAs(self._expand):copy(self._expand)
+         self._output:cmul(self._repeat)
+      else
+         self._output:cmul(self._expand)
+      end
+   end
+
+   return self.output
+end
+
+function CMul:updateGradInput(input, gradOutput)
+   if not self.gradInput then
+      return
+   end
+
+   self._gradOutput = self._gradOutput or input.new()
+   self._gradInput = self._gradInput or input.new()
+
+   self.gradInput:resizeAs(input):zero()
+   if self.weight:nElement() == gradOutput:nElement() then
+      self.gradInput:addcmul(1, self.weight, gradOutput)
+   else
+      if self.weight:dim() == input:dim() then
+         nn.utils.contiguousView(self._gradOutput, gradOutput, gradOutput:size())
+         nn.utils.contiguousView(self._gradInput, self.gradInput, self.gradInput:size())
+         self._weight:set(self.weight)
+      else
+         local batchSize = input:size(1)
+         nn.utils.contiguousView(self._gradOutput, gradOutput, batchSize, -1)
+         nn.utils.contiguousView(self._gradInput, self.gradInput, batchSize, -1)
+         self._weight:view(self.weight, 1, -1)
+      end
+
+      self._expand:expandAs(self._weight, self._gradOutput)
+
+      if torch.type(input) == 'torch.CudaTensor' then
+         self._repeat:resizeAs(self._expand):copy(self._expand)
+         self._gradInput:addcmul(1, self._repeat, self._gradOutput)
+      else
+         self._gradInput:addcmul(1, self._expand, self._gradOutput)
+      end
+   end
+
+   return self.gradInput
+end
+
+function CMul:accGradParameters(input, gradOutput, scale)
+   scale = scale or 1
+
+   self._input = self._input or input.new()
+   self._gradWeight = self._gradWeight or input.new()
+   self._sum = self._sum or input.new()
+
+   if self.weight:nElement() == gradOutput:nElement() then
+      self.gradWeight:addcmul(scale, input, gradOutput)
+   else
+      if self.weight:dim() == input:dim() then
+         nn.utils.contiguousView(self._input, input, input:size())
+         nn.utils.contiguousView(self._gradOutput, gradOutput, gradOutput:size())
+         self._gradWeight:set(self.gradWeight)
+
+         self._repeat:cmul(self._input, self._gradOutput)
+         local sumInto = self._sum
+         local sumFrom = self._repeat
+         for i=1,self.weight:dim() do
+            if self.weight:size(i) ~= input:size(i) then
+               sumInto:sum(sumFrom, i)
+               sumInto = sumFrom
+               sumFrom = sumFrom == self._repeat and self._sum or self._repeat
+            end
+         end
+         self._gradWeight:add(scale, sumFrom)
+      else
+         local batchSize = input:size(1)
+         nn.utils.contiguousView(self._input, input, batchSize, -1)
+         nn.utils.contiguousView(self._gradOutput, gradOutput, batchSize, -1)
+         self._gradWeight:view(self.gradWeight, 1, -1)
+
+         self._repeat:cmul(self._input, self._gradOutput)
+         self._sum:sum(self._repeat, 1)
+         self._gradWeight:add(scale, self._sum)
+      end
+
+   end
+end
+
+function CMul:type(type, tensorCache)
+   if type then
+      self:clearState()
+   end
+   return parent.type(self, type, tensorCache)
+end
+
+function CMul:clearState()
+   nn.utils.clear(self, {
+      '_input',
+      '_output',
+      '_weight',
+      '_gradWeight',
+      '_expand',
+      '_repeat',
+      '_sum',
+   })
+   return parent.clearState(self)
+end
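CMul is the multiplicative counterpart of CAdd: a learnable per-element scale broadcast over the batch dimension. A minimal sketch (editorial, not part of the commit):

```lua
require 'nn'

local m = nn.CMul(5)                      -- learnable per-element scale
local y = m:forward(torch.randn(8, 5))    -- weight viewed as 1x5 and expanded
```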
diff --git a/contrib/lua-torch/nn/CMulTable.lua b/contrib/lua-torch/nn/CMulTable.lua
new file mode 100644
index 000000000..b47378e83
--- /dev/null
+++ b/contrib/lua-torch/nn/CMulTable.lua
@@ -0,0 +1,55 @@
+
+local CMulTable, parent = torch.class('nn.CMulTable', 'nn.Module')
+
+function CMulTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+end
+
+function CMulTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   for i=2,#input do
+      self.output:cmul(input[i])
+   end
+   return self.output
+end
+
+function CMulTable:updateGradInput_efficient(input, gradOutput)
+   self.tout = self.tout or input[1].new()
+   self.tout:resizeAs(self.output)
+   for i=1,#input do
+      self.gradInput[i] = self.gradInput[i] or input[1].new()
+      self.gradInput[i]:resizeAs(input[i]):copy(gradOutput)
+      self.tout:copy(self.output):cdiv(input[i])
+      self.gradInput[i]:cmul(self.tout)
+   end
+
+   for i=#input+1, #self.gradInput do
+      self.gradInput[i] = nil
+   end
+
+   return self.gradInput
+end
+
+function CMulTable:updateGradInput(input, gradOutput)
+   for i=1,#input do
+      self.gradInput[i] = self.gradInput[i] or input[1].new()
+      self.gradInput[i]:resizeAs(input[i]):copy(gradOutput)
+      for j=1,#input do
+         if i~=j then
+            self.gradInput[i]:cmul(input[j])
+         end
+      end
+   end
+
+   for i=#input+1, #self.gradInput do
+      self.gradInput[i] = nil
+   end
+
+   return self.gradInput
+end
+
+function CMulTable:clearState()
+   if self.tout then self.tout:set() end
+   return parent.clearState(self)
+end
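CMulTable multiplies its table entries elementwise; its `updateGradInput` gives each entry the product of all the others as gradient. A one-liner sketch (editorial, not part of the commit):

```lua
require 'nn'

local y = nn.CMulTable():forward({torch.Tensor{2, 3}, torch.Tensor{4, 5}})  -- {8, 15}
```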
diff --git a/contrib/lua-torch/nn/CONTRIBUTING.md b/contrib/lua-torch/nn/CONTRIBUTING.md
new file mode 100644
index 000000000..cc800154e
--- /dev/null
+++ b/contrib/lua-torch/nn/CONTRIBUTING.md
@@ -0,0 +1,136 @@
+# Contributing to Torch7 Core (torch7, nn, cutorch, cunn)
+
+Thanks a lot! There are plenty of ways you can help!
+
+Please take a moment to review this document in order to make the contribution
+process easy and effective for everyone involved.
+
+Following these guidelines helps to communicate that you respect the time of
+the developers managing and developing this open source project. In return,
+they should reciprocate that respect in addressing your issue or assessing
+patches and features.
+
+
+## Using the issue tracker
+
+The [issue tracker](https://github.com/torch/nn/issues) is
+the preferred channel for [bug reports](#bugs), [features requests](#features)
+and [submitting pull requests](#pull-requests), but please respect the following
+restrictions:
+
+* Please **do not** use the issue tracker for personal support requests (use
+  [mailing-list](http://groups.google.com/forum/#!forum/torch7)).
+
+* Please **do not** open issues regarding the code in a torch package
+  outside the core. For example, don't open issues about the
+  REPL in the nn issue tracker; use the trepl issue tracker for that.
+
+<a name="bugs"></a>
+## Bug reports
+
+A bug is a _demonstrable problem_ that is caused by the code in the repository.
+Good bug reports are extremely helpful - thank you!
+
+Guidelines for bug reports:
+
+1. **Use the GitHub issue search** — check if the issue has already been
+   reported.
+
+2. **Check if the issue has been fixed** — try to reproduce it using the
+   latest `master` or development branch in the repository.
+
+3. **Isolate the problem** — ideally create a reduced test case,
+   preferably within 100 lines of code.
+
+A good bug report shouldn't leave others needing to chase you up for more
+information. Please try to be as detailed as possible in your report. What is
+your environment? What steps will reproduce the issue? On what OS do you
+experience the problem? What would you expect to be the outcome? All these
+details will help people to fix any potential bugs.
+
+<a name="features"></a>
+## Feature requests
+
+Feature requests are welcome to be filed. Torch is community-developed and
+the maintainers are not exclusively torch developers, so keep that in mind.
+The purpose of feature requests is to make others who are looking to implement
+a feature aware of the interest in it.
+
+
+<a name="pull-requests"></a>
+## Pull requests
+
+Good pull requests - patches, improvements, new features - are a fantastic
+help. They should remain focused in scope **and avoid containing unrelated
+commits.**
+
+**Please ask first** before embarking on any significant pull request (e.g.
+implementing features, refactoring code, porting to a different language),
+otherwise you risk spending a lot of time working on something that the
+project's developers might not want to merge into the project.
+
+Please adhere to the coding conventions used throughout a project (indentation,
+accurate comments, etc.) and any other requirements (such as test coverage).
+
+Adhering to the following process is the best way to get your work
+included in the project:
+
+1. [Fork](https://help.github.com/articles/fork-a-repo) the project, clone your
+   fork, and configure the remotes:
+
+   ```bash
+   # Clone your fork of the repo into the current directory
+   git clone https://github.com/<your-username>/nn.git
+   # Navigate to the newly cloned directory
+   cd nn
+   # Assign the original repo to a remote called "upstream"
+   git remote add upstream https://github.com/torch/nn.git
+   ```
+
+2. If you cloned a while ago, get the latest changes from upstream:
+
+   ```bash
+   git checkout master
+   git pull upstream master
+   ```
+
+3. Create a new topic branch (off the main project development branch) to
+   contain your feature, change, or fix:
+
+   ```bash
+   git checkout -b <topic-branch-name>
+   ```
+
+4. Commit your changes in logical chunks. Please try to adhere to these [git commit
+   message guidelines](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
+   Use Git's [interactive rebase](https://help.github.com/articles/about-git-rebase)
+   feature to tidy up your commits before making them public. This helps us keep the
+   commit history in logical blocks and clean, as torch grows.
+   For example:
+   - If you are adding a new function or a module, keep the module + tests + doc
+     to a single commit unless logically warranted.
+   - If you are fixing a bug, keep the bugfix to a single commit unless logically warranted.
+5. Locally merge (or rebase) the upstream development branch into your topic branch:
+
+   ```bash
+   git pull [--rebase] upstream master
+   ```
+
+6. Push your topic branch up to your fork:
+
+   ```bash
+   git push origin <topic-branch-name>
+   ```
+
+7. [Open a Pull Request](https://help.github.com/articles/using-pull-requests/)
+   with a clear title and description.
+
+**IMPORTANT**: By submitting a patch, you agree to allow the project owners to
+license your work under the terms of the BSD License.
+
+## Development workflow tips
+
+* While changing Lua files, you can simply symlink the cloned nn directory to
+  ~/torch/install/share/lua/5.1/nn so that any change is reflected in the current
+  install, without constantly having to do luarocks make rocks/*
+* If you are changing C files, then, after every change, run luarocks make rocks/*
+* To test, you can just use: th -lnn -e "nn.test()"
diff --git a/contrib/lua-torch/nn/COPYRIGHT.txt b/contrib/lua-torch/nn/COPYRIGHT.txt
new file mode 100644
index 000000000..bc002b78a
--- /dev/null
+++ b/contrib/lua-torch/nn/COPYRIGHT.txt
@@ -0,0 +1,36 @@
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
+Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
+Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+3. Neither the names of Deepmind Technologies, NYU, NEC Laboratories America
+   and IDIAP Research Institute nor the names of its contributors may be
+   used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
in "Understanding and Improving Convolutional Neural Networks +-- via Concatenated Rectified Linear Units" +function CReLU:__init(nInputDims, inplace) + parent.__init(self) + self.nInputDims = nInputDims + self.inplace = inplace or false + + local concatTable = nn.ConcatTable() + concatTable:add(nn.Identity()) + concatTable:add(nn.MulConstant(-1)) + self:add(concatTable) + self:add(nn.JoinTable(2)) + self:add(nn.ReLU(self.inplace)) +end + +function CReLU:updateOutput(input) + local input_ + local batched = input:dim() == (self.nInputDims + 1) + if not batched then + input_ = input:view(1, -1) + else + input_ = input:view(input:size(1), -1) + end + parent.updateOutput(self, input_) + local osize = input:size() + if not batched then + osize[1] = osize[1] * 2 + else + osize[2] = osize[2] * 2 + end + self.output:resize(osize) + return self.output +end + +function CReLU:backward(input, gradOutput) + return self:updateGradInput(input, gradOutput) +end + +function CReLU:updateGradInput(input, gradOutput) + local batched = input:dim() == (self.nInputDims + 1) + if not batched then + parent.updateGradInput(self, input:view(1, -1), gradOutput:view(1, -1)) + else + parent.updateGradInput(self, input:view(input:size(1), -1), + gradOutput:view(input:size(1), -1)) + end + + self.gradInput:resizeAs(input) + return self.gradInput +end + +function CReLU:__tostring__() + return "CReLU()" +end diff --git a/contrib/lua-torch/nn/CSubTable.lua b/contrib/lua-torch/nn/CSubTable.lua new file mode 100644 index 000000000..eb7492055 --- /dev/null +++ b/contrib/lua-torch/nn/CSubTable.lua @@ -0,0 +1,26 @@ + +local CSubTable, parent = torch.class('nn.CSubTable', 'nn.Module') + +function CSubTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CSubTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + self.output:add(-1,input[2]) + return self.output +end + +function CSubTable:updateGradInput(input, gradOutput) + self.gradInput[1] = self.gradInput[1] or input[1].new() + self.gradInput[2] = self.gradInput[2] or input[1].new() + self.gradInput[1]:resizeAs(input[1]):copy(gradOutput) + self.gradInput[2]:resizeAs(input[2]):copy(gradOutput):mul(-1) + + for i=#input+1, #self.gradInput do + self.gradInput[i] = nil + end + + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Clamp.lua b/contrib/lua-torch/nn/Clamp.lua new file mode 100644 index 000000000..36397a157 --- /dev/null +++ b/contrib/lua-torch/nn/Clamp.lua @@ -0,0 +1,5 @@ +local Clamp, Parent = torch.class('nn.Clamp', 'nn.HardTanh') + +function Clamp:__init(min_value, max_value) + Parent.__init(self, min_value, max_value) +end diff --git a/contrib/lua-torch/nn/ClassNLLCriterion.lua b/contrib/lua-torch/nn/ClassNLLCriterion.lua new file mode 100644 index 000000000..dae0e6685 --- /dev/null +++ b/contrib/lua-torch/nn/ClassNLLCriterion.lua @@ -0,0 +1,82 @@ +local THNN = require 'nn.THNN' +local ClassNLLCriterion, parent = torch.class('nn.ClassNLLCriterion', 'nn.Criterion') + +function ClassNLLCriterion:__init(weights, sizeAverage, ignoreIndex) + parent.__init(self) + self.sizeAverage = (sizeAverage == nil) and true or sizeAverage + self.ignoreIndex = ignoreIndex or -100 -- this target index will be ignored + if weights then + assert(weights:dim() == 1, "weights input should be 1-D Tensor") + self.weights = weights + end + + self.output_tensor = torch.zeros(1) + self.total_weight_tensor = torch.ones(1) + self.target = torch.zeros(1):long() +end + +function ClassNLLCriterion:__len() + if (self.weights) then + return 
diff --git a/contrib/lua-torch/nn/CSubTable.lua b/contrib/lua-torch/nn/CSubTable.lua
new file mode 100644
index 000000000..eb7492055
--- /dev/null
+++ b/contrib/lua-torch/nn/CSubTable.lua
@@ -0,0 +1,26 @@
+
+local CSubTable, parent = torch.class('nn.CSubTable', 'nn.Module')
+
+function CSubTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+end
+
+function CSubTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   self.output:add(-1,input[2])
+   return self.output
+end
+
+function CSubTable:updateGradInput(input, gradOutput)
+   self.gradInput[1] = self.gradInput[1] or input[1].new()
+   self.gradInput[2] = self.gradInput[2] or input[1].new()
+   self.gradInput[1]:resizeAs(input[1]):copy(gradOutput)
+   self.gradInput[2]:resizeAs(input[2]):copy(gradOutput):mul(-1)
+
+   for i=#input+1, #self.gradInput do
+      self.gradInput[i] = nil
+   end
+
+   return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Clamp.lua b/contrib/lua-torch/nn/Clamp.lua
new file mode 100644
index 000000000..36397a157
--- /dev/null
+++ b/contrib/lua-torch/nn/Clamp.lua
@@ -0,0 +1,5 @@
+local Clamp, Parent = torch.class('nn.Clamp', 'nn.HardTanh')
+
+function Clamp:__init(min_value, max_value)
+   Parent.__init(self, min_value, max_value)
+end
diff --git a/contrib/lua-torch/nn/ClassNLLCriterion.lua b/contrib/lua-torch/nn/ClassNLLCriterion.lua
new file mode 100644
index 000000000..dae0e6685
--- /dev/null
+++ b/contrib/lua-torch/nn/ClassNLLCriterion.lua
@@ -0,0 +1,82 @@
+local THNN = require 'nn.THNN'
+local ClassNLLCriterion, parent = torch.class('nn.ClassNLLCriterion', 'nn.Criterion')
+
+function ClassNLLCriterion:__init(weights, sizeAverage, ignoreIndex)
+   parent.__init(self)
+   self.sizeAverage = (sizeAverage == nil) and true or sizeAverage
+   self.ignoreIndex = ignoreIndex or -100 -- this target index will be ignored
+   if weights then
+      assert(weights:dim() == 1, "weights input should be 1-D Tensor")
+      self.weights = weights
+   end
+
+   self.output_tensor = torch.zeros(1)
+   self.total_weight_tensor = torch.ones(1)
+   self.target = torch.zeros(1):long()
+end
+
+function ClassNLLCriterion:__len()
+   if (self.weights) then
+      return #self.weights
+   else
+      return 0
+   end
+end
+
+function ClassNLLCriterion:updateOutput(input, target)
+   if type(target) == 'number' then
+      if torch.typename(input):find('torch%.Cuda.*Tensor') then
+         self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
+      else
+         self.target = self.target:long()
+      end
+      self.target:resize(1)
+      self.target[1] = target
+   elseif torch.typename(input):find('torch%.Cuda.*Tensor') then
+      self.target = torch.CudaLongTensor and target:cudaLong() or target
+   else
+      self.target = target:long()
+   end
+
+   input.THNN.ClassNLLCriterion_updateOutput(
+      input:cdata(),
+      self.target:cdata(),
+      self.output_tensor:cdata(),
+      self.sizeAverage,
+      THNN.optionalTensor(self.weights),
+      self.total_weight_tensor:cdata(),
+      self.ignoreIndex
+   )
+   self.output = self.output_tensor[1]
+   return self.output, self.total_weight_tensor[1]
+end
+
+function ClassNLLCriterion:updateGradInput(input, target)
+   if type(target) == 'number' then
+      if torch.typename(input):find('torch%.Cuda.*Tensor') then
+         self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
+      else
+         self.target = self.target:long()
+      end
+      self.target:resize(1)
+      self.target[1] = target
+   elseif torch.typename(input):find('torch%.Cuda.*Tensor') then
+      self.target = torch.CudaLongTensor and target:cudaLong() or target
+   else
+      self.target = target:long()
+   end
+
+   self.gradInput:resizeAs(input):zero()
+
+   input.THNN.ClassNLLCriterion_updateGradInput(
+      input:cdata(),
+      self.target:cdata(),
+      self.gradInput:cdata(),
+      self.sizeAverage,
+      THNN.optionalTensor(self.weights),
+      self.total_weight_tensor:cdata(),
+      self.ignoreIndex
+   )
+
+   return self.gradInput
+end
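ClassNLLCriterion expects log-probabilities as input and class indices as targets. A hedged usage sketch (editorial, not part of the commit):

```lua
require 'nn'

local crit = nn.ClassNLLCriterion()
local logProbs = nn.LogSoftMax():forward(torch.randn(3, 5))  -- 3 samples, 5 classes
local targets = torch.LongTensor{1, 4, 5}
local loss = crit:forward(logProbs, targets)
local grad = crit:backward(logProbs, targets)
```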
diff --git a/contrib/lua-torch/nn/ClassSimplexCriterion.lua b/contrib/lua-torch/nn/ClassSimplexCriterion.lua
new file mode 100644
index 000000000..9cabc011f
--- /dev/null
+++ b/contrib/lua-torch/nn/ClassSimplexCriterion.lua
@@ -0,0 +1,118 @@
+local ClassSimplexCriterion, parent
+    = torch.class('nn.ClassSimplexCriterion', 'nn.MSECriterion')
+
+--[[
+    This file implements a criterion for multi-class classification.
+    It learns an embedding per class, where each class' embedding
+    is a point on an (N-1)-dimensional simplex, where N is
+    the number of classes.
+    For example usage of this class, look at doc/criterion.md
+
+    Reference: http://arxiv.org/abs/1506.08230
+]]--
+
+
+--[[
+    function regsplex(n):
+    regsplex returns the coordinates of the vertices of a
+    regular simplex centered at the origin.
+    The Euclidean norms of the vectors specifying the vertices are
+    all equal to 1. The input n is the dimension of the vectors;
+    the simplex has n+1 vertices.
+
+    input:
+    n -- dimension of the vectors specifying the vertices of the simplex
+
+    output:
+    a -- tensor dimensioned (n+1,n) whose rows are
+         vectors specifying the vertices
+
+    reference:
+    http://en.wikipedia.org/wiki/Simplex#Cartesian_coordinates_for_regular_n-dimensional_simplex_in_Rn
+--]]
+local function regsplex(n)
+   local a = torch.zeros(n+1,n)
+
+   for k = 1,n do
+      -- determine the last nonzero entry in the vector for the k-th vertex
+      if k==1 then a[k][k] = 1 end
+      if k>1 then a[k][k] = math.sqrt( 1 - a[{ {k},{1,k-1} }]:norm()^2 ) end
+
+      -- fill the k-th coordinates for the vectors of the remaining vertices
+      local c = (a[k][k]^2 - 1 - 1/n) / a[k][k]
+      a[{ {k+1,n+1},{k} }]:fill(c)
+   end
+
+   return a
+end
+
+
+function ClassSimplexCriterion:__init(nClasses)
+   parent.__init(self)
+   assert(nClasses and nClasses > 1 and nClasses == (nClasses -(nClasses % 1)),
+          "Required positive integer argument nClasses > 1")
+   self.nClasses = nClasses
+
+   -- embedding the simplex in a space of dimension strictly greater than
+   -- the minimum possible (nClasses-1) is critical for effective training.
+   local simp = regsplex(nClasses - 1)
+   self.simplex = torch.cat(simp,
+                            torch.zeros(simp:size(1), nClasses -simp:size(2)),
+                            2)
+   self._target = torch.Tensor(nClasses)
+end
+
+-- handle target being both 1D tensor, and
+-- target being 2D tensor (2D tensor means don't do anything)
+local function transformTarget(self, target)
+   if torch.type(target) == 'number' then
+      self._target:resize(self.nClasses)
+      self._target:copy(self.simplex[target])
+   elseif torch.isTensor(target) then
+      assert(target:dim() == 1, '1D tensors only!')
+      local nSamples = target:size(1)
+      self._target:resize(nSamples, self.nClasses)
+      for i=1,nSamples do
+         self._target[i]:copy(self.simplex[target[i]])
+      end
+   end
+end
+
+function ClassSimplexCriterion:updateOutput(input, target)
+   transformTarget(self, target)
+   assert(input:nElement() == self._target:nElement())
+   self.output_tensor = self.output_tensor or input.new(1)
+   input.THNN.MSECriterion_updateOutput(
+      input:cdata(),
+      self._target:cdata(),
+      self.output_tensor:cdata(),
+      self.sizeAverage
+   )
+   self.output = self.output_tensor[1]
+   return self.output
+end
+
+function ClassSimplexCriterion:updateGradInput(input, target)
+   assert(input:nElement() == self._target:nElement())
+   input.THNN.MSECriterion_updateGradInput(
+      input:cdata(),
+      self._target:cdata(),
+      self.gradInput:cdata(),
+      self.sizeAverage
+   )
+   return self.gradInput
+end
+
+function ClassSimplexCriterion:getPredictions(input)
+   if input:dim() == 1 then
+      input = input:view(1, -1)
+   end
+   return torch.mm(input, self.simplex:t())
+end
+
+function ClassSimplexCriterion:getTopPrediction(input)
+   local prod = self:getPredictions(input)
+   local _, maxs = prod:max(prod:nDimension())
+   return maxs:view(-1)
+end
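The criterion regresses network embeddings onto fixed simplex vertices (one per class) and classifies by nearest vertex. A sketch (editorial, not part of the commit; the dimensions are arbitrary, and per the header comment a real network would end in a normalized nClasses-dimensional embedding, see doc/criterion.md):

```lua
require 'nn'

local crit = nn.ClassSimplexCriterion(10)      -- 10 classes on a 9-simplex
local input = torch.randn(4, 10)               -- stand-in for network embeddings
local loss = crit:forward(input, torch.Tensor{1, 2, 3, 4})
local top = crit:getTopPrediction(input)       -- nearest-vertex class per sample
```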
return self.output +end + +function Collapse:updateGradInput(input, gradOutput) + self.gradInput:view(gradOutput, input:size()) + return self.gradInput +end + +function Collapse:clearState() + self._input = nil +end diff --git a/contrib/lua-torch/nn/Concat.lua b/contrib/lua-torch/nn/Concat.lua new file mode 100644 index 000000000..d7e3ee711 --- /dev/null +++ b/contrib/lua-torch/nn/Concat.lua @@ -0,0 +1,158 @@ +local Concat, parent = torch.class('nn.Concat', 'nn.Container') + +function Concat:__init(dimension) + parent.__init(self) + self.outputSize = torch.LongStorage() + self.dimension = dimension +end + +function Concat:updateOutput(input) + self.outputSize = self.outputSize or torch.LongStorage() + + local outs = {} + for i=1,#self.modules do + local currentOutput = self:rethrowErrors(self.modules[i], i, 'updateOutput', input) + outs[i] = currentOutput + if i == 1 then + self.outputSize:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.outputSize[self.dimension] = self.outputSize[self.dimension] + currentOutput:size(self.dimension) + end + end + self.output:resize(self.outputSize) + + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = outs[i] + self.output:narrow(self.dimension, offset, currentOutput:size(self.dimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.dimension) + end + return self.output +end + +local function retable(t1, t2, f) + for k, v in ipairs(t2) do + if (torch.type(v) == "table") then + t1[k] = retable(t1[k] or {}, t2[k], f) + else + f(t1, k, v) + end + end + for i=#t2+1, #t1 do + t1[i] = nil + end + return t1 +end + +local function backward(self, method, input, gradOutput, scale) + local isTable = torch.type(input) == 'table' + local wasTable = torch.type(self.gradInput) == 'table' + scale = scale or 1 + + if isTable then + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = self:rethrowErrors(module, i, method, input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), scale) + if torch.type(currentGradInput) ~= 'table' then + error"currentGradInput is not a table!" + end + if #input ~= #currentGradInput then + error("table size mismatch: "..#input.." ~= "..#currentGradInput) + end + if i == 1 then + self.gradInput = wasTable and self.gradInput or {} + retable(self.gradInput, currentGradInput, + function(t, k, v) + t[k] = t[k] or v:clone() + t[k]:resizeAs(v) + t[k]:copy(v) + end + ) + else + retable(self.gradInput, currentGradInput, + function(t, k, v) + if t[k] then + t[k]:add(v) + else + t[k] = v:clone() + end + end + ) + end + offset = offset + currentOutput:size(self.dimension) + end + else + self.gradInput = (not wasTable) and self.gradInput:resizeAs(input) or input:clone() + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = self:rethrowErrors(module, i, method, input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), scale) + if currentGradInput then -- if the module does not produce a gradInput (for example first layer), then ignore it and move on. 
+ if i==1 then + self.gradInput:copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + end + offset = offset + currentOutput:size(self.dimension) + end + end + return self.gradInput +end + +function Concat:updateGradInput(input, gradOutput) + return backward(self, 'updateGradInput', input, gradOutput) +end + +function Concat:backward(input, gradOutput, scale) + return backward(self, 'backward', input, gradOutput, scale) +end + +function Concat:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + self:rethrowErrors(module, i, 'accGradParameters', input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), + scale) + offset = offset + currentOutput:size(self.dimension) + end +end + +function Concat:accUpdateGradParameters(input, gradOutput, lr) + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + self:rethrowErrors(module, i, 'accUpdateGradParameters', + input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), + lr) + offset = offset + currentOutput:size(self.dimension) + end +end + +function Concat:__tostring__() + local tab = ' ' + local line = '\n' + local next = ' |`-> ' + local lastNext = ' `-> ' + local ext = ' | ' + local extlast = ' ' + local last = ' ... -> ' + local str = torch.type(self) + str = str .. ' {' .. line .. tab .. 'input' + for i=1,#self.modules do + if i == #self.modules then + str = str .. line .. tab .. lastNext .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. extlast) + else + str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. ext) + end + end + str = str .. line .. tab .. last .. 'output' + str = str .. line .. '}' + return str +end diff --git a/contrib/lua-torch/nn/ConcatTable.lua b/contrib/lua-torch/nn/ConcatTable.lua new file mode 100644 index 000000000..742719344 --- /dev/null +++ b/contrib/lua-torch/nn/ConcatTable.lua @@ -0,0 +1,118 @@ +local ConcatTable, parent = torch.class('nn.ConcatTable', 'nn.Container') + +function ConcatTable:__init() + parent.__init(self) + self.modules = {} + self.output = {} +end + +function ConcatTable:updateOutput(input) + for i=1,#self.modules do + self.output[i] = self:rethrowErrors(self.modules[i], i, 'updateOutput', input) + end + return self.output +end + +local function retable(t1, t2, f) + for k, v in ipairs(t2) do + if (torch.type(v) == "table") then + t1[k] = retable(t1[k] or {}, t2[k], f) + else + f(t1, k, v) + end + end + for i=#t2+1, #t1 do + t1[i] = nil + end + return t1 +end + +local function backward(self, method, input, gradOutput, scale) + local isTable = torch.type(input) == 'table' + local wasTable = torch.type(self.gradInput) == 'table' + if isTable then + for i,module in ipairs(self.modules) do + local currentGradInput = self:rethrowErrors(module, i, method, input, gradOutput[i], scale) + if torch.type(currentGradInput) ~= 'table' then + error"currentGradInput is not a table!" + end + if #input ~= #currentGradInput then + error("table size mismatch: "..#input.." 
~= "..#currentGradInput) + end + if i == 1 then + self.gradInput = wasTable and self.gradInput or {} + retable(self.gradInput, currentGradInput, + function(t, k, v) + t[k] = t[k] or v:clone() + t[k]:resize(v:size()) + t[k]:copy(v) + end + ) + else + retable(self.gradInput, currentGradInput, + function(t, k, v) + if t[k] then + t[k]:add(v) + else + t[k] = v:clone() + end + end + ) + end + end + else + self.gradInput = (not wasTable) and self.gradInput or input:clone() + for i,module in ipairs(self.modules) do + local currentGradInput = self:rethrowErrors(module, i, method, input, gradOutput[i], scale) + if i == 1 then + self.gradInput:resize(currentGradInput:size()):copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + end + end + return self.gradInput +end + +function ConcatTable:updateGradInput(input, gradOutput) + return backward(self, 'updateGradInput', input, gradOutput) +end + +function ConcatTable:backward(input, gradOutput, scale) + return backward(self, 'backward', input, gradOutput, scale) +end + +function ConcatTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + for i,module in ipairs(self.modules) do + self:rethrowErrors(module, i, 'accGradParameters', input, gradOutput[i], scale) + end +end + +function ConcatTable:accUpdateGradParameters(input, gradOutput, lr) + for i,module in ipairs(self.modules) do + self:rethrowErrors(module, i, 'accUpdateGradParameters', input, gradOutput[i], lr) + end +end + +function ConcatTable:__tostring__() + local tab = ' ' + local line = '\n' + local next = ' |`-> ' + local lastNext = ' `-> ' + local ext = ' | ' + local extlast = ' ' + local last = ' ... -> ' + local str = torch.type(self) + str = str .. ' {' .. line .. tab .. 'input' + for i=1,#self.modules do + if i == #self.modules then + str = str .. line .. tab .. lastNext .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. extlast) + else + str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. ext) + end + end + str = str .. line .. tab .. last .. 'output' + str = str .. line .. '}' + return str +end diff --git a/contrib/lua-torch/nn/Constant.lua b/contrib/lua-torch/nn/Constant.lua new file mode 100644 index 000000000..07773feb2 --- /dev/null +++ b/contrib/lua-torch/nn/Constant.lua @@ -0,0 +1,36 @@ +------------------------------------------------------------------------ +--[[ Constant ]]-- +-- Outputs a constant value given an input. +-- If nInputDim is specified, uses the input to determine the size of +-- the batch. The value is then replicated over the batch. +-- You can use this with nn.ConcatTable() to append constant inputs to +-- an input : nn.ConcatTable():add(nn.Constant(v)):add(nn.Identity()) . 
+------------------------------------------------------------------------ +local Constant, parent = torch.class("nn.Constant", "nn.Module") + +function Constant:__init(value, nInputDim) + self.value = value + if torch.type(self.value) == 'number' then + self.value = torch.Tensor{self.value} + end + assert(torch.isTensor(self.value), "Expecting number or tensor at arg 1") + self.nInputDim = nInputDim + parent.__init(self) +end + +function Constant:updateOutput(input) + if self.nInputDim and input:dim() > self.nInputDim then + local vsize = self.value:size():totable() + self.output:resize(input:size(1), table.unpack(vsize)) + local value = self.value:view(1, table.unpack(vsize)) + self.output:copy(value:expand(self.output:size())) + else + self.output:resize(self.value:size()):copy(self.value) + end + return self.output +end + +function Constant:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):zero() + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Container.lua b/contrib/lua-torch/nn/Container.lua new file mode 100644 index 000000000..7e264bab9 --- /dev/null +++ b/contrib/lua-torch/nn/Container.lua @@ -0,0 +1,149 @@ +-- This is code common to container modules, which are collections of +-- smaller constituent modules like Parallel, Sequential, etc. +local Container, parent = torch.class('nn.Container', 'nn.Module') + +function Container:__init(...) + parent.__init(self, ...) + self.modules = {} +end + +function Container:add(module) + table.insert(self.modules, module) + return self +end + +function Container:get(index) + return self.modules[index] +end + +function Container:size() + return #self.modules +end + +-- Check if passing arguments through xpcall is supported in this Lua interpreter. +local _, XPCALL_ARGS = xpcall(function(x) return x ~= nil end, function() end, 1) +local TRACEBACK_WARNING = "WARNING: If you see a stack trace below, it doesn't point to the place where this error occurred. Please use only the one above." +-- module argument can be retrieved with moduleIndex, but code is cleaner when +-- it has to be specified anyway. +function Container:rethrowErrors(module, moduleIndex, funcName, ...) + assert(module == self.modules[moduleIndex], + "mismatch between moduleIndex and self.modules in rethrowErrors") + local function handleError(err) + -- This will be executed only in the first container that handles the error. + if not err:find(TRACEBACK_WARNING) then + local traceback = debug.traceback() + -- Remove this handler from the stack + local _, first_line_end = traceback:find('^.-\n') + local _, second_line_end = traceback:find('^.-\n.-\n') + traceback = traceback:sub(1, first_line_end) .. traceback:sub(second_line_end+1) + err = err .. '\n' .. traceback .. '\n\n' .. TRACEBACK_WARNING + else + -- Remove file path + err = err:sub(err:find('\n')+1) + end + local msg = string.format('In %d module of %s:', + moduleIndex, torch.type(self)) + -- Preceding newline has to be here, because Lua will prepend a file path. + err = '\n' .. msg .. '\n' .. err + return err + end + + -- Lua 5.1 doesn't support passing arguments through xpcall, so they have to + -- be passed via a closure. This incurs some overhead, so it's better not to + -- make it the default. + local ok, ret, noret + if not XPCALL_ARGS then + local args = {...} + local unpack = unpack or table.unpack + ok, ret, noret = xpcall(function() + return module[funcName](module, unpack(args)) + end, + handleError) + else + ok, ret, noret = xpcall(module[funcName], handleError, module, ...) 
+ end + assert(noret == nil, "rethrowErrors supports only one return argument") + + if not ok then error(ret) end + return ret +end + +function Container:applyToModules(func) + for _, module in ipairs(self.modules) do + func(module) + end +end + +function Container:zeroGradParameters() + self:applyToModules(function(module) module:zeroGradParameters() end) +end + +function Container:updateParameters(learningRate) + self:applyToModules(function(module) module:updateParameters(learningRate) end) +end + +function Container:training() + self:applyToModules(function(module) module:training() end) + parent.training(self) +end + +function Container:evaluate() + self:applyToModules(function(module) module:evaluate() end) + parent.evaluate(self) +end + +function Container:share(mlp, ...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i], ...); + end + return self +end + +function Container:reset(stdv) + self:applyToModules(function(module) module:reset(stdv) end) +end + +function Container:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end + +function Container:clearState() + -- don't call set because it might reset referenced tensors + local function clear(f) + if self[f] then + if torch.isTensor(self[f]) then + self[f] = self[f].new() + elseif type(self[f]) == 'table' then + self[f] = {} + else + self[f] = nil + end + end + end + clear('output') + clear('gradInput') + if self.modules then + for i,module in pairs(self.modules) do + module:clearState() + end + end + return self +end diff --git a/contrib/lua-torch/nn/Contiguous.lua b/contrib/lua-torch/nn/Contiguous.lua new file mode 100755 index 000000000..f9974ce5a --- /dev/null +++ b/contrib/lua-torch/nn/Contiguous.lua @@ -0,0 +1,21 @@ +local Contiguous, parent = torch.class('nn.Contiguous', 'nn.Module') + +function Contiguous:updateOutput(input) + if not input:isContiguous() then + if self.output:storage() == input:storage() then self.output:set() end + self.output:resizeAs(input):copy(input) + else + self.output:set(input) + end + return self.output +end + +function Contiguous:updateGradInput(input, gradOutput) + if not gradOutput:isContiguous() then + if self.gradInput:storage() == gradOutput:storage() then self.gradInput:set() end + self.gradInput:resizeAs(gradOutput):copy(gradOutput) + else + self.gradInput:set(gradOutput) + end + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Convert.lua b/contrib/lua-torch/nn/Convert.lua new file mode 100644 index 000000000..855338dd6 --- /dev/null +++ b/contrib/lua-torch/nn/Convert.lua @@ -0,0 +1,245 @@ +------------------------------------------------------------------------ +--[[ nn.Convert ]]-- +-- Module to convert between different data formats +-- nn.Convert('bchw', 'bf') or nn.Convert('chw', 'f') +-- Automatically converts input to same type as self.output +-- Simplest use is for automatic input type conversions : nn.Convert() +------------------------------------------------------------------------ +local _ = require 'moses' +local Convert, parent = torch.class("nn.Convert", "nn.Container") + +function Convert:__init(inputShape, outputShape) + if outputShape and not inputShape then + error"Expecting non-nil arg 1 when arg 2 is provided" + end + inputShape = inputShape or 'b*'
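+ -- 'b*' stands for a batch dim followed by dims of any shape, so the
+ -- default nn.Convert() only casts the input to the module's tensor type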
+ outputShape = outputShape or inputShape + self.inputShape = inputShape:find('b') and inputShape or ('b'..inputShape) + self.outputShape = outputShape:find('b') and outputShape or ('b'..outputShape) + self.inputBatchDim = self.inputShape:find('b') + self.outputBatchDim = self.outputShape:find('b') + if self.inputShape == 'b*' or self.outputShape == 'b*' then + assert(self.inputShape == 'b*' and self.outputShape == 'b*', 'Both or neither shapes must be b*') + self.nInputDim = -1 + self.nOutputDim = -1 + self.transposition = true + else + -- number of dims in batch mode + self.nInputDim = #self.inputShape + self.nOutputDim = #self.outputShape + -- is the outputShape just a transposition of the inputShape? + if self.nInputDim == self.nOutputDim then + self.transposition = true + for i=1,self.nInputDim do + if not self.outputShape:find(self.inputShape:sub(i,i)) then + self.transposition = false + break + end + end + end + end + parent.__init(self) +end + +-- post-initialization +function Convert:buildConverter(input) + if self.transposition then + self.converter = self:transpose(self.outputShape) + else + if (torch.type(self[self.outputShape]) ~= 'function') then + error(string.format("Unrecognized conversion of shape %s to %s", self.inputShape, self.outputShape)) + end + self.converter = self[self.outputShape](self, input) + end + assert(torch.isTensor(self.output), "Expecting Tensor output") + + self.converter:type(torch.type(self.output)) + + self.modules[1] = self.converter +end + +function Convert:updateOutput(input) + assert(torch.isTensor(input), "expecting Tensor") + if not torch.isTypeOf(input, torch.type(self.output)) then + -- handle different input type + self._input = self._input or self.output.new() + self._input:resize(input:size()):copy(input) + input = self._input + end + self.batchMode = true + if input:dim() < self.nInputDim then + -- handle non-batch mode + local inputSize = input:size():totable() + table.insert(inputSize, self.inputBatchDim, 1) + self.__input = self.__input or input.new() + self.__input:set(input):resize(table.unpack(inputSize)) + input = self.__input + self.batchMode = false + end + if not self.converter then + self:buildConverter(input) + end + + self.output = self.converter:updateOutput(input) + + if not self.batchMode then + local outputSize = self.output:size():totable() + table.remove(outputSize, self.outputBatchDim) + self.__output = self.__output or self.output.new() + self.__output:set(self.output):resize(table.unpack(outputSize)) + self.output = self.__output + end + return self.output +end + +function Convert:updateGradInput(input, gradOutput) + local input_ = input + input = self._input or input + if not self.batchMode then + input = self.__input + self.__gradOutput = self.__gradOutput or gradOutput.new() + self.__gradOutput:set(gradOutput):resize(self.converter.output:size()) + gradOutput = self.__gradOutput + end + + local gradInput = self.converter:updateGradInput(input, gradOutput) + + if not self.batchMode then + self.__gradInput = self.__gradInput or gradInput.new() + self.__gradInput:set(gradInput):resize(input_:size()) + gradInput = self.__gradInput + end + if self._input then + self._gradInput = self._gradInput or input.new() + self._gradInput:resize(input:size()):copy(gradInput) + self.gradInput = self._gradInput + else + self.gradInput = gradInput + end + + return self.gradInput +end + +function Convert:accGradParameters(input, gradOutput, scale) + input = self.batchMode and self.__input or self._input or input + gradOutput = 
self.batchMode and self.__gradOutput or gradOutput + self.converter:accGradParameters(input, gradOutput, scale) +end + +function Convert:accUpdateGradParameters(input, gradOutput, lr) + input = self.batchMode and self.__input or self._input or input + gradOutput = self.batchMode and self.__gradOutput or gradOutput + self.converter:accUpdateGradParameters(input, gradOutput, lr) +end + +-- batch feature +function Convert:bf(input) + local b_pos = self:findAxis('b', self.inputShape) + local dim = #self.inputShape + if self.inputShape == 'bt' then + error"Conversion of shape bt to bf not supported: open an issue on github" + end + -- was b + if dim == 1 then + return nn.Reshape(1) + end + -- was b... + local modula + if b_pos ~= 1 then + modula = nn.Transpose({1, b_pos}) + end + if dim > 2 then + local transpose = modula + local sampleSize = input:select(self:findAxis('b'),1):nElement() + local reshape = nn.Reshape(sampleSize) + if transpose then + modula = nn.Sequential() + modula:add(transpose) + modula:add(reshape) + else + modula = reshape + end + end + return modula or nn.Identity() +end + +-- each example is a scalar; batch is a vector +function Convert:b(input) + local b_pos = self:findAxis('b') + if self.inputShape == 'bt' or self.inputShape == 'tb' then + local t_pos = self:findAxis('t') + -- select first set of classes + return nn.Select(t_pos, 1) + elseif self.inputShape == 'bf' or self.inputShape == 'fb' then + -- this wont work as expected with size(f) > 1 + local f_pos = self:findAxis('f') + if input:size(f_pos) > 1 then + error("Cannot convert shape "..self.inputShape.." to b when feature > 1") + end + return nn.Select(f_pos, 1) + else + error("Cannot convert shape "..self.inputShape.." to shape b") + end +end + +-- returns the current shape of the data +function Convert:default() + return nn.Identity() +end + +-- multi-class (batch target) +function Convert:bt() + local b_pos = self:findAxis('b') + local modula + if self.inputShape == 'b' then + modula = nn.Reshape(1) + else + error("cannot convert shape '"..self.inputShape.."' to bt") + end + return modula +end + +-- a generic function for transposing shape axes +function Convert:transpose(newShape) + if newShape == self.inputShape then + return nn.Identity() + end + local inputShape = {} + for i=1,#self.inputShape do + table.insert(inputShape, self.inputShape:sub(i,i)) + end + local transpositions = {} + for i=1,#newShape do + local j = _.indexOf(inputShape, newShape:sub(i,i)) + if i ~= j then + local char = inputShape[i] + inputShape[i] = inputShape[j] + inputShape[j] = char + table.insert(transpositions, {j, i}) + end + end + return nn.Transpose(table.unpack(transpositions)) +end + +function Convert:findAxis(axis_char, shape, silent) + shape = shape or self.inputShape + local axis_pos = shape:find(axis_char) + if (not silent) and (not axis_pos) then + error("Provided shape '"..shape.."' has no axis '"..axis_char.."'", 2) + end + return axis_pos +end + +function Convert:clearState() + self._input = nil + self._gradInput = nil + self.__input = nil + self.__output = nil + self.__gradInput = nil + self.__gradOutput = nil +end + +function Convert:type(type) + self:clearState() + return parent.type(self, type) +end diff --git a/contrib/lua-torch/nn/Copy.lua b/contrib/lua-torch/nn/Copy.lua new file mode 100644 index 000000000..9f83cf9b4 --- /dev/null +++ b/contrib/lua-torch/nn/Copy.lua @@ -0,0 +1,42 @@ +local Copy, parent = torch.class('nn.Copy', 'nn.Module') + +function Copy:__init(intype, outtype, forceCopy, dontCast) + intype = 
intype or torch.Tensor.__typename + outtype = outtype or torch.Tensor.__typename + + self.dontCast = dontCast + + parent.__init(self) + self.gradInput = torch.getmetatable(intype).new() + self.output = torch.getmetatable(outtype).new() + + if (not forceCopy) and intype == outtype then + + self.updateOutput = function(self, input) + self.output:set(input) + return input + end + + self.updateGradInput = function(self, input, gradOutput) + self.gradInput:set(gradOutput) + return gradOutput + end + end +end + +function Copy:updateOutput(input) + self.output:resize(input:size()):copy(input) + return self.output +end + +function Copy:updateGradInput(input, gradOutput) + self.gradInput:resize(gradOutput:size()):copy(gradOutput) + return self.gradInput +end + +function Copy:type(type, tensorCache) + if type and self.dontCast then + return self + end + return parent.type(self, type, tensorCache) +end diff --git a/contrib/lua-torch/nn/Cosine.lua b/contrib/lua-torch/nn/Cosine.lua new file mode 100644 index 000000000..19a9cba82 --- /dev/null +++ b/contrib/lua-torch/nn/Cosine.lua @@ -0,0 +1,175 @@ +local Cosine, parent = torch.class('nn.Cosine', 'nn.Module') + +function Cosine:__init(inputSize,outputSize) + parent.__init(self) + + self.weight = torch.Tensor(outputSize,inputSize) + self.gradWeight = torch.Tensor(outputSize,inputSize) + + self:reset() +end + +function Cosine:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + self.weight:uniform(-stdv, stdv) +end + +function Cosine:updateOutput(input) + local inputSize = self.weight:size(2) + local outputSize = self.weight:size(1) + + self._weightNorm = self._weightNorm or self.weight.new() + self._inputNorm = self._inputNorm or self.weight.new() + + -- y_j = (w_j * x) / ( || w_j || * || x || ) + + self._weightNorm:norm(self.weight,2,2):add(1e-12) + if input:dim() == 1 then + self.output:resize(outputSize):zero() + self.output:addmv(1, self.weight, input) + self.__norm = input:norm()+1e-12 + self.output:cdiv(self._weightNorm:view(outputSize)):div(self.__norm) + elseif input:dim() == 2 then + local batchSize = input:size(1) + local nElement = self.output:nElement() + self.output:resize(batchSize, outputSize) + if self.output:nElement() ~= nElement then + self.output:zero() + end + self.output:addmm(0, self.output, 1, input, self.weight:t()) + + self._inputNorm:norm(input,2,2):add(1e-12) + self.output:cdiv(self._weightNorm:view(1,outputSize):expandAs(self.output)) + self.output:cdiv(self._inputNorm:expandAs(self.output)) + else + error('input must be vector or matrix') + end + + return self.output +end + +function Cosine:updateGradInput(input, gradOutput) + if not self.gradInput then + return + end + + local inputSize = self.weight:size(2) + local outputSize = self.weight:size(1) + + --[[ + dy_j w_ji x_i + ---- = ------------------- - y_j --------- + dx_i || w_j || * || x || || x ||^2 + --]] + + local nElement = self.gradInput:nElement() + self.gradInput:resizeAs(input) + if self.gradInput:nElement() ~= nElement then + self.gradInput:zero() + end + + if input:dim() == 1 then + self._weight = self._weight or input.new() + self._weight:resizeAs(self.weight):copy(self.weight) + self._weight:cdiv(self._weightNorm:expandAs(self.weight)) + self._weight:div(self.__norm) + self._weight:addr(1, self._weight, -1/(self.__norm*self.__norm), self.output, input) + self.gradInput:addmv(0, 1, self._weight:t(), gradOutput) + elseif input:dim() == 2 then + local inputNorm = self._inputNorm:expandAs(input) + local 
weightNorm = self._weightNorm:view(1,outputSize):expandAs(gradOutput) + + self.gradInput:copy(input):cdiv(inputNorm) + self._gradOutput = self._gradOutput or gradOutput.new() + self._gradOutput:resizeAs(gradOutput):copy(gradOutput) + self._gradOutput:cmul(self.output) + self._sum = self._sum or input.new() + self._sum:sum(self._gradOutput, 2) + self.gradInput:cmul(self._sum:expandAs(input)) + + self._gradOutput:resizeAs(gradOutput):copy(gradOutput) + self._gradOutput:cdiv(weightNorm) + self.gradInput:addmm(-1, self.gradInput, 1, self._gradOutput, self.weight) + + self.gradInput:cdiv(inputNorm) + end + + return self.gradInput +end + +function Cosine:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + local inputSize = self.weight:size(2) + local outputSize = self.weight:size(1) + + --[[ + dy_j x_i w_ji + ----- = ------------------- - y_j ----------- + dw_ji || w_j || * || x || || w_j ||^2 + --]] + + if input:dim() == 1 then + self._gradOutput = self._gradOutput or gradOutput.new() + self._gradOutput:resizeAs(gradOutput):copy(gradOutput) + local weightNorm = self._weightNorm:view(outputSize) + self._gradOutput:cdiv(weightNorm) + self.gradWeight:addr(scale/self.__norm, self._gradOutput, input) + + self._gradOutput:cdiv(weightNorm) + self._gradOutput:cmul(self.output) + self._weight = self._weight or self.weight.new() + self._weight:resizeAs(self._weight):copy(self.weight) + self._weight:cmul(self._gradOutput:view(outputSize, 1):expandAs(self.weight)) + self.gradWeight:add(-1, self._weight) + elseif input:dim() == 2 then + self._weight = self._weight or self.weight.new() + self._weight:resizeAs(self.weight):copy(self.weight) + self._gradOutput = self._gradOutput or gradOutput.new() + self._gradOutput:resizeAs(gradOutput):copy(gradOutput) + self._gradOutput:cmul(self.output) + self._sum = self._sum or input.new() + self._sum:sum(self._gradOutput, 1) + local grad = self._sum[1] + grad:cdiv(self._weightNorm:select(2,1)) + self._weight:cmul(grad:view(outputSize,1):expandAs(self._weight)) + + local input_ = self._gradOutput + input_:resizeAs(input):copy(input) + input_:cdiv(self._inputNorm:expandAs(input)) + self._weight:addmm(-1, self._weight, 1, gradOutput:t(), input_) + + self._weight:cdiv(self._weightNorm:expandAs(self._weight)) + self.gradWeight:add(self._weight) + else + error"1D or 2D input expected" + end +end + +function Cosine:type(type, tensorCache) + if type then + -- prevent premature memory allocations + self._input = nil + self._weight = nil + self._inputNorm = nil + self._weightNorm = nil + self._gradOutput = nil + self._sum = nil + end + return parent.type(self, type, tensorCache) +end + +function Cosine:clearState() + nn.utils.clear(self, { + '_input', + '_weight', + '_gradOutput', + '_sum', + '_inputNorm', + '_weightNorm', + }) + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/CosineDistance.lua b/contrib/lua-torch/nn/CosineDistance.lua new file mode 100644 index 000000000..fe4e4b9f5 --- /dev/null +++ b/contrib/lua-torch/nn/CosineDistance.lua @@ -0,0 +1,116 @@ +local CosineDistance, parent = torch.class('nn.CosineDistance', 'nn.Module') + +function CosineDistance:__init() + parent.__init(self) + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +local function makeContiguous(self, input1, input2) + if not input1:isContiguous() then + self._input1 = self._input1 or input1.new() + self._input1:resizeAs(input1):copy(input1) + input1 = self._input1 + end + if not input2:isContiguous() then + self._input2 = self._input2 or input2.new() + 
self._input2:resizeAs(input2):copy(input2) + input2 = self._input2 + end + return input1, input2 +end + +function CosineDistance:updateOutput(input) + local input1, input2 = input[1], input[2] + + input1, input2 = makeContiguous(self, input1, input2) + + if input1:dim() == 1 then + input1 = input1:view(1,-1) + input2 = input2:view(1,-1) + end + + if not self.buffer then + self.buffer = input1.new() + self.w1 = input1.new() + self.w22 = input1.new() + self.w = input1.new() + self.w32 = input1.new() + self.ones = input1.new() + end + + self.buffer:cmul(input1,input2) + self.w1:sum(self.buffer,2) + + local epsilon = 1e-12 + self.buffer:cmul(input1,input1) + self.w22:sum(self.buffer,2):add(epsilon) + self.ones:resizeAs(self.w22):fill(1) + self.w22:cdiv(self.ones, self.w22) + self.w:resizeAs(self.w22):copy(self.w22) + + self.buffer:cmul(input2,input2) + self.w32:sum(self.buffer,2):add(epsilon) + self.w32:cdiv(self.ones, self.w32) + self.w:cmul(self.w32) + self.w:sqrt() + + self.output:cmul(self.w1,self.w) + self.output:resize(input1:size(1)) + + return self.output +end + +function CosineDistance:updateGradInput(input, gradOutput) + local v1 = input[1] + local v2 = input[2] + local not_batch = false + + v1, v2 = makeContiguous(self, v1, v2) + + if v1:dim() == 1 then + v1 = v1:view(1,-1) + v2 = v2:view(1,-1) + not_batch = true + end + + if #self.gradInput ~= 2 then + self.gradInput[1] = self.gradInput[1] or v1.new() + self.gradInput[2] = self.gradInput[2] or v1.new() + end + + local gw1 = self.gradInput[1] + local gw2 = self.gradInput[2] + gw1:resizeAs(v1):copy(v2) + gw2:resizeAs(v1):copy(v1) + + self.buffer:cmul(self.w1,self.w22) + gw1:addcmul(-1,self.buffer:expandAs(v1),v1) + gw1:cmul(self.w:expandAs(v1)) + + self.buffer:cmul(self.w1,self.w32) + gw2:addcmul(-1,self.buffer:expandAs(v1),v2) + gw2:cmul(self.w:expandAs(v1)) + + local go = gradOutput:view(-1,1):expandAs(v1) + gw1:cmul(go) + gw2:cmul(go) + + if not_batch then + self.gradInput[1]:resize(gw1:size(2)) + self.gradInput[2]:resize(gw2:size(2)) + end + + return self.gradInput +end + +function CosineDistance:clearState() + nn.utils.clear(self, { + 'buffer', + 'w1', + 'w22', + 'w', + 'w32', + 'ones', + }) + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/CosineEmbeddingCriterion.lua b/contrib/lua-torch/nn/CosineEmbeddingCriterion.lua new file mode 100644 index 000000000..d55e03130 --- /dev/null +++ b/contrib/lua-torch/nn/CosineEmbeddingCriterion.lua @@ -0,0 +1,142 @@ +local CosineEmbeddingCriterion, parent = torch.class('nn.CosineEmbeddingCriterion', 'nn.Criterion') + +function CosineEmbeddingCriterion:__init(margin) + parent.__init(self) + margin = margin or 0 + self.margin = margin + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.sizeAverage = true +end + +function CosineEmbeddingCriterion:updateOutput(input,y) + + local input1, input2 = input[1], input[2] + + -- keep backward compatibility + if type(y) == 'number' then + self._y = self._y or input1.new(1) + self._y[1] = y + y = self._y + end + + if input1:dim() == 1 then + input1 = input1:view(1,-1) + input2 = input2:view(1,-1) + end + + if not self.buffer then + self.buffer = input1.new() + self.w1 = input1.new() + self.w22 = input1.new() + self.w = input1.new() + self.w32 = input1.new() + self._outputs = input1.new() + -- comparison operators behave differently from cuda/c implementations + if input1:type() == 'torch.CudaTensor' then + self._idx = input1.new() + else + self._idx = torch.ByteTensor() + end + end + + self.buffer:cmul(input1,input2) + 
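-- w1[i] = <input1[i], input2[i]>: sum the elementwise product row-wise +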
self.w1:sum(self.buffer,2) + + local epsilon = 1e-12 + self.buffer:cmul(input1,input1) + self.w22:sum(self.buffer,2):add(epsilon) + -- self._outputs is also used as a temporary buffer + self._outputs:resizeAs(self.w22):fill(1) + self.w22:cdiv(self._outputs, self.w22) + self.w:resizeAs(self.w22):copy(self.w22) + + self.buffer:cmul(input2,input2) + self.w32:sum(self.buffer,2):add(epsilon) + self.w32:cdiv(self._outputs, self.w32) + self.w:cmul(self.w32) + self.w:sqrt() + + self._outputs:cmul(self.w1,self.w) + self._outputs = self._outputs:select(2,1) + + y.eq(self._idx,y,-1) + self._outputs[self._idx] = self._outputs[self._idx]:add(-self.margin):cmax(0) + y.eq(self._idx,y,1) + self._outputs[self._idx] = self._outputs[self._idx]:mul(-1):add(1) + + self.output = self._outputs:sum() + + if self.sizeAverage then + self.output = self.output/y:size(1) + end + + return self.output +end + +function CosineEmbeddingCriterion:updateGradInput(input, y) + + local v1 = input[1] + local v2 = input[2] + local not_batch = false + + -- keep backward compatibility + if type(y) == 'number' then + self._y = self._y or input[1].new(1) + self._y[1] = y + y = self._y + end + + if v1:dim() == 1 then + v1 = v1:view(1,-1) + v2 = v2:view(1,-1) + not_batch = true + end + + local gw1 = self.gradInput[1] + local gw2 = self.gradInput[2] + gw1:resizeAs(v1):copy(v2) + gw2:resizeAs(v1):copy(v1) + + self.buffer:cmul(self.w1,self.w22) + gw1:addcmul(-1,self.buffer:expandAs(v1),v1) + gw1:cmul(self.w:expandAs(v1)) + + self.buffer:cmul(self.w1,self.w32) + gw2:addcmul(-1,self.buffer:expandAs(v1),v2) + gw2:cmul(self.w:expandAs(v1)) + + -- self._idx = self._outputs <= 0 + y.le(self._idx,self._outputs,0) + self._idx = self._idx:view(-1,1):expand(gw1:size()) + gw1[self._idx] = 0 + gw2[self._idx] = 0 + + y.eq(self._idx,y,1) + self._idx = self._idx:view(-1,1):expand(gw2:size()) + gw1[self._idx] = gw1[self._idx]:mul(-1) + gw2[self._idx] = gw2[self._idx]:mul(-1) + + if self.sizeAverage then + gw1:div(y:size(1)) + gw2:div(y:size(1)) + end + + if not_batch then + self.gradInput[1]:resize(gw1:size(2)) + self.gradInput[2]:resize(gw2:size(2)) + end + + return self.gradInput +end + +function CosineEmbeddingCriterion:type(type) + self._idx = nil + parent.type(self,type) + -- comparison operators behave differently from cuda/c implementations + if type == 'torch.CudaTensor' then + self._idx = torch.CudaTensor() + else + self._idx = torch.ByteTensor() + end + return self +end diff --git a/contrib/lua-torch/nn/Criterion.lua b/contrib/lua-torch/nn/Criterion.lua new file mode 100644 index 000000000..e48f06876 --- /dev/null +++ b/contrib/lua-torch/nn/Criterion.lua @@ -0,0 +1,64 @@ +local Criterion = torch.class('nn.Criterion') + +function Criterion:__init() + self.gradInput = torch.Tensor() + self.output = 0 +end + +function Criterion:updateOutput(input, target) +end + +function Criterion:forward(input, target) + return self:updateOutput(input, target) +end + +function Criterion:backward(input, target) + return self:updateGradInput(input, target) +end + +function Criterion:updateGradInput(input, target) +end + +function Criterion:clone() + local f = torch.MemoryFile("rw"):binary() + f:writeObject(self) + f:seek(1) + local clone = f:readObject() + f:close() + return clone +end + +function Criterion:type(type, tensorCache) + assert(type, 'Criterion: must provide a type to convert to') + -- find all tensors and convert them + for key,param in pairs(self) do + self[key] = nn.utils.recursiveType(param, type, tensorCache) + end + return self +end + +function
Criterion:float() + return self:type('torch.FloatTensor') +end + +function Criterion:double() + return self:type('torch.DoubleTensor') +end + +function Criterion:cuda() + return self:type('torch.CudaTensor') +end + +function Criterion:cudaHalf() + return self:type('torch.CudaHalfTensor') +end + +function Criterion:cudaDouble() + return self:type('torch.CudaDoubleTensor') +end + +function Criterion:__call__(input, target) + self.output = self:forward(input, target) + self.gradInput = self:backward(input, target) + return self.output, self.gradInput +end diff --git a/contrib/lua-torch/nn/CriterionTable.lua b/contrib/lua-torch/nn/CriterionTable.lua new file mode 100644 index 000000000..14f67bd39 --- /dev/null +++ b/contrib/lua-torch/nn/CriterionTable.lua @@ -0,0 +1,17 @@ +local CriterionTable, parent = torch.class('nn.CriterionTable', 'nn.Module') + +function CriterionTable:__init(criterion) + parent.__init(self) + self.criterion = criterion + self.gradInput = {criterion.gradInput} +end + +function CriterionTable:updateOutput(input) + self.output = self.criterion:updateOutput(table.unpack(input)) + return self.output +end + +function CriterionTable:updateGradInput(input, gradOutput) + self.criterion:updateGradInput(table.unpack(input)) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/CrossEntropyCriterion.lua b/contrib/lua-torch/nn/CrossEntropyCriterion.lua new file mode 100644 index 000000000..2f72cf87f --- /dev/null +++ b/contrib/lua-torch/nn/CrossEntropyCriterion.lua @@ -0,0 +1,42 @@ +local CrossEntropyCriterion, Criterion = torch.class('nn.CrossEntropyCriterion', 'nn.Criterion') + +function CrossEntropyCriterion:__init(weights, sizeAverage) + Criterion.__init(self) + self.lsm = nn.LogSoftMax() + self.nll = nn.ClassNLLCriterion(weights, sizeAverage) + self.sizeAverage = self.nll.sizeAverage + self.oldSizeAverage = self.sizeAverage +end + +function CrossEntropyCriterion:updateOutput(input, target) + input = input:squeeze() + target = type(target) == 'number' and target or target:squeeze() + -- only propagate if value has changed to preserve old behavior + -- of setting nll.sizeAverage directly + if self.sizeAverage ~= self.oldSizeAverage then + self.nll.sizeAverage = self.sizeAverage + end + self.lsm:updateOutput(input) + self.nll:updateOutput(self.lsm.output, target) + self.output = self.nll.output + self.oldSizeAverage = self.sizeAverage + return self.output +end + +function CrossEntropyCriterion:updateGradInput(input, target) + local size = input:size() + input = input:squeeze() + target = type(target) == 'number' and target or target:squeeze() + -- only propagate if value has changed to preserve old behavior + -- of setting nll.sizeAverage directly + if self.sizeAverage ~= self.oldSizeAverage then + self.nll.sizeAverage = self.sizeAverage + end + self.nll:updateGradInput(self.lsm.output, target) + self.lsm:updateGradInput(input, self.nll.gradInput) + self.gradInput:view(self.lsm.gradInput, size) + self.oldSizeAverage = self.sizeAverage + return self.gradInput +end + +return nn.CrossEntropyCriterion diff --git a/contrib/lua-torch/nn/Decorator.lua b/contrib/lua-torch/nn/Decorator.lua new file mode 100644 index 000000000..05fb4db92 --- /dev/null +++ b/contrib/lua-torch/nn/Decorator.lua @@ -0,0 +1,47 @@ +local Decorator, parent = torch.class("nn.Decorator", "nn.Container") + +function Decorator:__init(module) + parent.__init(self) + -- so that it can be handled like a Container + self.modules[1] = module +end + +function Decorator:updateOutput(input) + self.output = 
self.modules[1]:updateOutput(input) + return self.output +end + +function Decorator:updateGradInput(input, gradOutput) + self.gradInput = self.modules[1]:updateGradInput(input, gradOutput) + return self.gradInput +end + +function Decorator:accGradParameters(input, gradOutput, scale) + self.modules[1]:accGradParameters(input, gradOutput, scale) +end + +function Decorator:accUpdateGradParameters(input, gradOutput, lr) + self.modules[1]:accUpdateGradParameters(input, gradOutput, lr) +end + +function Decorator:sharedAccUpdateGradParameters(input, gradOutput, lr) + self.modules[1]:sharedAccUpdateGradParameters(input, gradOutput, lr) +end + +function Decorator:__tostring__() + if self.modules[1].__tostring__ then + return torch.type(self) .. ' @ ' .. self.modules[1]:__tostring__() + else + return torch.type(self) .. ' @ ' .. torch.type(self.modules[1]) + end +end + +-- useful for multiple-inheritance +function Decorator.decorate(class) + class.updateOutput = nn.Decorator.updateOutput + class.updateGradInput = nn.Decorator.updateGradInput + class.accGradParameters = nn.Decorator.accGradParameters + class.accUpdateGradParameters = nn.Decorator.accUpdateGradParameters + class.sharedAccUpdateGradParameters = nn.Decorator.sharedAccUpdateGradParameters + class.__tostring__ = nn.Decorator.__tostring__ +end diff --git a/contrib/lua-torch/nn/DepthConcat.lua b/contrib/lua-torch/nn/DepthConcat.lua new file mode 100644 index 000000000..f64a90eb8 --- /dev/null +++ b/contrib/lua-torch/nn/DepthConcat.lua @@ -0,0 +1,116 @@ +------------------------------------------------------------------------ +--[[ DepthConcat ]]-- +-- Concatenates the output of Convolutions along the depth dimension +-- (nOutputFrame). This is used to implement the DepthConcat layer +-- of the Going deeper with convolutions paper : +-- http://arxiv.org/pdf/1409.4842v1.pdf +-- The normal Concat Module can't be used since the spatial dimensions +-- of tensors to be concatenated may have different values. To deal with +-- this, we select the largest spatial dimensions and add zero-padding +-- around the smaller dimensions. 
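+-- For example, merging a 3x5x5 output with a 3x3x3 output along the
+-- depth dimension yields a 6x5x5 tensor in which the smaller output is
+-- centered, with one pixel of zero-padding on each side of both
+-- spatial dimensions.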
+------------------------------------------------------------------------ +local DepthConcat, _ = torch.class('nn.DepthConcat', 'nn.Concat') + +function DepthConcat:windowNarrow(output, currentOutput, offset) + local outputWindow = output:narrow(self.dimension, offset, currentOutput:size(self.dimension)) + for dim=1,self.outputSize:size(1) do + local currentSize = currentOutput:size(dim) + if dim ~= self.dimension and self.outputSize[dim] ~= currentSize then + -- 5x5 vs 3x3 -> start = [(5-3)/2] + 1 = 2 (1 pad each side) + -- 9x9 vs 5x5 -> start = [(9-5)/2] + 1 = 3 (2 pad each side) + -- 9x9 vs 4x4 -> start = [(9-4)/2] + 1 = 3.5 (2 pad, 3 pad) + local start = math.floor(((self.outputSize[dim] - currentSize) / 2) + 1) + outputWindow = outputWindow:narrow(dim, start, currentSize) + end + end + return outputWindow +end + +function DepthConcat:updateOutput(input) + self.outputSize = self.outputSize or torch.LongStorage() + + local outs = {} + for i=1,#self.modules do + local currentOutput = self:rethrowErrors(self.modules[i], i, 'updateOutput', input) + outs[i] = currentOutput + if i == 1 then + self.outputSize:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.outputSize[self.dimension] = self.outputSize[self.dimension] + currentOutput:size(self.dimension) + for dim=1,self.outputSize:size(1) do + if dim ~= self.dimension then + -- take the maximum size (shouldn't change anything for batch dim) + self.outputSize[dim] = math.max(self.outputSize[dim], currentOutput:size(dim)) + end + end + end + end + self.output:resize(self.outputSize):zero() --zero for padding + + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = outs[i] + local outputWindow = self:windowNarrow(self.output, currentOutput, offset) + outputWindow:copy(currentOutput) + offset = offset + currentOutput:size(self.dimension) + end + return self.output +end + +function DepthConcat:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local gradOutputWindow = self:windowNarrow(gradOutput, currentOutput, offset) + local currentGradInput = self:rethrowErrors(module, i, 'updateGradInput', input, gradOutputWindow) + if i==1 then + self.gradInput:copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + offset = offset + currentOutput:size(self.dimension) + end + return self.gradInput +end + +function DepthConcat:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local gradOutputWindow = self:windowNarrow(gradOutput, currentOutput, offset) + self:rethrowErrors(module, i, 'accGradParameters', input, gradOutputWindow, scale) + offset = offset + currentOutput:size(self.dimension) + end +end + +function DepthConcat:backward(input, gradOutput, scale) + self.gradInput:resizeAs(input) + + scale = scale or 1 + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local gradOutputWindow = self:windowNarrow(gradOutput, currentOutput, offset) + local currentGradInput = self:rethrowErrors(module, i, 'backward', input, gradOutputWindow) + if i==1 then + self.gradInput:copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + offset = offset + currentOutput:size(self.dimension) + end + return self.gradInput +end + +function DepthConcat:accUpdateGradParameters(input, gradOutput, lr) + local offset 
= 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local gradOutputWindow = self:windowNarrow(gradOutput, currentOutput, offset) + self:rethrowErrors(module, i, 'accUpdateGradParameters', input, gradOutputWindow, lr) + offset = offset + currentOutput:size(self.dimension) + end +end diff --git a/contrib/lua-torch/nn/DistKLDivCriterion.lua b/contrib/lua-torch/nn/DistKLDivCriterion.lua new file mode 100644 index 000000000..bfad57567 --- /dev/null +++ b/contrib/lua-torch/nn/DistKLDivCriterion.lua @@ -0,0 +1,34 @@ +local DistKLDivCriterion, parent = torch.class('nn.DistKLDivCriterion', 'nn.Criterion') + +function DistKLDivCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function DistKLDivCriterion:updateOutput(input, target) + assert(input:dim() == target:dim() and + torch.LongTensor(input:size()):eq(torch.LongTensor(target:size())):all(), + 'input and target should have the same size') + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.DistKLDivCriterion_updateOutput( + input:cdata(), + target:cdata(), + self.output_tensor:cdata(), + self.sizeAverage + ) + self.output = self.output_tensor[1] + return self.output +end + +function DistKLDivCriterion:updateGradInput(input, target) + assert(input:dim() == target:dim() and + torch.LongTensor(input:size()):eq(torch.LongTensor(target:size())):all(), + 'input and target should have the same size') + input.THNN.DistKLDivCriterion_updateGradInput( + input:cdata(), + target:cdata(), + self.gradInput:cdata(), + self.sizeAverage + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/DistanceRatioCriterion.lua b/contrib/lua-torch/nn/DistanceRatioCriterion.lua new file mode 100644 index 000000000..6b79d0620 --- /dev/null +++ b/contrib/lua-torch/nn/DistanceRatioCriterion.lua @@ -0,0 +1,142 @@ +--[[ + Probabilistic Criterion for Triplet Siamese Model for learning embedding. 
+ Ref: https://arxiv.org/pdf/1610.00243.pdf + + loss = -log( exp(-X) / ( exp(-X) + exp(-Y) ) ) + where + X : Distance between similar samples + Y : Distance between dissimilar samples + + The loss can be broken down into the following log expansion + + loss = -log( exp(-X) ) - (-log( exp(-X) + exp(-Y) )) + = -log( exp(-X) ) + log( exp(-X) + exp(-Y) ) + = -(-X) + log( exp(-X) + exp(-Y) ) + = X + log( exp(-X) + exp(-Y) ) + + Gradients: + dLoss/dX = 1 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-X) + = 1 - exp(-X) / (exp(-X) + exp(-Y)) + + dLoss/dY = 0 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-Y) + = -exp(-Y) / (exp(-X) + exp(-Y)) + +--]] + +local DistanceRatioCriterion, parent = torch.class('nn.DistanceRatioCriterion', + 'nn.Criterion') + +function DistanceRatioCriterion:__init(sizeAverage) + parent.__init(self) + if sizeAverage ~= nil then + self.sizeAverage = sizeAverage + else + self.sizeAverage = true + end +end + +-- Forward +--[[ +-- X : Distance between similar samples +-- Y : Distance between dissimilar samples + loss = -log( exp(-X) ) - (-log( exp(-X) + exp(-Y) )) + = -log( exp(-X) ) + log( exp(-X) + exp(-Y) ) + = -(-X) + log( exp(-X) + exp(-Y) ) + = X + log( exp(-X) + exp(-Y) ) +--]] +function DistanceRatioCriterion:updateOutput(input) + assert(#input == 2, "Invalid number of inputs") + + local X = input[1] + local Y = input[2] + + assert(X:nElement() == Y:nElement(), "Number of distances don't match.") + assert(X:size(1) == Y:size(1), "Invalid distances' size.") + + -- Compute exp(-X) and exp(-Y) + self._expMinusX = self._expMinusX or X.new() + self._expMinusY = self._expMinusY or Y.new() + + -- Compute ( exp(-X) + exp(-Y) ) + self._expMinusX:resizeAs(X):copy(X):mul(-1):exp() + self._expMinusY:resizeAs(Y):copy(Y):mul(-1):exp() + + self._sumExpMinusXY = self._sumExpMinusXY or X.new() + self._sumExpMinusXY:resizeAs(self._expMinusX):copy(self._expMinusX) + :add(self._expMinusY) + + -- Compute log( exp(-X) + exp(-Y) ) + self._logSumExpMinusXY = self._logSumExpMinusXY or self._sumExpMinusXY.new() + self._logSumExpMinusXY:resizeAs(self._sumExpMinusXY) + :copy(self._sumExpMinusXY):log() + + -- Compute X + log( exp(-X) + exp(-Y) ) + self.loss = self.loss or self._logSumExpMinusXY.new() + self.loss:resizeAs(X):copy(X):add(self._logSumExpMinusXY) + + if self.sizeAverage then + return self.loss:sum()/X:size(1) + else + return self.loss:sum() + end +end + +-- Backward +--[[ +-- X : Distance between similar samples +-- Y : Distance between dissimilar samples + + Gradients: + dLoss/dX = 1 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-X) + = 1 - exp(-X) / (exp(-X) + exp(-Y)) + + dLoss/dY = 0 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-Y) + = -exp(-Y) / (exp(-X) + exp(-Y)) + +--]] +function DistanceRatioCriterion:updateGradInput(input) + assert(#input == 2, "Invalid number of inputs") + local X = input[1] + local Y = input[2] + assert(X:nElement() == Y:nElement(), "Number of distances don't match.") + assert(X:size(1) == Y:size(1), "Invalid distances' size.") + + -- dLoss/dX + -- -exp(-X) + self.dX = self.dX or X.new() + self.dX:resizeAs(self._expMinusX):copy(self._expMinusX):mul(-1) + + -- -exp(-X) / (exp(-X) + exp(-Y)) + self.dX:cdiv(self._sumExpMinusXY) + + -- 1 - exp(-X) / (exp(-X) + exp(-Y)) + self.dX:add(1) + + -- dLoss/dY + -- -exp(-Y) + self.dY = self.dY or Y.new() + self.dY:resizeAs(self._expMinusY):copy(self._expMinusY):mul(-1) + + -- -exp(-Y) / (exp(-X) + exp(-Y)) + self.dY:cdiv(self._sumExpMinusXY) + + if self.sizeAverage then + self.dX:div(X:size(1)) + self.dY:div(X:size(1)) + end + + return {self.dX, self.dY} +end +
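+--[[
+   Usage sketch (hypothetical numbers, not from the reference paper):
+
+   local crit = nn.DistanceRatioCriterion(true)
+   local X = torch.Tensor{0.5} -- distance between the similar pair
+   local Y = torch.Tensor{1.5} -- distance between the dissimilar pair
+   local loss = crit:updateOutput({X, Y})
+   -- loss = 0.5 + log(exp(-0.5) + exp(-1.5)) =~ 0.313
+   local grads = crit:updateGradInput({X, Y}) -- {dLoss/dX, dLoss/dY}
+--]]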
+function DistanceRatioCriterion:type(type, tensorCache) + if type then + self._expMinusX = nil + self._expMinusY = nil + self._sumExpMinusXY = nil + self._logSumExpMinusXY = nil + self.loss = nil + self.dX = nil + self.dY = nil + end + return parent.type(self, type, tensorCache) +end diff --git a/contrib/lua-torch/nn/DontCast.lua b/contrib/lua-torch/nn/DontCast.lua new file mode 100644 index 000000000..b89f5436b --- /dev/null +++ b/contrib/lua-torch/nn/DontCast.lua @@ -0,0 +1,124 @@ +local DontCast, parent = torch.class("nn.DontCast", "nn.Decorator") + +-- utility functions + +local function recursiveTypeCopy(dst, src, type_str) + if torch.type(src) == 'table' then + dst = (torch.type(dst) == 'table') and dst or {} + for k, v in pairs(src) do + dst[k] = recursiveTypeCopy(dst[k], v, type_str) + end + elseif torch.isTensor(src) then + dst = (torch.type(dst) == type_str) and dst or torch.getmetatable(type_str).new() + dst:resize(src:size()) + if src:nElement() > 0 then + dst:copy(src) + end + end + return dst +end + +local function tableTensorType(src) + if type(src) == 'table' then + local type_str, found + for k,v in pairs(src) do + type_str, found = tableTensorType(v) + if found then + return type_str, true + end + end + return type_str, found + else + return torch.type(src), torch.isTensor(src) + end +end + +-- DontCast methods and constructor + +function DontCast:__init(module, castin, castout, moduleType) + parent.__init(self, module) + self.castin = castin + self.castout = (castout == nil) and castin or castout + self.moduleType = moduleType + if (self.castin or self.castout) and not self.moduleType then + local moduleType, found = tableTensorType(module.output) + if found then + self.moduleType = moduleType + else + moduleType, found = tableTensorType(module:parameters()) + if found then + self.moduleType = moduleType + else + error"Cannot extrapolate moduleType. 
Provide constructor argument 4" + end + end + end +end + +function DontCast:updateOutput(input) + if self.castin and tableTensorType(input) ~= self.moduleType then + self._input = recursiveTypeCopy(self._input, input, self.moduleType) + input = self._input + end + + local output = self.modules[1]:updateOutput(input) + + if self.castout then + self.output = recursiveTypeCopy(self.output, output, tableTensorType(self.output)) + else + self.output = output + end + return self.output +end + +function DontCast:updateGradInput(input, gradOutput) + if self.castin and tableTensorType(input) ~= self.moduleType then + input = self._input + end + if self.castout and tableTensorType(gradOutput) ~= self.moduleType then + self._gradOutput = recursiveTypeCopy(self._gradOutput, gradOutput, self.moduleType) + gradOutput = self._gradOutput + end + + local gradInput = self.modules[1]:updateGradInput(input, gradOutput) + + if self.castin then + self.gradInput = recursiveTypeCopy(self.gradInput, gradInput, tableTensorType(self.gradInput)) + else + self.gradInput = gradInput + end + return self.gradInput +end + +function DontCast:accGradParameters(input, gradOutput, scale) + if self.castin and tableTensorType(input) ~= self.moduleType then + input = self._input + end + if self.castout and tableTensorType(gradOutput) ~= self.moduleType then + gradOutput = self._gradOutput + end + + self.modules[1]:accGradParameters(input, gradOutput, scale) +end + +function DontCast:accUpdateGradParameters(input, gradOutput, lr) + if self.castin and tableTensorType(input) ~= self.moduleType then + input = self._input + end + if self.castout and tableTensorType(gradOutput) ~= self.moduleType then + gradOutput = self._gradOutput + end + + self.modules[1]:accUpdateGradParameters(input, gradOutput, lr) +end + +-- dont cast (the essence thereof) +function DontCast:type(type) + if self.castout and tableTensorType(self.output) ~= type then + self.output = recursiveTypeCopy(nil, self.output, type) + end + if self.castin and tableTensorType(self.gradInput) ~= type then + self.gradInput = recursiveTypeCopy(nil, self.gradInput, type) + end + return self +end diff --git a/contrib/lua-torch/nn/DotProduct.lua b/contrib/lua-torch/nn/DotProduct.lua new file mode 100644 index 000000000..ccd347e6b --- /dev/null +++ b/contrib/lua-torch/nn/DotProduct.lua @@ -0,0 +1,61 @@ +local DotProduct, parent = torch.class('nn.DotProduct', 'nn.Module') + +function DotProduct:__init() + parent.__init(self) + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function DotProduct:updateOutput(input) + local input1, input2 = input[1], input[2] + if input1:dim() == 1 then + -- convert non batch input to batch input + input1 = input1:view(1,-1) + input2 = input2:view(1,-1) + end + if not self.buffer then + self.buffer = input1.new() + end + self.buffer:cmul(input1, input2) + self.output:sum(self.buffer, 2) + self.output:resize(input1:size(1)) + return self.output +end + +function DotProduct:updateGradInput(input, gradOutput) + local v1 = input[1] + local v2 = input[2] + local not_batch = false + + if #self.gradInput ~= 2 then + self.gradInput[1] = self.gradInput[1] or input[1].new() + self.gradInput[2] = self.gradInput[2] or input[2].new() + end + + if v1:dim() == 1 then + v1 = v1:view(1,-1) + v2 = v2:view(1,-1) + not_batch = true + end + + local gw1 = self.gradInput[1] + local gw2 = self.gradInput[2] + gw1:resizeAs(v1):copy(v2) + gw2:resizeAs(v2):copy(v1) + + local go = gradOutput:view(-1,1):expandAs(v1) + gw1:cmul(go) + gw2:cmul(go) + + if not_batch then + 
-- unbatch gradInput + self.gradInput[1]:set(gw1:select(1,1)) + self.gradInput[2]:set(gw2:select(1,1)) + end + + return self.gradInput +end + +function DotProduct:clearState() + if self.buffer then self.buffer:set() end + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Dropout.lua b/contrib/lua-torch/nn/Dropout.lua new file mode 100644 index 000000000..15f2f4699 --- /dev/null +++ b/contrib/lua-torch/nn/Dropout.lua @@ -0,0 +1,70 @@ +local Dropout, Parent = torch.class('nn.Dropout', 'nn.Module') + +function Dropout:__init(p,v1,inplace,stochasticInference) + Parent.__init(self) + self.p = p or 0.5 + self.train = true + self.inplace = inplace + self.stochastic_inference = stochasticInference or false + -- version 2 scales output during training instead of evaluation + self.v2 = not v1 + if self.p >= 1 or self.p < 0 then + error('<Dropout> illegal percentage, must be 0 <= p < 1') + end + self.noise = torch.Tensor() +end + +function Dropout:updateOutput(input) + if self.inplace then + self.output:set(input) + else + self.output:resizeAs(input):copy(input) + end + if self.p > 0 then + if self.train or self.stochastic_inference then + self.noise:resizeAs(input) + self.noise:bernoulli(1-self.p) + if self.v2 then + self.noise:div(1-self.p) + end + self.output:cmul(self.noise) + elseif not self.v2 then + self.output:mul(1-self.p) + end + end + return self.output +end + +function Dropout:updateGradInput(input, gradOutput) + if self.inplace then + self.gradInput:set(gradOutput) + else + self.gradInput:resizeAs(gradOutput):copy(gradOutput) + end + if self.train then + if self.p > 0 then + self.gradInput:cmul(self.noise) -- simply mask the gradients with the noise vector + end + else + if not self.v2 and self.p > 0 then + self.gradInput:mul(1-self.p) + end + end + return self.gradInput +end + +function Dropout:setp(p) + self.p = p +end + +function Dropout:__tostring__() + return string.format('%s(%f)', torch.type(self), self.p) +end + + +function Dropout:clearState() + if self.noise then + self.noise:set() + end + return Parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/ELU.lua b/contrib/lua-torch/nn/ELU.lua new file mode 100644 index 000000000..48a6caa2c --- /dev/null +++ b/contrib/lua-torch/nn/ELU.lua @@ -0,0 +1,45 @@ +local ELU, parent = torch.class('nn.ELU', 'nn.Module') + +--[[ + Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter + Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) + http://arxiv.org/pdf/1511.07289.pdf +--]] + +function ELU:__init(alpha, inplace) + parent.__init(self) + self.alpha = alpha or 1 + assert(type(self.alpha) == 'number') + self.inplace = inplace or false + assert(type(self.inplace) == 'boolean') +end + +function ELU:updateOutput(input) + local inplace = self.inplace or false + + input.THNN.ELU_updateOutput( + input:cdata(), + self.output:cdata(), + self.alpha, + inplace + ) + return self.output +end + +function ELU:updateGradInput(input, gradOutput) + local inplace = self.inplace or false + + input.THNN.ELU_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata(), + self.alpha, + inplace + ) + return self.gradInput +end + +function ELU:__tostring__() + return string.format('%s (alpha:%f)', torch.type(self), self.alpha) +end diff --git a/contrib/lua-torch/nn/ErrorMessages.lua b/contrib/lua-torch/nn/ErrorMessages.lua new file mode 100644 index 000000000..a5cbed053 --- /dev/null +++ b/contrib/lua-torch/nn/ErrorMessages.lua @@ -0,0 +1,19 @@ + +local mt = { + __index 
= function(table, key) + error("nn."..key.." is only supported for Float or Double Tensors.") + end +} + +local tensors = { + torch.ByteTensor, + torch.CharTensor, + torch.ShortTensor, + torch.IntTensor, + torch.LongTensor, +} + +for _, t in ipairs(tensors) do + t.nn = {} + setmetatable(t.nn, mt) +end diff --git a/contrib/lua-torch/nn/Euclidean.lua b/contrib/lua-torch/nn/Euclidean.lua new file mode 100644 index 000000000..509feff50 --- /dev/null +++ b/contrib/lua-torch/nn/Euclidean.lua @@ -0,0 +1,197 @@ +local Euclidean, parent = torch.class('nn.Euclidean', 'nn.Module') + +function Euclidean:__init(inputSize,outputSize) + parent.__init(self) + + self.weight = torch.Tensor(inputSize,outputSize) + self.gradWeight = torch.Tensor(inputSize,outputSize) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + + self.fastBackward = true + + self:reset() +end + +function Euclidean:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + if nn.oldSeed then + for i=1,self.weight:size(2) do + self.weight:select(2, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + end + else + self.weight:uniform(-stdv, stdv) + end +end + +local function view(res, src, ...) + local args = {...} + if src:isContiguous() then + res:view(src, table.unpack(args)) + else + res:reshape(src, table.unpack(args)) + end +end + +function Euclidean:updateOutput(input) + -- lazy initialize buffers + self._input = self._input or input.new() + self._weight = self._weight or self.weight.new() + self._expand = self._expand or self.output.new() + self._expand2 = self._expand2 or self.output.new() + self._repeat = self._repeat or self.output.new() + self._repeat2 = self._repeat2 or self.output.new() + + local inputSize, outputSize = self.weight:size(1), self.weight:size(2) + + -- y_j = || w_j - x || = || x - w_j || + if input:dim() == 1 then + view(self._input, input, inputSize, 1) + self._expand:expandAs(self._input, self.weight) + self._repeat:resizeAs(self._expand):copy(self._expand) + self._repeat:add(-1, self.weight) + self.output:norm(self._repeat, 2, 1) + self.output:resize(outputSize) + elseif input:dim() == 2 then + local batchSize = input:size(1) + + view(self._input, input, batchSize, inputSize, 1) + self._expand:expand(self._input, batchSize, inputSize, outputSize) + -- make the expanded tensor contiguous (requires lots of memory) + self._repeat:resizeAs(self._expand):copy(self._expand) + + self._weight:view(self.weight, 1, inputSize, outputSize) + self._expand2:expandAs(self._weight, self._repeat) + + if torch.type(input) == 'torch.CudaTensor' then + -- requires lots of memory, but minimizes cudaMallocs and loops + self._repeat2:resizeAs(self._expand2):copy(self._expand2) + self._repeat:add(-1, self._repeat2) + else + self._repeat:add(-1, self._expand2) + end + + self.output:norm(self._repeat, 2, 2) + self.output:resize(batchSize, outputSize) + else + error"1D or 2D input expected" + end + + return self.output +end + +function Euclidean:updateGradInput(input, gradOutput) + if not self.gradInput then + return + end + + self._div = self._div or input.new() + self._output = self._output or self.output.new() + self._gradOutput = self._gradOutput or input.new() + self._expand3 = self._expand3 or input.new() + + if not self.fastBackward then + self:updateOutput(input) + end + + local inputSize, outputSize = self.weight:size(1), self.weight:size(2) + + --[[ + dy_j -2 * (w_j - x) x - w_j + ---- = --------------- = ------- + dx 2 || w_j 
- x || y_j + --]] + + -- to prevent div by zero (NaN) bugs + self._output:resizeAs(self.output):copy(self.output):add(0.0000001) + view(self._gradOutput, gradOutput, gradOutput:size()) + self._div:cdiv(gradOutput, self._output) + if input:dim() == 1 then + self._div:resize(1, outputSize) + self._expand3:expandAs(self._div, self.weight) + + if torch.type(input) == 'torch.CudaTensor' then + self._repeat2:resizeAs(self._expand3):copy(self._expand3) + self._repeat2:cmul(self._repeat) + else + self._repeat2:cmul(self._repeat, self._expand3) + end + + self.gradInput:sum(self._repeat2, 2) + self.gradInput:resizeAs(input) + elseif input:dim() == 2 then + local batchSize = input:size(1) + + self._div:resize(batchSize, 1, outputSize) + self._expand3:expand(self._div, batchSize, inputSize, outputSize) + + if torch.type(input) == 'torch.CudaTensor' then + self._repeat2:resizeAs(self._expand3):copy(self._expand3) + self._repeat2:cmul(self._repeat) + else + self._repeat2:cmul(self._repeat, self._expand3) + end + + self.gradInput:sum(self._repeat2, 3) + self.gradInput:resizeAs(input) + else + error"1D or 2D input expected" + end + + return self.gradInput +end + +function Euclidean:accGradParameters(input, gradOutput, scale) + local inputSize, outputSize = self.weight:size(1), self.weight:size(2) + scale = scale or 1 + + --[[ + dy_j 2 * (w_j - x) w_j - x + ---- = --------------- = ------- + dw_j 2 || w_j - x || y_j + --]] + -- assumes a preceding call to updateGradInput + if input:dim() == 1 then + self.gradWeight:add(-scale, self._repeat2) + elseif input:dim() == 2 then + self._sum = self._sum or input.new() + self._sum:sum(self._repeat2, 1) + self._sum:resize(inputSize, outputSize) + self.gradWeight:add(-scale, self._sum) + else + error"1D or 2D input expected" + end +end + +function Euclidean:type(type, tensorCache) + if type then + -- prevent premature memory allocations + self:clearState() + end + return parent.type(self, type, tensorCache) +end + +function Euclidean:clearState() + nn.utils.clear(self, { + '_input', + '_output', + '_gradOutput', + '_weight', + '_div', + '_sum', + '_expand', + '_expand2', + '_expand3', + '_repeat', + '_repeat2', + }) + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Exp.lua b/contrib/lua-torch/nn/Exp.lua new file mode 100644 index 000000000..f41569026 --- /dev/null +++ b/contrib/lua-torch/nn/Exp.lua @@ -0,0 +1,9 @@ +local Exp = torch.class('nn.Exp', 'nn.Module') + +function Exp:updateOutput(input) + return self.output:exp(input) +end + +function Exp:updateGradInput(input, gradOutput) + return self.gradInput:cmul(self.output, gradOutput) +end diff --git a/contrib/lua-torch/nn/FlattenTable.lua b/contrib/lua-torch/nn/FlattenTable.lua new file mode 100644 index 000000000..1c182557c --- /dev/null +++ b/contrib/lua-torch/nn/FlattenTable.lua @@ -0,0 +1,106 @@ +local FlattenTable, parent = torch.class('nn.FlattenTable', 'nn.Module') + +function FlattenTable:__init() + parent.__init(self) + + self.output = {} + self.input_map = {} + self.gradInput = {} +end + +-- Recursive function to flatten a table (output is a table) +local function flatten(output, input) + local input_map -- has the same structure as input, but stores the + -- indices to the corresponding output + if type(input) == 'table' then + input_map = {} + -- forward DFS order + for i = 1, #input do + input_map[#input_map+1] = flatten(output, input[i]) + end + else + input_map = #output + 1 + output[input_map] = input -- append the tensor + end + return input_map +end + +-- Recursive function 
to check if we need to rebuild the output table
+local function checkMapping(output, input, input_map)
+   if input_map == nil or output == nil or input == nil then
+      return false
+   end
+   if type(input) == 'table' then
+      if type(input_map) ~= 'table' then
+         return false
+      end
+      if #input ~= #input_map then
+         return false
+      end
+      -- forward DFS order
+      for i = 1, #input do
+         local ok = checkMapping(output, input[i], input_map[i])
+         if not ok then
+            return false
+         end
+      end
+      return true
+   else
+      if type(input_map) ~= 'number' then
+         return false
+      end
+      return output[input_map] == input
+   end
+end
+
+-- During BPROP we have to build a gradInput with the same shape as the
+-- input. This is a recursive function to build up a gradInput
+local function inverseFlatten(gradOutput, input_map)
+   if type(input_map) == 'table' then
+      local gradInput = {}
+      for i = 1, #input_map do
+         gradInput[#gradInput + 1] = inverseFlatten(gradOutput, input_map[i])
+      end
+      return gradInput
+   else
+      return gradOutput[input_map]
+   end
+end
+
+function FlattenTable:updateOutput(input)
+   assert(type(input) == 'table', 'input must be a table')
+   -- To avoid rebuilding the flattened table on every updateOutput call,
+   -- we do a DFS pass over the existing output table and the inputs to
+   -- see if it needs to be rebuilt.
+   if not checkMapping(self.output, input, self.input_map) then
+      self.output = {}
+      self.input_map = flatten(self.output, input)
+   end
+   return self.output
+end
+
+function FlattenTable:updateGradInput(input, gradOutput)
+   assert(type(input) == 'table', 'input must be a table')
+   assert(type(gradOutput) == 'table', 'gradOutput must be a table')
+   -- If the input changes between the updateOutput and updateGradInput calls,
+   -- then we may have to rebuild the input_map! Here we assume that the
+   -- input_map is valid and that forward has already been called.
+
+   -- We do, however, check that the gradInput is valid:
+   if not checkMapping(gradOutput, self.gradInput, self.input_map) then
+      self.gradInput = inverseFlatten(gradOutput, self.input_map)
+   end
+
+   return self.gradInput
+end
+
+function FlattenTable:type(type, tensorCache)
+   -- This function just stores references so we don't need to do any type
+   -- conversions. Just force the tables to be empty.
+   self:clearState()
+end
+
+function FlattenTable:clearState()
+   self.input_map = {}
+   return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/GPU.lua b/contrib/lua-torch/nn/GPU.lua
new file mode 100644
index 000000000..758618d8b
--- /dev/null
+++ b/contrib/lua-torch/nn/GPU.lua
@@ -0,0 +1,273 @@
+------------------------------------------------------------------------
+--[[ GPU ]]--
+-- Decorates a module such that its parameters are
+-- hosted on a specified GPU device.
+-- The operations are also executed on that device.
+-- Arguments input and gradOutput are converted to the specified device
+-- before being fed to the decorated module.
+-- Returned output is on the specified outdevice (defaults to device).
+-- Returned gradInput is allocated on the same device as the input.
+-- The unit test is located in cunn.
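
-- [Editorial aside] A minimal sketch of nn.FlattenTable from the previous
-- file; the nested table shape is an illustrative assumption.
local ft = nn.FlattenTable()
local a, b, c = torch.rand(2), torch.rand(2), torch.rand(2)
local flat = ft:forward{a, {b, {c}}}
-- flat = {a, b, c}: tensors are referenced in DFS order, not copied
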
+------------------------------------------------------------------------ +local GPU, parent = torch.class("nn.GPU", "nn.Container") + +function GPU:__init(module, device, outdevice) + parent.__init(self) + assert(torch.type(device) == 'number') + self.device = device + self.outdevice = outdevice or device + + assert(torch.isTypeOf(module, 'nn.Module')) + self.modules[1] = module + + if module:type():find('torch%.Cuda.*Tensor') then + self:type(module:type()) + end +end + +function GPU.recursiveModuleDevice(obj, device) + if type(obj) == 'table' and not torch.isTypeOf(obj, 'nn.GPU') and not obj.__noGPU__ then + for k,v in pairs(obj) do + obj[k] = GPU.recursiveModuleDevice(v, device) + end + elseif torch.type(obj):match('torch.Cuda.*Tensor') then + if obj:getDevice() ~= device then + obj = obj:clone() -- this will reallocate it to device + local newdevice = obj:getDevice() + -- when nElement() == 0 newdevice is 0 + assert(newdevice == device or newdevice == 0) + end + end + assert(obj ~= nil) + return obj +end + +-- set the device of the decorated module +function GPU:setDevice(device) + self.device = device or self.device + + assert(self.modules[1]) + self.modules[1] = cutorch.withDevice(self.device, function() + return self.recursiveModuleDevice(self.modules[1], self.device) + end) + return self +end + +-- when proto is a device number, returns a dst that has device device for each element in src +-- otherwise, if proto is a table/tensor, makes sure dst is a identical to src, yet on the same device as proto +function GPU.recursiveSetDevice(dst, src, proto) + local device, prototable + if torch.isTensor(proto) then + device = proto:getDevice() + elseif torch.type(proto) == 'number' then + device = proto + elseif torch.type(proto) == 'table' then + prototable = true + else + error"Expecting number, table or tensor for arg 3 (proto)" + end + if torch.type(src) == 'table' then + dst = torch.type(dst) == 'table' and dst or {} + for k,v in ipairs(src) do + dst[k] = GPU.recursiveSetDevice(dst[k], v, prototable and proto[k] or device) + end + for k=#src+1,#dst do + dst[k] = nil + end + elseif torch.type(src):match('torch.Cuda.*Tensor') and src:getDevice() ~= device and src:getDevice() ~= 0 then + if not (torch.type(dst):match('torch.Cuda.*Tensor') and dst:getDevice() == device) then + dst = src.new() + end + cutorch.withDevice(device, function() dst:resizeAs(src):copy(src) end) + else + dst = src + end + return dst +end + +function GPU:updateOutput(input) + if self._type:find('torch%.Cuda.*Tensor') then + self._input = self.recursiveSetDevice(self._input, input, self.device) + + local output = cutorch.withDevice(self.device, function() + return self.modules[1]:updateOutput(self._input) + end) + + if self.device ~= self.outdevice then + self.output = self.recursiveSetDevice(self.output, output, self.outdevice) + else + self.output = output + end + else + self.output = self.modules[1]:updateOutput(input) + end + + return self.output +end + +function GPU:updateGradInput(input, gradOutput) + if self._type:find('torch%.Cuda.*Tensor') then + self._gradOutput = self.recursiveSetDevice(self._gradOutput, gradOutput, self.device) + + local gradInput = cutorch.withDevice(self.device, function() + return self.modules[1]:updateGradInput(self._input, self._gradOutput) + end) + + self.gradInput = self.recursiveSetDevice(self.gradInput, gradInput, input) + else + self.gradInput = self.modules[1]:updateGradInput(input, gradOutput) + end + + return self.gradInput +end + +function GPU:accGradParameters(input, 
gradOutput, scale) + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() + self.modules[1]:accGradParameters(self._input, self._gradOutput, scale) + end) + else + self.modules[1]:accGradParameters(input, gradOutput, scale) + end +end + +function GPU:apply(callback) + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.apply(self, callback) end) + else + parent.apply(self, callback) + end +end + +function GPU:type(type, typecache) + if type and type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.type(self, type, typecache) end) + self:setDevice() + else + self.output = nil + self.gradInput = nil + self._input = nil + self._gradOutput = nil + parent.type(self, type, typecache) + end + return self +end + +function GPU:clearState() + nn.utils.clear(self, 'output', 'gradInput') + self._input = nil + self._gradOutput = nil + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.clearState(self) end) + else + parent.clearState(self) + end +end + +function GPU:zeroGradParameters() + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.zeroGradParameters(self) end) + else + parent.zeroGradParameters(self) + end +end + +function GPU:updateParameters(lr) + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.updateParameters(self, lr) end) + else + parent.updateParameters(self, lr) + end +end + +function GPU:training() + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.training(self) end) + else + parent.training(self) + end +end + +function GPU:evaluate() + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.evaluate(self) end) + else + parent.evaluate(self) + end +end + +function GPU:share(mlp, ...) + local args = {...} + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.share(self, mlp, unpack(args)) end) + else + parent.share(self, mlp, unpack(args)) + end + return self +end + +function GPU:reset(...) + local args = {...} + if self._type:find('torch%.Cuda.*Tensor') then + cutorch.withDevice(self.device, function() parent.reset(self, unpack(args)) end) + else + parent.reset(self, unpack(args)) + end + return self +end + +function GPU:clone(...) + local args = {...} + if self._type:find('torch%.Cuda.*Tensor') then + return cutorch.withDevice(self.device, function() parent.clone(self, unpack(args)) end) + else + return parent.clone(self, unpack(args)) + end +end + +function GPU:write(file) + -- Write all values in the object as a table. + local object = {} + for k, v in pairs(self) do + object[k] = v + end + local header = {self._type, self.device} + file:writeObject(header) + file:writeObject(object) +end + +function GPU:read(file) + local header = file:readObject() + local object + if header[1] and header[1]:find('torch%.Cuda.*Tensor') then + local device = header[2] + if device > cutorch.getDeviceCount() then + print"Warning : model was saved with more devices than available on current host." 
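
-- [Editorial aside] How the nn.GPU decorator above is typically used; this
-- sketch assumes cutorch/cunn are installed and two devices are available.
local net = nn.Sequential()
   :add(nn.GPU(nn.Linear(10000, 10000), 1))
   :add(nn.GPU(nn.Linear(10000, 10000), 2, cutorch.getDevice()))
net:cuda() -- each Linear is hosted on its own device; the final output comes back to the current one
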
+ print"Attempting to load module onto device 1" + device = 1 + end + object = cutorch.withDevice(device, function() return file:readObject() end) + else + object = file:readObject() + end + + for k, v in pairs(object) do + self[k] = v + end +end + +function GPU:__tostring__() + if self.modules[1].__tostring__ then + return torch.type(self) .. '(' .. self.device ..') @ ' .. self.modules[1]:__tostring__() + else + return torch.type(self) .. '(' .. self.device ..') @ ' .. torch.type(self.modules[1]) + end +end + +function GPU:accUpdateGradParameters(input, gradOutput, lr) + error("Not Implemented for "..torch.type(self)) +end + +function GPU:sharedAccUpdateGradParameters(input, gradOutput, lr) + error("Not Implemented for "..torch.type(self)) +end diff --git a/contrib/lua-torch/nn/GatedLinearUnit.lua b/contrib/lua-torch/nn/GatedLinearUnit.lua new file mode 100644 index 000000000..5273abfd4 --- /dev/null +++ b/contrib/lua-torch/nn/GatedLinearUnit.lua @@ -0,0 +1,27 @@ +local GatedLinearUnit, parent = torch.class('nn.GatedLinearUnit', 'nn.Module') + +function GatedLinearUnit:__init(dim) + parent.__init(self) + self.dim = dim +end + +function GatedLinearUnit:updateOutput(input) + local dim = self.dim or input:dim() + input.THNN.GatedLinear_updateOutput( + input:cdata(), + self.output:cdata(), + dim + ) + return self.output +end + +function GatedLinearUnit:updateGradInput(input, gradOutput) + local dim = self.dim or input:dim() + input.THNN.GatedLinear_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + dim + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/GradientReversal.lua b/contrib/lua-torch/nn/GradientReversal.lua new file mode 100644 index 000000000..c08b1dfb0 --- /dev/null +++ b/contrib/lua-torch/nn/GradientReversal.lua @@ -0,0 +1,32 @@ +local GradientReversal, parent = torch.class('nn.GradientReversal', 'nn.Module') + +GradientReversal.__version = 2 + +function GradientReversal:__init(lambda) + lambda = lambda or 1 + parent.__init(self) + self.lambda = lambda +end + +function GradientReversal:setLambda(lambda) + self.lambda = lambda +end + +function GradientReversal:updateOutput(input) + self.output:set(input) + return self.output +end + +function GradientReversal:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(gradOutput) + self.gradInput:copy(gradOutput) + self.gradInput:mul(-self.lambda) + return self.gradInput +end + +function GradientReversal:read(file, version) + parent.read(self, file) + if version < 2 then + self.lambda = 1 + end +end diff --git a/contrib/lua-torch/nn/HardShrink.lua b/contrib/lua-torch/nn/HardShrink.lua new file mode 100644 index 000000000..85ff5909c --- /dev/null +++ b/contrib/lua-torch/nn/HardShrink.lua @@ -0,0 +1,25 @@ +local HardShrink, parent = torch.class('nn.HardShrink', 'nn.Module') + +function HardShrink:__init(lam) + parent.__init(self) + self.lambda = lam or 0.5 +end + +function HardShrink:updateOutput(input) + input.THNN.HardShrink_updateOutput( + input:cdata(), + self.output:cdata(), + self.lambda + ) + return self.output +end + +function HardShrink:updateGradInput(input, gradOutput) + input.THNN.HardShrink_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.lambda + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/HardTanh.lua b/contrib/lua-torch/nn/HardTanh.lua new file mode 100644 index 000000000..07cfc6255 --- /dev/null +++ b/contrib/lua-torch/nn/HardTanh.lua @@ -0,0 +1,37 @@ +local HardTanh, parent = torch.class('nn.HardTanh', 
'nn.Module') + +function HardTanh:__init(min_value, max_value, inplace) + parent.__init(self) + self.min_val = min_value or -1 + self.max_val = max_value or 1 + self.inplace = inplace or false + if (inplace and type(inplace) ~= 'boolean') then + error('in-place flag must be boolean') + end + assert(self.max_val>self.min_val, 'max_value must be larger than min_value') +end + +function HardTanh:updateOutput(input) + self.min_val = self.min_val or -1 + self.max_val = self.max_val or 1 + input.THNN.HardTanh_updateOutput( + input:cdata(), + self.output:cdata(), + self.min_val, + self.max_val, + self.inplace or false + ) + return self.output +end + +function HardTanh:updateGradInput(input, gradOutput) + input.THNN.HardTanh_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.min_val, + self.max_val, + self.inplace or false + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/HingeEmbeddingCriterion.lua b/contrib/lua-torch/nn/HingeEmbeddingCriterion.lua new file mode 100644 index 000000000..13ad00f19 --- /dev/null +++ b/contrib/lua-torch/nn/HingeEmbeddingCriterion.lua @@ -0,0 +1,43 @@ +local HingeEmbeddingCriterion, parent = torch.class('nn.HingeEmbeddingCriterion', 'nn.Criterion') + +function HingeEmbeddingCriterion:__init(margin) + parent.__init(self) + self.margin = margin or 1 + self.sizeAverage = true +end + +function HingeEmbeddingCriterion:updateOutput(input,y) + self.buffer = self.buffer or input.new() + if not torch.isTensor(y) then + self.ty = self.ty or input.new():resize(1) + self.ty[1]=y + y=self.ty + end + + self.buffer:resizeAs(input):copy(input) + self.buffer[torch.eq(y, -1)] = 0 + self.output = self.buffer:sum() + + self.buffer:fill(self.margin):add(-1, input) + self.buffer:cmax(0) + self.buffer[torch.eq(y, 1)] = 0 + self.output = self.output + self.buffer:sum() + + if (self.sizeAverage == nil or self.sizeAverage == true) then + self.output = self.output / input:nElement() + end + + return self.output +end + +function HingeEmbeddingCriterion:updateGradInput(input, y) + if not torch.isTensor(y) then self.ty[1]=y; y=self.ty end + self.gradInput:resizeAs(input):copy(y) + self.gradInput[torch.cmul(torch.eq(y, -1), torch.gt(input, self.margin))] = 0 + + if (self.sizeAverage == nil or self.sizeAverage == true) then + self.gradInput:mul(1 / input:nElement()) + end + + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Identity.lua b/contrib/lua-torch/nn/Identity.lua new file mode 100644 index 000000000..5e6ccb624 --- /dev/null +++ b/contrib/lua-torch/nn/Identity.lua @@ -0,0 +1,30 @@ +local Identity, _ = torch.class('nn.Identity', 'nn.Module') + +function Identity:updateOutput(input) + self.output = input + return self.output +end + + +function Identity:updateGradInput(input, gradOutput) + self.gradInput = gradOutput + return self.gradInput +end + +function Identity:clearState() + -- don't call set because it might reset referenced tensors + local function clear(f) + if self[f] then + if torch.isTensor(self[f]) then + self[f] = self[f].new() + elseif type(self[f]) == 'table' then + self[f] = {} + else + self[f] = nil + end + end + end + clear('output') + clear('gradInput') + return self +end diff --git a/contrib/lua-torch/nn/Index.lua b/contrib/lua-torch/nn/Index.lua new file mode 100644 index 000000000..6aa429708 --- /dev/null +++ b/contrib/lua-torch/nn/Index.lua @@ -0,0 +1,32 @@ +local Index, parent = torch.class('nn.Index', 'nn.Module') + +function Index:__init(dimension) + parent.__init(self) + self.dimension = dimension + 
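
   -- [Editorial note] nn.Index consumes a {tensor, indices} pair, so the
   -- gradInput below is made a pair as well; the second slot exists only to
   -- mirror the input structure, since no gradient flows into the integer
   -- index tensor (it is resized and zeroed in updateGradInput).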
self.gradInput = {self.gradInput, self.gradInput.new()} +end + +function Index:updateOutput(input) + local t = input[1] + local index = input[2] + self.output:index(t, self.dimension, index) + return self.output +end + +function Index:updateGradInput(input, gradOutput) + local t = input[1] + local index = input[2] + + self.gradInput[2]:resize(index:size()):zero() + local gradInput = self.gradInput[1] -- no gradient for the index variable + gradInput:resizeAs(t):zero() + gradInput:indexAdd(self.dimension, index, gradOutput) + return self.gradInput +end + +function Index:clearState() + self.gradInput[1]:set() + self.gradInput[2]:set() + self.output:set() + return self +end diff --git a/contrib/lua-torch/nn/IndexLinear.lua b/contrib/lua-torch/nn/IndexLinear.lua new file mode 100644 index 000000000..928e5d3f2 --- /dev/null +++ b/contrib/lua-torch/nn/IndexLinear.lua @@ -0,0 +1,398 @@ +local ffi = require 'ffi' +local IndexLinear, parent = torch.class('nn.IndexLinear', 'nn.Module') + + + +function IndexLinear:__init(inputSize, outputSize, doGradInput, keysOffset, weight, bias, normalize) + parent.__init(self) + + -- We need for 3 extra parameters per feature + -- if we normalize: + -- * The max-abs value + -- * The inverse of the max-abs value + -- * The per-feature bias + -- We keep an extra placeholder for further per learning rate feature manipulation. + -- So it's 4 total. + self.normalize = normalize and 4 or 0 + + -- This is important to keep the possibility of sharing a weight + -- directly, without having to allocate it first. + -- The reason is these weights can be very large. + self.weight = weight or torch.Tensor(inputSize, outputSize + self.normalize):zero() + self.bias = bias or torch.Tensor(outputSize):zero() + self.inputSize = self.weight and self.weight:size(1) or inputSize + self.outputSize = self.weight and (self.weight:size(2)-self.normalize) or outputSize + + -- gradWeight is not initialized as we're doing dense gradient accumulation + -- This is more efficient and avoids allocating a giant useless gradWeight + self.gradWeight = torch.Tensor() + + -- gradBias still works the same as it's already dense + self.gradBias = torch.Tensor(self.outputSize):zero() + + -- Buffers + self.gradWeightBuffer = torch.Tensor() + self.valuesBuffer = torch.Tensor() + self.normalizedValues = torch.Tensor() + + -- That is used to accumulate keys and gradWeight + -- when doing gradients accumulations + self.running = { + cumSumSizes = {}, + keys = {}, + gradWeight = {}, + counter = 1, + } + + -- self.sizes, self.cumSumSizes are calculated on the CPU even when using CUDA. + -- These two tables make it easier to resize these buffers instead of re-allocating them. + -- self.*Cache[1] always contains values on CPU. + -- If CUDA is being used, self.*Cache[2] contains values on GPU. 
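
-- [Editorial aside] A quick sketch of nn.Index from the previous file; the
-- values are illustrative.
local idx = nn.Index(1)
local picked = idx:forward{torch.Tensor{10, 20, 30}, torch.LongTensor{3, 1}}
-- picked = {30, 10}, i.e. tensor:index(1, indices)
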
+ self.sizesCache = {} + self.cumSumSizesCache = {} + + -- A few options + self.weightDecay = 0 + self.doGradInput = doGradInput or false + self.offset = keysOffset and keysOffset-1 or -1 -- if this adds self.offset to indices +end + +-- Reset all the parameters needed +-- for normalization to 0 +function IndexLinear:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(2)) + end + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv):mul(0.000001) + if self.normalize and self.normalize > 0 then + self.weight[{{}, {1,self.normalize}}]:zero() + end +end + +function IndexLinear:reshapeInput(input) + assert(type(input) == 'table') + + local ninputs = 0 + for _, v in ipairs(input) do + ninputs = ninputs + 1 + end + + assert(ninputs == 2 or ninputs == 3) + + -- If format is: + -- { + -- torch.LongTensor(size1+size2+...+sizeN), -- concatenated batch of keys + -- torch.Tensor(size1+size2+...+sizeN), -- concatenated batch of values + -- torch.LongTensor(N), -- keys/values sizes (values are {size1, ..., sizeN}) + -- } + if ninputs == 3 then + local fkeys = input[1] + local fvals = input[2] + local fsizes = torch.isTensor(input[3]) and input[3] or fkeys.new{input[3]} + assert(fkeys:nElement() == fvals:nElement(), 'Keys and values should be of same size') + assert(fkeys:dim() == 1, 'Keys and values should be 1D') + self.isFlat = true + self.noBatch = false + return fkeys, fvals, fsizes + end + + local keys = input[1] + local values = input[2] + local lkeys, lvalues + + -- If format is: + -- { + -- { torch.LongTensor(size1), torch.LongTensor(size2), ..., torch.LongTensor(sizeN) }, -- batch of keys + -- { torch.Tensor(size1), torch.Tensor(size2), ..., torch.Tensor(sizeN) }, -- batch of values, + -- } + if type(keys) == 'table' and type(values) == 'table' then + lkeys, lvalues = keys, values + self.isFlat = false + self.noBatch = false + + -- If format is not a batch: + -- { + -- torch.LongTensor(size1), -- keys + -- torch.Tensor(size1), -- values, + -- } + elseif torch.isTensor(keys) and torch.isTensor(values) then + lkeys, lvalues = {keys}, {values} + self.isFlat = false + self.noBatch = true + else + error('Wrong input format.') + end + + for i=1,#lkeys do + assert(lvalues[i]:dim() == 1 and lkeys[i]:dim() == 1, "keys and values should be 1D") + end + + return lkeys, lvalues +end + +function IndexLinear:longTensor(...) + if (self:type() == 'torch.CudaTensor') then + return torch.CudaLongTensor(...) + else + return torch.LongTensor(...) 
+ end +end + +function IndexLinear:flattenInputs(input) + local lkeys, lvalues, sizes = self:reshapeInput(input) + + local counter = self.running.counter + + -- Ensure everything is of the right type + local isCuda = (self:type() == 'torch.CudaTensor') + self.running.keys[counter] = self.running.keys[counter] or self:longTensor() + self.keys = self.running.keys[counter] + + if self.isFlat then + self.values = self.values or lvalues.new() + self.sizes = self.sizes or self:longTensor() + + self.keys:resize(lkeys:size()):copy(lkeys) + self.values:resize(lvalues:size()):copy(lvalues) + self.sizes = sizes + self.cumSumSizes = self.cumSumSizes or self.sizes.new() + self.cumSumSizes:cumsum(self.sizes) + else + self.values = self.values or lvalues[1].new() + + self.lkeys = lkeys + self.lvalues = lvalues + local batchSize = #self.lkeys + + self.sizesCache[1] = self.sizesCache[1] or torch.LongTensor(batchSize) + self.cumSumSizesCache[1] = self.cumSumSizesCache[1] or torch.LongTensor(batchSize) + + self.sizes = self.sizesCache[1] + self.cumSumSizes = self.cumSumSizesCache[1] + + self.sizes:resize(batchSize) + self.cumSumSizes:resize(batchSize) + + for i = 1,batchSize do + self.sizes[i] = self.lkeys[i]:size(1) + end + self.cumSumSizes:cumsum(self.sizes) + + self.keys:cat(self.lkeys, 1) + self.values:cat(self.lvalues, 1) + + if isCuda then + -- Get the GPU cache + self.sizesCache[2] = self.sizesCache[2] or torch.CudaLongTensor() + self.cumSumSizesCache[2] = self.cumSumSizesCache[2] or torch.CudaLongTensor() + + self.sizes = self.sizesCache[2] + self.cumSumSizes = self.cumSumSizesCache[2] + + -- Resize and copy to GPU + self.sizes:resize(batchSize):copy(self.sizesCache[1]) + self.cumSumSizes:resize(batchSize):copy(self.cumSumSizesCache[1]) + end + end + self.running.cumSumSizes[counter] = self.cumSumSizes +end + +function IndexLinear:updateOutput(input) + + self:flattenInputs(input) + + self.values.THNN.IndexLinear_updateOutput( + self.keys:cdata(), + self.offset, + self.values:cdata(), + self.sizes:cdata(), + self.cumSumSizes:cdata(), + self.output:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.normalizedValues:cdata(), + self.train and 1 or 0 + ) + + if self.noBatch then + self.output:resize(self.output:size(2)) + end + return self.output +end + +function IndexLinear:accUpdateGradParameters(input, gradOutput, scale) + self.values.THNN.IndexLinear_accUpdateGradParameters( + self.keys:cdata(), + self.offset, + self.normalize > 0 and self.normalizedValues:cdata() or self.values:cdata(), + self.sizes:cdata(), + self.cumSumSizes:cdata(), + gradOutput:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.weightDecay or 0, + scale or 1 + ) +end + +function IndexLinear:accGradParameters(input, gradOutput, scale) + + local counter = self.running.counter + + -- Same as the running.keys in the updateOutput function, + -- get a table of dense running.gradWeight + self.running.gradWeight[counter] = self.running.gradWeight[counter] or self.values.new() + self.values.THNN.IndexLinear_accGradParameters( + self.keys:cdata(), + self.offset, + self.normalize > 0 and self.normalizedValues:cdata() or self.values:cdata(), + self.sizes:cdata(), + self.cumSumSizes:cdata(), + gradOutput:cdata(), + self.running.gradWeight[counter]:cdata(), + self.gradBias:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.valuesBuffer:cdata(), + self.weightDecay or 0, + scale or 1 + ) + + -- Increment the running counter to create a new buffer + -- if we don't flush them in zerogradParameters + self.running.counter 
= self.running.counter + 1 +end + +function IndexLinear:updateGradInput(input, gradOutput) + self.gradInput = {} + -- Revamped from nn.SparseLinear.updateGradInput + if self.doGradInput and self.normalize > 0 then + error('updateGradInput is not implemented in max-normalize mode') + end + + local ini = self.weight:size(1) + + if self.doGradInput then + local gi = gradOutput.new() + if gradOutput:dim() == 1 then + gi:resize(self.weight:size(1)) + gi:mv(self.weight,gradOutput) + gi:resize(1, self.weight:size(1)) + elseif gradOutput:dim() == 2 then + gi:resize(gradOutput:size(1), self.weight:size(1)) + gi:mm(gradOutput, self.weight:t()) + end + + local indices = self.running.keys[1].new(ini):range(1, ini) + + if self.isFlat then + self.gradInput[1] = torch.repeatTensor(indices, gi:size(1), 1) + self.gradInput[2] = gi + else + self.gradInput[1] = {} + self.gradInput[2] = {} + for i = 1,gi:size(1) do + self.gradInput[1][i] = self.running.keys[1].new(ini) + self.gradInput[1][i]:copy(indices) + self.gradInput[2][i] = gradOutput.new(ini) + self.gradInput[2][i]:copy(gi[i]) + end + end + end + + if self.noBatch then + if self.isFlat then + self.gradInput = {self.gradInput[1]:resize(ini), self.gradInput[2]:resize(ini)} + else + self.gradInput = {self.gradInput[1][1], self.gradInput[2][1]} + end + end + return self.gradInput +end + +function IndexLinear:updateParameters(lr) + local counter = self.running.counter + if counter > 1 then + if counter == 2 then + self.updateKeys = self.running.keys[1] + self.gradWeight = self.running.gradWeight[1] + else + self.updateKeysBuffer = self.updateKeysBuffer or self:longTensor() + local lkeys = {} + local lgweights = {} + local totalSize = 0 + local lCumSumSizes = {} + for i=1,counter-1 do + lkeys[i] = self.running.keys[i] + -- Change layout to take advantage of the 1-D contiguous torch.cat + lgweights[i] = self.running.gradWeight[i]:contiguous() + lgweights[i]:resize(lgweights[i]:nElement()) + lCumSumSizes[i] = totalSize + self.running.cumSumSizes[i] + totalSize = totalSize + lkeys[i]:size(1) + end + + self.updateKeysBuffer:cat(lkeys, 1) + self.gradWeightBuffer:cat(lgweights, 1) + self.cumSumSizes:cat(lCumSumSizes, 1) + self.gradWeightBuffer:resize(totalSize, self.outputSize) + self.gradWeight = self.gradWeightBuffer + self.updateKeys = self.updateKeysBuffer + end + self.values.THNN.IndexLinear_updateParameters( + self.gradWeight:cdata(), + self.gradBias:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.updateKeys:cdata(), + self.cumSumSizes:cdata(), + self.offset, + self.weightDecay or 0, + lr or error('You must specify a learning rate') + ) + end +end + +function IndexLinear:zeroGradParameters() + -- No need to do anything here as gradWeight is dense + self.gradBias:zero() + + -- The below piece of code would reset + -- the smart scaling parameters for each features + -- each time we call zeroGradParameters + -- TODO: decide what to do with that piece of code. + -- NB: this should be commented along with the corresponding + -- piece of code in lib/THNN/generic/IndexLinear.c, in the accUpdateGradParameters function. 
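
-- [Editorial aside] A sketch of the batched sparse input format that
-- IndexLinear accepts (see reshapeInput above); ids and values are
-- illustrative.
local il = nn.IndexLinear(100, 5)  -- 100 sparse feature ids in, 5 outputs
local out = il:forward{
   {torch.LongTensor{1, 3}, torch.LongTensor{2, 50, 99}}, -- keys, one tensor per sample
   {torch.Tensor{1.0, 0.5}, torch.Tensor{0.1, 0.2, 0.3}}, -- matching values
}
-- out is a 2x5 tensor, one row per sample
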
+ + --[[ + local w = self.weight:select(2, 3) + if self.updateKeys and self.updateKeys:nElement() > 0 then + self.updateKeysBuffer:resizeAs(self.updateKeys):copy(self.updateKeys):add(self.offset+1) + w:indexFill(1, self.updateKeysBuffer, 0) + end + ]]-- + self.running.counter = 1 +end + +function IndexLinear:parameters() + return {self.weight, self.bias}, {self.running, self.gradBias} +end + +function IndexLinear:clearState() + self.running.keys = {} + self.running.gradWeight = {} + self.keys = nil + self.zerokeys = nil + self.updateKeys = nil + self.values = nil + self.sizes = nil + self.lkeys = {} + self.lvalues = {} + self.gradWeightBuffer = self.gradWeightBuffer.new() + self.valuesBuffer = self.valuesBuffer.new() + self.updateKeysBuffer = nil + self.values = nil + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Jacobian.lua b/contrib/lua-torch/nn/Jacobian.lua new file mode 100644 index 000000000..4f728b18c --- /dev/null +++ b/contrib/lua-torch/nn/Jacobian.lua @@ -0,0 +1,389 @@ +nn.Jacobian = {} + +function nn.Jacobian.backward(module, input, param, dparam) + local doparam = 0 + if param then + doparam = 1 + end + param = param or input + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + for i=1,sdout:nElement() do + dout:zero() + sdout[i] = 1 + module:zeroGradParameters() + local din = module:updateGradInput(input, dout) + module:accGradParameters(input, dout) + if doparam == 1 then + jacobian:select(2,i):copy(dparam) + else + jacobian:select(2,i):copy(din) + end + end + return jacobian +end + +function nn.Jacobian.backwardUpdate(module, input, param) + + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + -- original param + local params = module:parameters() + local origparams = {} + for j=1,#params do + table.insert(origparams, params[j]:clone()) + end + + for i=1,sdout:nElement() do + for j=1,#params do + params[j]:copy(origparams[j]) + end + dout:zero() + sdout[i] = 1 + module:updateGradInput(input, dout) + module:accUpdateGradParameters(input, dout, 1) + jacobian:select(2,i):copy(param) + end + + for j=1,#params do + params[j]:copy(origparams[j]) + end + + return jacobian +end + +function nn.Jacobian.forward(module, input, param, perturbation) + param = param or input + -- perturbation amount + perturbation = perturbation or 1e-6 + -- 1D view of input + --local tst = param:storage() + local sin = param.new(param):resize(param:nElement())--param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + local orig = sin[i] + sin[i] = orig - perturbation + outa:copy(module:forward(input)) + sin[i] = orig + perturbation + outb:copy(module:forward(input)) + sin[i] = orig + + outb:add(-1,outa):div(2*perturbation) + jacobian:select(1,i):copy(outb) + end + + return jacobian +end + +function nn.Jacobian.backwardDiagHessian(module, input, diagHessianParamName) + -- Compute the second 
derivatives (diagonal Hessian elements) + -- by backpropagation (using the code from hessian.lua). + -- + -- This function computes the diagonal Hessian elements of the following function: + -- + -- F(x_1, x_2, ..., x_n) = y_1^2/2 + y_2^2/2 + ... + y_m^2/2, + -- + -- where + -- x_1, ..., x_n are the input values and parameters of the given module, + -- y_1, ..., y_m are the output values of the given module. + -- + -- All x_i and y_i values are scalars here. In other words, + -- x_1, ..., x_n denote the scalar elements of the module input tensor, + -- the scalar elements of module.weight, + -- and the scalar elements of module.bias; + -- y_1, ..., y_m are the scalar elements of the module output tensor. + -- + -- The diagonal Hessian elements of F are computed with respect to + -- the module input values and parameters (x_1, .., x_n). + -- + -- The function F is chosen for its convenient properties: + -- + -- dF / dy_i = y_i, + -- d^2F / dy_i^2 = 1. + -- + -- In other words, the diagonal Hessian elements of F with respect + -- to the module OUTPUT values (y_1, ... y_m) are equal to 1. + -- + -- Because of that, computing the diagonal Hessian elements of F + -- with respect to the module INPUT values and PARAMETERS (x_1, ..., x_n) + -- can be done by calling updateDiagHessianInput() and accDiagHessianParameters() + -- using a tensor of ones as diagHessianOutput. + + module:forward(input) + local diagHessianOutput = module.output.new():resizeAs(module.output):fill(1) + + module.diagHessianWeight:zero() + module.diagHessianBias:zero() + module:updateDiagHessianInput(input, diagHessianOutput) + module:accDiagHessianParameters(input, diagHessianOutput) + + return module[diagHessianParamName] +end + +function nn.Jacobian.linearModuleDiagHessian(module, input, gradParamName) + -- Compute the second derivatives (diagonal Hessian elements) + -- from the first derivatives for the given module + -- (without using the code from hessian.lua). + -- + -- The given module is assumed to be linear with respect to its inputs and weights + -- (like nn.Linear, nn.SpatialConvolution, etc.) + -- + -- This function computes the diagonal Hessian elements of the following function: + -- + -- F(x_1, x_2, ..., x_n) = y_1^2/2 + y_2^2/2 + ... + y_m^2/2. + -- + -- (See the the comment for nn.Jacobian.backwardDiagHessian() for explanation.) + -- + -- The first derivatives of F with respect to + -- the module inputs and parameters (x_1, ..., x_n) are: + -- + -- dF / dx_i = \sum_k (dF / dy_k) (dy_k / dx_i). + -- + -- The second derivatives are: + -- + -- d^2F / dx_i = \sum_k [(d^2F / dy_k^2) (dy_k / dx_i)^2 + (dF / dy_k) (d^2y_k / dx_i^2)]. + -- + -- The second derivatives of F with respect to the module outputs (y_1, ..., y_m) + -- are equal to 1, so: + -- + -- d^2F / dx_i = \sum_k [(dy_k / dx_i)^2 + (dF / dy_k) (d^2y_k / dx_i^2)]. + -- + -- Assuming the linearity of module outputs (y_1, ..., y_m) + -- with respect to module inputs and parameters (x_1, ..., x_n), + -- we have (d^2y_k / dx_i^2) = 0, + -- and the expression finally becomes: + -- + -- d^2F / dx_i = \sum_k (dy_k / dx_i)^2. + -- + -- The first derivatives (dy_k / dx_i) are computed by normal backpropagation, + -- using updateGradInput() and accGradParameters(). 
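
   -- [Editorial aside] A concrete instance of the identity above, with
   -- illustrative sizes: for m = nn.Linear(4, 3), y = W x + b gives
   -- dy_k/dx_i = W[k][i], so the diagonal Hessian w.r.t. the input is
   -- sum_k W[k][i]^2, computable as m.weight:clone():pow(2):sum(1).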
+ + local gradParam = module[gradParamName] + + local diagHessian = gradParam.new():resize(gradParam:nElement()):zero() + + module:forward(input) + local gradOutput = module.output.new():resizeAs(module.output) + local gradOutput1D = gradOutput:view(gradOutput:nElement()) + + for i=1,gradOutput:nElement() do + gradOutput1D:zero() + gradOutput1D[i] = 1 + module.gradWeight:zero() + if module.bias then + module.gradBias:zero() + end + module:updateGradInput(input, gradOutput) + module:accGradParameters(input, gradOutput) + diagHessian:addcmul(gradParam, gradParam) + end + + return diagHessian +end + +function nn.Jacobian.forwardUpdate(module, input, param, perturbation) + -- perturbation amount + perturbation = perturbation or 1e-6 + -- 1D view of input + --local tst = param:storage() + local sin = param.new(param):resize(param:nElement())--param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + local orig = sin[i] + sin[i] = orig - perturbation + outa:copy(module:forward(input)) + sin[i] = orig + perturbation + outb:copy(module:forward(input)) + sin[i] = orig + + outb:add(-1,outa):div(2*perturbation) + jacobian:select(1,i):copy(outb) + jacobian:select(1,i):mul(-1) + jacobian:select(1,i):add(sin[i]) + end + return jacobian +end + +function nn.Jacobian.testJacobian(module, input, minval, maxval, perturbation) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + local jac_fprop = nn.Jacobian.forward(module, input, input, perturbation) + local jac_bprop = nn.Jacobian.backward(module, input) + local error = jac_fprop-jac_bprop + return error:abs():max() +end + +function nn.Jacobian.testJacobianParameters(module, input, param, dparam, minval, maxval, perturbation) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local jac_bprop = nn.Jacobian.backward(module, input, param, dparam) + local jac_fprop = nn.Jacobian.forward(module, input, param, perturbation) + local error = jac_fprop - jac_bprop + return error:abs():max() +end + +function nn.Jacobian.testJacobianUpdateParameters(module, input, param, minval, maxval, perturbation) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local params_bprop = nn.Jacobian.backwardUpdate(module, input, param) + local params_fprop = nn.Jacobian.forwardUpdate(module, input, param, perturbation) + + local error = params_fprop - params_bprop + return error:abs():max() +end + +function nn.Jacobian.testDiagHessian(module, input, gradParamName, diagHessianParamName, minval, maxval) + -- Compute the diagonal Hessian elements for the same function in two different ways, + -- then compare the results and return the difference. 
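
   -- [Editorial aside] Typical use of this harness in a unit test, with an
   -- illustrative module and tolerance:
   --    local module = nn.Linear(10, 5)
   --    local input = torch.Tensor(10)
   --    assert(nn.Jacobian.testJacobian(module, input) < 1e-6)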
+ + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + module:initDiagHessianParameters() + local h_bprop = nn.Jacobian.backwardDiagHessian(module, input, diagHessianParamName) + local h_linearmodule = nn.Jacobian.linearModuleDiagHessian(module, input, gradParamName) + local error = h_bprop - h_linearmodule + return error:abs():max() +end + +function nn.Jacobian.testDiagHessianInput(module, input, minval, maxval) + return nn.Jacobian.testDiagHessian(module, input, 'gradInput', 'diagHessianInput', minval, maxval) +end + +function nn.Jacobian.testDiagHessianWeight(module, input, minval, maxval) + return nn.Jacobian.testDiagHessian(module, input, 'gradWeight', 'diagHessianWeight', minval, maxval) +end + +function nn.Jacobian.testDiagHessianBias(module, input, minval, maxval) + return nn.Jacobian.testDiagHessian(module, input, 'gradBias', 'diagHessianBias', minval, maxval) +end + +function nn.Jacobian.testIO(module,input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + local inputclone = input:clone() + + -- run module + module:forward(input) + local go = module.output:clone():copy(torch.rand(module.output:nElement()):mul(inrange):add(minval)) + local goclone = go:clone() + module:zeroGradParameters() + module:updateGradInput(input,go) + module:accGradParameters(input,go) + + local fo = module.output:clone() + local bo = module.gradInput:clone() + + -- write module + local filename = os.tmpname() + local f = torch.DiskFile(filename, 'w'):binary() + -- call clearState and check that it returns itself + assert(module == module:clearState(),'clearState did not return self') + f:writeObject(module) + f:close() + -- read module + local m = torch.DiskFile(filename):binary():readObject() + m:forward(inputclone) + m:zeroGradParameters() + m:updateGradInput(inputclone,goclone) + m:accGradParameters(inputclone,goclone) + -- cleanup + os.remove(filename) + + local fo2 = m.output:clone() + local bo2 = m.gradInput:clone() + + local errf = fo - fo2 + local errb = bo - bo2 + return errf:abs():max(), errb:numel() == 0 and 0 or errb:abs():max() +end + +function nn.Jacobian.testAllUpdate(module, input, weight, gradWeight) + local gradOutput + local lr = torch.uniform(0.1, 1) + local errors = {} + + -- accGradParameters + local maccgp = module:clone() + local weightc = maccgp[weight]:clone() + maccgp:forward(input) + gradOutput = torch.rand(maccgp.output:size()) + maccgp:zeroGradParameters() + maccgp:updateGradInput(input, gradOutput) + maccgp:accGradParameters(input, gradOutput) + maccgp:updateParameters(lr) + errors["accGradParameters"] = (weightc-maccgp[gradWeight]*lr-maccgp[weight]):norm() + + -- accUpdateGradParameters + local maccugp = module:clone() + maccugp:forward(input) + maccugp:updateGradInput(input, gradOutput) + maccugp:accUpdateGradParameters(input, gradOutput, lr) + errors["accUpdateGradParameters"] = (maccugp[weight]-maccgp[weight]):norm() + + -- shared, accGradParameters + local macsh1 = module:clone() + local macsh2 = module:clone() + macsh2:share(macsh1, weight) + macsh1:forward(input) + macsh2:forward(input) + macsh1:zeroGradParameters() + macsh2:zeroGradParameters() + macsh1:updateGradInput(input, gradOutput) + macsh2:updateGradInput(input, gradOutput) + macsh1:accGradParameters(input, gradOutput) + macsh2:accGradParameters(input, gradOutput) + macsh1:updateParameters(lr) + macsh2:updateParameters(lr) + local err = 
(weightc-maccgp[gradWeight]*(lr*2)-macsh1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macsh2[weight]):norm() + errors["accGradParameters [shared]"] = err + + -- shared, accUpdateGradParameters + local macshu1 = module:clone() + local macshu2 = module:clone() + macshu2:share(macshu1, weight) + macshu1:forward(input) + macshu2:forward(input) + macshu1:updateGradInput(input, gradOutput) + macshu2:updateGradInput(input, gradOutput) + macshu1:accUpdateGradParameters(input, gradOutput, lr) + macshu2:accUpdateGradParameters(input, gradOutput, lr) + err = (weightc-maccgp[gradWeight]*(lr*2)-macshu1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macshu2[weight]):norm() + errors["accUpdateGradParameters [shared]"] = err + + return errors +end diff --git a/contrib/lua-torch/nn/JoinTable.lua b/contrib/lua-torch/nn/JoinTable.lua new file mode 100644 index 000000000..6ab68e189 --- /dev/null +++ b/contrib/lua-torch/nn/JoinTable.lua @@ -0,0 +1,74 @@ +local JoinTable, parent = torch.class('nn.JoinTable', 'nn.Module') + +function JoinTable:__init(dimension, nInputDims) + parent.__init(self) + self.size = torch.LongStorage() + self.dimension = dimension + self.gradInput = {} + self.nInputDims = nInputDims +end + +function JoinTable:_getPositiveDimension(input) + local dimension = self.dimension + if dimension < 0 then + dimension = input[1]:dim() + dimension + 1 + elseif self.nInputDims and input[1]:dim()==(self.nInputDims+1) then + dimension = dimension + 1 + end + return dimension +end + +function JoinTable:updateOutput(input) + local dimension = self:_getPositiveDimension(input) + + for i=1,#input do + local currentOutput = input[i] + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[dimension] = self.size[dimension] + + currentOutput:size(dimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for i=1,#input do + local currentOutput = input[i] + self.output:narrow(dimension, offset, + currentOutput:size(dimension)):copy(currentOutput) + offset = offset + currentOutput:size(dimension) + end + return self.output +end + +function JoinTable:updateGradInput(input, gradOutput) + local dimension = self:_getPositiveDimension(input) + + for i=1,#input do + if self.gradInput[i] == nil then + self.gradInput[i] = input[i].new() + end + self.gradInput[i]:resizeAs(input[i]) + end + + -- clear out invalid gradInputs + for i=#input+1, #self.gradInput do + self.gradInput[i] = nil + end + + local offset = 1 + for i=1,#input do + local currentOutput = input[i] + local currentGradInput = gradOutput:narrow(dimension, offset, + currentOutput:size(dimension)) + self.gradInput[i]:copy(currentGradInput) + offset = offset + currentOutput:size(dimension) + end + return self.gradInput +end + +function JoinTable:type(type, tensorCache) + self.gradInput = {} + return parent.type(self, type, tensorCache) +end diff --git a/contrib/lua-torch/nn/Kmeans.lua b/contrib/lua-torch/nn/Kmeans.lua new file mode 100644 index 000000000..56066b63d --- /dev/null +++ b/contrib/lua-torch/nn/Kmeans.lua @@ -0,0 +1,215 @@ +-- Online (Hard) Kmeans layer. 
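
-- [Editorial aside] A quick sketch of nn.JoinTable from the previous file;
-- the sizes are illustrative.
local jt = nn.JoinTable(1)
local joined = jt:forward{torch.Tensor{1, 2}, torch.Tensor{3, 4, 5}}
-- joined is a 1D tensor of size 5: {1, 2, 3, 4, 5}
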
+local Kmeans, parent = torch.class('nn.Kmeans', 'nn.Module') + +function Kmeans:__init(k, dim, scale) + parent.__init(self) + self.k = k + self.dim = dim + + -- scale for online kmean update + self.scale = scale + + assert(k > 0, "Clusters cannot be 0 or negative.") + assert(dim > 0, "Dimensionality cannot be 0 or negative.") + + -- Kmeans centers -> self.weight + self.weight = torch.Tensor(self.k, self.dim) + + self.gradWeight = torch.Tensor(self.weight:size()) + self.loss = 0 -- within cluster error of the last forward + + self.clusterSampleCount = torch.Tensor(self.k) + + self:reset() +end + +-- Reset +function Kmeans:reset(stdev) + stdev = stdev or 1 + self.weight:uniform(-stdev, stdev) +end + +-- Initialize Kmeans weight with random samples from input. +function Kmeans:initRandom(input) + local inputDim = input:nDimension() + assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.") + + local noOfSamples = input:size(1) + local dim = input:size(2) + assert(dim == self.dim, "Dimensionality of input and weight don't match.") + assert(noOfSamples >= self.k, "Need atleast k samples for initialization.") + + local indices = torch.zeros(self.k) + indices:random(1, noOfSamples) + + for i=1, self.k do + self.weight[i]:copy(input[indices[i]]) + end +end + +-- Initialize using Kmeans++ +function Kmeans:initKmeansPlus(input, p) + self.p = p or self.p or 0.95 + assert(self.p>=0 and self.p<=1, "P value should be between 0-1.") + + local inputDim = input:nDimension() + assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.") + local noOfSamples = input:size(1) + + local pcount = math.ceil((1-self.p)*noOfSamples) + if pcount <= 0 then pcount = 1 end + + local initializedK = 1 + self.weight[initializedK]:copy(input[torch.random(noOfSamples)]) + initializedK = initializedK + 1 + + local clusters = self.weight.new() + local clusterDistances = self.weight.new() + local temp = self.weight.new() + local expandedSample = self.weight.new() + local distances = self.weight.new() + distances:resize(noOfSamples):fill(math.huge) + local maxScores = self.weight.new() + local maxIndx = self.weight.new() + + for k=initializedK, self.k do + clusters = self.weight[{{initializedK-1, initializedK-1}}] + for i=1, noOfSamples do + temp:expand(input[{{i}}], 1, self.dim) + expandedSample:resize(temp:size()):copy(temp) + + -- Squared Euclidean distance + expandedSample:add(-1, clusters) + clusterDistances:norm(expandedSample, 2, 2) + clusterDistances:pow(2) + distances[i] = math.min(clusterDistances:min(), distances[i]) + end + maxScores, maxIndx = distances:sort(true) + local tempIndx = torch.random(pcount) + local indx = maxIndx[tempIndx] + self.weight[initializedK]:copy(input[indx]) + initializedK = initializedK + 1 + end +end + +local function isCudaTensor(tensor) + local typename = torch.typename(tensor) + if typename and typename:find('torch.Cuda*Tensor') then + return true + end + return false +end + +-- Kmeans updateOutput (forward) +function Kmeans:updateOutput(input) + local inputDim = input:nDimension() + assert(inputDim == 2, "Incorrect input dimensionality. 
Expecting 2D.") + + local batchSize = input:size(1) + local dim = input:size(2) + assert(dim == self.dim, "Dimensionality of input and weight don't match.") + + assert(input:isContiguous(), "Input is not contiguous.") + + -- a sample copied k times to compute distance between sample and weight + self._expandedSamples = self._expandedSamples or self.weight.new() + + -- distance between a sample and weight + self._clusterDistances = self._clusterDistances or self.weight.new() + + self._temp = self._temp or input.new() + self._tempExpanded = self._tempExpanded or input.new() + + -- Expanding inputs + self._temp:view(input, 1, batchSize, self.dim) + self._tempExpanded:expand(self._temp, self.k, batchSize, self.dim) + self._expandedSamples:resize(self.k, batchSize, self.dim) + :copy(self._tempExpanded) + + -- Expanding weights + self._tempWeight = self._tempWeight or self.weight.new() + self._tempWeightExp = self._tempWeightExp or self.weight.new() + self._expandedWeight = self._expanedWeight or self.weight.new() + self._tempWeight:view(self.weight, self.k, 1, self.dim) + self._tempWeightExp:expand(self._tempWeight, self._expandedSamples:size()) + self._expandedWeight:resize(self.k, batchSize, self.dim) + :copy(self._tempWeightExp) + + -- x-c + self._expandedSamples:add(-1, self._expandedWeight) + -- Squared Euclidean distance + self._clusterDistances:norm(self._expandedSamples, 2, 3) + self._clusterDistances:pow(2) + self._clusterDistances:resize(self.k, batchSize) + + self._minScore = self._minScore or self.weight.new() + self._minIndx = self._minIndx or (isCudaTensor(input) and torch.CudaLongTensor() or torch.LongTensor()) + self._minScore:min(self._minIndx, self._clusterDistances, 1) + self._minIndx:resize(batchSize) + + self.output:resize(batchSize):copy(self._minIndx) + self.loss = self._minScore:sum() + + return self.output +end + +-- Kmeans has its own criterion hence gradInput are zeros +function Kmeans:updateGradInput(input, gradOuput) + self.gradInput:resize(input:size()):zero() + + return self.gradInput +end + +-- We define kmeans update rule as c -> c + scale * 1/n * sum_i (x-c). +-- n is no. of x's belonging to c. +-- With this update rule and gradient descent will be negative the gradWeights. 
+function Kmeans:accGradParameters(input, gradOutput, scale) + local scale = self.scale or scale or 1 + assert(scale > 0 , " Scale has to be positive.") + + -- Update cluster sample count + local batchSize = input:size(1) + self._cscAdder = self._cscAdder or self.weight.new() + self._cscAdder:resize(batchSize):fill(1) + self.clusterSampleCount:zero() + self.clusterSampleCount:indexAdd(1, self._minIndx, self._cscAdder) + + -- scale * (x[k]-c[k]) where k is nearest cluster to x + self._gradWeight = self._gradWeight or self.gradWeight.new() + self._gradWeight:index(self.weight, 1, self._minIndx) + self._gradWeight:mul(-1) + self._gradWeight:add(input) + self._gradWeight:mul(-scale) + + self._gradWeight2 = self._gradWeight2 or self.gradWeight.new() + self._gradWeight2:resizeAs(self.gradWeight):zero() + self._gradWeight2:indexAdd(1, self._minIndx, self._gradWeight) + + -- scale/n * sum_i (x-c) + self._ccounts = self._ccounts or self.clusterSampleCount.new() + self._ccounts:resize(self.k):copy(self.clusterSampleCount) + self._ccounts:add(0.0000001) -- prevent division by zero errors + + self._gradWeight2:cdiv(self._ccounts:view(self.k,1):expandAs(self.gradWeight)) + + self.gradWeight:add(self._gradWeight2) +end + +function Kmeans:clearState() + -- prevent premature memory allocations + self._expandedSamples = nil + self._clusterDistances = nil + self._temp = nil + self._tempExpanded = nil + self._tempWeight = nil + self._tempWeightExp = nil + self._expandedWeight = nil + self._minScore = nil + self._minIndx = nil + self._cscAdder = nil +end + +function Kmeans:type(type, tensorCache) + self:clearState() + return parent.type(self, type, tensorCache) +end diff --git a/contrib/lua-torch/nn/L1Cost.lua b/contrib/lua-torch/nn/L1Cost.lua new file mode 100644 index 000000000..6b58e0ec9 --- /dev/null +++ b/contrib/lua-torch/nn/L1Cost.lua @@ -0,0 +1,30 @@ +local THNN = require 'nn.THNN' +local L1Cost, parent = torch.class('nn.L1Cost','nn.Criterion') + +function L1Cost:__init() + parent.__init(self) +end + +function L1Cost:updateOutput(input) + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.L1Cost_updateOutput( + input:cdata(), + self.output_tensor:cdata() + ) + self.output = self.output_tensor[1] + return self.output +end + +function L1Cost:updateGradInput(input) + input.THNN.L1Cost_updateGradInput( + input:cdata(), + THNN.NULL, + self.gradInput:cdata() + ) + return self.gradInput +end + +function L1Cost:clearState() + if self.output_tensor then self.output_tensor:set() end + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/L1HingeEmbeddingCriterion.lua b/contrib/lua-torch/nn/L1HingeEmbeddingCriterion.lua new file mode 100644 index 000000000..6957278f5 --- /dev/null +++ b/contrib/lua-torch/nn/L1HingeEmbeddingCriterion.lua @@ -0,0 +1,41 @@ +local L1HingeEmbeddingCriterion, parent = torch.class('nn.L1HingeEmbeddingCriterion', 'nn.Criterion') + +function L1HingeEmbeddingCriterion:__init(margin) + parent.__init(self) + margin = margin or 1 + self.margin = margin + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function L1HingeEmbeddingCriterion:updateOutput(input,y) + self.output=input[1]:dist(input[2],1); + if y == -1 then + self.output = math.max(0,self.margin - self.output); + end + return self.output +end + + +local function mathsign(t) + if t>0 then return 1; end + if t<0 then return -1; end + return 2*torch.random(2)-3; +end + +function L1HingeEmbeddingCriterion:updateGradInput(input, y) + self.gradInput[1]:resizeAs(input[1]) + 
self.gradInput[2]:resizeAs(input[2]) + self.gradInput[1]:copy(input[1]) + self.gradInput[1]:add(-1, input[2]) + local dist = self.gradInput[1]:norm(1); + self.gradInput[1]:apply(mathsign) -- L1 gradient + if y == -1 then -- just to avoid a mul by 1 + if dist > self.margin then + self.gradInput[1]:zero() + else + self.gradInput[1]:mul(-1) + end + end + self.gradInput[2]:zero():add(-1, self.gradInput[1]) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/L1Penalty.lua b/contrib/lua-torch/nn/L1Penalty.lua new file mode 100644 index 000000000..9ee6b35ff --- /dev/null +++ b/contrib/lua-torch/nn/L1Penalty.lua @@ -0,0 +1,42 @@ +local L1Penalty, parent = torch.class('nn.L1Penalty','nn.Module') + +--This module acts as an L1 latent state regularizer, adding the +--[gradOutput] to the gradient of the L1 loss. The [input] is copied to +--the [output]. + +function L1Penalty:__init(l1weight, sizeAverage, provideOutput) + parent.__init(self) + self.l1weight = l1weight + self.sizeAverage = sizeAverage or false + if provideOutput == nil then + self.provideOutput = true + else + self.provideOutput = provideOutput + end +end + +function L1Penalty:updateOutput(input) + local m = self.l1weight + if self.sizeAverage == true then + m = m/input:nElement() + end + local loss = m*input:norm(1) + self.loss = loss + self.output = input + return self.output +end + +function L1Penalty:updateGradInput(input, gradOutput) + local m = self.l1weight + if self.sizeAverage == true then + m = m/input:nElement() + end + + self.gradInput:resizeAs(input):copy(input):sign():mul(m) + + if self.provideOutput == true then + self.gradInput:add(gradOutput) + end + + return self.gradInput +end diff --git a/contrib/lua-torch/nn/LayerNormalization.lua b/contrib/lua-torch/nn/LayerNormalization.lua new file mode 100644 index 000000000..722d7c802 --- /dev/null +++ b/contrib/lua-torch/nn/LayerNormalization.lua @@ -0,0 +1,27 @@ +-- Reference: https://arxiv.org/pdf/1607.06450.pdf (Section 3) + +local LayerNormalization, parent = torch.class('nn.LayerNormalization', 'nn.Sequential') +function LayerNormalization:__init(nOutput, bias, eps, affine) + parent.__init(self) + eps = eps or 1e-10 + affine = (affine == nil) and true or affine + bias = bias or 0 + + self:add(nn.ConcatTable() + :add(nn.Identity()) + :add(nn.Sequential() + :add(nn.Mean(1, 1)) + :add(nn.Replicate(nOutput,1,1)))) + :add(nn.CSubTable()) + :add(nn.Normalize(2, eps)) + :add(nn.MulConstant(torch.sqrt(nOutput))) + + if affine then + local biasTransform = nn.Add(nOutput, false) + biasTransform.bias:fill(bias) + local gainTransform = nn.CMul(nOutput) + gainTransform.weight:fill(1.) 
+   self:add(gainTransform)
+   self:add(biasTransform)
+  end
+end
diff --git a/contrib/lua-torch/nn/LeakyReLU.lua b/contrib/lua-torch/nn/LeakyReLU.lua
new file mode 100644
index 000000000..56b7f2542
--- /dev/null
+++ b/contrib/lua-torch/nn/LeakyReLU.lua
@@ -0,0 +1,41 @@
+local LeakyReLU, parent = torch.class('nn.LeakyReLU','nn.Module')
+
+function LeakyReLU:__init(negval,ip)
+   parent.__init(self)
+   if type(negval) == 'boolean' then
+      -- a lone boolean argument is the in-place flag, not the negative slope
+      ip = negval
+      self.negval = 1/100
+   else
+      self.negval = negval or (1/100)
+   end
+   -- default for inplace is false
+   self.inplace = ip or false
+   if self.negval < 0 then
+      self.inplace = false
+   end
+end
+
+function LeakyReLU:updateOutput(input)
+   input.THNN.LeakyReLU_updateOutput(
+      input:cdata(),
+      self.output:cdata(),
+      self.negval,
+      self.inplace
+   )
+   return self.output
+end
+
+function LeakyReLU:updateGradInput(input, gradOutput)
+   input.THNN.LeakyReLU_updateGradInput(
+      input:cdata(),
+      gradOutput:cdata(),
+      self.gradInput:cdata(),
+      self.negval,
+      self.inplace
+   )
+   return self.gradInput
+end
+
+function LeakyReLU:__tostring__()
+   return torch.type(self) .. string.format('(%g)', self.negval)
+end
diff --git a/contrib/lua-torch/nn/Linear.lua b/contrib/lua-torch/nn/Linear.lua
new file mode 100644
index 000000000..09b5979ce
--- /dev/null
+++ b/contrib/lua-torch/nn/Linear.lua
@@ -0,0 +1,122 @@
+local Linear, parent = torch.class('nn.Linear', 'nn.Module')
+
+function Linear:__init(inputSize, outputSize, bias)
+   parent.__init(self)
+   local bias = ((bias == nil) and true) or bias
+   self.weight = torch.Tensor(outputSize, inputSize)
+   self.gradWeight = torch.Tensor(outputSize, inputSize)
+   if bias then
+      self.bias = torch.Tensor(outputSize)
+      self.gradBias = torch.Tensor(outputSize)
+   end
+   self:reset()
+end
+
+function Linear:noBias()
+   self.bias = nil
+   self.gradBias = nil
+   return self
+end
+
+function Linear:reset(stdv)
+   if stdv then
+      stdv = stdv * math.sqrt(3)
+   else
+      stdv = 1./math.sqrt(self.weight:size(2))
+   end
+   if nn.oldSeed then
+      for i=1,self.weight:size(1) do
+         self.weight:select(1, i):apply(function()
+            return torch.uniform(-stdv, stdv)
+         end)
+      end
+      if self.bias then
+         for i=1,self.bias:nElement() do
+            self.bias[i] = torch.uniform(-stdv, stdv)
+         end
+      end
+   else
+      self.weight:uniform(-stdv, stdv)
+      if self.bias then self.bias:uniform(-stdv, stdv) end
+   end
+   return self
+end
+
+function Linear:updateAddBuffer(input)
+   local nframe = input:size(1)
+   self.addBuffer = self.addBuffer or input.new()
+   if self.addBuffer:nElement() ~= nframe then
+      self.addBuffer:resize(nframe):fill(1)
+   end
+end
+
+function Linear:updateOutput(input)
+   if input:dim() == 1 then
+      self.output:resize(self.weight:size(1))
+      if self.bias then self.output:copy(self.bias) else self.output:zero() end
+      self.output:addmv(1, self.weight, input)
+   elseif input:dim() == 2 then
+      local nframe = input:size(1)
+      local nElement = self.output:nElement()
+      self.output:resize(nframe, self.weight:size(1))
+      if self.output:nElement() ~= nElement then
+         self.output:zero()
+      end
+      self:updateAddBuffer(input)
+      self.output:addmm(0, self.output, 1, input, self.weight:t())
+      if self.bias then self.output:addr(1, self.addBuffer, self.bias) end
+   else
+      error('input must be vector or matrix')
+   end
+
+   return self.output
+end
+
+function Linear:updateGradInput(input, gradOutput)
+   if self.gradInput then
+
+      local nElement = self.gradInput:nElement()
+      self.gradInput:resizeAs(input)
+      if self.gradInput:nElement() ~= nElement then
+         self.gradInput:zero()
+      end
+      if input:dim() == 1 then
+
self.gradInput:addmv(0, 1, self.weight:t(), gradOutput) + elseif input:dim() == 2 then + self.gradInput:addmm(0, 1, gradOutput, self.weight) + end + + return self.gradInput + end +end + +function Linear:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + if input:dim() == 1 then + self.gradWeight:addr(scale, gradOutput, input) + if self.bias then self.gradBias:add(scale, gradOutput) end + elseif input:dim() == 2 then + self.gradWeight:addmm(scale, gradOutput:t(), input) + if self.bias then + -- update the size of addBuffer if the input is not the same size as the one we had in last updateGradInput + self:updateAddBuffer(input) + self.gradBias:addmv(scale, gradOutput:t(), self.addBuffer) + end + end +end + +function Linear:sharedAccUpdateGradParameters(input, gradOutput, lr) + -- we do not need to accumulate parameters when sharing: + self:defaultAccUpdateGradParameters(input, gradOutput, lr) +end + +function Linear:clearState() + if self.addBuffer then self.addBuffer:set() end + return parent.clearState(self) +end + +function Linear:__tostring__() + return torch.type(self) .. + string.format('(%d -> %d)', self.weight:size(2), self.weight:size(1)) .. + (self.bias == nil and ' without bias' or '') +end diff --git a/contrib/lua-torch/nn/LinearWeightNorm.lua b/contrib/lua-torch/nn/LinearWeightNorm.lua new file mode 100755 index 000000000..a712f5535 --- /dev/null +++ b/contrib/lua-torch/nn/LinearWeightNorm.lua @@ -0,0 +1,168 @@ +local LinearWeightNorm, parent = torch.class('nn.LinearWeightNorm', 'nn.Linear') + +function LinearWeightNorm:__init(inputSize, outputSize, bias, eps) + nn.Module.__init(self) -- Skip nn.Linear constructor + + local bias = ((bias == nil) and true) or bias + + self.eps = eps or 1e-16 + + self.outputSize = outputSize + self.inputSize = inputSize + + self.v = torch.Tensor(outputSize, inputSize) + self.gradV = torch.Tensor(outputSize, inputSize) + + self.weight = torch.Tensor(outputSize, inputSize) + + self.g = torch.Tensor(outputSize,1) + self.gradG = torch.Tensor(outputSize,1) + + self.norm = torch.Tensor(outputSize,1) + self.scale = torch.Tensor(outputSize,1) + + if bias then + self.bias = torch.Tensor(outputSize) + self.gradBias = torch.Tensor(outputSize) + end + + self:reset() +end + +function LinearWeightNorm:evaluate() + if self.train ~= false then + self:updateWeightMatrix() + end + + parent.evaluate(self) +end + +function LinearWeightNorm:initFromWeight(weight) + weight = weight or self.weight + + self.g:norm(weight,2,2):clamp(self.eps,math.huge) + self.v:copy(weight) + + return self +end + +function LinearWeightNorm.fromLinear(linear) + local module = nn.LinearWeightNorm(linear.weight:size(2), linear.weight:size(1), torch.isTensor(linear.bias)) + module.weight:copy(linear.weight) + module:initFromWeight() + + if linear.bias then + module.bias:copy(linear.bias) + end + + return module +end + +function LinearWeightNorm:toLinear() + self:updateWeightMatrix() + + local module = nn.Linear(self.inputSize, self.outputSize, torch.isTensor(self.bias)) + + module.weight:copy(self.weight) + if self.bias then + module.bias:copy(self.bias) + end + + return module +end + +function LinearWeightNorm:parameters() + if self.bias then + return {self.v, self.g, self.bias}, {self.gradV, self.gradG, self.gradBias} + else + return {self.v, self.g}, {self.gradV, self.gradG} + end +end + +function LinearWeightNorm:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1 / math.sqrt(self.inputSize) + end + + self.weight:uniform(-stdv,stdv) + 
self:initFromWeight() + + if self.bias then + self.bias:uniform(-stdv,stdv) + end +end + +function LinearWeightNorm:updateWeightMatrix() + if self.norm:dim() == 0 then self.norm:resizeAs(self.g) end + if self.scale:dim() == 0 then self.scale:resizeAs(self.g) end + if self.weight:dim() == 0 then self.weight:resizeAs(self.v) end + + self.norm:norm(self.v,2,2):clamp(self.eps,math.huge) + self.scale:cdiv(self.g,self.norm) + self.weight:cmul(self.v,self.scale:expandAs(self.v)) +end + +function LinearWeightNorm:updateOutput(input) + if self.train ~= false then + self:updateWeightMatrix() + end + + return parent.updateOutput(self, input) +end + +function LinearWeightNorm:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + if input:dim() == 1 then + self.gradV:addr(scale, gradOutput, input) + if self.bias then self.gradBias:add(scale, gradOutput) end + elseif input:dim() == 2 then + self.gradV:addmm(scale, gradOutput:t(), input) + if self.bias then + -- update the size of addBuffer if the input is not the same size as the one we had in last updateGradInput + self:updateAddBuffer(input) + self.gradBias:addmv(scale, gradOutput:t(), self.addBuffer) + end + end + + local scale = self.scale:expandAs(self.v) + local norm = self.norm:expandAs(self.v) + + self.weight:cmul(self.gradV,self.v):cdiv(norm) + self.gradG:sum(self.weight,2) + + self.gradV:cmul(scale) + + self.weight:cmul(self.v,scale):cdiv(norm) + self.weight:cmul(self.gradG:expandAs(self.weight)) + + self.gradV:add(-1,self.weight) +end + +function LinearWeightNorm:defaultAccUpdateGradParameters(input, gradOutput, lr) + local gradV = self.gradV + local gradG = self.gradG + local gradBias = self.gradBias + + self.gradV = self.v + self.gradG = self.g + self.gradBias = self.bias + + self:accGradParameters(input, gradOutput, -lr) + + self.gradV = gradV + self.gradG = gradG + self.gradBias = gradBias +end + +function LinearWeightNorm:clearState() + nn.utils.clear(self, 'weight', 'norm', 'scale') + return parent.clearState(self) +end + +function LinearWeightNorm:__tostring__() + return torch.type(self) .. + string.format('(%d -> %d)', self.inputSize, self.outputSize) .. + (self.bias == nil and ' without bias' or '') +end
\ No newline at end of file diff --git a/contrib/lua-torch/nn/Log.lua b/contrib/lua-torch/nn/Log.lua new file mode 100644 index 000000000..e8f236bfb --- /dev/null +++ b/contrib/lua-torch/nn/Log.lua @@ -0,0 +1,20 @@ +local Log, parent = torch.class('nn.Log', 'nn.Module') + +function Log:__init() + parent.__init(self) +end + +function Log:updateOutput(input) + self.output:resizeAs(input) + self.output:copy(input) + self.output:log() + return self.output +end + +function Log:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + self.gradInput:fill(1) + self.gradInput:cdiv(input) + self.gradInput:cmul(gradOutput) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/LogSigmoid.lua b/contrib/lua-torch/nn/LogSigmoid.lua new file mode 100644 index 000000000..cab848f4d --- /dev/null +++ b/contrib/lua-torch/nn/LogSigmoid.lua @@ -0,0 +1,27 @@ +local LogSigmoid, parent = torch.class('nn.LogSigmoid', 'nn.Module') + +function LogSigmoid:updateOutput(input) + self.buffer = self.buffer or input.new() + input.THNN.LogSigmoid_updateOutput( + input:cdata(), + self.output:cdata(), + self.buffer:cdata() + ) + return self.output +end + +function LogSigmoid:updateGradInput(input, gradOutput) + input.THNN.LogSigmoid_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.buffer:cdata() + ) + return self.gradInput +end + +function LogSigmoid:clearState() + if self.buffer then self.buffer:set() end + return parent.clearState(self) +end + diff --git a/contrib/lua-torch/nn/LogSoftMax.lua b/contrib/lua-torch/nn/LogSoftMax.lua new file mode 100644 index 000000000..37c8acae4 --- /dev/null +++ b/contrib/lua-torch/nn/LogSoftMax.lua @@ -0,0 +1,19 @@ +local LogSoftMax = torch.class('nn.LogSoftMax', 'nn.Module') + +function LogSoftMax:updateOutput(input) + input.THNN.LogSoftMax_updateOutput( + input:cdata(), + self.output:cdata() + ) + return self.output +end + +function LogSoftMax:updateGradInput(input, gradOutput) + input.THNN.LogSoftMax_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata() + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/LookupTable.lua b/contrib/lua-torch/nn/LookupTable.lua new file mode 100644 index 000000000..6cffc6c3e --- /dev/null +++ b/contrib/lua-torch/nn/LookupTable.lua @@ -0,0 +1,166 @@ +local THNN = require 'nn.THNN' +local LookupTable, parent = torch.class('nn.LookupTable', 'nn.Module') + +LookupTable.__version = 4 + +function LookupTable:__init(nIndex, nOutput, paddingValue, maxNorm, normType) + parent.__init(self) + + self.weight = torch.Tensor(nIndex, nOutput) + self.gradWeight = torch.Tensor(nIndex, nOutput):zero() + self.paddingValue = paddingValue or 0 + self.maxNorm = maxNorm or nil + self.normType = normType or nil + + self:reset() +end + +function LookupTable:backCompatibility() + self._count = self._count or torch.IntTensor() + self._input = self._input or torch.LongTensor() + + if not self.shouldScaleGradByFreq then + self.shouldScaleGradByFreq = false + end +end + +function LookupTable:accUpdateOnly() + self.gradWeight = nil + return self +end + +function LookupTable:setPadding(paddingValue) + self.paddingValue = paddingValue + return self +end + +function LookupTable:setMaxNorm(maxNorm) + self.maxNorm = maxNorm + return self +end + +function LookupTable:setNormType(normType) + self.normType = normType + return self +end + +function LookupTable:scaleGradByFreq() + self.shouldScaleGradByFreq = true + return self +end + +function LookupTable:reset(stdv) + stdv 
= stdv or 1 + self.weight:normal(0, stdv) +end + +function LookupTable:makeInputContiguous(input) + -- make sure input is a contiguous torch.LongTensor + if (not input:isContiguous()) or torch.type(input) ~= torch.type(self._input) then + self.copiedInput = true + self._input:resize(input:size()):copy(input) + return self._input + end + self.copiedInput = false + return input +end + +function LookupTable:updateOutput(input) + self:backCompatibility() + self:renorm(input) + input = self:makeInputContiguous(input) + if input:dim() == 1 then + self.output:index(self.weight, 1, input) + elseif input:dim() == 2 then + self.output:index(self.weight, 1, input:view(-1)) + self.output = self.output:view(input:size(1), input:size(2), self.weight:size(2)) + else + error("input must be a vector or matrix") + end + return self.output +end + +function LookupTable:updateGradInput(input, gradOutput) + -- the input can be of any type (as in the forward it's + -- converted anyway to LongTensor) thus, need to allocate + -- new memory each time the user changes the input type + if torch.type(self.gradInput) ~= torch.type(input) then + self.gradInput = input.new() + end + if not self.gradInput:isSameSizeAs(input) then + self.gradInput:resizeAs(input):zero() + end + return self.gradInput +end + +function LookupTable:accGradParameters(input, gradOutput, scale) + self:backCompatibility() + input = self.copiedInput and self._input or input + if input:dim() == 2 then + input = input:view(-1) + elseif input:dim() ~= 1 then + error("input must be a vector or matrix") + end + + self.gradWeight.THNN.LookupTable_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + self._count:cdata(), + THNN.optionalTensor(self._sorted), + THNN.optionalTensor(self._indices), + self.shouldScaleGradByFreq or false, + self.paddingValue or 0, + scale or 1 + ) +end + +function LookupTable:renorm(input) + if not self.maxNorm then + return + end + -- copy input into _input, so _input is continuous. + -- The copied _input will be modified in the C code. 
+   self._input:resize(input:size()):copy(input)
+   local row_idx = self._input
+   if row_idx:dim() == 2 then
+      row_idx = row_idx:view(-1)
+   elseif row_idx:dim() ~= 1 then
+      error("input must be a vector or matrix")
+   end
+   -- "row_idx" and "weight" will be modified in the C code
+   self.weight.THNN.LookupTable_renorm(
+      row_idx:cdata(),
+      self.weight:cdata(),
+      self.maxNorm,
+      self.normType or 2
+   )
+end
+
+function LookupTable:type(type, tensorCache)
+   parent.type(self, type, tensorCache)
+
+   if type and type:find('torch%.Cuda.*Tensor') then
+      -- CUDA uses _sorted and _indices temporary tensors
+      self._sorted = torch.CudaLongTensor and torch.CudaLongTensor.new() or torch.CudaTensor.new()
+      self._indices = torch.CudaLongTensor and torch.CudaLongTensor.new() or torch.CudaTensor.new()
+      self._count = torch.CudaLongTensor and torch.CudaLongTensor.new() or torch.CudaTensor.new()
+      self._input = torch.CudaLongTensor and torch.CudaLongTensor.new() or torch.CudaTensor.new()
+   else
+      -- self._count and self._input should only be converted if using Cuda
+      self._count = torch.IntTensor()
+      self._input = torch.LongTensor()
+   end
+
+   return self
+end
+
+function LookupTable:clearState()
+   nn.utils.clear(self, '_count', '_input')
+   return parent.clearState(self)
+end
+
+function LookupTable:sharedAccUpdateGradParameters(input, gradOutput, lr)
+   -- we do not need to accumulate parameters when sharing:
+   self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+end
diff --git a/contrib/lua-torch/nn/MM.lua b/contrib/lua-torch/nn/MM.lua
new file mode 100644
index 000000000..cc978c8cb
--- /dev/null
+++ b/contrib/lua-torch/nn/MM.lua
@@ -0,0 +1,92 @@
+--[[ Module to perform matrix multiplication on two minibatch inputs,
+   producing a minibatch.
+]]
+
+local MM, parent = torch.class('nn.MM', 'nn.Module')
+
+--[[ The constructor takes two optional boolean arguments, specifying whether
+   or not to transpose the corresponding input matrix before performing the
+   multiplication.
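+
+   For example (an illustrative sketch with made-up sizes), in batch mode with
+   3D tensors:
+
+     local mm = nn.MM(false, true)  -- transpose the second operand
+     local a = torch.randn(8, 4, 5)
+     local b = torch.randn(8, 6, 5)
+     local c = mm:forward({a, b})   -- c has size 8 x 4 x 6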
+]] +function MM:__init(transA, transB) + parent.__init(self) + + self.transA = transA or false + self.transB = transB or false + + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function MM:updateOutput(input) + assert(#input == 2, 'input must be a pair of minibatch matrices') + local a, b = table.unpack(input) + assert(a:nDimension() == 2 or a:nDimension() == 3, 'input tensors must be 2D or 3D') + + if a:nDimension() == 2 then + assert(b:nDimension() == 2, 'second input tensor must be 2D') + + if self.transA then a = a:t() end + if self.transB then b = b:t() end + assert(a:size(2) == b:size(1), 'matrix sizes do not match') + + self.output:resize(a:size(1), b:size(2)) + self.output:mm(a, b) + else + assert(b:nDimension() == 3, 'second input tensor must be 3D') + assert(a:size(1) == b:size(1), 'inputs must contain the same number of minibatches') + + if self.transA then a = a:transpose(2, 3) end + if self.transB then b = b:transpose(2, 3) end + assert(a:size(3) == b:size(2), 'matrix sizes do not match') + + self.output:resize(a:size(1), a:size(2), b:size(3)) + self.output:bmm(a, b) + end + + return self.output +end + +function MM:updateGradInput(input, gradOutput) + self.gradInput[1] = self.gradInput[1] or input[1].new() + self.gradInput[2] = self.gradInput[2] or input[2].new() + + assert(#input == 2, 'input must be a pair of tensors') + local a, b = table.unpack(input) + self.gradInput[1]:resizeAs(a) + self.gradInput[2]:resizeAs(b) + + assert(gradOutput:nDimension() == 2 or gradOutput:nDimension() == 3, 'arguments must be a 2D or 3D Tensor') + + local h_dim, w_dim, f + if gradOutput:nDimension() == 2 then + assert(a:nDimension() == 2, 'first input tensor must be 2D') + assert(b:nDimension() == 2, 'second input tensor must be 2D') + + h_dim, w_dim = 1, 2 + f = "mm" + else + assert(a:nDimension() == 3, 'first input tensor must be 3D') + assert(b:nDimension() == 3, 'second input tensor must be 3D') + + h_dim, w_dim = 2, 3 + f = "bmm" + end + + if self.transA == self.transB then + a = a:transpose(h_dim, w_dim) + b = b:transpose(h_dim, w_dim) + end + + if self.transA then + self.gradInput[1][f](self.gradInput[1], b, gradOutput:transpose(h_dim, w_dim)) + else + self.gradInput[1][f](self.gradInput[1], gradOutput, b) + end + + if self.transB then + self.gradInput[2][f](self.gradInput[2], gradOutput:transpose(h_dim, w_dim), a) + else + self.gradInput[2][f](self.gradInput[2], a, gradOutput) + end + + return self.gradInput +end diff --git a/contrib/lua-torch/nn/MSECriterion.lua b/contrib/lua-torch/nn/MSECriterion.lua new file mode 100644 index 000000000..d38beb6bf --- /dev/null +++ b/contrib/lua-torch/nn/MSECriterion.lua @@ -0,0 +1,32 @@ +local MSECriterion, parent = torch.class('nn.MSECriterion', 'nn.Criterion') + +function MSECriterion:__init(sizeAverage) + parent.__init(self) + if sizeAverage ~= nil then + self.sizeAverage = sizeAverage + else + self.sizeAverage = true + end +end + +function MSECriterion:updateOutput(input, target) + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.MSECriterion_updateOutput( + input:cdata(), + target:cdata(), + self.output_tensor:cdata(), + self.sizeAverage + ) + self.output = self.output_tensor[1] + return self.output +end + +function MSECriterion:updateGradInput(input, target) + input.THNN.MSECriterion_updateGradInput( + input:cdata(), + target:cdata(), + self.gradInput:cdata(), + self.sizeAverage + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/MV.lua b/contrib/lua-torch/nn/MV.lua new file mode 100644 index 
000000000..a00478ef6
--- /dev/null
+++ b/contrib/lua-torch/nn/MV.lua
@@ -0,0 +1,82 @@
+--[[ Module to perform matrix-vector multiplication on two minibatch inputs,
+producing a minibatch.
+]]
+
+local MV, parent = torch.class('nn.MV', 'nn.Module')
+
+-- Backward compatibility
+local unpack = unpack or table.unpack
+
+function MV:__init(trans)
+   parent.__init(self)
+
+   self.trans = trans or false
+   assert(type(self.trans) == 'boolean', "argument must be a boolean indicating whether to transpose the matrix before multiplication")
+
+   self.gradInput = {torch.Tensor(), torch.Tensor()}
+end
+
+function MV:updateOutput(input)
+   assert(#input == 2, 'input must be a pair of minibatch matrices')
+   local M, v = unpack(input)
+   assert(M:nDimension() == 2 or M:nDimension() == 3, 'input matrix must be 2D or 3D')
+   assert(v:nDimension() == 1 or v:nDimension() == 2, 'input vector must be 1D or 2D')
+
+   if M:nDimension() == 2 then
+      assert(v:nDimension() == 1, 'vector must be 1D')
+
+      if self.trans then M = M:transpose(1,2) end
+      assert(M:size(2) == v:size(1), 'matrix column count and vector length do not match')
+
+      self.output:resize(M:size(1))
+      self.output:mv(M, v)
+   else
+      assert(v:nDimension() == 2, 'vector must be 2D (batch dimension)')
+      assert(M:size(1) == v:size(1), 'inputs must contain the same number of minibatches')
+
+      if self.trans then M = M:transpose(2,3) end
+      assert(M:size(3) == v:size(2), 'matrix column count and vector length do not match')
+
+      self.output:resize(M:size(1), M:size(2), 1)
+      self.output:bmm(M, v:view(v:size(1), v:size(2), 1)):resize(M:size(1), M:size(2))
+   end
+
+   return self.output
+end
+
+function MV:updateGradInput(input, gradOutput)
+   assert(#input == 2, 'input must be a pair of tensors')
+   local M, v = unpack(input)
+   self.gradInput[1]:resizeAs(M)
+   self.gradInput[2]:resizeAs(v)
+
+   assert(gradOutput:nDimension() == 1 or gradOutput:nDimension() == 2, 'gradOutput must be a 1D or 2D Tensor')
+
+   if gradOutput:nDimension() == 2 then
+      assert(M:nDimension() == 3, 'matrix must be 3D (batched)')
+      assert(v:nDimension() == 2, 'vector must be 2D (batched)')
+      local bdim = M:size(1)
+      local odim = M:size(2)
+      local idim = M:size(3)
+
+      if self.trans then
+         self.gradInput[1]:bmm(v:view(bdim, odim, 1), gradOutput:view(bdim, 1, idim))
+         self.gradInput[2]:view(bdim, odim, 1):bmm(M, gradOutput:view(bdim, idim, 1))
+      else
+         self.gradInput[1]:bmm(gradOutput:view(bdim, odim, 1), v:view(bdim, 1, idim))
+         self.gradInput[2]:view(bdim, idim, 1):bmm(M:transpose(2,3), gradOutput:view(bdim, odim, 1))
+      end
+   else
+      assert(M:nDimension() == 2, 'matrix must be 2D')
+      assert(v:nDimension() == 1, 'vector must be 1D')
+
+      if self.trans then
+         self.gradInput[1]:ger(v, gradOutput)
+         self.gradInput[2] = M * gradOutput
+      else
+         self.gradInput[1]:ger(gradOutput, v)
+         self.gradInput[2] = M:t() * gradOutput
+      end
+   end
+   return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/MapTable.lua b/contrib/lua-torch/nn/MapTable.lua
new file mode 100644
index 000000000..c79f1ea1d
--- /dev/null
+++ b/contrib/lua-torch/nn/MapTable.lua
@@ -0,0 +1,119 @@
+local MapTable, parent = torch.class('nn.MapTable', 'nn.Container')
+
+function MapTable:__init(module, shared)
+   parent.__init(self)
+   self.shared = (shared == nil) and true or shared
+   self.sharedparams = {'weight', 'bias', 'gradWeight', 'gradBias'}
+   self.output = {}
+   self.gradInput = {}
+   self:add(module)
+end
+
+function MapTable:_extend(n)
+   self.sharedparams = self.sharedparams or {'weight', 'bias', 'gradWeight', 'gradBias'}
+   self.modules[1] = self.module
+   for i = 2, n do
+
if not self.modules[i] then + if self.shared then + self.modules[i] = self.module:clone(table.unpack(self.sharedparams)) + else + self.modules[i] = self.module:clone() + end + end + end +end + +function MapTable:resize(n) + self:_extend(n) + for i = n + 1, #self.modules do + -- It's not clear why this clearState call is necessary, but it fixes + -- https://github.com/torch/nn/issues/1141 . + self.modules[i]:clearState() + self.modules[i] = nil + end +end + +function MapTable:add(module) + assert(not self.module, 'Single module required') + self.module = module + self.modules[1] = self.module + return self +end + +function MapTable:updateOutput(input) + self.output = {} + self:_extend(#input) + for i = 1, #input do + self.output[i] = self:rethrowErrors(self.modules[i], i, 'updateOutput', input[i]) + end + return self.output +end + +function MapTable:updateGradInput(input, gradOutput) + self.gradInput = {} + self:_extend(#input) + for i = 1, #input do + self.gradInput[i] = self:rethrowErrors(self.modules[i], i, 'updateGradInput', input[i], gradOutput[i]) + end + return self.gradInput +end + +function MapTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self:_extend(#input) + for i = 1, #input do + self:rethrowErrors(self.modules[i], i, 'accGradParameters', input[i], gradOutput[i], scale) + end +end + +function MapTable:accUpdateGradParameters(input, gradOutput, lr) + lr = lr or 1 + self:_extend(#input) + for i = 1, #input do + self:rethrowErrors(self.modules[i], i, 'accUpdateGradParameters', input[i], gradOutput[i], lr) + end +end + +function MapTable:zeroGradParameters() + if self.module then + if self.shared then + self.module:zeroGradParameters() + else + parent.zeroGradParameters(self) + end + end +end + +function MapTable:updateParameters(learningRate) + if self.module then + if self.shared then + self.module:updateParameters(learningRate) + else + parent.updateParameters(self, learningRate) + end + end +end + +function MapTable:clearState() + for i = 2, #self.modules do + -- It's not clear why this clearState call is necessary, but it fixes + -- https://github.com/torch/nn/issues/1141 . + self.modules[i]:clearState() + self.modules[i] = nil + end + parent.clearState(self) +end + +function MapTable:__tostring__() + local tab = ' ' + local line = '\n' + local extlast = ' ' + local str = torch.type(self) + if self.module then + str = str .. ' {' .. line .. tab + str = str .. tostring(self.module):gsub(line, line .. tab .. extlast) .. line .. '}' + else + str = str .. 
' { }' + end + return str +end diff --git a/contrib/lua-torch/nn/MarginCriterion.lua b/contrib/lua-torch/nn/MarginCriterion.lua new file mode 100644 index 000000000..1ab8ad784 --- /dev/null +++ b/contrib/lua-torch/nn/MarginCriterion.lua @@ -0,0 +1,31 @@ +local MarginCriterion, parent = torch.class('nn.MarginCriterion', 'nn.Criterion') + +function MarginCriterion:__init(margin) + parent.__init(self) + self.sizeAverage = true + self.margin = margin or 1 +end + +function MarginCriterion:updateOutput(input, target) + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.MarginCriterion_updateOutput( + input:cdata(), + target:cdata(), + self.output_tensor:cdata(), + self.sizeAverage, + self.margin + ) + self.output = self.output_tensor[1] + return self.output +end + +function MarginCriterion:updateGradInput(input, target) + input.THNN.MarginCriterion_updateGradInput( + input:cdata(), + target:cdata(), + self.gradInput:cdata(), + self.sizeAverage, + self.margin + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/MarginRankingCriterion.lua b/contrib/lua-torch/nn/MarginRankingCriterion.lua new file mode 100644 index 000000000..844d905d5 --- /dev/null +++ b/contrib/lua-torch/nn/MarginRankingCriterion.lua @@ -0,0 +1,75 @@ +local MarginRankingCriterion, parent = torch.class('nn.MarginRankingCriterion', 'nn.Criterion') + +function MarginRankingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = {torch.Tensor(1), torch.Tensor(1)} + self.sizeAverage = true +end + +function MarginRankingCriterion:updateOutput(input, y) + if torch.type(y) == 'number' then -- non-batch mode + self.output = math.max(0, -y * (input[1][1] - input[2][1]) + self.margin) + else + self._output = self._output or input[1]:clone() + self._output:resizeAs(input[1]) + self._output:copy(input[1]) + + self._output:add(-1, input[2]) + self._output:mul(-1):cmul(y) + self._output:add(self.margin) + + self._output:cmax(0) + + self.output = self._output:sum() + + if self.sizeAverage then + self.output = self.output/y:size(1) + end + end + + return self.output +end + +function MarginRankingCriterion:updateGradInput(input, y) + if torch.type(y) == 'number' then -- non-batch mode + local dist = -y * (input[1][1] - input[2][1]) + self.margin + if dist < 0 then + self.gradInput[1][1] = 0; + self.gradInput[2][1] = 0; + else + self.gradInput[1][1] = -y + self.gradInput[2][1] = y + end + else + self.dist = self.dist or input[1].new() + self.dist = self.dist:resizeAs(input[1]):copy(input[1]) + local dist = self.dist + + dist:add(-1, input[2]) + dist:mul(-1):cmul(y) + dist:add(self.margin) + + self.mask = self.mask or input[1].new() + self.mask = self.mask:resizeAs(input[1]):copy(dist) + local mask = self.mask + + mask:ge(dist, 0) + + self.gradInput[1]:resize(dist:size()) + self.gradInput[2]:resize(dist:size()) + + self.gradInput[1]:copy(mask) + self.gradInput[1]:mul(-1):cmul(y) + self.gradInput[2]:copy(mask) + self.gradInput[2]:cmul(y) + + if self.sizeAverage then + self.gradInput[1]:div(y:size(1)) + self.gradInput[2]:div(y:size(1)) + end + + end + return self.gradInput +end diff --git a/contrib/lua-torch/nn/MaskedSelect.lua b/contrib/lua-torch/nn/MaskedSelect.lua new file mode 100644 index 000000000..c3f7834e1 --- /dev/null +++ b/contrib/lua-torch/nn/MaskedSelect.lua @@ -0,0 +1,71 @@ +local unpack = unpack or table.unpack + +local MaskedSelect, parent = torch.class('nn.MaskedSelect', 'nn.Module') + +--[[ Sets the provided mask value for the module. 
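+
+   More precisely, the module takes a pair {input, mask}, where mask is a
+   ByteTensor with the same shape as input, and selects the entries of input
+   at the positions where mask is non-zero. A usage sketch (an illustrative
+   example with made-up shapes):
+
+     local ms = nn.MaskedSelect()
+     local x = torch.randn(3, 4)
+     local selected = ms:forward({x, x:gt(0)})  -- 1D tensor of masked entries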
]] +function MaskedSelect:__init() + parent.__init(self) + self._maskIndices = torch.LongTensor() + self._maskIndexBuffer = torch.LongTensor() + self._maskIndexBufferCPU = torch.FloatTensor() + self._gradBuffer = torch.Tensor() + self._gradMask = torch.ByteTensor() +end + +--[[ Performs maskedSelect operation. ]] +function MaskedSelect:updateOutput(input) + local input, mask = unpack(input) + self.output:maskedSelect(input, mask) + return self.output +end + +--[[ Reverse maps unmasked gradOutput back to gradInput. ]] +function MaskedSelect:updateGradInput(input, gradOutput) + local input, mask = unpack(input) + if input:type() == 'torch.CudaTensor' then + self._maskIndexBufferCPU:range(1, mask:nElement()):resize(mask:size()) + self._maskIndexBuffer:resize( + self._maskIndexBufferCPU:size()):copy(self._maskIndexBufferCPU) + else + self._maskIndexBuffer:range(1, mask:nElement()):resize(mask:size()) + end + self._maskIndices:maskedSelect(self._maskIndexBuffer, mask) + self._gradBuffer:resize(input:nElement()):zero() + self._gradBuffer:scatter(1, self._maskIndices, gradOutput) + self._gradBuffer:resize(input:size()) + self.gradInput = {self._gradBuffer, + self._gradMask:resize(mask:size()):fill(0)} + return self.gradInput +end + +function MaskedSelect:type(type, tensorCache) + if not type then + return self._type + end + self._gradBuffer = self._gradBuffer:type(type) + self.gradInput = self.gradInput:type(type) + self.output = self.output:type(type) + + -- These casts apply when switching between cuda/non-cuda types + if type ~= 'torch.CudaTensor' then + self._maskIndexBuffer = self._maskIndexBuffer:long() + self._maskIndices = self._maskIndices:long() + self._gradMask = self._gradMask:byte() + elseif type == 'torch.CudaTensor' then + self._maskIndexBuffer = self._maskIndexBuffer:cuda() + self._maskIndices = self._maskIndices:cuda() + self._gradMask = self._gradMask:cuda() + end + self._type = type + return self +end + +function MaskedSelect:clearState() + return nn.utils.clear(self, {'output', + 'gradInput', + '_maskIndexBuffer', + '_maskIndexBufferCPU', + '_maskIndices', + '_gradBuffer', + '_gradMask'}) +end diff --git a/contrib/lua-torch/nn/Max.lua b/contrib/lua-torch/nn/Max.lua new file mode 100644 index 000000000..8273e808c --- /dev/null +++ b/contrib/lua-torch/nn/Max.lua @@ -0,0 +1,66 @@ +local Max, parent = torch.class('nn.Max', 'nn.Module') + +function Max:__init(dimension, nInputDims) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension + -- do not assign default value to nInputDims or it will break backward compatibility + self.nInputDims = nInputDims +end + +function Max:_getPositiveDimension(input) + local dimension = self.dimension + if dimension < 0 then + dimension = input:dim() + dimension + 1 + elseif self.nInputDims and input:dim()==(self.nInputDims+1) then + dimension = dimension + 1 + end + return dimension +end + +function Max:_lazyInit() + self._output = self._output or self.output.new() + if not self._indices then + if torch.typename(self.output):find('torch%.Cuda.*Tensor') then + self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor() + else + self._indices = torch.LongTensor() + end + end +end + +function Max:updateOutput(input) + self:_lazyInit() + local dimension = self:_getPositiveDimension(input) + torch.max(self._output, self._indices, input, dimension) + if input:dim() > 1 then + self.output:set(self._output:select(dimension, 1)) + else + self.output:set(self._output) + end + return self.output +end + 
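+--[[ A forward sketch (an illustrative example with made-up sizes): nn.Maxout
+below relies on nn.Max(1, 2) to reduce over its piece dimension; more simply,
+
+   local m = nn.Max(2)                       -- max over the second dimension
+   local out = m:forward(torch.randn(4, 3))  -- a 4-element vector of row maxima
+]]
+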
+function Max:updateGradInput(input, gradOutput) + self:_lazyInit() + local dimension = self:_getPositiveDimension(input) + local gradOutputView + if input:dim() > 1 then + gradOutputView = nn.utils.addSingletonDimension(gradOutput, dimension) + else + gradOutputView = gradOutput + end + self.gradInput:resizeAs(input):zero():scatter(dimension, self._indices, gradOutputView) + return self.gradInput +end + +function Max:type(type, tensorCache) + self._indices = nil + parent.type(self, type, tensorCache) + return self +end + +function Max:clearState() + nn.utils.clear(self, '_indices', '_output') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Maxout.lua b/contrib/lua-torch/nn/Maxout.lua new file mode 100644 index 000000000..a797a9f43 --- /dev/null +++ b/contrib/lua-torch/nn/Maxout.lua @@ -0,0 +1,13 @@ +-- Reference: http://jmlr.org/proceedings/papers/v28/goodfellow13.pdf + +local Maxout, parent = torch.class('nn.Maxout', 'nn.Sequential') + +function Maxout:__init(inputSize, outputSize, maxoutNumber, preprocess) + parent.__init(self) + self:add(nn.Linear(inputSize, outputSize * maxoutNumber)) + self:add(nn.View(maxoutNumber, outputSize):setNumInputDims(1)) + if preprocess then + self:add(preprocess) + end + self:add(nn.Max(1, 2)) +end diff --git a/contrib/lua-torch/nn/Mean.lua b/contrib/lua-torch/nn/Mean.lua new file mode 100644 index 000000000..8087ac95e --- /dev/null +++ b/contrib/lua-torch/nn/Mean.lua @@ -0,0 +1,14 @@ +local Mean, parent = torch.class('nn.Mean', 'nn.Sum') + +--[[ + +This file is still here because of backward compatibility. + +Please use instead "nn.Sum(dimension, nInputDims, sizeAverage)" + +]]-- + + +function Mean:__init(dimension, nInputDims) + parent.__init(self, dimension, nInputDims, true) +end diff --git a/contrib/lua-torch/nn/Min.lua b/contrib/lua-torch/nn/Min.lua new file mode 100644 index 000000000..3a3e4a802 --- /dev/null +++ b/contrib/lua-torch/nn/Min.lua @@ -0,0 +1,66 @@ +local Min, parent = torch.class('nn.Min', 'nn.Module') + +function Min:__init(dimension, nInputDims) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension + -- do not assign default value to nInputDims or it will break backward compatibility + self.nInputDims = nInputDims +end + +function Min:_getPositiveDimension(input) + local dimension = self.dimension + if dimension < 0 then + dimension = input:dim() + dimension + 1 + elseif self.nInputDims and input:dim()==(self.nInputDims+1) then + dimension = dimension + 1 + end + return dimension +end + +function Min:_lazyInit() + self._output = self._output or self.output.new() + if not self._indices then + if torch.typename(self.output):find('torch%.Cuda.*Tensor') then + self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor() + else + self._indices = torch.LongTensor() + end + end +end + +function Min:updateOutput(input) + self:_lazyInit() + local dimension = self:_getPositiveDimension(input) + torch.min(self._output, self._indices, input, dimension) + if input:dim() > 1 then + self.output:set(self._output:select(dimension, 1)) + else + self.output:set(self._output) + end + return self.output +end + +function Min:updateGradInput(input, gradOutput) + self:_lazyInit() + local dimension = self:_getPositiveDimension(input) + local gradOutputView + if input:dim() > 1 then + gradOutputView = nn.utils.addSingletonDimension(gradOutput, dimension) + else + gradOutputView = gradOutput + end + self.gradInput:resizeAs(input):zero():scatter(dimension, self._indices, gradOutputView) + 
return self.gradInput +end + +function Min:type(type, tensorCache) + self._indices = nil + parent.type(self, type, tensorCache) + return self +end + +function Min:clearState() + nn.utils.clear(self, '_indices', '_output') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/MixtureTable.lua b/contrib/lua-torch/nn/MixtureTable.lua new file mode 100644 index 000000000..dbe19742f --- /dev/null +++ b/contrib/lua-torch/nn/MixtureTable.lua @@ -0,0 +1,165 @@ +local MixtureTable, parent = torch.class('nn.MixtureTable', 'nn.Module') + +function MixtureTable:__init(dim) + parent.__init(self) + self.dim = dim + self.size = torch.LongStorage() + self.batchSize = 0 + self.size2 = torch.LongStorage() + self.backwardSetup = false + self.gradInput = {} +end + +function MixtureTable:updateOutput(input) + local gaterInput, expertInputs = table.unpack(input) + + -- buffers + self._gaterView = self._gaterView or input[1].new() + self._expert = self._expert or input[1].new() + self._expertView = self._expertView or input[1].new() + + self.dimG = 2 + local batchSize = gaterInput:size(1) + if gaterInput:dim() < 2 then + self.dimG = 1 + self.dim = self.dim or 1 + batchSize = 1 + end + self.dim = self.dim or 2 + + if self.table or torch.type(expertInputs) == 'table' then + -- expertInputs is a Table : + self.table = true + if gaterInput:size(self.dimG) ~= #expertInputs then + error"Should be one gater output per expert" + end + local expertInput = expertInputs[1] + self.size:resize(expertInput:dim()+1):fill(1) + if self.dimG > 1 then + self.size[1] = gaterInput:size(1) + end + self.size[self.dim] = gaterInput:size(self.dimG) + self.output:resizeAs(expertInput) + self.batchSize = batchSize + self._gaterView:view(gaterInput, self.size) + self.output:zero() + -- multiply accumulate gater outputs by their commensurate expert + for i,expertInput in ipairs(expertInputs) do + local gate = self._gaterView:select(self.dim,i):expandAs(expertInput) + self.output:addcmul(expertInput, gate) + end + else + -- expertInputs is a Tensor : + self.size:resize(expertInputs:dim()):fill(1) + if self.dimG > 1 then + self.size[1] = gaterInput:size(1) + end + self.size[self.dim] = gaterInput:size(self.dimG) + self.output:resizeAs(expertInputs:select(self.dim, 1)) + self.batchSize = batchSize + self._gaterView:view(gaterInput, self.size) + self._expert:cmul(self._gaterView:expandAs(expertInputs), expertInputs) + self.output:sum(self._expert, self.dim) + self.output:resizeAs(expertInputs:select(self.dim, 1)) + end + + return self.output +end + +function MixtureTable:updateGradInput(input, gradOutput) + local gaterInput, expertInputs = table.unpack(input) + nn.utils.recursiveResizeAs(self.gradInput, input) + local gaterGradInput, expertGradInputs = table.unpack(self.gradInput) + + -- buffers + self._sum = self._sum or input[1].new() + self._expertView2 = self._expertView2 or input[1].new() + self._expert2 = self._expert2 or input[1].new() + + if self.table then + for i,expertInput in ipairs(expertInputs) do + local expertGradInput = expertGradInputs[i] or expertInput:clone() + expertGradInput:resizeAs(expertInput) + expertGradInputs[i] = expertGradInput + end + gaterGradInput:resizeAs(gaterInput) + + -- Clear invalid gradients + if #expertGradInputs > #expertInputs then + for i=#expertInputs+1, #expertGradInputs do + expertGradInputs[i] = nil + end + end + + -- like CMulTable, but with broadcasting + for i,expertGradInput in ipairs(expertGradInputs) do + -- gater updateGradInput + self._expert:cmul(gradOutput, 
expertInputs[i]) + if self.dimG == 1 then + self._expertView:view(self._expert, -1) + else + self._expertView:view(self._expert, gradOutput:size(1), -1) + end + self._sum:sum(self._expertView, self.dimG) + if self.dimG == 1 then + gaterGradInput[i] = self._sum:select(self.dimG,1) + else + gaterGradInput:select(self.dimG,i):copy(self._sum:select(self.dimG,1)) + end + + -- expert updateGradInput + local gate = self._gaterView:select(self.dim,i):expandAs(expertGradInput) + expertGradInput:cmul(gate, gradOutput) + end + else + self.size2:resize(expertInputs:dim()) + self.size2:copy(expertInputs:size()) + self.size2[self.dim] = 1 + gaterGradInput:resizeAs(gaterInput) + + -- gater updateGradInput + self._expertView:view(gradOutput, self.size2) + local gradOutput = self._expertView:expandAs(expertInputs) + self._expert:cmul(gradOutput, expertInputs) + local expert = self._expert:transpose(self.dim, self.dimG) + if not expert:isContiguous() then + self._expert2:resizeAs(expert) + self._expert2:copy(expert) + expert = self._expert2 + end + if self.dimG == 1 then + self._expertView2:view(expert, gaterInput:size(1), -1) + else + self._expertView2:view(expert, gaterInput:size(1), gaterInput:size(2), -1) + end + gaterGradInput:sum(self._expertView2, self.dimG+1) + gaterGradInput:resizeAs(gaterInput) + + -- expert updateGradInput + expertGradInputs:cmul(self._gaterView:expandAs(expertInputs), gradOutput) + end + + return self.gradInput +end + +function MixtureTable:type(type, tensorCache) + self._gaterView = nil + self._expert = nil + self._expertView = nil + self._sum = nil + self._expert2 = nil + self._expertView2 = nil + return parent.type(self, type, tensorCache) +end + +function MixtureTable:clearState() + nn.utils.clear(self, { + '_gaterView', + '_expert', + '_expertView', + '_sum', + '_expert2', + '_expertView2', + }) + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Module.lua b/contrib/lua-torch/nn/Module.lua new file mode 100644 index 000000000..3debc5789 --- /dev/null +++ b/contrib/lua-torch/nn/Module.lua @@ -0,0 +1,429 @@ +local Module = torch.class('nn.Module') + +function Module:__init() + self.gradInput = torch.Tensor() + self.output = torch.Tensor() + self._type = self.output:type() +end + +function Module:parameters() + if self.weight and self.bias then + return {self.weight, self.bias}, {self.gradWeight, self.gradBias} + elseif self.weight then + return {self.weight}, {self.gradWeight} + elseif self.bias then + return {self.bias}, {self.gradBias} + else + return + end +end + +function Module:updateOutput(input) + return self.output +end + +function Module:forward(input) + return self:updateOutput(input) +end + +function Module:backward(input, gradOutput, scale) + scale = scale or 1 + self:updateGradInput(input, gradOutput) + self:accGradParameters(input, gradOutput, scale) + return self.gradInput +end + +function Module:backwardUpdate(input, gradOutput, lr) + self:updateGradInput(input, gradOutput) + self:accUpdateGradParameters(input, gradOutput, lr) + return self.gradInput +end + +function Module:updateGradInput(input, gradOutput) + return self.gradInput +end + +function Module:accGradParameters(input, gradOutput, scale) +end + +function Module:accUpdateGradParameters(input, gradOutput, lr) + if self.shared then + self:sharedAccUpdateGradParameters(input, gradOutput, lr) + else + self:defaultAccUpdateGradParameters(input, gradOutput, lr) + end +end + +function Module:defaultAccUpdateGradParameters(input, gradOutput, lr) + local gradWeight = self.gradWeight + 
local gradBias = self.gradBias + self.gradWeight = self.weight + self.gradBias = self.bias + self:accGradParameters(input, gradOutput, -lr) + self.gradWeight = gradWeight + self.gradBias = gradBias +end + +function Module:sharedAccUpdateGradParameters(input, gradOutput, lr) + if self:parameters() then + self:zeroGradParameters() + self:accGradParameters(input, gradOutput, 1) + self:updateParameters(lr) + end +end + +function Module:zeroGradParameters() + local _,gradParams = self:parameters() + if gradParams then + for i=1,#gradParams do + gradParams[i]:zero() + end + end +end + +function Module:updateParameters(learningRate) + local params, gradParams = self:parameters() + if params then + for i=1,#params do + params[i]:add(-learningRate, gradParams[i]) + end + end +end + +function Module:training() + self.train = true +end + +function Module:evaluate() + self.train = false +end + +function Module:share(mlp, ...) + local arg = {...} + for i,v in ipairs(arg) do + if self[v] ~= nil then + self[v]:set(mlp[v]) + self.shared = true + mlp.shared = true + end + end + return self +end + +local function sharedWrite(...) + local arg = {...} + local shared = {} + for i,v in ipairs(arg) do + shared[v] = true + end + return function(self, file) + local object = {} + for k, v in pairs(self) do + if shared[k] then + assert(torch.isTensor(v), 'Shared parameters have to be Tensors') + object[k] = v.new() + else + object[k] = v + end + end + file:writeObject(object) + end +end + +function Module:clone(...) + local oldWrite = nn.Module.write + nn.Module.write = sharedWrite(...) + + local f = torch.MemoryFile("rw"):binary() + f:writeObject(self) + f:seek(1) + local clone = f:readObject() + f:close() + + nn.Module.write = oldWrite + + if select('#',...) > 0 then + clone:share(self,...) + end + return clone +end + +function Module:type(type, tensorCache) + if not type then + return self._type + end + + tensorCache = tensorCache or {} + + -- find all tensors and convert them + for key,param in pairs(self) do + self[key] = nn.utils.recursiveType(param, type, tensorCache) + end + + self._type = type + return self +end + +function Module:float(...) + return self:type('torch.FloatTensor',...) +end + +function Module:double(...) + return self:type('torch.DoubleTensor',...) +end + +function Module:cuda(...) + return self:type('torch.CudaTensor',...) +end + +function Module:reset() +end + +function Module:write(file) + -- Write all values in the object as a table. + local object = {} + for k, v in pairs(self) do + object[k] = v + end + file:writeObject(object) +end + +function Module:read(file) + local object = file:readObject() + for k, v in pairs(object) do + self[k] = v + end +end + +-- This function is not easy to understand. It works as follows: +-- +-- - gather all parameter tensors for this module (and children); +-- count all parameter values (floats) +-- - create one ginormous memory area (Storage object) with room for all +-- parameters +-- - remap each parameter tensor to point to an area within the ginormous +-- Storage, and copy it there +-- +-- It has the effect of making all parameters point to the same memory area, +-- which is then returned. +-- +-- The purpose is to allow operations over all parameters (such as momentum +-- updates and serialization), but it assumes that all parameters are of +-- the same type (and, in the case of CUDA, on the same device), which +-- is not always true. Use for_each() to iterate over this module and +-- children instead. 
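+--
+-- A typical call path (an illustrative sketch):
+--
+--   local net = nn.Sequential():add(nn.Linear(10, 5)):add(nn.Linear(5, 2))
+--   local params, gradParams = net:getParameters() -- flattens via Module.flatten
+--   params:add(-0.01, gradParams)                  -- one SGD step on all weights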
+-- +-- Module._flattenTensorBuffer can be used by other packages (e.g. cunn) +-- to specify the type of temporary buffers. For example, the temporary +-- buffers for CudaTensor could be FloatTensor, to avoid GPU memory usage. +-- +-- TODO: This logically belongs to torch.Tensor, not nn. +Module._flattenTensorBuffer = {} +function Module.flatten(parameters) + + -- returns true if tensor occupies a contiguous region of memory (no holes) + local function isCompact(tensor) + local sortedStride, perm = torch.sort( + torch.LongTensor(tensor:nDimension()):set(tensor:stride()), 1, true) + local sortedSize = torch.LongTensor(tensor:nDimension()):set( + tensor:size()):index(1, perm) + local nRealDim = torch.clamp(sortedStride, 0, 1):sum() + sortedStride = sortedStride:narrow(1, 1, nRealDim):clone() + sortedSize = sortedSize:narrow(1, 1, nRealDim):clone() + local t = tensor.new():set(tensor:storage(), 1, + sortedSize:storage(), + sortedStride:storage()) + return t:isContiguous() + end + + if not parameters or #parameters == 0 then + return torch.Tensor() + end + local Tensor = parameters[1].new + local TmpTensor = Module._flattenTensorBuffer[torch.type(parameters[1])] or Tensor + + -- 1. construct the set of all unique storages referenced by parameter tensors + local storages = {} + local nParameters = 0 + local parameterMeta = {} + for k = 1,#parameters do + local param = parameters[k] + local storage = parameters[k]:storage() + local storageKey = torch.pointer(storage) + + if not storages[storageKey] then + storages[storageKey] = {storage, nParameters} + nParameters = nParameters + storage:size() + end + + parameterMeta[k] = {storageOffset = param:storageOffset() + + storages[storageKey][2], + size = param:size(), + stride = param:stride()} + end + + -- 2. construct a single tensor that will hold all the parameters + local flatParameters = TmpTensor(nParameters):zero() + + -- 3. determine if there are elements in the storage that none of the + -- parameter tensors reference ('holes') + local tensorsCompact = true + for k = 1,#parameters do + local meta = parameterMeta[k] + local tmp = TmpTensor():set( + flatParameters:storage(), meta.storageOffset, meta.size, meta.stride) + tmp:fill(1) + tensorsCompact = tensorsCompact and isCompact(tmp) + end + + local maskParameters = flatParameters:byte():clone() + local compactOffsets = flatParameters:long():cumsum(1) + local nUsedParameters = compactOffsets[-1] + + -- 4. copy storages into the flattened parameter tensor + for _, storageAndOffset in pairs(storages) do + local storage, offset = table.unpack(storageAndOffset) + flatParameters[{{offset+1,offset+storage:size()}}]:copy(Tensor():set(storage)) + end + + -- 5. allow garbage collection + storages = nil + for k = 1,#parameters do + parameters[k]:set(Tensor()) + end + + -- 6. compact the flattened parameters if there were holes + if nUsedParameters ~= nParameters then + assert(tensorsCompact, + "Cannot gather tensors that are not compact") + + flatParameters = TmpTensor(nUsedParameters):copy( + flatParameters:maskedSelect(maskParameters)) + for k = 1,#parameters do + parameterMeta[k].storageOffset = + compactOffsets[parameterMeta[k].storageOffset] + end + end + + if TmpTensor ~= Tensor then + flatParameters = Tensor(flatParameters:nElement()):copy(flatParameters) + end + + -- 7. 
fix up the parameter tensors to point at the flattened parameters + for k = 1,#parameters do + parameters[k]:set(flatParameters:storage(), + parameterMeta[k].storageOffset, + parameterMeta[k].size, + parameterMeta[k].stride) + end + + return flatParameters +end + +function Module:getParameters() + -- get parameters + local parameters,gradParameters = self:parameters() + local p, g = Module.flatten(parameters), Module.flatten(gradParameters) + assert(p:nElement() == g:nElement(), + 'check that you are sharing parameters and gradParameters') + if parameters then + for i=1,#parameters do + assert(parameters[i]:storageOffset() == gradParameters[i]:storageOffset(), + 'misaligned parameter at ' .. tostring(i)) + end + end + return p, g +end + +function Module:__call__(input, gradOutput) + self:forward(input) + if gradOutput then + self:backward(input, gradOutput) + return self.output, self.gradInput + else + return self.output + end +end + +-- Run a callback (called with the module as an argument) in preorder over this +-- module and its children. +-- +function Module:apply(callback) + callback(self) + + if self.modules then + for _, module in ipairs(self.modules) do + module:apply(callback) + end + end +end + +function Module:findModules(typename, container) + container = container or self + local nodes = {} + local containers = {} + local mod_type = torch.typename(self) + if mod_type == typename then + nodes[#nodes+1] = self + containers[#containers+1] = container + end + -- Recurse on nodes with 'modules' + if (self.modules ~= nil) then + if (torch.type(self.modules) == 'table') then + for i = 1, #self.modules do + local child = self.modules[i] + local cur_nodes, cur_containers = + child:findModules(typename, self) + assert(#cur_nodes == #cur_containers, + 'Internal error: incorrect return length') -- This shouldn't happen + -- add the list items from our child to our list (ie return a + -- flattened table of the return nodes). 
+ for j = 1, #cur_nodes do + nodes[#nodes+1] = cur_nodes[j] + containers[#containers+1] = cur_containers[j] + end + end + end + end + return nodes, containers +end + +-- returns a list of modules +function Module:listModules() + local function tinsert(to, from) + if torch.type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + -- include self first + local modules = {self} + if self.modules then + for i=1,#self.modules do + local modulas = self.modules[i]:listModules() + if modulas then + tinsert(modules,modulas) + end + end + end + return modules +end + +function Module:clearState() + return nn.utils.clear(self, 'output', 'gradInput') +end + +-- similar to apply, recursively goes over network and calls +-- a callback function which returns a new module replacing the old one +function nn.Module:replace(callback) + local out = callback(self) + if self.modules then + for i, module in ipairs(self.modules) do + self.modules[i] = module:replace(callback) + end + end + return out +end diff --git a/contrib/lua-torch/nn/ModuleCriterion.lua b/contrib/lua-torch/nn/ModuleCriterion.lua new file mode 100644 index 000000000..bfc79ef55 --- /dev/null +++ b/contrib/lua-torch/nn/ModuleCriterion.lua @@ -0,0 +1,44 @@ +local ModuleCriterion, parent = torch.class("nn.ModuleCriterion", "nn.Criterion") + +function ModuleCriterion:__init(criterion, inputModule, targetModule, castTarget) + self.inputModule = inputModule + self.targetModule = targetModule + self.castTarget = (castTarget == nil) and true or castTarget + if self.inputModule then + local params = self.inputModule:parameters() + if params and #params > 0 then + print"Warning: nn.ModuleCriterion doesn't support parameter updates" + end + end + self.criterion = criterion +end + +function ModuleCriterion:updateOutput(input, target) + if self.inputModule then + self.input = self.inputModule:forward(input) + end + if self.targetModule then + self.target = self.targetModule:forward(target) + end + self.output = self.criterion:forward(self.input or input, self.target or target) + return self.output +end + +function ModuleCriterion:updateGradInput(input, target) + self.gradInput = self.criterion:backward(self.input or input, self.target or target) + if self.inputModule then + self.gradInput = self.inputModule:backward(input, self.gradInput) + end + return self.gradInput +end + +function ModuleCriterion:type(type, typecache) + if self.inputModule then + self.inputModule:type(type, typecache) + end + if self.castTarget and self.targetModule then + self.targetModule:type(type, typecache) + end + self.criterion:type(type, typecache) + return parent.type(self, type, typecache) +end diff --git a/contrib/lua-torch/nn/Mul.lua b/contrib/lua-torch/nn/Mul.lua new file mode 100644 index 000000000..efa1db656 --- /dev/null +++ b/contrib/lua-torch/nn/Mul.lua @@ -0,0 +1,38 @@ +local Mul, parent = torch.class('nn.Mul', 'nn.Module') + +function Mul:__init() + parent.__init(self) + + self.weight = torch.Tensor(1) + self.gradWeight = torch.Tensor(1) + + self:reset() +end + + +function Mul:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + self.weight:uniform(-stdv, stdv); +end + +function Mul:updateOutput(input) + self.output:resizeAs(input):copy(input); + self.output:mul(self.weight[1]); + return self.output +end + +function Mul:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):zero() + self.gradInput:add(self.weight[1], gradOutput) + 
return self.gradInput +end + +function Mul:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self.gradWeight[1] = self.gradWeight[1] + scale*input:dot(gradOutput); +end diff --git a/contrib/lua-torch/nn/MulConstant.lua b/contrib/lua-torch/nn/MulConstant.lua new file mode 100644 index 000000000..e8c473bee --- /dev/null +++ b/contrib/lua-torch/nn/MulConstant.lua @@ -0,0 +1,41 @@ +local MulConstant, parent = torch.class('nn.MulConstant', 'nn.Module') + +function MulConstant:__init(constant_scalar,ip) + parent.__init(self) + assert(type(constant_scalar) == 'number', 'input is not scalar!') + self.constant_scalar = constant_scalar + + -- default for inplace is false + self.inplace = ip or false + if (ip and type(ip) ~= 'boolean') then + error('in-place flag must be boolean') + end +end + +function MulConstant:updateOutput(input) + if self.inplace then + input:mul(self.constant_scalar) + self.output:set(input) + else + self.output:resizeAs(input) + self.output:copy(input) + self.output:mul(self.constant_scalar) + end + return self.output +end + +function MulConstant:updateGradInput(input, gradOutput) + if self.gradInput then + if self.inplace then + gradOutput:mul(self.constant_scalar) + self.gradInput:set(gradOutput) + -- restore previous input value + input:div(self.constant_scalar) + else + self.gradInput:resizeAs(gradOutput) + self.gradInput:copy(gradOutput) + self.gradInput:mul(self.constant_scalar) + end + return self.gradInput + end +end diff --git a/contrib/lua-torch/nn/MultiCriterion.lua b/contrib/lua-torch/nn/MultiCriterion.lua new file mode 100644 index 000000000..959317711 --- /dev/null +++ b/contrib/lua-torch/nn/MultiCriterion.lua @@ -0,0 +1,40 @@ +local MultiCriterion, parent = torch.class('nn.MultiCriterion', 'nn.Criterion') + +function MultiCriterion:__init() + parent.__init(self) + self.criterions = {} + self.weights = torch.DoubleStorage() +end + +function MultiCriterion:add(criterion, weight) + assert(criterion, 'no criterion provided') + weight = weight or 1 + table.insert(self.criterions, criterion) + self.weights:resize(#self.criterions, true) + self.weights[#self.criterions] = weight + return self +end + +function MultiCriterion:updateOutput(input, target) + self.output = 0 + for i=1,#self.criterions do + self.output = self.output + self.weights[i]*self.criterions[i]:updateOutput(input, target) + end + return self.output +end + +function MultiCriterion:updateGradInput(input, target) + self.gradInput = nn.utils.recursiveResizeAs(self.gradInput, input) + nn.utils.recursiveFill(self.gradInput, 0) + for i=1,#self.criterions do + nn.utils.recursiveAdd(self.gradInput, self.weights[i], self.criterions[i]:updateGradInput(input, target)) + end + return self.gradInput +end + +function MultiCriterion:type(type) + for i,criterion in ipairs(self.criterions) do + criterion:type(type) + end + return parent.type(self, type) +end diff --git a/contrib/lua-torch/nn/MultiLabelMarginCriterion.lua b/contrib/lua-torch/nn/MultiLabelMarginCriterion.lua new file mode 100644 index 000000000..908b6133c --- /dev/null +++ b/contrib/lua-torch/nn/MultiLabelMarginCriterion.lua @@ -0,0 +1,41 @@ +local MultiLabelMarginCriterion, parent = torch.class('nn.MultiLabelMarginCriterion', 'nn.Criterion') + +function MultiLabelMarginCriterion:__init() + parent.__init(self) + self.sizeAverage = true + self.isTarget = torch.Tensor() +end + +function MultiLabelMarginCriterion:updateOutput(input, target) + if torch.typename(input):find('torch%.Cuda.*Tensor') then + target = torch.CudaLongTensor and 
target:cudaLong() or target + else + target = target:long() + end + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.MultiLabelMarginCriterion_updateOutput( + input:cdata(), + target:cdata(), + self.output_tensor:cdata(), + self.isTarget:cdata(), + self.sizeAverage + ) + self.output = self.output_tensor[1] + return self.output +end + +function MultiLabelMarginCriterion:updateGradInput(input, target) + if torch.typename(input):find('torch%.Cuda.*Tensor') then + target = torch.CudaLongTensor and target:cudaLong() or target + else + target = target:long() + end + input.THNN.MultiLabelMarginCriterion_updateGradInput( + input:cdata(), + target:cdata(), + self.gradInput:cdata(), + self.isTarget:cdata(), + self.sizeAverage + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/MultiLabelSoftMarginCriterion.lua b/contrib/lua-torch/nn/MultiLabelSoftMarginCriterion.lua new file mode 100644 index 000000000..9d471d449 --- /dev/null +++ b/contrib/lua-torch/nn/MultiLabelSoftMarginCriterion.lua @@ -0,0 +1,86 @@ +--[[ +-- A MultiLabel multiclass criterion based on sigmoid: +-- +-- the loss is: +-- l(x,y) = - sum_i y[i] * log(p[i]) + (1 - y[i]) * log (1 - p[i]) +-- where p[i] = exp(x[i]) / (1 + exp(x[i])) +-- +-- and with weights: +-- l(x,y) = - sum_i weights[i] (y[i] * log(p[i]) + (1 - y[i]) * log (1 - p[i])) +-- +-- This uses the stable form of the loss and gradients. +--]] + + +local MultiLabelSoftMarginCriterion, parent = torch.class('nn.MultiLabelSoftMarginCriterion', 'nn.Criterion') + + +function MultiLabelSoftMarginCriterion:__init(weights, sizeAverage) + parent.__init(self) + if sizeAverage ~= nil then + self.sizeAverage = sizeAverage + else + self.sizeAverage = true + end + if weights ~= nil then + assert(weights:dim() == 1, "weights input should be 1-D Tensor") + self.weights = weights + end + self.sigmoid = nn.Sigmoid() +end + +function MultiLabelSoftMarginCriterion:updateOutput(input, target) + local weights = self.weights + if weights ~= nil and target:dim() ~= 1 then + weights = self.weights:view(1, target:size(2)):expandAs(target) + end + + local x = input:view(input:nElement()) + local t = target:view(target:nElement()) + + self.sigmoid:updateOutput(x) + + self._buffer1 = self._buffer1 or input.new() + self._buffer2 = self._buffer2 or input.new() + + self._buffer1:ge(x, 0) -- indicator + + -- log(1 + exp(x - cmul(x, indicator):mul(2))) + self._buffer2:cmul(x, self._buffer1):mul(-2):add(x):exp():add(1):log() + -- cmul(x, t - indicator) + self._buffer1:mul(-1):add(t):cmul(x) + -- log(1 + exp(x - cmul(x, indicator):mul(2))) - cmul(x, t - indicator) + self._buffer2:add(-1, self._buffer1) + + if weights ~= nil then + self._buffer2:cmul(weights) + end + + self.output = self._buffer2:sum() + + if self.sizeAverage then + self.output = self.output / input:nElement() + end + + return self.output +end + +function MultiLabelSoftMarginCriterion:updateGradInput(input, target) + local weights = self.weights + if weights ~= nil and target:dim() ~= 1 then + weights = self.weights:view(1, target:size(2)):expandAs(target) + end + + self.gradInput:resizeAs(input):copy(self.sigmoid.output) + self.gradInput:add(-1, target) + + if weights ~= nil then + self.gradInput:cmul(weights) + end + + if self.sizeAverage then + self.gradInput:div(target:nElement()) + end + + return self.gradInput +end diff --git a/contrib/lua-torch/nn/MultiMarginCriterion.lua b/contrib/lua-torch/nn/MultiMarginCriterion.lua new file mode 100644 index 000000000..e3122386a --- /dev/null +++ 
b/contrib/lua-torch/nn/MultiMarginCriterion.lua @@ -0,0 +1,64 @@ +local THNN = require 'nn.THNN' +local MultiMarginCriterion, parent = torch.class('nn.MultiMarginCriterion', 'nn.Criterion') + +function MultiMarginCriterion:__init(p, weights, margin) + assert(p == nil or p == 1 or p == 2, 'only p=1 and p=2 supported') + self.p = p or 1 + self.margin = margin or 1.0 + parent.__init(self) + self.sizeAverage = true + if weights then + assert(weights:dim() == 1, "weights input should be 1-D Tensor") + self.weights = weights + end +end + +function MultiMarginCriterion:updateOutput(input, target) + -- backward compatibility + if not torch.isTensor(target) then + self.target_tensor = self.target_tensor or torch.LongTensor(1) + self.target_tensor[1] = target + target = self.target_tensor + end + if torch.typename(input):find('torch%.Cuda.*Tensor') then + target = torch.CudaLongTensor and target:cudaLong() or target + else + target = target:long() + end + self.p = self.p or 1 + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.MultiMarginCriterion_updateOutput( + input:cdata(), + target:cdata(), + self.output_tensor:cdata(), + self.sizeAverage, + self.p, + THNN.optionalTensor(self.weights), + self.margin + ) + self.output = self.output_tensor[1] + return self.output +end + +function MultiMarginCriterion:updateGradInput(input, target) + if not torch.isTensor(target) then + self.target_tensor = self.target_tensor or torch.LongTensor(1) + self.target_tensor[1] = target + target = self.target_tensor + end + if torch.typename(input):find('torch%.Cuda.*Tensor') then + target = torch.CudaLongTensor and target:cudaLong() or target + else + target = target:long() + end + input.THNN.MultiMarginCriterion_updateGradInput( + input:cdata(), + target:cdata(), + self.gradInput:cdata(), + self.sizeAverage, + self.p, + THNN.optionalTensor(self.weights), + self.margin + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/NaN.lua b/contrib/lua-torch/nn/NaN.lua new file mode 100644 index 000000000..b80f6a04d --- /dev/null +++ b/contrib/lua-torch/nn/NaN.lua @@ -0,0 +1,72 @@ +------------------------------------------------------------------------ +--[[ NaN ]]-- +-- Asserts that outputs and gradInputs do not contain NaNs. +-- Useful for locating the source of NaN errors. 
+------------------------------------------------------------------------ +local NaN, parent = torch.class("nn.NaN", "nn.Decorator") + +local idseq = 0 +function NaN.newId() + idseq = idseq + 1 + return idseq +end + +function NaN:__init(module, id) + parent.__init(self, module) + self.id = id or NaN.newId() +end + +function NaN:recursiveIsNaN(tensor) + local isNaN = false + if torch.type(tensor) == 'table' then + for k,v in pairs(tensor) do + isNaN = self:recursiveIsNaN(v) + if isNaN then break end + end + else + local _ = require 'moses' + isNaN = _.isNaN(tensor:sum()) + end + return isNaN +end + +function NaN:updateOutput(input) + self.output = self.modules[1]:updateOutput(input) + if self:recursiveIsNaN(self.output) then + if self:recursiveIsNaN(input) then + error(string.format("NaN found in input of module :\n%s", self:__tostring__())) + elseif self:recursiveIsNaN(self:parameters()) then + error(string.format("NaN found in parameters of module :\n%s", self:__tostring__())) + end + error(string.format("NaN found in output of module :\n%s", self:__tostring__())) + end + return self.output +end + +function NaN:updateGradInput(input, gradOutput) + self.gradInput = self.modules[1]:updateGradInput(input, gradOutput) + if self:recursiveIsNaN(self.gradInput) then + if self:recursiveIsNaN(gradOutput) then + error(string.format("NaN found in gradOutput of module :\n%s", self:__tostring__())) + end + error(string.format("NaN found in gradInput of module :\n%s", self:__tostring__())) + end + return self.gradInput +end + +function NaN:accGradParameters(input, gradOutput, scale) + self.modules[1]:accGradParameters(input, gradOutput, scale) + local params, gradParams = self:parameters() + if self:recursiveIsNaN(gradParams) then + error(string.format("NaN found in gradParameters of module :\n%s", self:__tostring__())) + end +end + +function NaN:__tostring__() + local selfstring = torch.type(self) .. '(' .. self.id .. ')' + if self.modules[1].__tostring__ then + return selfstring .. ' @ ' .. self.modules[1]:__tostring__() + else + return selfstring .. ' @ ' .. 
torch.type(self.modules[1]) + end +end diff --git a/contrib/lua-torch/nn/Narrow.lua b/contrib/lua-torch/nn/Narrow.lua new file mode 100644 index 000000000..a6ebaa321 --- /dev/null +++ b/contrib/lua-torch/nn/Narrow.lua @@ -0,0 +1,45 @@ +local Narrow, parent = torch.class('nn.Narrow', 'nn.Module') + +function Narrow:__init(dimension,offset,length) + parent.__init(self) + self.dimension=dimension + self.index=offset + self.length=length or 1 + if not dimension or not offset then + error('nn.Narrow(dimension, offset, length)') + end +end + +function Narrow:updateOutput(input) + local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension + local length = self.length + if length < 0 then + length = input:size(dim) - self.index + self.length + 2 + end + local index = self.index + if self.index < 0 then + index = 1 + length = input:size(dim) - length + end + local output=input:narrow(dim, index, length) + self.output = self.output:typeAs(output) + self.output:resizeAs(output):copy(output) + return self.output +end + +function Narrow:updateGradInput(input, gradOutput) + local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension + local length = self.length + if length < 0 then + length = input:size(dim) - self.index + self.length + 2 + end + local index = self.index + if self.index < 0 then + index = 1 + length = input:size(dim) - length + end + self.gradInput = self.gradInput:typeAs(input) + self.gradInput:resizeAs(input):zero() + self.gradInput:narrow(dim,index,length):copy(gradOutput) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/NarrowTable.lua b/contrib/lua-torch/nn/NarrowTable.lua new file mode 100644 index 000000000..17429f3b1 --- /dev/null +++ b/contrib/lua-torch/nn/NarrowTable.lua @@ -0,0 +1,43 @@ +local NarrowTable, parent = torch.class('nn.NarrowTable', 'nn.Module') + +function NarrowTable:__init(offset, length) + parent.__init(self) + self.offset = offset + self.length = length or 1 + if not offset then + error('nn.NarrowTable(offset, length)') + end + + self.output = {} + self.gradInput = {} +end + +function NarrowTable:updateOutput(input) + for k,v in ipairs(self.output) do self.output[k] = nil end + for i=1,self.length do + self.output[i] = input[self.offset+i-1] + end + return self.output +end + +function NarrowTable:updateGradInput(input, gradOutput) + for i=1,#gradOutput do + self.gradInput[self.offset+i-1] = gradOutput[i] + end + for i=1,#input do + if (i < self.offset) or (i >= self.offset + self.length) then + self.gradInput[i] = nn.utils.recursiveResizeAs(self.gradInput[i], input[i]) + nn.utils.recursiveFill(self.gradInput[i], 0) + end + end + for i=#input+1,#self.gradInput do self.gradInput[i] = nil end + return self.gradInput +end + +function NarrowTable:type(type, tensorCache) + self.output = {} + self.gradInput = {} + return parent.type(self, type, tensorCache) +end + +NarrowTable.clearState = nn.Identity.clearState diff --git a/contrib/lua-torch/nn/Normalize.lua b/contrib/lua-torch/nn/Normalize.lua new file mode 100644 index 000000000..0937ebba9 --- /dev/null +++ b/contrib/lua-torch/nn/Normalize.lua @@ -0,0 +1,150 @@ +local Normalize, parent = torch.class('nn.Normalize', 'nn.Module') + +function Normalize:__init(p,eps) + parent.__init(self) + assert(p,'p-norm not provided') + assert(p > 0, p..'-norm not supported') + self.p = p + self.eps = eps or 1e-10 +end + +function Normalize:updateOutput(input) + assert(input:dim() <= 2, 'only 1d layer supported') + local input_size = input:size() + if input:dim() 
== 1 then + input = input:view(1,-1) + end + + self._output = self._output or input.new() + self.norm = self.norm or input.new() + self.buffer = self.buffer or input.new() + + self._output:resizeAs(input) + + if self.p == math.huge then + -- specialization for the infinity norm + if not self._indices then + if torch.typename(self.output):find('torch%.Cuda.*Tensor') then + self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor() + else + self._indices = torch.LongTensor() + end + end + + self.buffer:abs(input) + torch.max(self.norm, self._indices, self.buffer, 2) + self.norm:add(self.eps) + else + self.normp = self.normp or input.new() + if self.p % 2 ~= 0 then + self.buffer:abs(input):pow(self.p) + else + self.buffer:pow(input,self.p) + end + self.normp:sum(self.buffer,2):add(self.eps) + self.norm:pow(self.normp,1/self.p) + end + self._output:cdiv(input, self.norm:view(-1,1):expandAs(input)) + + self.output:view(self._output, input_size) + return self.output +end + +function Normalize:updateGradInput(input, gradOutput) + assert(input:dim() <= 2, 'only 1d layer supported') + assert(gradOutput:dim() <= 2, 'only 1d layer supported') + + local input_size = input:size() + if input:dim() == 1 then + input = input:view(1,-1) + end + + local n = input:size(1) -- batch size + local d = input:size(2) -- dimensionality of vectors + + self._gradInput = self._gradInput or input.new() + self.cross = self.cross or input.new() + -- compute diagonal term with gradOutput + self._gradInput:resize(n,d) + if self.p == math.huge then + -- specialization for the inf case + self._gradInput:cmul(self.norm:view(n,1,1):expand(n,d,1),gradOutput) + self.buffer:resizeAs(input):zero() + self.cross:resize(n,1) + self.cross:gather(input,2,self._indices) + self.cross:cdiv(self.norm) + self.buffer:scatter(2,self._indices,self.cross) + else + self._gradInput:cmul(self.normp:view(n,1):expand(n,d), gradOutput) + -- small optimizations for different p + -- buffer = input*|input|^(p-2) + if self.p % 2 ~= 0 then + -- for non-even p, need to add absolute value + if self.p < 2 then + -- add eps to avoid possible division by 0 + self.buffer:abs(input):add(self.eps):pow(self.p-2):cmul(input) + else + self.buffer:abs(input):pow(self.p-2):cmul(input) + end + elseif self.p == 2 then + -- special case for p == 2, pow(x,0) = 1 + self.buffer:copy(input) + else + -- p is even and > 2, pow(x,p) is always positive + self.buffer:pow(input,self.p-2):cmul(input) + end + end + -- compute cross term in two steps + self.cross:resize(n,1) + + -- instead of having a huge temporary matrix (b1*b2), + -- do the computations as b1*(b2*gradOutput). 
This avoids redundant + -- computation and also a huge buffer of size n*d^2 + self.buffer2 = self.buffer2 or input.new() -- nxd + self.buffer2:cmul(input, gradOutput) + self.cross:sum(self.buffer2, 2) + + self.buffer:cmul(self.cross:expandAs(self.buffer)) + self._gradInput:add(-1, self.buffer) + + -- reuse cross buffer for normalization + if self.p == math.huge then + self.cross:cmul(self.norm,self.norm) + else + self.cross:cmul(self.normp,self.norm) + end + self._gradInput:cdiv(self.cross:expand(n,d)) + + self.gradInput:view(self._gradInput, input_size) + return self.gradInput +end + +function Normalize:__tostring__() + local s + -- different prints if the norm is integer + if self.p % 1 == 0 then + s = '%s(%d)' + else + s = '%s(%f)' + end + return string.format(s,torch.type(self),self.p) +end + +function Normalize:type(type, tensorCache) + self._indices = nil + parent.type(self, type, tensorCache) + return self +end + +function Normalize:clearState() + nn.utils.clear(self, { + '_output', + '_indices', + '_gradInput', + 'buffer', + 'norm', + 'normp', + 'cross', + }) + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/OneHot.lua b/contrib/lua-torch/nn/OneHot.lua new file mode 100644 index 000000000..d1dc1b52d --- /dev/null +++ b/contrib/lua-torch/nn/OneHot.lua @@ -0,0 +1,69 @@ +local OneHot, parent = torch.class('nn.OneHot', 'nn.Module') + +-- adapted from https://github.com/karpathy/char-rnn +-- and https://github.com/hughperkins/char-lstm + +function OneHot:__init(outputSize) + parent.__init(self) + self.outputSize = outputSize +end + +function OneHot:updateOutput(input) + local size + if type(input) == 'number' then + if self:type() == 'torch.CudaTensor' then + self._single = self._single or torch.CudaTensor():resize(1); + else + self._single = self._single or torch.LongTensor():resize(1); + end + self._single[1] = input + input = self._single; + size = {} + else + size = input:size():totable() + end + table.insert(size, self.outputSize) + + self.output:resize(table.unpack(size)):zero() + + size[#size] = 1 + local input_ = input:view(table.unpack(size)) + + if torch.type(input) == 'torch.CudaTensor' or torch.type(input) == 'torch.ClTensor' then + self.output:scatter(self.output:dim(), input_, 1) + else + if torch.type(self.output) == 'torch.CudaTensor' then + -- input is not cuda, module is, cast input to cuda + self._input = self._input or torch.CudaTensor() + self._input:resize(input_:size()):copy(input_) + input_ = self._input + elseif torch.type(input) ~= 'torch.LongTensor' then + -- input is not long, module isnot cuda, cast input to long + self._input = self._input or torch.LongTensor() + self._input:resize(input_:size()):copy(input_) + input_ = self._input + end + self.output:scatter(self.output:dim(), input_, 1) + end + + return self.output +end + +function OneHot:updateGradInput(input, gradOutput) + if type(input) == 'number' then + return 0 + else + self.gradInput:resize(input:size()):zero() + return self.gradInput + end +end + +function OneHot:clearState() + self._single = nil + self._input = nil +end + +function OneHot:type(type, typecache) + self:clearState() + return parent.type(self, type, typecache) +end diff --git a/contrib/lua-torch/nn/PReLU.lua b/contrib/lua-torch/nn/PReLU.lua new file mode 100644 index 000000000..2e58fba4e --- /dev/null +++ b/contrib/lua-torch/nn/PReLU.lua @@ -0,0 +1,52 @@ +local PReLU, parent = torch.class('nn.PReLU','nn.Module') + +function PReLU:__init(nOutputPlane) + parent.__init(self) + -- if no argument provided, use shared 
model (weight is scalar) + self.nOutputPlane = nOutputPlane or 0 + self.weight = torch.Tensor(nOutputPlane or 1):fill(0.25) + self.gradWeight = torch.Tensor(nOutputPlane or 1) +end + +function PReLU:updateOutput(input) + input.THNN.PReLU_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + self.nOutputPlane + ) + return self.output +end + +function PReLU:updateGradInput(input, gradOutput) + input.THNN.PReLU_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.nOutputPlane + ) + return self.gradInput +end + +function PReLU:accGradParameters(input, gradOutput, scale) + self.gradWeightBuf = self.gradWeightBuf or input.new() + self.gradWeightBuf2 = self.gradWeightBuf2 or input.new() + input.THNN.PReLU_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.gradWeight:cdata(), + self.gradWeightBuf:cdata(), + self.gradWeightBuf2:cdata(), + self.nOutputPlane, + scale or 1 + ) + return self.gradWeight +end + +function PReLU:clearState() + nn.utils.clear(self, 'gradWeightBuf', 'gradWeightBuf2') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Padding.lua b/contrib/lua-torch/nn/Padding.lua new file mode 100644 index 000000000..d5f7771d0 --- /dev/null +++ b/contrib/lua-torch/nn/Padding.lua @@ -0,0 +1,65 @@ +local Padding, parent = torch.class('nn.Padding', 'nn.Module') + +-- pad puts in [pad] amount of [value] over dimension [dim], starting at index [index] in that dimension. If pad<0, index counts from the left. If pad>0 index counts from the right +-- index = 1 pads before index 1. index = 2 pads starting before index 2 and after index 1 in dimension [dim] +function Padding:__init(dim, pad, nInputDim, value, index) + self.value = value or 0 + self.index = index or 1 + self.dim = dim + self.pad = pad + self.nInputDim = nInputDim + self.outputSize = torch.LongStorage() + parent.__init(self) +end + +function Padding:updateOutput(input) + self.outputSize:resize(input:dim()) + self.outputSize:copy(input:size()) + local dim = self.dim + if self.nInputDim and input:dim() ~= self.nInputDim then + dim = dim + 1 + end + self.outputSize[dim] = self.outputSize[dim] + math.abs(self.pad) + self.output:resize(self.outputSize) + self.output:fill(self.value) + local index = self.index + local pad = self.pad + if pad > 0 then + index = input:size(dim) - index + 2 + else + pad = -pad + end + if index == 1 then + self.output:narrow(dim, 1 + pad, input:size(dim)):copy(input) + elseif index == input:size(dim) + 1 then + self.output:narrow(dim, 1, input:size(dim)):copy(input) + else + self.output:narrow(dim, 1, index - 1):copy(input:narrow(dim, 1, index - 1)) + self.output:narrow(dim, index + pad, input:size(dim) - (index - 1)):copy(input:narrow(dim, index, input:size(dim) - (index - 1))) + end + return self.output +end + +function Padding:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + local dim = self.dim + if self.nInputDim and input:dim() ~= self.nInputDim then + dim = dim + 1 + end + local index = self.index + local pad = self.pad + if pad > 0 then + index = input:size(dim) - index + 2 + else + pad = -pad + end + if index == 1 then + self.gradInput:copy(gradOutput:narrow(dim, 1 + pad, input:size(dim))) + elseif index == input:size(dim) + 1 then + self.gradInput:copy(gradOutput:narrow(dim, 1, input:size(dim))) + else + self.gradInput:narrow(dim, 1, index - 1):copy(gradOutput:narrow(dim, 1, index - 1)) + self.gradInput:narrow(dim, 
index, input:size(dim) - (index - 1)):copy(gradOutput:narrow(dim, index + pad, input:size(dim) - (index - 1))) + end + return self.gradInput +end diff --git a/contrib/lua-torch/nn/PairwiseDistance.lua b/contrib/lua-torch/nn/PairwiseDistance.lua new file mode 100644 index 000000000..99a502c16 --- /dev/null +++ b/contrib/lua-torch/nn/PairwiseDistance.lua @@ -0,0 +1,91 @@ +local PairwiseDistance, parent = torch.class('nn.PairwiseDistance', 'nn.Module') + +function PairwiseDistance:__init(p) + parent.__init(self) + + -- state + self.gradInput = {} + self.diff = torch.Tensor() + self.norm = p or 2 -- Default using Euclidean distance +end + +function PairwiseDistance:updateOutput(input) + self.output:resize(1) + if input[1]:dim() == 1 then + self.output:resize(1) + self.output[1]=input[1]:dist(input[2],self.norm) + elseif input[1]:dim() == 2 then + self.diff = self.diff or input[1].new() + self.diff:resizeAs(input[1]) + + local diff = self.diff:zero() + diff:add(input[1], -1, input[2]) + diff:abs() + + self.output:resize(input[1]:size(1)) + self.output:zero() + self.output:add(diff:pow(self.norm):sum(2)) + self.output:pow(1./self.norm) + else + error('input must be vector or matrix') + end + + return self.output +end + +local function mathsign(x) + if x==0 then return 2*torch.random(2)-3; end + if x>0 then return 1; else return -1; end +end + +function PairwiseDistance:updateGradInput(input, gradOutput) + if input[1]:dim() > 2 then + error('input must be vector or matrix') + end + + self.gradInput[1] = (self.gradInput[1] or input[1].new()):resize(input[1]:size()) + self.gradInput[2] = (self.gradInput[2] or input[2].new()):resize(input[2]:size()) + self.gradInput[1]:copy(input[1]) + self.gradInput[1]:add(-1, input[2]) + + if self.norm==1 then + self.gradInput[1]:apply(mathsign) + else + -- Note: derivative of p-norm: + -- d/dx_k(||x||_p) = (x_k * abs(x_k)^(p-2)) / (||x||_p)^(p-1) + if (self.norm > 2) then + self.gradInput[1]:cmul(self.gradInput[1]:clone():abs():pow(self.norm-2)) + end + + if (input[1]:dim() > 1) then + self.outExpand = self.outExpand or self.output.new() + self.outExpand:resize(self.output:size(1), 1) + self.outExpand:copy(self.output) + self.outExpand:add(1.0e-6) -- Prevent divide by zero errors + self.outExpand:pow(-(self.norm-1)) + self.gradInput[1]:cmul(self.outExpand:expand(self.gradInput[1]:size(1), + self.gradInput[1]:size(2))) + else + self.gradInput[1]:mul(math.pow(self.output[1] + 1e-6, -(self.norm-1))) + end + end + if input[1]:dim() == 1 then + self.gradInput[1]:mul(gradOutput[1]) + else + self.grad = self.grad or gradOutput.new() + self.ones = self.ones or gradOutput.new() + + self.grad:resizeAs(input[1]):zero() + self.ones:resize(input[1]:size(2)):fill(1) + + self.grad:addr(gradOutput, self.ones) + self.gradInput[1]:cmul(self.grad) + end + self.gradInput[2]:zero():add(-1, self.gradInput[1]) + return self.gradInput +end + +function PairwiseDistance:clearState() + nn.utils.clear(self, 'diff', 'outExpand', 'grad', 'ones') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Parallel.lua b/contrib/lua-torch/nn/Parallel.lua new file mode 100644 index 000000000..58cb9748e --- /dev/null +++ b/contrib/lua-torch/nn/Parallel.lua @@ -0,0 +1,116 @@ +local Parallel, parent = torch.class('nn.Parallel', 'nn.Container') + +function Parallel:__init(inputDimension,outputDimension) + parent.__init(self) + self.modules = {} + self.inputDimension = inputDimension + self.outputDimension = outputDimension +end + +function Parallel:updateOutput(input) + local 
nModule=input:size(self.inputDimension) + local outputs = {} + self.totalOutputSize = self.totalOutputSize or torch.LongStorage() + local totalOutputSize = self.totalOutputSize + + for i=1,nModule do + local currentInput = input:select(self.inputDimension,i) + local currentOutput = self:rethrowErrors(self.modules[i], i, 'updateOutput', currentInput) + table.insert(outputs, currentOutput) + local outputSize = currentOutput:size(self.outputDimension) + + if i == 1 then + totalOutputSize:resize(currentOutput:dim()):copy(currentOutput:size()) + else + totalOutputSize[self.outputDimension] = totalOutputSize[self.outputDimension] + outputSize + end + + end + self.output:resize(totalOutputSize) + + local offset = 1 + for i=1,nModule do + local currentOutput = outputs[i] + local outputSize = currentOutput:size(self.outputDimension) + self.output:narrow(self.outputDimension, offset, outputSize):copy(currentOutput) + offset = offset + currentOutput:size(self.outputDimension) + end + return self.output +end + +function Parallel:updateGradInput(input, gradOutput) + local nModule=input:size(self.inputDimension) + self.gradInput:resizeAs(input) + + local offset = 1 + for i=1,nModule do + local module=self.modules[i] + local currentInput = input:select(self.inputDimension,i) + local currentOutput = module.output + local outputSize = currentOutput:size(self.outputDimension) + local currentGradOutput = gradOutput:narrow(self.outputDimension, offset, outputSize) + + local currentGradInput = self:rethrowErrors(module, i, 'updateGradInput', currentInput, currentGradOutput) + + self.gradInput:select(self.inputDimension,i):copy(currentGradInput) + offset = offset + outputSize + end + return self.gradInput +end + +function Parallel:accGradParameters(input, gradOutput, scale) + local nModule=input:size(self.inputDimension) + + local offset = 1 + for i=1,nModule do + local module = self.modules[i] + local currentOutput = module.output + local outputSize = currentOutput:size(self.outputDimension) + + self:rethrowErrors(module, i, 'accGradParameters', + input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, offset,outputSize), + scale) + + offset = offset + outputSize + end +end + +function Parallel:accUpdateGradParameters(input, gradOutput, lr) + local nModule=input:size(self.inputDimension) + + local offset = 1 + for i=1,nModule do + local module = self.modules[i]; + local currentOutput = module.output + self:rethrowErrors(module, i, 'accUpdateGradParameters', + input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, offset, + currentOutput:size(self.outputDimension)), + lr) + + offset = offset + currentOutput:size(self.outputDimension) + end +end + +function Parallel:__tostring__() + local tab = ' ' + local line = '\n' + local next = ' |`-> ' + local lastNext = ' `-> ' + local ext = ' | ' + local extlast = ' ' + local last = ' ... -> ' + local str = torch.type(self) + str = str .. ' {' .. line .. tab .. 'input' + for i=1,#self.modules do + if i == #self.modules then + str = str .. line .. tab .. lastNext .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. extlast) + else + str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. ext) + end + end + str = str .. line .. tab .. last .. 'output' + str = str .. line .. 
'}' + return str +end diff --git a/contrib/lua-torch/nn/ParallelCriterion.lua b/contrib/lua-torch/nn/ParallelCriterion.lua new file mode 100644 index 000000000..45607d5c3 --- /dev/null +++ b/contrib/lua-torch/nn/ParallelCriterion.lua @@ -0,0 +1,41 @@ +local ParallelCriterion, parent = torch.class('nn.ParallelCriterion', 'nn.Criterion') + +function ParallelCriterion:__init(repeatTarget) + parent.__init(self) + self.criterions = {} + self.weights = {} + self.gradInput = {} + self.repeatTarget = repeatTarget +end + +function ParallelCriterion:add(criterion, weight) + assert(criterion, 'no criterion provided') + weight = weight or 1 + table.insert(self.criterions, criterion) + table.insert(self.weights, weight) + return self +end + +function ParallelCriterion:updateOutput(input, target) + self.output = 0 + for i,criterion in ipairs(self.criterions) do + local target = self.repeatTarget and target or target[i] + self.output = self.output + self.weights[i]*criterion:updateOutput(input[i],target) + end + return self.output +end + +function ParallelCriterion:updateGradInput(input, target) + self.gradInput = nn.utils.recursiveResizeAs(self.gradInput, input) + nn.utils.recursiveFill(self.gradInput, 0) + for i,criterion in ipairs(self.criterions) do + local target = self.repeatTarget and target or target[i] + nn.utils.recursiveAdd(self.gradInput[i], self.weights[i], criterion:updateGradInput(input[i], target)) + end + return self.gradInput +end + +function ParallelCriterion:type(type, tensorCache) + self.gradInput = {} + return parent.type(self, type, tensorCache) +end diff --git a/contrib/lua-torch/nn/ParallelTable.lua b/contrib/lua-torch/nn/ParallelTable.lua new file mode 100644 index 000000000..2fe0899dd --- /dev/null +++ b/contrib/lua-torch/nn/ParallelTable.lua @@ -0,0 +1,58 @@ +local ParallelTable, parent = torch.class('nn.ParallelTable', 'nn.Container') + +function ParallelTable:__init() + parent.__init(self) + self.modules = {} + self.output = {} + self.gradInput = {} +end + +function ParallelTable:updateOutput(input) + for i=1,#self.modules do + self.output[i] = self:rethrowErrors(self.modules[i], i, 'updateOutput', input[i]) + end + return self.output +end + +function ParallelTable:updateGradInput(input, gradOutput) + for i,module in ipairs(self.modules) do + self.gradInput[i] = self:rethrowErrors(module, i, 'updateGradInput', input[i], gradOutput[i]) + end + return self.gradInput +end + +function ParallelTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + for i,module in ipairs(self.modules) do + self:rethrowErrors(module, i, 'accGradParameters', input[i], gradOutput[i], scale) + end +end + +function ParallelTable:accUpdateGradParameters(input, gradOutput, lr) + lr = lr or 1 + for i,module in ipairs(self.modules) do + self:rethrowErrors(module, i, 'accUpdateGradParameters', input[i], gradOutput[i], lr) + end +end + +function ParallelTable:__tostring__() + local tab = ' ' + local line = '\n' + local next = ' |`-> ' + local lastNext = ' `-> ' + local ext = ' | ' + local extlast = ' ' + local last = ' ... -> ' + local str = torch.type(self) + str = str .. ' {' .. line .. tab .. 'input' + for i=1,#self.modules do + if i == #self.modules then + str = str .. line .. tab .. lastNext .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. extlast) + else + str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. ext) + end + end + str = str .. line .. tab .. last .. 'output' + str = str .. line .. 
'}'
+   return str
+end
diff --git a/contrib/lua-torch/nn/PartialLinear.lua b/contrib/lua-torch/nn/PartialLinear.lua
new file mode 100644
index 000000000..6e92cfc08
--- /dev/null
+++ b/contrib/lua-torch/nn/PartialLinear.lua
@@ -0,0 +1,114 @@
+local PartialLinear, Module = torch.class('nn.PartialLinear', 'nn.Module')
+
+--[[
+
+PartialLinear is a Linear layer that allows the user to set a collection of
+column indices. When the column indices are set, the layer will behave like a
+Linear layer that only has those columns. Meanwhile, all parameters are
+preserved, so resetting the PartialLinear layer will result in a module that
+behaves just like a regular Linear layer.
+
+This module is useful, for instance, when you want to do forward-backward on
+only a subset of a Linear layer during training but use the full Linear layer
+at test time.
+
+]]--
+
+function PartialLinear:__init(inputsize, outputsize, bias)
+   local bias = ((bias == nil) and true) or bias
+   Module.__init(self)
+
+   -- define the layer as a small network:
+   local pt = nn.ParallelTable()
+   pt:add(nn.Identity()):add(nn.LookupTable(outputsize, inputsize))
+   self.network = nn.Sequential():add(pt):add(nn.MM(false, true))
+   if bias then
+      self.bias = torch.Tensor(1, outputsize):zero()
+      self.gradBias = torch.Tensor(1, outputsize):zero()
+   end
+
+   -- set partition:
+   self.inputsize = inputsize
+   self.outputsize = outputsize
+   self.allcolumns = torch.range(1, self.outputsize)
+   self:resetPartition()
+end
+
+function PartialLinear:setPartition(indices)
+   self.partition = indices:type(self.allcolumns:type())
+end
+
+function PartialLinear:resetPartition()
+   self.partition = self.allcolumns
+end
+
+function PartialLinear:parameters()
+   return {self.network:get(1):get(2).weight, self.bias},
+          {self.network:get(1):get(2).gradWeight, self.gradBias}
+end -- should return only the relevant partition?
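+
+-- Illustrative usage sketch (not part of the original file; the layer sizes
+-- and column indices below are made up for the example):
+--
+--   local layer = nn.PartialLinear(5, 10)
+--   layer:setPartition(torch.Tensor{2, 7, 9}) -- forward-backward on 3 columns
+--   local out = layer:forward(torch.randn(4, 5)) -- out is a 4x3 tensor
+--   layer:resetPartition() -- back to the full 5 -> 10 layer for test time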
+ +function PartialLinear:updateOutput(input) + self.output:set(self.network:forward{input, self.partition}) + if self.bias then + self.output:add( + self.bias:index(2, self.partition:long()):expandAs(self.output) + ) + self.addBuffer = self.addBuffer or input.new() + if self.addBuffer:nElement() ~= input:size(1) then + self.addBuffer:resize(input:size(1)):fill(1) + end + end + return self.output +end + +function PartialLinear:updateGradInput(input, gradOutput) + if self.gradInput then + self.network:updateGradInput({input, self.partition}, gradOutput) + self.gradInput:set(self.network.gradInput[1]) + end + return self.gradInput +end + +function PartialLinear:accGradParameters(input, gradOutput, scale) + local scale = scale or 1 + self.network:accGradParameters({input, self.partition}, gradOutput, scale) + if self.bias then + self.buffer = self.buffer or input.new() + self.buffer:resize(gradOutput:size(2)) + self.buffer:mv(gradOutput:t(), self.addBuffer):mul(scale) + self.gradBias:indexAdd( + 2, self.partition:long(), self.buffer:view(1, self.buffer:nElement()) + ) + end +end + +function PartialLinear:accUpdateGradParameters(input, gradOutput, lr) + local gradWeight = self.network:get(1):get(2).gradWeight + local gradBias = self.gradBias + self.network:get(1):get(2).gradWeight = self.network:get(1):get(2).weight + self.gradBias = self.bias + self:accGradParameters(input, gradOutput, -lr) + self.network:get(1):get(2).gradWeight = gradWeight + self.gradBias = gradBias +end + +function PartialLinear:zeroGradParameters() + self.network:zeroGradParameters() + self.gradBias:zero() +end + +function PartialLinear:updateParameters(learningRate) + self.network:updateParameters(learningRate) + self.bias:add(-learningRate, self.gradBias) +end + +function PartialLinear:sharedAccUpdateGradParameters(input, gradOutput, lr) + -- we do not need to accumulate parameters when sharing: + self:defaultAccUpdateGradParameters(input, gradOutput, lr) +end + +function PartialLinear:__tostring__() + return torch.type(self) .. + string.format('(%d -> %d)', self.inputsize, self.outputsize) .. + (self.bias == nil and ' without bias' or '') +end diff --git a/contrib/lua-torch/nn/PixelShuffle.lua b/contrib/lua-torch/nn/PixelShuffle.lua new file mode 100644 index 000000000..dd58ed948 --- /dev/null +++ b/contrib/lua-torch/nn/PixelShuffle.lua @@ -0,0 +1,111 @@ +local PixelShuffle, parent = torch.class("nn.PixelShuffle", "nn.Module") + +-- Shuffles pixels after upscaling with a ESPCNN model +-- Converts a [batch x channel*r^2 x m x p] tensor to [batch x channel x r*m x r*p] +-- tensor, where r is the upscaling factor. +-- @param upscaleFactor - the upscaling factor to use +function PixelShuffle:__init(upscaleFactor) + parent.__init(self) + self.upscaleFactor = upscaleFactor + self.upscaleFactorSquared = self.upscaleFactor * self.upscaleFactor +end + +-- Computes the forward pass of the layer i.e. Converts a +-- [batch x channel*r^2 x m x p] tensor to [batch x channel x r*m x r*p] tensor. 
+-- @param input - the input tensor to be shuffled of size [b x c*r^2 x m x p]
+-- @return output - the shuffled tensor of size [b x c x r*m x r*p]
+function PixelShuffle:updateOutput(input)
+   self._intermediateShape = self._intermediateShape or torch.LongStorage(6)
+   self._outShape = self._outShape or torch.LongStorage()
+   self._shuffleOut = self._shuffleOut or input.new()
+
+   local batched = false
+   local batchSize = 1
+   local inputStartIdx = 1
+   local outShapeIdx = 1
+   if input:nDimension() == 4 then
+      batched = true
+      batchSize = input:size(1)
+      inputStartIdx = 2
+      outShapeIdx = 2
+      self._outShape:resize(4)
+      self._outShape[1] = batchSize
+   else
+      self._outShape:resize(3)
+   end
+
+   -- the input has c*r^2 channels and spatial size m x p; split the channel
+   -- dimension into (c, r, r) so the two r-sized axes can be interleaved
+   -- with the spatial axes below
+   local channels = input:size(inputStartIdx) / self.upscaleFactorSquared
+   local inHeight = input:size(inputStartIdx + 1)
+   local inWidth = input:size(inputStartIdx + 2)
+
+   self._intermediateShape[1] = batchSize
+   self._intermediateShape[2] = channels
+   self._intermediateShape[3] = self.upscaleFactor
+   self._intermediateShape[4] = self.upscaleFactor
+   self._intermediateShape[5] = inHeight
+   self._intermediateShape[6] = inWidth
+
+   self._outShape[outShapeIdx] = channels
+   self._outShape[outShapeIdx + 1] = inHeight * self.upscaleFactor
+   self._outShape[outShapeIdx + 2] = inWidth * self.upscaleFactor
+
+   local inputView = torch.view(input, self._intermediateShape)
+
+   self._shuffleOut:resize(inputView:size(1), inputView:size(2), inputView:size(5),
+                           inputView:size(3), inputView:size(6), inputView:size(4))
+   self._shuffleOut:copy(inputView:permute(1, 2, 5, 3, 6, 4))
+
+   self.output = torch.view(self._shuffleOut, self._outShape)
+
+   return self.output
+end
+
+-- Computes the backward pass of the layer: given the gradient w.r.t. the
+-- output, this function computes the gradient w.r.t. the input.
+-- @param input - the input tensor of shape [b x c*r^2 x m x p]
+-- @param gradOutput - the tensor with the gradients w.r.t. output of shape [b x c x r*m x r*p]
+-- @return gradInput - a tensor of the same shape as input, representing the gradient w.r.t. input.
+function PixelShuffle:updateGradInput(input, gradOutput) + self._intermediateShape = self._intermediateShape or torch.LongStorage(6) + self._shuffleIn = self._shuffleIn or input.new() + + local batchSize = 1 + local inputStartIdx = 1 + if input:nDimension() == 4 then + batchSize = input:size(1) + inputStartIdx = 2 + end + + local channels = input:size(inputStartIdx) / self.upscaleFactorSquared + local height = input:size(inputStartIdx + 1) + local width = input:size(inputStartIdx + 2) + + self._intermediateShape[1] = batchSize + self._intermediateShape[2] = channels + self._intermediateShape[3] = height + self._intermediateShape[4] = self.upscaleFactor + self._intermediateShape[5] = width + self._intermediateShape[6] = self.upscaleFactor + + local gradOutputView = torch.view(gradOutput, self._intermediateShape) + + self._shuffleIn:resize(gradOutputView:size(1), gradOutputView:size(2), gradOutputView:size(4), + gradOutputView:size(6), gradOutputView:size(3), gradOutputView:size(5)) + self._shuffleIn:copy(gradOutputView:permute(1, 2, 4, 6, 3, 5)) + + self.gradInput = torch.view(self._shuffleIn, input:size()) + + return self.gradInput +end + + +function PixelShuffle:clearState() + nn.utils.clear(self, { + "_intermediateShape", + "_outShape", + "_shuffleIn", + "_shuffleOut", + }) + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Power.lua b/contrib/lua-torch/nn/Power.lua new file mode 100644 index 000000000..771183c48 --- /dev/null +++ b/contrib/lua-torch/nn/Power.lua @@ -0,0 +1,22 @@ +local Power, parent = torch.class('nn.Power','nn.Module') + +function Power:__init(p) + parent.__init(self) + self.pow = p + if not p then + error('nn.Power(power)') + end +end + +function Power:updateOutput(input) + self.output:resizeAs(input):copy(input) + self.output:pow(self.pow) + return self.output +end + +function Power:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):copy(input) + self.gradInput:pow(self.pow - 1) + self.gradInput:cmul(gradOutput):mul(self.pow) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/PrintSize.lua b/contrib/lua-torch/nn/PrintSize.lua new file mode 100644 index 000000000..d8dc91bff --- /dev/null +++ b/contrib/lua-torch/nn/PrintSize.lua @@ -0,0 +1,36 @@ +local PrintSize, parent = torch.class('nn.PrintSize', 'nn.Module') + +function PrintSize:__init(prefix) + parent.__init(self) + self.prefix = prefix or "PrintSize" +end + +function PrintSize:updateOutput(input) + self.output = input + local size + if torch.type(input) == 'table' then + size = input + elseif torch.type(input) == 'nil' then + size = 'missing size' + else + size = input:size() + end + print(self.prefix..":input\n", size) + return self.output +end + + +function PrintSize:updateGradInput(input, gradOutput) + local size + if torch.type(gradOutput) == 'table' then + size = gradOutput + elseif torch.type(gradOutput) == 'nil' then + size = 'missing size' + else + size = gradOutput:size() + end + print(self.prefix..":gradOutput\n", size) + self.gradInput = gradOutput + return self.gradInput +end + diff --git a/contrib/lua-torch/nn/Profile.lua b/contrib/lua-torch/nn/Profile.lua new file mode 100644 index 000000000..36cd909cd --- /dev/null +++ b/contrib/lua-torch/nn/Profile.lua @@ -0,0 +1,55 @@ +local ProfileModule, parent = torch.class("nn.Profile", "nn.Decorator") + +function ProfileModule:__init(module, print_interval, name) + parent.__init(self, module) + self.print_interval = print_interval or 100 + self.name = name or torch.type(module) + self.module = module + 
self.numFwds = 0 + self.numBwds = 0 + self.summedFwdTime = 0 + self.summedBwdTime = 0 + self.timer = torch.Timer() +end + +function ProfileModule:updateOutput(input) + self.timer:reset() + self.output = self.module:updateOutput(input) + self.summedFwdTime = self.summedFwdTime + self.timer:time().real + self.numFwds = self.numFwds + 1 + if self.numFwds % self.print_interval == 0 then + print (string.format('%s took %.3f seconds for %d forward passes', + self.name, self.summedFwdTime, self.print_interval)) + self.numFwds = 0 + self.summedFwdTime = 0 + end + return self.output +end + +function ProfileModule:updateGradInput(input, gradOutput) + self.timer:reset() + self.gradInput = self.module:updateGradInput(input, gradOutput) + self.summedBwdTime = self.summedBwdTime + self.timer:time().real + self.numBwds = self.numBwds + 1 + if self.numBwds % self.print_interval == 0 then + print (string.format('%s took %.3f seconds for %d backward passes', + self.name, self.summedBwdTime, self.print_interval)) + self.numBwds = 0 + self.summedBwdTime = 0 + end + return self.gradInput +end + +local function makeTorchTimerSerializable() + -- The Timer object part of this class needs to be serializable + -- so that the layer can be saved, cloned, etc. We add a dummy + -- serialization of torch.Timer that just creates a new instance at read + local timerMetatable = getmetatable(torch.Timer()) + timerMetatable['__factory'] = torch.Timer + timerMetatable['write'] = function(object, file) end + timerMetatable['read'] = function(object, file, versionNumber) + return object + end +end + +makeTorchTimerSerializable() diff --git a/contrib/lua-torch/nn/README.md b/contrib/lua-torch/nn/README.md new file mode 100644 index 000000000..6efd60962 --- /dev/null +++ b/contrib/lua-torch/nn/README.md @@ -0,0 +1,21 @@ +[![Build Status](https://travis-ci.org/torch/nn.svg?branch=master)](https://travis-ci.org/torch/nn) +<a name="nn.dok"></a> +# Neural Network Package # + +This package provides an easy and modular way to build and train simple or complex neural networks using [Torch](https://github.com/torch/torch7/blob/master/README.md): + * Modules are the bricks used to build neural networks. 
Each is itself a neural network, but they can be combined with other networks using containers to create complex neural networks:
+   * [Module](doc/module.md#nn.Module): abstract class inherited by all modules;
+   * [Containers](doc/containers.md#nn.Containers): composite and decorator classes like [`Sequential`](doc/containers.md#nn.Sequential), [`Parallel`](doc/containers.md#nn.Parallel), [`Concat`](doc/containers.md#nn.Concat) and [`NaN`](doc/containers.md#nn.NaN);
+   * [Transfer functions](doc/transfer.md#nn.transfer.dok): non-linear functions like [`Tanh`](doc/transfer.md#nn.Tanh) and [`Sigmoid`](doc/transfer.md#nn.Sigmoid);
+   * [Simple layers](doc/simple.md#nn.simplelayers.dok): like [`Linear`](doc/simple.md#nn.Linear), [`Mean`](doc/simple.md#nn.Mean), [`Max`](doc/simple.md#nn.Max) and [`Reshape`](doc/simple.md#nn.Reshape);
+   * [Table layers](doc/table.md#nn.TableLayers): layers for manipulating `table`s like [`SplitTable`](doc/table.md#nn.SplitTable), [`ConcatTable`](doc/table.md#nn.ConcatTable) and [`JoinTable`](doc/table.md#nn.JoinTable);
+   * [Convolution layers](doc/convolution.md#nn.convlayers.dok): [`Temporal`](doc/convolution.md#nn.TemporalModules), [`Spatial`](doc/convolution.md#nn.SpatialModules) and [`Volumetric`](doc/convolution.md#nn.VolumetricModules) convolutions;
+ * Criterions compute the loss and its gradient for a given input and target:
+   * [Criterions](doc/criterion.md#nn.Criterions): a list of all criterions, including [`Criterion`](doc/criterion.md#nn.Criterion), the abstract class;
+   * [`MSECriterion`](doc/criterion.md#nn.MSECriterion): the Mean Squared Error criterion used for regression;
+   * [`ClassNLLCriterion`](doc/criterion.md#nn.ClassNLLCriterion): the Negative Log Likelihood criterion used for classification;
+ * Additional documentation:
+   * [Overview](doc/overview.md#nn.overview.dok) of the package essentials including modules, containers and training;
+   * [Training](doc/training.md#nn.traningneuralnet.dok): how to train a neural network using [`StochasticGradient`](doc/training.md#nn.StochasticGradient);
+   * [Testing](doc/testing.md): how to test your modules.
+   * [Experimental Modules](https://github.com/clementfarabet/lua---nnx/blob/master/README.md): a package containing experimental modules and criteria.
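+
+As a minimal sketch (illustrative only, not taken from the package
+documentation), a small classifier can be assembled and trained by hand with
+the modules and criterions listed above:
+
+```lua
+require 'nn'
+
+-- 10 inputs -> 5 hidden units -> log-probabilities over 2 classes
+local mlp = nn.Sequential()
+mlp:add(nn.Linear(10, 5))
+mlp:add(nn.Tanh())
+mlp:add(nn.Linear(5, 2))
+mlp:add(nn.LogSoftMax())
+
+local criterion = nn.ClassNLLCriterion()
+local x, y = torch.randn(10), 1 -- made-up sample and class label
+
+mlp:zeroGradParameters()
+criterion:forward(mlp:forward(x), y)
+mlp:backward(x, criterion:backward(mlp.output, y))
+mlp:updateParameters(0.01) -- one SGD step
+```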
diff --git a/contrib/lua-torch/nn/RReLU.lua b/contrib/lua-torch/nn/RReLU.lua new file mode 100644 index 000000000..843415f7e --- /dev/null +++ b/contrib/lua-torch/nn/RReLU.lua @@ -0,0 +1,50 @@ +local ffi = require 'ffi' +local RReLU, parent = torch.class('nn.RReLU', 'nn.Module') + +function RReLU:__init(l, u, ip) + parent.__init(self) + self.lower = l or 1/8 + self.upper = u or 1/3 + assert(self.lower <= self.upper and self.lower >= 0 and self.upper >= 0) + self.noise = torch.Tensor() + self.train = true + self.inplace = ip or false +end + +function RReLU:updateOutput(input) + local gen = ffi.typeof('THGenerator**')(torch._gen)[0] + input.THNN.RReLU_updateOutput( + input:cdata(), + self.output:cdata(), + self.noise:cdata(), + self.lower, + self.upper, + self.train, + self.inplace, + gen + ) + return self.output +end + +function RReLU:updateGradInput(input, gradOutput) + input.THNN.RReLU_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.noise:cdata(), + self.lower, + self.upper, + self.train, + self.inplace + ) + return self.gradInput +end + +function RReLU:__tostring__() + return string.format('%s (l:%f, u:%f)', torch.type(self), self.lower, self.upper) +end + +function RReLU:clearState() + if self.noise then self.noise:set() end + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/ReLU.lua b/contrib/lua-torch/nn/ReLU.lua new file mode 100644 index 000000000..a6eb271ee --- /dev/null +++ b/contrib/lua-torch/nn/ReLU.lua @@ -0,0 +1,5 @@ +local ReLU, Parent = torch.class('nn.ReLU', 'nn.Threshold') + +function ReLU:__init(p) + Parent.__init(self,0,0,p) +end diff --git a/contrib/lua-torch/nn/ReLU6.lua b/contrib/lua-torch/nn/ReLU6.lua new file mode 100644 index 000000000..1cde00b46 --- /dev/null +++ b/contrib/lua-torch/nn/ReLU6.lua @@ -0,0 +1,32 @@ +local ReLU6, parent = torch.class('nn.ReLU6', 'nn.Module') + +function ReLU6:__init(inplace) + parent.__init(self) + + if inplace == nil then + self.inplace = false + else + self.inplace = inplace + end + + if (inplace and type(inplace) ~= 'boolean') then + error('in-place flag must be boolean') + end +end + +function ReLU6:updateOutput(input) + input.THNN.HardTanh_updateOutput( + input:cdata(), + self.output:cdata(), + 0, 6, self.inplace) + return self.output +end + +function ReLU6:updateGradInput(input, gradOutput) + input.THNN.HardTanh_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + 0, 6, self.inplace) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Replicate.lua b/contrib/lua-torch/nn/Replicate.lua new file mode 100644 index 000000000..c7dedd767 --- /dev/null +++ b/contrib/lua-torch/nn/Replicate.lua @@ -0,0 +1,57 @@ +local Replicate, parent = torch.class('nn.Replicate','nn.Module') + +function Replicate:__init(nf, dim, ndim) + parent.__init(self) + self.nfeatures = nf + self.dim = dim or 1 + self.ndim = ndim + assert(self.dim > 0, "Can only replicate across positive integer dimensions.") +end + +function Replicate:updateOutput(input) + self.dim = self.dim or 1 --backwards compatible + assert( + self.dim <= input:dim()+1, + "Not enough input dimensions to replicate along dimension " .. + tostring(self.dim) .. 
".") + local batchOffset = self.ndim and input:dim() > self.ndim and 1 or 0 + local rdim = self.dim + batchOffset + local sz = torch.LongStorage(input:dim()+1) + sz[rdim] = self.nfeatures + for i = 1,input:dim() do + local offset = 0 + if i >= rdim then + offset = 1 + end + sz[i+offset] = input:size(i) + end + local st = torch.LongStorage(input:dim()+1) + st[rdim] = 0 + for i = 1,input:dim() do + local offset = 0 + if i >= rdim then + offset = 1 + end + st[i+offset] = input:stride(i) + end + self.output:set(input:storage(),input:storageOffset(),sz,st) + return self.output +end + +function Replicate:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):zero() + local batchOffset = self.ndim and input:dim() > self.ndim and 1 or 0 + local rdim = self.dim + batchOffset + local sz = torch.LongStorage(input:dim()+1) + sz[rdim] = 1 + for i = 1,input:dim() do + local offset = 0 + if i >= rdim then + offset = 1 + end + sz[i+offset] = input:size(i) + end + local gradInput = self.gradInput:view(sz) + gradInput:sum(gradOutput, rdim) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Reshape.lua b/contrib/lua-torch/nn/Reshape.lua new file mode 100644 index 000000000..d508369fa --- /dev/null +++ b/contrib/lua-torch/nn/Reshape.lua @@ -0,0 +1,72 @@ +local Reshape, parent = torch.class('nn.Reshape', 'nn.Module') + +function Reshape:__init(...) + parent.__init(self) + local arg = {...} + + self.size = torch.LongStorage() + self.batchsize = torch.LongStorage() + if torch.type(arg[#arg]) == 'boolean' then + self.batchMode = arg[#arg] + table.remove(arg, #arg) + end + local n = #arg + if n == 1 and torch.typename(arg[1]) == 'torch.LongStorage' then + self.size:resize(#arg[1]):copy(arg[1]) + else + self.size:resize(n) + for i=1,n do + self.size[i] = arg[i] + end + end + + self.nelement = 1 + self.batchsize:resize(#self.size+1) + for i=1,#self.size do + self.nelement = self.nelement * self.size[i] + self.batchsize[i+1] = self.size[i] + end +end + +function Reshape:updateOutput(input) + if not input:isContiguous() then + self._input = self._input or input.new() + self._input:resizeAs(input) + self._input:copy(input) + input = self._input + end + + if (self.batchMode == false) or ( + (self.batchMode == nil) and + (input:nElement() == self.nelement and input:size(1) ~= 1) + ) then + self.output:view(input, self.size) + else + self.batchsize[1] = input:size(1) + self.output:view(input, self.batchsize) + end + return self.output +end + +function Reshape:updateGradInput(input, gradOutput) + if not gradOutput:isContiguous() then + self._gradOutput = self._gradOutput or gradOutput.new() + self._gradOutput:resizeAs(gradOutput) + self._gradOutput:copy(gradOutput) + gradOutput = self._gradOutput + end + + self.gradInput:viewAs(gradOutput, input) + return self.gradInput +end + + +function Reshape:__tostring__() + return torch.type(self) .. '(' .. + table.concat(self.size:totable(), 'x') .. 
')' +end + +function Reshape:clearState() + nn.utils.clear(self, '_input', '_gradOutput') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/Select.lua b/contrib/lua-torch/nn/Select.lua new file mode 100644 index 000000000..be87c6465 --- /dev/null +++ b/contrib/lua-torch/nn/Select.lua @@ -0,0 +1,24 @@ +local Select, parent = torch.class('nn.Select', 'nn.Module') + +function Select:__init(dimension,index) + parent.__init(self) + self.dimension = dimension + self.index = index +end + +function Select:updateOutput(input) + local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension + local index = self.index < 0 and input:size(dim) + self.index + 1 or self.index + local output = input:select(dim, index); + self.output:resizeAs(output) + return self.output:copy(output) +end + +function Select:updateGradInput(input, gradOutput) + local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension + local index = self.index < 0 and input:size(dim) + self.index + 1 or self.index + self.gradInput:resizeAs(input) + self.gradInput:zero() + self.gradInput:select(dim,index):copy(gradOutput) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SelectTable.lua b/contrib/lua-torch/nn/SelectTable.lua new file mode 100644 index 000000000..ef26f3507 --- /dev/null +++ b/contrib/lua-torch/nn/SelectTable.lua @@ -0,0 +1,71 @@ +local SelectTable, parent = torch.class('nn.SelectTable', 'nn.Module') + +function SelectTable:__init(index) + parent.__init(self) + self.index = index + self.gradInput = {} +end + +function SelectTable:updateOutput(input) + + -- handle negative indices + local index = self.index + if type(index) == "number" then + index = index < 0 and #input + index + 1 or index + end + + assert(input[index], "index does not exist in the input table") + self.output = input[index] + + return self.output +end + +local function zeroTableCopy(t1, t2) + for k, v in pairs(t2) do + if (torch.type(v) == "table") then + t1[k] = zeroTableCopy(t1[k] or {}, t2[k]) + elseif torch.isTensor(v) then + if not t1[k] then + t1[k] = v:clone():zero() + else + t1[k]:resizeAs(v) + t1[k]:zero() + end + else + t1[k] = nil + end + end + for k, v in pairs(t1) do + if not t2[k] then + t1[k] = nil + end + end + return t1 +end + +function SelectTable:updateGradInput(input, gradOutput) + -- make gradInput a zeroed copy of input + zeroTableCopy(self.gradInput, input) + -- handle negative indices + local index = self.index + if type(index) == "number" then + index = index < 0 and #input + index + 1 or index + end + -- copy into gradInput[index] (necessary for variable sized inputs) + assert(self.gradInput[index]) + nn.utils.recursiveCopy(self.gradInput[index], gradOutput) + + return self.gradInput +end + +function SelectTable:type(type, tensorCache) + self.gradInput = {} + self.output = {} + return parent.type(self, type, tensorCache) +end + +function SelectTable:__tostring__() + return torch.type(self) .. '(' .. self.index .. 
')' +end + +SelectTable.clearState = nn.Identity.clearState diff --git a/contrib/lua-torch/nn/Sequential.lua b/contrib/lua-torch/nn/Sequential.lua new file mode 100644 index 000000000..22b0886b8 --- /dev/null +++ b/contrib/lua-torch/nn/Sequential.lua @@ -0,0 +1,122 @@ +local Sequential, _ = torch.class('nn.Sequential', 'nn.Container') + +function Sequential:__len() + return #self.modules +end + +function Sequential:add(module) + if #self.modules == 0 then + self.gradInput = module.gradInput + end + table.insert(self.modules, module) + self.output = module.output + return self +end + +function Sequential:insert(module, index) + index = index or (#self.modules + 1) + if index > (#self.modules + 1) or index < 1 then + error"index should be contiguous to existing modules" + end + table.insert(self.modules, index, module) + self.output = self.modules[#self.modules].output + self.gradInput = self.modules[1].gradInput +end + +function Sequential:remove(index) + index = index or #self.modules + if index > #self.modules or index < 1 then + error"index out of range" + end + table.remove(self.modules, index) + if #self.modules > 0 then + self.output = self.modules[#self.modules].output + self.gradInput = self.modules[1].gradInput + else + self.output = torch.Tensor() + self.gradInput = torch.Tensor() + end +end + +function Sequential:updateOutput(input) + local currentOutput = input + for i=1,#self.modules do + currentOutput = self:rethrowErrors(self.modules[i], i, 'updateOutput', currentOutput) + end + self.output = currentOutput + return currentOutput +end + +function Sequential:updateGradInput(input, gradOutput) + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentGradOutput = self:rethrowErrors(currentModule, i+1, 'updateGradInput', previousModule.output, currentGradOutput) + currentModule = previousModule + end + currentGradOutput = self:rethrowErrors(currentModule, 1, 'updateGradInput', input, currentGradOutput) + self.gradInput = currentGradOutput + return currentGradOutput +end + +function Sequential:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + self:rethrowErrors(currentModule, i+1, 'accGradParameters', previousModule.output, currentGradOutput, scale) + currentGradOutput = currentModule.gradInput + currentModule = previousModule + end + + self:rethrowErrors(currentModule, 1, 'accGradParameters', input, currentGradOutput, scale) +end + +function Sequential:backward(input, gradOutput, scale) + scale = scale or 1 + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentGradOutput = self:rethrowErrors(currentModule, i+1, 'backward', previousModule.output, currentGradOutput, scale) + currentModule.gradInput = currentGradOutput + currentModule = previousModule + end + currentGradOutput = self:rethrowErrors(currentModule, 1, 'backward', input, currentGradOutput, scale) + self.gradInput = currentGradOutput + return currentGradOutput +end + +function Sequential:accUpdateGradParameters(input, gradOutput, lr) + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + 
self:rethrowErrors(currentModule, i+1, 'accUpdateGradParameters', previousModule.output, currentGradOutput, lr) + currentGradOutput = currentModule.gradInput + currentModule = previousModule + end + + self:rethrowErrors(currentModule, 1, 'accUpdateGradParameters', input, currentGradOutput, lr) +end + + +function Sequential:__tostring__() + local tab = ' ' + local line = '\n' + local next = ' -> ' + local str = 'nn.Sequential' + str = str .. ' {' .. line .. tab .. '[input' + for i=1,#self.modules do + str = str .. next .. '(' .. i .. ')' + end + str = str .. next .. 'output]' + for i=1,#self.modules do + str = str .. line .. tab .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab) + end + str = str .. line .. '}' + return str +end diff --git a/contrib/lua-torch/nn/Sigmoid.lua b/contrib/lua-torch/nn/Sigmoid.lua new file mode 100644 index 000000000..0126f6f8f --- /dev/null +++ b/contrib/lua-torch/nn/Sigmoid.lua @@ -0,0 +1,19 @@ +local Sigmoid = torch.class('nn.Sigmoid', 'nn.Module') + +function Sigmoid:updateOutput(input) + input.THNN.Sigmoid_updateOutput( + input:cdata(), + self.output:cdata() + ) + return self.output +end + +function Sigmoid:updateGradInput(input, gradOutput) + input.THNN.Sigmoid_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata() + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SmoothL1Criterion.lua b/contrib/lua-torch/nn/SmoothL1Criterion.lua new file mode 100644 index 000000000..be636a94c --- /dev/null +++ b/contrib/lua-torch/nn/SmoothL1Criterion.lua @@ -0,0 +1,32 @@ +local SmoothL1Criterion, parent = torch.class('nn.SmoothL1Criterion', 'nn.Criterion') + +function SmoothL1Criterion:__init(sizeAverage) + parent.__init(self) + if sizeAverage ~= nil then + self.sizeAverage = sizeAverage + else + self.sizeAverage = true + end +end + +function SmoothL1Criterion:updateOutput(input, target) + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.SmoothL1Criterion_updateOutput( + input:cdata(), + target:cdata(), + self.output_tensor:cdata(), + self.sizeAverage + ) + self.output = self.output_tensor[1] + return self.output +end + +function SmoothL1Criterion:updateGradInput(input, target) + input.THNN.SmoothL1Criterion_updateGradInput( + input:cdata(), + target:cdata(), + self.gradInput:cdata(), + self.sizeAverage + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SoftMarginCriterion.lua b/contrib/lua-torch/nn/SoftMarginCriterion.lua new file mode 100644 index 000000000..96ccda8a4 --- /dev/null +++ b/contrib/lua-torch/nn/SoftMarginCriterion.lua @@ -0,0 +1,24 @@ +local SoftMarginCriterion, parent = torch.class('nn.SoftMarginCriterion', 'nn.Criterion') + +function SoftMarginCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function SoftMarginCriterion:updateOutput(input, target) + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.SoftMarginCriterion_updateOutput( + input:cdata(), target:cdata(), + self.output_tensor:cdata(), + self.sizeAverage) + self.output = self.output_tensor[1] + return self.output +end + +function SoftMarginCriterion:updateGradInput(input, target) + input.THNN.SoftMarginCriterion_updateGradInput( + input:cdata(), target:cdata(), + self.gradInput:cdata(), + self.sizeAverage) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SoftMax.lua b/contrib/lua-torch/nn/SoftMax.lua new file mode 100644 index 000000000..23a444cf6 --- /dev/null +++ b/contrib/lua-torch/nn/SoftMax.lua @@ -0,0 +1,19 @@ 
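+-- SoftMax rescales the input so that its entries lie in (0, 1) and sum to 1:
+--   f_i(x) = exp(x_i - shift) / sum_j exp(x_j - shift), with shift = max_k x_k
+-- (subtracting the shift leaves the result mathematically unchanged but avoids
+-- overflow in exp); the computation itself is delegated to the THNN backend below.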
+local SoftMax, _ = torch.class('nn.SoftMax', 'nn.Module') + +function SoftMax:updateOutput(input) + input.THNN.SoftMax_updateOutput( + input:cdata(), + self.output:cdata() + ) + return self.output +end + +function SoftMax:updateGradInput(input, gradOutput) + input.THNN.SoftMax_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata() + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SoftMin.lua b/contrib/lua-torch/nn/SoftMin.lua new file mode 100644 index 000000000..7da2a6589 --- /dev/null +++ b/contrib/lua-torch/nn/SoftMin.lua @@ -0,0 +1,31 @@ +local SoftMin, parent = torch.class('nn.SoftMin', 'nn.Module') + +function SoftMin:updateOutput(input) + self.mininput = self.mininput or input.new() + self.mininput:resizeAs(input):copy(input):mul(-1) + input.THNN.SoftMax_updateOutput( + self.mininput:cdata(), + self.output:cdata() + ) + return self.output +end + +function SoftMin:updateGradInput(input, gradOutput) + self.mininput = self.mininput or input.new() + self.mininput:resizeAs(input):copy(input):mul(-1) + + input.THNN.SoftMax_updateGradInput( + self.mininput:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata() + ) + + self.gradInput:mul(-1) + return self.gradInput +end + +function SoftMin:clearState() + if self.mininput then self.mininput:set() end + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SoftPlus.lua b/contrib/lua-torch/nn/SoftPlus.lua new file mode 100644 index 000000000..f77b25380 --- /dev/null +++ b/contrib/lua-torch/nn/SoftPlus.lua @@ -0,0 +1,35 @@ +local SoftPlus, parent = torch.class('nn.SoftPlus', 'nn.Module') + +function SoftPlus:__init(beta) + parent.__init(self) + self.beta = beta or 1 -- Beta controls sharpness of transfer function + self.threshold = 20 -- Avoid floating point issues with exp(x), x>20 +end + +function SoftPlus:updateOutput(input) + -- f(x) = 1/beta * log(1 + exp(beta * x)) + input.THNN.SoftPlus_updateOutput( + input:cdata(), + self.output:cdata(), + self.beta, + self.threshold + ) + return self.output +end + +function SoftPlus:updateGradInput(input, gradOutput) + -- d/dx[log(1+exp(k*x))/k] = exp(kx) / (exp(kx) + 1) + -- SINCE + -- y = (1/k)*log(1+exp(k*x)) --> x = (1/k)*log(exp(k*y)-1) + -- THEREFORE: + -- d/dx(f(x)) = (exp(k*y) - 1) / exp(k*y) + input.THNN.SoftPlus_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata(), + self.beta, + self.threshold + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SoftShrink.lua b/contrib/lua-torch/nn/SoftShrink.lua new file mode 100644 index 000000000..67af15a98 --- /dev/null +++ b/contrib/lua-torch/nn/SoftShrink.lua @@ -0,0 +1,25 @@ +local SoftShrink, parent = torch.class('nn.SoftShrink', 'nn.Module') + +function SoftShrink:__init(lam) + parent.__init(self) + self.lambda = lam or 0.5 +end + +function SoftShrink:updateOutput(input) + input.THNN.SoftShrink_updateOutput( + input:cdata(), + self.output:cdata(), + self.lambda + ) + return self.output +end + +function SoftShrink:updateGradInput(input, gradOutput) + input.THNN.SoftShrink_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.lambda + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SoftSign.lua b/contrib/lua-torch/nn/SoftSign.lua new file mode 100644 index 000000000..ee72011f1 --- /dev/null +++ b/contrib/lua-torch/nn/SoftSign.lua @@ -0,0 +1,20 @@ +local SoftSign, parent = torch.class('nn.SoftSign', 'nn.Module') + +function 
SoftSign:updateOutput(input) + self.temp = self.temp or input.new() + self.temp:resizeAs(input):copy(input):abs():add(1) + self.output:resizeAs(input):copy(input):cdiv(self.temp) + return self.output +end + +function SoftSign:updateGradInput(input, gradOutput) + self.tempgrad = self.tempgrad or input.new() + self.tempgrad:resizeAs(self.output):copy(input):abs():add(1):cmul(self.tempgrad) + self.gradInput:resizeAs(input):copy(gradOutput):cdiv(self.tempgrad) + return self.gradInput +end + +function SoftSign:clearState() + nn.utils.clear(self, 'temp', 'tempgrad') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SparseJacobian.lua b/contrib/lua-torch/nn/SparseJacobian.lua new file mode 100644 index 000000000..7f4c02444 --- /dev/null +++ b/contrib/lua-torch/nn/SparseJacobian.lua @@ -0,0 +1,277 @@ +nn.SparseJacobian = {} + +function nn.SparseJacobian.backward (module, input, param, dparam) + local doparam = 0 + if param then + doparam = 1 + end + + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(), 1, dout:nElement()) + -- jacobian matrix to calculate + local jacobian + if doparam == 1 then + jacobian = torch.Tensor(param:nElement(), dout:nElement()):zero() + else + jacobian = torch.Tensor(input:size(1), dout:nElement()):zero() + end + + for i=1,sdout:nElement() do + dout:zero() + sdout[i] = 1 + module:zeroGradParameters() + local din = module:updateGradInput(input, dout) + module:accGradParameters(input, dout) + if doparam == 1 then + jacobian:select(2,i):copy(dparam) + else + jacobian:select(2,i):copy(din:select(2,2)) + end + end + + return jacobian +end + + +function nn.SparseJacobian.backwardUpdate (module, input, param) + + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + -- original param + local params = module:parameters() + local origparams = {} + for j=1,#params do + table.insert(origparams, params[j]:clone()) + end + + for i=1,sdout:nElement() do + -- Reset parameters + for j=1,#params do + params[j]:copy(origparams[j]) + end + dout:zero() + sdout[i] = 1 + module:zeroGradParameters() + module:updateGradInput(input, dout) + module:accUpdateGradParameters(input, dout, 1) + jacobian:select(2,i):copy(param) + end + + for j=1,#params do + params[j]:copy(origparams[j]) + end + + return jacobian +end + +function nn.SparseJacobian.forward(module, input, param) + local doparam = 0 + if param then + doparam = 1 + end + param = param or input + + -- perturbation amount + local small = 1e-6 + -- 1D view of input + --local tst = param:storage() + local sin + if doparam == 1 then + sin = param.new(param):resize(param:nElement()) + else + sin = input.new(input):select(2,2) + end + + local out = module:forward(input) + -- jacobian matrix to calculate + local jacobian + if doparam == 1 then + jacobian = torch.Tensor():resize(param:nElement(), + out:nElement()) + else + jacobian = torch.Tensor():resize(input:size(1), + out:nElement()) + end + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + sin[i] = sin[i] - small + outa:copy(module:forward(input)) + sin[i] = sin[i] + 2*small + outb:copy(module:forward(input)) + sin[i] = sin[i] - small + + 
outb:add(-1,outa):div(2*small) + jacobian:select(1,i):copy(outb) + end + + return jacobian +end + +function nn.SparseJacobian.forwardUpdate(module, input, param) + -- perturbation amount + local small = 1e-6 + -- 1D view of input + --local tst = param:storage() + local sin = param.new(param):resize(param:nElement())--param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + sin[i] = sin[i] - small + outa:copy(module:forward(input)) + sin[i] = sin[i] + 2*small + outb:copy(module:forward(input)) + sin[i] = sin[i] - small + + outb:add(-1,outa):div(2*small) + jacobian:select(1,i):copy(outb) + jacobian:select(1,i):mul(-1) + jacobian:select(1,i):add(sin[i]) + end + return jacobian +end + +function nn.SparseJacobian.testJacobian (module, input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:select(2,2):copy(torch.rand(input:size(1)):mul(inrange):add(minval)) + local jac_fprop = nn.SparseJacobian.forward(module,input) + local jac_bprop = nn.SparseJacobian.backward(module,input) + local error = jac_fprop-jac_bprop + return error:abs():max() +end + +function nn.SparseJacobian.testJacobianParameters (module, input, param, dparam, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:select(2,2):copy(torch.rand(input:size(1)):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local jac_bprop = nn.SparseJacobian.backward(module, input, param, dparam) + local jac_fprop = nn.SparseJacobian.forward(module, input, param) + local error = jac_fprop - jac_bprop + return error:abs():max() +end + +function nn.SparseJacobian.testJacobianUpdateParameters (module, input, param, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:select(2,2):copy(torch.rand(input:size(1)):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local params_bprop = nn.SparseJacobian.backwardUpdate(module, input, param) + local params_fprop = nn.SparseJacobian.forwardUpdate(module, input, param) + + local error = params_fprop - params_bprop + return error:abs():max() +end + +function nn.SparseJacobian.testIO(module,input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + + -- run module + module:forward(input) + local go = module.output:clone():copy(torch.rand(module.output:nElement()):mul(inrange):add(minval)) + module:zeroGradParameters() + module:updateGradInput(input,go) + module:accGradParameters(input,go) + + local fo = module.output:clone() + local bo = module.gradInput:clone() + + -- write module + local f = torch.DiskFile('tmp.bin','w'):binary() + f:writeObject(module) + f:close() + -- read module + local m = torch.DiskFile('tmp.bin'):binary():readObject() + m:forward(input) + m:zeroGradParameters() + m:updateGradInput(input,go) + m:accGradParameters(input,go) + -- cleanup + os.remove('tmp.bin') + + local fo2 = m.output:clone() + local bo2 = m.gradInput:clone() + + local errf = fo - fo2 + local errb = bo - bo2 + return errf:abs():max(), errb:abs():max() +end + +function nn.SparseJacobian.testAllUpdate(module, input, weight, gradWeight) + local gradOutput + local lr = torch.uniform(0.1, 1) + local errors = {} + + -- 
accGradParameters + local maccgp = module:clone() + local weightc = maccgp[weight]:clone() + maccgp:forward(input) + gradOutput = torch.rand(maccgp.output:size()) + maccgp:zeroGradParameters() + maccgp:updateGradInput(input, gradOutput) + maccgp:accGradParameters(input, gradOutput) + maccgp:updateParameters(lr) + errors["accGradParameters"] = (weightc-maccgp[gradWeight]*lr-maccgp[weight]):norm() + + -- accUpdateGradParameters + local maccugp = module:clone() + maccugp:forward(input) + maccugp:updateGradInput(input, gradOutput) + maccugp:accUpdateGradParameters(input, gradOutput, lr) + errors["accUpdateGradParameters"] = (maccugp[weight]-maccgp[weight]):norm() + + -- shared, accGradParameters + local macsh1 = module:clone() + local macsh2 = module:clone() + macsh2:share(macsh1, weight) + macsh1:forward(input) + macsh2:forward(input) + macsh1:zeroGradParameters() + macsh2:zeroGradParameters() + macsh1:updateGradInput(input, gradOutput) + macsh2:updateGradInput(input, gradOutput) + macsh1:accGradParameters(input, gradOutput) + macsh2:accGradParameters(input, gradOutput) + macsh1:updateParameters(lr) + macsh2:updateParameters(lr) + local err = (weightc-maccgp[gradWeight]*(lr*2)-macsh1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macsh2[weight]):norm() + errors["accGradParameters [shared]"] = err + + -- shared, accUpdateGradParameters + local macshu1 = module:clone() + local macshu2 = module:clone() + macshu2:share(macshu1, weight) + macshu1:forward(input) + macshu2:forward(input) + macshu1:updateGradInput(input, gradOutput) + macshu2:updateGradInput(input, gradOutput) + macshu1:accUpdateGradParameters(input, gradOutput, lr) + macshu2:accUpdateGradParameters(input, gradOutput, lr) + err = (weightc-maccgp[gradWeight]*(lr*2)-macshu1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macshu2[weight]):norm() + errors["accUpdateGradParameters [shared]"] = err + + return errors +end diff --git a/contrib/lua-torch/nn/SparseLinear.lua b/contrib/lua-torch/nn/SparseLinear.lua new file mode 100644 index 000000000..9a50c6912 --- /dev/null +++ b/contrib/lua-torch/nn/SparseLinear.lua @@ -0,0 +1,242 @@ +local THNN = require 'nn.THNN' +local SparseLinear, parent = torch.class('nn.SparseLinear', 'nn.Module') + +local NO_LAST_INPUT = 0 +local ONE_LAST_INPUT = 1 +local ACC_MULTIPLE_TIMES = 2 + +function SparseLinear:__init(inputSize, outputSize, doGradInput) + parent.__init(self) + + self.weightDecay = 0 + self.doGradInput = doGradInput or false + self.weight = torch.Tensor(outputSize, inputSize):zero() + self.bias = torch.Tensor(outputSize):zero() + self.gradWeight = torch.Tensor(outputSize, inputSize):zero() + self.gradBias = torch.Tensor(outputSize):zero() + + assert(type(self.doGradInput) == type(true)) + + self.lastInput = nil + self.sparseUpdate = NO_LAST_INPUT + self.formatted_input = nil + + -- state + self.gradInput = {} + self.output:resize(outputSize) + + self:reset() +end + +function SparseLinear:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(2)) + end + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv):mul(0.000001) +end + +function SparseLinear:reshapeInput(input) + if type(input) == 'table' then + return input, true, false + else + if input:dim() == 2 then + return {input}, false, false + else + return input, true, true + end + end +end + +function SparseLinear:updateOutput(input) + if self.sparseUpdate == ONE_LAST_INPUT then + self.sparseUpdate = ACC_MULTIPLE_TIMES + end + local input, 
batchMode, legacyMode = self:reshapeInput(input) + self.legacyMode = legacyMode + + if legacyMode then + input.THNN.SparseLinear_legacyUpdateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + self.bias:cdata() + ) + else + local nbatches = #input + if nbatches == 0 then + self.output:copy(self.bias) + return self.output + end + + local size = 0 + local marker = 1 + self.formatted_input = self.formatted_input or input[1].new() + + for i,v in ipairs(input) do size = size + input[i]:size(1) end + self.formatted_input:resize(size, 3) + for i,v in ipairs(input) do + local buf = self.formatted_input:narrow(1, marker, input[i]:size(1)) + buf:narrow(2,2,2):copy(input[i]) + buf:select(2,1):fill(i) + marker = marker + input[i]:size(1) + end + + self.output:resize(nbatches, self.weight:size(1)) + input[1].THNN.SparseLinear_updateOutput( + self.formatted_input:cdata(), + self.output:cdata(), + self.weight:cdata(), + self.bias:cdata() + ) + + -- fix output size for batchSize = 1 + if not batchMode then + self.output = self.output[1] + end + end + + return self.output +end + +function SparseLinear:accGradParameters(input, gradOutput, scale) + local input, batchMode, legacyMode = self:reshapeInput(input) + self.legacyMode = legacyMode + self.lastInput = self.lastInput or gradOutput.new() + if self.sparseUpdate == NO_LAST_INPUT then + local v = self.formatted_input + if self.legacyMode then v = input end + self.lastInput:resizeAs(v):copy(v) + self.sparseUpdate = ONE_LAST_INPUT + elseif self.sparseUpdate == ONE_LAST_INPUT then + self.sparseUpdate = ACC_MULTIPLE_TIMES + end + + if legacyMode then + input.THNN.SparseLinear_legacyAccGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + self.gradBias:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.weightDecay or 0, + scale or 1 + ) + else + if not batchMode then + gradOutput:resize(1, gradOutput:size(1)) + end + + local rows = self.formatted_input:select(2, 1) + local cols = self.formatted_input:select(2, 2) + local sortinds = cols * gradOutput:size(1) + rows + local _, inds = sortinds:sort(1, false) + local newinput = self.formatted_input:index(1, inds) + input[1].THNN.SparseLinear_accGradParameters( + newinput:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + self.gradBias:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.weightDecay or 0, + scale or 1 + ) + end +end + +function SparseLinear:updateGradInput(input, gradOutput) + if self.legacyMode then + if type(self.gradInput) ~= type(gradOutput) then self.gradInput = gradOutput.new() end + self.gradInput:resizeAs(input) + else + self.gradInput = {} + end + if self.doGradInput then + -- GradInput should be dense anyway + local gi + local batchMode = true + if gradOutput:dim() == 1 then + gi = self.weight:t()*gradOutput + batchMode = false + elseif gradOutput:dim() == 2 then + gi = gradOutput*self.weight + end + local ini = self.weight:size(2) + + if self.legacyMode then + local batches = self.gradInput:size(1) + self.gradInput:resize(batches, ini, 2) + self.gradInput:select(3,1):copy(torch.repeatTensor(torch.range(1, ini), batches, 1)) + self.gradInput:select(3,2):copy(gi) + else + local indicies = torch.range(1, ini) + if not batchMode then gi:resize(1, ini) end + for i = 1,gi:size(1) do + self.gradInput[i] = gradOutput.new(ini, 2) + self.gradInput[i]:select(2, 2):copy(gi[i]) + self.gradInput[i]:select(2, 1):range(1, ini) + end + end + end + return self.gradInput +end + +-- These functions do sparse updates / zeros. 
However, if we accumulated
+-- gradients multiple times, we can't depend on the last input to do sparse
+-- updates.
+function SparseLinear:updateParameters(learningRate)
+   if self.lastInput and self.sparseUpdate == ONE_LAST_INPUT then
+      if self.legacyMode then
+         self.lastInput.THNN.SparseLinear_legacyUpdateParameters(
+            self.weight:cdata(),
+            self.bias:cdata(),
+            self.gradWeight:cdata(),
+            self.gradBias:cdata(),
+            self.lastInput:cdata(),
+            learningRate
+         )
+      else
+         self.lastInput.THNN.SparseLinear_updateParameters(
+            self.weight:cdata(),
+            self.bias:cdata(),
+            self.gradWeight:cdata(),
+            self.gradBias:cdata(),
+            self.lastInput:cdata(),
+            learningRate
+         )
+      end
+   else
+      parent.updateParameters(self, learningRate)
+   end
+end
+
+function SparseLinear:zeroGradParameters()
+   if self.lastInput and self.sparseUpdate == ONE_LAST_INPUT then
+      if self.legacyMode then
+         self.lastInput.THNN.SparseLinear_legacyZeroGradParameters(
+            self.gradWeight:cdata(),
+            self.gradBias:cdata(),
+            self.lastInput:cdata()
+         )
+      else
+         self.lastInput.THNN.SparseLinear_zeroGradParameters(
+            self.gradWeight:cdata(),
+            self.gradBias:cdata(),
+            self.lastInput:cdata()
+         )
+      end
+   else
+      parent.zeroGradParameters(self)
+   end
+   self.sparseUpdate = NO_LAST_INPUT
+end
+
+function SparseLinear:clearState()
+   if self.lastInput then
+      self.lastInput:set()
+      -- clear any cached CUDA-side state when the backend provides it
+      local backend = self.lastInput.THNN
+      if backend and backend.SparseLinear_cudaClearState then
+         backend.SparseLinear_cudaClearState()
+      end
+   end
+   return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialAdaptiveAveragePooling.lua b/contrib/lua-torch/nn/SpatialAdaptiveAveragePooling.lua new file mode 100644 index 000000000..2e223580a --- /dev/null +++ b/contrib/lua-torch/nn/SpatialAdaptiveAveragePooling.lua @@ -0,0 +1,35 @@
+local SpatialAdaptiveAveragePooling, parent = torch.class('nn.SpatialAdaptiveAveragePooling', 'nn.Module')
+
+function SpatialAdaptiveAveragePooling:__init(W, H)
+   parent.__init(self)
+
+   self.W = W
+   self.H = H
+end
+
+function SpatialAdaptiveAveragePooling:updateOutput(input)
+   input.THNN.SpatialAdaptiveAveragePooling_updateOutput(
+      input:cdata(),
+      self.output:cdata(),
+      self.W, self.H
+   )
+   return self.output
+end
+
+function SpatialAdaptiveAveragePooling:updateGradInput(input, gradOutput)
+   input.THNN.SpatialAdaptiveAveragePooling_updateGradInput(
+      input:cdata(),
+      gradOutput:cdata(),
+      self.gradInput:cdata()
+   )
+   return self.gradInput
+end
+
+-- for backward compat
+function SpatialAdaptiveAveragePooling:empty()
+   self:clearState()
+end
+
+function SpatialAdaptiveAveragePooling:clearState()
+   return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialAdaptiveMaxPooling.lua b/contrib/lua-torch/nn/SpatialAdaptiveMaxPooling.lua new file mode 100644 index 000000000..b78261c3d --- /dev/null +++ b/contrib/lua-torch/nn/SpatialAdaptiveMaxPooling.lua @@ -0,0 +1,46 @@
+local SpatialAdaptiveMaxPooling, parent = torch.class('nn.SpatialAdaptiveMaxPooling', 'nn.Module')
+
+function SpatialAdaptiveMaxPooling:__init(W, H)
+   parent.__init(self)
+
+   self.W = W
+   self.H = H
+end
+
+function SpatialAdaptiveMaxPooling:updateOutput(input)
+   self.indices = self.indices or torch.LongTensor()
+   if torch.typename(input):find('torch%.Cuda.*Tensor') then
+      self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+   else
+      self.indices = self.indices:long()
+   end
+   input.THNN.SpatialAdaptiveMaxPooling_updateOutput(
+      input:cdata(),
+      self.output:cdata(),
+      self.indices:cdata(),
+      self.W, self.H
+   )
+   return self.output
+end
+
+function SpatialAdaptiveMaxPooling:updateGradInput(input, gradOutput)
+   input.THNN.SpatialAdaptiveMaxPooling_updateGradInput(
+      input:cdata(),
+      gradOutput:cdata(),
+      self.gradInput:cdata(),
+      self.indices:cdata()
+   )
+   return self.gradInput
+end
+
+-- for backward compat
+function SpatialAdaptiveMaxPooling:empty()
+   self:clearState()
+end
+
+function SpatialAdaptiveMaxPooling:clearState()
+   if self.indices then
+      self.indices:set()
+   end
+   return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialAutoCropMSECriterion.lua b/contrib/lua-torch/nn/SpatialAutoCropMSECriterion.lua new file mode 100644 index 000000000..97206a062 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialAutoCropMSECriterion.lua @@ -0,0 +1,74 @@
+--[[
+    SpatialAutoCropMSECriterion.
+    Implements the MSECriterion when the spatial resolution of the input is less than
+    or equal to the spatial resolution of the target. It achieves this by center-cropping
+    the target to the spatial resolution of the input; the MSE is then
+    calculated between the input and the cropped target.
+]]
+local SpatialAutoCropMSECriterion, parent = torch.class('nn.SpatialAutoCropMSECriterion', 'nn.MSECriterion')
+
+function SpatialAutoCropMSECriterion:__init(sizeAverage)
+   parent.__init(self, sizeAverage)
+end
+
+local function centerCrop(input, cropSize)
+   assert(input:dim() == 3 or input:dim() == 4, "input should be a 3D or 4D tensor")
+   assert(#cropSize == 2, "cropSize should have two elements only")
+   local _input = input
+   if input:dim() == 3 then
+      _input = input:view(1, input:size(1), input:size(2), input:size(3))
+   end
+   assert(cropSize[1] > 0 and cropSize[1] <= _input:size(3),
+      "0 < cropSize[1] <= input:size(3) not satisfied")
+   assert(cropSize[2] > 0 and cropSize[2] <= _input:size(4),
+      "0 < cropSize[2] <= input:size(4) not satisfied")
+
+   local inputHeight = _input:size(3)
+   local inputWidth = _input:size(4)
+
+   local rowStart = 1 + math.floor((inputHeight - cropSize[1])/2.0)
+   local rowEnd = rowStart + cropSize[1] - 1
+   local colStart = 1 + math.floor((inputWidth - cropSize[2])/2.0)
+   local colEnd = colStart + cropSize[2] - 1
+   if input:dim() == 3 then
+      return input[{{}, {rowStart, rowEnd}, {colStart, colEnd}}]
+   else
+      return input[{{}, {}, {rowStart, rowEnd}, {colStart, colEnd}}]
+   end
+end
+
+local function getTensorHeightAndWidth(tensor)
+   local heightIdx = 2
+   local widthIdx = 3
+   if tensor:dim() == 4 then
+      heightIdx = 3
+      widthIdx = 4
+   end
+   return tensor:size(heightIdx), tensor:size(widthIdx)
+end
+
+local function inputResolutionIsSmallerThanTargetResolution(input, target)
+   local inputHeight, inputWidth = getTensorHeightAndWidth(input)
+   local targetHeight, targetWidth = getTensorHeightAndWidth(target)
+   return inputHeight <= targetHeight and inputWidth <= targetWidth
+end
+
+function SpatialAutoCropMSECriterion:updateOutput(input, target)
+   assert(input:dim() == target:dim(), "input and target should have the same number of dimensions")
+   assert(input:dim() == 4 or input:dim() == 3, "input and target must have 3 or 4 dimensions")
+   assert(inputResolutionIsSmallerThanTargetResolution(input, target),
+      "Spatial resolution of input should be less than or equal to the spatial resolution of the target")
+
+   local inputHeight, inputWidth = getTensorHeightAndWidth(input)
+   local targetCropped = centerCrop(target, {inputHeight, inputWidth})
+   return parent.updateOutput(self, input, targetCropped)
+end
+
+
+function SpatialAutoCropMSECriterion:updateGradInput(input, gradOutput)
+   assert(input:dim() == gradOutput:dim(), "input and gradOutput should have the same number of dimensions")
+
assert(input:dim() == 4 or input:dim() == 3, "input and gradOutput must have 3 or 4 dimensions") + assert(input:isSameSizeAs(gradOutput), "gradOutput and input must have the same size") + + return parent.updateGradInput(self, input, gradOutput) +end diff --git a/contrib/lua-torch/nn/SpatialAveragePooling.lua b/contrib/lua-torch/nn/SpatialAveragePooling.lua new file mode 100644 index 000000000..1e7666827 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialAveragePooling.lua @@ -0,0 +1,93 @@ +local SpatialAveragePooling, parent = torch.class('nn.SpatialAveragePooling', 'nn.Module') + +function SpatialAveragePooling:__init(kW, kH, dW, dH, padW, padH) + parent.__init(self) + + self.kW = kW + self.kH = kH + self.dW = dW or 1 + self.dH = dH or 1 + self.padW = padW or 0 + self.padH = padH or 0 + self.ceil_mode = false + self.count_include_pad = true + self.divide = true +end + +function SpatialAveragePooling:ceil() + self.ceil_mode = true + return self +end + +function SpatialAveragePooling:floor() + self.ceil_mode = false + return self +end + +function SpatialAveragePooling:setCountIncludePad() + self.count_include_pad = true + return self +end + +function SpatialAveragePooling:setCountExcludePad() + self.count_include_pad = false + return self +end + +local function backwardCompatible(self) + if self.ceil_mode == nil then + self.ceil_mode = false + self.count_include_pad = true + self.padH = 0 + self.padW = 0 + end +end + +function SpatialAveragePooling:updateOutput(input) + backwardCompatible(self) + input.THNN.SpatialAveragePooling_updateOutput( + input:cdata(), + self.output:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.ceil_mode, + self.count_include_pad + ) + -- for backward compatibility with saved models + -- which are not supposed to have "divide" field + if not self.divide then + self.output:mul(self.kW*self.kH) + end + return self.output +end + +function SpatialAveragePooling:updateGradInput(input, gradOutput) + if self.gradInput then + input.THNN.SpatialAveragePooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.ceil_mode, + self.count_include_pad + ) + -- for backward compatibility + if not self.divide then + self.gradInput:mul(self.kW*self.kH) + end + return self.gradInput + end +end + +function SpatialAveragePooling:__tostring__() + local s = string.format('%s(%dx%d, %d,%d', torch.type(self), + self.kW, self.kH, self.dW, self.dH) + if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padW .. ','.. self.padH + end + s = s .. ')' + return s +end diff --git a/contrib/lua-torch/nn/SpatialBatchNormalization.lua b/contrib/lua-torch/nn/SpatialBatchNormalization.lua new file mode 100644 index 000000000..c5004ce3a --- /dev/null +++ b/contrib/lua-torch/nn/SpatialBatchNormalization.lua @@ -0,0 +1,35 @@ +--[[ + This file implements Batch Normalization as described in the paper: + "Batch Normalization: Accelerating Deep Network Training + by Reducing Internal Covariate Shift" + by Sergey Ioffe, Christian Szegedy + + This implementation is useful for inputs coming from convolution layers. + For non-convolutional layers, see BatchNormalization.lua + + The operation implemented is: + y = ( x - mean(x) ) + -------------------- * gamma + beta + standard-deviation(x) + where gamma and beta are learnable parameters. + + The learning of gamma and beta is optional. 
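+
+ For example (an illustrative sketch): normalizing the 16 feature maps
+ produced by a convolution could look like
+    model:add(nn.SpatialConvolution(3, 16, 5, 5))
+    model:add(nn.SpatialBatchNormalization(16))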
+
+ Usage:
+ with learnable parameters: nn.SpatialBatchNormalization(N [,eps] [,momentum])
+ where N = number of input feature planes
+ without learnable parameters: nn.SpatialBatchNormalization(N [,eps] [,momentum], false)
+
+ eps is a small value added to the variance to avoid divide-by-zero.
+ Defaults to 1e-5.
+
+ At training time, this layer keeps a running estimate of its computed mean and std.
+ The running estimates are kept with a default momentum of 0.1 (unless overridden).
+ At test time, this running mean/std is used to normalize.
+]]--
+local BN, parent = torch.class('nn.SpatialBatchNormalization', 'nn.BatchNormalization')
+
+BN.__version = 2
+
+-- expected dimension of input
+BN.nDim = 4
diff --git a/contrib/lua-torch/nn/SpatialClassNLLCriterion.lua b/contrib/lua-torch/nn/SpatialClassNLLCriterion.lua new file mode 100644 index 000000000..fbd367410 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialClassNLLCriterion.lua @@ -0,0 +1,81 @@
+local THNN = require 'nn.THNN'
+local SpatialClassNLLCriterion, parent = torch.class('nn.SpatialClassNLLCriterion', 'nn.Criterion')
+
+function SpatialClassNLLCriterion:__init(weights, sizeAverage)
+   parent.__init(self)
+   if sizeAverage ~= nil then
+      self.sizeAverage = sizeAverage
+   else
+      self.sizeAverage = true
+   end
+   if weights then
+      assert(weights:dim() == 1, "weights input should be 1-D Tensor")
+      self.weights = weights
+   end
+
+   self.output_tensor = torch.zeros(1)
+   self.total_weight_tensor = torch.ones(1)
+   self.target = torch.zeros(1):long()
+end
+
+function SpatialClassNLLCriterion:__len()
+   if (self.weights) then
+      return #self.weights
+   else
+      return 0
+   end
+end
+
+function SpatialClassNLLCriterion:updateOutput(input, target)
+   if type(target) == 'number' then
+      if torch.typename(input):find('torch%.Cuda.*Tensor') then
+         self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
+      else
+         self.target = self.target:long()
+      end
+      self.target[1] = target
+   elseif torch.typename(input):find('torch%.Cuda.*Tensor') then
+      self.target = torch.CudaLongTensor and target:cudaLong() or target
+   else
+      self.target = target:long()
+   end
+
+   input.THNN.SpatialClassNLLCriterion_updateOutput(
+      input:cdata(),
+      self.target:cdata(),
+      self.output_tensor:cdata(),
+      self.sizeAverage,
+      THNN.optionalTensor(self.weights),
+      self.total_weight_tensor:cdata()
+   )
+   self.output = self.output_tensor[1]
+   return self.output, self.total_weight_tensor[1]
+end
+
+function SpatialClassNLLCriterion:updateGradInput(input, target)
+   if type(target) == 'number' then
+      if torch.typename(input):find('torch%.Cuda.*Tensor') then
+         self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
+      else
+         self.target = self.target:long()
+      end
+      self.target[1] = target
+   elseif torch.typename(input):find('torch%.Cuda.*Tensor') then
+      self.target = torch.CudaLongTensor and target:cudaLong() or target
+   else
+      self.target = target:long()
+   end
+
+   self.gradInput:resizeAs(input):zero()
+
+   input.THNN.SpatialClassNLLCriterion_updateGradInput(
+      input:cdata(),
+      self.target:cdata(),
+      self.gradInput:cdata(),
+      self.sizeAverage,
+      THNN.optionalTensor(self.weights),
+      self.total_weight_tensor:cdata()
+   )
+
+   return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SpatialContrastiveNormalization.lua b/contrib/lua-torch/nn/SpatialContrastiveNormalization.lua new file mode 100644 index 000000000..0ad251ae4 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialContrastiveNormalization.lua @@ -0,0 +1,36 @@
+local SpatialContrastiveNormalization, parent =
torch.class('nn.SpatialContrastiveNormalization','nn.Module') + +function SpatialContrastiveNormalization:__init(nInputPlane, kernel, threshold, thresval) + parent.__init(self) + + -- get args + self.nInputPlane = nInputPlane or 1 + self.kernel = kernel or torch.Tensor(9,9):fill(1) + self.threshold = threshold or 1e-4 + self.thresval = thresval or threshold or 1e-4 + local kdim = self.kernel:nDimension() + + -- check args + if kdim ~= 2 and kdim ~= 1 then + error('<SpatialContrastiveNormalization> averaging kernel must be 2D or 1D') + end + if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then + error('<SpatialContrastiveNormalization> averaging kernel must have ODD dimensions') + end + + -- instantiate sub+div normalization + self.normalizer = nn.Sequential() + self.normalizer:add(nn.SpatialSubtractiveNormalization(self.nInputPlane, self.kernel)) + self.normalizer:add(nn.SpatialDivisiveNormalization(self.nInputPlane, self.kernel, + self.threshold, self.thresval)) +end + +function SpatialContrastiveNormalization:updateOutput(input) + self.output = self.normalizer:forward(input) + return self.output +end + +function SpatialContrastiveNormalization:updateGradInput(input, gradOutput) + self.gradInput = self.normalizer:backward(input, gradOutput) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SpatialConvolution.lua b/contrib/lua-torch/nn/SpatialConvolution.lua new file mode 100644 index 000000000..15a2b4b62 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialConvolution.lua @@ -0,0 +1,155 @@ +local THNN = require 'nn.THNN' +local SpatialConvolution, parent = torch.class('nn.SpatialConvolution', 'nn.Module') + +function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kW = kW + self.kH = kH + + self.dW = dW + self.dH = dH + self.padW = padW or 0 + self.padH = padH or self.padW + + self.weight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW) + self.gradBias = torch.Tensor(nOutputPlane) + + self:reset() +end + +function SpatialConvolution:noBias() + self.bias = nil + self.gradBias = nil + return self +end + +function SpatialConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane) + end + if nn.oldSeed then + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + if self.bias then + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + end + else + self.weight:uniform(-stdv, stdv) + if self.bias then + self.bias:uniform(-stdv, stdv) + end + end +end + +local function backCompatibility(self) + self.finput = self.finput or self.weight.new() + self.fgradInput = self.fgradInput or self.weight.new() + if self.padding then + self.padW = self.padding + self.padH = self.padding + self.padding = nil + else + self.padW = self.padW or 0 + self.padH = self.padH or 0 + end + if self.weight:dim() == 2 then + self.weight = self.weight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW) + end + if self.gradWeight and self.gradWeight:dim() == 2 then + self.gradWeight = self.gradWeight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW) + end +end + +function SpatialConvolution:updateOutput(input) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + 
backCompatibility(self) + input.THNN.SpatialConvolutionMM_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH + ) + return self.output +end + +function SpatialConvolution:updateGradInput(input, gradOutput) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + if self.gradInput then + backCompatibility(self) + input.THNN.SpatialConvolutionMM_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH + ) + return self.gradInput + end +end + +function SpatialConvolution:accGradParameters(input, gradOutput, scale) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + scale = scale or 1 + backCompatibility(self) + input.THNN.SpatialConvolutionMM_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + scale + ) +end + +function SpatialConvolution:type(type,tensorCache) + self.finput = self.finput and torch.Tensor() + self.fgradInput = self.fgradInput and torch.Tensor() + return parent.type(self,type,tensorCache) +end + +function SpatialConvolution:__tostring__() + local s = string.format('%s(%d -> %d, %dx%d', torch.type(self), + self.nInputPlane, self.nOutputPlane, self.kW, self.kH) + if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then + s = s .. string.format(', %d,%d', self.dW, self.dH) + end + if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padW .. ',' .. self.padH + end + if self.bias then + return s .. ')' + else + return s .. 
') without bias' + end +end + +function SpatialConvolution:clearState() + nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SpatialConvolutionLocal.lua b/contrib/lua-torch/nn/SpatialConvolutionLocal.lua new file mode 100644 index 000000000..9494c2ffe --- /dev/null +++ b/contrib/lua-torch/nn/SpatialConvolutionLocal.lua @@ -0,0 +1,188 @@ +local SpatialConvolutionLocal, parent = torch.class('nn.SpatialConvolutionLocal', 'nn.Module') + +function SpatialConvolutionLocal:__init(nInputPlane, nOutputPlane, iW, iH ,kW, kH, dW, dH, padW, padH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kW = kW + self.kH = kH + self.iW = iW + self.iH = iH + + self.dW = dW + self.dH = dH + self.padW = padW or 0 + self.padH = padH or self.padW + self.oW = math.floor((self.padW * 2 + iW - self.kW) / self.dW) + 1 + self.oH = math.floor((self.padH * 2 + iH - self.kH) / self.dH) + 1 + assert(1 <= self.oW and 1 <= self.oH, 'illegal configuration: output width or height less than 1') + + self.weight = torch.Tensor(self.oH, self.oW, nOutputPlane, nInputPlane, kH, kW) + self.bias = torch.Tensor(nOutputPlane, self.oH, self.oW) + self.gradWeight = torch.Tensor():resizeAs(self.weight) + self.gradBias = torch.Tensor():resizeAs(self.bias) + + self:reset() +end + +function SpatialConvolutionLocal:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane) + end + if nn.oldSeed then + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv) + end +end + +local function viewWeight(self) + self.weight = self.weight:view(self.oH * self.oW, self.nOutputPlane, self.nInputPlane * self.kH * self.kW) + if self.gradWeight and self.gradWeight:dim() > 0 then + self.gradWeight = self.gradWeight:view(self.oH * self.oW, self.nOutputPlane, self.nInputPlane * self.kH * self.kW) + end +end + +local function unviewWeight(self) + self.weight = self.weight:view(self.oH, self.oW, self.nOutputPlane, self.nInputPlane, self.kH, self.kW) + if self.gradWeight and self.gradWeight:dim() > 0 then + self.gradWeight = self.gradWeight:view(self.oH, self.oW, self.nOutputPlane, self.nInputPlane, self.kH, self.kW) + end +end + +local function checkInputSize(self, input) + if input:nDimension() == 3 then + if input:size(1) ~= self.nInputPlane or input:size(2) ~= self.iH or input:size(3) ~= self.iW then + error(string.format('Given input size: (%dx%dx%d) inconsistent with expected input size: (%dx%dx%d).', + input:size(1), input:size(2), input:size(3), self.nInputPlane, self.iH, self.iW)) + end + elseif input:nDimension() == 4 then + if input:size(2) ~= self.nInputPlane or input:size(3) ~= self.iH or input:size(4) ~= self.iW then + error(string.format('Given input size: (%dx%dx%dx%d) inconsistent with expected input size: (batchsize x%dx%dx%d).', + input:size(1), input:size(2), input:size(3), input:size(4), self.nInputPlane, self.iH, self.iW)) + end + else + error('3D or 4D(batch mode) tensor expected') + end +end + +local function checkOutputSize(self, input, output) + if output:nDimension() ~= input:nDimension() then + error('inconsistent dimension between output and input.') + end + if output:nDimension() == 3 then + if output:size(1) ~= self.nOutputPlane or output:size(2) 
~= self.oH or output:size(3) ~= self.oW then + error(string.format('Given output size: (%dx%dx%d) inconsistent with expected output size: (%dx%dx%d).', + output:size(1), output:size(2), output:size(3), self.nOutputPlane, self.oH, self.oW)) + end + elseif output:nDimension() == 4 then + if output:size(2) ~= self.nOutputPlane or output:size(3) ~= self.oH or output:size(4) ~= self.oW then + error(string.format('Given output size: (%dx%dx%dx%d) inconsistent with expected output size: (batchsize x%dx%dx%d).', + output:size(1), output:size(2), output:size(3), output:size(4), self.nOutputPlane, self.oH, self.oW)) + end + else + error('3D or 4D(batch mode) tensor expected') + end +end + +function SpatialConvolutionLocal:updateOutput(input) + self.finput = self.finput or input.new() + self.fgradInput = self.fgradInput or input.new() + checkInputSize(self, input) + viewWeight(self) + input.THNN.SpatialConvolutionLocal_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.iW, self.iH, + self.oW, self.oH + ) + unviewWeight(self) + return self.output +end + +function SpatialConvolutionLocal:updateGradInput(input, gradOutput) + checkInputSize(self, input) + checkOutputSize(self, input, gradOutput) + if self.gradInput then + viewWeight(self) + input.THNN.SpatialConvolutionLocal_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.iW, self.iH, + self.oW, self.oH + ) + unviewWeight(self) + return self.gradInput + end +end + +function SpatialConvolutionLocal:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + checkInputSize(self, input) + checkOutputSize(self, input, gradOutput) + viewWeight(self) + input.THNN.SpatialConvolutionLocal_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + self.gradBias:cdata(), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.iW, self.iH, + self.oW, self.oH, + scale + ) + unviewWeight(self) +end + +function SpatialConvolutionLocal:type(type,tensorCache) + self.finput = self.finput and torch.Tensor() + self.fgradInput = self.fgradInput and torch.Tensor() + return parent.type(self,type,tensorCache) +end + +function SpatialConvolutionLocal:__tostring__() + local s = string.format('%s(%d -> %d, %dx%d, %dx%d', torch.type(self), + self.nInputPlane, self.nOutputPlane, self.iW, self.iH, self.kW, self.kH) + if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then + s = s .. string.format(', %d,%d', self.dW, self.dH) + end + if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padW .. ',' .. self.padH + end + return s .. 
')' +end + +function SpatialConvolutionLocal:clearState() + nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SpatialConvolutionMM.lua b/contrib/lua-torch/nn/SpatialConvolutionMM.lua new file mode 100644 index 000000000..f20734f9b --- /dev/null +++ b/contrib/lua-torch/nn/SpatialConvolutionMM.lua @@ -0,0 +1,139 @@ +local THNN = require 'nn.THNN' +local SpatialConvolutionMM, parent = torch.class('nn.SpatialConvolutionMM', 'nn.Module') + +function SpatialConvolutionMM:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kW = kW + self.kH = kH + + self.dW = dW + self.dH = dH + self.padW = padW or 0 + self.padH = padH or self.padW + + self.weight = torch.Tensor(nOutputPlane, nInputPlane*kH*kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane*kH*kW) + self.gradBias = torch.Tensor(nOutputPlane) + + self:reset() +end + +function SpatialConvolutionMM:noBias() + self.bias = nil + self.gradBias = nil + return self +end + +function SpatialConvolutionMM:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane) + end + if nn.oldSeed then + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv) + end +end + +function SpatialConvolutionMM:updateOutput(input) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + self.finput = self.finput or input.new() + self.fgradInput = self.fgradInput or input.new() + -- backward compatibility + if self.padding then + self.padW = self.padding + self.padH = self.padding + self.padding = nil + end + input.THNN.SpatialConvolutionMM_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH + ) + return self.output +end + +function SpatialConvolutionMM:updateGradInput(input, gradOutput) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + if self.gradInput then + input.THNN.SpatialConvolutionMM_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH + ) + return self.gradInput + end +end + +function SpatialConvolutionMM:accGradParameters(input, gradOutput, scale) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + scale = scale or 1 + assert((self.bias and self.gradBias) or (self.bias == nil and self.gradBias == nil)) + input.THNN.SpatialConvolutionMM_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + scale + ) +end + +function SpatialConvolutionMM:type(type,tensorCache) + self.finput = self.finput and torch.Tensor() + self.fgradInput = self.fgradInput and torch.Tensor() + return parent.type(self,type,tensorCache) +end + +function SpatialConvolutionMM:__tostring__() + local s = string.format('%s(%d -> %d, %dx%d', 
torch.type(self),
+         self.nInputPlane, self.nOutputPlane, self.kW, self.kH)
+   if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
+      s = s .. string.format(', %d,%d', self.dW, self.dH)
+   end
+   if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+      s = s .. ', ' .. self.padW .. ',' .. self.padH
+   end
+   if self.bias then
+      return s .. ')'
+   else
+      return s .. ') without bias'
+   end
+end
+
+function SpatialConvolutionMM:clearState()
+   nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
+   return parent.clearState(self)
+end
+
diff --git a/contrib/lua-torch/nn/SpatialConvolutionMap.lua b/contrib/lua-torch/nn/SpatialConvolutionMap.lua
new file mode 100644
index 000000000..9051c119e
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialConvolutionMap.lua
@@ -0,0 +1,154 @@
+local SpatialConvolutionMap, parent = torch.class('nn.SpatialConvolutionMap', 'nn.Module')
+
+nn.tables = nn.tables or {}
+
+function nn.tables.full(nin, nout)
+   local ft = torch.Tensor(nin*nout,2)
+   local p = 1
+   for j=1,nout do
+      for i=1,nin do
+         ft[p][1] = i
+         ft[p][2] = j
+         p = p + 1
+      end
+   end
+   return ft
+end
+
+function nn.tables.oneToOne(nfeat)
+   local ft = torch.Tensor(nfeat,2)
+   for i=1,nfeat do
+      ft[i][1] = i
+      ft[i][2] = i
+   end
+   return ft
+end
+
+function nn.tables.random(nin, nout, nto)
+   local nker = nto * nout
+   local tbl = torch.Tensor(nker, 2)
+   local fi = torch.randperm(nin)
+   local frcntr = 1
+   local nfi = math.floor(nin/nto) -- number of distinct nto chunks
+   local totbl = tbl:select(2,2)
+   local frtbl = tbl:select(2,1)
+   local fitbl = fi:narrow(1, 1, (nfi * nto)) -- part of fi that covers distinct chunks
+   local ufrtbl= frtbl:unfold(1, nto, nto)
+   local utotbl= totbl:unfold(1, nto, nto)
+   local ufitbl= fitbl:unfold(1, nto, nto)
+
+   -- start filling frtbl
+   for i=1,nout do -- for each unit in target map
+      ufrtbl:select(1,i):copy(ufitbl:select(1,frcntr))
+      frcntr = frcntr + 1
+      if frcntr-1 == nfi then -- reset fi
+         fi:copy(torch.randperm(nin))
+         frcntr = 1
+      end
+   end
+   for tocntr=1,utotbl:size(1) do
+      utotbl:select(1,tocntr):fill(tocntr)
+   end
+   return tbl
+end
+
+function SpatialConvolutionMap:__init(conMatrix, kW, kH, dW, dH)
+   parent.__init(self)
+
+   dW = dW or 1
+   dH = dH or 1
+
+   self.kW = kW
+   self.kH = kH
+   self.dW = dW
+   self.dH = dH
+   self.connTable = conMatrix
+   self.nInputPlane = self.connTable:select(2,1):max()
+   self.nOutputPlane = self.connTable:select(2,2):max()
+   self.weight = torch.Tensor(self.connTable:size(1), kH, kW)
+   self.bias = torch.Tensor(self.nOutputPlane)
+   self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW)
+   self.gradBias = torch.Tensor(self.nOutputPlane)
+
+   self:reset()
+end
+
+function SpatialConvolutionMap:reset(stdv)
+   if stdv then
+      stdv = stdv * math.sqrt(3)
+      if nn.oldSeed then
+         self.weight:apply(function()
+            return torch.uniform(-stdv, stdv)
+         end)
+         self.bias:apply(function()
+            return torch.uniform(-stdv, stdv)
+         end)
+      else
+         self.weight:uniform(-stdv, stdv)
+         self.bias:uniform(-stdv, stdv)
+      end
+   else
+      local ninp = torch.Tensor(self.nOutputPlane):zero()
+      for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end
+      for k=1,self.connTable:size(1) do
+         stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]])
+         if nn.oldSeed then
+            self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end)
+         else
+            self.weight:select(1,k):uniform(-stdv,stdv)
+         end
+      end
+      for k=1,self.bias:size(1) do
+         stdv = 1/math.sqrt(self.kW*self.kH*ninp[k])
+         self.bias[k]
= torch.uniform(-stdv,stdv) + end + end +end + +function SpatialConvolutionMap:updateOutput(input) + input.THNN.SpatialConvolutionMap_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.connTable:cdata(), + self.nInputPlane, + self.nOutputPlane, + self.dW, self.dH + ) + return self.output +end + +function SpatialConvolutionMap:updateGradInput(input, gradOutput) + input.THNN.SpatialConvolutionMap_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.connTable:cdata(), + self.nInputPlane, + self.nOutputPlane, + self.dW, self.dH + ) + return self.gradInput +end + +function SpatialConvolutionMap:accGradParameters(input, gradOutput, scale) + input.THNN.SpatialConvolutionMap_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + self.gradBias:cdata(), + self.connTable:cdata(), + self.nInputPlane, + self.nOutputPlane, + self.dW, self.dH, + scale or 1 + ) +end + +function SpatialConvolutionMap:decayParameters(decay) + self.weight:add(-decay, self.weight) + self.bias:add(-decay, self.bias) +end diff --git a/contrib/lua-torch/nn/SpatialCrossMapLRN.lua b/contrib/lua-torch/nn/SpatialCrossMapLRN.lua new file mode 100644 index 000000000..088eb07f0 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialCrossMapLRN.lua @@ -0,0 +1,153 @@ +local SpatialCrossMapLRN, parent = torch.class('nn.SpatialCrossMapLRN', 'nn.Module') + +function SpatialCrossMapLRN:__init(size, alpha, beta, k) + parent.__init(self) + + self.size = size + self.alpha = alpha or 0.0001 + self.beta = beta or 0.75 + self.k = k or 1 +end + +function SpatialCrossMapLRN:updateOutput(input) + assert(input:dim() == 3 or input:dim() == 4, + 'Input must be 3D or 4D') + + self.scale = self.scale or input.new() + + if torch.typename(input):find('torch%.Cuda.*Tensor') then + input.THNN.SpatialCrossMapLRN_updateOutput( + input:cdata(), + self.output:cdata(), + self.scale:cdata(), + self.size, + self.alpha, + self.beta, + self.k + ) + else + local isBatch = true + if input:dim() == 3 then + input = nn.utils.addSingletonDimension(input) + isBatch = false + end + + local batchSize = input:size(1) + local channels = input:size(2) + local inputHeight = input:size(3) + local inputWidth = input:size(4) + + self.output:resizeAs(input) + self.scale:resizeAs(input) + + -- use output storage as temporary buffer + local inputSquare = self.output + inputSquare:pow(input, 2) + + local prePad = (self.size - 1)/2 + 1 + local prePadCrop = prePad > channels and channels or prePad + + local scaleFirst = self.scale:select(2,1) + scaleFirst:zero() + -- compute first feature map normalization + for c = 1, prePadCrop do + scaleFirst:add(inputSquare:select(2, c)) + end + + -- reuse computations for next feature maps normalization + -- by adding the next feature map and removing the previous + for c = 2, channels do + local scalePrevious = self.scale:select(2, c -1) + local scaleCurrent = self.scale:select(2, c) + scaleCurrent:copy(scalePrevious) + if c < channels - prePad + 2 then + local squareNext = inputSquare:select(2, c + prePad - 1) + scaleCurrent:add(1, squareNext) + end + if c > prePad then + local squarePrevious = inputSquare:select(2, c - prePad ) + scaleCurrent:add(-1, squarePrevious) + end + end + + self.scale:mul(self.alpha/self.size):add(self.k) + + self.output:pow(self.scale,-self.beta) + self.output:cmul(input) + + if not isBatch then + self.output = self.output[1] + end + end + + return self.output +end 
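+
+-- An illustrative sketch (not part of the module's API; assumes the nn
+-- package is loaded). For every feature map c and location (h, w), the CPU
+-- path above computes
+--    scale[c][h][w]  = k + alpha/size * sum of input[c'][h][w]^2
+--                      over the `size` maps centred on c
+--    output[c][h][w] = input[c][h][w] * scale[c][h][w]^(-beta)
+-- Minimal usage, with hypothetical sizes chosen only for illustration:
+--    local lrn = nn.SpatialCrossMapLRN(5, 1e-4, 0.75, 1)
+--    local y = lrn:forward(torch.rand(16, 32, 32)) -- 16 maps of 32x32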
+ +function SpatialCrossMapLRN:updateGradInput(input, gradOutput) + assert(input:dim() == 3 or input:dim() == 4, + 'Input must be 3D or 4D') + + if torch.typename(input):find('torch%.Cuda.*Tensor') then + input.THNN.SpatialCrossMapLRN_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.scale:cdata(), + self.output:cdata(), + self.size, + self.alpha, + self.beta, + self.k + ) + else + local isBatch = true + if input:dim() == 3 then + input = nn.utils.addSingletonDimension(input) + gradOutput = nn.utils.addSingletonDimension(gradOutput) + self.output = nn.utils.addSingletonDimension(self.output) + isBatch = false + end + + local batchSize = input:size(1) + local channels = input:size(2) + local inputHeight = input:size(3) + local inputWidth = input:size(4) + + self.paddedRatio = self.paddedRatio or input.new() + self.accumRatio = self.accumRatio or input.new() + self.paddedRatio:resize(channels + self.size - 1, inputHeight, inputWidth) + self.accumRatio:resize(inputHeight,inputWidth) + + local cacheRatioValue = 2*self.alpha*self.beta/self.size + local inversePrePad = self.size - (self.size - 1) / 2 + + self.gradInput:resizeAs(input) + self.gradInput:pow(self.scale,-self.beta):cmul(gradOutput) + + self.paddedRatio:zero() + local paddedRatioCenter = self.paddedRatio:narrow(1, inversePrePad, channels) + for n = 1, batchSize do + paddedRatioCenter:cmul(gradOutput[n],self.output[n]) + paddedRatioCenter:cdiv(self.scale[n]) + self.accumRatio:sum(self.paddedRatio:narrow(1,1,self.size-1), 1) + for c = 1, channels do + self.accumRatio:add(self.paddedRatio[c+self.size-1]) + self.gradInput[n][c]:addcmul(-cacheRatioValue, input[n][c], self.accumRatio) + self.accumRatio:add(-1, self.paddedRatio[c]) + end + end + + if not isBatch then + self.gradInput = self.gradInput[1] + self.output = self.output[1] + end + end + + return self.gradInput +end + + +function SpatialCrossMapLRN:clearState() + nn.utils.clear(self, 'scale', 'paddedRatio', 'accumRatio') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SpatialDepthWiseConvolution.lua b/contrib/lua-torch/nn/SpatialDepthWiseConvolution.lua new file mode 100644 index 000000000..1132f04cb --- /dev/null +++ b/contrib/lua-torch/nn/SpatialDepthWiseConvolution.lua @@ -0,0 +1,139 @@ +local THNN = require 'nn.THNN' +local SpatialDepthWiseConvolution, parent = torch.class('nn.SpatialDepthWiseConvolution', 'nn.Module') + +function SpatialDepthWiseConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kW = kW + self.kH = kH + + self.dW = dW + self.dH = dH + self.padW = padW or 0 + self.padH = padH or self.padW + + self.weight = torch.Tensor(nOutputPlane, nInputPlane*kH*kW) + self.bias = torch.Tensor(nOutputPlane, nInputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane*kH*kW) + self.gradBias = torch.Tensor(nOutputPlane, nInputPlane) + + self:reset() +end + +function SpatialDepthWiseConvolution:noBias() + self.bias = nil + self.gradBias = nil + return self +end + +function SpatialDepthWiseConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane) + end + if nn.oldSeed then + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, 
stdv) + end +end + +function SpatialDepthWiseConvolution:updateOutput(input) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + self.finput = self.finput or input.new() + self.fgradInput = self.fgradInput or input.new() + -- backward compatibility + if self.padding then + self.padW = self.padding + self.padH = self.padding + self.padding = nil + end + input.THNN.SpatialDepthWiseConvolution_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH + ) + return self.output +end + +function SpatialDepthWiseConvolution:updateGradInput(input, gradOutput) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + if self.gradInput then + input.THNN.SpatialDepthWiseConvolution_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH + ) + return self.gradInput + end +end + +function SpatialDepthWiseConvolution:accGradParameters(input, gradOutput, scale) + assert(input.THNN, torch.type(input)..'.THNN backend not imported') + scale = scale or 1 + assert((self.bias and self.gradBias) or (self.bias == nil and self.gradBias == nil)) + input.THNN.SpatialDepthWiseConvolution_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + scale + ) +end + +function SpatialDepthWiseConvolution:type(type,tensorCache) + self.finput = self.finput and torch.Tensor() + self.fgradInput = self.fgradInput and torch.Tensor() + return parent.type(self,type,tensorCache) +end + +function SpatialDepthWiseConvolution:__tostring__() + local s = string.format('%s(%d -> %d, %dx%d', torch.type(self), + self.nInputPlane, self.nOutputPlane, self.kW, self.kH) + if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then + s = s .. string.format(', %d,%d', self.dW, self.dH) + end + if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padW .. ',' .. self.padH + end + if self.bias then + return s .. ')' + else + return s .. 
') without bias' + end +end + +function SpatialDepthWiseConvolution:clearState() + nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput') + return parent.clearState(self) +end + diff --git a/contrib/lua-torch/nn/SpatialDilatedConvolution.lua b/contrib/lua-torch/nn/SpatialDilatedConvolution.lua new file mode 100644 index 000000000..a0590c7e9 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialDilatedConvolution.lua @@ -0,0 +1,80 @@ +local THNN = require 'nn.THNN' +local SpatialDilatedConvolution, parent = torch.class('nn.SpatialDilatedConvolution', 'nn.SpatialConvolution') + +function SpatialDilatedConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH, dilationW, dilationH) + parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH) + + self.dilationW = dilationW or 1 + self.dilationH = dilationH or 1 +end + +function SpatialDilatedConvolution:updateOutput(input) + self.finput = self.finput or self.weight.new() + self.fgradInput = self.fgradInput or self.weight.new() + input.THNN.SpatialDilatedConvolution_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.dilationW, self.dilationH + ) + return self.output +end + +function SpatialDilatedConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + self.fgradInput = self.fgradInput or self.weight.new() + input.THNN.SpatialDilatedConvolution_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.dilationW, self.dilationH + ) + return self.gradInput + end +end + +function SpatialDilatedConvolution:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self.fgradInput = self.fgradInput or self.weight.new() + input.THNN.SpatialDilatedConvolution_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.dilationW, self.dilationH, + scale + ) +end + +function SpatialDilatedConvolution:__tostring__() + local s = string.format('%s(%d -> %d, %dx%d', torch.type(self), + self.nInputPlane, self.nOutputPlane, self.kW, self.kH) + if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then + s = s .. string.format(', %d,%d', self.dW, self.dH) + end + if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padW .. ',' .. self.padH + end + s = s .. ', ' .. self.dilationW .. ',' .. self.dilationH + if self.bias then + return s .. ')' + else + return s .. 
') without bias' + end +end diff --git a/contrib/lua-torch/nn/SpatialDilatedMaxPooling.lua b/contrib/lua-torch/nn/SpatialDilatedMaxPooling.lua new file mode 100644 index 000000000..34525a4ad --- /dev/null +++ b/contrib/lua-torch/nn/SpatialDilatedMaxPooling.lua @@ -0,0 +1,67 @@ +local THNN = require 'nn.THNN' +local SpatialDilatedMaxPooling, parent = torch.class('nn.SpatialDilatedMaxPooling', 'nn.SpatialMaxPooling') + +function SpatialDilatedMaxPooling:__init(kW, kH, dW, dH, padW, padH, dilationW, dilationH) + parent.__init(self, kW, kH, dW, dH, padW, padH) + + self.dilationW = dilationW or 1 + self.dilationH = dilationH or 1 +end + +function SpatialDilatedMaxPooling:updateOutput(input) + self.indices = self.indices or torch.LongTensor() + if torch.typename(input):find('torch%.Cuda.*Tensor') then + self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices + else + self.indices = self.indices:long() + end + + local dims = input:dim() + self.iheight = input:size(dims-1) + self.iwidth = input:size(dims) + + input.THNN.SpatialDilatedMaxPooling_updateOutput( + input:cdata(), + self.output:cdata(), + self.indices:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.dilationW, self.dilationH, + self.ceil_mode + ) + return self.output +end + +function SpatialDilatedMaxPooling:updateGradInput(input, gradOutput) + input.THNN.SpatialDilatedMaxPooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.indices:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.dilationW, self.dilationH, + self.ceil_mode + ) + return self.gradInput +end + +function SpatialDilatedMaxPooling:__tostring__() + local s = string.format('%s(%dx%d, %d,%d', torch.type(self), + self.kW, self.kH, self.dW, self.dH) + if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padW .. ','.. self.padH + end + s = s .. ', ' .. self.dilationW .. ',' .. self.dilationH + s = s .. 
')' + return s +end + +function SpatialDilatedMaxPooling:clearState() + if self.indices then + self.indices:set() + end + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SpatialDivisiveNormalization.lua b/contrib/lua-torch/nn/SpatialDivisiveNormalization.lua new file mode 100644 index 000000000..dc2b8c530 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialDivisiveNormalization.lua @@ -0,0 +1,136 @@ +local SpatialDivisiveNormalization, parent = torch.class('nn.SpatialDivisiveNormalization','nn.Module') + +function SpatialDivisiveNormalization:__init(nInputPlane, kernel, threshold, thresval) + parent.__init(self) + + -- get args + self.nInputPlane = nInputPlane or 1 + self.kernel = kernel or torch.Tensor(9,9):fill(1) + self.threshold = threshold or 1e-4 + self.thresval = thresval or threshold or 1e-4 + local kdim = self.kernel:nDimension() + + -- check args + if kdim ~= 2 and kdim ~= 1 then + error('<SpatialDivisiveNormalization> averaging kernel must be 2D or 1D') + end + if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then + error('<SpatialDivisiveNormalization> averaging kernel must have ODD dimensions') + end + + -- padding values + local padH = math.floor(self.kernel:size(1)/2) + local padW = padH + if kdim == 2 then + padW = math.floor(self.kernel:size(2)/2) + end + + -- create convolutional mean estimator + self.meanestimator = nn.Sequential() + self.meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH)) + if kdim == 2 then + self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, self.kernel:size(2), self.kernel:size(1))) + else + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1)) + self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1))) + end + self.meanestimator:add(nn.Replicate(self.nInputPlane,1,3)) + + -- create convolutional std estimator + self.stdestimator = nn.Sequential() + self.stdestimator:add(nn.Square()) + self.stdestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH)) + if kdim == 2 then + self.stdestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, self.kernel:size(2), self.kernel:size(1))) + else + self.stdestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1)) + self.stdestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1))) + end + self.stdestimator:add(nn.Replicate(self.nInputPlane,1,3)) + self.stdestimator:add(nn.Sqrt()) + + -- set kernel and bias + if kdim == 2 then + self.kernel:div(self.kernel:sum() * self.nInputPlane) + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[1][i] = self.kernel + self.stdestimator.modules[3].weight[1][i] = self.kernel + end + self.meanestimator.modules[2].bias:zero() + self.stdestimator.modules[3].bias:zero() + else + self.kernel:div(self.kernel:sum() * math.sqrt(self.nInputPlane)) + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[i]:copy(self.kernel) + self.meanestimator.modules[3].weight[1][i]:copy(self.kernel) + self.stdestimator.modules[3].weight[i]:copy(self.kernel) + self.stdestimator.modules[4].weight[1][i]:copy(self.kernel) + end + self.meanestimator.modules[2].bias:zero() + self.meanestimator.modules[3].bias:zero() + self.stdestimator.modules[3].bias:zero() + self.stdestimator.modules[4].bias:zero() + end + + -- other operation + self.normalizer = nn.CDivTable() + self.divider = nn.CDivTable() + self.thresholder = 
nn.Threshold(self.threshold, self.thresval) + + -- coefficient array, to adjust side effects + self.coef = torch.Tensor(1,1,1) +end + +function SpatialDivisiveNormalization:updateOutput(input) + + self.localstds = self.stdestimator:updateOutput(input) + + -- compute side coefficients + local dim = input:dim() + if self.localstds:dim() ~= self.coef:dim() or (input:size(dim) ~= self.coef:size(dim)) or (input:size(dim-1) ~= self.coef:size(dim-1)) then + self.ones = self.ones or input.new() + if dim == 4 then + -- batch mode + self.ones:resizeAs(input[1]):fill(1) + local coef = self.meanestimator:updateOutput(self.ones) + self._coef = self._coef or input.new() + self._coef:resizeAs(coef):copy(coef) -- make contiguous for view + self.coef = self._coef:view(1,table.unpack(self._coef:size():totable())):expandAs(self.localstds) + else + self.ones:resizeAs(input):fill(1) + self.coef = self.meanestimator:updateOutput(self.ones) + end + + end + + -- normalize std dev + self.adjustedstds = self.divider:updateOutput{self.localstds, self.coef} + self.thresholdedstds = self.thresholder:updateOutput(self.adjustedstds) + self.output = self.normalizer:updateOutput{input, self.thresholdedstds} + + -- done + return self.output +end + +function SpatialDivisiveNormalization:updateGradInput(input, gradOutput) + -- resize grad + self.gradInput:resizeAs(input):zero() + + -- backprop through all modules + local gradnorm = self.normalizer:updateGradInput({input, self.thresholdedstds}, gradOutput) + local gradadj = self.thresholder:updateGradInput(self.adjustedstds, gradnorm[2]) + local graddiv = self.divider:updateGradInput({self.localstds, self.coef}, gradadj) + self.gradInput:add(self.stdestimator:updateGradInput(input, graddiv[1])) + self.gradInput:add(gradnorm[1]) + + -- done + return self.gradInput +end + +function SpatialDivisiveNormalization:clearState() + if self.ones then self.ones:set() end + if self._coef then self._coef:set() end + self.meanestimator:clearState() + self.stdestimator:clearState() + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SpatialDropout.lua b/contrib/lua-torch/nn/SpatialDropout.lua new file mode 100644 index 000000000..4320061b7 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialDropout.lua @@ -0,0 +1,55 @@ +local SpatialDropout, Parent = torch.class('nn.SpatialDropout', 'nn.Module') + +function SpatialDropout:__init(p,stochasticInference) + Parent.__init(self) + self.p = p or 0.5 + self.train = true + self.stochastic_inference = stochasticInference or false + self.noise = torch.Tensor() +end + +function SpatialDropout:updateOutput(input) + self.output:resizeAs(input):copy(input) + if self.train or self.stochastic_inference then + if input:dim() == 4 then + self.noise:resize(input:size(1), input:size(2), 1, 1) + elseif input:dim() == 3 then + self.noise:resize(input:size(1), 1, 1) + else + error('Input must be 4D (nbatch, nfeat, h, w) or 3D (nfeat, h, w)') + end + self.noise:bernoulli(1-self.p) + -- We expand the random dropouts to the entire feature map because the + -- features are likely correlated across the map and so the dropout + -- should also be correlated. 
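+      -- Illustrative note (not in the original source): for a 3D input of
+      -- size (nfeat, h, w), self.noise has size (nfeat, 1, 1), so expandAs
+      -- broadcasts one Bernoulli draw across each whole feature map. E.g. a
+      -- noise mask of {1, 0, 1} zeroes map 2 everywhere while maps 1 and 3
+      -- pass through unscaled.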
+ self.output:cmul(torch.expandAs(self.noise, input)) + else + self.output:mul(1-self.p) + end + return self.output +end + +function SpatialDropout:updateGradInput(input, gradOutput) + if self.train then + self.gradInput:resizeAs(gradOutput):copy(gradOutput) + self.gradInput:cmul(torch.expandAs(self.noise, input)) -- simply mask the gradients with the noise vector + else + error('backprop only defined while training') + end + return self.gradInput +end + +function SpatialDropout:setp(p) + self.p = p +end + +function SpatialDropout:__tostring__() + return string.format('%s(%f)', torch.type(self), self.p) +end + +function SpatialDropout:clearState() + if self.noise then + self.noise:set() + end + return Parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SpatialFractionalMaxPooling.lua b/contrib/lua-torch/nn/SpatialFractionalMaxPooling.lua new file mode 100644 index 000000000..884751d41 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialFractionalMaxPooling.lua @@ -0,0 +1,165 @@ +local SpatialFractionalMaxPooling, parent = + torch.class('nn.SpatialFractionalMaxPooling', 'nn.Module') + +-- Usage: +-- nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH, outW, outH) +-- the output should be the exact size (outH x outW) +-- nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH, ratioW, ratioH) +-- the output should be the size (floor(inH x ratioH) x floor(inW x ratioW)) +-- ratios are numbers between (0, 1) exclusive +function SpatialFractionalMaxPooling:__init(poolSizeW, poolSizeH, arg1, arg2) + parent.__init(self) + assert(poolSizeW >= 2) + assert(poolSizeH >= 2) + + -- Pool size (how wide the pooling for each output unit is) + self.poolSizeW = poolSizeW + self.poolSizeH = poolSizeH + + -- Random samples are drawn for all + -- batch * plane * (height, width; i.e., 2) points. This determines + -- the 2d "pseudorandom" overlapping pooling regions for each + -- (batch element x input plane). A new set of random samples is + -- drawn every updateOutput call, unless we disable it via + -- :fixPoolingRegions(). + self.randomSamples = nil + + -- Flag to disable re-generation of random samples for producing + -- a new pooling. 
For testing purposes.
+   self.newRandomPool = false
+
+   if arg1 >= 1 and arg2 >= 1 then
+      -- Desired output size: the input tensor will determine the reduction
+      -- ratio
+      self.outW = arg1
+      self.outH = arg2
+   else
+      -- Reduction ratio specified for each input dimension;
+      -- this is the reduction ratio that we use
+      self.ratioW = arg1
+      self.ratioH = arg2
+
+      -- The reduction ratio must be between 0 and 1
+      assert(self.ratioW > 0 and self.ratioW < 1)
+      assert(self.ratioH > 0 and self.ratioH < 1)
+   end
+end
+
+function SpatialFractionalMaxPooling:getBufferSize_(input)
+   local batchSize = 0
+   local planeSize = 0
+
+   if input:nDimension() == 3 then
+      batchSize = 1
+      planeSize = input:size(1)
+   elseif input:nDimension() == 4 then
+      batchSize = input:size(1)
+      planeSize = input:size(2)
+   else
+      error('input must be dim 3 or 4')
+   end
+
+   return torch.LongStorage({batchSize, planeSize, 2})
+end
+
+function SpatialFractionalMaxPooling:initSampleBuffer_(input)
+   local sampleBufferSize = self:getBufferSize_(input)
+
+   if self.randomSamples == nil then
+      self.randomSamples = input.new():resize(sampleBufferSize):uniform()
+   elseif (self.randomSamples:size(1) ~= sampleBufferSize[1] or
+           self.randomSamples:size(2) ~= sampleBufferSize[2]) then
+      self.randomSamples:resize(sampleBufferSize):uniform()
+   else
+      if not self.newRandomPool then
+         -- Create new pooling windows, since this is a subsequent call
+         self.randomSamples:uniform()
+      end
+   end
+end
+
+function SpatialFractionalMaxPooling:getOutputSizes_(input)
+   local outW = self.outW
+   local outH = self.outH
+   if self.ratioW ~= nil and self.ratioH ~= nil then
+      if input:nDimension() == 4 then
+         outW = math.floor(input:size(4) * self.ratioW)
+         outH = math.floor(input:size(3) * self.ratioH)
+      elseif input:nDimension() == 3 then
+         outW = math.floor(input:size(3) * self.ratioW)
+         outH = math.floor(input:size(2) * self.ratioH)
+      else
+         error('input must be dim 3 or 4')
+      end
+
+      -- Neither can be smaller than 1
+      assert(outW > 0, 'reduction ratio or input width too small')
+      assert(outH > 0, 'reduction ratio or input height too small')
+   else
+      assert(outW ~= nil and outH ~= nil)
+   end
+
+   return outW, outH
+end
+
+-- Call this to turn off regeneration of random pooling regions each
+-- updateOutput call.
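+-- Illustrative usage (a sketch, not part of the original file; sizes are
+-- hypothetical):
+--    local pool = nn.SpatialFractionalMaxPooling(2, 2, 0.5, 0.5)
+--    pool:fixPoolingRegions() -- freeze the pseudorandom regions, e.g. in tests
+--    local y = pool:forward(torch.rand(1, 8, 8)) -- output is roughly 1x4x4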
+function SpatialFractionalMaxPooling:fixPoolingRegions(val)
+   if val == nil then
+      val = true
+   end
+
+   self.newRandomPool = val
+   return self
+end
+
+function SpatialFractionalMaxPooling:updateOutput(input)
+   self.indices = self.indices or torch.LongTensor()
+   if torch.typename(input):find('torch%.Cuda.*Tensor') then
+      self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+   else
+      self.indices = self.indices:long()
+   end
+   self:initSampleBuffer_(input)
+   local outW, outH = self:getOutputSizes_(input)
+
+   input.THNN.SpatialFractionalMaxPooling_updateOutput(
+      input:cdata(),
+      self.output:cdata(),
+      outW, outH, self.poolSizeW, self.poolSizeH,
+      self.indices:cdata(), self.randomSamples:cdata())
+   return self.output
+end
+
+function SpatialFractionalMaxPooling:updateGradInput(input, gradOutput)
+   assert(self.randomSamples ~= nil,
+          'must call updateOutput/forward first')
+
+   local outW, outH = self:getOutputSizes_(input)
+
+   input.THNN.SpatialFractionalMaxPooling_updateGradInput(
+      input:cdata(),
+      gradOutput:cdata(),
+      self.gradInput:cdata(),
+      outW, outH, self.poolSizeW, self.poolSizeH,
+      self.indices:cdata())
+   return self.gradInput
+end
+
+-- backward compat
+function SpatialFractionalMaxPooling:empty()
+   self:clearState()
+end
+
+function SpatialFractionalMaxPooling:clearState()
+   self.indices = nil
+   self.randomSamples = nil
+   return parent.clearState(self)
+end
+
+function SpatialFractionalMaxPooling:__tostring__()
+   return string.format('%s(%dx%d, %d,%d)', torch.type(self),
+                        self.outW and self.outW or self.ratioW,
+                        self.outH and self.outH or self.ratioH,
+                        self.poolSizeW, self.poolSizeH)
+end
diff --git a/contrib/lua-torch/nn/SpatialFullConvolution.lua b/contrib/lua-torch/nn/SpatialFullConvolution.lua
new file mode 100644
index 000000000..e6019bc18
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialFullConvolution.lua
@@ -0,0 +1,219 @@
+local THNN = require 'nn.THNN'
+local SpatialFullConvolution, parent = torch.class('nn.SpatialFullConvolution','nn.Module')
+
+function SpatialFullConvolution:__init(nInputPlane, nOutputPlane,
+                                       kW, kH, dW, dH, padW, padH, adjW, adjH)
+   parent.__init(self)
+
+   dW = dW or 1
+   dH = dH or 1
+
+   self.nInputPlane = nInputPlane
+   self.nOutputPlane = nOutputPlane
+   self.kW = kW
+   self.kH = kH
+   self.dW = dW
+   self.dH = dH
+   self.padW = padW or 0
+   self.padH = padH or 0
+   self.adjW = adjW or 0
+   self.adjH = adjH or 0
+
+   if self.adjW > self.dW - 1 or self.adjH > self.dH - 1 then
+      error('adjW and adjH must not be greater than self.dW - 1' ..
+ ' and self.dH - 1 respectively') + end + + self.weight = torch.Tensor(nInputPlane, nOutputPlane, kH, kW) + self.gradWeight = torch.Tensor(nInputPlane, nOutputPlane, kH, kW) + self.bias = torch.Tensor(self.nOutputPlane) + self.gradBias = torch.Tensor(self.nOutputPlane) + + self.ones = torch.Tensor() + + self:reset() +end + +function SpatialFullConvolution:noBias() + self.bias = nil + self.gradBias = nil + return self +end + +function SpatialFullConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + local nInputPlane = self.nInputPlane + local kH = self.kH + local kW = self.kW + stdv = 1/math.sqrt(kW*kH*nInputPlane) + end + self.weight:uniform(-stdv, stdv) + if self.bias then + self.bias:uniform(-stdv, stdv) + end +end + +local function calculateAdj(targetSize, ker, pad, stride) + return (targetSize + 2 * pad - ker) % stride +end + +function SpatialFullConvolution:backCompatibility() + self.adjW = self.adjW or 0 + self.adjH = self.adjH or 0 +end + +function SpatialFullConvolution:updateOutput(input) + self:backCompatibility() + + local inputTensor = input + local adjW, adjH = self.adjW, self.adjH + + -- The input can be a table where the second element indicates the target + -- output size, in which case the adj factors are computed automatically + if type(inputTensor) == 'table' then + inputTensor = input[1] + local targetTensor = input[2] + local tDims = targetTensor:dim() + local tH = targetTensor:size(tDims-1) + local tW = targetTensor:size(tDims) + adjW = calculateAdj(tW, self.kW, self.padW, self.dW) + adjH = calculateAdj(tH, self.kH, self.padH, self.dH) + self.finput = self.finput or input[1].new() + self.fgradInput = self.fgradInput or input[1].new() + else + self.finput = self.finput or input.new() + self.fgradInput = self.fgradInput or input.new() + end + + inputTensor.THNN.SpatialFullConvolution_updateOutput( + inputTensor:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + adjW, adjH + ) + + return self.output +end + +function SpatialFullConvolution:updateGradInput(input, gradOutput) + self:backCompatibility() + + if self.gradInput then + + local inputTensor = input + local adjW, adjH = self.adjW, self.adjH + + -- The input can be a table where the second element indicates the target + -- output size, in which case the adj factors are computed automatically + if type(inputTensor) == 'table' then + inputTensor = input[1] + local targetTensor = input[2] + local tDims = targetTensor:dim() + local tH = targetTensor:size(tDims-1) + local tW = targetTensor:size(tDims) + adjW = calculateAdj(tW, self.kW, self.padW, self.dW) + adjH = calculateAdj(tH, self.kH, self.padH, self.dH) + -- Momentarily extract the gradInput tensor + if type(self.gradInput) == 'table' then + self.gradInput = self.gradInput[1] or inputTensor.new() + end + end + + inputTensor.THNN.SpatialFullConvolution_updateGradInput( + inputTensor:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + adjW, adjH + ) + + if type(input) == 'table' then + -- Create a zero tensor to be expanded and used as gradInput[2]. 
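+         -- (Descriptive note, not in the original source: the size-template
+         -- tensor input[2] only supplies the target shape, so no gradient
+         -- flows back to it; an all-zero tensor of the same shape keeps
+         -- gradInput's table structure consistent with the input table.)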
+ self.zeroScalar = self.zeroScalar or input[2].new(1):zero() + self.ones:resize(input[2]:dim()):fill(1) + local zeroTensor = self.zeroScalar + :view(table.unpack(self.ones:totable())) + :expandAs(input[2]) + self.gradInput = {self.gradInput, zeroTensor} + end + + return self.gradInput + end +end + +function SpatialFullConvolution:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self:backCompatibility() + + local inputTensor = input + local adjW, adjH = self.adjW, self.adjH + + -- The input can be a table where the second element indicates the target + -- output size, in which case the adj factors are computed automatically + if type(inputTensor) == 'table' then + inputTensor = input[1] + local targetTensor = input[2] + local tDims = targetTensor:dim() + local tH = targetTensor:size(tDims-1) + local tW = targetTensor:size(tDims) + adjW = calculateAdj(tW, self.kW, self.padW, self.dW) + adjH = calculateAdj(tH, self.kH, self.padH, self.dH) + end + + inputTensor.THNN.SpatialFullConvolution_accGradParameters( + inputTensor:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + adjW, adjH, + scale + ) +end + +function SpatialFullConvolution:type(type, tensorCache) + self.finput = self.finput and torch.Tensor() + self.fgradInput = self.fgradInput and torch.Tensor() + return parent.type(self, type, tensorCache) +end + +function SpatialFullConvolution:__tostring__() + local s = string.format('%s(%d -> %d, %dx%d', torch.type(self), + self.nInputPlane, self.nOutputPlane, self.kW, self.kH) + if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then + s = s .. string.format(', %d,%d', self.dW, self.dH) + end + if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padW .. ',' .. self.padH + end + if (self.adjW or self.adjH) and (self.adjW ~= 0 or self.adjH ~= 0) then + s = s .. ', ' .. self.adjW .. ',' .. self.adjH + end + if self.bias then + return s .. ')' + else + return s .. 
') without bias' + end +end + +function SpatialFullConvolution:clearState() + nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput') + return parent.clearState(self) +end + diff --git a/contrib/lua-torch/nn/SpatialFullConvolutionMap.lua b/contrib/lua-torch/nn/SpatialFullConvolutionMap.lua new file mode 100644 index 000000000..008f5e7cf --- /dev/null +++ b/contrib/lua-torch/nn/SpatialFullConvolutionMap.lua @@ -0,0 +1,91 @@ +local SpatialFullConvolutionMap, parent = torch.class('nn.SpatialFullConvolutionMap', 'nn.Module') + +function SpatialFullConvolutionMap:__init(conMatrix, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + self.connTable = conMatrix + self.nInputPlane = self.connTable:select(2,1):max() + self.nOutputPlane = self.connTable:select(2,2):max() + + self.weight = torch.Tensor(self.connTable:size(1), kH, kW) + self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW) + + self.bias = torch.Tensor(self.nOutputPlane) + self.gradBias = torch.Tensor(self.nOutputPlane) + + self:reset() +end + +function SpatialFullConvolutionMap:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + local ninp = torch.Tensor(self.nOutputPlane):zero() + for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end + for k=1,self.connTable:size(1) do + stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]]) + self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end) + end + for k=1,self.bias:size(1) do + stdv = 1/math.sqrt(self.kW*self.kH*ninp[k]) + self.bias[k] = torch.uniform(-stdv,stdv) + end + + end +end + +function SpatialFullConvolutionMap:updateOutput(input) + input.THNN.SpatialFullConvolutionMap_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.connTable:cdata(), + self.nInputPlane, + self.nOutputPlane, + self.dW, self.dH + ) + return self.output +end + +function SpatialFullConvolutionMap:updateGradInput(input, gradOutput) + input.THNN.SpatialFullConvolutionMap_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.connTable:cdata(), + self.nInputPlane, + self.nOutputPlane, + self.dW, self.dH + ) + return self.gradInput +end + +function SpatialFullConvolutionMap:accGradParameters(input, gradOutput, scale) + input.THNN.SpatialFullConvolutionMap_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + self.gradBias:cdata(), + self.connTable:cdata(), + self.nInputPlane, + self.nOutputPlane, + self.dW, self.dH, + scale or 1 + ) +end diff --git a/contrib/lua-torch/nn/SpatialLPPooling.lua b/contrib/lua-torch/nn/SpatialLPPooling.lua new file mode 100644 index 000000000..49a8493cf --- /dev/null +++ b/contrib/lua-torch/nn/SpatialLPPooling.lua @@ -0,0 +1,43 @@ +local SpatialLPPooling, parent = torch.class('nn.SpatialLPPooling', 'nn.Sequential') + +function SpatialLPPooling:__init(nInputPlane, pnorm, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or kW + dH = dH or kH + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + if pnorm == 2 then + self:add(nn.Square()) + else + self:add(nn.Power(pnorm)) + end + self:add(nn.SpatialAveragePooling(kW, kH, dW, dH)) + self:add(nn.MulConstant(kW*kH)) + if pnorm == 2 then + 
self:add(nn.Sqrt()) + else + self:add(nn.Power(1/pnorm)) + end +end + +-- the module is a Sequential: by default, it'll try to learn the parameters +-- of the sub sampler: we avoid that by redefining its methods. +function SpatialLPPooling:reset() +end + +function SpatialLPPooling:accGradParameters() +end + +function SpatialLPPooling:accUpdateGradParameters() +end + +function SpatialLPPooling:zeroGradParameters() +end + +function SpatialLPPooling:updateParameters() +end diff --git a/contrib/lua-torch/nn/SpatialLogSoftMax.lua b/contrib/lua-torch/nn/SpatialLogSoftMax.lua new file mode 100644 index 000000000..9c81d49e1 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialLogSoftMax.lua @@ -0,0 +1,19 @@ +local SpatialLogSoftMax = torch.class('nn.SpatialLogSoftMax', 'nn.Module') + +function SpatialLogSoftMax:updateOutput(input) + input.THNN.LogSoftMax_updateOutput( + input:cdata(), + self.output:cdata() + ) + return self.output +end + +function SpatialLogSoftMax:updateGradInput(input, gradOutput) + input.THNN.LogSoftMax_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata() + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SpatialMaxPooling.lua b/contrib/lua-torch/nn/SpatialMaxPooling.lua new file mode 100644 index 000000000..5c865c631 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialMaxPooling.lua @@ -0,0 +1,94 @@ +local SpatialMaxPooling, parent = torch.class('nn.SpatialMaxPooling', 'nn.Module') + +function SpatialMaxPooling:__init(kW, kH, dW, dH, padW, padH) + parent.__init(self) + + dW = dW or kW + dH = dH or kH + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.padW = padW or 0 + self.padH = padH or 0 + + self.ceil_mode = false + self.indices = torch.LongTensor() +end + +function SpatialMaxPooling:ceil() + self.ceil_mode = true + return self +end + +function SpatialMaxPooling:floor() + self.ceil_mode = false + return self +end + +function SpatialMaxPooling:updateOutput(input) + self.indices = self.indices or torch.LongTensor() + if torch.typename(input):find('torch%.Cuda.*Tensor') then + self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices + else + self.indices = self.indices:long() + end + + local dims = input:dim() + self.iheight = input:size(dims-1) + self.iwidth = input:size(dims) + + -- backward compatibility + self.ceil_mode = self.ceil_mode or false + self.padW = self.padW or 0 + self.padH = self.padH or 0 + input.THNN.SpatialMaxPooling_updateOutput( + input:cdata(), + self.output:cdata(), + self.indices:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.ceil_mode + ) + return self.output +end + +function SpatialMaxPooling:updateGradInput(input, gradOutput) + input.THNN.SpatialMaxPooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.indices:cdata(), + self.kW, self.kH, + self.dW, self.dH, + self.padW, self.padH, + self.ceil_mode + ) + return self.gradInput +end + +-- for backward compat +function SpatialMaxPooling:empty() + self:clearState() +end + +function SpatialMaxPooling:__tostring__() + local s = string.format('%s(%dx%d, %d,%d', torch.type(self), + self.kW, self.kH, self.dW, self.dH) + if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padW .. ','.. self.padH + end + s = s .. 
')' + + return s +end + +function SpatialMaxPooling:clearState() + if self.indices then + self.indices:set() + end + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/SpatialMaxUnpooling.lua b/contrib/lua-torch/nn/SpatialMaxUnpooling.lua new file mode 100644 index 000000000..408bcc052 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialMaxUnpooling.lua @@ -0,0 +1,45 @@ +local SpatialMaxUnpooling, parent = torch.class('nn.SpatialMaxUnpooling', 'nn.Module') + +function SpatialMaxUnpooling:__init(poolingModule) + parent.__init(self) + assert(torch.type(poolingModule)=='nn.SpatialMaxPooling', 'Argument must be a nn.SpatialMaxPooling module') + assert(poolingModule.kH==poolingModule.dH and poolingModule.kW==poolingModule.dW, "The size of pooling module's kernel must be equal to its stride") + self.pooling = poolingModule +end + +function SpatialMaxUnpooling:setParams() + self.indices = self.pooling.indices + self.oheight = self.pooling.iheight + self.owidth = self.pooling.iwidth +end + +function SpatialMaxUnpooling:updateOutput(input) + self:setParams() + input.THNN.SpatialMaxUnpooling_updateOutput( + input:cdata(), + self.output:cdata(), + self.indices:cdata(), + self.owidth, self.oheight + ) + return self.output +end + +function SpatialMaxUnpooling:updateGradInput(input, gradOutput) + self:setParams() + input.THNN.SpatialMaxUnpooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.indices:cdata(), + self.owidth, self.oheight + ) + return self.gradInput +end + +function SpatialMaxUnpooling:empty() + self:clearState() +end + +function SpatialMaxUnpooling:__tostring__() + return 'nn.SpatialMaxUnpooling associated to '..tostring(self.pooling) +end diff --git a/contrib/lua-torch/nn/SpatialReflectionPadding.lua b/contrib/lua-torch/nn/SpatialReflectionPadding.lua new file mode 100644 index 000000000..9ce4612ad --- /dev/null +++ b/contrib/lua-torch/nn/SpatialReflectionPadding.lua @@ -0,0 +1,51 @@ +local SpatialReflectionPadding, parent = + torch.class('nn.SpatialReflectionPadding', 'nn.Module') + +function SpatialReflectionPadding:__init(pad_l, pad_r, pad_t, pad_b) + parent.__init(self) + self.pad_l = pad_l + self.pad_r = pad_r or self.pad_l + self.pad_t = pad_t or self.pad_l + self.pad_b = pad_b or self.pad_l +end + +function SpatialReflectionPadding:updateOutput(input) + if input:dim() == 3 or input:dim() == 4 then + input.THNN.SpatialReflectionPadding_updateOutput( + input:cdata(), self.output:cdata(), + self.pad_l, self.pad_r, self.pad_t, self.pad_b) + else + error('input must be 3 or 4-dimensional') + end + return self.output +end + +function SpatialReflectionPadding:updateGradInput(input, gradOutput) + if input:dim() == 3 and gradOutput:dim() == 3 then + assert(input:size(1) == gradOutput:size(1) + and input:size(2) + self.pad_t + self.pad_b == gradOutput:size(2) + and input:size(3) + self.pad_l + self.pad_r == gradOutput:size(3), + 'input and gradOutput must be compatible in size') + elseif input:dim() == 4 and gradOutput:dim() == 4 then + assert(input:size(1) == gradOutput:size(1) + and input:size(2) == gradOutput:size(2) + and input:size(3) + self.pad_t + self.pad_b == gradOutput:size(3) + and input:size(4) + self.pad_l + self.pad_r == gradOutput:size(4), + 'input and gradOutput must be compatible in size') + else + error( + [[input and gradOutput must be 3 or 4-dimensional + and have equal number of dimensions]] + ) + end + input.THNN.SpatialReflectionPadding_updateGradInput( + input:cdata(), gradOutput:cdata(), 
self.gradInput:cdata(), + self.pad_l, self.pad_r, self.pad_t, self.pad_b) + return self.gradInput +end + +function SpatialReflectionPadding:__tostring__() + return torch.type(self) .. + string.format('(l=%d, r=%d, t=%d, b=%d)', self.pad_l, self.pad_r, + self.pad_t, self.pad_b) +end diff --git a/contrib/lua-torch/nn/SpatialReplicationPadding.lua b/contrib/lua-torch/nn/SpatialReplicationPadding.lua new file mode 100644 index 000000000..429763f9b --- /dev/null +++ b/contrib/lua-torch/nn/SpatialReplicationPadding.lua @@ -0,0 +1,51 @@ +local SpatialReplicationPadding, parent = + torch.class('nn.SpatialReplicationPadding', 'nn.Module') + +function SpatialReplicationPadding:__init(pad_l, pad_r, pad_t, pad_b) + parent.__init(self) + self.pad_l = pad_l + self.pad_r = pad_r or self.pad_l + self.pad_t = pad_t or self.pad_l + self.pad_b = pad_b or self.pad_l +end + +function SpatialReplicationPadding:updateOutput(input) + if input:dim() == 3 or input:dim() == 4 then + input.THNN.SpatialReplicationPadding_updateOutput( + input:cdata(), self.output:cdata(), + self.pad_l, self.pad_r, self.pad_t, self.pad_b) + else + error('input must be 3 or 4-dimensional') + end + return self.output +end + +function SpatialReplicationPadding:updateGradInput(input, gradOutput) + if input:dim() == 3 and gradOutput:dim() == 3 then + assert(input:size(1) == gradOutput:size(1) + and input:size(2) + self.pad_t + self.pad_b == gradOutput:size(2) + and input:size(3) + self.pad_l + self.pad_r == gradOutput:size(3), + 'input and gradOutput must be compatible in size') + elseif input:dim() == 4 and gradOutput:dim() == 4 then + assert(input:size(1) == gradOutput:size(1) + and input:size(2) == gradOutput:size(2) + and input:size(3) + self.pad_t + self.pad_b == gradOutput:size(3) + and input:size(4) + self.pad_l + self.pad_r == gradOutput:size(4), + 'input and gradOutput must be compatible in size') + else + error( + [[input and gradOutput must be 3 or 4-dimensional + and have equal number of dimensions]] + ) + end + input.THNN.SpatialReplicationPadding_updateGradInput( + input:cdata(), gradOutput:cdata(), self.gradInput:cdata(), + self.pad_l, self.pad_r, self.pad_t, self.pad_b) + return self.gradInput +end + +function SpatialReplicationPadding:__tostring__() + return torch.type(self) .. 
+ string.format('(l=%d, r=%d, t=%d, b=%d)', self.pad_l, self.pad_r, + self.pad_t, self.pad_b) +end diff --git a/contrib/lua-torch/nn/SpatialSoftMax.lua b/contrib/lua-torch/nn/SpatialSoftMax.lua new file mode 100644 index 000000000..56f0b40e2 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialSoftMax.lua @@ -0,0 +1,19 @@ +local SpatialSoftMax, _ = torch.class('nn.SpatialSoftMax', 'nn.Module') + +function SpatialSoftMax:updateOutput(input) + input.THNN.SoftMax_updateOutput( + input:cdata(), + self.output:cdata() + ) + return self.output +end + +function SpatialSoftMax:updateGradInput(input, gradOutput) + input.THNN.SoftMax_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata() + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SpatialSubSampling.lua b/contrib/lua-torch/nn/SpatialSubSampling.lua new file mode 100644 index 000000000..4e3fb8881 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialSubSampling.lua @@ -0,0 +1,79 @@ +local SpatialSubSampling, parent = torch.class('nn.SpatialSubSampling', 'nn.Module') + +function SpatialSubSampling:__init(nInputPlane, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nInputPlane) + self.bias = torch.Tensor(nInputPlane) + self.gradWeight = torch.Tensor(nInputPlane) + self.gradBias = torch.Tensor(nInputPlane) + + self:reset() +end + +function SpatialSubSampling:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH) + end + if nn.oldSeed then + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv) + end +end + +function SpatialSubSampling:updateOutput(input) + input.THNN.SpatialSubSampling_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + self.bias:cdata(), + self.kW, self.kH, + self.dW, self.dH + ) + return self.output +end + +function SpatialSubSampling:updateGradInput(input, gradOutput) + if self.gradInput then + input.THNN.SpatialSubSampling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.kW, self.kH, + self.dW, self.dH + ) + return self.gradInput + end +end + +function SpatialSubSampling:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + input.THNN.SpatialSubSampling_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + self.gradBias:cdata(), + self.kW, self.kH, + self.dW, self.dH, + scale + ) +end diff --git a/contrib/lua-torch/nn/SpatialSubtractiveNormalization.lua b/contrib/lua-torch/nn/SpatialSubtractiveNormalization.lua new file mode 100644 index 000000000..d430083e9 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialSubtractiveNormalization.lua @@ -0,0 +1,115 @@ +local SpatialSubtractiveNormalization, parent = torch.class('nn.SpatialSubtractiveNormalization','nn.Module') + +function SpatialSubtractiveNormalization:__init(nInputPlane, kernel) + parent.__init(self) + + -- get args + self.nInputPlane = nInputPlane or 1 + self.kernel = kernel or torch.Tensor(9,9):fill(1) + local kdim = self.kernel:nDimension() + + -- check args + if kdim ~= 2 and kdim ~= 1 then + error('<SpatialSubtractiveNormalization> averaging kernel must be 2D or 1D') + end + if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and 
(self.kernel:size(2) % 2) == 0) then + error('<SpatialSubtractiveNormalization> averaging kernel must have ODD dimensions') + end + + -- normalize kernel + self.kernel:div(self.kernel:sum() * self.nInputPlane) + + -- padding values + local padH = math.floor(self.kernel:size(1)/2) + local padW = padH + if kdim == 2 then + padW = math.floor(self.kernel:size(2)/2) + end + + -- create convolutional mean extractor + self.meanestimator = nn.Sequential() + self.meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH)) + if kdim == 2 then + self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, self.kernel:size(2), self.kernel:size(1))) + else + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1)) + self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1))) + end + self.meanestimator:add(nn.Replicate(self.nInputPlane,1,3)) + + -- set kernel and bias + if kdim == 2 then + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[1][i] = self.kernel + end + self.meanestimator.modules[2].bias:zero() + else + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[i]:copy(self.kernel) + self.meanestimator.modules[3].weight[1][i]:copy(self.kernel) + end + self.meanestimator.modules[2].bias:zero() + self.meanestimator.modules[3].bias:zero() + end + + -- other operations + self.subtractor = nn.CSubTable() + self.divider = nn.CDivTable() + + -- coefficient array, to adjust side effects + self.coef = torch.Tensor(1,1,1) +end + +function SpatialSubtractiveNormalization:updateOutput(input) + -- compute side coefficients + local dim = input:dim() + if input:dim()+1 ~= self.coef:dim() or (input:size(dim) ~= self.coef:size(dim)) or (input:size(dim-1) ~= self.coef:size(dim-1)) then + self.ones = self.ones or input.new() + self._coef = self._coef or self.coef.new() + if dim == 4 then + -- batch mode + self.ones:resizeAs(input[1]):fill(1) + local coef = self.meanestimator:updateOutput(self.ones) + self._coef:resizeAs(coef):copy(coef) -- make contiguous for view + local size = coef:size():totable() + table.insert(size,1,input:size(1)) + self.coef = self._coef:view(1,table.unpack(self._coef:size():totable())):expand(table.unpack(size)) + else + self.ones:resizeAs(input):fill(1) + local coef = self.meanestimator:updateOutput(self.ones) + self._coef:resizeAs(coef):copy(coef) -- copy meanestimator.output as it will be used below + self.coef = self._coef + end + + end + + -- compute mean + self.localsums = self.meanestimator:updateOutput(input) + self.adjustedsums = self.divider:updateOutput{self.localsums, self.coef} + self.output = self.subtractor:updateOutput{input, self.adjustedsums} + + -- done + return self.output +end + +function SpatialSubtractiveNormalization:updateGradInput(input, gradOutput) + -- resize grad + self.gradInput:resizeAs(input):zero() + + -- backprop through all modules (the mean estimator's backward pass only + -- needs to run once) + local gradsub = self.subtractor:updateGradInput({input, self.adjustedsums}, gradOutput) + local graddiv = self.divider:updateGradInput({self.localsums, self.coef}, gradsub[2]) + self.gradInput:add(self.meanestimator:updateGradInput(input, graddiv[1])) + self.gradInput:add(gradsub[1]) + + -- done + return self.gradInput +end + +function SpatialSubtractiveNormalization:clearState() + if self.ones then self.ones:set() end + if self._coef then self._coef:set() end + self.meanestimator:clearState() + return parent.clearState(self)
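+ -- (clearState just releases the cached buffers; updateOutput + -- re-allocates them lazily, so the module remains usable afterwards)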
+end diff --git a/contrib/lua-torch/nn/SpatialUpSamplingBilinear.lua b/contrib/lua-torch/nn/SpatialUpSamplingBilinear.lua new file mode 100644 index 000000000..12e1ce8f2 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialUpSamplingBilinear.lua @@ -0,0 +1,139 @@ +require 'nn.THNN' +local SpatialUpSamplingBilinear, parent = + torch.class('nn.SpatialUpSamplingBilinear', 'nn.Module') + +--[[ +Applies a 2D bilinear up-sampling over an input image composed of several +input planes. + +The Y and X dimensions are assumed to be the last 2 tensor dimensions. For +instance, if the tensor is 4D, then dim 3 is the y dimension and dim 4 is the x. + +scale_factor is assumed to be a positive integer; setSize then computes +owidth = width*scale_factor +oheight = height*scale_factor + +Alternatively, owidth and oheight can be directly provided as input. +--]] + +function SpatialUpSamplingBilinear:__init(params) + parent.__init(self) + + self.owidth, self.oheight, self.scale_factor = nil, nil, nil + if torch.type(params) == 'table' then + self.owidth, self.oheight = params.owidth, params.oheight + else + self.scale_factor = params + if self.scale_factor < 1 then + error('scale_factor must be at least 1') + end + if math.floor(self.scale_factor) ~= self.scale_factor then + error('scale_factor must be integer') + end + end + self.inputSize = torch.LongStorage(4) + self.outputSize = torch.LongStorage(4) +end + +local function makeContiguous(self, input, gradOutput) + if not input:isContiguous() then + self._input = self._input or input.new() + self._input:resizeAs(input):copy(input) + input = self._input + end + if gradOutput then + if not gradOutput:isContiguous() then + self._gradOutput = self._gradOutput or gradOutput.new() + self._gradOutput:resizeAs(gradOutput):copy(gradOutput) + gradOutput = self._gradOutput + end + end + return input, gradOutput +end + +function SpatialUpSamplingBilinear:setSize(input) + local xdim = input:dim() + local ydim = xdim - 1 + for i = 1, input:dim() do + self.inputSize[i] = input:size(i) + self.outputSize[i] = input:size(i) + end + if self.scale_factor ~= nil then + self.outputSize[ydim] = self.outputSize[ydim] * self.scale_factor + self.outputSize[xdim] = self.outputSize[xdim] * self.scale_factor + else + self.outputSize[ydim] = self.oheight + self.outputSize[xdim] = self.owidth + end +end + +function SpatialUpSamplingBilinear:updateOutput(input) + assert(input:dim() == 4 or input:dim()==3, + 'SpatialUpSamplingBilinear only supports 3D or 4D tensors' ) + input = makeContiguous(self, input) + local inputwas3D = false + if input:dim() == 3 then + input=input:view(-1, input:size(1), input:size(2), input:size(3)) + inputwas3D = true + end + local xdim = input:dim() + local ydim = xdim - 1 + self:setSize(input) + input.THNN.SpatialUpSamplingBilinear_updateOutput( + input:cdata(), + self.output:cdata(), + self.outputSize[ydim], + self.outputSize[xdim] + ) + if inputwas3D then + input = input:squeeze(1) + self.output = self.output:squeeze(1) + end + return self.output +end + +function SpatialUpSamplingBilinear:updateGradInput(input, gradOutput) + assert(input:dim() == 4 or input:dim()==3, + 'SpatialUpSamplingBilinear only supports 3D or 4D tensors' ) + assert(input:dim() == gradOutput:dim(), + 'Input and gradOutput should be of same dimension' ) + input, gradOutput = makeContiguous(self, input, gradOutput) + local inputwas3D = false + if input:dim() == 3 then + input = input:view(-1, input:size(1), input:size(2), input:size(3)) + gradOutput = gradOutput:view(-1,
gradOutput:size(1), gradOutput:size(2), + gradOutput:size(3)) + inputwas3D = true + end + local xdim = input:dim() + local ydim = xdim - 1 + self.gradInput:resizeAs(input) + input.THNN.SpatialUpSamplingBilinear_updateGradInput( + gradOutput:cdata(), + self.gradInput:cdata(), + input:size(1), + input:size(2), + input:size(3), + input:size(4), + self.outputSize[ydim], + self.outputSize[xdim] + ) + if inputwas3D then + input = input:squeeze(1) + gradOutput = gradOutput:squeeze(1) + self.gradInput = self.gradInput:squeeze(1) + end + return self.gradInput +end + + +function SpatialUpSamplingBilinear:__tostring__() + local s + if self.scale_factor ~= nil then + s = string.format('%s(%d)', torch.type(self), self.scale_factor) + else + s = string.format('%s(%d, %d)', + torch.type(self), self.oheight, self.owidth) + end + return s +end diff --git a/contrib/lua-torch/nn/SpatialUpSamplingNearest.lua b/contrib/lua-torch/nn/SpatialUpSamplingNearest.lua new file mode 100644 index 000000000..362ae73a3 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialUpSamplingNearest.lua @@ -0,0 +1,59 @@ +local SpatialUpSamplingNearest, parent = torch.class('nn.SpatialUpSamplingNearest', 'nn.Module') + +--[[ +Applies a 2D up-sampling over an input image composed of several input planes. + +The upsampling is done using the simple nearest neighbor technique. + +The Y and X dimensions are assumed to be the last 2 tensor dimensions. For +instance, if the tensor is 4D, then dim 3 is the y dimension and dim 4 is the x. + +owidth = width*scale_factor +oheight = height*scale_factor +--]] + +function SpatialUpSamplingNearest:__init(scale) + parent.__init(self) + + self.scale_factor = scale + if self.scale_factor < 1 then + error('scale_factor must be at least 1') + end + if math.floor(self.scale_factor) ~= self.scale_factor then + error('scale_factor must be integer') + end + self.inputSize = torch.LongStorage(4) + self.outputSize = torch.LongStorage(4) +end + +function SpatialUpSamplingNearest:updateOutput(input) + if input:dim() ~= 4 and input:dim() ~= 3 then + error('SpatialUpSamplingNearest only supports 3D or 4D tensors') + end + -- Copy the input size + local xdim = input:dim() + local ydim = input:dim() - 1 + for i = 1, input:dim() do + self.inputSize[i] = input:size(i) + self.outputSize[i] = input:size(i) + end + self.outputSize[ydim] = self.outputSize[ydim] * self.scale_factor + self.outputSize[xdim] = self.outputSize[xdim] * self.scale_factor + input.THNN.SpatialUpSamplingNearest_updateOutput( + input:cdata(), + self.output:cdata(), + self.scale_factor + ) + return self.output +end + +function SpatialUpSamplingNearest:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + input.THNN.SpatialUpSamplingNearest_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.scale_factor + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/SpatialZeroPadding.lua b/contrib/lua-torch/nn/SpatialZeroPadding.lua new file mode 100644 index 000000000..f19925841 --- /dev/null +++ b/contrib/lua-torch/nn/SpatialZeroPadding.lua @@ -0,0 +1,104 @@ +local SpatialZeroPadding, parent = torch.class('nn.SpatialZeroPadding', 'nn.Module') + +function SpatialZeroPadding:__init(pad_l, pad_r, pad_t, pad_b) + parent.__init(self) + self.pad_l = pad_l + self.pad_r = pad_r or self.pad_l + self.pad_t = pad_t or self.pad_l + self.pad_b = pad_b or self.pad_l +end + +function SpatialZeroPadding:updateOutput(input) + if input:dim() == 3 then + -- sizes + local h = input:size(2) + self.pad_t +
self.pad_b + local w = input:size(3) + self.pad_l + self.pad_r + if w < 1 or h < 1 then error('input is too small') end + self.output:resize(input:size(1), h, w) + self.output:zero() + -- crop input if necessary + local c_input = input + if self.pad_t < 0 then c_input = c_input:narrow(2, 1 - self.pad_t, c_input:size(2) + self.pad_t) end + if self.pad_b < 0 then c_input = c_input:narrow(2, 1, c_input:size(2) + self.pad_b) end + if self.pad_l < 0 then c_input = c_input:narrow(3, 1 - self.pad_l, c_input:size(3) + self.pad_l) end + if self.pad_r < 0 then c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_r) end + -- crop output if necessary + local c_output = self.output + if self.pad_t > 0 then c_output = c_output:narrow(2, 1 + self.pad_t, c_output:size(2) - self.pad_t) end + if self.pad_b > 0 then c_output = c_output:narrow(2, 1, c_output:size(2) - self.pad_b) end + if self.pad_l > 0 then c_output = c_output:narrow(3, 1 + self.pad_l, c_output:size(3) - self.pad_l) end + if self.pad_r > 0 then c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_r) end + -- copy input to output + c_output:copy(c_input) + elseif input:dim() == 4 then + -- sizes + local h = input:size(3) + self.pad_t + self.pad_b + local w = input:size(4) + self.pad_l + self.pad_r + if w < 1 or h < 1 then error('input is too small') end + self.output:resize(input:size(1), input:size(2), h, w) + self.output:zero() + -- crop input if necessary + local c_input = input + if self.pad_t < 0 then c_input = c_input:narrow(3, 1 - self.pad_t, c_input:size(3) + self.pad_t) end + if self.pad_b < 0 then c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_b) end + if self.pad_l < 0 then c_input = c_input:narrow(4, 1 - self.pad_l, c_input:size(4) + self.pad_l) end + if self.pad_r < 0 then c_input = c_input:narrow(4, 1, c_input:size(4) + self.pad_r) end + -- crop output if necessary + local c_output = self.output + if self.pad_t > 0 then c_output = c_output:narrow(3, 1 + self.pad_t, c_output:size(3) - self.pad_t) end + if self.pad_b > 0 then c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_b) end + if self.pad_l > 0 then c_output = c_output:narrow(4, 1 + self.pad_l, c_output:size(4) - self.pad_l) end + if self.pad_r > 0 then c_output = c_output:narrow(4, 1, c_output:size(4) - self.pad_r) end + -- copy input to output + c_output:copy(c_input) + else + error('input must be 3 or 4-dimensional') + end + return self.output +end + +function SpatialZeroPadding:updateGradInput(input, gradOutput) + if input:dim() == 3 then + self.gradInput:resizeAs(input):zero() + -- crop gradInput if necessary + local cg_input = self.gradInput + if self.pad_t < 0 then cg_input = cg_input:narrow(2, 1 - self.pad_t, cg_input:size(2) + self.pad_t) end + if self.pad_b < 0 then cg_input = cg_input:narrow(2, 1, cg_input:size(2) + self.pad_b) end + if self.pad_l < 0 then cg_input = cg_input:narrow(3, 1 - self.pad_l, cg_input:size(3) + self.pad_l) end + if self.pad_r < 0 then cg_input = cg_input:narrow(3, 1, cg_input:size(3) + self.pad_r) end + -- crop gradOutput if necessary + local cg_output = gradOutput + if self.pad_t > 0 then cg_output = cg_output:narrow(2, 1 + self.pad_t, cg_output:size(2) - self.pad_t) end + if self.pad_b > 0 then cg_output = cg_output:narrow(2, 1, cg_output:size(2) - self.pad_b) end + if self.pad_l > 0 then cg_output = cg_output:narrow(3, 1 + self.pad_l, cg_output:size(3) - self.pad_l) end + if self.pad_r > 0 then cg_output = cg_output:narrow(3, 1, cg_output:size(3) - self.pad_r) end + -- copy gradOutput to gradInput +
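-- (cg_input and cg_output are narrowed views into self.gradInput and + -- gradOutput, so the copy below only fills the unpadded interior) +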
cg_input:copy(cg_output) + elseif input:dim() == 4 then + self.gradInput:resizeAs(input):zero() + -- crop gradInput if necessary + local cg_input = self.gradInput + if self.pad_t < 0 then cg_input = cg_input:narrow(3, 1 - self.pad_t, cg_input:size(3) + self.pad_t) end + if self.pad_b < 0 then cg_input = cg_input:narrow(3, 1, cg_input:size(3) + self.pad_b) end + if self.pad_l < 0 then cg_input = cg_input:narrow(4, 1 - self.pad_l, cg_input:size(4) + self.pad_l) end + if self.pad_r < 0 then cg_input = cg_input:narrow(4, 1, cg_input:size(4) + self.pad_r) end + -- crop gradOutput if necessary + local cg_output = gradOutput + if self.pad_t > 0 then cg_output = cg_output:narrow(3, 1 + self.pad_t, cg_output:size(3) - self.pad_t) end + if self.pad_b > 0 then cg_output = cg_output:narrow(3, 1, cg_output:size(3) - self.pad_b) end + if self.pad_l > 0 then cg_output = cg_output:narrow(4, 1 + self.pad_l, cg_output:size(4) - self.pad_l) end + if self.pad_r > 0 then cg_output = cg_output:narrow(4, 1, cg_output:size(4) - self.pad_r) end + -- copy gradOutput to gradInput + cg_input:copy(cg_output) + else + error('input must be 3 or 4-dimensional') + end + return self.gradInput +end + + +function SpatialZeroPadding:__tostring__() + return torch.type(self) .. + string.format('(l=%d, r=%d, t=%d, b=%d)', self.pad_l, self.pad_r, + self.pad_t, self.pad_b) +end diff --git a/contrib/lua-torch/nn/SplitTable.lua b/contrib/lua-torch/nn/SplitTable.lua new file mode 100644 index 000000000..7c4f968e6 --- /dev/null +++ b/contrib/lua-torch/nn/SplitTable.lua @@ -0,0 +1,43 @@ +local SplitTable, parent = torch.class('nn.SplitTable', 'nn.Module') + +function SplitTable:__init(dimension, nInputDims) + parent.__init(self) + self.dimension = dimension + self.nInputDims = nInputDims +end + +function SplitTable:_getPositiveDimension(input) + local dimension = self.dimension + if dimension < 0 then + dimension = input:dim() + dimension + 1 + elseif self.nInputDims and input:dim()==(self.nInputDims+1) then + dimension = dimension + 1 + end + return dimension +end + +function SplitTable:updateOutput(input) + local dimension = self:_getPositiveDimension(input) + local slices = input:size(dimension) + + local currentOutput= {} + for i=1,slices do + currentOutput[#currentOutput+1] = input:select(dimension,i) + end + self.output = currentOutput + return self.output +end + +function SplitTable:updateGradInput(input, gradOutput) + local dimension = self:_getPositiveDimension(input) + local slices = input:size(dimension) + if self.gradInput then + self.gradInput:resizeAs(input) + + for i=1,slices do + local currentGradInput = gradOutput[i]; + self.gradInput:select(dimension,i):copy(currentGradInput) + end + end + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Sqrt.lua b/contrib/lua-torch/nn/Sqrt.lua new file mode 100644 index 000000000..df354a175 --- /dev/null +++ b/contrib/lua-torch/nn/Sqrt.lua @@ -0,0 +1,26 @@ +local Sqrt, parent = torch.class('nn.Sqrt','nn.Module') + +function Sqrt:__init(b) + parent.__init(self) + self.eps = b or 0 +end + +function Sqrt:updateOutput(input) + self.eps = self.eps or 0 + input.THNN.Sqrt_updateOutput( + input:cdata(), + self.output:cdata(), + self.eps + ) + return self.output +end + +function Sqrt:updateGradInput(input, gradOutput) + input.THNN.Sqrt_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata() + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Square.lua b/contrib/lua-torch/nn/Square.lua new file mode 100644 index
000000000..a6292afb9 --- /dev/null +++ b/contrib/lua-torch/nn/Square.lua @@ -0,0 +1,22 @@ +local Square, parent = torch.class('nn.Square', 'nn.Module') + +function Square:__init(args) + parent.__init(self) +end + +function Square:updateOutput(input) + input.THNN.Square_updateOutput( + input:cdata(), + self.output:cdata() + ) + return self.output +end + +function Square:updateGradInput(input, gradOutput) + input.THNN.Square_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata() + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Squeeze.lua b/contrib/lua-torch/nn/Squeeze.lua new file mode 100644 index 000000000..7d204a19d --- /dev/null +++ b/contrib/lua-torch/nn/Squeeze.lua @@ -0,0 +1,40 @@ +local Squeeze, parent = torch.class('nn.Squeeze', 'nn.Module') + +function Squeeze:__init(dim, numInputDims) + parent.__init(self) + self.dim = dim + self:setNumInputDims(numInputDims) +end + +function Squeeze:setNumInputDims(numInputDims) + self.numInputDims = numInputDims + return self +end + +function Squeeze:updateOutput(input) + assert(input and torch.isTensor(input), 'Squeeze only works on tensors') + local dim = self.dim + local addone = false + if self.numInputDims and input:dim()==(self.numInputDims+1) then + if dim then + dim = dim + 1 + elseif input:size(1) == 1 then + addone = true -- in case of minibatch of size 1. + end + end + self.output:set(dim and input:squeeze(dim) or input:squeeze()) + if addone then + local s = self.output:size():totable{} + table.insert(s, 1, 1) + self.output:set(self.output:view(torch.LongStorage(s))) + end + return self.output +end + +function Squeeze:updateGradInput(input, gradOutput) + assert(input and torch.isTensor(input), 'Squeeze only works on tensors') + assert(gradOutput and torch.isTensor(gradOutput), 'Squeeze only works on tensors') + assert(input:nElement() == gradOutput:nElement()) + self.gradInput:set(gradOutput:view(input:size())) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/StochasticGradient.lua b/contrib/lua-torch/nn/StochasticGradient.lua new file mode 100644 index 000000000..a060371e8 --- /dev/null +++ b/contrib/lua-torch/nn/StochasticGradient.lua @@ -0,0 +1,62 @@ +local StochasticGradient = torch.class('nn.StochasticGradient') + +function StochasticGradient:__init(module, criterion) + self.learningRate = 0.01 + self.learningRateDecay = 0 + self.maxIteration = 25 + self.shuffleIndices = true + self.module = module + self.criterion = criterion + self.verbose = true +end + +function StochasticGradient:train(dataset) + local iteration = 1 + local currentLearningRate = self.learningRate + local module = self.module + local criterion = self.criterion + + local shuffledIndices = torch.randperm(dataset:size(), 'torch.LongTensor') + if not self.shuffleIndices then + for t = 1,dataset:size() do + shuffledIndices[t] = t + end + end + + print("# StochasticGradient: training") + + while true do + local currentError = 0 + for t = 1,dataset:size() do + local example = dataset[shuffledIndices[t]] + local input = example[1] + local target = example[2] + + currentError = currentError + criterion:forward(module:forward(input), target) + + module:updateGradInput(input, criterion:updateGradInput(module.output, target)) + module:accUpdateGradParameters(input, criterion.gradInput, currentLearningRate) + + if self.hookExample then + self.hookExample(self, example) + end + end + + currentError = currentError / dataset:size() + + if self.hookIteration then + self.hookIteration(self, iteration, currentError) + 
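-- (hookIteration, when set by the user, receives this trainer, the + -- iteration number and the average per-example error; handy for logging) +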
end + + if self.verbose then + print("# current error = " .. currentError) + end + iteration = iteration + 1 + currentLearningRate = self.learningRate/(1+iteration*self.learningRateDecay) + if self.maxIteration > 0 and iteration > self.maxIteration then + print("# StochasticGradient: you have reached the maximum number of iterations") + print("# training error = " .. currentError) + break + end + end +end diff --git a/contrib/lua-torch/nn/Sum.lua b/contrib/lua-torch/nn/Sum.lua new file mode 100644 index 000000000..7fe8a1ab8 --- /dev/null +++ b/contrib/lua-torch/nn/Sum.lua @@ -0,0 +1,67 @@ +local Sum, parent = torch.class('nn.Sum', 'nn.Module') + +function Sum:__init(dimension, nInputDims, sizeAverage, squeeze) + parent.__init(self) + self.dimension = dimension or 1 + -- do not assign default value to nInputDims or it will break backward compatibility + self.nInputDims = nInputDims + self.sizeAverage = sizeAverage or false + if squeeze ~= nil then + assert(type(squeeze) == 'boolean', 'squeeze has to be true/false') + self.squeeze = squeeze + else + self.squeeze = true + end +end + +function Sum:_getPositiveDimension(input) + local dimension = self.dimension + if dimension < 0 then + dimension = input:dim() + dimension + 1 + elseif self.nInputDims and input:dim()==(self.nInputDims+1) then + dimension = dimension + 1 + end + assert(input:dim() >= dimension, "dimension exceeds input dimensions") + return dimension +end + +function Sum:updateOutput(input) + local dimension = self:_getPositiveDimension(input) + if type(self.output) == 'number' then + self.output = input.new() + end + self.output:sum(input, dimension) + if self.sizeAverage then + self.output:div(input:size(dimension)) + end + if (self.squeeze == nil or self.squeeze) and self.output:nDimension() > 1 then + self.output:set(self.output:select(dimension, 1)) + end + return self.output +end + +function Sum:updateGradInput(input, gradOutput) + local dimension = self:_getPositiveDimension(input) + -- zero-strides don't work with MKL/BLAS, so + -- don't set self.gradInput to zero-stride tensor. 
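+ -- (gradOutput:expandAs(input) would produce exactly such a + -- zero-stride view)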
+ -- Instead, do a deepcopy + local size = input:size() + size[dimension] = 1 + if not gradOutput:isContiguous() then + self._gradOutput = self._gradOutput or gradOutput.new() + self._gradOutput:resizeAs(gradOutput):copy(gradOutput) + gradOutput = self._gradOutput + end + gradOutput = gradOutput:view(size) + self.gradInput:resizeAs(input) + self.gradInput:copy(gradOutput:expandAs(input)) + if self.sizeAverage then + self.gradInput:div(input:size(dimension)) + end + return self.gradInput +end + +function Sum:clearState() + nn.utils.clear(self, '_gradOutput') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/THNN.lua b/contrib/lua-torch/nn/THNN.lua new file mode 100644 index 000000000..0848e9ed2 --- /dev/null +++ b/contrib/lua-torch/nn/THNN.lua @@ -0,0 +1,140 @@ +local ffi = require 'ffi' + +local THNN = {} + + +local generic_THNN_h = require 'nn.THNN_h' +-- strip all lines starting with # +-- to remove preprocessor directives originally present +-- in THNN.h +generic_THNN_h = generic_THNN_h:gsub("\n#[^\n]*", "") +generic_THNN_h = generic_THNN_h:gsub("^#[^\n]*\n", "") + +-- THGenerator struct declaration copied from torch7/lib/TH/THRandom.h +local base_declarations = [[ +typedef void THNNState; + +typedef struct { + unsigned long the_initial_seed; + int left; + int seeded; + unsigned long next; + unsigned long state[624]; /* the array for the state vector 624 = _MERSENNE_STATE_N */ + double normal_x; + double normal_y; + double normal_rho; + int normal_is_valid; +} THGenerator; +]] + +-- polyfill for Lua 5.1 +if not package.searchpath then + local sep = package.config:sub(1,1) + function package.searchpath(mod, path) + mod = mod:gsub('%.', sep) + for m in path:gmatch('[^;]+') do + local nm = m:gsub('?', mod) + local f = io.open(nm, 'r') + if f then + f:close() + return nm + end + end + end +end + +-- load libTHNN +THNN.C = ffi.load(package.searchpath('libTHNN', package.cpath)) + +ffi.cdef(base_declarations) + +-- expand macros, allow to use original lines from lib/THNN/generic/THNN.h +local preprocessed = string.gsub(generic_THNN_h, 'TH_API void THNN_%(([%a%d_]+)%)', 'void THNN_TYPE%1') + +local replacements = +{ + { + ['TYPE'] = 'Double', + ['accreal'] = 'double', + ['THTensor'] = 'THDoubleTensor', + ['THIndexTensor'] = 'THLongTensor', + ['THIntegerTensor'] = 'THIntTensor', + ['THIndex_t'] = 'long', + ['THInteger_t'] = 'int' + }, + { + ['TYPE'] = 'Float', + ['accreal'] = 'double', + ['THTensor'] = 'THFloatTensor', + ['THIndexTensor'] = 'THLongTensor', + ['THIntegerTensor'] = 'THIntTensor', + ['THIndex_t'] = 'long', + ['THInteger_t'] = 'int' + } +} + +for i=1,#replacements do + local r = replacements[i] + local s = preprocessed + for k,v in pairs(r) do + s = string.gsub(s, k, v) + end + ffi.cdef(s) +end + +THNN.NULL = ffi.NULL or nil + +function THNN.getState() + return ffi.NULL or nil +end + +function THNN.optionalTensor(t) + return t and t:cdata() or THNN.NULL +end + +local function extract_function_names(s) + local t = {} + for n in string.gmatch(s, 'TH_API void THNN_%(([%a%d_]+)%)') do + t[#t+1] = n + end + return t +end + +function THNN.bind(lib, base_names, type_name, state_getter) + local ftable = {} + local prefix = 'THNN_' .. type_name + for i,n in ipairs(base_names) do + -- use pcall since some libs might not support all functions (e.g. cunn) + local ok,v = pcall(function() return lib[prefix .. n] end) + if ok then + ftable[n] = function(...) v(state_getter(), ...) end -- implicitly add state + else + print('not found: ' .. prefix .. n ..
v) + end + end + return ftable +end + +-- build function table +local function_names = extract_function_names(generic_THNN_h) + +THNN.kernels = {} +THNN.kernels['torch.FloatTensor'] = THNN.bind(THNN.C, function_names, 'Float', THNN.getState) +THNN.kernels['torch.DoubleTensor'] = THNN.bind(THNN.C, function_names, 'Double', THNN.getState) + +torch.getmetatable('torch.FloatTensor').THNN = THNN.kernels['torch.FloatTensor'] +torch.getmetatable('torch.DoubleTensor').THNN = THNN.kernels['torch.DoubleTensor'] + +function THNN.runKernel(f, type, ...) + local ftable = THNN.kernels[type] + if not ftable then + error('Unsupported tensor type: '..type) + end + -- don't shadow f here, so the error message can report the requested name + local kernel = ftable[f] + if not kernel then + error(string.format("Function '%s' not found for tensor type '%s'.", f, type)) + end + kernel(...) +end + +return THNN diff --git a/contrib/lua-torch/nn/Tanh.lua b/contrib/lua-torch/nn/Tanh.lua new file mode 100644 index 000000000..fc42cbbfd --- /dev/null +++ b/contrib/lua-torch/nn/Tanh.lua @@ -0,0 +1,19 @@ +local Tanh = torch.class('nn.Tanh', 'nn.Module') + +function Tanh:updateOutput(input) + input.THNN.Tanh_updateOutput( + input:cdata(), + self.output:cdata() + ) + return self.output +end + +function Tanh:updateGradInput(input, gradOutput) + input.THNN.Tanh_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.output:cdata() + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/TanhShrink.lua b/contrib/lua-torch/nn/TanhShrink.lua new file mode 100644 index 000000000..96df6c5b7 --- /dev/null +++ b/contrib/lua-torch/nn/TanhShrink.lua @@ -0,0 +1,20 @@ +local TanhShrink, parent = torch.class('nn.TanhShrink','nn.Module') + +function TanhShrink:__init() + parent.__init(self) + self.tanh = nn.Tanh() +end + +function TanhShrink:updateOutput(input) + local th = self.tanh:updateOutput(input) + self.output:resizeAs(input):copy(input) + self.output:add(-1,th) + return self.output +end + +function TanhShrink:updateGradInput(input, gradOutput) + local dth = self.tanh:updateGradInput(input,gradOutput) + self.gradInput:resizeAs(input):copy(gradOutput) + self.gradInput:add(-1,dth) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/TemporalConvolution.lua b/contrib/lua-torch/nn/TemporalConvolution.lua new file mode 100644 index 000000000..4b3a89eb6 --- /dev/null +++ b/contrib/lua-torch/nn/TemporalConvolution.lua @@ -0,0 +1,73 @@ +local TemporalConvolution, parent = torch.class('nn.TemporalConvolution', 'nn.Module') + +function TemporalConvolution:__init(inputFrameSize, outputFrameSize, kW, dW) + parent.__init(self) + + dW = dW or 1 + + self.inputFrameSize = inputFrameSize + self.outputFrameSize = outputFrameSize + self.kW = kW + self.dW = dW + + self.weight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.bias = torch.Tensor(outputFrameSize) + self.gradWeight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.gradBias = torch.Tensor(outputFrameSize) + + self:reset() +end + +function TemporalConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.inputFrameSize) + end + if nn.oldSeed then + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv) + end +end + +function TemporalConvolution:updateOutput(input) + input.THNN.TemporalConvolution_updateOutput( + input:cdata(), self.output:cdata(), + self.weight:cdata(), self.bias:cdata(), + self.kW,
self.dW, + self.inputFrameSize, self.outputFrameSize + ) + return self.output +end + +function TemporalConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + input.THNN.TemporalConvolution_updateGradInput( + input:cdata(), gradOutput:cdata(), + self.gradInput:cdata(), self.weight:cdata(), + self.kW, self.dW + ) + return self.gradInput + end +end + +function TemporalConvolution:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + input.THNN.TemporalConvolution_accGradParameters( + input:cdata(), gradOutput:cdata(), + self.gradWeight:cdata(), self.gradBias:cdata(), + self.kW, self.dW, scale + ) +end + +function TemporalConvolution:sharedAccUpdateGradParameters(input, gradOutput, lr) + -- we do not need to accumulate parameters when sharing: + self:defaultAccUpdateGradParameters(input, gradOutput, lr) +end diff --git a/contrib/lua-torch/nn/TemporalDynamicKMaxPooling.lua b/contrib/lua-torch/nn/TemporalDynamicKMaxPooling.lua new file mode 100644 index 000000000..644a0fa9c --- /dev/null +++ b/contrib/lua-torch/nn/TemporalDynamicKMaxPooling.lua @@ -0,0 +1,65 @@ +--[[ + This file implements Dynamic K Max Pooling as described in the paper: + "A Convolutional Neural Network for Modelling Sentences" + by Nal Kalchbrenner, Edward Grefenstette, Phil Blunsom + + The operation is simply selecting the k highest values out of a sequence. + k can be a calculated value or pre-defined + + The value of k can be calculated as in the paper by using: + k_top as minK + (L-l)/L as factor + + Where: + k_top is the desired sequence length at the end of the convolution part, + L is the total number of layers, + l is this layer's number +]] + +local TemporalDynamicKMaxPooling, parent = torch.class('nn.TemporalDynamicKMaxPooling', 'nn.Module') + +function TemporalDynamicKMaxPooling:__init(minK, factor) + parent.__init(self) + + self.minK = minK + self.factor = factor or 0 +end + +function TemporalDynamicKMaxPooling:updateOutput(input) + assert(input:dim() == 2 or input:dim() == 3, 'Only 2D or 3D(batch mode) accepted') + + local seqDim = input:dim()-1 + local k = math.max(self.minK, math.ceil(self.factor*input:size(seqDim))) + assert(input:size(seqDim) >= self.minK, 'Input sequence length (' .. input:size(seqDim) .. ') too small for desired k value (' .. k ..
')') + + -- Sort input in descending order + local sorted, allIndices = input:sort(seqDim,true) + -- Reduce the indices to only include the top-k and return to original order by sorting + self.indices = allIndices:narrow(seqDim, 1, k):sort(seqDim) + + self.output = input:gather(seqDim, self.indices) + + return self.output +end + +function TemporalDynamicKMaxPooling:updateGradInput(input, gradOutput) + if self.gradInput then + local seqDim = input:dim()-1 + + self.gradInput:resizeAs(input) + self.gradInput:zero() + + -- Using the previously stored indices, add the gradOutputs to their respective + -- input indices in the self.gradInput buffer + local updateValues = self.gradInput:gather(seqDim, self.indices) + updateValues:add(gradOutput) + self.gradInput:scatter(seqDim, self.indices, updateValues) + + return self.gradInput + end +end + +function TemporalDynamicKMaxPooling:clearState() + nn.utils.clear(self, 'indices') + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/TemporalMaxPooling.lua b/contrib/lua-torch/nn/TemporalMaxPooling.lua new file mode 100644 index 000000000..894f4a99f --- /dev/null +++ b/contrib/lua-torch/nn/TemporalMaxPooling.lua @@ -0,0 +1,44 @@ +local TemporalMaxPooling, parent = torch.class('nn.TemporalMaxPooling', 'nn.Module') + +function TemporalMaxPooling:__init(kW, dW) + parent.__init(self) + + dW = dW or kW + + self.kW = kW + self.dW = dW +end + +function TemporalMaxPooling:updateOutput(input) + self.indices = self.indices or torch.LongTensor() + if torch.typename(input):find('torch%.Cuda.*Tensor') then + self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices + else + self.indices = self.indices:long() + end + input.THNN.TemporalMaxPooling_updateOutput( + input:cdata(), self.output:cdata(), + self.indices:cdata(), self.kW, self.dW + ) + return self.output +end + +function TemporalMaxPooling:updateGradInput(input, gradOutput) + if self.gradInput then + input.THNN.TemporalMaxPooling_updateGradInput( + input:cdata(), gradOutput:cdata(), + self.gradInput:cdata(), self.indices:cdata(), + self.kW, self.dW + ) + return self.gradInput + end +end + +function TemporalMaxPooling:empty() + self:clearState() +end + +function TemporalMaxPooling:clearState() + if self.indices then self.indices:set() end + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/TemporalRowConvolution.lua b/contrib/lua-torch/nn/TemporalRowConvolution.lua new file mode 100644 index 000000000..7c9d6a269 --- /dev/null +++ b/contrib/lua-torch/nn/TemporalRowConvolution.lua @@ -0,0 +1,120 @@ +local THNN = require "nn.THNN" + +local TemporalRowConvolution, parent = torch.class("nn.TemporalRowConvolution", "nn.Module") + +function TemporalRowConvolution:__init(inputFrameSize, kW, dW, featFirst) + parent.__init(self) + + self.inputFrameSize = inputFrameSize + self.kW = kW + self.dW = dW or 1 + + self.weight = torch.Tensor(inputFrameSize, 1, kW) + self.bias = torch.Tensor(inputFrameSize) + self.gradWeight = torch.Tensor(inputFrameSize, 1, kW) + self.gradBias = torch.Tensor(inputFrameSize) + + -- Set to true for batch x inputFrameSize x nInputFrame + self.featFirst = featFirst and true or false + self:reset() +end + +function TemporalRowConvolution:noBias() + self.bias = nil + self.gradBias = nil + return self +end + +function TemporalRowConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1 / math.sqrt(self.kW * self.inputFrameSize) + end + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv) +end + 
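-- a rough usage sketch (hypothetical sizes, assuming 'nn' is loaded): each + -- of the inputFrameSize rows gets its own length-kW filter, e.g. + -- local m = nn.TemporalRowConvolution(16, 3) -- 16 features, kW = 3 + -- local y = m:forward(torch.randn(10, 16)) -- 10 frames should yield + -- -- (10-3)/1+1 = 8 frames +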
+function TemporalRowConvolution:updateOutput(input) + assert(input.THNN, torch.type(input)..".THNN backend not imported") + self.finput = self.finput or input.new() + self.fgradInput = self.fgradInput or input.new() + + input.THNN.TemporalRowConvolution_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, + self.dW, + 0, -- would be self.padW + self.featFirst + ) + + return self.output +end + +function TemporalRowConvolution:updateGradInput(input, gradOutput) + assert(input.THNN, torch.type(input)..".THNN backend not imported") + + if self.gradInput then + input.THNN.TemporalRowConvolution_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, + self.dW, + 0, -- would be self.padW + self.featFirst + ) + return self.gradInput + end +end + +function TemporalRowConvolution:accGradParameters(input, gradOutput, scale) + assert(input.THNN, torch.type(input)..".THNN backend not imported") + + input.THNN.TemporalRowConvolution_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kW, + self.dW, + 0, -- would be self.padW + self.featFirst, + scale or 1) +end + +function TemporalRowConvolution:type(type, tensorCache) + if self.finput then self.finput:set() end + if self.fgradInput then self.fgradInput:set() end + return parent.type(self, type, tensorCache) +end + +function TemporalRowConvolution:__tostring__() + local s = string.format("%s(%d, %d", torch.type(self), self.inputFrameSize, self.kW) + if self.dW ~= 1 then + s = s .. string.format(", %d", self.dW) + end + if self.padW and self.padW ~= 0 then -- currently padding is not supported + s = s .. ", " .. self.padW + end + if self.bias then + return s .. ")" + else + return s .. 
") without bias" + end +end + +function TemporalRowConvolution:clearState() + nn.utils.clear(self, "finput", "fgradInput", "_input", "_gradOutput") + return parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/TemporalSubSampling.lua b/contrib/lua-torch/nn/TemporalSubSampling.lua new file mode 100644 index 000000000..e9287d63d --- /dev/null +++ b/contrib/lua-torch/nn/TemporalSubSampling.lua @@ -0,0 +1,64 @@ +local TemporalSubSampling, parent = torch.class('nn.TemporalSubSampling', 'nn.Module') + +function TemporalSubSampling:__init(inputFrameSize, kW, dW) + parent.__init(self) + + dW = dW or 1 + + self.inputFrameSize = inputFrameSize + self.kW = kW + self.dW = dW + + self.weight = torch.Tensor(inputFrameSize) + self.bias = torch.Tensor(inputFrameSize) + self.gradWeight = torch.Tensor(inputFrameSize) + self.gradBias = torch.Tensor(inputFrameSize) + + self:reset() +end + +function TemporalSubSampling:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW) + end + if nn.oldSeed then + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv) + end +end + +function TemporalSubSampling:updateOutput(input) + input.THNN.TemporalSubSampling_updateOutput( + input:cdata(), self.output:cdata(), + self.weight:cdata(), self.bias:cdata(), + self.kW, self.dW, self.inputFrameSize + ) + return self.output +end + +function TemporalSubSampling:updateGradInput(input, gradOutput) + if self.gradInput then + input.THNN.TemporalSubSampling_updateGradInput( + input:cdata(), gradOutput:cdata(), self.gradInput:cdata(), + self.weight:cdata(), self.kW, self.dW + ) + return self.gradInput + end +end + +function TemporalSubSampling:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + input.THNN.TemporalSubSampling_accGradParameters( + input:cdata(), gradOutput:cdata(), self.gradWeight:cdata(), + self.gradBias:cdata(), self.kW, self.dW, scale + ) +end diff --git a/contrib/lua-torch/nn/Threshold.lua b/contrib/lua-torch/nn/Threshold.lua new file mode 100644 index 000000000..6fdd26408 --- /dev/null +++ b/contrib/lua-torch/nn/Threshold.lua @@ -0,0 +1,51 @@ +local Threshold, parent = torch.class('nn.Threshold','nn.Module') + +function Threshold:__init(th,v,ip) + parent.__init(self) + self.threshold = th or 1e-6 + self.val = v or 0 + if (th and type(th) ~= 'number') or (v and type(v) ~= 'number') then + error('nn.Threshold(threshold, value)') + end + -- default for inplace is false + self.inplace = ip or false + if (ip and type(ip) ~= 'boolean') then + error('in-place flag must be boolean') + end + self:validateParameters() +end + +function Threshold:updateOutput(input) + self:validateParameters() + input.THNN.Threshold_updateOutput( + input:cdata(), + self.output:cdata(), + self.threshold, + self.val, + self.inplace + ) + return self.output +end + +function Threshold:updateGradInput(input, gradOutput) + self:validateParameters() + input.THNN.Threshold_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.threshold, + self.val, + self.inplace + ) + return self.gradInput +end + +function Threshold:validateParameters() + self.inplace = self.inplace or false -- backwards compatibility pre inplace + if self.inplace then + if self.val > self.threshold then + error('in-place processing requires value (' .. self.val .. + ') not exceed threshold (' .. self.threshold .. 
')') + end + end +end diff --git a/contrib/lua-torch/nn/Transpose.lua b/contrib/lua-torch/nn/Transpose.lua new file mode 100644 index 000000000..cceb2b643 --- /dev/null +++ b/contrib/lua-torch/nn/Transpose.lua @@ -0,0 +1,35 @@ +local Transpose, parent = torch.class('nn.Transpose', 'nn.Module') + +-- transpose dimensions: +-- n = nn.Transpose({1,4},{1,3}) +-- will transpose dims 1 and 4, then 1 and 3... + +function Transpose:__init(...) + parent.__init(self) + self.permutations = {...} + self.numInputDims = nil +end + +function Transpose:setNumInputDims(numInputDims) + self.numInputDims = numInputDims + return self +end + +function Transpose:updateOutput(input) + local offset = self.numInputDims and input:nDimension()-self.numInputDims or 0 + for _,perm in ipairs(self.permutations) do + input = input:transpose(perm[1]+offset,perm[2]+offset) + end + self.output:resizeAs(input):copy(input) + return self.output +end + +function Transpose:updateGradInput(input, gradOutput) + for i = #self.permutations,1,-1 do + local perm = self.permutations[i] + local offset = self.numInputDims and input:nDimension()-self.numInputDims or 0 + gradOutput = gradOutput:transpose(perm[1]+offset,perm[2]+offset) + end + self.gradInput:resizeAs(gradOutput):copy(gradOutput) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/Unsqueeze.lua b/contrib/lua-torch/nn/Unsqueeze.lua new file mode 100644 index 000000000..2e82a25a0 --- /dev/null +++ b/contrib/lua-torch/nn/Unsqueeze.lua @@ -0,0 +1,52 @@ +local Unsqueeze, parent = torch.class('nn.Unsqueeze', 'nn.Module') + +local function _assertTensor(t) + assert(torch.isTensor(t), "This module only works on tensors") +end + +function Unsqueeze:__init(pos, numInputDims) + parent.__init(self) + self.pos = pos or error('the position to insert singleton dim not specified') + self:setNumInputDims(numInputDims) +end + +function Unsqueeze:setNumInputDims(numInputDims) + self.numInputDims = numInputDims + return self +end + +function Unsqueeze:updateOutput(input) + _assertTensor(input) + local actualPos = self:_getActualPosition(input) + nn.utils.addSingletonDimension(self.output, input, actualPos) + return self.output +end + +function Unsqueeze:updateGradInput(input, gradOutput) + _assertTensor(input) + _assertTensor(gradOutput) + assert(input:nElement() == gradOutput:nElement()) + + self.gradInput:view(gradOutput, input:size()) + return self.gradInput +end + +function Unsqueeze:__tostring__() + return torch.type(self)..'(dim ' .. self.pos .. ')' +end + +function Unsqueeze:_getActualPosition(input) + -- get valid dimension offset for batchMode (if any) + local inputDim = input:dim() -- data batch dim + self.numInputDims = self.numInputDims or inputDim -- feature map dim + local offsetDim = inputDim - self.numInputDims + assert(offsetDim >= 0, "input feature map dim (numInputDims) must be <= input:dim()") + + -- the actual position; clearer error message for batchMode (if any) + local actualPos = self.pos + offsetDim + assert(actualPos >= 1 and actualPos <= (inputDim + 1), + ("Invalid position: %d. input:dim() is %d, input feature map dim (numInputDims) is %d.") + :format(self.pos, inputDim, self.numInputDims) + ) + return actualPos +end diff --git a/contrib/lua-torch/nn/View.lua b/contrib/lua-torch/nn/View.lua new file mode 100644 index 000000000..542e57e16 --- /dev/null +++ b/contrib/lua-torch/nn/View.lua @@ -0,0 +1,96 @@ +local View, parent = torch.class('nn.View', 'nn.Module') + +function View:resetSize(...) + if select('#', ...)
== 1 and torch.typename(select(1, ...)) == 'torch.LongStorage' then + self.size = select(1, ...) + else + self.size = torch.LongStorage({...}) + end + + self.numElements = 1 + local inferdim = false + for i = 1,#self.size do + local szi = self.size[i] + if szi >= 0 then + self.numElements = self.numElements * self.size[i] + else + assert(szi == -1, 'size should be positive or -1') + assert(not inferdim, 'only one dimension can be at -1') + inferdim = true + end + end + + return self +end + +function View:__init(...) + parent.__init(self) + self:resetSize(...) + self.numInputDims = nil +end + +function View:setNumInputDims(numInputDims) + self.numInputDims = numInputDims + return self +end + +local function batchsize(input, size, numInputDims, numElements) + local ind = input:nDimension() + local isz = input:size() + local maxdim = numInputDims and numInputDims or ind + local ine = 1 + for i=ind,ind-maxdim+1,-1 do + ine = ine * isz[i] + end + + if ine % numElements ~= 0 then + error(string.format( + 'input view (%s) and desired view (%s) do not match', + table.concat(input:size():totable(), 'x'), + table.concat(size:totable(), 'x'))) + end + + -- the remainder is either the batch... + local bsz = ine / numElements + + -- ... or the missing size dim + for i=1,size:size() do + if size[i] == -1 then + bsz = 1 + break + end + end + + -- for dim over maxdim, it is definitively the batch + for i=ind-maxdim,1,-1 do + bsz = bsz * isz[i] + end + + -- special case + if bsz == 1 and (not numInputDims or input:nDimension() <= numInputDims) then + return + end + + return bsz +end + +function View:updateOutput(input) + self.output = self.output or input.new() + local bsz = batchsize(input, self.size, self.numInputDims, self.numElements) + if bsz then + self.output:view(input, bsz, table.unpack(self.size:totable())) + else + self.output:view(input, self.size) + end + return self.output +end + +function View:updateGradInput(input, gradOutput) + self.gradInput = self.gradInput or gradOutput.new() + self.gradInput:view(gradOutput, input:size()) + return self.gradInput +end + +function View:__tostring__() + return torch.type(self)..'('..table.concat(self.size:totable(), ', ')..')' +end diff --git a/contrib/lua-torch/nn/VolumetricAveragePooling.lua b/contrib/lua-torch/nn/VolumetricAveragePooling.lua new file mode 100644 index 000000000..df6d2c405 --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricAveragePooling.lua @@ -0,0 +1,54 @@ +local VolumetricAveragePooling, parent = torch.class( + 'nn.VolumetricAveragePooling', 'nn.Module') + +function VolumetricAveragePooling:__init(kT, kW, kH, dT, dW, dH) + parent.__init(self) + + dT = dT or kT + dW = dW or kW + dH = dH or kH + + self.kT = kT + self.kH = kH + self.kW = kW + self.dT = dT + self.dW = dW + self.dH = dH +end + +function VolumetricAveragePooling:updateOutput(input) + input.THNN.VolumetricAveragePooling_updateOutput( + input:cdata(), + self.output:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH + ) + return self.output +end + +function VolumetricAveragePooling:updateGradInput(input, gradOutput) + input.THNN.VolumetricAveragePooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH + ) + return self.gradInput +end + +function VolumetricAveragePooling:empty() + return parent.clearState(self) +end + +function VolumetricAveragePooling:__tostring__() + local s = string.format('%s(%dx%dx%d, %d,%d,%d', torch.type(self), + self.kT, self.kW, self.kH, self.dT,
self.dW, self.dH) + if (self.padT or self.padW or self.padH) and + (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padT.. ',' .. self.padW .. ','.. self.padH + end + s = s .. ')' + + return s +end diff --git a/contrib/lua-torch/nn/VolumetricBatchNormalization.lua b/contrib/lua-torch/nn/VolumetricBatchNormalization.lua new file mode 100644 index 000000000..6168a9245 --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricBatchNormalization.lua @@ -0,0 +1,4 @@ +local BN, parent = torch.class('nn.VolumetricBatchNormalization', 'nn.BatchNormalization') + +-- expected dimension of input +BN.nDim = 5 diff --git a/contrib/lua-torch/nn/VolumetricConvolution.lua b/contrib/lua-torch/nn/VolumetricConvolution.lua new file mode 100644 index 000000000..329609aff --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricConvolution.lua @@ -0,0 +1,169 @@ +local THNN = require 'nn.THNN' +local VolumetricConvolution, parent = torch.class('nn.VolumetricConvolution', 'nn.Module') + +function VolumetricConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH) + parent.__init(self) + + dT = dT or 1 + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kT = kT + self.kW = kW + self.kH = kH + self.dT = dT + self.dW = dW + self.dH = dH + self.padT = padT or 0 + self.padW = padW or self.padT + self.padH = padH or self.padW + + self.weight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW) + self.gradBias = torch.Tensor(nOutputPlane) + self:reset() +end + +function VolumetricConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kT*self.kW*self.kH*self.nInputPlane) + end + if nn.oldSeed then + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + if self.bias then + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + end + else + self.weight:uniform(-stdv, stdv) + if self.bias then + self.bias:uniform(-stdv, stdv) + end + end +end + +function VolumetricConvolution:noBias() + self.bias = nil + self.gradBias = nil + return self +end + +function VolumetricConvolution:updateOutput(input) + self.finput = self.finput or input.new() + self.fgradInput = self.fgradInput or input.new() + if torch.typename(input):find('torch%.Cuda.*Tensor') then + input.THNN.VolumetricConvolution_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH + ) + else + input.THNN.VolumetricConvolutionMM_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH + ) + end + return self.output +end + +function VolumetricConvolution:updateGradInput(input, gradOutput) + if torch.typename(input):find('torch%.Cuda.*Tensor') then + input.THNN.VolumetricConvolution_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH + ) + return self.gradInput + else + if self.gradInput then + input.THNN.VolumetricConvolutionMM_updateGradInput( + input:cdata(), + gradOutput:cdata(), + 
self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH + ) + return self.gradInput + end + end +end + +function VolumetricConvolution:accGradParameters(input, gradOutput, scale) + if torch.typename(input):find('torch%.Cuda.*Tensor') then + input.THNN.VolumetricConvolution_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + scale or 1 + ) + else + input.THNN.VolumetricConvolutionMM_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + scale or 1 + ) + end +end + +function VolumetricConvolution:type(type, tensorCache) + if self.finput then self.finput:set() end + if self.fgradInput then self.fgradInput:set() end + return parent.type(self, type, tensorCache) +end + +function VolumetricConvolution:clearState() + nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput') + return parent.clearState(self) +end + +function VolumetricConvolution:__tostring__() + local s = string.format('%s(%d -> %d, %dx%dx%d', torch.type(self), + self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH) + if self.dT ~= 1 or self.dW ~= 1 or self.dH ~= 1 or + self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0 then + s = s .. string.format(', %d,%d,%d', self.dT, self.dW, self.dH) + end + if (self.padT or self.padW or self.padH) and + (self.padT ~=0 or self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH + end + return s .. 
')' +end diff --git a/contrib/lua-torch/nn/VolumetricDilatedConvolution.lua b/contrib/lua-torch/nn/VolumetricDilatedConvolution.lua new file mode 100644 index 000000000..f1337ebaa --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricDilatedConvolution.lua @@ -0,0 +1,84 @@ +local THNN = require 'nn.THNN' +local VolumetricDilatedConvolution, parent = torch.class('nn.VolumetricDilatedConvolution', 'nn.VolumetricConvolution') + +function VolumetricDilatedConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH) + parent.__init(self, nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH) + + self.dilationT = dilationT or 1 + self.dilationW = dilationW or 1 + self.dilationH = dilationH or 1 +end + +function VolumetricDilatedConvolution:updateOutput(input) + self.finput = self.finput or self.weight.new() + self.fgradInput = self.fgradInput or self.weight.new() + input.THNN.VolumetricDilatedConvolution_updateOutput( + input:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + self.dilationT, self.dilationW, self.dilationH + ) + return self.output +end + +function VolumetricDilatedConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + self.fgradInput = self.fgradInput or self.weight.new() + input.THNN.VolumetricDilatedConvolution_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + self.finput:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + self.dilationT, self.dilationW, self.dilationH + ) + return self.gradInput + end +end + +function VolumetricDilatedConvolution:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self.fgradInput = self.fgradInput or self.weight.new() + input.THNN.VolumetricDilatedConvolution_accGradParameters( + input:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + self.dilationT, self.dilationW, self.dilationH, + scale + ) +end + +function VolumetricDilatedConvolution:__tostring__() + local s = string.format('%s(%d -> %d, %dx%dx%d', torch.type(self), + self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH) + if self.dT ~= 1 or self.dW ~= 1 or self.dH ~= 1 + or self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0 then + s = s .. string.format(', %d,%d,%d', self.dT, self.dW, self.dH) + end + if (self.padT or self.padW or self.padH) + and (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH + end + s = s .. ', ' .. self.dilationT .. ',' + .. self.dilationW .. ',' .. self.dilationH + if self.bias then + return s .. ')' + else + return s .. 
') without bias' + end +end diff --git a/contrib/lua-torch/nn/VolumetricDilatedMaxPooling.lua b/contrib/lua-torch/nn/VolumetricDilatedMaxPooling.lua new file mode 100644 index 000000000..249b2b58e --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricDilatedMaxPooling.lua @@ -0,0 +1,71 @@ +local THNN = require 'nn.THNN' +local VolumetricDilatedMaxPooling, parent = torch.class('nn.VolumetricDilatedMaxPooling', 'nn.VolumetricMaxPooling') + +function VolumetricDilatedMaxPooling:__init(kT, kW, kH, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH) + parent.__init(self, kT, kW, kH, dT, dW, dH, padT, padW, padH) + + self.dilationT = dilationT or 1 + self.dilationW = dilationW or 1 + self.dilationH = dilationH or 1 + +end + +function VolumetricDilatedMaxPooling:updateOutput(input) + local dims = input:dim() + self.itime = input:size(dims-2) + self.iheight = input:size(dims-1) + self.iwidth = input:size(dims) + + self.indices = self.indices or torch.LongTensor() + if torch.typename(input):find('torch%.Cuda.*Tensor') then + self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices + else + self.indices = self.indices:long() + end + input.THNN.VolumetricDilatedMaxPooling_updateOutput( + input:cdata(), + self.output:cdata(), + self.indices:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + self.dilationT, self.dilationW, self.dilationH, + self.ceil_mode + ) + return self.output +end + +function VolumetricDilatedMaxPooling:updateGradInput(input, gradOutput) + input.THNN.VolumetricDilatedMaxPooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.indices:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + self.dilationT, self.dilationW, self.dilationH, + self.ceil_mode + ) + return self.gradInput +end + +function VolumetricDilatedMaxPooling:clearState() + if self.indices then + self.indices:set() + end + return parent.clearState(self) +end + +function VolumetricDilatedMaxPooling:__tostring__() + local s = string.format('%s(%dx%dx%d, %d,%d,%d', torch.type(self), + self.kT, self.kW, self.kH, self.dT, self.dW, self.dH) + if (self.padT or self.padW or self.padH) and + (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padT.. ',' .. self.padW .. ','.. self.padH + end + s = s .. ', ' .. self.dilationT .. ',' .. self.dilationW .. ',' .. self.dilationH + s = s .. 
')' + + return s +end diff --git a/contrib/lua-torch/nn/VolumetricDropout.lua b/contrib/lua-torch/nn/VolumetricDropout.lua new file mode 100644 index 000000000..809e28afe --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricDropout.lua @@ -0,0 +1,55 @@ +local VolumetricDropout, Parent = torch.class('nn.VolumetricDropout', 'nn.Module') + +function VolumetricDropout:__init(p,stochasticInference) + Parent.__init(self) + self.p = p or 0.5 + self.train = true + self.stochastic_inference = stochasticInference or false + self.noise = torch.Tensor() +end + +function VolumetricDropout:updateOutput(input) + self.output:resizeAs(input):copy(input) + if self.train or self.stochastic_inference then + if input:dim() == 5 then + self.noise:resize(input:size(1), input:size(2), 1, 1, 1) + elseif input:dim() == 4 then + self.noise:resize(input:size(1), 1, 1, 1) + else + error('Input must be 5D (nbatch, nfeat, t, h, w) or 4D (nfeat, t, h, w)') + end + self.noise:bernoulli(1-self.p) + -- We expand the random dropouts to the entire feature map because the + -- features are likely correlated across the map and so the dropout + -- should also be correlated. + self.output:cmul(torch.expandAs(self.noise, input)) + else + self.output:mul(1-self.p) + end + return self.output +end + +function VolumetricDropout:updateGradInput(input, gradOutput) + if self.train then + self.gradInput:resizeAs(gradOutput):copy(gradOutput) + self.gradInput:cmul(torch.expandAs(self.noise, input)) -- simply mask the gradients with the noise vector + else + error('backprop only defined while training') + end + return self.gradInput +end + +function VolumetricDropout:setp(p) + self.p = p +end + +function VolumetricDropout:__tostring__() + return string.format('%s(%f)', torch.type(self), self.p) +end + +function VolumetricDropout:clearState() + if self.noise then + self.noise:set() + end + return Parent.clearState(self) +end diff --git a/contrib/lua-torch/nn/VolumetricFractionalMaxPooling.lua b/contrib/lua-torch/nn/VolumetricFractionalMaxPooling.lua new file mode 100644 index 000000000..f5ff58cf0 --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricFractionalMaxPooling.lua @@ -0,0 +1,175 @@ +local VolumetricFractionalMaxPooling, parent = + torch.class('nn.VolumetricFractionalMaxPooling', 'nn.Module') + +-- Usage: +-- nn.VolumetricFractionalMaxPooling(poolSizeT, poolSizeW, poolSizeH, outT, outW, outH) +-- the output should be the exact size (outT x outH x outW) +-- nn.VolumetricFractionalMaxPooling(poolSizeT, poolSizeW, poolSizeH, ratioT, ratioW, ratioH) +-- the output should be the size (floor(inT x ratioT) x floor(inH x ratioH) x floor(inW x ratioW)) +-- ratios are numbers between (0, 1) exclusive +function VolumetricFractionalMaxPooling:__init(poolSizeT, poolSizeW, poolSizeH, arg1, arg2, arg3) + parent.__init(self) + assert(poolSizeT >= 2) + assert(poolSizeW >= 2) + assert(poolSizeH >= 2) + + -- Pool size (how wide the pooling for each output unit is) + self.poolSizeT = poolSizeT + self.poolSizeW = poolSizeW + self.poolSizeH = poolSizeH + + -- Random samples are drawn for all + -- batch * plane * (time, height, width; i.e., 3) points. This determines + -- the 3d "pseudorandom" overlapping pooling regions for each + -- (batch element x input plane). A new set of random samples is + -- drawn every updateOutput call, unless we disable it via + -- :fixPoolingRegions(). + self.randomSamples = nil + + -- Flag to disable re-generation of random samples for producing + -- a new pooling. 
For testing purposes + self.newRandomPool = false + + if arg1 >= 1 and arg2 >= 1 and arg3 >= 1 then + -- Desired output size: the input tensor will determine the reduction + -- ratio + self.outT = arg1 + self.outW = arg2 + self.outH = arg3 + else + -- Reduction ratio specified per each input + -- This is the reduction ratio that we use + self.ratioT = arg1 + self.ratioW = arg2 + self.ratioH = arg3 + + -- The reduction ratio must be between 0 and 1 + assert(self.ratioT > 0 and self.ratioT < 1) + assert(self.ratioW > 0 and self.ratioW < 1) + assert(self.ratioH > 0 and self.ratioH < 1) + end +end + +function VolumetricFractionalMaxPooling:getBufferSize_(input) + local batchSize = 0 + local planeSize = 0 + + if input:nDimension() == 4 then + batchSize = 1 + planeSize = input:size(1) + elseif input:nDimension() == 5 then + batchSize = input:size(1) + planeSize = input:size(2) + else + error('input must be dim 4 or 5') + end + + return torch.LongStorage({batchSize, planeSize, 3}) +end + +function VolumetricFractionalMaxPooling:initSampleBuffer_(input) + local sampleBufferSize = self:getBufferSize_(input) + + if self.randomSamples == nil then + self.randomSamples = input.new():resize(sampleBufferSize):uniform() + elseif (self.randomSamples:size(1) ~= sampleBufferSize[1] or + self.randomSamples:size(2) ~= sampleBufferSize[2]) then + self.randomSamples:resize(sampleBufferSize):uniform() + else + if not self.newRandomPool then + -- Create new pooling windows, since this is a subsequent call + self.randomSamples:uniform() + end + end +end + +function VolumetricFractionalMaxPooling:getOutputSizes_(input) + local outT = self.outT + local outW = self.outW + local outH = self.outH + if self.ratioW ~= nil and self.ratioH ~= nil then + if input:nDimension() == 5 then + outT = math.floor(input:size(5) * self.ratioT) + outW = math.floor(input:size(4) * self.ratioW) + outH = math.floor(input:size(3) * self.ratioH) + elseif input:nDimension() == 4 then + outT = math.floor(input:size(4) * self.ratioT) + outW = math.floor(input:size(3) * self.ratioW) + outH = math.floor(input:size(2) * self.ratioH) + else + error('input must be dim 4 or 5') + end + + -- Neither can be smaller than 1 + assert(outT > 0, 'reduction ratio or input time too small') + assert(outW > 0, 'reduction ratio or input width too small') + assert(outH > 0, 'reduction ratio or input height too small') + else + assert(outT ~= nil and outW ~= nil and outH ~= nil) + end + + return outT, outW, outH +end + +-- Call this to turn off regeneration of random pooling regions each +-- updateOutput call. 
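+-- A hypothetical usage sketch (not part of the original file), assuming a
+-- 4D input; it shows both construction styles and freezing the regions:
+--   local pool = nn.VolumetricFractionalMaxPooling(2, 2, 2, 0.5, 0.5, 0.5)
+--   pool:fixPoolingRegions()        -- reuse the same pseudorandom regions
+--   local out = pool:forward(torch.rand(4, 16, 16, 16))
+--   -- or with exact output sizes: nn.VolumetricFractionalMaxPooling(2, 2, 2, 8, 8, 8)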
+function VolumetricFractionalMaxPooling:fixPoolingRegions(val) + if val == nil then + val = true + end + + self.newRandomPool = val + return self +end + +function VolumetricFractionalMaxPooling:updateOutput(input) + self.indices = self.indices or torch.LongTensor() + if torch.typename(input):find('torch%.Cuda.*Tensor') then + self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices + else + self.indices = self.indices:long() + end + self:initSampleBuffer_(input) + local outT, outW, outH = self:getOutputSizes_(input) + + input.THNN.VolumetricFractionalMaxPooling_updateOutput( + input:cdata(), + self.output:cdata(), + outT, outW, outH, self.poolSizeT, self.poolSizeW, self.poolSizeH, + self.indices:cdata(), self.randomSamples:cdata()) + return self.output +end + +function VolumetricFractionalMaxPooling:updateGradInput(input, gradOutput) + assert(self.randomSamples ~= nil, + 'must call updateOutput/forward first') + + local outT, outW, outH = self:getOutputSizes_(input) + + input.THNN.VolumetricFractionalMaxPooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + outT, outW, outH, self.poolSizeT, self.poolSizeW, self.poolSizeH, + self.indices:cdata()) + return self.gradInput +end + +-- backward compat +function VolumetricFractionalMaxPooling:empty() + self:clearState() +end + +function VolumetricFractionalMaxPooling:clearState() + self.indices = nil + self.randomSamples = nil + return parent.clearState(self) +end + +function VolumetricFractionalMaxPooling:__tostring__() + return string.format('%s(%dx%dx%d, %d,%d,%d)', torch.type(self), + self.outT and self.outT or self.ratioT, + self.outW and self.outW or self.ratioW, + self.outH and self.outH or self.ratioH, + self.poolSizeT, self.poolSizeW, self.poolSizeH) +end diff --git a/contrib/lua-torch/nn/VolumetricFullConvolution.lua b/contrib/lua-torch/nn/VolumetricFullConvolution.lua new file mode 100644 index 000000000..0ce23401e --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricFullConvolution.lua @@ -0,0 +1,225 @@ +local THNN = require 'nn.THNN' +local VolumetricFullConvolution, parent = torch.class('nn.VolumetricFullConvolution','nn.Module') + +function VolumetricFullConvolution:__init(nInputPlane, nOutputPlane, + kT, kW, kH, -- kernel size + dT, dW, dH, -- stride + padT, padW, padH, -- padding + adjT, adjW, adjH) -- extra output adjustment + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + dT = dT or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kW = kW + self.kH = kH + self.kT = kT + self.dW = dW + self.dH = dH + self.dT = dT + self.padW = padW or 0 + self.padH = padH or 0 + self.padT = padT or 0 + self.adjW = adjW or 0 + self.adjH = adjH or 0 + self.adjT = adjT or 0 + + if self.adjW > self.dW - 1 or self.adjH > self.dH - 1 or self.adjT > self.dT - 1 then + error('adjW, adjH and adjT must be smaller than self.dW - 1,' .. 
+ ' self.dH - 1 and self.dT - 1 respectively') + end + + self.weight = torch.Tensor(nInputPlane, nOutputPlane, kT, kH, kW) + self.gradWeight = torch.Tensor(nInputPlane, nOutputPlane, kT, kH, kW) + self.bias = torch.Tensor(self.nOutputPlane) + self.gradBias = torch.Tensor(self.nOutputPlane) + + self.ones = torch.Tensor() + self.finput = torch.Tensor() + self.fgradInput = torch.Tensor() + + self:reset() +end + +function VolumetricFullConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + local nInputPlane = self.nInputPlane + local kT = self.kT + local kH = self.kH + local kW = self.kW + stdv = 1/math.sqrt(kW*kH*kT*nInputPlane) + end + self.weight:uniform(-stdv, stdv) + self.bias:uniform(-stdv, stdv) +end + +local function calculateAdj(targetSize, ker, pad, stride) + return (targetSize + 2 * pad - ker) % stride +end + +function VolumetricFullConvolution:backCompatibility() + -- Transpose the weight when loading from an old version + if not self.adjW then + self.weight = self.weight:transpose(1, 2):contiguous() + end + + -- Rename the padding when loading from an old version + self.padW = self.padW or self.pW + self.padH = self.padH or self.pH + self.padT = self.padT or self.pT + + self.adjW = self.adjW or 0 + self.adjH = self.adjH or 0 + self.adjT = self.adjT or 0 +end + + +function VolumetricFullConvolution:noBias() + self.bias = nil + self.gradBias = nil + return self +end + +function VolumetricFullConvolution:updateOutput(input) + self:backCompatibility() + + local inputTensor = input + local adjT, adjW, adjH = self.adjT, self.adjW, self.adjH + + -- The input can be a table where the second element indicates the target + -- output size, in which case the adj factors are computed automatically + if type(inputTensor) == 'table' then + inputTensor = input[1] + local targetTensor = input[2] + local tDims = targetTensor:dim() + local tT = targetTensor:size(tDims-2) + local tH = targetTensor:size(tDims-1) + local tW = targetTensor:size(tDims) + adjT = calculateAdj(tT, self.kT, self.padT, self.dT) + adjW = calculateAdj(tW, self.kW, self.padW, self.dW) + adjH = calculateAdj(tH, self.kH, self.padH, self.dH) + end + + inputTensor.THNN.VolumetricFullConvolution_updateOutput( + inputTensor:cdata(), + self.output:cdata(), + self.weight:cdata(), + THNN.optionalTensor(self.bias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + adjT, adjW, adjH + ) + + return self.output +end + +function VolumetricFullConvolution:updateGradInput(input, gradOutput) + self:backCompatibility() + + local inputTensor = input + local adjT, adjW, adjH = self.adjT, self.adjW, self.adjH + + -- The input can be a table where the second element indicates the target + -- output size, in which case the adj factors are computed automatically + if type(inputTensor) == 'table' then + inputTensor = input[1] + local targetTensor = input[2] + local tDims = targetTensor:dim() + local tT = targetTensor:size(tDims-2) + local tH = targetTensor:size(tDims-1) + local tW = targetTensor:size(tDims) + adjT = calculateAdj(tT, self.kT, self.padT, self.dT) + adjW = calculateAdj(tW, self.kW, self.padW, self.dW) + adjH = calculateAdj(tH, self.kH, self.padH, self.dH) + -- Momentarily extract the gradInput tensor + if type(self.gradInput) == 'table' then + self.gradInput = self.gradInput[1] + end + end + + inputTensor.THNN.VolumetricFullConvolution_updateGradInput( + inputTensor:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.weight:cdata(), + 
self.finput:cdata(), + self.fgradInput:cdata(), + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + adjT, adjW, adjH + ) + + if type(input) == 'table' then + -- Create a zero tensor to be expanded and used as gradInput[2]. + self.zeroScalar = self.zeroScalar or input[2].new(1):zero() + self.ones:resize(input[2]:dim()):fill(1) + local zeroTensor = self.zeroScalar + :view(table.unpack(self.ones:totable())) + :expandAs(input[2]) + self.gradInput = {self.gradInput, zeroTensor} + end + + return self.gradInput +end + +function VolumetricFullConvolution:accGradParameters(input, gradOutput, scale) + self:backCompatibility() + + local inputTensor = input + local adjT, adjW, adjH = self.adjT, self.adjW, self.adjH + + -- The input can be a table where the second element indicates the target + -- output size, in which case the adj factors are computed automatically + if type(inputTensor) == 'table' then + inputTensor = input[1] + local targetTensor = input[2] + local tDims = targetTensor:dim() + local tT = targetTensor:size(tDims-2) + local tH = targetTensor:size(tDims-1) + local tW = targetTensor:size(tDims) + adjT = calculateAdj(tT, self.kT, self.padT, self.dT) + adjW = calculateAdj(tW, self.kW, self.padW, self.dW) + adjH = calculateAdj(tH, self.kH, self.padH, self.dH) + end + + inputTensor.THNN.VolumetricFullConvolution_accGradParameters( + inputTensor:cdata(), + gradOutput:cdata(), + self.gradWeight:cdata(), + THNN.optionalTensor(self.gradBias), + self.finput:cdata(), + self.fgradInput:cdata(), + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + adjT, adjW, adjH, + scale or 1 + ) +end + +function VolumetricFullConvolution:type(type, tensorCache) + self.finput = torch.Tensor() + self.fgradInput = torch.Tensor() + return parent.type(self, type, tensorCache) +end + +function VolumetricFullConvolution:__tostring__() + local s = string.format('%s(%d -> %d, %dx%dx%d', torch.type(self), + self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH) + if self.dT ~= 1 or self.dW ~= 1 or self.dH ~= 1 or self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0 then + s = s .. string.format(', %d,%d,%d', self.dT, self.dW, self.dH) + end + if (self.padT or self.padW or self.padH) and (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH + end + if (self.adjT or self.adjW or self.adjH) and (self.adjT ~= 0 or self.adjW ~= 0 or self.adjH ~= 0) then + s = s .. ', ' .. self.adjT .. ',' .. self.adjW .. ',' .. self.adjH + end + return s .. 
')' +end diff --git a/contrib/lua-torch/nn/VolumetricMaxPooling.lua b/contrib/lua-torch/nn/VolumetricMaxPooling.lua new file mode 100644 index 000000000..e25c5b31c --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricMaxPooling.lua @@ -0,0 +1,102 @@ +local VolumetricMaxPooling, parent = torch.class('nn.VolumetricMaxPooling', 'nn.Module') + +VolumetricMaxPooling.__version = 2 + +function VolumetricMaxPooling:__init(kT, kW, kH, dT, dW, dH, padT, padW, padH) + parent.__init(self) + + dT = dT or kT + dW = dW or kW + dH = dH or kH + + self.kT = kT + self.kH = kH + self.kW = kW + self.dT = dT + self.dW = dW + self.dH = dH + + self.padT = padT or 0 + self.padW = padW or 0 + self.padH = padH or 0 + + + self.ceil_mode = false + self.indices = torch.LongTensor() +end + +function VolumetricMaxPooling:ceil() + self.ceil_mode = true + return self +end + +function VolumetricMaxPooling:floor() + self.ceil_mode = false + return self +end + +function VolumetricMaxPooling:updateOutput(input) + local dims = input:dim() + self.itime = input:size(dims-2) + self.iheight = input:size(dims-1) + self.iwidth = input:size(dims) + + self.indices = self.indices or torch.LongTensor() + if torch.typename(input):find('torch%.Cuda.*Tensor') then + self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices + else + self.indices = self.indices:long() + end + input.THNN.VolumetricMaxPooling_updateOutput( + input:cdata(), + self.output:cdata(), + self.indices:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + self.ceil_mode + ) + return self.output +end + +function VolumetricMaxPooling:updateGradInput(input, gradOutput) + input.THNN.VolumetricMaxPooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.indices:cdata(), + self.kT, self.kW, self.kH, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH, + self.ceil_mode + ) + return self.gradInput +end + +function VolumetricMaxPooling:empty() + self:clearState() +end + +function VolumetricMaxPooling:clearState() + if self.indices then self.indices:set() end + return parent.clearState(self) +end + +function VolumetricMaxPooling:read(file, version) + parent.read(self, file) + if version < 2 then + self.ceil_mode = false + end +end + +function VolumetricMaxPooling:__tostring__() + local s = string.format('%s(%dx%dx%d, %d,%d,%d', torch.type(self), + self.kT, self.kW, self.kH, self.dT, self.dW, self.dH) + if (self.padT or self.padW or self.padH) and + (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then + s = s .. ', ' .. self.padT.. ',' .. self.padW .. ','.. self.padH + end + s = s .. 
')' + + return s +end diff --git a/contrib/lua-torch/nn/VolumetricMaxUnpooling.lua b/contrib/lua-torch/nn/VolumetricMaxUnpooling.lua new file mode 100644 index 000000000..6291f5b85 --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricMaxUnpooling.lua @@ -0,0 +1,56 @@ +local VolumetricMaxUnpooling, parent = torch.class('nn.VolumetricMaxUnpooling', 'nn.Module') + +function VolumetricMaxUnpooling:__init(poolingModule) + parent.__init(self) + assert(torch.type(poolingModule)=='nn.VolumetricMaxPooling', 'Argument must be a nn.VolumetricMaxPooling module') + assert(poolingModule.kT==poolingModule.dT and poolingModule.kH==poolingModule.dH and poolingModule.kW==poolingModule.dW, "The size of pooling module's kernel must be equal to its stride") + self.pooling = poolingModule +end + +function VolumetricMaxUnpooling:setParams() + self.indices = self.pooling.indices + self.otime = self.pooling.itime + self.oheight = self.pooling.iheight + self.owidth = self.pooling.iwidth + self.dT = self.pooling.dT + self.dH = self.pooling.dH + self.dW = self.pooling.dW + self.padT = self.pooling.padT + self.padH = self.pooling.padH + self.padW = self.pooling.padW +end + +function VolumetricMaxUnpooling:updateOutput(input) + self:setParams() + input.THNN.VolumetricMaxUnpooling_updateOutput( + input:cdata(), + self.output:cdata(), + self.indices:cdata(), + self.otime, self.owidth, self.oheight, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH + ) + return self.output +end + +function VolumetricMaxUnpooling:updateGradInput(input, gradOutput) + self:setParams() + input.THNN.VolumetricMaxUnpooling_updateGradInput( + input:cdata(), + gradOutput:cdata(), + self.gradInput:cdata(), + self.indices:cdata(), + self.otime, self.owidth, self.oheight, + self.dT, self.dW, self.dH, + self.padT, self.padW, self.padH + ) + return self.gradInput +end + +function VolumetricMaxUnpooling:empty() + self:clearState() +end + +function VolumetricMaxUnpooling:__tostring__() + return 'nn.VolumetricMaxUnpooling associated to '..tostring(self.pooling) +end diff --git a/contrib/lua-torch/nn/VolumetricReplicationPadding.lua b/contrib/lua-torch/nn/VolumetricReplicationPadding.lua new file mode 100644 index 000000000..31a9503fd --- /dev/null +++ b/contrib/lua-torch/nn/VolumetricReplicationPadding.lua @@ -0,0 +1,58 @@ +local VolumetricReplicationPadding, parent = + torch.class('nn.VolumetricReplicationPadding', 'nn.Module') + +function VolumetricReplicationPadding:__init(pleft, pright, ptop, pbottom, + pfront, pback) + parent.__init(self) + self.pleft = pleft + self.pright = pright or self.pleft + self.ptop = ptop or self.pleft + self.pbottom = pbottom or self.pleft + self.pfront = pfront or self.pleft + self.pback = pback or self.pleft +end + +function VolumetricReplicationPadding:updateOutput(input) + if input:dim() == 4 or input:dim() == 5 then + input.THNN.VolumetricReplicationPadding_updateOutput( + input:cdata(), self.output:cdata(), + self.pleft, self.pright, self.ptop, self.pbottom, self.pfront, + self.pback) + else + error('input must be 4 or 5-dimensional') + end + return self.output +end + +function VolumetricReplicationPadding:updateGradInput(input, gradOutput) + if input:dim() == 4 and gradOutput:dim() == 4 then + assert(input:size(1) == gradOutput:size(1) + and input:size(2) + self.pfront + self.pback == gradOutput:size(2) + and input:size(3) + self.ptop + self.pbottom == gradOutput:size(3) + and input:size(4) + self.pleft + self.pright == gradOutput:size(4), + 'input and gradOutput must be compatible in size') + elseif 
input:dim() == 5 and gradOutput:dim() == 5 then + assert(input:size(1) == gradOutput:size(1) + and input:size(2) == gradOutput:size(2) + and input:size(3) + self.pfront + self.pback == gradOutput:size(3) + and input:size(4) + self.ptop + self.pbottom == gradOutput:size(4) + and input:size(5) + self.pleft + self.pright == gradOutput:size(5), + 'input and gradOutput must be compatible in size') + else + error( + [[input and gradOutput must be 4 or 5-dimensional + and have equal number of dimensions]] + ) + end + input.THNN.VolumetricReplicationPadding_updateGradInput( + input:cdata(), gradOutput:cdata(), self.gradInput:cdata(), + self.pleft, self.pright, self.ptop, self.pbottom, self.pfront, self.pback) + return self.gradInput +end + +function VolumetricReplicationPadding:__tostring__() + return torch.type(self) .. + string.format('(left=%d, right=%d, top=%d, bottom=%d, front=%d, back=%d)', + self.pleft, self.pright, self.ptop, self.pbottom, + self.pfront, self.pback) +end diff --git a/contrib/lua-torch/nn/WeightNorm.lua b/contrib/lua-torch/nn/WeightNorm.lua new file mode 100644 index 000000000..3ffcd90aa --- /dev/null +++ b/contrib/lua-torch/nn/WeightNorm.lua @@ -0,0 +1,208 @@ +-- Weight Normalization +-- https://arxiv.org/pdf/1602.07868v3.pdf +local WeightNorm, parent = torch.class("nn.WeightNorm", "nn.Decorator") + +function WeightNorm:__init(module, outputDim) + -- this container will apply Weight Normalization to any module it wraps + -- it accepts parameter ``outputDim`` that represents the dimension of the output of the weight + -- if outputDim is not 1, the container will transpose the weight + -- if the weight is not 2D, the container will view the weight into a 2D shape + -- that is nOut x (nIn x kw x dw x ...) + + parent.__init(self, module) + assert(module.weight) + + if module.bias then + self.bias = module.bias + self.gradBias = module.gradBias + end + self.gradWeight = module.gradWeight + self.weight = module.weight + + self.outputDim = outputDim or 1 + + -- track the non-output weight dimensions + self.otherDims = 1 + for i = 1, self.weight:dim() do + if i ~= self.outputDim then + self.otherDims = self.otherDims * self.weight:size(i) + end + end + + -- view size for weight norm 2D calculations + self.viewIn = torch.LongStorage({self.weight:size(self.outputDim), self.otherDims}) + + -- view size back to original weight + self.viewOut = self.weight:size() + self.weightSize = self.weight:size() + + -- bubble outputDim size up to the front + for i = self.outputDim - 1, 1, -1 do + self.viewOut[i], self.viewOut[i + 1] = self.viewOut[i + 1], self.viewOut[i] + end + + -- weight is reparametrized to decouple the length from the direction + -- such that w = g * ( v / ||v|| ) + self.v = torch.Tensor(self.viewIn[1], self.viewIn[2]) + self.g = torch.Tensor(self.viewIn[1]) + + self._norm = torch.Tensor(self.viewIn[1]) + self._scale = torch.Tensor(self.viewIn[1]) + + -- gradient of g + self.gradG = torch.Tensor(self.viewIn[1]):zero() + -- gradient of v + self.gradV = torch.Tensor(self.viewIn) + + self:resetInit() +end + +function WeightNorm:permuteIn(inpt) + local ans = inpt + for i = self.outputDim - 1, 1, -1 do + ans = ans:transpose(i, i+1) + end + return ans +end + +function WeightNorm:permuteOut(inpt) + local ans = inpt + for i = 1, self.outputDim - 1 do + ans = ans:transpose(i, i+1) + end + return ans +end + +function WeightNorm:resetInit(inputSize, outputSize) + self.v:normal(0, math.sqrt(2/self.viewIn[2])) + self.g:norm(self.v, 2, 2) + if self.bias then + self.bias:zero() + end +end 
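+-- A minimal usage sketch (hypothetical, not from the original file): wrap any
+-- module exposing a .weight, and the container maintains w = g * (v / ||v||):
+--   local wn = nn.WeightNorm(nn.Linear(10, 5))   -- outputDim defaults to 1
+--   local y = wn:forward(torch.rand(3, 10))
+--   local params, gradParams = wn:parameters()   -- {v, g, bias} and their grads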
+ +function WeightNorm:evaluate() + if not(self.train == false) then + self:updateWeight() + parent.evaluate(self) + end +end + +function WeightNorm:updateWeight() + -- view to 2D when weight norm container operates + self.gradV:copy(self:permuteIn(self.weight)) + self.gradV = self.gradV:view(self.viewIn) + + -- ||w|| + self._norm:norm(self.v, 2, 2):pow(2):add(10e-5):sqrt() + -- g * w / ||w|| + self.gradV:copy(self.v) + self._scale:copy(self.g):cdiv(self._norm) + self.gradV:cmul(self._scale:view(self.viewIn[1], 1) + :expand(self.viewIn[1], self.viewIn[2])) + + -- otherwise maintain size of original module weight + self.gradV = self.gradV:view(self.viewOut) + + self.weight:copy(self:permuteOut(self.gradV)) +end + +function WeightNorm:updateOutput(input) + if not(self.train == false) then + self:updateWeight() + end + self.output:set(self.modules[1]:updateOutput(input)) + return self.output +end + +function WeightNorm:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self.modules[1]:accGradParameters(input, gradOutput, scale) + + self.weight:copy(self:permuteIn(self.weight)) + self.gradV:copy(self:permuteIn(self.gradWeight)) + self.weight = self.weight:view(self.viewIn) + + local norm = self._norm:view(self.viewIn[1], 1):expand(self.viewIn[1], self.viewIn[2]) + local scale = self._scale:view(self.viewIn[1], 1):expand(self.viewIn[1], self.viewIn[2]) + + -- dL / dw * (w / ||w||) + self.weight:copy(self.gradV) + self.weight:cmul(self.v):cdiv(norm) + self.gradG:sum(self.weight, 2) + + -- dL / dw * g / ||w|| + self.gradV:cmul(scale) + + -- dL / dg * (w * g / ||w||^2) + self.weight:copy(self.v):cmul(scale):cdiv(norm) + self.weight:cmul(self.gradG:view(self.viewIn[1], 1) + :expand(self.viewIn[1], self.viewIn[2])) + + -- dL / dv update + self.gradV:add(-1, self.weight) + + self.gradV = self.gradV:view(self.viewOut) + self.weight = self.weight:view(self.viewOut) + self.gradWeight:copy(self:permuteOut(self.gradV)) +end + +function WeightNorm:updateGradInput(input, gradOutput) + self.gradInput:set(self.modules[1]:updateGradInput(input, gradOutput)) + return self.gradInput +end + +function WeightNorm:zeroGradParameters() + self.modules[1]:zeroGradParameters() + self.gradV:zero() + self.gradG:zero() +end + +function WeightNorm:updateParameters(lr) + self.modules[1]:updateParameters(lr) + self.g:add(-lr, self.gradG) + self.v:add(-lr, self.gradV) +end + +function WeightNorm:parameters() + if self.bias then + return {self.v, self.g, self.bias}, {self.gradV, self.gradG, self.gradBias} + else + return {self.v, self.g}, {self.gradV, self.gradG} + end +end + +function WeightNorm:write(file) + -- Don't save weight and gradWeight since we can easily re-compute it from v + -- and g. 
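+   -- (hypothetical round trip: torch.save('wn.t7', wn) then torch.load('wn.t7')
+   -- re-materializes weight and gradWeight from v and g via read() below)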
+ local weight = self.modules[1].weight + local gradWeight = self.modules[1].gradWeight + self.weight = nil + self.gradWeight = nil + self.modules[1].weight = nil + self.modules[1].gradWeight = nil + if not self.weightSize then + self.weightSize = weight:size() + end + + parent.write(self, file) + + self.modules[1].weight = weight + self.modules[1].gradWeight = gradWeight + self.weight = weight + self.gradWeight = gradWeight +end + +function WeightNorm:read(file) + parent.read(self, file) + + -- Re-compute weight and gradWeight + if not self.weight then + self.modules[1].weight = self.v.new(self.weightSize) + self.modules[1].gradWeight = self.v.new(self.weightSize) + self.weight = self.modules[1].weight + self.gradWeight = self.modules[1].gradWeight + self:updateWeight() + self.gradWeight:copy(self:permuteOut(self.gradV)) + end +end diff --git a/contrib/lua-torch/nn/WeightedEuclidean.lua b/contrib/lua-torch/nn/WeightedEuclidean.lua new file mode 100644 index 000000000..dbf4158a9 --- /dev/null +++ b/contrib/lua-torch/nn/WeightedEuclidean.lua @@ -0,0 +1,244 @@ +local WeightedEuclidean, parent = torch.class('nn.WeightedEuclidean', 'nn.Module') + +function WeightedEuclidean:__init(inputSize,outputSize) + parent.__init(self) + + self.weight = torch.Tensor(inputSize,outputSize) + self.gradWeight = torch.Tensor(inputSize,outputSize) + + -- each template (output dim) has its own diagonal covariance matrix + self.diagCov = torch.Tensor(inputSize,outputSize) + self.gradDiagCov = torch.Tensor(inputSize,outputSize) + + self:reset() +end + +function WeightedEuclidean:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + self.weight:uniform(-stdv, stdv) + self.diagCov:fill(1) +end + +local function view(res, src, ...) 
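+   -- views src with the given sizes when it is contiguous; otherwise falls
+   -- back to reshape, which copies (the result is written into res)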
+ local args = {...} + if src:isContiguous() then + res:view(src, table.unpack(args)) + else + res:reshape(src, table.unpack(args)) + end +end + +function WeightedEuclidean:updateOutput(input) + -- lazy-initialize + self._diagCov = self._diagCov or self.output.new() + + self._input = self._input or input.new() + self._weight = self._weight or self.weight.new() + self._expand = self._expand or self.output.new() + self._expand2 = self._expand2 or self.output.new() + self._expand3 = self._expand3 or self.output.new() + self._repeat = self._repeat or self.output.new() + self._repeat2 = self._repeat2 or self.output.new() + self._repeat3 = self._repeat3 or self.output.new() + + local inputSize, outputSize = self.weight:size(1), self.weight:size(2) + + -- y_j = || c_j * (w_j - x) || + if input:dim() == 1 then + view(self._input, input, inputSize, 1) + self._expand:expandAs(self._input, self.weight) + self._repeat:resizeAs(self._expand):copy(self._expand) + self._repeat:add(-1, self.weight) + self._repeat:cmul(self.diagCov) + self.output:norm(self._repeat, 2, 1) + self.output:resize(outputSize) + elseif input:dim() == 2 then + local batchSize = input:size(1) + + view(self._input, input, batchSize, inputSize, 1) + self._expand:expand(self._input, batchSize, inputSize, outputSize) + -- make the expanded tensor contiguous (requires lots of memory) + self._repeat:resizeAs(self._expand):copy(self._expand) + + self._weight:view(self.weight, 1, inputSize, outputSize) + self._expand2:expandAs(self._weight, self._repeat) + + self._diagCov:view(self.diagCov, 1, inputSize, outputSize) + self._expand3:expandAs(self._diagCov, self._repeat) + if torch.type(input) == 'torch.CudaTensor' then + -- requires lots of memory, but minimizes cudaMallocs and loops + self._repeat2:resizeAs(self._expand2):copy(self._expand2) + self._repeat:add(-1, self._repeat2) + self._repeat3:resizeAs(self._expand3):copy(self._expand3) + self._repeat:cmul(self._repeat3) + else + self._repeat:add(-1, self._expand2) + self._repeat:cmul(self._expand3) + end + + self.output:norm(self._repeat, 2, 2) + self.output:resize(batchSize, outputSize) + else + error"1D or 2D input expected" + end + return self.output +end + +function WeightedEuclidean:updateGradInput(input, gradOutput) + if not self.gradInput then + return + end + + self._div = self._div or input.new() + self._output = self._output or self.output.new() + self._expand4 = self._expand4 or input.new() + self._gradOutput = self._gradOutput or input.new() + + if not self.fastBackward then + self:updateOutput(input) + end + + local inputSize, outputSize = self.weight:size(1), self.weight:size(2) + + --[[ + dy_j -2 * c_j * c_j * (w_j - x) c_j * c_j * (x - w_j) + ---- = -------------------------- = --------------------- + dx 2 || c_j * (w_j - x) || y_j + --]] + + -- to prevent div by zero (NaN) bugs + self._output:resizeAs(self.output):copy(self.output):add(0.0000001) + view(self._gradOutput, gradOutput, gradOutput:size()) + self._div:cdiv(gradOutput, self._output) + if input:dim() == 1 then + self._div:resize(1, outputSize) + self._expand4:expandAs(self._div, self.weight) + + if torch.type(input) == 'torch.CudaTensor' then + self._repeat2:resizeAs(self._expand4):copy(self._expand4) + self._repeat2:cmul(self._repeat) + else + self._repeat2:cmul(self._repeat, self._expand4) + end + + self._repeat2:cmul(self.diagCov) + self.gradInput:sum(self._repeat2, 2) + self.gradInput:resizeAs(input) + elseif input:dim() == 2 then + local batchSize = input:size(1) + + self._div:resize(batchSize, 1,
outputSize) + self._expand4:expand(self._div, batchSize, inputSize, outputSize) + + if torch.type(input) == 'torch.CudaTensor' then + self._repeat2:resizeAs(self._expand4):copy(self._expand4) + self._repeat2:cmul(self._repeat) + self._repeat2:cmul(self._repeat3) + else + self._repeat2:cmul(self._repeat, self._expand4) + self._repeat2:cmul(self._expand3) + end + + self.gradInput:sum(self._repeat2, 3) + self.gradInput:resizeAs(input) + else + error"1D or 2D input expected" + end + + return self.gradInput +end + +function WeightedEuclidean:accGradParameters(input, gradOutput, scale) + local inputSize, outputSize = self.weight:size(1), self.weight:size(2) + scale = scale or 1 + + --[[ + dy_j 2 * c_j * c_j * (w_j - x) c_j * c_j * (w_j - x) + ---- = ------------------------- = --------------------- + dw_j 2 || c_j * (w_j - x) || y_j + + dy_j 2 * c_j * (w_j - x)^2 c_j * (w_j - x)^2 + ---- = ----------------------- = ----------------- + dc_j 2 || c_j * (w_j - x) || y_j + --]] + -- assumes a preceding call to updateGradInput + if input:dim() == 1 then + self.gradWeight:add(-scale, self._repeat2) + + self._repeat:cdiv(self.diagCov) + self._repeat:cmul(self._repeat) + self._repeat:cmul(self.diagCov) + + if torch.type(input) == 'torch.CudaTensor' then + self._repeat2:resizeAs(self._expand4):copy(self._expand4) + self._repeat2:cmul(self._repeat) + else + self._repeat2:cmul(self._repeat, self._expand4) + end + + self.gradDiagCov:add(self._repeat2) + elseif input:dim() == 2 then + self._sum = self._sum or input.new() + self._sum:sum(self._repeat2, 1) + self._sum:resize(inputSize, outputSize) + self.gradWeight:add(-scale, self._sum) + + if torch.type(input) == 'torch.CudaTensor' then + -- requires lots of memory, but minimizes cudaMallocs and loops + self._repeat:cdiv(self._repeat3) + self._repeat:cmul(self._repeat) + self._repeat:cmul(self._repeat3) + self._repeat2:resizeAs(self._expand4):copy(self._expand4) + self._repeat:cmul(self._repeat2) + else + self._repeat:cdiv(self._expand3) + self._repeat:cmul(self._repeat) + self._repeat:cmul(self._expand3) + self._repeat:cmul(self._expand4) + end + + self._sum:sum(self._repeat, 1) + self._sum:resize(inputSize, outputSize) + self.gradDiagCov:add(scale, self._sum) + else + error"1D or 2D input expected" + end +end + +function WeightedEuclidean:type(type, tensorCache) + if type then + -- prevent premature memory allocations + self._input = nil + self._output = nil + self._gradOutput = nil + self._weight = nil + self._div = nil + self._sum = nil + self._expand = nil + self._expand2 = nil + self._expand3 = nil + self._expand4 = nil + self._repeat = nil + self._repeat2 = nil + self._repeat3 = nil + end + return parent.type(self, type, tensorCache) +end + +function WeightedEuclidean:parameters() + return {self.weight, self.diagCov}, {self.gradWeight, self.gradDiagCov} +end + +function WeightedEuclidean:accUpdateGradParameters(input, gradOutput, lr) + local gradWeight = self.gradWeight + local gradDiagCov = self.gradDiagCov + self.gradWeight = self.weight + self.gradDiagCov = self.diagCov + self:accGradParameters(input, gradOutput, -lr) + self.gradWeight = gradWeight + self.gradDiagCov = gradDiagCov +end diff --git a/contrib/lua-torch/nn/WeightedMSECriterion.lua b/contrib/lua-torch/nn/WeightedMSECriterion.lua new file mode 100644 index 000000000..933472937 --- /dev/null +++ b/contrib/lua-torch/nn/WeightedMSECriterion.lua @@ -0,0 +1,45 @@ +local WeightedMSECriterion, parent = torch.class('nn.WeightedMSECriterion','nn.MSECriterion') + +function 
WeightedMSECriterion:__init(w) + parent.__init(self) + self.weight = w:clone() +end + +function WeightedMSECriterion:updateOutput(input,target) + self.buffer = self.buffer or input.new() + self.buffer:resizeAs(input):copy(target) + if input:dim() - 1 == self.weight:dim() then + for i=1,input:size(1) do + self.buffer[i]:cmul(self.weight) + end + else + self.buffer:cmul(self.weight) + end + self.output_tensor = self.output_tensor or input.new(1) + input.THNN.MSECriterion_updateOutput( + input:cdata(), + self.buffer:cdata(), + self.output_tensor:cdata(), + self.sizeAverage + ) + self.output = self.output_tensor[1] + return self.output +end + +function WeightedMSECriterion:updateGradInput(input, target) + self.buffer:resizeAs(input):copy(target) + if input:dim() - 1 == self.weight:dim() then + for i=1,input:size(1) do + self.buffer[i]:cmul(self.weight) + end + else + self.buffer:cmul(self.weight) + end + input.THNN.MSECriterion_updateGradInput( + input:cdata(), + self.buffer:cdata(), + self.gradInput:cdata(), + self.sizeAverage + ) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/WhiteNoise.lua b/contrib/lua-torch/nn/WhiteNoise.lua new file mode 100644 index 000000000..f1defb646 --- /dev/null +++ b/contrib/lua-torch/nn/WhiteNoise.lua @@ -0,0 +1,40 @@ +local WhiteNoise, parent = torch.class('nn.WhiteNoise', 'nn.Module') + +function WhiteNoise:__init(mean, std) + parent.__init(self) + self.mean = mean or 0 + self.std = std or 0.1 + self.noise = torch.Tensor() +end + +function WhiteNoise:updateOutput(input) + self.output:resizeAs(input):copy(input) + if self.train ~= false then + self.noise:resizeAs(input) + self.noise:normal(self.mean, self.std) + self.output:add(self.noise) + else + if self.mean ~= 0 then + self.output:add(self.mean) + end + end + return self.output +end + +function WhiteNoise:updateGradInput(input, gradOutput) + if self.train ~= false then + -- Simply return the gradients. + self.gradInput:resizeAs(gradOutput):copy(gradOutput) + else + error('backprop only defined while training') + end + return self.gradInput +end + +function WhiteNoise:clearState() + self.noise:set() +end + +function WhiteNoise:__tostring__() + return string.format('%s mean: %f, std: %f', torch.type(self), self.mean, self.std) +end diff --git a/contrib/lua-torch/nn/ZeroGrad.lua b/contrib/lua-torch/nn/ZeroGrad.lua new file mode 100644 index 000000000..7c941ce1c --- /dev/null +++ b/contrib/lua-torch/nn/ZeroGrad.lua @@ -0,0 +1,14 @@ +local ZeroGrad, parent = torch.class('nn.ZeroGrad', 'nn.Module') + +function ZeroGrad:updateOutput(input) + self.output:set(input) + return self.output +end + +-- the gradient is simply zeroed. +-- useful when you don't want to backpropagate through certain paths.
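+-- e.g. a hypothetical sketch: backpropagation stops at the ZeroGrad, so every
+-- module placed before it in the container receives zero gradient:
+--   nn.Sequential():add(frozenPart):add(nn.ZeroGrad()):add(trainablePart)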
+function ZeroGrad:updateGradInput(input, gradOutput) + self.gradInput = nn.utils.recursiveResizeAs(self.gradInput, input) + self.gradInput = nn.utils.recursiveFill(self.gradInput, 0) + return self.gradInput +end diff --git a/contrib/lua-torch/nn/ZipTable.lua b/contrib/lua-torch/nn/ZipTable.lua new file mode 100644 index 000000000..7b18619eb --- /dev/null +++ b/contrib/lua-torch/nn/ZipTable.lua @@ -0,0 +1,34 @@ +local ZipTable, parent = torch.class('nn.ZipTable', 'nn.Module') + +-- input : { {a1,a2}, {b1,b2}, {c1,c2} } +-- output : { {a1,b1,c1}, {a2,b2,c2} } +function ZipTable:__init() + parent.__init(self) + self.output = {} + self.gradInput = {} +end + +function ZipTable:updateOutput(inputTable) + self.output = {} + for i,inTable in ipairs(inputTable) do + for j,input in ipairs(inTable) do + local output = self.output[j] or {} + output[i] = input + self.output[j] = output + end + end + return self.output +end + +function ZipTable:updateGradInput(inputTable, gradOutputTable) + self.gradInput = {} + for i,gradOutTable in ipairs(gradOutputTable) do + for j,gradOutput in ipairs(gradOutTable) do + local gradInput = self.gradInput[j] or {} + gradInput[i] = gradOutput + self.gradInput[j] = gradInput + end + end + return self.gradInput +end + diff --git a/contrib/lua-torch/nn/ZipTableOneToMany.lua b/contrib/lua-torch/nn/ZipTableOneToMany.lua new file mode 100644 index 000000000..d4a80fe0d --- /dev/null +++ b/contrib/lua-torch/nn/ZipTableOneToMany.lua @@ -0,0 +1,37 @@ +local ZipTableOneToMany, parent = torch.class('nn.ZipTableOneToMany', 'nn.Module') + +-- based on ZipTable in dpnn + +-- input : { v, {a, b, c} } +-- output : { {v,a}, {v,b}, {v,c} } +function ZipTableOneToMany:__init() + parent.__init(self) + self.output = {} + self.gradInput = {} + -- make buffer to update during forward/backward + self.gradInputEl = torch.Tensor() +end + +function ZipTableOneToMany:updateOutput(input) + assert(#input == 2, "input must be table of element and table") + local inputEl, inputTable = input[1], input[2] + self.output = {} + for i,v in ipairs(inputTable) do + self.output[i] = {inputEl, v} + end + return self.output +end + +function ZipTableOneToMany:updateGradInput(input, gradOutput) + assert(#input == 2, "input must be table of element and table") + local inputEl, inputTable = input[1], input[2] + self.gradInputEl:resizeAs(inputEl):zero() + local gradInputTable = {} + for i,gradV in ipairs(gradOutput) do + self.gradInputEl:add(gradV[1]) + gradInputTable[i] = gradV[2] + end + self.gradInput = {self.gradInputEl, gradInputTable} + return self.gradInput +end + diff --git a/contrib/lua-torch/nn/hessian.lua b/contrib/lua-torch/nn/hessian.lua new file mode 100644 index 000000000..b841d8c59 --- /dev/null +++ b/contrib/lua-torch/nn/hessian.lua @@ -0,0 +1,391 @@ +---------------------------------------------------------------------- +-- hessian.lua: this file appends extra methods to modules in nn, +-- to estimate diagonal elements of the Hessian. This is useful +-- to condition learning rates individually. 
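+-- A workflow sketch (hypothetical; inferred from the methods defined below,
+-- with `mlp` an nn.Sequential and `criterion` an nn.MSECriterion):
+--   nn.hessian.enable()
+--   mlp:initDiagHessianParameters()
+--   local out = mlp:forward(input)
+--   local dh  = criterion:updateDiagHessianInput(out, target)
+--   mlp:updateDiagHessianInput(input, dh)
+--   mlp:accDiagHessianParameters(input, dh)
+--   local p, g, h = mlp:getParameters()  -- h holds the diagonal Hessian terms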
+---------------------------------------------------------------------- +nn.hessian = {} + +---------------------------------------------------------------------- +-- Hessian code is still experimental, +-- and deactivated by default +---------------------------------------------------------------------- +function nn.hessian.enable() + + local function accDiagHessianParameters(module, input, diagHessianOutput, gw, hw) + if #gw ~= #hw then + error('Number of gradients is nto equal to number of hessians') + end + module.inputSq = module.inputSq or input.new() + module.inputSq:resizeAs(input) + torch.cmul(module.inputSq, input, input) + -- replace gradients with hessian + for i=1,#gw do + local gwname = gw[i] + local hwname = hw[i] + local gwval = module[gwname] + local hwval = module[hwname] + if hwval == nil then + module[hwname] = gwval.new():resizeAs(gwval) + hwval = module[hwname] + end + module[gwname] = hwval + module[hwname] = gwval + end + local oldOutput = module.output + module.output = module.output.new():resizeAs(oldOutput) + module.forward(module, module.inputSq) + module.accGradParameters(module, module.inputSq, diagHessianOutput, 1) + -- put back gradients + for i=1,#gw do + local gwname = gw[i] + local hwname = hw[i] + local gwval = module[gwname] + local hwval = module[hwname] + module[gwname] = hwval + module[hwname] = gwval + end + module.output = oldOutput + end + nn.hessian.accDiagHessianParameters = accDiagHessianParameters + + local function updateDiagHessianInput(module, input, diagHessianOutput, w, wsq) + if #w ~= #wsq then + error('Number of weights is not equal to number of weights squares') + end + module.diagHessianInput = module.diagHessianInput or input.new() + module.diagHessianInput:resizeAs(input):zero() + + local gi = module.gradInput + module.gradInput = module.diagHessianInput + for i=1,#w do + local wname = w[i] + local wsqname = wsq[i] + local wval = module[wname] + local wsqval = module[wsqname] + if wsqval == nil then + module[wsqname] = wval.new() + wsqval = module[wsqname] + end + wsqval:resizeAs(wval) + torch.cmul(wsqval, wval, wval) + module[wsqname] = wval + module[wname] = wsqval + end + module.updateGradInput(module,input,diagHessianOutput) + for i=1,#w do + local wname = w[i] + local wsqname = wsq[i] + local wval = module[wname] + local wsqval = module[wsqname] + module[wname] = wsqval + module[wsqname] = wval + end + module.gradInput = gi + end + nn.hessian.updateDiagHessianInput = updateDiagHessianInput + + local function updateDiagHessianInputPointWise(module, input, diagHessianOutput) + local tdh = diagHessianOutput.new():resizeAs(diagHessianOutput):fill(1) + updateDiagHessianInput(module,input,tdh,{},{}) + module.diagHessianInput:cmul(module.diagHessianInput) + module.diagHessianInput:cmul(diagHessianOutput) + end + nn.hessian.updateDiagHessianInputPointWise = updateDiagHessianInputPointWise + + local function initDiagHessianParameters(module,gw,hw) + module.diagHessianInput = module.diagHessianInput or module.gradInput.new(); + for i=1,#gw do + module[hw[i]] = module[hw[i]] or module[gw[i]].new():resizeAs(module[gw[i]]) + end + end + nn.hessian.initDiagHessianParameters = initDiagHessianParameters + + ---------------------------------------------------------------------- + -- Module + ---------------------------------------------------------------------- + function nn.Module.updateDiagHessianInput(self, input, diagHessianOutput) + error(torch.typename(self) .. 
':updateDiagHessianInput() is undefined') + end + + function nn.Module.accDiagHessianParameters(self, input, diagHessianOutput) + end + + function nn.Module.initDiagHessianParameters() + end + + ---------------------------------------------------------------------- + -- Sequential + ---------------------------------------------------------------------- + function nn.Sequential.initDiagHessianParameters(self) + for i=1,#self.modules do + self.modules[i]:initDiagHessianParameters() + end + end + + function nn.Sequential.updateDiagHessianInput(self, input, diagHessianOutput) + local currentDiagHessianOutput = diagHessianOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentDiagHessianOutput = currentModule:updateDiagHessianInput(previousModule.output, currentDiagHessianOutput) + currentModule = previousModule + end + currentDiagHessianOutput = currentModule:updateDiagHessianInput(input, currentDiagHessianOutput) + self.diagHessianInput = currentDiagHessianOutput + return currentDiagHessianOutput + end + + function nn.Sequential.accDiagHessianParameters(self, input, diagHessianOutput) + local currentDiagHessianOutput = diagHessianOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentModule:accDiagHessianParameters(previousModule.output, currentDiagHessianOutput) + currentDiagHessianOutput = currentModule.diagHessianInput + currentModule = previousModule + end + currentModule:accDiagHessianParameters(input, currentDiagHessianOutput) + end + + ---------------------------------------------------------------------- + -- Criterion + ---------------------------------------------------------------------- + function nn.Criterion.updateDiagHessianInput(self, input, diagHessianOutput) + error(torch.typename(self) .. 
':updateDiagHessianInput() is undefined') + end + + ---------------------------------------------------------------------- + -- MSECriterion + ---------------------------------------------------------------------- + function nn.MSECriterion.updateDiagHessianInput(self, input, target) + self.diagHessianInput = self.diagHessianInput or input.new() + local val = 2 + if self.sizeAverage then + val = val / input:nElement() + end + self.diagHessianInput:resizeAs(input):fill(val) + return self.diagHessianInput + end + + ---------------------------------------------------------------------- + -- WeightedMSECriterion + ---------------------------------------------------------------------- + function nn.WeightedMSECriterion.updateDiagHessianInput(self,input,target) + return nn.MSECriterion.updateDiagHessianInput(self,input,target) + end + + ---------------------------------------------------------------------- + -- L1Cost + ---------------------------------------------------------------------- + function nn.L1Cost.updateDiagHessianInput(self,input) + self.diagHessianInput = self.diagHessianInput or input.new() + self.diagHessianInput:resizeAs(input) + self.diagHessianInput:fill(1) + self.diagHessianInput[torch.eq(input,0)] = 0 + return self.diagHessianInput + end + + ---------------------------------------------------------------------- + -- Linear + ---------------------------------------------------------------------- + function nn.Linear.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'}) + return self.diagHessianInput + end + + function nn.Linear.accDiagHessianParameters(self, input, diagHessianOutput) + accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'}) + end + + function nn.Linear.initDiagHessianParameters(self) + initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'}) + end + + ---------------------------------------------------------------------- + -- SpatialConvolution + ---------------------------------------------------------------------- + function nn.SpatialConvolution.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'}) + return self.diagHessianInput + end + + function nn.SpatialConvolution.accDiagHessianParameters(self, input, diagHessianOutput) + accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'}) + end + + function nn.SpatialConvolution.initDiagHessianParameters(self) + initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'}) + end + + ---------------------------------------------------------------------- + -- SpatialConvolutionLocal + ---------------------------------------------------------------------- + function nn.SpatialConvolutionLocal.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'}) + return self.diagHessianInput + end + + function nn.SpatialConvolutionLocal.accDiagHessianParameters(self, input, diagHessianOutput) + accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'}) + end + + function nn.SpatialConvolutionLocal.initDiagHessianParameters(self) + 
initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'}) + end + + ---------------------------------------------------------------------- + -- SpatialFullConvolution + ---------------------------------------------------------------------- + function nn.SpatialFullConvolution.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'}) + return self.diagHessianInput + end + + function nn.SpatialFullConvolution.accDiagHessianParameters(self, input, diagHessianOutput) + accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'}) + end + + function nn.SpatialFullConvolution.initDiagHessianParameters(self) + initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'}) + end + + ---------------------------------------------------------------------- + -- SpatialConvolutionMap + ---------------------------------------------------------------------- + function nn.SpatialConvolutionMap.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInput(self, input, diagHessianOutput, {'weight','bias'}, {'weightSq','biasSq'}) + return self.diagHessianInput + end + + function nn.SpatialConvolutionMap.accDiagHessianParameters(self, input, diagHessianOutput) + accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'}) + end + + function nn.SpatialConvolutionMap.initDiagHessianParameters(self) + initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'}) + end + + ---------------------------------------------------------------------- + -- SpatialFullConvolutionMap + ---------------------------------------------------------------------- + function nn.SpatialFullConvolutionMap.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'}) + return self.diagHessianInput + end + + function nn.SpatialFullConvolutionMap.accDiagHessianParameters(self, input, diagHessianOutput) + accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'}) + end + + function nn.SpatialFullConvolutionMap.initDiagHessianParameters(self) + initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'}) + end + +---------------------------------------------------------------------- + -- Tanh + ---------------------------------------------------------------------- + function nn.Tanh.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInputPointWise(self, input, diagHessianOutput) + return self.diagHessianInput + end + + ---------------------------------------------------------------------- + -- TanhShrink + ---------------------------------------------------------------------- + function nn.TanhShrink.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInputPointWise(self.tanh, input, diagHessianOutput) + self.diagHessianInput = self.diagHessianInput or input.new():resizeAs(input) + torch.add(self.diagHessianInput, self.tanh.diagHessianInput, diagHessianOutput) + return self.diagHessianInput + end + + ---------------------------------------------------------------------- + -- Square + ---------------------------------------------------------------------- + function 
nn.Square.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInputPointWise(self, input, diagHessianOutput) + return self.diagHessianInput + end + + ---------------------------------------------------------------------- + -- Sqrt + ---------------------------------------------------------------------- + function nn.Sqrt.updateDiagHessianInput(self, input, diagHessianOutput) + updateDiagHessianInputPointWise(self, input, diagHessianOutput) + return self.diagHessianInput + end + + ---------------------------------------------------------------------- + -- Reshape + ---------------------------------------------------------------------- + function nn.Reshape.updateDiagHessianInput(self, input, diagHessianOutput) + self.diagHessianInput = self.diagHessianInput or input.new() + diagHessianOutput = diagHessianOutput:contiguous() + self.diagHessianInput:set(diagHessianOutput):resizeAs(input) + return self.diagHessianInput + end + + ---------------------------------------------------------------------- + -- Parameters manipulation: + -- we modify these functions such that they return hessian coefficients + ---------------------------------------------------------------------- + function nn.Module.parameters(self) + if self.weight and self.bias then + return {self.weight, self.bias}, {self.gradWeight, self.gradBias}, {self.diagHessianWeight, self.diagHessianBias} + elseif self.weight then + return {self.weight}, {self.gradWeight}, {self.diagHessianWeight} + elseif self.bias then + return {self.bias}, {self.gradBias}, {self.diagHessianBias} + else + return + end + end + + function nn.Module.getParameters(self) + -- get parameters + local parameters,gradParameters,hessianParameters = self:parameters() + -- flatten parameters and gradients + local flatParameters = nn.Module.flatten(parameters) + collectgarbage() + local flatGradParameters = nn.Module.flatten(gradParameters) + collectgarbage() + local flatHessianParameters + if hessianParameters and hessianParameters[1] then + flatHessianParameters = nn.Module.flatten(hessianParameters) + collectgarbage() + end + + -- return new flat vector that contains all discrete parameters + return flatParameters, flatGradParameters, flatHessianParameters + end + + function nn.Sequential.parameters(self) + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + local ggw = {} + for i=1,#self.modules do + local mw,mgw,mggw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + tinsert(ggw,mggw) + end + end + return w,gw,ggw + end + + ---------------------------------------------------------------------- + -- Avoid multiple calls to enable() + ---------------------------------------------------------------------- + function nn.hessian.enable() + end +end diff --git a/contrib/lua-torch/nn/init.lua b/contrib/lua-torch/nn/init.lua new file mode 100755 index 000000000..4319a8868 --- /dev/null +++ b/contrib/lua-torch/nn/init.lua @@ -0,0 +1,221 @@ +require('torch') + +nn = {} -- define the global nn table + +require('nn.THNN') + +require('nn.utils') + + +require('nn.ErrorMessages') +require('nn.Module') + +require('nn.Container') +require('nn.Concat') +require('nn.Parallel') +require('nn.Sequential') +require('nn.DepthConcat') + +require('nn.Decorator') +require('nn.Bottle') +require('nn.WeightNorm') +require('nn.DontCast') +require('nn.NaN') +require('nn.Profile') + +require('nn.Linear') 
+require('nn.LinearWeightNorm') +require('nn.Bilinear') +require('nn.PartialLinear') +require('nn.SparseLinear') +require('nn.IndexLinear') +require('nn.Reshape') +require('nn.View') +require('nn.Contiguous') +require('nn.Select') +require('nn.Narrow') +require('nn.Index') +require('nn.Squeeze') +require('nn.Unsqueeze') +require('nn.Replicate') +require('nn.Transpose') +require('nn.BatchNormalization') +require('nn.LayerNormalization') +require('nn.Padding') +require('nn.GradientReversal') +require('nn.MaskedSelect') + +require('nn.Copy') +require('nn.Min') +require('nn.Max') +require('nn.Sum') +require('nn.Mean') +require('nn.CMul') +require('nn.Mul') +require('nn.MulConstant') +require('nn.CAdd') +require('nn.Add') +require('nn.AddConstant') +require('nn.Constant') +require('nn.Dropout') +require('nn.SpatialDropout') +require('nn.VolumetricDropout') +require('nn.WhiteNoise') +require('nn.OneHot') +require('nn.PrintSize') +require('nn.ZeroGrad') + +require('nn.CAddTable') +require('nn.CDivTable') +require('nn.CMulTable') +require('nn.CSubTable') +require('nn.CMaxTable') +require('nn.CMinTable') +require('nn.CAddTensorTable') + +require('nn.Euclidean') +require('nn.WeightedEuclidean') +require('nn.PairwiseDistance') +require('nn.CosineDistance') +require('nn.DotProduct') +require('nn.Normalize') +require('nn.Cosine') +require('nn.Kmeans') + +require('nn.Exp') +require('nn.Log') +require('nn.HardTanh') +require('nn.Clamp') +require('nn.LogSigmoid') +require('nn.LogSoftMax') +require('nn.Sigmoid') +require('nn.SoftMax') +require('nn.SoftMin') +require('nn.SoftPlus') +require('nn.SoftSign') +require('nn.Tanh') +require('nn.TanhShrink') +require('nn.Abs') +require('nn.Power') +require('nn.Square') +require('nn.Sqrt') +require('nn.HardShrink') +require('nn.SoftShrink') +require('nn.Threshold') +require('nn.Maxout') +require('nn.ReLU') +require('nn.ReLU6') +require('nn.PReLU') +require('nn.CReLU') +require('nn.LeakyReLU') +require('nn.SpatialSoftMax') +require('nn.SpatialLogSoftMax') +require('nn.RReLU') +require('nn.ELU') +require('nn.GatedLinearUnit') + +require('nn.LookupTable') +require('nn.SpatialConvolution') +require('nn.SpatialConvolutionLocal') +require('nn.SpatialFullConvolution') +require('nn.SpatialFullConvolutionMap') +require('nn.SpatialConvolutionMM') +require('nn.SpatialDepthWiseConvolution') +require('nn.SpatialConvolutionMap') +require('nn.SpatialDilatedConvolution') +require('nn.SpatialSubSampling') +require('nn.SpatialMaxPooling') +require('nn.SpatialDilatedMaxPooling') +require('nn.SpatialMaxUnpooling') +require('nn.SpatialFractionalMaxPooling') +require('nn.SpatialLPPooling') +require('nn.SpatialAveragePooling') +require('nn.SpatialAdaptiveMaxPooling') +require('nn.SpatialAdaptiveAveragePooling') +require('nn.TemporalConvolution') +require('nn.TemporalSubSampling') +require('nn.TemporalMaxPooling') +require('nn.TemporalDynamicKMaxPooling') +require('nn.TemporalRowConvolution') +require('nn.SpatialSubtractiveNormalization') +require('nn.SpatialDivisiveNormalization') +require('nn.SpatialContrastiveNormalization') +require('nn.SpatialCrossMapLRN') +require('nn.SpatialZeroPadding') +require('nn.SpatialReflectionPadding') +require('nn.SpatialReplicationPadding') +require('nn.SpatialUpSamplingNearest') +require('nn.SpatialUpSamplingBilinear') +require('nn.SpatialBatchNormalization') + +require('nn.VolumetricConvolution') +require('nn.VolumetricFullConvolution') +require('nn.VolumetricDilatedConvolution') +require('nn.VolumetricMaxPooling') 
+require('nn.VolumetricDilatedMaxPooling') +require('nn.VolumetricFractionalMaxPooling') +require('nn.VolumetricMaxUnpooling') +require('nn.VolumetricAveragePooling') +require('nn.VolumetricBatchNormalization') +require('nn.VolumetricReplicationPadding') + +require('nn.GPU') + +require('nn.ParallelTable') +require('nn.Identity') +require('nn.ConcatTable') +require('nn.SplitTable') +require('nn.JoinTable') +require('nn.SelectTable') +require('nn.MixtureTable') +require('nn.CriterionTable') +require('nn.FlattenTable') +require('nn.NarrowTable') +require('nn.MapTable') +require('nn.ZipTable') +require('nn.ZipTableOneToMany') +require('nn.Collapse') +require('nn.Convert') + +require('nn.Criterion') +require('nn.MSECriterion') +require('nn.SpatialAutoCropMSECriterion') +require('nn.SmoothL1Criterion') +require('nn.MarginCriterion') +require('nn.SoftMarginCriterion') +require('nn.AbsCriterion') +require('nn.ClassNLLCriterion') +require('nn.SpatialClassNLLCriterion') +require('nn.ClassSimplexCriterion') +require('nn.DistKLDivCriterion') +require('nn.MultiCriterion') +require('nn.L1HingeEmbeddingCriterion') +require('nn.HingeEmbeddingCriterion') +require('nn.CosineEmbeddingCriterion') +require('nn.MarginRankingCriterion') +require('nn.MultiMarginCriterion') +require('nn.MultiLabelMarginCriterion') +require('nn.MultiLabelSoftMarginCriterion') +require('nn.L1Cost') +require('nn.L1Penalty') +require('nn.WeightedMSECriterion') +require('nn.BCECriterion') +require('nn.CrossEntropyCriterion') +require('nn.ParallelCriterion') +require('nn.DistanceRatioCriterion') +require('nn.ModuleCriterion') + +require('nn.PixelShuffle') + +require('nn.StochasticGradient') + +require('nn.MM') +require('nn.MV') + +require('nn.Jacobian') +require('nn.SparseJacobian') +require('nn.hessian') +require('nn.test') + + +return nn diff --git a/contrib/lua-torch/nn/lib/CMakeLists.txt b/contrib/lua-torch/nn/lib/CMakeLists.txt new file mode 100644 index 000000000..de04595f6 --- /dev/null +++ b/contrib/lua-torch/nn/lib/CMakeLists.txt @@ -0,0 +1,5 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) +CMAKE_POLICY(VERSION 2.6) +SET(THNN_INSTALL_LIB_SUBDIR "${RSPAMD_LIBDIR}") +SET(THNN_INSTALL_INCLUDE_SUBDIR "${Torch_INSTALL_INCLUDE_SUBDIR}") +ADD_SUBDIRECTORY(THNN)
\ No newline at end of file
diff --git a/contrib/lua-torch/nn/lib/THNN/CMakeLists.txt b/contrib/lua-torch/nn/lib/THNN/CMakeLists.txt
new file mode 100644
index 000000000..00908a5b1
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/CMakeLists.txt
@@ -0,0 +1,47 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
+CMAKE_POLICY(VERSION 2.6)
+
+IF(NOT TH_LIBRARIES)
+  SET(TH_LIBRARIES "TH")
+ENDIF(NOT TH_LIBRARIES)
+MESSAGE(STATUS "TH_LIBRARIES: ${TH_LIBRARIES}")
+
+IF(NOT THNN_INSTALL_LIB_SUBDIR)
+  SET(THNN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "THNN install library directory")
+  SET(THNN_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "THNN install include subdirectory")
+ENDIF()
+
+# Flags
+# When using MSVC
+IF(MSVC)
+  # we want to respect the standard, and we are bored of those **** .
+  ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)
+  ADD_DEFINITIONS(-DTH_EXPORTS)
+ENDIF(MSVC)
+
+IF (CMAKE_VERSION VERSION_LESS "3.1")
+  SET(CMAKE_C_FLAGS "-std=c99 ${CMAKE_C_FLAGS}")
+ELSE ()
+  SET(CMAKE_C_STANDARD 99)
+ENDIF ()
+
+IF (WITH_OPENMP)
+  FIND_PACKAGE(OpenMP)
+  IF(OPENMP_FOUND)
+    MESSAGE(STATUS "Compiling with OpenMP support")
+    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+  ENDIF(OPENMP_FOUND)
+ENDIF (WITH_OPENMP)
+
+SET(src init.c)
+ADD_LIBRARY(THNN SHARED init.c)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+### Torch packages assume the library prefix is "lib"
+SET_TARGET_PROPERTIES(THNN PROPERTIES
+  PREFIX "lib"
+  IMPORT_PREFIX "lib")
+
+TARGET_LINK_LIBRARIES(THNN ${TH_LIBRARIES})
+INSTALL(TARGETS THNN DESTINATION ${RSPAMD_LIBDIR})
diff --git a/contrib/lua-torch/nn/lib/THNN/README.md b/contrib/lua-torch/nn/lib/THNN/README.md
new file mode 100644
index 000000000..e6c61601d
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/README.md
@@ -0,0 +1,32 @@
+# THNN
+
+THNN is a library that gathers nn's C implementations of neural network modules. It is entirely free of Lua dependencies and can therefore be used in any application that has a C FFI. Note that it contains only fairly low-level functions; an object-oriented C/C++ wrapper will soon be created as a separate library.
+
+There is also a CUDA counterpart of THNN (THCUNN) in the [cunn repository](https://github.com/torch/cunn/tree/master/lib/THCUNN).
+
+## Links
+
+* [API reference](doc/api_reference.md)
+* [Style guidelines](doc/style_guidelines.md)
+
+## Motivation
+
+Torch's neural network package (nn) provided many optimized C implementations of modules, but the source files contained Lua-specific code and headers, so they couldn't easily be compiled and included anywhere else.
+
+THNN is based on the same code, but is written in pure C, so it can easily be included in other projects. **Future C implementations should be committed to THNN.**
+
+## API
+
+THNN is a purely functional library. It provides two to three functions for each module, which perform the most important operations:
+
+* **updateOutput** - applies the module to an input
+* **updateGradInput** - accepts the gradient w.r.t. the output and the previous module input, and computes the gradient w.r.t. that input
+* **accGradParameters** - *(optional, only for modules with parameters)* accepts the gradient w.r.t. the output and the previous module input, and computes the gradient w.r.t. the parameters
+
+For information on argument types, please check the [API reference](doc/api_reference.md).
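To make the calling convention concrete, here is a minimal sketch of driving one of these kernels straight from Lua through the `nn.THNN` FFI bindings shipped in this tree. It assumes the stock binding layer, which supplies the `THNNState *state` argument implicitly and takes tensors as `cdata`; the choice of `HardTanh` and the clamping bounds are illustrative, not part of the README:

```lua
require 'nn'

-- Forward pass: clamp a random vector to [-1, 1] using the raw THNN kernel.
-- The arguments mirror THNN_(HardTanh_updateOutput) minus the implicit state.
local input  = torch.Tensor(4):uniform(-2, 2)
local output = torch.Tensor()
input.THNN.HardTanh_updateOutput(input:cdata(), output:cdata(), -1, 1, false)

-- Backward pass: the gradient flows only where the input was not clamped.
local gradOutput = torch.Tensor(4):fill(1)
local gradInput  = torch.Tensor()
input.THNN.HardTanh_updateGradInput(input:cdata(), gradOutput:cdata(),
                                    gradInput:cdata(), -1, 1, false)
```

The Lua module classes (e.g. `nn.HardTanh`) are thin wrappers around exactly such calls, which is what keeps the C layer free of Lua-specific code.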
+
+## Developer docs
+
+* [Style guidelines](doc/style_guidelines.md)
+
+This section will be expanded when the FFI refactoring is finished.
diff --git a/contrib/lua-torch/nn/lib/THNN/THNN.h b/contrib/lua-torch/nn/lib/THNN/THNN.h
new file mode 100644
index 000000000..0019b7976
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/THNN.h
@@ -0,0 +1,33 @@
+#ifndef THNN_H
+#define THNN_H
+
+#include <stdbool.h>
+#include <TH.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#define THNN_(NAME) TH_CONCAT_3(THNN_, Real, NAME)
+
+#define THIndexTensor THLongTensor
+#define THIndexTensor_(NAME) THLongTensor_ ## NAME
+
+#define THIntegerTensor THIntTensor
+#define THIntegerTensor_(NAME) THIntTensor_ ## NAME
+
+typedef long THIndex_t;
+typedef int THInteger_t;
+typedef void THNNState;
+
+#define THNN_resizeAs_indices(I1, I2) \
+  THLongStorage *size2 = THIndexTensor_(newSizeOf)(I2); \
+  if (!THTensor_(isSize)(I1, size2)) \
+  { \
+    THTensor_(resize)(I1, size2, NULL); \
+  } \
+  THLongStorage_free(size2);
+
+#include "generic/THNN.h"
+#include <THGenerateFloatTypes.h>
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Abs.c b/contrib/lua-torch/nn/lib/THNN/generic/Abs.c
new file mode 100644
index 000000000..28721ec8e
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/Abs.c
@@ -0,0 +1,28 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Abs.c"
+#else
+
+void THNN_(Abs_updateOutput)(
+  THNNState *state,
+  THTensor *input,
+  THTensor *output)
+{
+  THTensor_(resizeAs)(output, input);
+  THTensor_(abs)(output, input);
+}
+
+void THNN_(Abs_updateGradInput)(
+  THNNState *state,
+  THTensor *input,
+  THTensor *gradOutput,
+  THTensor *gradInput)
+{
+  THNN_CHECK_NELEMENT(input, gradOutput);
+  THTensor_(resizeAs)(gradInput, input);
+  TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+    real z = *input_data;
+    *gradInput_data = *gradOutput_data * (z >= 0 ? 1 : -1);
+  );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/AbsCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/AbsCriterion.c
new file mode 100644
index 000000000..9bee5de9e
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/AbsCriterion.c
@@ -0,0 +1,40 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/AbsCriterion.c"
+#else
+
+void THNN_(AbsCriterion_updateOutput)(
+  THNNState *state,
+  THTensor *input,
+  THTensor *target,
+  THTensor *output,
+  bool sizeAverage)
+{
+  real sum = 0;
+  THNN_CHECK_NELEMENT(input, target);
+  TH_TENSOR_APPLY2(real, input, real, target,
+    sum += fabs(*input_data - *target_data);
+  );
+
+  if (sizeAverage)
+    sum /= THTensor_(nElement)(input);
+
+  THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(AbsCriterion_updateGradInput)(
+  THNNState *state,
+  THTensor *input,
+  THTensor *target,
+  THTensor *gradInput,
+  bool sizeAverage)
+{
+  THNN_CHECK_NELEMENT(input, target);
+  real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
+
+  THTensor_(resizeAs)(gradInput, input);
+  TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+    *gradInput_data = (*input_data - *target_data) >= 0 ?
norm : -norm; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/BCECriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/BCECriterion.c new file mode 100644 index 000000000..637a4067e --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/BCECriterion.c @@ -0,0 +1,66 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/BCECriterion.c" +#else + +#define EPS 1e-12 + +void THNN_(BCECriterion_updateOutput)(THNNState *state, THTensor *input, + THTensor *target, THTensor *output, + bool sizeAverage, THTensor *weights) +{ + THNN_CHECK_NELEMENT(input, target); + THNN_CHECK_NELEMENT(input, weights); + THNN_CHECK_DIM_SIZE(output, 1, 0, 1); + real sum = 0; + + if(weights) + TH_TENSOR_APPLY3(real, input, real, target, real, weights, + real x = *input_data; + real y = *target_data; + real w = *weights_data; + THAssertMsg(x >= 0. && x <= 1., + "input value should be between 0~1, but got %f", + (double) x); + sum -= (log(x + EPS) * y + log(1. - x + EPS) * (1. - y)) * w; + ) + else + TH_TENSOR_APPLY2(real, input, real, target, + real x = *input_data; + real y = *target_data; + THAssertMsg(x >= 0. && x <= 1., + "input value should be between 0~1, but got %f", + (double) x); + sum -= log(x + EPS) * y + log(1. - x + EPS) * (1. - y); + ); + + + if (sizeAverage) + sum /= THTensor_(nElement)(input); + + THTensor_(set1d)(output, 0, sum); +} + +void THNN_(BCECriterion_updateGradInput)(THNNState *state, THTensor *input, + THTensor *target, THTensor *gradInput, + bool sizeAverage, THTensor *weights) +{ + THNN_CHECK_NELEMENT(input, target); + THNN_CHECK_NELEMENT(input, weights); + + real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.); + + THTensor_(resizeAs)(gradInput, input); + + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + real x = *input_data; + real y = *target_data; + *gradInput_data = - norm * (y - x) / ((1. 
- x + EPS) * (x + EPS)); + ); + + if(weights) + THTensor_(cmul)(gradInput, gradInput, weights); +} + +#undef EPS + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/BatchNormalization.c b/contrib/lua-torch/nn/lib/THNN/generic/BatchNormalization.c new file mode 100644 index 000000000..b8f462790 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/BatchNormalization.c @@ -0,0 +1,149 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/BatchNormalization.c" +#else + +void THNN_(BatchNormalization_updateOutput)( + THNNState *state, THTensor *input, THTensor *output, + THTensor *weight, THTensor *bias, + THTensor *running_mean, THTensor *running_var, + THTensor *save_mean, THTensor *save_std, + bool train, double momentum, double eps) +{ + THTensor_(resizeAs)(output, input); + long nInput = THTensor_(size)(input, 1); + long f; + ptrdiff_t n = THTensor_(nElement)(input) / nInput; + + #pragma omp parallel for + for (f = 0; f < nInput; ++f) { + THTensor *in = THTensor_(newSelect)(input, 1, f); + THTensor *out = THTensor_(newSelect)(output, 1, f); + + real mean, invstd; + + if (train) { + // compute mean per input + accreal sum = 0; + TH_TENSOR_APPLY(real, in, sum += *in_data;); + + mean = (real) sum / n; + THTensor_(set1d)(save_mean, f, (real) mean); + + // compute variance per input + sum = 0; + TH_TENSOR_APPLY(real, in, + sum += (*in_data - mean) * (*in_data - mean);); + + if (sum == 0 && eps == 0.0) { + invstd = 0; + } else { + invstd = (real) (1 / sqrt(sum/n + eps)); + } + THTensor_(set1d)(save_std, f, (real) invstd); + + // update running averages + THTensor_(set1d)(running_mean, f, + (real) (momentum * mean + (1 - momentum) * THTensor_(get1d)(running_mean, f))); + + accreal unbiased_var = sum / (n - 1); + THTensor_(set1d)(running_var, f, + (real) (momentum * unbiased_var + (1 - momentum) * THTensor_(get1d)(running_var, f))); + } else { + mean = THTensor_(get1d)(running_mean, f); + invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps); + } + + // compute output + real w = weight ? THTensor_(get1d)(weight, f) : 1; + real b = bias ? THTensor_(get1d)(bias, f) : 0; + + TH_TENSOR_APPLY2(real, in, real, out, + *out_data = (real) (((*in_data - mean) * invstd) * w + b);); + + THTensor_(free)(out); + THTensor_(free)(in); + } +} + +void THNN_(BatchNormalization_backward)( + THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, + THTensor *gradWeight, THTensor *gradBias, THTensor *weight, + THTensor *running_mean, THTensor *running_var, + THTensor *save_mean, THTensor *save_std, + bool train, double scale, double eps) +{ + THNN_CHECK_SHAPE(input, gradOutput); + long nInput = THTensor_(size)(input, 1); + long f; + ptrdiff_t n = THTensor_(nElement)(input) / nInput; + + #pragma omp parallel for + for (f = 0; f < nInput; ++f) { + THTensor *in = THTensor_(newSelect)(input, 1, f); + THTensor *gradOut = THTensor_(newSelect)(gradOutput, 1, f); + real w = weight ? 
THTensor_(get1d)(weight, f) : 1;
+    real mean, invstd;
+    if (train) {
+      mean = THTensor_(get1d)(save_mean, f);
+      invstd = THTensor_(get1d)(save_std, f);
+    } else {
+      mean = THTensor_(get1d)(running_mean, f);
+      invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps);
+    }
+
+    // sum over all gradOutput in feature plane
+    accreal sum = 0;
+    TH_TENSOR_APPLY(real, gradOut, sum += *gradOut_data;);
+
+    // dot product of Q(X) and gradOutput
+    accreal dotp = 0;
+    TH_TENSOR_APPLY2(real, in, real, gradOut,
+      dotp += (*in_data - mean) * (*gradOut_data););
+
+    if (gradInput) {
+      THTensor_(resizeAs)(gradInput, input);
+      THTensor *gradIn = THTensor_(newSelect)(gradInput, 1, f);
+
+      if (train) {
+        // when in training mode
+        // Q(X) = X - E[x] ; i.e. input centered to zero mean
+        // Y = Q(X) / σ ; i.e. BN output before weight and bias
+        // dL/dX = (Q(dL/dY) - dot(Y, dL/dY) * Y) / σ * w
+
+        // projection of gradOutput onto the output, scaled by std
+        real k = (real) dotp * invstd * invstd / n;
+        TH_TENSOR_APPLY2(real, gradIn, real, in,
+          *gradIn_data = (*in_data - mean) * k;);
+
+        accreal gradMean = sum / n;
+        TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
+          *gradIn_data = (*gradOut_data - gradMean - *gradIn_data) * invstd * w;);
+
+      } else {
+        // when in evaluation mode
+        // Q(X) = X - running_mean ; i.e. input centered to zero mean
+        // Y = Q(X) / running_std ; i.e. BN output before weight and bias
+        // dL/dX = dL/dY * w / running_std
+        TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
+          *gradIn_data = *gradOut_data * invstd * w;);
+      }
+
+      THTensor_(free)(gradIn);
+    }
+
+    if (gradWeight) {
+      real val = THTensor_(get1d)(gradWeight, f);
+      THTensor_(set1d)(gradWeight, f, val + scale * dotp * invstd);
+    }
+
+    if (gradBias) {
+      real val = THTensor_(get1d)(gradBias, f);
+      THTensor_(set1d)(gradBias, f, val + scale * sum);
+    }
+
+    THTensor_(free)(gradOut);
+    THTensor_(free)(in);
+  }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/ClassNLLCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/ClassNLLCriterion.c
new file mode 100644
index 000000000..4cf37aeaf
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/ClassNLLCriterion.c
@@ -0,0 +1,163 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/ClassNLLCriterion.c"
+#else
+
+void THNN_(ClassNLLCriterion_updateOutput)(
+  THNNState *state,
+  THTensor *input,
+  THIndexTensor *target,
+  THTensor *output,
+  bool sizeAverage,
+  THTensor *weights,
+  THTensor *total_weight,
+  long ignore_index)
+{
+  THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+  THNN_CHECK_DIM_SIZE(total_weight, 1, 0, 1);
+  int n_dims = THTensor_(nDimension)(input);
+  int n_classes = THTensor_(size)(input, n_dims - 1);
+  ignore_index -= TH_INDEX_BASE;
+
+  if (THIndexTensor_(nDimension)(target) > 1) {
+    THError("multi-target not supported");
+  }
+  if (THTensor_(nDimension)(input) > 2) {
+    THError("input tensor should be 1D or 2D");
+  }
+  if (weights && THTensor_(nElement)(weights) != n_classes) {
+    THDescBuff s1 = THTensor_(sizeDesc)(weights);
+    THError("weight tensor should be defined either for all %d classes or no classes"
+            " but got weight tensor of shape: %s", n_classes, s1.str);
+  }
+
+  input = THTensor_(newContiguous)(input);
+  target = THIndexTensor_(newContiguous)(target);
+  weights = weights ? THTensor_(newContiguous)(weights) : NULL;
+
+  real *input_data = THTensor_(data)(input);
+  THIndex_t *target_data = THIndexTensor_(data)(target);
+  real *weights_data = weights ?
THTensor_(data)(weights) : NULL; + real *output_data = THTensor_(data)(output); + real *total_weight_data = THTensor_(data)(total_weight); + + output_data[0] = total_weight_data[0] = 0.0; + + if (THTensor_(nDimension)(input) == 1) { + int cur_target = target_data[0] - TH_INDEX_BASE; + if (cur_target != ignore_index) { + THAssert(cur_target >= 0 && cur_target < n_classes); + total_weight_data[0] = weights ? weights_data[cur_target] : 1.0f; + output_data[0] = -input_data[cur_target] * total_weight_data[0]; + } + } else if (THTensor_(nDimension)(input) == 2) { + int batch_size = THTensor_(size)(input, 0); + THAssert(THIndexTensor_(size)(target, 0) == batch_size); + + int n_target = THTensor_(size)(input, 1); + + int i; + for (i = 0; i < batch_size; i++) { + int cur_target = target_data[i] - TH_INDEX_BASE; + if (cur_target != ignore_index) { + THAssert(cur_target >= 0 && cur_target < n_classes); + + real cur_weight = weights ? weights_data[cur_target] : 1.0f; + total_weight_data[0] += cur_weight; + output_data[0] -= input_data[i * n_target + cur_target] * cur_weight; + } + } + } + + if (sizeAverage && total_weight_data[0]) { + output_data[0] /= total_weight_data[0]; + } + + if (weights) { + THTensor_(free)(weights); + } + THTensor_(free)(input); + THIndexTensor_(free)(target); +} + +void THNN_(ClassNLLCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *gradInput, + bool sizeAverage, + THTensor *weights, + THTensor *total_weight, + long ignore_index) +{ + int n_dims = THTensor_(nDimension)(input); + int n_classes = THTensor_(size)(input, n_dims - 1); + ignore_index -= TH_INDEX_BASE; + + if (!THTensor_(isContiguous)(gradInput)) { + THError("gradInput must be contiguous"); + } + + real *total_weight_data = THTensor_(data)(total_weight); + + if (!(*total_weight_data > 0)) { + return; + } + + if (THIndexTensor_(nDimension)(target) > 1) { + THError("multi-target not supported"); + } + + if (THTensor_(nDimension)(input) > 2) { + THError("input tensor should be 1D or 2D"); + } + + if (weights && THTensor_(nElement)(weights) != n_classes) { + THError("weight tensor should be defined either for all or no classes"); + } + + target = THIndexTensor_(newContiguous)(target); + weights = weights ? THTensor_(newContiguous)(weights) : NULL; + + THIndex_t *target_data = THIndexTensor_(data)(target); + real *weights_data = weights ? THTensor_(data)(weights) : NULL; + real *gradInput_data = THTensor_(data)(gradInput); + + if (THTensor_(nDimension)(input) == 1) { + int cur_target = target_data[0] - TH_INDEX_BASE; + if (cur_target != ignore_index) { + THAssert(cur_target >= 0 && cur_target < n_classes); + + gradInput_data[cur_target] = + (!sizeAverage && weights) ? -weights_data[cur_target] : -1; + } + + } else if (THTensor_(nDimension)(input) == 2) { + int batch_size = THTensor_(size)(input, 0); + THAssert(THIndexTensor_(size)(target, 0) == batch_size); + + int n_target = THTensor_(size)(input, 1); + + int i; + for (i = 0; i < batch_size; i++){ + int cur_target = target_data[i] - TH_INDEX_BASE; + + if (cur_target != ignore_index) { + THAssert(cur_target >= 0 && cur_target < n_classes); + + gradInput_data[i * n_target + cur_target] = + -(weights ? 
weights_data[cur_target] : 1.0f); + + if (sizeAverage && *total_weight_data) { + gradInput_data[i * n_target + cur_target] /= *total_weight_data; + } + } + } + } + + THIndexTensor_(free)(target); + if (weights) { + THTensor_(free)(weights); + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/DistKLDivCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/DistKLDivCriterion.c new file mode 100644 index 000000000..6bd6aa067 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/DistKLDivCriterion.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/DistKLDivCriterion.c" +#else + +void THNN_(DistKLDivCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage) +{ + THNN_CHECK_NELEMENT(input, target); + THNN_CHECK_DIM_SIZE(output, 1, 0, 1); + + real sum = 0; + + TH_TENSOR_APPLY2(real, input, real, target, + sum += *target_data > 0 ? *target_data * (log(*target_data) - *input_data) : 0; + ); + + if (sizeAverage) + sum /= THTensor_(nElement)(input); + + THTensor_(set1d)(output, 0, sum); +} + +void THNN_(DistKLDivCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage) +{ + THNN_CHECK_NELEMENT(input, target); + + real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = *target_data > 0 ? norm * (-*target_data) : 0; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/ELU.c b/contrib/lua-torch/nn/lib/THNN/generic/ELU.c new file mode 100644 index 000000000..ddcfb9705 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/ELU.c @@ -0,0 +1,54 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/ELU.c" +#else + +void THNN_(ELU_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal alpha_, + bool inplace) +{ + real alpha = TH_CONVERT_ACCREAL_TO_REAL(alpha_); + if(inplace) { + TH_TENSOR_APPLY(real, input, + if(*input_data <= 0) { + *input_data = (exp(*input_data) - 1) * alpha; + } + ); + THTensor_(set)(output, input); + } else { + THTensor_(resizeAs)(output, input); + TH_TENSOR_APPLY2(real, input, real, output, + *output_data = *input_data <= 0 ? (exp(*input_data)-1)*alpha : *input_data; + ); + } +} + +void THNN_(ELU_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output, + accreal alpha_, + bool inplace) +{ + real alpha = TH_CONVERT_ACCREAL_TO_REAL(alpha_); + THNN_CHECK_NELEMENT(input, gradOutput); + if(inplace) { + TH_TENSOR_APPLY2(real, gradOutput, real, output, + if(*output_data <= 0) { + *gradOutput_data *= *output_data + alpha; + } + ); + THTensor_(set)(gradInput, gradOutput); + } else { + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, + *gradInput_data = *output_data <= 0 ? 
*gradOutput_data * (*output_data + alpha) : *gradOutput_data; + ); + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/FusedRNNKernel.c b/contrib/lua-torch/nn/lib/THNN/generic/FusedRNNKernel.c new file mode 100644 index 000000000..30788b0a2 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/FusedRNNKernel.c @@ -0,0 +1,55 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/FusedRNNKernel.c" +#else + +void THNN_(GRUFused_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *hidden, + THTensor *bias1, + THTensor *bias2, + THTensor *hx, + THTensor *hy, + THTensor *storage) +{ + THAssertMsg(false, "Not implemented for CPU"); +} + +void THNN_(GRUFused_updateGradInput)( + THNNState *state, + THTensor *gradInInput, + THTensor *gradInHidden, + THTensor *gradOutput, + THTensor *gradInputHx, + THTensor *storage) +{ + THAssertMsg(false, "Not implemented for CPU"); +} + +void THNN_(LSTMFused_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *hidden, + THTensor *bias1, + THTensor *bias2, + THTensor *cx, + THTensor *hy, + THTensor *cy) +{ + THAssertMsg(false, "Not implemented for CPU"); +} + +void THNN_(LSTMFused_updateGradInput)( + THNNState *state, + THTensor *storage, + THTensor *gradInGates, + THTensor *prevC, + THTensor *cy, + THTensor *gradOutput, + THTensor *gradOutputCell, + THTensor *gradInputCx) +{ + THAssertMsg(false, "Not implemented for CPU"); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/GatedLinearUnit.c b/contrib/lua-torch/nn/lib/THNN/generic/GatedLinearUnit.c new file mode 100644 index 000000000..274a27e3b --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/GatedLinearUnit.c @@ -0,0 +1,73 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/GatedLinearUnit.c" +#else + +void THNN_(GatedLinear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int dim) +{ + // size output to half of input + dim = dim - TH_INDEX_BASE; + const long nIn = THTensor_(size)(input, dim); + THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld", + dim + TH_INDEX_BASE, nIn); + + const long inputSize = THTensor_(size)(input, dim) / 2; + THLongStorage *newSizes = THTensor_(newSizeOf)(input); + THLongStorage_set(newSizes, dim, inputSize); + THTensor_(resize)(output, newSizes, NULL); + + // halve tensor + THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize); + THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize); + + // x = x1:cmul( sigmoid(x2) ) + THTensor_(sigmoid)(output, secondHalf); + THTensor_(cmul)(output, output, firstHalf); + + THLongStorage_free(newSizes); + THTensor_(free)(firstHalf); + THTensor_(free)(secondHalf); +} + +void THNN_(GatedLinear_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int dim) +{ + // set up tensors + dim = dim - TH_INDEX_BASE; + const long nIn = THTensor_(size)(input, dim); + THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. 
Dim %d is size %ld", + dim + TH_INDEX_BASE, nIn); + + THTensor_(resizeAs)(gradInput, input); + const long inputSize = THTensor_(size)(input, dim) / 2; + THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize); + THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize); + THTensor *gradInputfirstHalf = THTensor_(newNarrow)(gradInput, dim, 0, inputSize); + THTensor *gradInputsecondHalf = THTensor_(newNarrow)(gradInput, dim, inputSize, inputSize); + + THTensor_(sigmoid)(gradInputfirstHalf, secondHalf); + + TH_TENSOR_APPLY2(real, gradInputsecondHalf, real, gradInputfirstHalf, + real z = *gradInputfirstHalf_data; + *gradInputsecondHalf_data = (1. - z) * z; + ); + + THTensor_(cmul)(gradInputfirstHalf, gradInputfirstHalf, gradOutput); + + THTensor_(cmul)(gradInputsecondHalf, gradInputsecondHalf, gradOutput); + THTensor_(cmul)(gradInputsecondHalf, gradInputsecondHalf, firstHalf); + + THTensor_(free)(firstHalf); + THTensor_(free)(secondHalf); + THTensor_(free)(gradInputfirstHalf); + THTensor_(free)(gradInputsecondHalf); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/HardShrink.c b/contrib/lua-torch/nn/lib/THNN/generic/HardShrink.c new file mode 100644 index 000000000..aaae85bac --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/HardShrink.c @@ -0,0 +1,42 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/HardShrink.c" +#else + +void THNN_(HardShrink_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal lambda_) +{ + real lambda = TH_CONVERT_ACCREAL_TO_REAL(lambda_); + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, + if (*input_data > lambda) + *output_data = *input_data; + else if (*input_data < -lambda) + *output_data = *input_data; + else + *output_data = 0; + ); +} + +void THNN_(HardShrink_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + accreal lambda_) +{ + real lambda = TH_CONVERT_ACCREAL_TO_REAL(lambda_); + THNN_CHECK_NELEMENT(input, gradOutput); + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, + if (*input_data > lambda || *input_data < -lambda) + *gradInput_data = *gradOutput_data; + else + *gradInput_data = 0; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/HardTanh.c b/contrib/lua-torch/nn/lib/THNN/generic/HardTanh.c new file mode 100644 index 000000000..589a66e15 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/HardTanh.c @@ -0,0 +1,133 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/HardTanh.c" +#else + +void THNN_(HardTanh_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal min_val_, + accreal max_val_, + bool inplace) +{ + real min_val = TH_CONVERT_ACCREAL_TO_REAL(min_val_); + real max_val = TH_CONVERT_ACCREAL_TO_REAL(max_val_); + if (inplace) + THTensor_(set)(output, input); + else + THTensor_(resizeAs)(output, input); + + if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output)) + { + if (inplace) + TH_TENSOR_APPLY(real, input, + if (*input_data < min_val) + *input_data = min_val; + else if (*input_data > max_val) + *input_data = max_val; + ); + TH_TENSOR_APPLY2(real, output, real, input, + if (*input_data < min_val) + *output_data = min_val; + else if (*input_data <= max_val) + *output_data = *input_data; + else + *output_data = max_val; + ); + } + else + { + real* ptr_input = THTensor_(data)(input); + real* ptr_output 
= THTensor_(data)(output); + ptrdiff_t i; + ptrdiff_t n = THTensor_(nElement)(input); + + if (inplace) +#pragma omp parallel for private(i) + for (i = 0; i < n; i++) + { + if (ptr_input[i] < min_val) + ptr_input[i] = min_val; + else if (ptr_input[i] > max_val) + ptr_input[i] = max_val; + } + else +#pragma omp parallel for private(i) + for (i = 0; i < n; i++) + { + if (ptr_input[i] < min_val) + ptr_output[i] = min_val; + else if (ptr_input[i] <= max_val) + ptr_output[i] = ptr_input[i]; + else + ptr_output[i] = max_val; + } + } +} + +void THNN_(HardTanh_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + accreal min_val_, + accreal max_val_, + bool inplace) +{ + real min_val = TH_CONVERT_ACCREAL_TO_REAL(min_val_); + real max_val = TH_CONVERT_ACCREAL_TO_REAL(max_val_); + + THNN_CHECK_NELEMENT(input, gradOutput); + if (inplace) + THTensor_(set)(gradInput, gradOutput); + else + THTensor_(resizeAs)(gradInput, input); + + if (input->nDimension == 1 || + !THTensor_(isContiguous)(input) || + !THTensor_(isContiguous)(gradOutput) || + !THTensor_(isContiguous)(gradInput)) + { + if (inplace) + { + TH_TENSOR_APPLY2(real, gradOutput, real, input, + if (*input_data <= min_val || *input_data >= max_val) + *gradOutput_data = 0; + ); + } + else + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, + if (*input_data <= min_val || *input_data >= max_val) + *gradInput_data = 0; + else + *gradInput_data = *gradOutput_data; + ); + } + else + { + real* ptr_gradOutput = THTensor_(data)(gradOutput); + real* ptr_gradInput = THTensor_(data)(gradInput); + real* ptr_input = THTensor_(data)(input); + ptrdiff_t i; + ptrdiff_t n = THTensor_(nElement)(input); + + if (inplace) +#pragma omp parallel for private(i) + for (i = 0; i < n; i++) + { + if (ptr_input[i] <= min_val || ptr_input[i] >= max_val) + ptr_gradInput[i] = 0; + } + else +#pragma omp parallel for private(i) + for (i = 0; i < n; i++) + { + if (ptr_input[i] <= min_val || ptr_input[i] >= max_val) + ptr_gradInput[i] = 0; + else + ptr_gradInput[i] = ptr_gradOutput[i]; + } + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/IndexLinear.c b/contrib/lua-torch/nn/lib/THNN/generic/IndexLinear.c new file mode 100644 index 000000000..42d8368ba --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/IndexLinear.c @@ -0,0 +1,742 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/IndexLinear.c" +#else + +#ifdef _OPENMP +#include <omp.h> +#endif + +/* Threshold used to trigger multithreading */ +#ifndef THNN_SPARSE_OMP_THRESHOLD +#define THNN_SPARSE_OMP_THRESHOLD 100000 +#endif + +/* Threshold used to trigger BLAS axpy call */ +#ifndef THNN_SPARSE_OUTDIM_THRESHOLD +#define THNN_SPARSE_OUTDIM_THRESHOLD 49 +#endif + +/* sign MACRO */ +#ifndef THNN_INDEXLINEAR_SIGN +#define THNN_INDEXLINEAR_SIGN(a) ( ( (a) < 0 ) ? 
-1 : ( (a) > 0 ) ) +#endif + +static bool THNN_(checkKeysValues)(THLongTensor* keys, THTensor* values) +{ + return THLongTensor_size(keys, 0) == THTensor_(nElement)(values) + && THTensor_(nDimension)(values) == 1 + && THLongTensor_nDimension(keys) == 1; +} + +void THNN_(IndexLinear_updateOutput)( + THNNState *state, + THLongTensor *keys, + long keysOffset, + THTensor *values, + THLongTensor *sizes, + THLongTensor *cumSumSizes, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *normalizedValues, + int train) +{ + /* Retrieve all the dimensions of the problem */ + long batchSize = THLongTensor_size(sizes, 0); + long keysSize = THLongTensor_size(keys, 0); + long outDim = THTensor_(size)(bias, 0); + long woutDim = THTensor_(size)(weight, 1); + int maxNormalize = woutDim - outDim; + long* sizesData = THLongTensor_data(sizes); + long* cumSumSizesData = THLongTensor_data(cumSumSizes); + + /* Define/resize the normalized values tensor if maxNormalize is > 0 */ + real* normalizedValuesData = NULL; + if (maxNormalize) + { + THTensor_(resize1d)(normalizedValues, keysSize); + normalizedValuesData = THTensor_(data)(normalizedValues); + } + + /* Resize the output */ + THTensor_(resize2d)(output, batchSize, outDim); + + /* Access the storage data/strides */ + real* outputData = THTensor_(data)(output); + real* valuesData = THTensor_(data)(values); + real* weightData = THTensor_(data)(weight); + long weightStride0 = weight->stride[0]; + real* biasData = THTensor_(data)(bias); + long* keysData = THLongTensor_data(keys); + + /* Make sure these inputs are contiguous to accelerate computations */ + THArgCheck(THLongTensor_isContiguous(keys), 1, "keys vector must be contiguous"); + THArgCheck(THTensor_(isContiguous)(values), 3, "values vector must be contiguous"); + THArgCheck(THTensor_(isContiguous)(output), 6, "output vector must be contiguous"); + THArgCheck(THTensor_(isContiguous)(weight), 7, "weight matrix must be contiguous"); + THArgCheck(THTensor_(isContiguous)(bias), 8, "bias vector must be contiguous"); + THArgCheck(THNN_(checkKeysValues)(keys, values), 1, "Keys and values should have the same number of elements"); + THArgCheck(THTensor_(isContiguous)(normalizedValues), 9, "normalizedValues vector must be contiguous"); + long i,j,k; + + /* Separate cases: output dimension is == 1, or > 1 + * This allows for some optimizations. */ + if (outDim == 1) + { + THVector_(fill)(outputData, *biasData, batchSize); + if (maxNormalize) + { + /* Parallelize on the batch itself */ +#pragma omp parallel \ + for private(i,j) \ + firstprivate(outDim, keysOffset, \ + weightData, keysData, \ + valuesData, outputData, \ + cumSumSizesData, sizesData) \ + schedule(static) \ + if(keysSize*outDim > THNN_SPARSE_OMP_THRESHOLD && batchSize > 1) + for (j = 0; j < batchSize; j++) + { + real* loutputData = outputData + j; + real val = 0; + real absVal = 0; + long offset = j == 0 ? 0 : cumSumSizesData[j - 1]; + + for (i = 0; i < sizesData[j]; i++) + { + long woffset = weightStride0*(keysData[offset] + keysOffset); + absVal = fabs(valuesData[offset]); + if (train) + { + if (absVal > weightData[woffset]) + { + weightData[woffset] = absVal; + weightData[woffset+1] = 1/absVal; + } + + /* + * The following can be used to scale the size of the updates + * depending on some rule, e.g. the frequency of a feature, ... + * This is used at update time. + * TODO: implement a smarter update scale. + */ + weightData[woffset+2] = 1; + } + normalizedValuesData[offset] = (absVal > weightData[woffset] ? 
THNN_INDEXLINEAR_SIGN(valuesData[offset]):valuesData[offset]*weightData[woffset+1]) + weightData[woffset+3]; + val += normalizedValuesData[offset] * weightData[woffset+maxNormalize]; + offset++; + } + *loutputData += val; + } + } + else + { + /* Parallelize on the batch itself */ +#pragma omp parallel \ + for private(i,j) \ + firstprivate(outDim, weightData, \ + keysData, valuesData, \ + outputData, cumSumSizesData, \ + sizesData) \ + schedule(static) \ + if(keysSize*outDim > THNN_SPARSE_OMP_THRESHOLD && batchSize > 1) + for (j = 0; j < batchSize; j++) + { + long offset = j == 0 ? 0 : cumSumSizesData[j - 1]; + real* loutputData = outputData + j; + real val = 0; + + for (i = 0; i < sizesData[j]; i++) + { + val += weightData[weightStride0*(keysData[offset] + keysOffset)] * valuesData[offset]; + offset++; + } + *loutputData += val; + } + } + } + else { +#pragma omp parallel \ + for private(i,j,k) \ + firstprivate(outDim, weightData, \ + keysData, valuesData, \ + biasData, outputData, \ + cumSumSizesData, sizesData) \ + schedule(static) \ + if(keysSize*outDim > THNN_SPARSE_OMP_THRESHOLD && batchSize > 1) + for (j = 0; j < batchSize; j++) + { + long offset = j == 0 ? 0 : cumSumSizesData[j - 1]; + real val = 0; + real* loutputData = outputData + j*outDim; + real* lweightData = weightData; + memcpy(loutputData, biasData, outDim*sizeof(real)); + for (i = 0; i < sizesData[j]; i++) + { + real val; + long woffset = weightStride0*(keysData[offset] + keysOffset); + if (maxNormalize) + { + val = valuesData[offset]; + real absVal = fabs(val); + if (train) + { + if (absVal > weightData[woffset]) + { + weightData[woffset] = absVal; + weightData[woffset+1] = 1/absVal; + } + + /* + * The following can be used to scale the size of the updates + * depending on some rule, e.g. the frequency of a feature, ... + * The commented section thereafter is just an example of what can be done: + * + *``` + * weightData[woffset+2] = weightData[woffset+2]==0?1:(weightData[woffset+2] / (weightData[woffset+2] + 1)); + * real alpha = 1; + * real beta = 0.01; + * real gamma = 1 - 0.000001; + * real l = weightData[woffset+2]==0?1/gamma:(weightData[woffset+2] - beta) / (alpha - beta); + * l = gamma*l; + * weightData[woffset+2] = (alpha-beta)*l + beta; + * ``` + * + * TODO: implement a smarter update scale. + */ + weightData[woffset+2] = 1; + } + + /* Normalize + Clamp */ + val = (absVal > weightData[woffset] ? 
THNN_INDEXLINEAR_SIGN(val):val*weightData[woffset+1]) + weightData[woffset+3];
+          normalizedValuesData[offset] = val;
+
+          lweightData = weightData + woffset + maxNormalize;
+        }
+        else
+        {
+          val = valuesData[offset];
+          lweightData = weightData + woffset;
+        }
+        if (outDim > THNN_SPARSE_OUTDIM_THRESHOLD)
+        {
+          THBlas_(axpy)(outDim, val, lweightData, 1, loutputData, 1);
+        }
+        else
+        {
+          for (k=0; k < outDim; k++)
+          {
+            loutputData[k] += lweightData[k] * val;
+          }
+        }
+        offset++;
+      }
+    }
+  }
+  return;
+}
+
+void THNN_(IndexLinear_updateParameters)(
+  THNNState *state,
+  THTensor *gradWeight,
+  THTensor *gradBias,
+  THTensor *weight,
+  THTensor *bias,
+  THLongTensor *runningKeys,
+  THLongTensor *cumSumSizes,
+  long keysOffset,
+  accreal weightDecay_,
+  accreal learningRate_)
+{
+  real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_);
+  real learningRate = TH_CONVERT_ACCREAL_TO_REAL(learningRate_);
+  /* Retrieve all the dimensions of the problem */
+  long outDim = THTensor_(size)(bias, 0);
+  long woutDim = THTensor_(size)(weight, 1);
+  int maxNormalize = woutDim - outDim;
+  long keysSize = THLongTensor_size(runningKeys, 0);
+
+  /* Access the storage data/strides */
+  real* gradWeightData = THTensor_(data)(gradWeight);
+  real* weightData = THTensor_(data)(weight);
+  long weightStride0 = weight->stride[0];
+  real* gradBiasData = THTensor_(data)(gradBias);
+  real* biasData = THTensor_(data)(bias);
+  long* keysData = THLongTensor_data(runningKeys);
+
+  /* Make sure these inputs are contiguous to accelerate computations */
+  THArgCheck(THTensor_(isContiguous)(gradWeight), 1, "gradWeight must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(gradBias), 2, "gradBias vector must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(weight), 3, "weight matrix must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(bias), 4, "bias vector must be contiguous");
+  THArgCheck(THLongTensor_isContiguous(runningKeys), 5, "keys vector must be contiguous");
+
+  int j,k;
+  long offset = 0;
+
+  /* Update the bias first */
+  THVector_(cadd)(biasData, biasData, gradBiasData, -learningRate, outDim);
+
+  /* Separate cases: output dimension is == 1, or > 1
+   * This allows for some optimizations.
+ * No multithreading here as this could + * corrupt the results (hogwild style) */ + if (outDim == 1) + { + if (maxNormalize) + { + if (weightDecay) + { + for (j = 0; j < keysSize; j++) + { + long woffset = weightStride0*(keysData[j] + keysOffset) + maxNormalize; + real lr = learningRate*weightData[woffset-2]; + weightData[woffset-1] -= weightData[woffset]*gradWeightData[2*j]*lr; + weightData[woffset] -= gradWeightData[2*j+1]*lr - weightDecay * weightData[woffset-2] * weightData[woffset]; + } + } + else + { + for (j = 0; j < keysSize; j++) + { + long woffset = weightStride0*(keysData[j] + keysOffset) + maxNormalize; + real lr = learningRate*weightData[woffset-2]; + weightData[woffset-1] -= weightData[woffset]*gradWeightData[2*j]*lr; + weightData[woffset] -= gradWeightData[2*j+1]*lr; + } + } + } + else + { + if (weightDecay) + { + for (j = 0; j < keysSize; j++) + { + long woffset = weightStride0*(keysData[j] + keysOffset); + weightData[woffset] -= gradWeightData[j]*learningRate + weightDecay * weightData[woffset]; + } + } + else + { + for (j = 0; j < keysSize; j++) + { + weightData[weightStride0*(keysData[j] + keysOffset)] -= gradWeightData[j]*learningRate; + } + } + } + } + else + { + for (j = 0; j < keysSize; j++) + { + real lr = learningRate; + real wd = weightDecay; + real* lweightData; + long woffset = weightStride0*(keysData[j] + keysOffset); + real* lgradWeightData = gradWeightData + j*outDim; + if (maxNormalize) + { + lgradWeightData += j*outDim; + /* weightData[woffset + 2] */ + lweightData = weightData + woffset + maxNormalize - 2; + lr = lr*lweightData[0]; + wd = weightDecay*lweightData[0]; + /* weightData[woffset + 3] */ + lweightData++; + for (k=0; k < outDim; k++) + { + lweightData[0] -= lgradWeightData[k]*lweightData[k+1]*lr; + } + lweightData++; + lgradWeightData += outDim; + } + else + { + lweightData = weightData + woffset; + } + + /* We do sparse weight decay. + * We think it makes more sense. 
*/ + if (weightDecay) + { + for (k=0; k < outDim; k++) + { + lweightData[k] -= lweightData[k]*wd; + } + } + + if (outDim > THNN_SPARSE_OUTDIM_THRESHOLD) + { + THBlas_(axpy)(outDim, -lr, lgradWeightData, 1, lweightData, 1); + } + else + { + for (k=0; k < outDim; k++) + { + lweightData[k] -= lgradWeightData[k]*lr; + } + } + } + } +} + + +void THNN_(IndexLinear_accUpdateGradParameters)( + THNNState *state, + THLongTensor *keys, + long keysOffset, + THTensor *values, + THLongTensor *sizes, + THLongTensor *cumSumSizes, + THTensor *gradOutput, + THTensor *weight, + THTensor *bias, + accreal weightDecay_, + accreal scale_) +{ + real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_); + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + /* Retrieve all the dimensions of the problem */ + long batchSize = THLongTensor_size(sizes, 0); + long keysSize = THLongTensor_size(keys, 0); + long outDim = THTensor_(size)(bias, 0); + long woutDim = THTensor_(size)(weight, 1); + int maxNormalize = woutDim - outDim; + THArgCheck(THNN_(checkKeysValues)(keys, values), 1, "Keys and values should have the same number of elements"); + + /* Access the storage data/strides */ + real* gradOutputData = THTensor_(data)(gradOutput); + real* valuesData =THTensor_(data)(values); + real* weightData = THTensor_(data)(weight); + real* biasData = THTensor_(data)(bias); + long weightStride0 = weight->stride[0]; + long biasStride = bias->stride[0]; + long* keysData = THLongTensor_data(keys); + long* sizesData = THLongTensor_data(sizes); + + /* Make sure these inputs are contiguous to accelerate computations */ + THArgCheck(THLongTensor_isContiguous(keys), 1, "keys vector must be contiguous"); + THArgCheck(THTensor_(isContiguous)(values), 3, "values vector must be contiguous"); + THArgCheck(THTensor_(isContiguous)(gradOutput), 6, "gradOutput vector must be contiguous"); + THArgCheck(THTensor_(isContiguous)(weight), 7, "weight matrix must be contiguous"); + THArgCheck(THTensor_(isContiguous)(bias), 8, "bias matrix must be contiguous"); + + int i,j,k; + + /* Separate cases: output dimension is == 1, or > 1 + * This allows for some optimizations. 
+ * No multithreading here as this could + * corrupt the results (hogwild style) */ + if (outDim == 1) + { + if (maxNormalize) + { + long offset = 0; + for (j = 0; j < batchSize; j++) + { + real* lgradOutputData = gradOutputData + j; + *biasData -= *lgradOutputData * scale; + real val = *lgradOutputData * scale; + real* lweightData = weightData; + for (i = 0; i < sizesData[j]; i++) + { + long idx = weightStride0*(keysData[offset] + keysOffset) + maxNormalize; + weightData[idx-1] -= weightData[idx]*val*weightData[idx-2]; + weightData[idx] -= (val*valuesData[offset] - weightDecay * weightData[idx])*weightData[idx-2]; + offset++; + } + } + + offset = 0; + for (j = 0; j < batchSize; j++) + { + real* lweightData = weightData; + for (i = 0; i < sizesData[j]; i++) + { + long idx = weightStride0*(keysData[offset] + keysOffset) + maxNormalize; + weightData[idx-2] = 0; + offset++; + } + } + } + else + { + if (weightDecay) + { + long offset = 0; + for (j = 0; j < batchSize; j++) + { + real* lgradOutputData = gradOutputData + j; + *biasData -= *lgradOutputData * scale; + real val = *lgradOutputData * scale; + real* lweightData = weightData; + for (i = 0; i < sizesData[j]; i++) + { + long idx = weightStride0*(keysData[offset] + keysOffset); + weightData[idx] -= val * valuesData[offset] + weightData[idx] * weightDecay; + offset++; + } + } + } + else + { + long offset = 0; + for (j = 0; j < batchSize; j++) + { + real val = gradOutputData[j] * scale; + for (i = 0; i < sizesData[j]; i++) + { + weightData[(keysData[offset] + keysOffset)*weightStride0] -= val * valuesData[offset]; + offset++; + } + *biasData -= val; + } + } + } + } + else { + long offset = 0; + for (j = 0; j < batchSize; j++) + { + real val = 0; + real* lgradOutputData = gradOutputData + j*outDim; + real* lweightData = weightData; + THVector_(cadd)(biasData, biasData, lgradOutputData, -scale, outDim); + for (i = 0; i < sizesData[j]; i++) + { + real val = valuesData[offset] * scale; + real wd = weightDecay; + + // Max normalize case + if (maxNormalize) + { + lweightData = weightData + weightStride0*(keysData[offset] + keysOffset) + (maxNormalize-2); + val *= lweightData[0]; + wd *= lweightData[0]; + for (k=0; k < outDim; k++) + { + lweightData[1] -= lweightData[k+2]*scale*lgradOutputData[k]*lweightData[0]; + } + lweightData += 2; + } + else + { + lweightData = weightData + weightStride0*(keysData[offset] + keysOffset); + } + + /* We do sparse weight decay. + * We think it makes more sense. */ + if (weightDecay) + { + if (outDim > THNN_SPARSE_OUTDIM_THRESHOLD) + { + THBlas_(axpy)(outDim, -wd, lweightData, 1, lweightData, 1); + } + else + { + for (k=0; k < outDim; k++) + { + lweightData[k] -= wd * lweightData[k]; + } + } + } + + if (outDim > THNN_SPARSE_OUTDIM_THRESHOLD) + { + THBlas_(axpy)(outDim, -val, lgradOutputData, 1, lweightData, 1); + } + else + { + for (k=0; k < outDim; k++) + { + lweightData[k] -= val * lgradOutputData[k]; + } + } + offset++; + } + } + + /* Max Normalize case: + * Reset the smart update scaling if + * one does it batch-wise. + * TODO: Decide what to do with that piece of code. 
+   * NB: If the code below is uncommented, so should the commented
+   * code in IndexLinear:zeroGradParameters() */
+
+  /*
+  if (maxNormalize)
+  {
+    offset = 0;
+    for (j = 0; j < batchSize; j++)
+    {
+      real* lweightData = weightData;
+      for (i = 0; i < sizesData[j]; i++)
+      {
+        real val = valuesData[offset] * scale;
+        real wd = weightDecay;
+
+        lweightData = weightData + weightStride0*(keysData[offset] + keysOffset) + (maxNormalize-2);
+        lweightData[0] = 0;
+        offset++;
+      }
+    }
+  }
+  */
+  }
+  return;
+}
+
+void THNN_(IndexLinear_accGradParameters)(
+  THNNState *state,
+  THLongTensor *keys,
+  long keysOffset,
+  THTensor *values,
+  THLongTensor *sizes,
+  THLongTensor *cumSumSizes,
+  THTensor *gradOutput,
+  THTensor *gradWeight,
+  THTensor *gradBias,
+  THTensor *weight,
+  THTensor *bias,
+  THTensor *valuesBuffer,
+  accreal weightDecay_,
+  accreal scale_)
+{
+  real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_);
+  real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+  /* Retrieve all the dimensions of the problem */
+  long batchSize = THLongTensor_size(sizes, 0);
+  long keysSize = THLongTensor_size(keys, 0);
+  long outDim = THTensor_(size)(bias, 0);
+  long woutDim = THTensor_(size)(weight, 1);
+  long maxNormalize = (woutDim - outDim) > 0 ? 1 : 0;
+  THArgCheck(THNN_(checkKeysValues)(keys, values), 1, "Keys and values should have the same number of elements");
+  long* sizesData = THLongTensor_data(sizes);
+
+  /* Compute the cumulative sizes */
+  THLongTensor* cumSizes = THLongTensor_new();
+  THLongTensor_cumsum(cumSizes, sizes, 0);
+  long* cumSizesData = THLongTensor_data(cumSizes);
+
+  /* Resize the gradWeight buffer to keep it dense.
+   * That speeds up updates A LOT assuming random mem access. */
+  THTensor_(resize2d)(gradWeight, keysSize, outDim * (maxNormalize > 0 ? 2 : 1));
+
+  /* Access the storage data/strides */
+  real* gradOutputData = THTensor_(data)(gradOutput);
+  real* valuesData = THTensor_(data)(values);
+  real* gradWeightData = THTensor_(data)(gradWeight);
+  real* weightData = THTensor_(data)(weight);
+  real* gradBiasData = THTensor_(data)(gradBias);
+  long gradWeightStride0 = gradWeight->stride[0];
+  long weightStride0 = weight->stride[0];
+  long* keysData = THLongTensor_data(keys);
+
+  /* Make sure these inputs are contiguous to accelerate computations */
+  THArgCheck(THLongTensor_isContiguous(keys), 1, "keys vector must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(values), 3, "values vector must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(gradOutput), 6, "gradOutput vector must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(gradWeight), 7, "gradWeight must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(gradBias), 8, "gradBias vector must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(weight), 9, "weight must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(bias), 10, "bias vector must be contiguous");
+  THArgCheck(THTensor_(isContiguous)(valuesBuffer), 11, "valuesBuffer must be contiguous");
+
+  int i,j,k;
+
+  /* Separate cases: output dimension is == 1, or > 1
+   * This allows for some optimizations.
+ * No multithreading here as this could + * corrupt the results (hogwild style) */ + if (outDim == 1) + { + for (j = 0; j < batchSize; j++) + { + long offset = j==0?0:cumSizesData[j-1]; + real val = gradOutputData[j] * scale; + real* lgradWeightData = gradWeightData + offset; + real* lvaluesData = valuesData + offset; + long end = sizesData[j]; + + if (maxNormalize) + { + lgradWeightData += offset; + i = 0; + for(;i < end; i++) + { + lgradWeightData[2*i] = val; + lgradWeightData[2*i+1] = val * lvaluesData[i]; + } + } + else + { + i = 0; + for(;i < end-4; i += 4) + { + lgradWeightData[i] = val * lvaluesData[i]; + lgradWeightData[i+1] = val * lvaluesData[i+1]; + lgradWeightData[i+2] = val * lvaluesData[i+2]; + lgradWeightData[i+3] = val * lvaluesData[i+3]; + } + + for(; i < end; i++) + { + lgradWeightData[i] = val * lvaluesData[i]; + } + } + *gradBiasData += val; + offset += end; + } + } + else { + for (j = 0; j < batchSize; j++) + { + long offset = j==0?0:cumSizesData[j-1]; + real val = 0; + real* lgradOutputData = gradOutputData + j*outDim; + real* lgradWeightData = gradWeightData; + real* lweightData = weightData; + THVector_(cadd)(gradBiasData, gradBiasData, lgradOutputData, scale, outDim); + for (i = 0; i < sizesData[j]; i++) + { + real val = valuesData[offset] * scale; + lgradWeightData = gradWeightData + offset*outDim; + if (maxNormalize) + { + lgradWeightData += offset*outDim; + k = 0; + for(;k < outDim-4; k += 4) + { + lgradWeightData[k] = lgradOutputData[k]*scale; + lgradWeightData[k+1] = lgradOutputData[k+1]*scale; + lgradWeightData[k+2] = lgradOutputData[k+2]*scale; + lgradWeightData[k+3] = lgradOutputData[k+3]*scale; + } + + for(; k < outDim; k++) + { + lgradWeightData[k] = lgradOutputData[k]*scale; + } + lgradWeightData += outDim; + } + k = 0; + for(;k < outDim-4; k += 4) + { + lgradWeightData[k] = val * lgradOutputData[k]; + lgradWeightData[k+1] = val * lgradOutputData[k+1]; + lgradWeightData[k+2] = val * lgradOutputData[k+2]; + lgradWeightData[k+3] = val * lgradOutputData[k+3]; + } + + for(; k < outDim; k++) + { + lgradWeightData[k] = val * lgradOutputData[k]; + } + offset++; + } + } + } + THLongTensor_free(cumSizes); + return; +} +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/L1Cost.c b/contrib/lua-torch/nn/lib/THNN/generic/L1Cost.c new file mode 100644 index 000000000..53940e894 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/L1Cost.c @@ -0,0 +1,38 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/L1Cost.c" +#else + +void THNN_(L1Cost_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output) +{ + THNN_CHECK_DIM_SIZE(output, 1, 0, 1); + accreal sum = 0; + + TH_TENSOR_APPLY(real, input, + sum += fabs(*input_data); + ); + + THTensor_(set1d)(output, 0, sum); +} + +void THNN_(L1Cost_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput) +{ + THNN_CHECK_NELEMENT(input, gradOutput); + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY2(real, gradInput, real, input, + if (*input_data > 0) + *gradInput_data = 1; + else if (*input_data < 0) + *gradInput_data = -1; + else + *gradInput_data = 0; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LeakyReLU.c b/contrib/lua-torch/nn/lib/THNN/generic/LeakyReLU.c new file mode 100644 index 000000000..074047d83 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/LeakyReLU.c @@ -0,0 +1,57 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LeakyReLU.c" +#else + +void THNN_(LeakyReLU_updateOutput)( + 
THNNState *state, + THTensor *input, + THTensor *output, + accreal negval_, + bool inplace) +{ + real negval = TH_CONVERT_ACCREAL_TO_REAL(negval_); + if (inplace) + { + TH_TENSOR_APPLY(real, input, + if (*input_data <= 0) + *input_data *= negval; + ); + THTensor_(set)(output, input); + } + else + { + THTensor_(resizeAs)(output, input); + TH_TENSOR_APPLY2(real, output, real, input, + *output_data = *input_data > 0 ? *input_data : *input_data * negval; + ); + } +} + +void THNN_(LeakyReLU_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + accreal negval_, + bool inplace) +{ + real negval = TH_CONVERT_ACCREAL_TO_REAL(negval_); + THNN_CHECK_NELEMENT(input, gradOutput); + if (inplace) + { + TH_TENSOR_APPLY2(real, gradOutput, real, input, + if (*input_data <= 0) + *gradOutput_data *= negval; + ); + THTensor_(set)(gradInput, gradOutput); + } + else + { + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, + *gradInput_data = *input_data > 0 ? *gradOutput_data : *gradOutput_data * negval; + ); + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Linear.c b/contrib/lua-torch/nn/lib/THNN/generic/Linear.c new file mode 100644 index 000000000..8c5cd115e --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/Linear.c @@ -0,0 +1,114 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Linear.c" +#else + +void THNN_(Linear_updateAddBuffer)( + THNNState *state, + THTensor *input, + THTensor *addBuffer) +{ + long nframe = THTensor_(size)(input,0); + long nElement = THTensor_(nElement)(addBuffer); + if (nElement != nframe) { + THTensor_(resize1d)(addBuffer,nframe); + THTensor_(fill)(addBuffer,1.0); + } +} + +void THNN_(Linear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *addBuffer) +{ + long dim = THTensor_(nDimension)(input); + if (dim == 1) { + THTensor_(resize1d)(output,THTensor_(size)(weight,0)); + if (bias) { + THTensor_(copy)(output,bias); + } + else { + THTensor_(zero)(output); + } + THTensor_(addmv)(output,1,output,1,weight,input); + } + else if (dim == 2) { + long nframe = THTensor_(size)(input,0); + long nElement = THTensor_(nElement)(output); + THTensor_(resize2d)(output,nframe,THTensor_(size)(weight,0)); + if (THTensor_(nElement)(output) != nElement) { + THTensor_(zero)(output); + } + THNN_(Linear_updateAddBuffer)(state,input,addBuffer); + THTensor *tweight = THTensor_(new)(); + THTensor_(transpose)(tweight,weight,0,1); + THTensor_(addmm)(output,0,output,1,input,tweight); + THTensor_(free)(tweight); + if (bias) { + THTensor_(addr)(output,1,output,1,addBuffer,bias); + } + } +} + +void THNN_(Linear_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight) +{ + if (gradInput) { + long nElement = THTensor_(nElement)(gradInput); + THTensor_(resizeAs)(gradInput,input); + if (THTensor_(nElement)(gradInput) != nElement) { + THTensor_(zero)(gradInput); + } + + long dim = THTensor_(nDimension)(input); + if (dim == 1) { + THTensor *tweight = THTensor_(new)(); + THTensor_(transpose)(tweight,weight,0,1); + THTensor_(addmv)(gradInput,0,gradInput,1,tweight,gradOutput); + THTensor_(free)(tweight); + } + else if (dim == 2) { + THTensor_(addmm)(gradInput,0,gradInput,1,gradOutput,weight); + } + } +} + +void THNN_(Linear_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor 
*weight, + THTensor *bias, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *addBuffer, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + long dim = THTensor_(nDimension)(input); + if (dim == 1) { + THTensor_(addr)(gradWeight,1,gradWeight,scale,gradOutput,input); + if (bias) { + THTensor_(cadd)(gradBias,gradBias,scale,gradOutput); + } + } + else if (dim == 2) { + THTensor *tgradOutput = THTensor_(new)(); + THTensor_(transpose)(tgradOutput,gradOutput,0,1); + THTensor_(addmm)(gradWeight,1,gradWeight,scale,tgradOutput,input); + if (bias) { + THNN_(Linear_updateAddBuffer)(state,input,addBuffer); + THTensor_(addmv)(gradBias,1,gradBias,scale,tgradOutput,addBuffer); + } + THTensor_(free)(tgradOutput); + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LogSigmoid.c b/contrib/lua-torch/nn/lib/THNN/generic/LogSigmoid.c new file mode 100644 index 000000000..651d56002 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/LogSigmoid.c @@ -0,0 +1,36 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogSigmoid.c" +#else + +void THNN_(LogSigmoid_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *buffer) +{ + THTensor_(resizeAs)(output, input); + THTensor_(resizeAs)(buffer, input); + + TH_TENSOR_APPLY3(real, output, real, input, real, buffer, + real z = exp(-*input_data); + *buffer_data = z; + *output_data = -log(1. + z); + ); +} + +void THNN_(LogSigmoid_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *buffer) +{ + THNN_CHECK_NELEMENT(input, gradOutput); + THTensor_(resizeAs)(gradInput, buffer); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer, + real z = *buffer_data; + *gradInput_data = *gradOutput_data * z / (1. 
+ z); + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c b/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c new file mode 100644 index 000000000..a7280422b --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c @@ -0,0 +1,137 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogSoftMax.c" +#else + +void THNN_(LogSoftMax_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output) +{ + real *input_data, *output_data; + ptrdiff_t nframe = 0, dim = 0, stride = 0; + ptrdiff_t t, d; + + if (input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + stride = 1; + } + else if (input->nDimension == 2) + { + nframe = input->size[0]; + dim = input->size[1]; + stride = 1; + } + else if (input->nDimension == 3) + { + nframe = 1; + dim = input->size[0]; + stride = input->size[1]*input->size[2]; + } + else if (input->nDimension == 4) + { + nframe = input->size[0]; + dim = input->size[1]; + stride = input->size[2]*input->size[3]; + } + else + THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected"); + + input = THTensor_(newContiguous)(input); + THTensor_(resizeAs)(output, input); + + real *input_data0 = THTensor_(data)(input); + real *output_data0 = THTensor_(data)(output); + + accreal logsum; + real maxInput; + #pragma omp parallel for private(t, d, maxInput, logsum, input_data, output_data) + for (t = 0; t < stride*nframe; t++) + { + logsum = 0; + maxInput = -THInf; + input_data = input_data0 + (t/stride)*dim*stride + t % stride; + output_data = output_data0 + (t/stride)*dim*stride + t % stride; + + for (d = 0; d < dim; d++) + maxInput = THMax(maxInput, input_data[d*stride]); + + for (d = 0; d < dim; d++) + logsum += exp(input_data[d*stride] - maxInput); + logsum = maxInput + log(logsum); + + for (d = 0; d < dim; d++) + output_data[d*stride] = input_data[d*stride] - logsum; + } + + THTensor_(free)(input); +} + +void THNN_(LogSoftMax_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output) +{ + THNN_CHECK_SHAPE(input, gradOutput); + real *gradInput_data, *gradOutput_data, *output_data; + ptrdiff_t nframe = 0, dim = 0, stride = 0; + ptrdiff_t t, d; + + if (output->nDimension == 1) + { + nframe = 1; + dim = output->size[0]; + stride = 1; + } + else if (output->nDimension == 2) + { + nframe = output->size[0]; + dim = output->size[1]; + stride = 1; + } + else if (output->nDimension == 3) + { + nframe = 1; + dim = output->size[0]; + stride = output->size[1]*output->size[2]; + } + else if (output->nDimension == 4) + { + nframe = output->size[0]; + dim = output->size[1]; + stride = output->size[2]*output->size[3]; + } + else + THError("1D, 2D, 3D or 4D tensor expected"); + + output = THTensor_(newContiguous)(output); + gradOutput = THTensor_(newContiguous)(gradOutput); + + THTensor_(resizeAs)(gradInput, output); + real *gradInput_data0 = THTensor_(data)(gradInput); + real *output_data0 = THTensor_(data)(output); + real *gradOutput_data0 = THTensor_(data)(gradOutput); + accreal sum; + #pragma omp parallel for private(t, sum, d, gradInput_data, output_data, gradOutput_data) + for (t = 0; t < stride*nframe; t++) + { + sum = 0; + gradInput_data = gradInput_data0 + (t/stride)*dim*stride + t % stride; + output_data = output_data0 + (t/stride)*dim*stride + t % stride; + gradOutput_data = gradOutput_data0 + (t/stride)*dim*stride + t % stride; + + for (d = 0; d < dim; d++) + sum += gradOutput_data[d*stride]; + + for (d = 0; d < dim; d++) + gradInput_data[d*stride] = 
gradOutput_data[d*stride] - exp(output_data[d*stride])*sum; + } + + THTensor_(free)(gradOutput); + THTensor_(free)(output); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LookupTable.c b/contrib/lua-torch/nn/lib/THNN/generic/LookupTable.c new file mode 100644 index 000000000..46bc2c3c1 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/LookupTable.c @@ -0,0 +1,225 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LookupTable.c" +#else + +static void THNN_(LookupTable_resetCount)( + THInteger_t *count_data, + THIndexTensor *input) +{ + ptrdiff_t i; + THIndex_t *input_data = THIndexTensor_(data)(input); + ptrdiff_t numel = THIndexTensor_(nElement)(input); + + for (i = 0; i<numel; i++) + { + long k = input_data[i] - TH_INDEX_BASE; + count_data[k] = 0; + } + for (i = 0; i<numel; i++) + { + long k = input_data[i] - TH_INDEX_BASE; + count_data[k]++; + } +} + +void THNN_(LookupTable_accGradParameters)( + THNNState *state, + THIndexTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THIntegerTensor *count, + THTensor *sorted, + THIndexTensor *indices, + bool scaleGradByFreq, + int paddingValue, + accreal ascale) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(ascale); + ptrdiff_t i; + THInteger_t *count_data = NULL; + + if (scaleGradByFreq) + { + THIntegerTensor_(resize1d)(count, gradWeight->size[0]); + count_data = THIntegerTensor_(data)(count); + } + + if (!THTensor_(isContiguous)(gradWeight)) + THError("gradWeight must be contiguous"); + if (!THIndexTensor_(isContiguous)(input)) + THError("input must be contiguous"); + if (THIndexTensor_(nDimension)(input) != 1 && THIndexTensor_(nDimension)(input) != 2) { + THDescBuff s1 = THIndexTensor_(sizeDesc)(input); + THError("input must be a vector or matrix, but is of shape: %s", s1.str); + } + + THIndex_t *input_data = THIndexTensor_(data)(input); + ptrdiff_t numel = THIndexTensor_(nElement)(input); + long numw = THTensor_(size)(gradWeight, 0); + + // check that inputs are all within range + for (i=0; i<numel; i++) + if (input_data[i] < TH_INDEX_BASE || input_data[i] >= numw + TH_INDEX_BASE) { + THError("inputs need to be in the range %ld <= input < %ld, " + "but got input of value: %ld", TH_INDEX_BASE, (numw + TH_INDEX_BASE), + input_data[i]); + } + + gradOutput = THTensor_(newContiguous)(gradOutput); + + real *gw = THTensor_(data)(gradWeight); + real *go = THTensor_(data)(gradOutput); + long stride = THTensor_(stride)(gradWeight, 0); + + if (count_data) + THNN_(LookupTable_resetCount)(count_data, input); + +#ifdef _OPENMP + if (numel > 1000) + { + // The strategy is to parallelize over sections of the vocabulary, so that + // thread 1 handles updates to gradWeight[0..nVocab/nThreads]. Every thread + // has to traverse the entire input, but the dominating factor is the axpy + // BLAS call. 
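+    // Worked illustration with made-up numbers (not part of the API): with
+    // numw = 10 rows and nthreads = 4, each thread owns a slice of
+    // numw/nthreads + 1 = 3 rows, so thread 0 covers k in [0,3), thread 1
+    // [3,6), thread 2 [6,9) and thread 3 [9,12), clipped by the valid range.
+    // Every thread scans the whole input, but only applies the axpy for rows
+    // in its own slice, so no two threads ever write the same gradWeight row.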
+    #pragma omp parallel private(i)
+    {
+      int tid = omp_get_thread_num();
+      int nthreads = omp_get_num_threads();
+
+      long start = tid * (numw/nthreads + 1);
+      long end = start + (numw/nthreads + 1);
+      for (i=0; i<numel; i++)
+      {
+        if (input_data[i] != paddingValue)
+        {
+          long k = input_data[i] - TH_INDEX_BASE;
+          if (k >= start && k < end)
+          {
+            real scale_ = scale;
+            if (count_data) scale_ /= count_data[k];
+            THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
+          }
+        }
+      }
+    }
+
+    THTensor_(free)(gradOutput);
+    return;
+  }
+#endif
+
+  for (i=0; i<numel; i++)
+  {
+    if (input_data[i] != paddingValue)
+    {
+      long k = input_data[i] - TH_INDEX_BASE;
+      real scale_ = scale;
+      if (count_data) scale_ /= count_data[k];
+      THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
+    }
+  }
+
+  THTensor_(free)(gradOutput);
+}
+
+/*
+ * Keep the norm of weight smaller than maxNorm
+ */
+
+static void THNN_(LookupTable_renormRow)(
+          real *row_data,
+          long stride,
+          real maxNorm,
+          real normType)
+{
+  real norm = 0;
+  real new_norm;
+  long j;
+  for (j=0; j<stride; j++)
+  {
+    if (normType == 1) {
+      norm += fabs(row_data[j]);
+    } else if (normType == 2) {
+      norm += row_data[j] * row_data[j];
+    } else {
+      norm += pow(fabs(row_data[j]), normType);
+    }
+  }
+  norm = pow(norm, 1.0 / normType);
+  if (norm > maxNorm)
+  {
+    new_norm = maxNorm / (norm + 1e-7);
+    for (j=0; j<stride; j++) {
+      row_data[j] *= new_norm;
+    }
+  }
+}
+
+static int THNN_(compare_THIndex)(const void* a, const void* b)
+{
+  return *(const THIndex_t*)a < *(const THIndex_t*)b ? -1 : 1;
+}
+
+void THNN_(LookupTable_renorm)(
+          THNNState *state,
+          THIndexTensor *idx,
+          THTensor *weight,
+          accreal maxNorm_,
+          accreal normType_)
+{
+  real maxNorm = TH_CONVERT_ACCREAL_TO_REAL(maxNorm_);
+  real normType = TH_CONVERT_ACCREAL_TO_REAL(normType_);
+  if (!THTensor_(isContiguous)(weight))
+    THError("weight must be contiguous");
+  if (!THIndexTensor_(isContiguous)(idx))
+    THError("input must be contiguous");
+  if (THIndexTensor_(nDimension)(idx) != 1)
+    THError("idx must be a vector");
+  if (normType <= 0)
+    THError("non-positive norm not supported");
+
+  ptrdiff_t i;
+  THIndex_t *row_idx = THIndexTensor_(data)(idx);
+  ptrdiff_t numel = THIndexTensor_(nElement)(idx);
+
+  long numw = THTensor_(size)(weight, 0);
+  long stride = THTensor_(stride)(weight, 0);
+  real *gw = THTensor_(data)(weight);
+  for (i=0; i<numel; i++) {
+    if (row_idx[i] < TH_INDEX_BASE || row_idx[i] >= numw + TH_INDEX_BASE) {
+      THError("inputs need to be in the range %ld <= input < %ld, "
+              "but got input of value: %ld", TH_INDEX_BASE, (numw + TH_INDEX_BASE),
+              row_idx[i]);
+    }
+  }
+  // get unique indices
+  qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
+  ptrdiff_t ptr = 0;
+  for (i=0; i<numel; i++)
+    if (i == 0 || row_idx[i] != row_idx[i-1])
+      row_idx[ptr++] = row_idx[i];
+  numel = ptr;
+
+#ifdef _OPENMP
+  if (numel > 1000)
+  {
+    // The strategy is to parallelize over the rows that appear in
+    // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
+    // This distributes the work evenly to each thread.
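+    // For intuition, a hedged example with made-up numbers: with normType = 2
+    // and maxNorm = 1, a row [3, 4] has L2 norm 5 > maxNorm, so renormRow
+    // rescales it by 1 / (5 + 1e-7) to roughly [0.6, 0.8]; rows whose norm is
+    // already within maxNorm are left untouched.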
+ #pragma omp parallel for private(i) + for (i=0; i<numel; i++) + { + long k = row_idx[i] - TH_INDEX_BASE; + THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType); + } + return; + } +#endif + for (i=0; i<numel; i++) + { + long k = row_idx[i] - TH_INDEX_BASE; + THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType); + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/MSECriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/MSECriterion.c new file mode 100644 index 000000000..58911f6f0 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/MSECriterion.c @@ -0,0 +1,45 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MSECriterion.c" +#else + +void THNN_(MSECriterion_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage) +{ + THNN_CHECK_NELEMENT(input, target); + THNN_CHECK_DIM_SIZE(output, 1, 0, 1); + + real sum = 0; + + TH_TENSOR_APPLY2(real, input, real, target, + real z = (*input_data - *target_data); + sum += z*z; + ); + + if (sizeAverage) + sum /= THTensor_(nElement)(input); + + THTensor_(set1d)(output, 0, sum); +} + +void THNN_(MSECriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage) +{ + THNN_CHECK_NELEMENT(input, target); + + real norm = (sizeAverage ? 2./((real)THTensor_(nElement)(input)) : 2.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = norm * (*input_data - *target_data); + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/MarginCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/MarginCriterion.c new file mode 100644 index 000000000..d6d9b60b9 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/MarginCriterion.c @@ -0,0 +1,47 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MarginCriterion.c" +#else + +void THNN_(MarginCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage, + accreal margin_) +{ + real margin = TH_CONVERT_ACCREAL_TO_REAL(margin_); + THNN_CHECK_NELEMENT(input, target); + THNN_CHECK_DIM_SIZE(output, 1, 0, 1); + real sum = 0; + + TH_TENSOR_APPLY2(real, input, real, target, + real z = (margin - *input_data * *target_data); + sum += z>0 ? z : 0; + ); + + if (sizeAverage) + sum /= THTensor_(nElement)(input); + + THTensor_(set1d)(output, 0, sum); +} + +void THNN_(MarginCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage, + accreal margin_) +{ + real margin = TH_CONVERT_ACCREAL_TO_REAL(margin_); + THNN_CHECK_NELEMENT(input, target); + real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = (*input_data * *target_data) < margin ? 
-norm * *target_data : 0; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/MultiLabelMarginCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/MultiLabelMarginCriterion.c new file mode 100644 index 000000000..16398c13c --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/MultiLabelMarginCriterion.c @@ -0,0 +1,184 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MultiLabelMarginCriterion.c" +#else + +// TODO: improve error messages +void THNN_(MultiLabelMarginCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *output, + THTensor *isTarget, + bool sizeAverage) +{ + real *input_data, *isTarget_data; + THIndex_t *target_data; + long nframe, dim; + long t, d, dt, ddt; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, + "vector or matrix expected"); + + if (input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, + "inconsistent target size"); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) + && (target->size[1] == dim), 3, "inconsistent target size"); + } + + THArgCheck(THIndexTensor_(minall)(target) >= -1+TH_INDEX_BASE, 3, "target out of range"); + THArgCheck(THIndexTensor_(maxall)(target) < dim+TH_INDEX_BASE, 3, "target out of range"); + + target = THIndexTensor_(newContiguous)(target); + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THIndexTensor_(data)(target); + + THNN_resizeAs_indices(isTarget, target); + THTensor_(zero)(isTarget); + isTarget_data = THTensor_(data)(isTarget); + + sum = 0; + for (t = 0; t < nframe; t++) + { + for (ddt = 0; ddt < dim; ddt++) + { + THIndex_t target_idx = target_data[ddt] - TH_INDEX_BASE; + if (target_idx < 0) + break; + isTarget_data[target_idx] = 1; + } + for (dt = 0; dt < dim; dt++) + { + THIndex_t target_idx = target_data[dt] - TH_INDEX_BASE; + real input_target; + if (target_idx < 0) + break; + + input_target = input_data[target_idx]; + for (d = 0; d < dim; d++) + { + if (!isTarget_data[d]) + { + real z = 1 - input_target + input_data[d]; + if (z > 0) + sum += z; + } + } + } + input_data += dim; + target_data += dim; + isTarget_data += dim; + } + + sum /= dim; + if (sizeAverage) + sum /= nframe; + + THTensor_(set1d)(output, 0, sum); + + THTensor_(free)(input); + THIndexTensor_(free)(target); +} + +void THNN_(MultiLabelMarginCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *gradInput, + THTensor *isTarget, + bool sizeAverage) +{ + real *input_data; + real *gradInput_data; + THIndex_t *target_data; + real *isTarget_data; + long nframe, dim; + long t, d, dt; + real g; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, + "vector or matrix expected"); + + if (input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, + "inconsistent target size"); + THArgCheck((isTarget->nDimension == 1) && (isTarget->size[0] == dim), 3, + "inconsistent isTarget size"); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) + && (target->size[1] == dim), 3, "inconsistent target size"); + THArgCheck((isTarget->nDimension == 2) && (isTarget->size[0] == nframe) + && (isTarget->size[1] == dim), 3, "inconsistent 
isTarget size"); + } + + THArgCheck(THIndexTensor_(minall)(target) >= -1+TH_INDEX_BASE, 3, "target out of range"); + THArgCheck(THIndexTensor_(maxall)(target) < dim+TH_INDEX_BASE, 3, "target out of range"); + + THArgCheck(THTensor_(minall)(isTarget) >= 0, 3, "isTarget out of range"); + THArgCheck(THTensor_(maxall)(isTarget) <= 1, 3, "isTarget out of range"); + + target = THIndexTensor_(newContiguous)(target); + input = THTensor_(newContiguous)(input); + isTarget = THTensor_(newContiguous)(isTarget); + input_data = THTensor_(data)(input); + target_data = THIndexTensor_(data)(target); + isTarget_data = THTensor_(data)(isTarget); + + g = sizeAverage ? ( 1./((real)(nframe*dim)) ) : ( 1./((real)dim) ); + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + gradInput_data = THTensor_(data)(gradInput); + + for (t = 0; t < nframe; t++) + { + for (dt = 0; dt < dim; dt++) + { + THIndex_t target_idx = target_data[dt] - TH_INDEX_BASE; + real input_target; + if (target_idx < 0) + break; + + input_target = input_data[target_idx]; + for (d = 0; d < dim; d++) + { + if (!isTarget_data[d]) + { + real z = 1 - input_target + input_data[d]; + if (z > 0) + { + gradInput_data[target_idx] -= g; + gradInput_data[d] += g; + } + } + } + } + input_data += dim; + target_data += dim; + isTarget_data += dim; + gradInput_data += dim; + } + + THTensor_(free)(input); + THIndexTensor_(free)(target); + THTensor_(free)(isTarget); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/MultiMarginCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/MultiMarginCriterion.c new file mode 100644 index 000000000..2f8f8ff58 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/MultiMarginCriterion.c @@ -0,0 +1,168 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MultiMarginCriterion.c" +#else + +// TODO: improve error messages +void THNN_(MultiMarginCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *output, + bool sizeAverage, + int p, + THTensor *weights, + accreal margin_) +{ + real margin = TH_CONVERT_ACCREAL_TO_REAL(margin_); + real *input_data, *weights_data; + THIndex_t *target_data; + long nframe, dim; + long t, d; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, + "vector or matrix expected"); + + if (input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, + "inconsistent target size"); + } + + for (t = 0; t < nframe; t++) + { + THIndex_t idx = THIndexTensor_(get1d)(target, t); + THArgCheck((idx >= TH_INDEX_BASE) && (idx < dim + TH_INDEX_BASE), 3, + "target out of range"); + } + + input = THTensor_(newContiguous)(input); + target = THIndexTensor_(newContiguous)(target); + weights = weights ? THTensor_(newContiguous)(weights) : NULL; + input_data = THTensor_(data)(input); + target_data = THIndexTensor_(data)(target); + weights_data = weights ? THTensor_(data)(weights) : NULL; + + sum = 0; + for (t = 0; t < nframe; t++) + { + THIndex_t target_idx = target_data[t] - TH_INDEX_BASE; + real input_target = input_data[target_idx]; + for (d = 0; d < dim; d++) + { + real z = margin - input_target + input_data[d]; + if (d == target_idx) + continue; + + if (z > 0) { + real h = (p==1) ? 
z : z*z; + if(weights_data) + h *= weights_data[target_idx]; + sum += h; + } + } + input_data += dim; + } + + sum /= dim; + if(sizeAverage) + sum /= nframe; + + THTensor_(set1d)(output, 0, sum); + + THTensor_(free)(input); + THIndexTensor_(free)(target); + if(weights) + THTensor_(free)(weights); +} + +void THNN_(MultiMarginCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *gradInput, + bool sizeAverage, + int p, + THTensor *weights, + accreal margin_) +{ + real margin = TH_CONVERT_ACCREAL_TO_REAL(margin_); + real *input_data; + real *gradInput_data; + THIndex_t *target_data; + real *weights_data; + long nframe, dim; + long t, d; + real g; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, + "vector or matrix expected"); + + if (input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, + "inconsistent target size"); + } + + g = (sizeAverage ? 1./((real)(nframe*dim)) : 1./((real)dim)); + + input = THTensor_(newContiguous)(input); + target = THIndexTensor_(newContiguous)(target); + input_data = THTensor_(data)(input); + + THTensor_(resizeAs)(gradInput, input); + gradInput_data = THTensor_(data)(gradInput); + + target_data = THIndexTensor_(data)(target); + weights = weights ? THTensor_(newContiguous)(weights) : NULL; + weights_data = weights ? THTensor_(data)(weights) : NULL; + + for (t = 0; t < nframe; t++) + { + THIndex_t target_idx = target_data[t] - TH_INDEX_BASE; + real input_target = input_data[target_idx]; + real gradInput_target = 0; + for (d = 0; d < dim; d++) + { + real z = margin - input_target + input_data[d]; + if (d == target_idx) + continue; + + if (z > 0) + { + real h = (p == 1) ? g : 2*g*z; + if(weights_data) + h *= weights_data[target_idx]; + gradInput_target -= h; + gradInput_data[d] = h; + } + else + gradInput_data[d] = 0; + } + gradInput_data[target_idx] = gradInput_target; + + input_data += dim; + gradInput_data += dim; + } + + THTensor_(free)(input); + THIndexTensor_(free)(target); + if(weights) + THTensor_(free)(weights); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/PReLU.c b/contrib/lua-torch/nn/lib/THNN/generic/PReLU.c new file mode 100644 index 000000000..488322fde --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/PReLU.c @@ -0,0 +1,207 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/PReLU.c" +#else + +void THNN_(PReLU_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THIndex_t nOutputPlane) +{ + THTensor_(resizeAs)(output, input); + + if (nOutputPlane == 0) + { + // handle shared parameter case + real w = *THTensor_(data)(weight); + TH_TENSOR_APPLY2(real, output, real, input, + *output_data = (*input_data > 0) ? *input_data : w*(*input_data); + ); + } + else + { + input = THTensor_(newContiguous)(input); + long bs = 1, ks = 1; + { + long input_ndim = THTensor_(nDimension)(input); + if (input->size[input_ndim > 1] != nOutputPlane) + THError("Wrong number of input planes. 
Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); + + if (input_ndim > 1) { + bs = input->size[0]; + for (int d = 2; d < input_ndim; d++) { + ks *= input->size[d]; + } + } + } + + real *output_data = THTensor_(data)(output); + real *input_data = THTensor_(data)(input); + real *weight_data = THTensor_(data)(weight); + THIndex_t i, j, k; +#pragma omp parallel for private(j,k) + for (i = 0; i < bs; ++i) + { + real* n_input_data = input_data + i*nOutputPlane*ks; + real* n_output_data = output_data + i*nOutputPlane*ks; + for (j = 0; j < nOutputPlane; ++j) + { + for (k = 0; k < ks; ++k) + n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : weight_data[j] * n_input_data[k]; + n_input_data += ks; + n_output_data += ks; + } + } + THTensor_(free)(input); + } +} + +void THNN_(PReLU_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THIndex_t nOutputPlane) +{ + THNN_CHECK_NELEMENT(input, gradOutput); + THTensor_(resizeAs)(gradInput, input); + + if (nOutputPlane == 0) + { + real w = THTensor_(data)(weight)[0]; + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, + if ((*input_data) > 0) + *gradInput_data = *gradOutput_data; + else + *gradInput_data = w * (*gradOutput_data); + ); + } + else + { + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + weight = THTensor_(newContiguous)(weight); + const real *input_data = THTensor_(data)(input); + const real *gradOutput_data = THTensor_(data)(gradOutput); + const real *weight_data = THTensor_(data)(weight); + real *gradInput_data = THTensor_(data)(gradInput); + + long bs = 1, ks = 1; + { + long input_ndim = THTensor_(nDimension)(input); + if (input->size[input_ndim > 1] != nOutputPlane) + THError("Wrong number of input planes. 
Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); + + if (input_ndim > 1) { + bs = input->size[0]; + for (int d = 2; d < input_ndim; d++) { + ks *= input->size[d]; + } + } + } + + THIndex_t i, j, k; +#pragma omp parallel for private(j,k) + for (i = 0; i < bs; ++i) + { + const real *n_input_data = input_data + i*nOutputPlane*ks; + const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks; + real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks; + + for (j = 0; j < nOutputPlane; ++j) + { + real w = weight_data[j]; + for (k = 0; k < ks; ++k) + { + if (n_input_data[k] > 0) + n_gradInput_data[k] = n_gradOutput_data[k]; + else + n_gradInput_data[k] = n_gradOutput_data[k] * w; + } + n_input_data += ks; + n_gradInput_data += ks; + n_gradOutput_data += ks; + } + } + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); + } +} + +void THNN_(PReLU_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *gradWeight, + THTensor *gradWeightBuf, + THTensor *gradWeightBuf2, + THIndex_t nOutputPlane, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + THNN_CHECK_NELEMENT(input, gradOutput); + + if (nOutputPlane == 0) + { + real *gradWeight_data = THTensor_(data)(gradWeight); + real sum = 0; + TH_TENSOR_APPLY2(real, input, real, gradOutput, + if ((*input_data) <= 0) + sum += (*input_data) * (*gradOutput_data); + ); + gradWeight_data[0] += scale * sum; + } + else + { + THArgCheck(THTensor_(isContiguous)(gradWeight), 6, "gradWeight needs to be contiguous"); + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + weight = THTensor_(newContiguous)(weight); + long bs = 1, ks = 1; + { + long input_ndim = THTensor_(nDimension)(input); + if (input->size[input_ndim > 1] != nOutputPlane) + THError("Wrong number of input planes. 
Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); + + if (input_ndim > 1) { + bs = input->size[0]; + for (int d = 2; d < input_ndim; d++) { + ks *= input->size[d]; + } + } + } + + const real *input_data = THTensor_(data)(input); + const real *gradOutput_data = THTensor_(data)(gradOutput); + const real *weight_data = THTensor_(data)(weight); + real *gradWeight_data = THTensor_(data)(gradWeight); + + THIndex_t i, j, k; + for (i = 0; i < bs; ++i) + { + const real *n_input_data = input_data + i*nOutputPlane*ks; + const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks; + + for (j = 0; j < nOutputPlane; ++j) + { + real sum = 0; + for (k = 0; k < ks; ++k) + if (n_input_data[k] <= 0) + sum += n_gradOutput_data[k] * n_input_data[k]; + gradWeight_data[j] += scale * sum; + n_input_data += ks; + n_gradOutput_data += ks; + } + } + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/RReLU.c b/contrib/lua-torch/nn/lib/THNN/generic/RReLU.c new file mode 100644 index 000000000..8fd46d3c2 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/RReLU.c @@ -0,0 +1,132 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/RReLU.c" +#else + +void THNN_(RReLU_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *noise, + accreal lower_, + accreal upper_, + bool train, + bool inplace, + THGenerator *generator) +{ + real lower = TH_CONVERT_ACCREAL_TO_REAL(lower_); + real upper = TH_CONVERT_ACCREAL_TO_REAL(upper_); + if (train) + { + // get default random generator + THTensor_(resizeAs)(noise, input); + if (inplace) + { + TH_TENSOR_APPLY2(real, input, real, noise, + if (*input_data <= 0) + { + const real r = (real)THRandom_uniform(generator, lower, upper); + *input_data = (*input_data) * r; + *noise_data = r; + } + else + { + *noise_data = 1; + } + ); + THTensor_(set)(output, input); + } + else + { + THTensor_(resizeAs)(output, input); + TH_TENSOR_APPLY3(real, input, real, output, real, noise, + if (*input_data <= 0) + { + const real r = (real)THRandom_uniform(generator, lower, upper); + *output_data = (*input_data) * r; + *noise_data = r; + } + else + { + *output_data = *input_data; + *noise_data = 1; + } + ); + } + } + else + { + const real negSlope = (lower + upper) / 2; + if (inplace) + { + TH_TENSOR_APPLY(real, input, + if (*input_data <= 0) + { + *input_data = *input_data * negSlope; + } + ); + THTensor_(set)(output, input); + } + else + { + THTensor_(resizeAs)(output, input); + TH_TENSOR_APPLY2(real, input, real, output, + const real r = (*input_data) <= 0 ? negSlope : 1; + *output_data = *input_data * r; + ); + } + } +} + +void THNN_(RReLU_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *noise, + accreal lower_, + accreal upper_, + bool train, + bool inplace) +{ + real lower = TH_CONVERT_ACCREAL_TO_REAL(lower_); + real upper = TH_CONVERT_ACCREAL_TO_REAL(upper_); + THNN_CHECK_NELEMENT(input, gradOutput); + if (train && upper - lower > 1E-6) // e.g. 
if upper == lower, RReLU behaves like LeakyReLU + { + // multiply the gradient by the noise tensor + if (inplace) + { + THTensor_(cmul)(gradOutput, gradOutput, noise); + THTensor_(set)(gradInput, gradOutput); + } + else + { + THTensor_(resizeAs)(gradInput, input); + THTensor_(cmul)(gradInput, gradOutput, noise); + } + } + else + { + // use constant factor for negative input values + const real negSlope = (lower + upper) / 2; + if (inplace) + { + TH_TENSOR_APPLY2(real, gradOutput, real, input, + if (*input_data <= 0) + { + *gradOutput_data = (*gradOutput_data) * negSlope; + } + ); + THTensor_(set)(gradInput, gradOutput); + } + else + { + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, + *gradInput_data = (*input_data) <= 0 ? (*gradOutput_data) * negSlope : (*gradOutput_data); + ); + } + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Sigmoid.c b/contrib/lua-torch/nn/lib/THNN/generic/Sigmoid.c new file mode 100644 index 000000000..17fb2cb4d --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/Sigmoid.c @@ -0,0 +1,28 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Sigmoid.c" +#else + +void THNN_(Sigmoid_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output) +{ + THTensor_(sigmoid)(output, input); +} + +void THNN_(Sigmoid_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output) +{ + THNN_CHECK_NELEMENT(output, gradOutput); + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, + real z = *output_data; + *gradInput_data = *gradOutput_data * (1. - z) * z; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SmoothL1Criterion.c b/contrib/lua-torch/nn/lib/THNN/generic/SmoothL1Criterion.c new file mode 100644 index 000000000..d1928d11c --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SmoothL1Criterion.c @@ -0,0 +1,49 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SmoothL1Criterion.c" +#else + +void THNN_(SmoothL1Criterion_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage) +{ + THNN_CHECK_NELEMENT(input, target); + THNN_CHECK_DIM_SIZE(output, 1, 0, 1); + + real sum = 0; + TH_TENSOR_APPLY2(real, input, real, target, + real z = fabs(*input_data - *target_data); + sum += z < 1 ? 0.5*z*z : z - 0.5; + ); + + if (sizeAverage) + sum /= THTensor_(nElement)(input); + + THTensor_(set1d)(output, 0, sum); +} + +void THNN_(SmoothL1Criterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage) +{ + THNN_CHECK_NELEMENT(input, target); + real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + real x = *input_data - *target_data; + if (x < -1.) + *gradInput_data = - norm; + else if (x > 1.) 
+ *gradInput_data = norm; + else + *gradInput_data = norm * x; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SoftMarginCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/SoftMarginCriterion.c new file mode 100644 index 000000000..bac0a3b53 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SoftMarginCriterion.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftMarginCriterion.c" +#else + +void THNN_(SoftMarginCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage) +{ + THNN_CHECK_NELEMENT(input, target); + THNN_CHECK_DIM_SIZE(output, 1, 0, 1); + + real sum; + + sum = 0; + TH_TENSOR_APPLY2(real, input, real, target, + real z = log(1. + exp(-*input_data* *target_data)); + sum += z;) + + if(sizeAverage) + sum /= THTensor_(nElement)(input); + + THTensor_(set1d)(output, 0, sum); +} + +void THNN_(SoftMarginCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage) +{ + THNN_CHECK_NELEMENT(input, target); + real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + real z = exp(-*target_data * *input_data); + *gradInput_data = -norm*(*target_data)*z/(1. + z);) +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SoftMax.c b/contrib/lua-torch/nn/lib/THNN/generic/SoftMax.c new file mode 100644 index 000000000..7b60d64c2 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SoftMax.c @@ -0,0 +1,150 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftMax.c" +#else + +void THNN_(SoftMax_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output) +{ + real *input_data, *output_data; + ptrdiff_t nframe = 0, dim = 0, stride = 0; + ptrdiff_t t; + + if (input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + stride = 1; + } + else if (input->nDimension == 2) + { + nframe = input->size[0]; + dim = input->size[1]; + stride = 1; + } + else if (input->nDimension == 3) + { + nframe = 1; + dim = input->size[0]; + stride = input->size[1]*input->size[2]; + } + else if (input->nDimension == 4) + { + nframe = input->size[0]; + dim = input->size[1]; + stride = input->size[2]*input->size[3]; + } + else + { + THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected"); + } + + input = THTensor_(newContiguous)(input); + THTensor_(resizeAs)(output, input); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(t) + for (t = 0; t < stride*nframe; t++) + { + real *input_ptr = input_data + (t/stride)*dim*stride + t % stride; + real *output_ptr = output_data + (t/stride)*dim*stride + t % stride; + + real inputMax = -THInf; + accreal sum; + + ptrdiff_t d; + for (d = 0; d < dim; d++) + { + if (input_ptr[d*stride] >= inputMax) inputMax = input_ptr[d*stride]; + } + + sum = 0; + for (d = 0; d < dim; d++) + { + real z = exp(input_ptr[d*stride] - inputMax); + output_ptr[d*stride] = z; + sum += z; + } + + for (d = 0; d < dim; d++) + { + output_ptr[d*stride] *= 1/sum; + } + } + + THTensor_(free)(input); +} + +void THNN_(SoftMax_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output) +{ + THNN_CHECK_SHAPE(input, gradOutput); + real *gradInput_data, *gradOutput_data, *output_data; + ptrdiff_t nframe = 0, dim = 0, stride = 0; + ptrdiff_t t; + + 
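+  /* The shape dispatch below mirrors SoftMax_updateOutput: the tensor is
+   * treated as nframe*stride independent fibers of length dim, each reduced
+   * along dim. As an illustrative reading of the 4D branch, a BxCxHxW input
+   * yields nframe = B, dim = C, stride = H*W, i.e. a softmax over channels
+   * at every spatial location. */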
if (output->nDimension == 1) + { + nframe = 1; + dim = output->size[0]; + stride = 1; + } + else if (output->nDimension == 2) + { + nframe = output->size[0]; + dim = output->size[1]; + stride = 1; + } + else if (output->nDimension == 3) + { + nframe = 1; + dim = output->size[0]; + stride = output->size[1]*output->size[2]; + } + else if (output->nDimension == 4) + { + nframe = output->size[0]; + dim = output->size[1]; + stride = output->size[2]*output->size[3]; + } + else + { + THError("1D, 2D, 3D or 4D tensor expected"); + } + + gradOutput = THTensor_(newContiguous)(gradOutput); + output = THTensor_(newContiguous)(output); + + THTensor_(resizeAs)(gradInput, output); + gradInput_data = THTensor_(data)(gradInput); + output_data = THTensor_(data)(output); + gradOutput_data = THTensor_(data)(gradOutput); + +#pragma omp parallel for private(t) + for (t = 0; t < stride*nframe; t++) + { + real *gradInput_ptr = gradInput_data + (t/stride)*dim*stride + t % stride; + real *output_ptr = output_data + (t/stride)*dim*stride + t % stride; + real *gradOutput_ptr = gradOutput_data + (t/stride)*dim*stride + t % stride; + + ptrdiff_t d; + accreal sum = 0; + for (d = 0; d < dim; d++) + sum += (accreal)gradOutput_ptr[d*stride] * output_ptr[d*stride]; + + for (d = 0; d < dim; d++) + gradInput_ptr[d*stride] = output_ptr[d*stride] * (gradOutput_ptr[d*stride] - sum); + } + + THTensor_(free)(gradOutput); + THTensor_(free)(output); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SoftPlus.c b/contrib/lua-torch/nn/lib/THNN/generic/SoftPlus.c new file mode 100644 index 000000000..6491e66d6 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SoftPlus.c @@ -0,0 +1,47 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftPlus.c" +#else + +void THNN_(SoftPlus_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal beta_, + accreal threshold_) +{ + real beta = TH_CONVERT_ACCREAL_TO_REAL(beta_); + real threshold = TH_CONVERT_ACCREAL_TO_REAL(threshold_); + THTensor_(resizeAs)(output, input); + + // f(x) = 1/beta * log(1 + exp(beta * x)) + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = (*input_data * beta) > threshold ? *input_data : THLog1p(exp(*input_data * beta)) / beta; + ); +} + +void THNN_(SoftPlus_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output, + accreal beta_, + accreal threshold_) +{ + real beta = TH_CONVERT_ACCREAL_TO_REAL(beta_); + real threshold = TH_CONVERT_ACCREAL_TO_REAL(threshold_); + THNN_CHECK_NELEMENT(input, gradOutput); + THTensor_(resizeAs)(gradInput, output); + + // d/dx[log(1+exp(k*x))/k] = exp(kx) / (exp(kx) + 1) + // SINCE + // y = (1/k)*log(1+exp(k*x)) --> x = (1/k)*log(exp(k*y)-1) + // THEREFORE: + // d/dx(f(x)) = (exp(k*y) - 1) / exp(k*y) + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, + real z = exp(*output_data * beta); + *gradInput_data = (*output_data * beta) > threshold ? 
*gradOutput_data : *gradOutput_data * (z - 1.)/z; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SoftShrink.c b/contrib/lua-torch/nn/lib/THNN/generic/SoftShrink.c new file mode 100644 index 000000000..e77950868 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SoftShrink.c @@ -0,0 +1,42 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftShrink.c" +#else + +void THNN_(SoftShrink_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal lambda_) +{ + real lambda = TH_CONVERT_ACCREAL_TO_REAL(lambda_); + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, + if ((*input_data) > lambda) + *output_data = *input_data - lambda; + else if ((*input_data) < -lambda) + *output_data = *input_data + lambda; + else + *output_data = 0; + ); +} + +void THNN_(SoftShrink_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + accreal lambda_) +{ + real lambda = TH_CONVERT_ACCREAL_TO_REAL(lambda_); + THNN_CHECK_NELEMENT(input, gradOutput); + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, + if ((*input_data) > lambda || (*input_data) < -lambda) + *gradInput_data = (*gradOutput_data); + else + *gradInput_data = 0; + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SparseLinear.c b/contrib/lua-torch/nn/lib/THNN/generic/SparseLinear.c new file mode 100644 index 000000000..1cf712212 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SparseLinear.c @@ -0,0 +1,564 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SparseLinear.c" +#else + +#ifdef _OPENMP +#include <omp.h> +#endif + +#define ROW_PTR2(t, r) (THTensor_(data)(t) + (r) * (t)->stride[0]) +#define COL_PTR2(t, c) (THTensor_(data)(t) + (c) * (t)->stride[1]) + +static bool THNN_(checkLegacyInput)(THTensor* t) +{ + return t->nDimension == 3 && t->size[2] == 2; +} + +static bool THNN_(checkInput)(THTensor* t) +{ + return t->nDimension == 2 && t->size[1] == 3; +} + +static bool THNN_(checkSize2D)(THTensor* t, long size0, long size1) +{ + return t->nDimension == 2 && t->size[0] == size0 && t->size[1] == size1; +} + +static bool THNN_(checkSize1D)(THTensor* t, long size0) +{ + return t->nDimension == 1 && t->size[0] == size0; +} + +static void THNN_(set1d)(THTensor *t, long x0, real value) { + THStorage_(set)(t->storage, t->storageOffset + x0*t->stride[0], value); +} +static real THNN_(get3d)(const THTensor *t, long x0, long x1, long x2) { + return THStorage_(get)(t->storage, t->storageOffset + + x0*t->stride[0] + x1*t->stride[1] + x2*t->stride[2]); +} +static real THNN_(get2d)(const THTensor *t, long x0, long x1) { + return THStorage_(get)(t->storage, t->storageOffset + + x0*t->stride[0] + x1*t->stride[1]); +} + +void THNN_(SparseLinear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias) +{ + long h, i, j, hp0, hp1; + long outDim = THTensor_(size)(weight, 0); + long inDim = THTensor_(size)(weight, 1); + long batchSize = THTensor_(size)(output, 0); + + THArgCheck(THNN_(checkInput)(input), 2, "input must be in coo format, nnz x 3"); + THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous"); + THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong"); + + long nnz = THTensor_(size)(input, 0); + + THLongTensor * csr = THLongTensor_newWithSize1d(batchSize+1); + THLongTensor_zero(csr); + + weight = THTensor_(newContiguous)(weight); + 
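+  /* A reading of the coo layout, inferred from the accessors below rather
+   * than stated explicitly: each of the nnz rows of input is
+   * (sample index, feature index, value) with 1-based indices, and the next
+   * loop builds csr so that [csr[h], csr[h+1]) spans sample h's entries;
+   * this appears to assume the rows are sorted by sample index. */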
+//#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000) + for (i=0; i<nnz; i++) { + hp0 = (long)(THNN_(get2d)(input, i, 0)) - 1; + hp1 = (i+1 == nnz) ? + batchSize : + (long)(THNN_(get2d)(input, i+1, 0)) - 1; + if (hp0 != hp1) for (h = hp0; h < hp1; h++) { + THLongTensor_set1d(csr, h+1, i+1); + } + } + + + // output = weight * input + bias + THTensor_(zero)(output); +#pragma omp parallel for private(h, i) schedule(static) if (nnz > 10000) + for (h = 0; h < batchSize; h++) { + long i_start = THLongTensor_get1d(csr, h); + long i_end = THLongTensor_get1d(csr, h+1); + for (i = i_start; i < i_end; i++) { + real val = THNN_(get2d)(input, i, 2); + if (val == 0) { + continue; + } + + long offset = (long)(THNN_(get2d)(input, i, 1)) - 1; + if (offset >= 0 && offset < inDim) { + THBlas_(axpy)(outDim, + val, + COL_PTR2(weight, offset), weight->stride[0], + ROW_PTR2(output, h), output->stride[1]); + } else { + THError("index out of bound. updateOutput: %d not between 1 and %d", + offset + 1, inDim); + } + } + } + + THTensor* output_row = THTensor_(new)(); + for (h = 0; h < batchSize; h++) { + THTensor_(select)(output_row, output, 0, h); + THTensor_(cadd)(output_row, bias, 1.0, output_row); + } + THTensor_(free)(output_row); + THLongTensor_free(csr); + THTensor_(free)(weight); +} + +void THNN_(SparseLinear_legacyUpdateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias) +{ + long h, i; + long outDim = THTensor_(size)(weight, 0); + long inDim = THTensor_(size)(weight, 1); + + THArgCheck(THNN_(checkLegacyInput)(input), 2, "input size must be batchsize x nnz x 2"); + THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous"); + THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong"); + + weight = THTensor_(newContiguous)(weight); + + long batchSize = THTensor_(size)(input, 0); + long nnz = THTensor_(size)(input, 1); + THTensor_(resize2d)(output, batchSize, outDim); + + // output = weight * input + bias + THTensor_(zero)(output); +#pragma omp parallel for private(h, i) schedule(static) if ( \ + batchSize > 1 && batchSize * nnz * outDim > 10000) + for (h = 0; h < batchSize; h++) { + for (i = 0; i < nnz; i++) { + real val = THNN_(get3d)(input, h, i, 1); + if (val == 0) { + continue; + } + + long offset = (long)(THNN_(get3d)(input, h, i, 0)) - 1; + if (offset >= 0 && offset < inDim) { + THBlas_(axpy)(outDim, + val, + COL_PTR2(weight, offset), weight->stride[0], + ROW_PTR2(output, h), output->stride[1]); + } else { + THError("index out of bound. 
updateOutput: %d not between 1 and %d", + offset + 1, inDim); + } + } + } + + THTensor* output_row = THTensor_(new)(); + for (h = 0; h < batchSize; h++) { + THTensor_(select)(output_row, output, 0, h); + THTensor_(cadd)(output_row, bias, 1.0, output_row); + } + THTensor_(free)(output_row); + THTensor_(free)(weight); +} + +void THNN_(SparseLinear_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *weight, + THTensor *bias, + accreal weightDecay_, + accreal scale_) +{ + real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_); + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + long h, i, col, hp0, hp1; + long outDim = THTensor_(size)(weight, 0); + long inDim = THTensor_(size)(weight, 1); + + THArgCheck(THNN_(checkInput)(input), 2, + "input must be in coo format, nnz x 3"); + THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4, + "gradWeight size wrong"); + THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, + "gradBias size wrong"); + THArgCheck(THTensor_(isContiguous)(gradOutput), 1, + "gradOutput must be contiguous"); + + long nnz = THTensor_(size)(input, 0); + + THLongTensor* csc = THLongTensor_newWithSize1d(inDim+1); + THLongTensor_zero(csc); + weight = THTensor_(newContiguous)(weight); + +#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000) + for (i = 0; i < nnz; i++) { + hp0 = (long)(THNN_(get2d)(input, i, 1)) - 1; + hp1 = (i+1 == nnz) ? + inDim : + (long)(THNN_(get2d)(input, i+1, 1)) - 1; + if (hp0 != hp1) for (h = hp0; h < hp1; h++) { + THLongTensor_set1d(csc, h+1, i+1); + } + } + + // gradWeight += gradOutput * input +#pragma omp parallel for private(h, i, col) schedule(static) if (nnz > 10000) + for (col = 0; col < inDim; col++) { + long i_start = THLongTensor_get1d(csc, col); + long i_end = THLongTensor_get1d(csc, col+1); + for (i = i_start; i < i_end; i++) { + real val = scale * THNN_(get2d)(input, i, 2); + + h = (long)(THNN_(get2d)(input, i, 0)) - 1; + long offset = (long)(THNN_(get2d)(input, i, 1)) - 1; + if (offset >= 0 && offset < inDim) { + THBlas_(axpy)(outDim, + val, + ROW_PTR2(gradOutput, h), gradOutput->stride[1], + COL_PTR2(gradWeight, offset), gradWeight->stride[0]); + } else { + THError( + "index out of bound. 
accGradParameters: %d not between 1 and %d", + offset + 1, + inDim); + } + } + } + + // gradBias += gradOutput + THTensor* buf = THTensor_(new)(); + THTensor_(sum)(buf, gradOutput, 0, 1); + THTensor_(cadd)(gradBias, gradBias, scale, buf); + THTensor_(free)(buf); + THLongTensor_free(csc); + + if (weightDecay != 0) { + THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight); + } + THTensor_(free)(weight); +} + +void THNN_(SparseLinear_legacyAccGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *weight, + THTensor *bias, + accreal weightDecay_, + accreal scale_) +{ + real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_); + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + long h, i; + long outDim = THTensor_(size)(weight, 0); + long inDim = THTensor_(size)(weight, 1); + + THArgCheck(THNN_(checkLegacyInput)(input), 2, + "input size must be batchsize x nnz x 2"); + THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4, + "gradWeight size wrong"); + THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, + "gradBias size wrong"); + THArgCheck(THTensor_(isContiguous)(gradOutput), 1, + "gradOutput must be contiguous"); + + long batchSize = THTensor_(size)(input, 0); + long nnz = THTensor_(size)(input, 1); + THTensor_(resize2d)(gradOutput, batchSize, outDim); + + // gradWeight += gradOutput * input +#pragma omp parallel for private(h, i) schedule(static) if (\ + batchSize * nnz * outDim > 10000) + for (i = 0; i < nnz; i++) { + for (h = 0; h < batchSize; h++) { + real val = scale * THNN_(get3d)(input, h, i, 1); + if (val == 0) { + continue; + } + + long offset = (long)(THNN_(get3d)(input, h, i, 0)) - 1; + if (offset >= 0 && offset < inDim) { + THBlas_(axpy)(outDim, + val, + ROW_PTR2(gradOutput, h), gradOutput->stride[1], + COL_PTR2(gradWeight, offset), gradWeight->stride[0]); + } else { + THError( + "index out of bound. accGradParameters: %d not between 1 and %d", + offset + 1, + inDim); + } + } + } + + // gradBias += gradOutput + THTensor* gradOutput_row = THTensor_(new)(); + for (h = 0; h < batchSize; h++) { + THTensor_(select)(gradOutput_row, gradOutput, 0, h); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutput_row); + } + THTensor_(free)(gradOutput_row); + + if (weightDecay != 0) { + THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight); + } +} + +void THNN_(SparseLinear_updateParameters)( + THNNState *state, + THTensor *weight, + THTensor *bias, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *lastInput, + accreal learningRate_) +{ + real learningRate = TH_CONVERT_ACCREAL_TO_REAL(learningRate_); + long h, i; + long outDim = weight->size[0]; + long inDim = weight->size[1]; + + THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4, + "gradWeight size wrong"); + THArgCheck(THNN_(checkSize1D)(bias, outDim), 3, "bias size wrong"); + THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong"); + THArgCheck(THNN_(checkInput)(lastInput), 6, + "input must be in coo format, nnz x 3"); + + + long nnz = THTensor_(size)(lastInput, 0); + + // collect unique offsets of non-0 val in input + THTensor* offsets = THTensor_(newWithSize1d)(nnz); + long cnt = 0; + for (i = 0; i < nnz; i++) { + real val = THNN_(get2d)(lastInput, i, 2); + if (val == 0) { + continue; + } + long offset = (long)(THNN_(get2d)(lastInput, i, 1)) - 1; + if (offset >= 0 && offset < inDim) { + THNN_(set1d)(offsets, cnt++, offset); + } else { + THError( + "index out of bound. 
updateParameters: %d not between 1 and %d", + offset + 1, + inDim); + } + } + if (cnt == 0) return; + THTensor_(resize1d)(offsets, cnt); + + THTensor* uniqueOffsets = THTensor_(new)(); + THLongTensor* ri = THLongTensor_new(); + THTensor_(sort)(uniqueOffsets, ri, offsets, 0, 0); + THLongTensor_free(ri); + THTensor_(free)(offsets); + + cnt = 1; + real* uniqueOffsets_p = THTensor_(data)(uniqueOffsets); + for (i = 1; i < THTensor_(size)(uniqueOffsets, 0); i++) { + if (uniqueOffsets_p[i] != uniqueOffsets_p[i - 1]) { + uniqueOffsets_p[cnt++] = uniqueOffsets_p[i]; + } + } + THTensor_(resize1d)(uniqueOffsets, cnt); + + // weight += -learningRate * gradWeight + THTensor_(cadd)(bias, bias, -learningRate, gradBias); +#pragma omp parallel for private(i) schedule(static) if (cnt * outDim > 10000) + for (i = 0; i < cnt; i++) { + long offset = (long)uniqueOffsets_p[i]; + THBlas_(axpy)(outDim, + -learningRate, + COL_PTR2(gradWeight, offset), gradWeight->stride[0], + COL_PTR2(weight, offset), weight->stride[0]); + } + + THTensor_(free)(uniqueOffsets); +} + +void THNN_(SparseLinear_legacyUpdateParameters)( + THNNState *state, + THTensor *weight, + THTensor *bias, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *lastInput, + accreal learningRate_) +{ + real learningRate = TH_CONVERT_ACCREAL_TO_REAL(learningRate_); + long h, i; + long outDim = weight->size[0]; + long inDim = weight->size[1]; + + THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4, + "gradWeight size wrong"); + THArgCheck(THNN_(checkSize1D)(bias, outDim), 3, "bias size wrong"); + THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong"); + THArgCheck(THNN_(checkLegacyInput)(lastInput), 6, + "input size must be batchsize x nnz x 2"); + + + long batchSize = THTensor_(size)(lastInput, 0); + long nnz = THTensor_(size)(lastInput, 1); + + // collect unique offsets of non-0 val in input + THTensor* offsets = THTensor_(newWithSize1d)(batchSize * nnz); + long cnt = 0; + for (h = 0; h < batchSize; h++) { + for (i = 0; i < nnz; i++) { + real val = THNN_(get3d)(lastInput, h, i, 1); + if (val == 0 ) { + continue; + } + long offset = (long)(THNN_(get3d)(lastInput, h, i, 0)) - 1; + if (offset >= 0 && offset < inDim) { + THNN_(set1d)(offsets, cnt++, offset); + } else { + THError( + "index out of bound. 
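After gathering the (possibly repeated) feature offsets touched by the last sparse input, updateParameters sorts them and compacts duplicates in place, so each weight column is updated exactly once. The same sort-then-deduplicate pattern in plain C, with qsort standing in for THTensor_(sort):

#include <stdio.h>
#include <stdlib.h>

static int cmp_long(const void *a, const void *b) {
  long x = *(const long *)a, y = *(const long *)b;
  return (x > y) - (x < y);
}

int main(void) {
  long offsets[] = {4, 1, 4, 3, 1, 3};
  long n = 6, cnt = 1;

  qsort(offsets, n, sizeof(long), cmp_long);
  for (long i = 1; i < n; i++)             /* keep the first of each run */
    if (offsets[i] != offsets[i - 1])
      offsets[cnt++] = offsets[i];

  for (long i = 0; i < cnt; i++)           /* prints 1 3 4 */
    printf("%ld ", offsets[i]);
  printf("\n");
  return 0;
}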
updateParameters: %d not between 1 and %d", + offset + 1, + inDim); + } + } + } + THTensor_(resize1d)(offsets, cnt); + + THTensor* uniqueOffsets = THTensor_(new)(); + THLongTensor* ri = THLongTensor_new(); + THTensor_(sort)(uniqueOffsets, ri, offsets, 0, 0); + THLongTensor_free(ri); + THTensor_(free)(offsets); + + cnt = 1; + real* uniqueOffsets_p = THTensor_(data)(uniqueOffsets); + for (i = 1; i < THTensor_(size)(uniqueOffsets, 0); i++) { + if (uniqueOffsets_p[i] != uniqueOffsets_p[i - 1]) { + uniqueOffsets_p[cnt++] = uniqueOffsets_p[i]; + } + } + THTensor_(resize1d)(uniqueOffsets, cnt); + + // weight += -learningRate * gradWeight + THTensor_(cadd)(bias, bias, -learningRate, gradBias); +#pragma omp parallel for private(i) schedule(static) if (cnt * outDim > 10000) + for (i = 0; i < cnt; i++) { + long offset = (long)uniqueOffsets_p[i]; + THBlas_(axpy)(outDim, + -learningRate, + COL_PTR2(gradWeight, offset), gradWeight->stride[0], + COL_PTR2(weight, offset), weight->stride[0]); + } + + THTensor_(free)(uniqueOffsets); +} + +void THNN_(SparseLinear_zeroGradParameters)( + THNNState *state, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *lastInput) +{ + long h, i, j; + + long outDim = gradWeight->size[0]; + long inDim = gradWeight->size[1]; + + THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong"); + THArgCheck(THNN_(checkInput)(lastInput), 4, + "input must be in coo format, nnz x 3"); + + THTensor_(zero)(gradBias); + + long nnz = THTensor_(size)(lastInput, 0); + +#pragma omp parallel for private(i, j) schedule(static) if ( \ + nnz * outDim > 10000) + for (i = 0; i < nnz; i++) { + if (THNN_(get2d)(lastInput, i, 2) == 0 ) { + continue; + } + + long offset = (long)(THNN_(get2d)(lastInput, i, 1)) - 1; + if (offset >= 0 && offset < inDim) { + real* pGradWeight = COL_PTR2(gradWeight, offset); + if (gradWeight->stride[0] == 1) { + THVector_(fill)(pGradWeight, 0, outDim); + } else { + long stride = gradWeight->stride[0]; + for (j = 0; j < outDim; ++j) { + pGradWeight[j * stride] = 0; + } + } + } else { + THError( + "index out of bound. zeroGradParameters: %d not between 1 and %d", + offset + 1, + inDim); + } + } +} + +void THNN_(SparseLinear_legacyZeroGradParameters)( + THNNState *state, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *lastInput) +{ + long h, i, j; + + long outDim = gradWeight->size[0]; + long inDim = gradWeight->size[1]; + + THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong"); + THArgCheck(THNN_(checkLegacyInput)(lastInput), 4, + "input size must be batchsize x nnz x 2"); + + THTensor_(zero)(gradBias); + + long batchSize = THTensor_(size)(lastInput, 0); + long nnz = THTensor_(size)(lastInput, 1); + +#pragma omp parallel for private(h, i, j) schedule(static) if ( \ + batchSize > 1 && batchSize * nnz * outDim > 10000) + for (h = 0; h < batchSize; h++) { + for (i = 0; i < nnz; i++) { + if (THNN_(get3d)(lastInput, h, i, 1) == 0 ) { + continue; + } + + long offset = (long)(THNN_(get3d)(lastInput, h, i, 0)) - 1; + if (offset >= 0 && offset < inDim) { + real* pGradWeight = COL_PTR2(gradWeight, offset); + if (gradWeight->stride[0] == 1) { + THVector_(fill)(pGradWeight, 0, outDim); + } else { + long stride = gradWeight->stride[0]; + for (j = 0; j < outDim; ++j) { + pGradWeight[j * stride] = 0; + } + } + } else { + THError( + "index out of bound. 
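zeroGradParameters below exploits the same sparsity: rather than clearing the whole outDim x inDim gradient, it zeroes only the columns named by lastInput, taking a THVector_(fill) fast path when the column is contiguous and a strided loop otherwise. A plain-C sketch of the strided branch, with a hypothetical zero_column helper and a row-major layout assumed:

#include <stdio.h>

static void zero_column(float *w, long outDim, long inDim, long col) {
  for (long j = 0; j < outDim; j++)
    w[j * inDim + col] = 0.0f;   /* stride between rows is inDim */
}

int main(void) {
  float w[2 * 3] = {1, 2, 3, 4, 5, 6};
  zero_column(w, 2, 3, 1);
  for (long i = 0; i < 6; i++)
    printf("%g ", w[i]);         /* 1 0 3 4 0 6 */
  printf("\n");
  return 0;
}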
zeroGradParameters: %d not between 1 and %d", + offset + 1, + inDim); + } + } + } +} + +#undef ROW_PTR2 +#undef COL_PTR2 + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveAveragePooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveAveragePooling.c new file mode 100644 index 000000000..3675b42d7 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveAveragePooling.c @@ -0,0 +1,258 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialAdaptiveAveragePooling.c" +#else + +#define START_IND(a,b,c) (int)floor((float)(a * c) / b) +#define END_IND(a,b,c) (int)ceil((float)((a + 1) * c) / b) +// #define START_IND(a,b,c) a * c / b +// #define END_IND(a,b,c) (a + 1) * c / b + ((a + 1) * c % b > 0)?1:0 + +static void THNN_(SpatialAdaptiveAveragePooling_updateOutput_frame)( + real *input_p, + real *output_p, + long nslices, + long iwidth, + long iheight, + long owidth, + long oheight, + long stridew, + long strideh, + long strided) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + /* loop over output */ + long i, j; + for(i = 0; i < oheight; i++) + { + int y_start = START_IND(i, oheight, iheight); + int y_end = END_IND(i, oheight, iheight); + int kH = y_end-y_start; + + for(j = 0; j < owidth; j++) + { + + int x_start = START_IND(j, owidth, iwidth); + int x_end = END_IND(j, owidth, iwidth); + int kW = x_end-x_start; + + /* local pointers */ + real *ip = input_p + k*strided + y_start*strideh + x_start*stridew; + real *op = output_p + k*owidth*oheight + i*owidth + j; + + /* compute local average: */ + real sum = 0; + int x,y; + for(y = 0; y < kH; y++) + { + for(x = 0; x < kW; x++) + { + real val = *(ip + y*strideh + x*stridew); + sum += val; + } + } + + /* set output to local average */ + *op = sum / kW / kH; + } + } + } +} + +void THNN_(SpatialAdaptiveAveragePooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int owidth, + int oheight) +{ + int dimw = 2; + int dimh = 1; + long nbatch = 1; + long nslices; + long iheight; + long iwidth; + + long istride_d; + long istride_h; + long istride_w; + long istride_b; + + real *input_data; + real *output_data; + + + THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input, + "3D or 4D (batch mode) tensor expected for input, but got: %s"); + + if (input->nDimension == 4) + { + istride_b = input->stride[0]; + nbatch = input->size[0]; + dimw++; + dimh++; + } + + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + /* strides */ + istride_d = input->stride[dimh-1]; + istride_h = input->stride[dimh]; + istride_w = input->stride[dimw]; + + /* resize output */ + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nslices, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + + THNN_(SpatialAdaptiveAveragePooling_updateOutput_frame)(input_data, output_data, + nslices, + iwidth, iheight, + owidth, oheight, + istride_w,istride_h, + istride_d); + } + else + { + long p; + + THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialAdaptiveAveragePooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight, + nslices, + iwidth, iheight, + owidth, oheight, + istride_w,istride_h, + istride_d); + } + } +} + +static 
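The START_IND/END_IND macros at the top of this file split the input extent into output-many bins whose edges round outwards, so adjacent bins can overlap by a row or column when the sizes do not divide evenly. A standalone worked example using the same formulas: mapping 10 input rows onto 3 output rows yields the overlapping windows [0,4), [3,7) and [6,10).

#include <math.h>
#include <stdio.h>

#define START_IND(a,b,c) (int)floor((float)((a) * (c)) / (b))
#define END_IND(a,b,c)   (int)ceil((float)(((a) + 1) * (c)) / (b))

int main(void) {
  int iheight = 10, oheight = 3;
  for (int i = 0; i < oheight; i++)
    printf("output row %d averages input rows [%d, %d)\n",
           i, START_IND(i, oheight, iheight), END_IND(i, oheight, iheight));
  return 0;
}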
void THNN_(SpatialAdaptiveAveragePooling_updateGradInput_frame)( + real *gradInput_p, + real *gradOutput_p, + long nslices, + long iwidth, + long iheight, + long owidth, + long oheight) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + real *gradInput_p_k = gradInput_p + k*iwidth*iheight; + real *gradOutput_p_k = gradOutput_p + k*owidth*oheight; + + /* calculate average */ + long i, j; + for(i = 0; i < oheight; i++) + { + int y_start = START_IND(i, oheight, iheight); + int y_end = END_IND(i, oheight, iheight); + int kH = y_end-y_start; + + for(j = 0; j < owidth; j++) + { + + int x_start = START_IND(j, owidth, iwidth); + int x_end = END_IND(j, owidth, iwidth); + int kW = x_end-x_start; + + int x,y; + for(y = y_start; y < y_end; y++) + { + for(x = x_start; x < x_end; x++) + { + /* update gradient */ + gradInput_p_k[y*iwidth + x] += gradOutput_p_k[i*owidth + j] / kW / kH; + } + } + } + } + } +} + +void THNN_(SpatialAdaptiveAveragePooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput) +{ + int dimw = 2; + int dimh = 1; + long nbatch = 1; + int nslices; + int iheight; + int iwidth; + int oheight; + int owidth; + real *gradInput_data; + real *gradOutput_data; + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (input->nDimension == 4) { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + oheight = gradOutput->size[dimh]; + owidth = gradOutput->size[dimw]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + + /* backprop */ + if (input->nDimension == 3) + { + THNN_(SpatialAdaptiveAveragePooling_updateGradInput_frame)(gradInput_data, gradOutput_data, + nslices, + iwidth, iheight, + owidth, oheight); + } + else + { + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialAdaptiveAveragePooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight, + nslices, + iwidth, iheight, + owidth, oheight); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif + +#undef START_IND +#undef END_IND
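Since averaging is linear, the backward pass simply spreads each output gradient uniformly over its pooling window; where adaptive windows overlap, contributions must accumulate, which is why the inner loop adds into gradInput with +=. A 1-D sketch of the same redistribution:

#include <math.h>
#include <stdio.h>

int main(void) {
  int isize = 10, osize = 3;
  float gradOutput[3] = {3.0f, 3.0f, 3.0f};
  float gradInput[10] = {0};

  for (int i = 0; i < osize; i++) {
    int start = (int)floor((float)(i * isize) / osize);
    int end   = (int)ceil((float)((i + 1) * isize) / osize);
    int k = end - start;
    for (int y = start; y < end; y++)
      gradInput[y] += gradOutput[i] / k;   /* += because windows overlap */
  }
  for (int y = 0; y < isize; y++)
    printf("%.2f ", gradInput[y]);
  printf("\n");
  return 0;
}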
\ No newline at end of file diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveMaxPooling.c new file mode 100644 index 000000000..fff716e67 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveMaxPooling.c @@ -0,0 +1,274 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialAdaptiveMaxPooling.c" +#else + +static void THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)( + real *input_p, + real *output_p, + THIndex_t *indx_p, + THIndex_t *indy_p, + long nslices, + long iwidth, + long iheight, + long owidth, + long oheight, + long stridew, + long strideh, + long strided) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + /* loop over output */ + long i, j; + for(i = 0; i < oheight; i++) + { + int y_start = (int)floor((float)i / oheight * iheight); + int y_end = (int)ceil((float)(i + 1) / oheight * iheight); + int kH = y_end-y_start; + + for(j = 0; j < owidth; j++) + { + + int x_start = (int)floor((float)j / owidth * iwidth); + int x_end = (int)ceil((float)(j + 1) / owidth * iwidth); + int kW = x_end-x_start; + + /* local pointers */ + real *ip = input_p + k*strided + y_start*strideh + x_start*stridew; + real *op = output_p + k*owidth*oheight + i*owidth + j; + THIndex_t *indyp = indy_p + k*owidth*oheight + i*owidth + j; + THIndex_t *indxp = indx_p + k*owidth*oheight + i*owidth + j; + + /* compute local max: */ + long maxindex = -1; + real maxval = -FLT_MAX; + long tcntr = 0; + int x,y; + for(y = 0; y < kH; y++) + { + for(x = 0; x < kW; x++) + { + real val = *(ip + y*strideh + x*stridew); + if (val > maxval) + { + maxval = val; + maxindex = tcntr; + } + tcntr++; + } + } + + /* set output to local max */ + *op = maxval; + + /* store location of max (x,y) */ + *indyp = (maxindex / kW) + TH_INDEX_BASE; + *indxp = (maxindex % kW) + TH_INDEX_BASE; + } + } + } +} + +void THNN_(SpatialAdaptiveMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int owidth, + int oheight) +{ + int dimw = 2; + int dimh = 1; + long nbatch = 1; + long nslices; + long iheight; + long iwidth; + + long istride_d; + long istride_h; + long istride_w; + long istride_b; + + real *input_data; + real *output_data; + THIndex_t *indices_data; + + + THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input, + "3D or 4D (batch mode) tensor expected for input, but got: %s"); + + if (input->nDimension == 4) + { + istride_b = input->stride[0]; + nbatch = input->size[0]; + dimw++; + dimh++; + } + + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + /* strides */ + istride_d = input->stride[dimh-1]; + istride_h = input->stride[dimh]; + istride_w = input->stride[dimw]; + + /* resize output */ + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nslices, oheight, owidth); + /* indices will contain i,j locations for each output point */ + THIndexTensor_(resize4d)(indices, 2, nslices, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data, + indices_data+nslices*owidth*oheight, indices_data, + nslices, + iwidth, iheight, + owidth, oheight, + istride_w,istride_h, + istride_d); + } + else + { + long p; + + THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth); + /* indices 
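The forward pass of this max-pooling variant also records where each maximum was found: a linear counter over the kH x kW window is decoded into a (row, column) pair and shifted by TH_INDEX_BASE before being stored in the two index planes, and the backward pass reverses the arithmetic to route each gradient to a single input cell. A small sketch of the encode/decode step, 0-based here:

#include <stdio.h>

int main(void) {
  int kW = 3;                      /* window is kH x kW, scanned row-major */
  int maxindex = 4;                /* linear position of the max, 0-based */
  int y = maxindex / kW;           /* row inside the window: 1 */
  int x = maxindex % kW;           /* col inside the window: 1 */
  printf("window offset (y=%d, x=%d)\n", y, x);
  /* backward re-adds the window origin: maxi = y_start + y, maxj = x_start + x */
  return 0;
}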
will contain i,j locations for each output point */ + THIndexTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight, + indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight, + nslices, + iwidth, iheight, + owidth, oheight, + istride_w,istride_h, + istride_d); + } + } +} + +static void THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)( + real *gradInput_p, + real *gradOutput_p, + THIndex_t *indx_p, + THIndex_t *indy_p, + long nslices, + long iwidth, + long iheight, + long owidth, + long oheight) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + real *gradInput_p_k = gradInput_p + k*iwidth*iheight; + real *gradOutput_p_k = gradOutput_p + k*owidth*oheight; + THIndex_t *indx_p_k = indx_p + k*owidth*oheight; + THIndex_t *indy_p_k = indy_p + k*owidth*oheight; + + /* calculate max points */ + long i, j; + for(i = 0; i < oheight; i++) + { + int y_start = (int)floor((float) i / oheight * iheight); + for(j = 0; j < owidth; j++) + { + int x_start = (int)floor((float) j / owidth * iwidth); + /* retrieve position of max */ + long maxi = indy_p_k[i*owidth + j] - TH_INDEX_BASE + y_start; + long maxj = indx_p_k[i*owidth + j] - TH_INDEX_BASE + x_start; + + /* update gradient */ + gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j]; + } + } + } +} + +void THNN_(SpatialAdaptiveMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices) +{ + int dimw = 2; + int dimh = 1; + long nbatch = 1; + int nslices; + int iheight; + int iwidth; + int oheight; + int owidth; + real *gradInput_data; + real *gradOutput_data; + THIndex_t *indices_data; + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (input->nDimension == 4) { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + oheight = gradOutput->size[dimh]; + owidth = gradOutput->size[dimw]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THIndexTensor_(data)(indices); + + /* backprop */ + if (input->nDimension == 3) + { + THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data, + indices_data+nslices*owidth*oheight, indices_data, + nslices, + iwidth, iheight, + owidth, oheight); + } + else + { + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight, + indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight, + nslices, + iwidth, iheight, + owidth, oheight); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialAveragePooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAveragePooling.c new file mode 100644 index 000000000..c063502e7 --- /dev/null +++ 
b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAveragePooling.c @@ -0,0 +1,329 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialAveragePooling.c" +#else + +static inline void THNN_(SpatialAveragePooling_shapeCheck)( + THTensor *input, THTensor *gradOutput, + int kH, int kW, int dH, int dW, int padH, int padW, + bool ceil_mode) { + + THArgCheck(kW > 0 && kH > 0, 5, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + THArgCheck(dW > 0 && dH > 0, 8, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + + int ndim = input->nDimension; + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + + THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, + "pad should be smaller than half of kernel size, but got " + "padW = %d, padH = %d, kW = %d, kH = %d", + padW, padH, kW, kH); + + long nInputPlane = input->size[dimh-1]; + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long outputHeight, outputWidth; + long nOutputPlane = nInputPlane; + + if(ceil_mode) + { + outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1; + outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1; + } + else + { + outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1; + outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1; + } + + if (padW || padH) + { + // ensure that the last pooling starts inside the image + // needed to avoid problems in ceil mode + if ((outputHeight - 1)*dH >= inputHeight + padH) + --outputHeight; + if ((outputWidth - 1)*dW >= inputWidth + padW) + --outputWidth; + } + + if (outputWidth < 1 || outputHeight < 1) + THError("Given input size: (%dx%dx%d). " + "Calculated output size: (%dx%dx%d). 
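The shape check below derives the pooled size with the usual formula, floor or ceil of (inputSize - kernel + 2*pad) / stride, plus one. A worked example, assuming inputHeight 6, kH 3, dH 2 and padH 0; the later "last pooling starts inside the image" fix-up is skipped in this case because padding is zero:

#include <math.h>
#include <stdio.h>

int main(void) {
  long inputHeight = 6, kH = 3, dH = 2, padH = 0;
  long floorH = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
  long ceilH  = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
  printf("floor mode: %ld rows, ceil mode: %ld rows\n", floorH, ceilH); /* 2, 3 */
  return 0;
}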
Output size is too small", + nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + } +} + +void THNN_(SpatialAveragePooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + bool ceil_mode, + bool count_include_pad) +{ + real *output_data; + real *input_data; + + int dimw = 2; + int dimh = 1; + int dimc = 0; + long nbatch = 1; + + long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + long nInputPlane; // number of channels (or colors) + + long k; + + THNN_(SpatialAveragePooling_shapeCheck) + (input, NULL, kH, kW, dH, dW, padH, padW, ceil_mode); + + if (input->nDimension == 4) { + nbatch = input->size[0]; + dimw++; + dimh++; + dimc++; + } + + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + nInputPlane = input->size[dimc]; + + if(ceil_mode) + { + outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1; + outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1; + } + else + { + outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1; + outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1; + } + if (padW || padH) + { + // ensure that the last pooling starts inside the image + // needed to avoid problems in ceil mode + if ((outputHeight - 1)*dH >= inputHeight + padH) + --outputHeight; + if ((outputWidth - 1)*dW >= inputWidth + padW) + --outputWidth; + } + + if (input->nDimension == 3) + THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth); + else + THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth); + + input = THTensor_(newContiguous)(input); + THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous"); + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(k) + for(k = 0; k < nInputPlane; k++) + { + long p; + for(p = 0; p < nbatch; p++) + { + long xx, yy; + /* For all output pixels... */ + real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight; + real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight; + long i; + for(i = 0; i < outputWidth*outputHeight; i++) + ptr_output[i] = 0; + + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + /* Compute the mean of the input image... 
*/ + long hstart = yy * dH - padH; + long wstart = xx * dW - padW; + long hend = fminf(hstart + kH, inputHeight + padH); + long wend = fminf(wstart + kW, inputWidth + padW); + int pool_size = (hend - hstart) * (wend - wstart); + hstart = fmaxf(hstart, 0); + wstart = fmaxf(wstart, 0); + hend = fminf(hend, inputHeight); + wend = fminf(wend, inputWidth); + + real sum = 0; + + int divide_factor; + if(count_include_pad) + divide_factor = pool_size; + else + divide_factor = (hend - hstart) * (wend - wstart); + + long kx, ky; + + for(ky = hstart; ky < hend; ky++) + { + for(kx = wstart; kx < wend; kx++) + sum += ptr_input[ky*inputWidth + kx]; + } + /* Update output */ + *ptr_output++ += sum/divide_factor; + } + } + } + } + THTensor_(free)(input); +} + +void THNN_(SpatialAveragePooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + bool ceil_mode, + bool count_include_pad) +{ + int dimw = 2; + int dimh = 1; + int dimc = 0; + long nbatch = 1; + long ndim = 3; + + long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + long nInputPlane; // number of channels (or colors) + + real *gradOutput_data; + real *input_data, *gradInput_data; + + long k; + + THNN_(SpatialAveragePooling_shapeCheck) + (input, gradOutput, kH, kW, dH, dW, padH, padW, ceil_mode); + + + if (input->nDimension == 4) { + nbatch = input->size[0]; + dimw++; + dimh++; + dimc++; + ndim = 4; + } + + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + nInputPlane = input->size[dimc]; + + if(ceil_mode) + { + outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1; + outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1; + } + else + { + outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1; + outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1; + } + if (padW || padH) + { + // ensure that the last pooling starts inside the image + // needed to avoid problems in ceil mode + if ((outputHeight - 1)*dH >= inputHeight + padH) + --outputHeight; + if ((outputWidth - 1)*dW >= inputWidth + padW) + --outputWidth; + } + + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + + THTensor_(resizeAs)(gradInput, input); + + gradOutput = THTensor_(newContiguous)(gradOutput); + THArgCheck(THTensor_(isContiguous)(gradInput), 4, "gradInput must be contiguous"); + + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + +#pragma omp parallel for private(k) + for(k = 0; k < nInputPlane; k++) + { + long p; + for(p = 0; p < nbatch; p++) + { + real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight; + long xx, yy; + + real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight; + real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight; + + long i; + for(i=0; i<inputWidth*inputHeight; i++) + ptr_gi[i] = 0.0; + + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + long hstart = yy * dH - padH; + long wstart = xx * dW - padW; + long hend = fminf(hstart + kH, inputHeight + padH); + long wend = fminf(wstart + kW, inputWidth + padW); + int pool_size = (hend - hstart) * (wend - wstart); + hstart = fmaxf(hstart, 0); + wstart = fmaxf(wstart, 0); + hend = fminf(hend, 
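The divide_factor logic used in both passes is what separates the two averaging conventions: with count_include_pad the window sum is divided by the full kernel area, so padded cells count as zeros, while without it only the cells actually overlapping the image are counted. A tiny 1-D illustration, assuming a window of size 3 hanging one cell over the left edge of the input {2, 4}:

#include <stdio.h>

int main(void) {
  int kW = 3, padW = 1, inputWidth = 2;
  int wstart = 0 * 1 - padW;                  /* -1 */
  int wend = wstart + kW;                     /* 2, already < inputWidth+padW */
  int pool_size = wend - wstart;              /* 3, includes the pad cell */
  if (wstart < 0) wstart = 0;                 /* clamp to the image */
  if (wend > inputWidth) wend = inputWidth;
  float sum = 2.0f + 4.0f;                    /* cells 0 and 1 */
  printf("count_include_pad=1: %.3f\n", sum / pool_size);       /* 2.000 */
  printf("count_include_pad=0: %.3f\n", sum / (wend - wstart)); /* 3.000 */
  return 0;
}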
inputHeight); + wend = fminf(wend, inputWidth); + + real z = *ptr_gradOutput++; + + int divide_factor; + if(count_include_pad) + divide_factor = pool_size; + else + divide_factor = (hend - hstart) * (wend - wstart); + + long kx, ky; + for(ky = hstart ; ky < hend; ky++) + { + for(kx = wstart; kx < wend; kx++) + ptr_gradInput[ky*inputWidth + kx] += z/divide_factor; + } + } + } + } + } + + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialClassNLLCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialClassNLLCriterion.c new file mode 100644 index 000000000..d711c8590 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialClassNLLCriterion.c @@ -0,0 +1,131 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialClassNLLCriterion.c" +#else + +#define INITIAL_CHECK \ + THArgCheck(THIndexTensor_(nDimension)(target) == 3, 3, \ + "only batches of spatial targets supported (3D tensors)" \ + " but got targets of dimension: %d", \ + THIndexTensor_(nDimension)(target)); \ + THArgCheck(THTensor_(nDimension)(input) == 4, 2, \ + "only batches of spatial inputs supported (4D tensors), " \ + "but got input of dimension: %d", THTensor_(nDimension)(input)); \ + if (weights && THTensor_(nElement)(weights) != THTensor_(size)(input, 1)) { \ + THError("weight tensor should be defined either for all or no classes"); \ + } \ + \ + { \ + long input0 = THTensor_(size)(input, 0); \ + long input1 = THTensor_(size)(input, 1); \ + long input2 = THTensor_(size)(input, 2); \ + long input3 = THTensor_(size)(input, 3); \ + long target0 = THIndexTensor_(size)(target, 0); \ + long target1 = THIndexTensor_(size)(target, 1); \ + long target2 = THIndexTensor_(size)(target, 2); \ + THAssertMsg(input0 == target0 && input2 == target1 && input3 == target2, \ + "size mismatch (got input: %ldx%ldx%ldx%ld, target: %ldx%ldx%ld)", \ + input0, input1, input2, input3, target0, target1, target2); \ + } + +void THNN_(SpatialClassNLLCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *output, + bool sizeAverage, + THTensor *weights, + THTensor *total_weight) +{ + INITIAL_CHECK; + + input = THTensor_(newContiguous)(input); + target = THIndexTensor_(newContiguous)(target); + weights = weights ? THTensor_(newContiguous)(weights) : NULL; + + real *input_data = THTensor_(data)(input); + THIndex_t *target_data = THIndexTensor_(data)(target); + real *weights_data = weights ? THTensor_(data)(weights) : NULL; + real *output_data = THTensor_(data)(output); + real *total_weight_data = THTensor_(data)(total_weight); + + long batch_size = THTensor_(size)(input, 0); + long n_classes = THTensor_(size)(input, 1); + long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3); + long sample_size = map_size * n_classes; + + real total_weight_acc = 0; + real output_acc = 0; + for (int b = 0; b < batch_size; b++) { + for (int elem = 0; elem < map_size; elem++) { + int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE; + THAssert(cur_target >= 0 && cur_target < n_classes); + + real cur_weight = weights ? 
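SpatialClassNLLCriterion computes, for every spatial position, the weighted negative log-likelihood of that pixel's target class, and with sizeAverage divides by the accumulated class weights. A compact sketch of the per-pixel accumulation, in plain C with 0-based classes and log-probabilities as input:

#include <stdio.h>

int main(void) {
  /* 1 sample, 2 classes, 2 pixels: input[c][p] = log-probability */
  float input[2][2] = {{-0.5f, -2.0f}, {-1.0f, -0.2f}};
  int target[2] = {0, 1};          /* per-pixel target class */
  float weights[2] = {1.0f, 2.0f}; /* per-class weights */

  float total_weight = 0, output = 0;
  for (int p = 0; p < 2; p++) {
    int t = target[p];
    total_weight += weights[t];
    output -= input[t][p] * weights[t];
  }
  if (total_weight > 0)
    output /= total_weight;        /* sizeAverage normalization */
  printf("loss = %.4f, total_weight = %.1f\n", output, total_weight);
  return 0;
}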
weights_data[cur_target] : 1.0f; + total_weight_acc += cur_weight; + output_acc -= input_data[b * sample_size + cur_target * map_size + elem] * cur_weight; + } + } + *total_weight_data = total_weight_acc; + *output_data = output_acc; + + if (sizeAverage && *total_weight_data) + *output_data /= *total_weight_data; + + THTensor_(free)(input); + THIndexTensor_(free)(target); + if (weights) + THTensor_(free)(weights); +} + +void THNN_(SpatialClassNLLCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *gradInput, + bool sizeAverage, + THTensor *weights, + THTensor *total_weight) +{ + INITIAL_CHECK; + THArgCheck(THTensor_(isContiguous)(gradInput), 4, + "gradInput must be contiguous"); + + real *total_weight_data = THTensor_(data)(total_weight); + if (*total_weight_data <= 0) + return; + + target = THIndexTensor_(newContiguous)(target); + weights = weights ? THTensor_(newContiguous)(weights) : NULL; + + THIndex_t *target_data = THIndexTensor_(data)(target); + real *weights_data = weights ? THTensor_(data)(weights) : NULL; + real *gradInput_data = THTensor_(data)(gradInput); + + long batch_size = THTensor_(size)(input, 0); + long n_classes = THTensor_(size)(input, 1); + long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3); + long sample_size = map_size * n_classes; + + real normalize = sizeAverage ? *total_weight_data : 1.0f; + + int b; + #pragma omp parallel for + for (b = 0; b < batch_size; b++) { + int elem; + for (elem = 0; elem < map_size; elem++) { + int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE; + THAssert(cur_target >= 0 && cur_target < n_classes); + + gradInput_data[b * sample_size + cur_target * map_size + elem] = + -(weights ? weights_data[cur_target] : 1.0f) / normalize; + } + } + + THIndexTensor_(free)(target); + if (weights) + THTensor_(free)(weights); +} + +#undef INITIAL_CHECK + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionLocal.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionLocal.c new file mode 100644 index 000000000..6db5a5db9 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionLocal.c @@ -0,0 +1,367 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolutionLocal.c" +#else + +static inline void THNN_(SpatialConvolutionLocal_shapeCheck)( + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int kH, int kW, int dH, + int dW, int padH, int padW, + long inputHeight, long inputWidth, + long outputHeight, long outputWidth) { + + THArgCheck(kW > 0 && kH > 0, 9, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + THArgCheck(dW > 0 && dH > 0, 11, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + + int ndim = input->nDimension; + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + + long nInputPlane = weight->size[2] / (kH * kW); + long nOutputPlane = weight->size[1]; + + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 3, 0, nOutputPlane); + THNN_CHECK_DIM_SIZE(bias, 3, 1, outputHeight); + THNN_CHECK_DIM_SIZE(bias, 3, 2, outputWidth); + } + + THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, 
outputWidth); + } +} + +static THTensor* THNN_(view_weight_local)(THTensor *_weight) +{ + THTensor *weight = THTensor_(newContiguous)(_weight); + THArgCheck(weight->nDimension == 3 || weight->nDimension == 6, 4, + "weight tensor should be 3D or 6D - got %dD", weight->nDimension); + if (weight->nDimension == 6) { + long s1 = weight->size[0] * weight->size[1]; + long s2 = weight->size[2]; + long s3 = weight->size[3] * weight->size[4] * weight->size[5]; + THTensor *old_weight = weight; + weight = THTensor_(newWithStorage3d)(weight->storage, + weight->storageOffset, + s1, -1, s2, -1, s3, -1); + THTensor_(free)(old_weight); + } + return weight; +} + +static void THNN_(SpatialConvolutionLocal_updateOutput_frame) + ( + THTensor *input, THTensor *output, + THTensor *weight, THTensor *bias, THTensor *finput, + int kW, int kH, int dW, int dH, int padW, int padH, + long nInputPlane, long inputWidth, long inputHeight, + long nOutputPlane, long outputWidth, long outputHeight) +{ + long i; + THTensor *output3d, *finput3d; + + THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + outputWidth, outputHeight); + + THTensor_(copy)(output, bias); + + output3d = THTensor_(newWithStorage3d) + (output->storage, output->storageOffset, + outputHeight * outputWidth, 1, + nOutputPlane, outputHeight * outputWidth, + 1, nOutputPlane * outputHeight * outputWidth); + + finput3d = THTensor_(newWithStorage3d) + (finput->storage, finput->storageOffset, + outputHeight * outputWidth, 1, + kW * kH * nInputPlane, outputHeight * outputWidth, + 1, kW * kH * nInputPlane * outputHeight * outputWidth); + + // weight: oH*oW x nOutputPlane x nInputPlane*kH*kW + // finput3d: oH*oW x nInputPlane*kH*kW x 1 + THTensor_(baddbmm)(output3d, 1.0, output3d, 1.0, weight, finput3d); + // output3d: oH*oW x nOutputPlane x 1 + + THTensor_(free)(output3d); + THTensor_(free)(finput3d); +} + +void THNN_(SpatialConvolutionLocal_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + long inputWidth, long inputHeight, + long outputWidth, long outputHeight) +{ + weight = THNN_(view_weight_local)(weight); + + THNN_(SpatialConvolutionLocal_shapeCheck) + (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW, + inputHeight, inputWidth, outputHeight, outputWidth); + + input = THTensor_(newContiguous)(input); + + long nInputPlane = THTensor_(size)(weight, 2)/ (kW * kH); + long nOutputPlane = THTensor_(size)(weight, 1); + + if(input->nDimension == 3) + { + THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth); + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); + + THNN_(SpatialConvolutionLocal_updateOutput_frame) + (input, output, weight, bias, finput, + kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + } + else + { + long T = input->size[0]; + long t; + + THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth); + THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth); + +#pragma omp parallel for private(t) + for(t = 0; t < T; t++) + { + THTensor *input_t = THTensor_(newSelect)(input, 0, t); + THTensor *output_t = THTensor_(newSelect)(output, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(SpatialConvolutionLocal_updateOutput_frame) + (input_t, output_t, weight, bias, finput_t, + kW, 
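A locally connected layer is a convolution whose filters are not shared across positions, so the weight carries a leading oH*oW dimension and the forward pass becomes one small matrix product per output location; baddbmm above performs all oH*oW products in one call over strided views of the same storage. The index logic behind that batched product, in plain C with a simplified contiguous, location-major layout (the library itself works through strided 3D views):

#include <stdio.h>

/* L = oH*oW locations, M = nOutputPlane, K = nInputPlane*kH*kW.
 * weight: L x M x K, finput: L x K, output: L x M (starts at the bias). */
static void local_forward(const float *weight, const float *finput,
                          float *output, long L, long M, long K) {
  for (long l = 0; l < L; l++)
    for (long m = 0; m < M; m++) {
      float acc = output[l * M + m];
      for (long k = 0; k < K; k++)
        acc += weight[(l * M + m) * K + k] * finput[l * K + k];
      output[l * M + m] = acc;
    }
}

int main(void) {
  /* L=1 location, M=1 output plane, K=2 unfolded inputs */
  float weight[2] = {0.5f, 0.25f}, finput[2] = {4.0f, 8.0f};
  float output[1] = {1.0f};            /* bias */
  local_forward(weight, finput, output, 1, 1, 2);
  printf("%.2f\n", output[0]);         /* 1 + 2 + 2 = 5.00 */
  return 0;
}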
kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + + THTensor_(free)(input_t); + THTensor_(free)(output_t); + THTensor_(free)(finput_t); + } + } + + THTensor_(free)(input); + THTensor_(free)(weight); +} + + +static void THNN_(SpatialConvolutionLocal_updateGradInput_frame) + (THTensor *gradInput, THTensor *gradOutput, + THTensor *weight, THTensor *fgradInput, + int kW, int kH, int dW, int dH, int padW, int padH, + long nInputPlane, long inputWidth, long inputHeight, + long nOutputPlane, long outputWidth, long outputHeight) +{ + THTensor *gradOutput3d, *fgradInput3d; + gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset, + outputHeight*outputWidth, 1, + nOutputPlane, outputHeight*outputWidth, + 1, nOutputPlane*outputHeight*outputWidth); + fgradInput3d = THTensor_(newWithStorage3d)(fgradInput->storage, fgradInput->storageOffset, + outputHeight*outputWidth, 1, + kW*kH*nInputPlane, outputHeight*outputWidth, + 1, kW*kH*nInputPlane*outputHeight*outputWidth); + // weight: oH*oW x nInputPlane*kH*kW x nOutputPlane + // gradOutput3d: oH*oW x nOutputPlane x 1 + THTensor_(baddbmm)(fgradInput3d, 0.0, fgradInput3d, 1.0, weight, gradOutput3d); + // fgradInput3d: oH*oW x nInputPlane*kH*kW x 1 + + THTensor_(free)(gradOutput3d); + THTensor_(free)(fgradInput3d); + + THTensor_(zero)(gradInput); + + THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + outputWidth, outputHeight); + +} + +void THNN_(SpatialConvolutionLocal_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + long inputWidth, long inputHeight, + long outputWidth, long outputHeight) +{ + weight = THNN_(view_weight_local)(weight); + + THNN_(SpatialConvolutionLocal_shapeCheck) + (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, + inputHeight, inputWidth, outputHeight, outputWidth); + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + long nInputPlane = THTensor_(size)(weight,2)/(kW*kH); + long nOutputPlane = THTensor_(size)(weight,1); + + THTensor_(resizeAs)(gradInput, input); + THTensor_(resizeAs)(fgradInput, finput); + + THTensor *tweight = THTensor_(new)(); + THTensor_(transpose)(tweight, weight, 1, 2); + + if(input->nDimension == 3) + { + THNN_(SpatialConvolutionLocal_updateGradInput_frame) + (gradInput, gradOutput, tweight, + fgradInput, kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + } + else + { + long T = input->size[0]; + long t; + +#pragma omp parallel for private(t) + for(t = 0; t < T; t++) + { + THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); + + THNN_(SpatialConvolutionLocal_updateGradInput_frame) + (gradInput_t, gradOutput_t, tweight, fgradInput_t, + kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + + THTensor_(free)(gradInput_t); + THTensor_(free)(gradOutput_t); + THTensor_(free)(fgradInput_t); + } + } + + THTensor_(free)(tweight); + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); +} + +static void 
THNN_(SpatialConvolutionLocal_accGradParameters_frame) + (THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, + THTensor *finput, real scale, + int kW, int kH, int dW, int dH, int padW, int padH, + long nInputPlane, long inputWidth, long inputHeight, + long nOutputPlane, long outputWidth, long outputHeight) +{ + + THTensor *gradOutput3d, *finput3d; + gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset, + outputHeight*outputWidth, 1, + nOutputPlane, outputHeight*outputWidth, + 1, nOutputPlane*outputHeight*outputWidth); + finput3d = THTensor_(newWithStorage3d)(finput->storage, finput->storageOffset, + outputHeight*outputWidth, 1, + 1, kW*kH*nInputPlane*outputHeight*outputWidth, + kW*kH*nInputPlane, outputHeight*outputWidth); + // gradOutput3d: oH*oW x nOutputPlane x 1 + // finput3d: oH*oW x 1 x kW*kH*nInputPlane + THTensor_(baddbmm)(gradWeight, 1.0, gradWeight, scale, gradOutput3d, finput3d); + // gradWeight: oH*oW x nOutputPlane x kW*kH*nInputPlane + + THTensor_(cadd)(gradBias, gradBias, scale, gradOutput); + + THTensor_(free)(gradOutput3d); + THTensor_(free)(finput3d); +} + +void THNN_(SpatialConvolutionLocal_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + long inputWidth, long inputHeight, + long outputWidth, long outputHeight, + accreal scale_) +{ + THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous"); + THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous"); + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + gradWeight = THNN_(view_weight_local)(gradWeight); + + THNN_(SpatialConvolutionLocal_shapeCheck) + (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, + inputHeight, inputWidth, outputHeight, outputWidth); + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + long nInputPlane = THTensor_(size)(gradWeight,2)/(kW*kH); + long nOutputPlane = THTensor_(size)(gradWeight,1); + + if(input->nDimension == 3) + { + THNN_(SpatialConvolutionLocal_accGradParameters_frame) + (gradOutput, gradWeight, gradBias, finput, scale, + kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + } + else + { + long T = input->size[0]; + long t; + + for(t = 0; t < T; t++) + { + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(SpatialConvolutionLocal_accGradParameters_frame) + (gradOutput_t, gradWeight, gradBias, finput_t, scale, + kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + + THTensor_(free)(gradOutput_t); + THTensor_(free)(finput_t); + } + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(gradWeight); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMM.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMM.c new file mode 100644 index 000000000..28fea517c --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMM.c @@ -0,0 +1,377 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolutionMM.c" +#else + +static inline void THNN_(SpatialConvolutionMM_shapeCheck)( + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor 
*bias, + int kH, int kW, int dH, int dW, int padH, int padW) { + + THArgCheck(kW > 0 && kH > 0, 9, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + THArgCheck(dW > 0 && dH > 0, 11, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 4, 5, weight, + "2D or 4D weight tensor expected, but got: %s"); + + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]); + } + + int ndim = input->nDimension; + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + + long nInputPlane = weight->size[1] / (kH * kW); + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long nOutputPlane = weight->size[0]; + long outputHeight = (inputHeight + 2*padH - kH) / dH + 1; + long outputWidth = (inputWidth + 2*padW - kW) / dW + 1; + + if (outputWidth < 1 || outputHeight < 1) + THError("Given input size: (%d x %d x %d). " + "Calculated output size: (%d x %d x %d). Output size is too small", + nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth); + + THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + } +} + +static THTensor* THNN_(view_weight_MM2d)(THTensor *weight) { + weight = THTensor_(newContiguous)(weight); + if (weight->nDimension == 4) { + long s1 = weight->size[0]; + long s2 = weight->size[1] * weight->size[2] * weight->size[3]; + THTensor *old_weight = weight; + weight = THTensor_(newWithStorage2d)(weight->storage, weight->storageOffset, + s1, -1, s2, -1); + THTensor_(free)(old_weight); + } + return weight; +} + +static void THNN_(SpatialConvolutionMM_updateOutput_frame)( + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + long nInputPlane, + long inputWidth, + long inputHeight, + long nOutputPlane, + long outputWidth, + long outputHeight) +{ + long i; + THTensor *output2d; + + THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + outputWidth, outputHeight); + + output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset, + nOutputPlane, -1, + outputHeight*outputWidth, -1); + if (bias) { + for(i = 0; i < nOutputPlane; i++) + THVector_(fill) + (output->storage->data + output->storageOffset + output->stride[0] * i, + THTensor_(get1d)(bias, i), outputHeight*outputWidth); + } else { + THTensor_(zero)(output); + } + + THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput); + + THTensor_(free)(output2d); +} + +void THNN_(SpatialConvolutionMM_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + THTensor *fgradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH) +{ + weight = THNN_(view_weight_MM2d)(weight); + + THNN_(SpatialConvolutionMM_shapeCheck) + (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW); + + input = THTensor_(newContiguous)(input); + int ndim = input->nDimension; + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + long nInputPlane = 
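SpatialConvolutionMM implements convolution as im2col followed by a single GEMM: unfolded_copy lays every kH x kW input patch out as a column of finput, and output2d = weight * finput then computes all output positions at once, with weight viewed as nOutputPlane x (nInputPlane*kH*kW). A self-contained toy version of the same two steps:

/* im2col + GEMM on a toy case: one 3x3 input plane, one 2x2 filter,
 * stride 1, no padding -> 2x2 output (plain C, row-major). */
#include <stdio.h>

int main(void) {
  float input[3][3] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
  float weight[4] = {1, 0, 0, 1};      /* picks top-left + bottom-right */
  float finput[4][4];                  /* K = kH*kW rows, oH*oW columns */
  float output[4];

  /* unfolded_copy: column (i,j) holds the patch at (i,j) */
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 2; j++)
      for (int y = 0; y < 2; y++)
        for (int x = 0; x < 2; x++)
          finput[y * 2 + x][i * 2 + j] = input[i + y][j + x];

  /* GEMM: output = weight (1 x 4) * finput (4 x 4) */
  for (int c = 0; c < 4; c++) {
    output[c] = 0;
    for (int k = 0; k < 4; k++)
      output[c] += weight[k] * finput[k][c];
  }
  printf("%g %g\n%g %g\n", output[0], output[1], output[2], output[3]);
  return 0;                            /* 6 8 / 12 14 */
}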
input->size[dimf]; + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long nOutputPlane = weight->size[0]; + long outputHeight = (inputHeight + 2*padH - kH) / dH + 1; + long outputWidth = (inputWidth + 2*padW - kW) / dW + 1; + + if(input->nDimension == 3) + { + THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth); + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); + + THNN_(SpatialConvolutionMM_updateOutput_frame) + (input, output, weight, bias, finput, + kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + } + else + { + long T = input->size[0]; + long t; + + THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth); + THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth); + +#pragma omp parallel for private(t) + for(t = 0; t < T; t++) + { + THTensor *input_t = THTensor_(newSelect)(input, 0, t); + THTensor *output_t = THTensor_(newSelect)(output, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(SpatialConvolutionMM_updateOutput_frame) + (input_t, output_t, weight, bias, finput_t, + kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + + THTensor_(free)(input_t); + THTensor_(free)(output_t); + THTensor_(free)(finput_t); + } + } + + THTensor_(free)(input); + THTensor_(free)(weight); +} + +static void THNN_(SpatialConvolutionMM_updateGradInput_frame)( + THTensor *gradInput, + THTensor *gradOutput, + THTensor *weight, + THTensor *fgradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH) +{ + THTensor *gradOutput2d = THTensor_(newWithStorage2d) + (gradOutput->storage, gradOutput->storageOffset, + gradOutput->size[0], -1, + gradOutput->size[1]*gradOutput->size[2], -1); + THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d); + THTensor_(free)(gradOutput2d); + + THTensor_(zero)(gradInput); + + THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, + padW, padH, + gradInput->size[0], gradInput->size[2], gradInput->size[1], + gradOutput->size[2], gradOutput->size[1]); +} + +void THNN_(SpatialConvolutionMM_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH) +{ + weight = THNN_(view_weight_MM2d)(weight); + + THNN_(SpatialConvolutionMM_shapeCheck) + (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW); + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + THTensor_(resizeAs)(gradInput, input); + THTensor_(resizeAs)(fgradInput, finput); + + // depending on the BLAS library, fgradInput (result tensor) might + // be left uninitialized on zero alpha, which might lead to weird behavior + // hence, to be safe, zero it + THTensor_(zero)(fgradInput); + THTensor *tweight = THTensor_(new)(); + THTensor_(transpose)(tweight, weight, 0, 1); + + if(input->nDimension == 3) + { + THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, + tweight, fgradInput, + kW, kH, dW, dH, padW, padH); + } + else + { + long T = input->size[0]; + long t; + +#pragma omp parallel for private(t) + for(t = 0; t < T; t++) + { + THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *fgradInput_t = 
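The backward pass inverts those two steps: a GEMM with the transposed weight produces patch-column gradients in fgradInput, and unfolded_acc (the col2im counterpart of unfolded_copy) scatters those columns back onto gradInput, accumulating wherever patches overlap. A toy sketch of that scatter:

/* col2im-style accumulation: scatter the 4 patch-columns of a 2x2-kernel
 * layout back onto a 3x3 gradInput, accumulating overlaps (plain C). */
#include <stdio.h>

int main(void) {
  float fgradInput[4][4];              /* K=4 rows, oH*oW=4 columns */
  float gradInput[3][3] = {{0}};

  for (int k = 0; k < 4; k++)          /* fill with ones for clarity */
    for (int c = 0; c < 4; c++)
      fgradInput[k][c] = 1.0f;

  for (int i = 0; i < 2; i++)          /* inverse of the unfold loop */
    for (int j = 0; j < 2; j++)
      for (int y = 0; y < 2; y++)
        for (int x = 0; x < 2; x++)
          gradInput[i + y][j + x] += fgradInput[y * 2 + x][i * 2 + j];

  for (int r = 0; r < 3; r++)          /* 1 2 1 / 2 4 2 / 1 2 1 */
    printf("%g %g %g\n", gradInput[r][0], gradInput[r][1], gradInput[r][2]);
  return 0;
}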
THTensor_(newSelect)(fgradInput, 0, t); + + THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, + tweight, fgradInput_t, + kW, kH, dW, dH, padW, padH); + + THTensor_(free)(gradInput_t); + THTensor_(free)(gradOutput_t); + THTensor_(free)(fgradInput_t); + } + } + + THTensor_(free)(tweight); + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); +} + +static void THNN_(SpatialConvolutionMM_accGradParameters_frame)( + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + real scale) +{ + long i; + THTensor *gradOutput2d = THTensor_(newWithStorage2d) + (gradOutput->storage, gradOutput->storageOffset, + gradOutput->size[0], -1, + gradOutput->size[1]*gradOutput->size[2], -1); + + THTensor *tfinput = THTensor_(new)(); + THTensor_(transpose)(tfinput, finput, 0, 1); + THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, tfinput); + THTensor_(free)(tfinput); + + if (gradBias) { + for(i = 0; i < gradBias->size[0]; i++) + { + long k; + real sum = 0; + real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0]; + for(k = 0; k < gradOutput2d->size[1]; k++) + sum += data[k]; + (gradBias->storage->data + gradBias->storageOffset)[i] += scale*sum; + } + } + + THTensor_(free)(gradOutput2d); +} + +void THNN_(SpatialConvolutionMM_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + THTensor *fgradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + accreal scale_) +{ + THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous"); + if (gradBias) + THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous"); + + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + gradWeight = THNN_(view_weight_MM2d)(gradWeight); + + THNN_(SpatialConvolutionMM_shapeCheck) + (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW); + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + if(input->nDimension == 3) + { + THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, + gradBias, finput, scale); + } + else + { + long T = input->size[0]; + long t; + + for(t = 0; t < T; t++) + { + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, + gradBias, finput_t, scale); + + THTensor_(free)(gradOutput_t); + THTensor_(free)(finput_t); + } + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(gradWeight); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMap.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMap.c new file mode 100644 index 000000000..142a03551 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMap.c @@ -0,0 +1,277 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolutionMap.c" +#else + +void THNN_(SpatialConvolutionMap_updateOutput)( + THNNState *state, THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, + THTensor *connTable, int nInputPlane, int nOutputPlane, + int dW, int dH) +{ + THArgCheck( + weight != NULL && weight->nDimension == 3 + && connTable != NULL && connTable->size[0] == weight->size[0], 4, + "3D weight tensor expected (connTable:size(%d) x kH 
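accGradParameters mirrors the forward GEMM: gradWeight accumulates scale * gradOutput2d * finput^T (the addmm on the transposed unfolded input above), while gradBias accumulates the row sums of gradOutput2d, one sum per output plane. A toy-size sketch of both accumulations:

#include <stdio.h>

int main(void) {
  /* 1 output plane, 2 output positions, K=2 unfolded inputs */
  float gradOutput2d[1][2] = {{1.0f, 2.0f}};
  float finput[2][2] = {{3.0f, 4.0f}, {5.0f, 6.0f}};  /* K x positions */
  float gradWeight[1][2] = {{0}}, gradBias[1] = {0}, scale = 1.0f;

  for (int m = 0; m < 1; m++) {
    for (int k = 0; k < 2; k++)          /* gradOutput2d * finput^T */
      for (int c = 0; c < 2; c++)
        gradWeight[m][k] += scale * gradOutput2d[m][c] * finput[k][c];
    for (int c = 0; c < 2; c++)          /* row sum for the bias */
      gradBias[m] += scale * gradOutput2d[m][c];
  }
  printf("gradWeight = [%g %g], gradBias = [%g]\n",
         gradWeight[0][0], gradWeight[0][1], gradBias[0]); /* [11 17], [3] */
  return 0;
}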
x kW)", TH_INDEX_BASE + ); + + int dimw = 2; + int dimh = 1; + int dimc = 0; + long nbatch = 1; + + THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + + if (input->nDimension == 4) + { + nbatch = input->size[0]; + dimc++; + dimw++; + dimh++; + } + + const long kH = weight->size[1]; + const long kW = weight->size[2]; + + THArgCheck(input->size[dimc] >= nInputPlane, 2, "invalid number of input planes"); + THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH, 2, "input image smaller than kernel size"); + + const long input_w = input->size[dimw]; + const long input_h = input->size[dimh]; + const long output_w = (input_w - kW) / dW + 1; + const long output_h = (input_h - kH) / dH + 1; + + if (input->nDimension == 3) + THTensor_(resize3d)(output, nOutputPlane, output_h, output_w); + else + THTensor_(resize4d)(output, input->size[0], nOutputPlane, output_h, output_w); + + /* contiguous */ + input = THTensor_(newContiguous)(input); + output = THTensor_(newContiguous)(output); + weight = THTensor_(newContiguous)(weight); + bias = bias ? THTensor_(newContiguous)(bias) : bias; + connTable = THTensor_(newContiguous)(connTable); + + /* get raw pointers */ + real *input_data = THTensor_(data)(input); + real *output_data = THTensor_(data)(output); + real *weight_data = THTensor_(data)(weight); + real *bias_data = THTensor_(data)(bias); + real *connTable_data = THTensor_(data)(connTable); + + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nOutputPlane; p++) + { + long m; + for (m = 0; m < nbatch; m++) + { + /* add bias */ + real *ptr_output = output_data + p*output_w*output_h + m*nOutputPlane*output_w*output_h; + long j, k; + real z= bias_data[p]; + for (j = 0; j < output_h*output_w; j++) + ptr_output[j] = z; + + /* convolve all maps */ + int nweight = connTable->size[0]; + for (k = 0; k < nweight; k++) + { + /* get offsets for input/output */ + int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE; + int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE; + + if (o == p) + { + THTensor_(validXCorr2Dptr)( + output_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h, + 1.0, + input_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, input_h, input_w, + weight_data + k*kW*kH, + kH, kW, + dH, dW + ); + } + } + } + } + + /* clean up */ + THTensor_(free)(input); + THTensor_(free)(output); + THTensor_(free)(weight); + if (bias) THTensor_(free)(bias); + THTensor_(free)(connTable); +} + +void THNN_(SpatialConvolutionMap_updateGradInput)( + THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THTensor *weight, THTensor *bias, + THTensor *connTable, int nInputPlane, int nOutputPlane, + int dW, int dH) +{ + THArgCheck( + weight != NULL && weight->nDimension == 3 + && connTable != NULL && connTable->size[0] == weight->size[0], 5, + "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE + ); + + /* and dims */ + int dimw = 2; + int dimh = 1; + long nbatch = 1; + if (input->nDimension == 4) + { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + const long input_h = input->size[dimh]; + const long input_w = input->size[dimw]; + const long output_h = gradOutput->size[dimh]; + const long output_w = gradOutput->size[dimw]; + const long kH = weight->size[1]; + const long kW = weight->size[2]; + + /* contiguous */ + gradInput = THTensor_(newContiguous)(gradInput); + gradOutput = THTensor_(newContiguous)(gradOutput); + weight = THTensor_(newContiguous)(weight); + connTable = 
THTensor_(newContiguous)(connTable); + + /* Resize/Zero */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* get raw pointers */ + real *gradInput_data = THTensor_(data)(gradInput); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *weight_data = THTensor_(data)(weight); + real *connTable_data = THTensor_(data)(connTable); + + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nInputPlane; p++) + { + long m; + for (m = 0; m < nbatch; m++) + { + long k; + /* backward all */ + int nkernel = connTable->size[0]; + for (k = 0; k < nkernel; k++) + { + int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE; + int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE; + if (i == p) + { + /* gradient to input */ + THTensor_(fullConv2Dptr)( + gradInput_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, 1.0, + gradOutput_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h, output_h, output_w, + weight_data + k*kW*kH, kH, kW, dH, dW + ); + } + } + } + } + + /* clean up */ + THTensor_(free)(gradInput); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); + THTensor_(free)(connTable); +} + +void THNN_(SpatialConvolutionMap_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *connTable, + int nInputPlane, + int nOutputPlane, + int dW, int dH, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + THArgCheck( + gradWeight != NULL && gradWeight->nDimension == 3 + && connTable != NULL && connTable->size[0] == gradWeight->size[0], 5, + "3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE + ); + + /* and dims */ + int dimw = 2; + int dimh = 1; + long nbatch = 1; + if (input->nDimension == 4) + { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + const long input_h = input->size[dimh]; + const long input_w = input->size[dimw]; + const long output_h = gradOutput->size[dimh]; + const long output_w = gradOutput->size[dimw]; + const long kH = gradWeight->size[1]; + const long kW = gradWeight->size[2]; + + /* contiguous */ + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous"); + THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous"); + + /* get raw pointers */ + real *input_data = THTensor_(data)(input); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *gradWeight_data = THTensor_(data)(gradWeight); + real *gradBias_data = THTensor_(data)(gradBias); + + + long k; + /* gradients wrt bias */ +#pragma omp parallel for private(k) + for (k = 0; k < nOutputPlane; k++) + { + long m; + for (m = 0; m < nbatch; m++) + { + real *ptr_gradOutput = gradOutput_data + k*output_w*output_h + m*nOutputPlane*output_w*output_h; + long l; + for (l = 0; l < output_h*output_w; l++) + gradBias_data[k] += scale*ptr_gradOutput[l]; + } + } + + /* gradients wrt weight */ + const int nkernel = connTable->size[0]; +#pragma omp parallel for private(k) + for (k = 0; k < nkernel; k++) + { + long m; + for (m = 0; m < nbatch; m++) + { + int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE; + int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE; + + /* gradient to kernel */ + THTensor_(validXCorr2DRevptr)( + gradWeight_data + k*kW*kH, + scale, + input_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, input_h, input_w, + gradOutput_data + o*output_w*output_h + 
m*nOutputPlane*output_w*output_h , output_h, output_w, + dH, dW + ); + } + } + + /* clean up */ + THTensor_(free)(input); + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialDepthWiseConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDepthWiseConvolution.c new file mode 100644 index 000000000..efb66a3e3 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDepthWiseConvolution.c @@ -0,0 +1,528 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialDepthWiseConvolution.c" +#else + +static inline void THNN_(SpatialDepthWiseConvolution_shapeCheck)( + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int kH, int kW, int dH, int dW, int padH, int padW) { + + THArgCheck(kW > 0 && kH > 0, 9, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + THArgCheck(dW > 0 && dH > 0, 11, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + THNN_ARGCHECK(weight->nDimension == 4, 5, weight, + "2D or 4D weight tensor expected, but got: %s"); + + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 2, 0, weight->size[0]); + THNN_CHECK_DIM_SIZE(bias, 2, 1, weight->size[1]); + } + + int ndim = input->nDimension; + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + + long nInputPlane = weight->size[1]; + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long nOutputPlane = weight->size[0]; + long outputHeight = (inputHeight + 2*padH - kH) / dH + 1; + long outputWidth = (inputWidth + 2*padW - kW) / dW + 1; + + if (outputWidth < 1 || outputHeight < 1) + THError("Given input size: (%d x %d x %d). " + "Calculated output size: (%d x %d x %d). 
Output size is too small", + nInputPlane,inputHeight,inputWidth,nOutputPlane*nInputPlane,outputHeight,outputWidth); + + THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimf, nInputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimh, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimw, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimw + 1, outputWidth); + } +} + +static void THNN_(SpatialDepthWiseConvolution_updateOutput_frame)( + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + long nInputPlane, + long inputWidth, + long inputHeight, + long nOutputPlane, + long outputWidth, + long outputHeight) +{ + long i; + THTensor *output2d; + + THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + outputWidth, outputHeight); + + output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset, + nOutputPlane, -1, + outputHeight*outputWidth, -1); + if (bias) { + for(i = 0; i < nOutputPlane; i++) + THVector_(fill) + (output->storage->data + output->storageOffset + output->stride[0] * i, + THTensor_(get1d)(bias, i), outputHeight*outputWidth); + } else { + THTensor_(zero)(output); + } + + THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput); + + THTensor_(free)(output2d); +} + +void THNN_(SpatialDepthWiseConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + THTensor *fgradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH) +{ + long nInputPlane = weight->nDimension == 2 ? 
weight->size[1]/(kH*kW) : weight->size[1]; + long nOutputPlane = weight->size[0]; + if (weight->nDimension == 2) { + THTensor_(resize4d)(weight, nOutputPlane, nInputPlane, kH, kW); + } + + THNN_(SpatialDepthWiseConvolution_shapeCheck) + (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW); + + THTensor *_weight = THTensor_(newTranspose)(weight, 0, 1); + weight = THTensor_(newContiguous)(_weight); + + THTensor *_bias = NULL; + if(bias) { + _bias = THTensor_(newTranspose)(bias, 0, 1); + bias = THTensor_(newContiguous)(_bias); + } + + // resize weight + long s1 = weight->size[0]; + long s2 = weight->size[1]; + long s3 = weight->size[2] * weight->size[3]; + weight = THTensor_(newWithStorage3d)(weight->storage, weight->storageOffset, + s1, -1, s2, -1, s3, -1); + + input = THTensor_(newContiguous)(input); + + int ndim = input->nDimension; + + int batch = 1; + if (ndim == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + } + + long inputHeight = input->size[3]; + long inputWidth = input->size[2]; + long outputHeight = (inputHeight + 2*padH - kH) / dH + 1; + long outputWidth = (inputWidth + 2*padW - kW) / dW + 1; + + long T = input->size[0]; + long t; + + THTensor_(resize5d)(output, T, nInputPlane, nOutputPlane, outputHeight, outputWidth); + THTensor_(resize4d)(finput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth); + +#pragma omp parallel for private(t) + for(t = 0; t < T; t++) + { + THTensor *input_t = THTensor_(newSelect)(input, 0, t); + THTensor *output_t = THTensor_(newSelect)(output, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + long i; +#pragma omp parallel for private(i) + for(i = 0; i < nInputPlane; i++) + { + THTensor *weight_i = THTensor_(newSelect)(weight, 0, i); + THTensor *input_i = THTensor_(newNarrow)(input_t, 0, i, 1); + THTensor *output_i = THTensor_(newSelect)(output_t, 0, i); + THTensor *finput_i = THTensor_(newSelect)(finput_t, 0, i); + THTensor *bias_i = NULL; + if(bias) { + bias_i = THTensor_(newSelect)(bias, 0, i); + } + THNN_(SpatialDepthWiseConvolution_updateOutput_frame) + (input_i, output_i, weight_i, bias_i, finput_i, + kW, kH, dW, dH, padW, padH, + 1, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); + + THTensor_(free)(input_i); + THTensor_(free)(weight_i); + THTensor_(free)(bias_i); + THTensor_(free)(output_i); + THTensor_(free)(finput_i); + } + THTensor_(free)(input_t); + THTensor_(free)(output_t); + THTensor_(free)(finput_t); + } + + THTensor_(free)(weight); + THTensor_(free)(_weight); + THTensor_(free)(bias); + THTensor_(free)(_bias); + THTensor_(resize4d)(output, T, nInputPlane * nOutputPlane, outputHeight, outputWidth); + + if (batch == 0) { + THTensor_(select)(output, NULL, 0, 0); + THTensor_(select)(input, NULL, 0, 0); + THTensor_(select)(finput, NULL, 0, 0); + } + THTensor_(free)(input); +} + +static void THNN_(SpatialDepthWiseConvolution_updateGradInput_frame)( + THTensor *gradInput, + THTensor *gradOutput, + THTensor *weight, + THTensor *fgradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH) +{ + THTensor *gradOutput2d = THTensor_(newWithStorage2d) + (gradOutput->storage, gradOutput->storageOffset, + gradOutput->size[0], -1, + gradOutput->size[1]*gradOutput->size[2], -1); + THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d); + THTensor_(free)(gradOutput2d); + + THTensor_(zero)(gradInput); + + THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, + padW, padH, + gradInput->size[0], 
gradInput->size[2], gradInput->size[1], + gradOutput->size[2], gradOutput->size[1]); +} + +void THNN_(SpatialDepthWiseConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH) +{ + long nInputPlane = weight->nDimension == 2 ? weight->size[1]/(kH*kW) : weight->size[1]; + long nOutputPlane = weight->size[0]; + if (weight->nDimension == 2) { + THTensor_(resize4d)(weight, nOutputPlane, nInputPlane, kH, kW); + } + gradOutput = THTensor_(newWithTensor)(gradOutput); + + if (input->nDimension == 3) { + if (gradOutput->nDimension == 3) { + THTensor_(resize4d)(gradOutput, nInputPlane, nOutputPlane, gradOutput->size[1], gradOutput->size[2]); + } + } + else + { + if (gradOutput->nDimension == 4) { + THTensor_(resize5d)(gradOutput, gradOutput->size[0], nInputPlane, nOutputPlane, gradOutput->size[2], gradOutput->size[3]); + } + } + + + THNN_(SpatialDepthWiseConvolution_shapeCheck) + (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW); + + THTensor *_weight = THTensor_(newTranspose)(weight, 0, 1); + weight = THTensor_(newContiguous)(_weight); + + + // resize weight + long s1 = weight->size[0]; + long s2 = weight->size[1]; + long s3 = weight->size[2] * weight->size[3]; + weight = THTensor_(newWithStorage3d)(weight->storage, weight->storageOffset, + s1, -1, s2, -1, s3, -1); + + input = THTensor_(newContiguous)(input); + + int batch = 1; + if (input->nDimension == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]); + } + + long inputHeight = input->size[3]; + long inputWidth = input->size[2]; + long outputHeight = (inputHeight + 2*padH - kH) / dH + 1; + long outputWidth = (inputWidth + 2*padW - kW) / dW + 1; + + long T = input->size[0]; + long t; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(resize4d)(fgradInput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth); + + // depending on the BLAS library, fgradInput (result tensor) might + // be left uninitialized on zero alpha, which might lead to weird behavior + // hence, to be safe, zero it + THTensor_(zero)(fgradInput); + + + +#pragma omp parallel for private(t) + for(t = 0; t < T; t++) + { + THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); + + + long i; +#pragma omp parallel for private(i) + for(i = 0; i < nInputPlane; i++) + { + THTensor *weight_i = THTensor_(newSelect)(weight, 0, i); + THTensor *gradInput_i = THTensor_(newNarrow)(gradInput_t, 0, i, 1); + THTensor *gradOutput_i = THTensor_(newSelect)(gradOutput_t, 0, i); + THTensor *fgradInput_i = THTensor_(newSelect)(fgradInput_t, 0, i); + + THTensor_(transpose)(weight_i, weight_i, 0, 1); + + THNN_(SpatialDepthWiseConvolution_updateGradInput_frame)(gradInput_i, gradOutput_i, + weight_i, fgradInput_i, + kW, kH, dW, dH, padW, padH); + + THTensor_(free)(gradInput_i); + THTensor_(free)(weight_i); + THTensor_(free)(gradOutput_i); + THTensor_(free)(fgradInput_i); + } + + THTensor_(free)(gradInput_t); + THTensor_(free)(gradOutput_t); + THTensor_(free)(fgradInput_t); + } + + if (batch == 0) { + THTensor_(select)(gradOutput, NULL, 0, 0); + THTensor_(select)(input, NULL, 0, 0); + 
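/* the selects above and below strip the batch dimension that was forced earlier for 3D inputs, restoring 3D views of the tensors in place */ +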
THTensor_(select)(gradInput, NULL, 0, 0); + THTensor_(select)(fgradInput, NULL, 0, 0); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); + THTensor_(free)(_weight); +} + +static void THNN_(SpatialDepthWiseConvolution_accGradParameters_frame)( + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + accreal scale) +{ + long i; + THTensor *gradOutput2d = THTensor_(newWithStorage2d) + (gradOutput->storage, gradOutput->storageOffset, + gradOutput->size[0], -1, + gradOutput->size[1]*gradOutput->size[2], -1); + + THTensor_(transpose)(finput, finput, 0, 1); + THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput); + THTensor_(transpose)(finput, finput, 0, 1); + + if (gradBias) { + for(i = 0; i < gradBias->size[0]; i++) + { + long k; + real sum = 0; + real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0]; + for(k = 0; k < gradOutput2d->size[1]; k++) + sum += data[k]; + (gradBias->storage->data + gradBias->storageOffset)[i] += scale*sum; + } + } + + THTensor_(free)(gradOutput2d); +} + +void THNN_(SpatialDepthWiseConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + THTensor *fgradInput, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + accreal scale) +{ + long nInputPlane = gradWeight->nDimension == 2 ? gradWeight->size[1]/(kH*kW) : gradWeight->size[1]; + long nOutputPlane = gradWeight->size[0]; + if (gradWeight->nDimension == 2) { + THTensor_(resize4d)(gradWeight, nOutputPlane, nInputPlane, kH, kW); + } + + gradOutput = THTensor_(newWithTensor)(gradOutput); + if (input->nDimension == 3) { + if (gradOutput->nDimension == 3) { + THTensor_(resize4d)(gradOutput, nInputPlane, nOutputPlane, gradOutput->size[1], gradOutput->size[2]); + } + } + else + { + if (gradOutput->nDimension == 4) { + THTensor_(resize5d)(gradOutput, gradOutput->size[0], nInputPlane, nOutputPlane, gradOutput->size[2], gradOutput->size[3]); + } + } + + + THNN_(SpatialDepthWiseConvolution_shapeCheck) + (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW); + + // Transpose gradWeight & gradBias + THTensor_(transpose)(gradWeight, NULL, 0, 1); + THTensor *_gradWeight; + _gradWeight = gradWeight; + gradWeight = THTensor_(newContiguous)(gradWeight); + + THTensor *_gradBias = NULL; + if(gradBias) { + THTensor_(transpose)(gradBias, NULL, 0, 1); + _gradBias = gradBias; + gradBias = THTensor_(newContiguous)(gradBias); + } + + // resize gradWeight + long s1 = gradWeight->size[0]; + long s2 = gradWeight->size[1]; + long s3 = gradWeight->size[2] * gradWeight->size[3]; + gradWeight = THTensor_(newWithStorage3d)(gradWeight->storage, gradWeight->storageOffset, + s1, -1, s2, -1, s3, -1); + + input = THTensor_(newContiguous)(input); + + + int batch = 1; + if (input->nDimension == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]); + } + + long inputHeight = input->size[3]; + long inputWidth = input->size[2]; + long outputHeight = (inputHeight + 2*padH - kH) / dH + 1; + long outputWidth = (inputWidth + 2*padW - kW) / dW + 1; + + long T = input->size[0]; + long t; + THTensor_(resize4d)(finput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth); + + for(t = 0; t < T; t++) + { + THTensor 
*gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + long i; +#pragma omp parallel for private(i) + for(i = 0; i < nInputPlane; i++) + { + THTensor *finput_i = THTensor_(newSelect)(finput_t, 0, i); + THTensor *gradOutput_i = THTensor_(newSelect)(gradOutput_t, 0, i); + THTensor *gradWeight_i = THTensor_(newSelect)(gradWeight, 0, i); + THTensor *gradBias_i = NULL; + if(gradBias) { + gradBias_i = THTensor_(newSelect)(gradBias, 0, i); + } + THNN_(SpatialDepthWiseConvolution_accGradParameters_frame)(gradOutput_i, gradWeight_i, + gradBias_i, finput_i, scale); + + THTensor_(free)(finput_i); + THTensor_(free)(gradOutput_i); + THTensor_(free)(gradWeight_i); + THTensor_(free)(gradBias_i); + } + + THTensor_(free)(gradOutput_t); + THTensor_(free)(finput_t); + } + + // Copy back and transpose back + THTensor_(transpose)(_gradWeight, NULL, 0, 1); + THTensor_(resize4d)(_gradWeight, nInputPlane, nOutputPlane, kH, kW); + THTensor_(copy)(_gradWeight, gradWeight); + THTensor_(transpose)(_gradWeight, NULL, 0, 1); + + if(gradBias) { + THTensor_(transpose)(_gradBias, NULL, 0, 1); + THTensor_(resize2d)(_gradBias, nInputPlane, nOutputPlane); + THTensor_(copy)(_gradBias, gradBias); + THTensor_(transpose)(_gradBias, NULL, 0, 1); + } + + if (batch == 0) { + THTensor_(select)(gradOutput, NULL, 0, 0); + THTensor_(select)(input, NULL, 0, 0); + THTensor_(select)(finput, NULL, 0, 0); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(gradWeight); + THTensor_(free)(gradBias); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedConvolution.c new file mode 100644 index 000000000..897cc0da4 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedConvolution.c @@ -0,0 +1,408 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialDilatedConvolution.c" +#else + +static inline void THNN_(SpatialDilatedConvolution_shapeCheck)( + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int kH, int kW, int dH, int dW, int padH, int padW, + int dilationH, int dilationW) { + + THNN_ARGCHECK(weight->nDimension == 4, 4, weight, + "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " + "but got: %s"); + THArgCheck(kW > 0 && kH > 0, 9, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + THArgCheck(dW > 0 && dH > 0, 11, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + THArgCheck(dilationW > 0 && dilationH > 0, 15, + "dilation should be greater than zero, but got dilationH: %d, dilationW: %d", + dilationH, dilationW); + + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]); + } + + int ndim = input->nDimension; + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + + long nInputPlane = weight->size[1]; + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long nOutputPlane = weight->size[0]; + long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + if (outputWidth < 1 || outputHeight < 1) + THError("Given input size: (%ld x %ld x %ld). " + "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth); + + THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + } +} + +void THNN_(SpatialDilatedConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *columns, + THTensor *ones, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int dilationW, int dilationH) +{ + + THNN_(SpatialDilatedConvolution_shapeCheck) + (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW, + dilationH, dilationW); + + // Params: + int nInputPlane = weight->size[1]; + int nOutputPlane = weight->size[0]; + + input = THTensor_(newContiguous)(input); + weight = THTensor_(newContiguous)(weight); + bias = bias ? THTensor_(newContiguous)(bias) : bias; + int batch = 1; + if (input->nDimension == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + } + long inputWidth = input->size[3]; + long inputHeight = input->size[2]; + long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Resize output + THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth); + THTensor_(zero)(output); + + // Resize temporary columns + THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth); + + // Define a buffer of ones, for bias accumulation + // Note: this buffer can be shared with other modules, it only ever gets increased, + // and always contains ones. + if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) { + // Resize plane and fill with ones... 
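+ // Illustrative sketch (assumed example values, not used by the code): read
+ // through column-major eyes, the bias GEMM below computes output_n = ones * bias^T,
+ // a rank-1 fill. With nOutputPlane = 2 and a 3x3 output map, m_ = 2, n_ = 9,
+ // k_ = 1, so plane p is filled with the constant bias[p] before the im2col
+ // GEMM accumulates the convolution term on top.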
+ THTensor_(resize2d)(ones, outputHeight, outputWidth); + THTensor_(fill)(ones, 1); + } + + // Helpers + THTensor *input_n = THTensor_(new)(); + THTensor *output_n = THTensor_(new)(); + + // For each elt in batch, do: + for (int elt = 0; elt < batchSize; elt ++) { + // Matrix multiply per output: + THTensor_(select)(input_n, input, 0, elt); + THTensor_(select)(output_n, output, 0, elt); + + // Do Bias first: + // M,N,K are dims of matrix A and B + long m_ = nOutputPlane; + long n_ = outputHeight * outputWidth; + long k_ = 1; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + if (bias) { + THBlas_(gemm)( + 't', 'n', + n_, m_, k_, + 1, + THTensor_(data)(ones), k_, + THTensor_(data)(bias), k_, + 0, + THTensor_(data)(output_n), n_ + ); + } else { + THTensor_(zero)(output_n); + } + + // Extract columns: + THNN_(im2col)( + THTensor_(data)(input_n), + nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, + THTensor_(data)(columns) + ); + + // M,N,K are dims of matrix A and B + long m = nOutputPlane; + long n = columns->size[1]; + long k = nInputPlane*kH*kW; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 'n', 'n', + n, m, k, + 1, + THTensor_(data)(columns), n, + THTensor_(data)(weight), k, + 1, + THTensor_(data)(output_n), n + ); + } + + // Free + THTensor_(free)(input_n); + THTensor_(free)(output_n); + + // Resize output + if (batch == 0) { + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); + THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(weight); + if (bias) THTensor_(free)(bias); +} + +void THNN_(SpatialDilatedConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *gradColumns, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int dilationW, int dilationH) +{ + THNN_(SpatialDilatedConvolution_shapeCheck) + (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, + dilationH, dilationW); + + // Params + int nInputPlane = weight->size[1]; + int nOutputPlane = weight->size[0]; + + input = THTensor_(newContiguous)(input); + weight = THTensor_(newContiguous)(weight); + gradOutput = THTensor_(newContiguous)(gradOutput); + int batch = 1; + if (input->nDimension == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], + gradOutput->size[2]); + } + + long inputWidth = input->size[3]; + long inputHeight = input->size[2]; + long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Resize output + THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth); + + // Resize temporary columns + THTensor_(resize2d)(gradColumns, nInputPlane*kW*kH, outputHeight*outputWidth); + THTensor_(zero)(gradColumns); + + // Helpers + THTensor *gradInput_n = THTensor_(new)(); + THTensor *gradOutput_n = THTensor_(new)(); + + // For each elt in batch, do: + for (int elt = 0; elt < batchSize; elt ++) { + // Matrix multiply per sample: + THTensor_(select)(gradInput_n, gradInput, 0, elt); + THTensor_(select)(gradOutput_n, gradOutput, 0, elt); + + // M,N,K are dims
of matrix A and B + long m = nInputPlane*kW*kH; + long n = gradColumns->size[1]; + long k = nOutputPlane; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 'n', 't', + n, m, k, + 1, + THTensor_(data)(gradOutput_n), n, + THTensor_(data)(weight), m, + 0, + THTensor_(data)(gradColumns), n + ); + + // Unpack columns back into input: + THNN_(col2im)( + THTensor_(data)(gradColumns), + nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, + THTensor_(data)(gradInput_n) + ); + } + + // Free + THTensor_(free)(gradInput_n); + THTensor_(free)(gradOutput_n); + + // Resize output + if (batch == 0) { + THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth); + THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth); + THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); +} + + +void THNN_(SpatialDilatedConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *columns, + THTensor *ones, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int dilationW, int dilationH, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + THNN_(SpatialDilatedConvolution_shapeCheck) + (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, + dilationH, dilationW); + + // Params + int nInputPlane = gradWeight->size[1]; + int nOutputPlane = gradWeight->size[0]; + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous"); + if (gradBias) + THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous"); + int batch = 1; + if (input->nDimension == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], + gradOutput->size[1], gradOutput->size[2]); + } + + long inputWidth = input->size[3]; + long inputHeight = input->size[2]; + long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Define a buffer of ones, for bias accumulation + if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) { + // Resize plane and fill with ones... 
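+ // Worked example (illustrative values only): the gemv over this ones vector
+ // reduces each gradOutput plane to a scalar, gradBias[p] += scale * sum over
+ // all spatial positions of gradOutput_n[p]; for a 2x2 plane holding {1,2,3,4}
+ // and scale = 1, gradBias[p] grows by 10.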
+ THTensor_(resize2d)(ones, outputHeight, outputWidth); + THTensor_(fill)(ones, 1); + } + + // Resize temporary columns + THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth); + + // Helpers + THTensor *input_n = THTensor_(new)(); + THTensor *gradOutput_n = THTensor_(new)(); + + // For each elt in batch, do: + for (int elt = 0; elt < batchSize; elt ++) { + // Matrix multiply per output: + THTensor_(select)(input_n, input, 0, elt); + THTensor_(select)(gradOutput_n, gradOutput, 0, elt); + + // Extract columns: + THNN_(im2col)( + THTensor_(data)(input_n), + nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, + THTensor_(data)(columns) + ); + + // M,N,K are dims of matrix A and B + long m = nOutputPlane; + long n = nInputPlane*kW*kH; + long k = columns->size[1]; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 't', 'n', + n, m, k, + scale, + THTensor_(data)(columns), k, + THTensor_(data)(gradOutput_n), k, + 1, + THTensor_(data)(gradWeight), n + ); + + // Do Bias: + // M,N,K are dims of matrix A and B + long m_ = nOutputPlane; + long k_ = outputHeight * outputWidth; + + // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices) + if (gradBias) { + THBlas_(gemv)( + 't', + k_, m_, + scale, + THTensor_(data)(gradOutput_n), k_, + THTensor_(data)(ones), 1, + 1, + THTensor_(data)(gradBias), 1 + ); + } + } + + // Free + THTensor_(free)(input_n); + THTensor_(free)(gradOutput_n); + + // Resize + if (batch == 0) { + THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth); + THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedMaxPooling.c new file mode 100644 index 000000000..8f4ad13c3 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedMaxPooling.c @@ -0,0 +1,401 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialDilatedMaxPooling.c" +#else + +static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)( + THTensor *input, THTensor *gradOutput, THIndexTensor *indices, + int kH, int kW, int dH, int dW, int padH, int padW, + int dilationH, int dilationW, bool ceil_mode) { + + THArgCheck(kW > 0 && kH > 0, 5, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + THArgCheck(dW > 0 && dH > 0, 8, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + THArgCheck(dilationH > 0 && dilationW > 0, 12, + "dilation should be greater than zero, but got dilationH: %d dilationW: %d", + dilationH, dilationW); + + int ndim = input->nDimension; + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + + THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, + "pad should be smaller than half of kernel size, but got " + "padW = %d, padH = %d, kW = %d, kH = %d", + padW, padH, kW, kH); + + long nInputPlane = input->size[dimh-1]; + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long outputHeight, outputWidth; + long nOutputPlane = nInputPlane; + + if (ceil_mode) + { + outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1; + outputWidth =
(long)(ceil((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1; + } + else + { + outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1; + outputWidth = (long)(floor((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1; + } + + if (padW || padH) + { + // ensure that the last pooling starts inside the image + // needed to avoid problems in ceil mode + if ((outputHeight - 1)*dH >= inputHeight + padH) + --outputHeight; + if ((outputWidth - 1)*dW >= inputWidth + padW) + --outputWidth; + } + + if (outputWidth < 1 || outputHeight < 1) + THError("Given input size: (%dx%dx%d). " + "Calculated output size: (%dx%dx%d). Output size is too small", + nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + } + if (indices != NULL) { + THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth); + } +} + +static void THNN_(SpatialDilatedMaxPooling_updateOutput_frame)( + real *input_p, + real *output_p, + THIndex_t *ind_p, + long nslices, + long iwidth, + long iheight, + long owidth, + long oheight, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH + ) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + /* loop over output */ + long i, j; + real *ip = input_p + k*iwidth*iheight; + for(i = 0; i < oheight; i++) + { + for(j = 0; j < owidth; j++) + { + long hstart = i * dH - padH; + long wstart = j * dW - padW; + long hend = fminf(hstart + (kH - 1) * dilationH + 1, iheight); + long wend = fminf(wstart + (kW - 1) * dilationW + 1, iwidth); + while(hstart < 0) + hstart += dilationH; + while(wstart < 0) + wstart += dilationW; + + /* local pointers */ + real *op = output_p + k*owidth*oheight + i*owidth + j; + THIndex_t *indp = ind_p + k*owidth*oheight + i*owidth + j; + + /* compute local max: */ + long maxindex = -1; + real maxval = -THInf; + long tcntr = 0; + long x,y; + for(y = hstart; y < hend; y += dilationH) + { + for(x = wstart; x < wend; x += dilationW) + { + tcntr = y*iwidth + x; + real val = *(ip + tcntr); + if (val > maxval) + { + maxval = val; + maxindex = tcntr; + } + } + } + + /* set output to local max */ + *op = maxval; + + /* store location of max */ + *indp = maxindex + TH_INDEX_BASE; + } + } + } +} + +void THNN_(SpatialDilatedMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + bool ceil_mode) +{ + + int dimw = 2; + int dimh = 1; + long nbatch = 1; + long nInputPlane; + long inputHeight; + long inputWidth; + long outputHeight; + long outputWidth; + real *input_data; + real *output_data; + THIndex_t *indices_data; + + THNN_(SpatialDilatedMaxPooling_shapeCheck) + (input, NULL, NULL, kH, kW, dH, dW, + padH, padW, dilationH, dilationW, ceil_mode); + + if (input->nDimension == 4) + { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + /* sizes */ + nInputPlane = input->size[dimh-1]; + inputHeight = input->size[dimh]; + inputWidth = input->size[dimw]; + if (ceil_mode) + { + outputHeight = (long)(ceil((float)(inputHeight - 
(dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1; + outputWidth = (long)(ceil((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1; + } + else + { + outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1; + outputWidth = (long)(floor((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1; + } + + if (padW || padH) + { + // ensure that the last pooling starts inside the image + // needed to avoid problems in ceil mode + if ((outputHeight - 1)*dH >= inputHeight + padH) + --outputHeight; + if ((outputWidth - 1)*dW >= inputWidth + padW) + --outputWidth; + } + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + /* resize output */ + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth); + /* indices will contain the locations for each output point */ + THIndexTensor_(resize3d)(indices, nInputPlane, outputHeight, outputWidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + THNN_(SpatialDilatedMaxPooling_updateOutput_frame) + (input_data, output_data, + indices_data, + nInputPlane, + inputWidth, inputHeight, + outputWidth, outputHeight, + kW, kH, dW, dH, + padW, padH, + dilationW, dilationH + ); + } + else + { + long p; + + THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth); + /* indices will contain the locations for each output point */ + THIndexTensor_(resize4d)(indices, nbatch, nInputPlane, outputHeight, outputWidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialDilatedMaxPooling_updateOutput_frame) + (input_data+p*nInputPlane*inputWidth*inputHeight, + output_data+p*nInputPlane*outputWidth*outputHeight, + indices_data+p*nInputPlane*outputWidth*outputHeight, + nInputPlane, + inputWidth, inputHeight, + outputWidth, outputHeight, + kW, kH, dW, dH, + padW, padH, + dilationW, dilationH + ); + } + } + + /* cleanup */ + THTensor_(free)(input); +} + +static void THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)( + real *gradInput_p, + real *gradOutput_p, + THIndex_t *ind_p, + long nInputPlane, + long inputWidth, + long inputHeight, + long outputWidth, + long outputHeight, + int dW, + int dH) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nInputPlane; k++) + { + real *gradInput_p_k = gradInput_p + k*inputWidth*inputHeight; + real *gradOutput_p_k = gradOutput_p + k*outputWidth*outputHeight; + THIndex_t *ind_p_k = ind_p + k*outputWidth*outputHeight; + + /* calculate max points */ + long i, j; + for(i = 0; i < outputHeight; i++) + { + for(j = 0; j < outputWidth; j++) + { + /* retrieve position of max */ + long maxp = ind_p_k[i*outputWidth + j] - TH_INDEX_BASE; + if (maxp != -1) { + /* update gradient */ + gradInput_p_k[maxp] += gradOutput_p_k[i*outputWidth + j]; + } + } + } + } +} + +void THNN_(SpatialDilatedMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int dilationW, + int dilationH, + bool ceil_mode) +{ + int dimw = 2; + int dimh = 1; + long nbatch = 1; + int nInputPlane; + int inputHeight; + int inputWidth; + int outputHeight; + int outputWidth; + real *gradInput_data; + real *gradOutput_data; + 
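/* sketch of the routing applied below (assuming indices holds the TH_INDEX_BASE-based flat positions stored by the forward pass): for every output cell o, the frame kernel does gradInput[indices[o] - TH_INDEX_BASE] += gradOutput[o], so only the argmax inputs receive gradient and all other entries stay zero */ +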
THIndex_t *indices_data; + + THNN_(SpatialDilatedMaxPooling_shapeCheck) + (input, gradOutput, indices, kH, kW, dH, dW, + padH, padW, dilationH, dilationW, ceil_mode); + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (input->nDimension == 4) { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + /* sizes */ + nInputPlane = input->size[dimh-1]; + inputHeight = input->size[dimh]; + inputWidth = input->size[dimw]; + outputHeight = gradOutput->size[dimh]; + outputWidth = gradOutput->size[dimw]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THIndexTensor_(data)(indices); + + /* backprop */ + if (input->nDimension == 3) + { + THNN_(SpatialDilatedMaxPooling_updateGradInput_frame) + (gradInput_data, gradOutput_data, + indices_data, + nInputPlane, + inputWidth, inputHeight, + outputWidth, outputHeight, + dW, dH); + } + else + { + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialDilatedMaxPooling_updateGradInput_frame) + (gradInput_data+p*nInputPlane*inputWidth*inputHeight, + gradOutput_data+p*nInputPlane*outputWidth*outputHeight, + indices_data+p*nInputPlane*outputWidth*outputHeight, + nInputPlane, + inputWidth, inputHeight, + outputWidth, outputHeight, + dW, dH); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialFractionalMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFractionalMaxPooling.c new file mode 100644 index 000000000..a98954cc6 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFractionalMaxPooling.c @@ -0,0 +1,253 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialFractionalMaxPooling.c" +#else + +static long* THNN_(SpatialFractionalMaxPooling_generateIntervals)( + real sample, + long inputSize, + long outputSize, + int poolSize) { + real alpha = (real) (inputSize - poolSize) / (real) (outputSize - 1); + long* sequence = (long*) THAlloc(sizeof(long) * outputSize); + + long i; + for (i = 0; i < outputSize - 1; ++i) { + sequence[i] = + (long) ((i + sample) * alpha) - (long) (sample * alpha); + } + sequence[outputSize - 1] = inputSize - poolSize; + + return sequence; +} + +static void THNN_(SpatialFractionalMaxPooling_updateOutput_frame)( + real* input, + real* output, + THIndex_t* indices, + real* randomSamples, + long numPlanes, + long inputW, long inputH, + long outputW, long outputH, + int poolSizeW, int poolSizeH) { + long plane; +#pragma omp parallel for private(plane) + for (plane = 0; plane < numPlanes; ++plane) { + /* each plane contains 2 random samples, one for W and one for H */ + real* randomSamplesForPlane = randomSamples + plane * 2; + + /* Generate interval sequence */ + long* sequenceW = + THNN_(SpatialFractionalMaxPooling_generateIntervals)( + randomSamplesForPlane[0], inputW, outputW, poolSizeW); + long* sequenceH = + THNN_(SpatialFractionalMaxPooling_generateIntervals)( + randomSamplesForPlane[1], inputH, outputH, poolSizeH); + + /* loop over output */ + long h, w; + + real* inputForPlane = input + plane * inputW * inputH; + real* outputForPlane = output + plane * outputW * outputH; + THIndex_t* indicesForPlane = indices + plane * outputW * outputH; + + for (h = 0; h < outputH; ++h) { + long inputHStart = sequenceH[h]; + + for (w = 0; w < outputW; ++w) { + long inputWStart = 
sequenceW[w]; + + real maxVal = -THInf; + long maxIndex = -1; + + long h2, w2; + for (h2 = inputHStart; h2 < inputHStart + poolSizeH; ++h2) { + for (w2 = inputWStart; w2 < inputWStart + poolSizeW; ++w2) { + THAssert(h2 >= 0 && h2 < inputH); + THAssert(w2 >= 0 && w2 < inputW); + + long planeIndex = h2 * inputW + w2; + real val = inputForPlane[planeIndex]; + if (val > maxVal) { + maxVal = val; + maxIndex = planeIndex; + } + } + } + + THAssert(maxVal != -THInf); + THAssert(maxIndex != -1); + + outputForPlane[h * outputW + w] = maxVal; + /* +1 to lua index */ + indicesForPlane[h * outputW + w] = maxIndex + TH_INDEX_BASE; + } + } + + THFree(sequenceW); + THFree(sequenceH); + } +} + +void THNN_(SpatialFractionalMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int outputW, int outputH, + int poolSizeW, int poolSizeH, + THIndexTensor *indices, + THTensor *randomSamples) { + + long numBatch = 1; + int planeDim = 0; + int heightDim = 1; + int widthDim = 2; + + long numInputDims = THTensor_(nDimension)(input); + THNN_ARGCHECK(numInputDims == 3 || numInputDims == 4, 2, input, + "3D or 4D (batch mode) tensor expected for input, but got: %s"); + + if (numInputDims == 4) { + numBatch = THTensor_(size)(input, 0); + planeDim++; + heightDim++; + widthDim++; + } + + /* sizes */ + long numPlanes = THTensor_(size)(input, planeDim); + long inputH = THTensor_(size)(input, heightDim); + long inputW = THTensor_(size)(input, widthDim); + + THArgCheck(outputH + poolSizeH - 1 < inputH, 7, + "poolSizeH (%d) too large relative to input height (%d)", + poolSizeH, inputH); + THArgCheck(outputW + poolSizeW - 1 < inputW, 6, + "poolSizeW (%d) too large relative to input width (%d)", + poolSizeW, inputW); + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + if (numInputDims == 3) { + /* resize output */ + THTensor_(resize3d)(output, numPlanes, outputH, outputW); + /* indices will contain the locations for each output point */ + THIndexTensor_(resize3d)(indices, numPlanes, outputH, outputW); + + THNN_(SpatialFractionalMaxPooling_updateOutput_frame)( + THTensor_(data)(input), + THTensor_(data)(output), + THIndexTensor_(data)(indices), + THTensor_(data)(randomSamples), + numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH); + } else { + THTensor_(resize4d)(output, numBatch, numPlanes, outputH, outputW); + /* indices will contain the locations for each output point */ + THIndexTensor_(resize4d)(indices, numBatch, numPlanes, outputH, outputW); + + long batch; +#pragma omp parallel for private(batch) + for (batch = 0; batch < numBatch; ++batch) { + THNN_(SpatialFractionalMaxPooling_updateOutput_frame)( + THTensor_(data)(input) + batch * numPlanes * inputH * inputW, + THTensor_(data)(output) + batch * numPlanes * outputH * outputW, + THIndexTensor_(data)(indices) + batch * numPlanes * outputH * outputW, + THTensor_(data)(randomSamples) + batch * numPlanes * 2, + numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH); + } + } + + /* cleanup */ + THTensor_(free)(input); +} + +static void THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)( + real* gradInput, + real* gradOutput, + THIndex_t* indices, + long numPlanes, + long inputW, long inputH, + long outputW, long outputH) { + long plane; +#pragma omp parallel for private(plane) + for (plane = 0; plane < numPlanes; plane++) { + real* gradInputForPlane = gradInput + plane * inputW * inputH; + real* gradOutputForPlane = gradOutput + plane * outputW * outputH; + THIndex_t* indicesForPlane = 
indices + plane * outputW * outputH; + + long h, w; + for (h = 0; h < outputH; ++h) { + for (w = 0; w < outputW; ++w) { + long outputIndex = h * outputW + w; + long index = indicesForPlane[outputIndex] - TH_INDEX_BASE; + THAssert(index >= 0 && index < inputW * inputH); + + gradInputForPlane[index] += gradOutputForPlane[outputIndex]; + } + } + } +} + +void THNN_(SpatialFractionalMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int outputW, int outputH, + int poolSizeW, int poolSizeH, + THIndexTensor *indices) { + + long numBatch = 1; + int planeDim = 0; + int heightDim = 1; + int widthDim = 2; + + long numInputDims = THTensor_(nDimension)(input); + if (numInputDims == 4) { + numBatch = THTensor_(size)(input, 0); + planeDim = 1; + heightDim++; + widthDim++; + } + + /* sizes */ + long numPlanes = THTensor_(size)(input, planeDim); + long inputH = THTensor_(size)(input, heightDim); + long inputW = THTensor_(size)(input, widthDim); + + THArgCheck(outputW == THTensor_(size)(gradOutput, widthDim), 3, + "gradOutput width unexpected"); + THArgCheck(outputH == THTensor_(size)(gradOutput, heightDim), 3, + "gradOutput height unexpected"); + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* backprop */ + if (numInputDims == 3) { + THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)( + THTensor_(data)(gradInput), + THTensor_(data)(gradOutput), + THIndexTensor_(data)(indices), + numPlanes, inputW, inputH, outputW, outputH); + } else { + long batch; +#pragma omp parallel for private(batch) + for (batch = 0; batch < numBatch; ++batch) { + THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)( + THTensor_(data)(gradInput) + batch * numPlanes * inputH * inputW, + THTensor_(data)(gradOutput) + batch * numPlanes * outputH * outputW, + THIndexTensor_(data)(indices) + batch * numPlanes * outputH * outputW, + numPlanes, inputW, inputH, outputW, outputH); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c new file mode 100644 index 000000000..2edc53b5a --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c @@ -0,0 +1,462 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialFullConvolution.c" +#else + +static void THNN_(im2col)(const real* data_im, const int channels, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + real* data_col) { + const int height_col = (height + 2 * pad_h - + (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_col = (width + 2 * pad_w - + (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + const int channels_col = channels * kernel_h * kernel_w; + for (int c_col = 0; c_col < channels_col; ++c_col) { + int w_offset = c_col % kernel_w; + int h_offset = (c_col / kernel_w) % kernel_h; + int c_im = c_col / kernel_h / kernel_w; + for (int h_col = 0; h_col < height_col; ++h_col) { + for (int w_col = 0; w_col < width_col; ++w_col) { + int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; + int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; + data_col[(c_col * height_col + h_col) * width_col + w_col] = + (h_im >= 
0 && w_im >= 0 && h_im < height && w_im < width) ? + data_im[(c_im * height + h_im) * width + w_im] : 0; + } + } + } +} + +static void THNN_(col2im)(const real* data_col, const int channels, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + real* data_im) { + memset(data_im, 0, sizeof(real) * height * width * channels); + const int height_col = (height + 2 * pad_h - + (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_col = (width + 2 * pad_w - + (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + const int channels_col = channels * kernel_h * kernel_w; + for (int c_col = 0; c_col < channels_col; ++c_col) { + int w_offset = c_col % kernel_w; + int h_offset = (c_col / kernel_w) % kernel_h; + int c_im = c_col / kernel_h / kernel_w; + for (int h_col = 0; h_col < height_col; ++h_col) { + for (int w_col = 0; w_col < width_col; ++w_col) { + int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; + int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; + if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) + data_im[(c_im * height + h_im) * width + w_im] += + data_col[(c_col * height_col + h_col) * width_col + w_col]; + } + } + } +} + +static inline void THNN_(SpatialFullConvolution_shapeCheck)( + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int kH, int kW, int dH, int dW, int padH, int padW, int adjH, int adjW) { + + THArgCheck(kW > 0 && kH > 0, 9, + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + THArgCheck(dW > 0 && dH > 0, 11, + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + THArgCheck(adjW < dW && adjH < dH, 15, + "output adjustment must be smaller than stride, but got adjH: %d adjW: %d dH: %d dW: %d", + adjH, adjW, dH, dW); + THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 4, 5, weight, + "2D or 4D weight tensor expected, but got: %s"); + + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]); + } + + int ndim = input->nDimension; + int dimf = 0; + int dimh = 1; + int dimw = 2; + + if (ndim == 4) { + dimf++; + dimh++; + dimw++; + } + + THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + + long nInputPlane = weight->size[0]; + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long nOutputPlane = weight->size[1]; + long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH; + long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW; + + if (outputWidth < 1 || outputHeight < 1) + THError("Given input size: (%d x %d x %d). " + "Calculated output size: (%d x %d x %d). 
Output size is too small", + nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth); + + THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + } +} + +void THNN_(SpatialFullConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *columns, + THTensor *ones, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int adjW, int adjH) +{ + THNN_(SpatialFullConvolution_shapeCheck) + (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW, adjH, adjW); + + int nInputPlane = THTensor_(size)(weight,0); + int nOutputPlane = THTensor_(size)(weight,1); + + input = THTensor_(newContiguous)(input); + weight = THTensor_(newContiguous)(weight); + bias = bias ? THTensor_(newContiguous)(bias) : bias; + int batch = 1; + if (input->nDimension == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + } + + long inputHeight = input->size[2]; + long inputWidth = input->size[3]; + long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH; + long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Resize output + THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth); + + // Resize temporary columns + THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth); + THTensor_(zero)(columns); + + // Define a buffer of ones, for bias accumulation + // Note: this buffer can be shared with other modules, it only ever gets increased, + // and always contains ones. + if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) { + // Resize plane and fill with ones... 
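+ // Size sanity check (illustrative numbers, not used by the code): with
+ // inputHeight = 4, dH = 2, padH = 1, kH = 3, adjH = 0 the formula above gives
+ // outputHeight = (4 - 1)*2 - 2*1 + 3 + 0 = 7; a forward convolution with the
+ // same k/d/pad maps 7 rows back to (7 + 2 - 3)/2 + 1 = 4, i.e. this module
+ // inverts the forward shape transformation.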
+ THTensor_(resize2d)(ones, outputHeight, outputWidth); + THTensor_(fill)(ones, 1); + } + + // Helpers + THTensor *input_n = THTensor_(new)(); + THTensor *output_n = THTensor_(new)(); + + int elt; + // For each elt in batch, do: + for (elt = 0; elt < batchSize; elt ++) { + // Matrix multiply per output: + THTensor_(select)(input_n, input, 0, elt); + THTensor_(select)(output_n, output, 0, elt); + + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + long m = weight->size[1] * weight->size[2] * weight->size[3]; + long n = columns->size[1]; + long k = weight->size[0]; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 'n', 't', + n, m, k, + 1, + THTensor_(data)(input_n), n, + THTensor_(data)(weight), m, + 0, + THTensor_(data)(columns), n + ); + + // Unpack columns back into output: + THNN_(col2im)( + THTensor_(data)(columns), + nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW, + 1, 1, + THTensor_(data)(output_n) + ); + + // Do Bias after: + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + long m_ = nOutputPlane; + long n_ = outputHeight * outputWidth; + long k_ = 1; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + if (bias) { + THBlas_(gemm)( + 't', 'n', + n_, m_, k_, + 1, + THTensor_(data)(ones), k_, + THTensor_(data)(bias), k_, + 1, + THTensor_(data)(output_n), n_ + ); + } + } + + // Free + THTensor_(free)(input_n); + THTensor_(free)(output_n); + + // Resize output + if (batch == 0) { + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); + THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(weight); + if (bias) THTensor_(free)(bias); +} + +void THNN_(SpatialFullConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *gradColumns, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int adjW, int adjH) +{ + THNN_(SpatialFullConvolution_shapeCheck) + (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, adjH, adjW); + + int nInputPlane = THTensor_(size)(weight,0); + int nOutputPlane = THTensor_(size)(weight,1); + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + weight = THTensor_(newContiguous)(weight); + int batch = 1; + if (input->nDimension == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]); + } + + long inputWidth = input->size[3]; + long inputHeight = input->size[2]; + long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW; + long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Resize output + THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth); + THTensor_(zero)(gradInput); + + // Resize temporary columns + THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth); + + // Helpers + THTensor *gradInput_n = THTensor_(new)(); + THTensor *gradOutput_n = THTensor_(new)(); + + int elt; + // For each elt in batch, do: + for (elt = 0; elt < batchSize; elt ++) { + // Matrix multiply per sample: + THTensor_(select)(gradInput_n,
gradInput, 0, elt); + THTensor_(select)(gradOutput_n, gradOutput, 0, elt); + + // Extract columns: + THNN_(im2col)( + THTensor_(data)(gradOutput_n), + nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW, + 1, 1, + THTensor_(data)(gradColumns) + ); + + + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + long m = weight->size[0]; + long n = gradColumns->size[1]; + long k = weight->size[1] * weight->size[2] * weight->size[3]; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 'n', 'n', + n, m, k, + 1, + THTensor_(data)(gradColumns), n, + THTensor_(data)(weight), k, + 0, + THTensor_(data)(gradInput_n), n + ); + } + + + // Free + THTensor_(free)(gradInput_n); + THTensor_(free)(gradOutput_n); + + // Resize output + if (batch == 0) { + THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth); + THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth); + THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); +} + + +void THNN_(SpatialFullConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *columns, + THTensor *ones, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int adjW, int adjH, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + THNN_(SpatialFullConvolution_shapeCheck) + (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, adjH, adjW); + + int nInputPlane = THTensor_(size)(gradWeight,0); + int nOutputPlane = THTensor_(size)(gradWeight,1); + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous"); + if (gradBias) + THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous"); + int batch = 1; + if (input->nDimension == 3) { + // Force batch + batch = 0; + THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); + THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]); + } + + long inputWidth = input->size[3]; + long inputHeight = input->size[2]; + long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW; + long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Define a buffer of ones, for bias accumulation + if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) { + // Resize plane and fill with ones... 
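+  // (Illustrative aside: the ones buffer reduces the bias gradient to plain
+  // BLAS: for each output plane j, gradBias[j] += scale * sum_p gradOutput[j][p]
+  // == scale * dot(gradOutput[j][:], ones), which is what the gemv('t', ...)
+  // call further down computes. With a made-up 2x2 gradOutput plane
+  // {{1,2},{3,4}} and scale = 1, that plane contributes 10.)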
+    THTensor_(resize2d)(ones, outputHeight, outputWidth);
+    THTensor_(fill)(ones, 1);
+  }
+
+  // Resize temporary columns
+  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
+
+  // Helpers
+  THTensor *input_n = THTensor_(new)();
+  THTensor *gradOutput_n = THTensor_(new)();
+
+  int elt;
+  // For each elt in batch, do:
+  for (elt = 0; elt < batchSize; elt ++) {
+    // Matrix multiply per output:
+    THTensor_(select)(input_n, input, 0, elt);
+    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+    // Extract columns:
+    THNN_(im2col)(
+      THTensor_(data)(gradOutput_n),
+      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
+      1, 1,
+      THTensor_(data)(columns)
+    );
+
+    // M,N,K are dims of matrix A and B
+    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+    long n = columns->size[0];   // nOutputPlane * kh * kw
+    long m = input_n->size[0];   // nInputPlane
+    long k = columns->size[1];   // inputHeight * inputWidth
+
+    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+    THBlas_(gemm)(
+        't', 'n',
+        n, m, k,
+        scale,
+        THTensor_(data)(columns), k,
+        THTensor_(data)(input_n), k,
+        1,
+        THTensor_(data)(gradWeight), n
+    );
+
+    // Do Bias:
+    // M,N,K are dims of matrix A and B
+    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+    long m_ = nOutputPlane;
+    long k_ = outputHeight * outputWidth;
+
+    // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
+    if (gradBias) {
+      THBlas_(gemv)(
+          't',
+          k_, m_,
+          scale,
+          THTensor_(data)(gradOutput_n), k_,
+          THTensor_(data)(ones), 1,
+          1,
+          THTensor_(data)(gradBias), 1
+      );
+    }
+  }
+
+  // Free
+  THTensor_(free)(input_n);
+  THTensor_(free)(gradOutput_n);
+
+  // Resize
+  if (batch == 0) {
+    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
+    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+  }
+
+  THTensor_(free)(input);
+  THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolutionMap.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolutionMap.c
new file mode 100644
index 000000000..6952fbe25
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolutionMap.c
@@ -0,0 +1,222 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialFullConvolutionMap.c"
+#else
+
+void THNN_(SpatialFullConvolutionMap_updateOutput)(
+  THNNState *state, THTensor *input, THTensor *output_, THTensor *weight, THTensor *bias,
+  THTensor *connTable, int nInputPlane, int nOutputPlane,
+  int dW, int dH)
+{
+  THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+  THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");
+  THArgCheck(
+    weight != NULL && weight->nDimension == 3
+    && connTable != NULL && connTable->size[0] == weight->size[0], 4,
+    "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
+  );
+
+  const int kH = (int)weight->size[1];
+  const int kW = (int)weight->size[2];
+
+  THArgCheck(input != NULL && input->nDimension == 3, 2, "3D tensor expected");
+  THArgCheck(input->size[0] >= nInputPlane, 2, "invalid number of input planes");
+
+  THTensor_(resize3d)(
+    output_, nOutputPlane,
+    (input->size[1] - 1) * dH + kH,
+    (input->size[2] - 1) * dW + kW
+  );
+
+  /* contiguous */
+  input = THTensor_(newContiguous)(input);
+  THTensor* output = THTensor_(newContiguous)(output_);
+
+  /* get raw pointers */
+  real *input_data = THTensor_(data)(input);
+  real
*output_data = THTensor_(data)(output); + real *weight_data = THTensor_(data)(weight); + real *bias_data = THTensor_(data)(bias); + real *connTable_data = THTensor_(data)(connTable); + + /* and dims */ + const long input_h = input->size[1]; + const long input_w = input->size[2]; + const long output_h = output->size[1]; + const long output_w = output->size[2]; + const long weight_h = weight->size[1]; + const long weight_w = weight->size[2]; + + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nOutputPlane; p++) + { + /* add bias */ + real *ptr_output = output_data + p*output_w*output_h; + long j; + int nweight; + long k; + + for (j = 0; j < output_h*output_w; j++) + ptr_output[j] = bias_data[p]; + + /* convolve all maps */ + nweight = connTable->size[0]; + for (k = 0; k < nweight; k++) + { + /* get offsets for input/output */ + int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE; + int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE; + + if (o == p) + { + THTensor_(fullConv2Dptr)( + output_data + o*output_w*output_h, + 1.0, + input_data + i*input_w*input_h, input_h, input_w, + weight_data + k*weight_w*weight_h, weight_h, weight_w, + dH, dW + ); + } + } + } + + /* clean up */ + THTensor_(free)(input); + THTensor_(freeCopyTo)(output, output_); +} + +void THNN_(SpatialFullConvolutionMap_updateGradInput)( + THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput_, THTensor *weight, THTensor *bias, + THTensor *connTable, int nInputPlane, int nOutputPlane, + int dW, int dH) +{ + THArgCheck( + weight != NULL && weight->nDimension == 3 + && connTable != NULL && connTable->size[0] == weight->size[0], 5, + "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE + ); + + /* contiguous */ + THTensor* gradInput = THTensor_(newContiguous)(gradInput_); + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* Resize/Zero */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* get raw pointers */ + real *gradInput_data = THTensor_(data)(gradInput); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *weight_data = THTensor_(data)(weight); + real *connTable_data = THTensor_(data)(connTable); + + /* and dims */ + const long input_h = input->size[1]; + const long input_w = input->size[2]; + const long output_h = gradOutput->size[1]; + const long output_w = gradOutput->size[2]; + const long kH = weight->size[1]; + const long kW = weight->size[2]; + + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nInputPlane; p++) + { + long k; + /* backward all */ + int nkernel = connTable->size[0]; + for (k = 0; k < nkernel; k++) + { + int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE; + int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE; + if (i == p) + { + /* gradient to input */ + THTensor_(validXCorr2Dptr)( + gradInput_data + i*input_w*input_h, + 1.0, + gradOutput_data + o*output_w*output_h, output_h, output_w, + weight_data + k*kW*kH, kH, kW, + dH, dW + ); + } + } + } + + /* clean up */ + THTensor_(freeCopyTo)(gradInput, gradInput_); + THTensor_(free)(gradOutput); +} + +void THNN_(SpatialFullConvolutionMap_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *connTable, + int nInputPlane, + int nOutputPlane, + int dW, int dH, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + THArgCheck( + gradWeight != NULL && gradWeight->nDimension == 3 + && connTable != NULL && connTable->size[0] == gradWeight->size[0], 5, 
+ "3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE + ); + + /* contiguous */ + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* get raw pointers */ + real *input_data = THTensor_(data)(input); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *gradWeight_data = THTensor_(data)(gradWeight); + real *gradBias_data = THTensor_(data)(gradBias); + + /* and dims */ + const long input_h = input->size[1]; + const long input_w = input->size[2]; + const long output_h = gradOutput->size[1]; + const long output_w = gradOutput->size[2]; + const long weight_h = gradWeight->size[1]; + const long weight_w = gradWeight->size[2]; + + /* gradients wrt bias */ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nOutputPlane; k++) + { + real *ptr_gradOutput = gradOutput_data + k*output_w*output_h; + long l; + for (l = 0; l < output_h*output_w; l++) + gradBias_data[k] += scale*ptr_gradOutput[l]; + } + + /* gradients wrt weight */ + int nkernel = connTable->size[0]; +#pragma omp parallel for private(k) + for (k = 0; k < nkernel; k++) + { + int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE; + int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE; + + /* gradient to kernel */ + THTensor_(validXCorr2DRevptr)( + gradWeight_data + k*weight_w*weight_h, + scale, + gradOutput_data + o*output_w*output_h, output_h, output_w, + input_data + i*input_w*input_h, input_h, input_w, + dH, dW + ); + } + + /* clean up */ + THTensor_(free)(input); + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxPooling.c new file mode 100644 index 000000000..88aaa40e1 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxPooling.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c" +#else + +void THNN_(SpatialMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + bool ceil_mode) +{ + THNN_(SpatialDilatedMaxPooling_updateOutput)( + state, input, output, indices, + kW, kH, dW, dH, padW, padH, 1, 1, ceil_mode + ); +} + +void THNN_(SpatialMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + bool ceil_mode) +{ + THNN_(SpatialDilatedMaxPooling_updateGradInput)( + state, input, gradOutput, gradInput, indices, + kW, kH, dW, dH, padW, padH, 1, 1, ceil_mode + ); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxUnpooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxUnpooling.c new file mode 100644 index 000000000..320538686 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxUnpooling.c @@ -0,0 +1,234 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialMaxUnpooling.c" +#else + +static void THNN_(SpatialMaxUnpooling_updateOutput_frame)(real *input_p, real *output_p, + THIndex_t *ind_p, + int nslices, + int iwidth, int iheight, + int owidth, int oheight) +{ + int k; + int has_error = 0; + THIndex_t error_index; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + real *output_p_k = output_p + k*owidth*oheight; + real *input_p_k = input_p + k*iwidth*iheight; + THIndex_t *ind_p_k = ind_p + k*iwidth*iheight; + + 
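+    // (Illustrative aside with made-up numbers: if a prior 2x2 max-pooling of a
+    // 4x4 plane recorded flat index 5 for pooled cell (0,0), the loop below
+    // scatters input[0][0] into output position 5 and leaves every other output
+    // cell at zero; out-of-range indices are recorded and reported only after
+    // the parallel loop finishes.)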
int i, j; + THIndex_t maxp; + for(i = 0; i < iheight; i++) + { + for(j = 0; j < iwidth; j++) + { + maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */ + if(maxp<0 || maxp>=owidth*oheight){ +#pragma omp critical + { + has_error = 1; + error_index = maxp; + } + } else { + output_p_k[maxp] = input_p_k[i*iwidth + j]; /* update output */ + } + } + } + } + if (has_error) { + THError("found an invalid max index %ld (output volumes are of size %dx%d)", + error_index, oheight, owidth); + } +} + +void THNN_(SpatialMaxUnpooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int owidth, int oheight) +{ + int dimw = 2; + int dimh = 1; + int nbatch = 1; + int nslices; + int iheight; + int iwidth; + real *input_data; + real *output_data; + THIndex_t *indices_data; + + + THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input, + "3D or 4D (batch mode) tensor expected for input, but got: %s"); + THNN_CHECK_SHAPE_INDICES(input, indices); + + if (input->nDimension == 4) + { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + + /* get contiguous input and indices */ + input = THTensor_(newContiguous)(input); + indices = THIndexTensor_(newContiguous)(indices); + + /* resize output */ + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nslices, oheight, owidth); + THTensor_(zero)(output); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data, + indices_data, + nslices, + iwidth, iheight, + owidth, oheight); + } + else + { + int p; + + THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth); + THTensor_(zero)(output); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialMaxUnpooling_updateOutput_frame)( + input_data+p*nslices*iwidth*iheight, + output_data+p*nslices*owidth*oheight, + indices_data+p*nslices*iwidth*iheight, + nslices, + iwidth, iheight, + owidth, oheight); + } + } + + /* cleanup */ + THTensor_(free)(input); + THIndexTensor_(free)(indices); +} + +static void THNN_(SpatialMaxUnpooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p, + THIndex_t *ind_p, + int nslices, + int iwidth, int iheight, + int owidth, int oheight) +{ + int k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + real *gradInput_p_k = gradInput_p + k*iwidth*iheight; + real *gradOutput_p_k = gradOutput_p + k*owidth*oheight; + THIndex_t *ind_p_k = ind_p + k*iwidth*iheight; + + int i, j; + THIndex_t maxp; + for(i = 0; i < iheight; i++) + { + for(j = 0; j < iwidth; j++) + { + maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */ + if(maxp < 0 || maxp >= owidth * oheight) { + THError("invalid max index %ld, owidth= %d, oheight= %d", maxp, owidth, oheight); + } + gradInput_p_k[i*iwidth + j] = gradOutput_p_k[maxp]; /* update gradient */ + } + } + } +} + +void THNN_(SpatialMaxUnpooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int owidth, int oheight) +{ + int dimw = 2; + int dimh = 1; + int nbatch = 1; + int nslices; + int iheight; + int iwidth; + real *gradInput_data; + real *gradOutput_data; + 
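+  // (Illustrative aside: the frame function above is the mirror of the forward
+  // scatter; each gradInput cell gathers from gradOutput at the index the
+  // pooling step recorded, gradInput[i][j] = gradOutput[ind[i][j]], so
+  // gradients flow only through the positions that held the pooled maxima.)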
THIndex_t *indices_data; + + THNN_CHECK_SHAPE_INDICES(input, indices); + + /* get contiguous gradOutput and indices */ + gradOutput = THTensor_(newContiguous)(gradOutput); + indices = THIndexTensor_(newContiguous)(indices); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (input->nDimension == 4) { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + + if(owidth!=gradOutput->size[dimw] || oheight!=gradOutput->size[dimh]){ + THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d", + oheight, owidth, gradOutput->size[dimh], gradOutput->size[dimw]); + } + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THIndexTensor_(data)(indices); + + /* backprop */ + if (input->nDimension == 3) + { + THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data, gradOutput_data, + indices_data, + nslices, + iwidth, iheight, + owidth, oheight); + } + else + { + int p; + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight, + indices_data+p*nslices*iwidth*iheight, + nslices, + iwidth, iheight, + owidth, oheight); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); + THIndexTensor_(free)(indices); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c new file mode 100644 index 000000000..dcde660ea --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c @@ -0,0 +1,260 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialReflectionPadding.c" +#else + +static void THNN_(SpatialReflectionPadding_updateOutput_frame)( + real *input_p, real *output_p, + long nslices, + long iwidth, long iheight, + long owidth, long oheight, + int pad_l, int pad_r, + int pad_t, int pad_b) +{ + int iStartX = fmax(0, -pad_l); + int iStartY = fmax(0, -pad_t); + int oStartX = fmax(0, pad_l); + int oStartY = fmax(0, pad_t); + + long k, ip_x, ip_y; +#pragma omp parallel for private(k, ip_x, ip_y) + + for (k = 0; k < nslices; k++) + { + long i, j; + for (i = 0; i < oheight; i++) { + for (j = 0; j < owidth; j++) { + if (j < pad_l) { + ip_x = pad_l * 2 - j; + } else if (j >= pad_l && j < iwidth + pad_l) { + ip_x = j; + } else { + ip_x = (iwidth + pad_l - 1) * 2 - j; + } + ip_x = ip_x - oStartX + iStartX; + + if (i < pad_t) { + ip_y = pad_t * 2 - i; + } else if (i >= pad_t && i < iheight + pad_t) { + ip_y = i; + } else { + ip_y = (iheight + pad_t - 1) * 2 - i; + } + ip_y = ip_y - oStartY + iStartY; + + real *dest_p = output_p + k*owidth*oheight + i * owidth + j; + real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x; + *dest_p = *src_p; + } + } + } +} + +void THNN_(SpatialReflectionPadding_updateOutput)(THNNState *state, + THTensor *input, + THTensor *output, + int pad_l, int pad_r, + int pad_t, int pad_b) +{ + int dimw = 2; + int dimh = 1; + int dimslices = 0; + long nbatch = 1; + long nslices; + long iheight; + long iwidth; + long oheight; + long owidth; + real *input_data; + real *output_data; + + THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input, + "3D or 4D (batch mode) tensor expected for input, but got: %s"); + + if (input->nDimension == 4) + { + nbatch = input->size[0]; 
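+    // (Illustrative aside on the reflection index math in the frame function
+    // above, with made-up numbers: for pad_l = 2 a row [a b c d] becomes
+    // [c b | a b c d | c b]; e.g. output column j = 0 takes ip_x = pad_l*2 - j
+    // = 4, then ip_x - oStartX + iStartX = 4 - 2 + 0 = 2, i.e. input column c.)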
+    dimw++;
+    dimh++;
+    dimslices++;
+  }
+
+  /* sizes */
+  nslices = input->size[dimslices];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  oheight = iheight + pad_t + pad_b;
+  owidth  = iwidth + pad_l + pad_r;
+
+  THArgCheck(owidth >= 1 || oheight >= 1, 2,
+             "input (H: %d, W: %d) is too small."
+             " Calculated output H: %d W: %d",
+             iheight, iwidth, oheight, owidth);
+
+  /* get contiguous input */
+  input = THTensor_(newContiguous)(input);
+
+  /* resize output */
+  if (input->nDimension == 3)
+  {
+    THTensor_(resize3d)(output, nslices, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+
+    THNN_(SpatialReflectionPadding_updateOutput_frame)(input_data, output_data,
+                                                       nslices,
+                                                       iwidth, iheight,
+                                                       owidth, oheight,
+                                                       pad_l, pad_r,
+                                                       pad_t, pad_b);
+  }
+  else
+  {
+    long p;
+
+    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+    for (p = 0; p < nbatch; p++)
+    {
+      THNN_(SpatialReflectionPadding_updateOutput_frame)(
+        input_data+p*nslices*iwidth*iheight,
+        output_data+p*nslices*owidth*oheight,
+        nslices,
+        iwidth, iheight,
+        owidth, oheight,
+        pad_l, pad_r,
+        pad_t, pad_b);
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(input);
+}
+
+static void THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+  real *ginput_p, real *goutput_p,
+  long nslices,
+  long iwidth, long iheight,
+  long owidth, long oheight,
+  int pad_l, int pad_r,
+  int pad_t, int pad_b)
+{
+  int iStartX = fmax(0, -pad_l);
+  int iStartY = fmax(0, -pad_t);
+  int oStartX = fmax(0, pad_l);
+  int oStartY = fmax(0, pad_t);
+
+  long k, ip_x, ip_y;
+#pragma omp parallel for private(k, ip_x, ip_y)
+
+  for (k = 0; k < nslices; k++)
+  {
+    long i, j;
+    for (i = 0; i < oheight; i++) {
+      for (j = 0; j < owidth; j++) {
+        if (j < pad_l) {
+          ip_x = pad_l * 2 - j;
+        } else if (j >= pad_l && j < iwidth + pad_l) {
+          ip_x = j;
+        } else {
+          ip_x = (iwidth + pad_l - 1) * 2 - j;
+        }
+        ip_x = ip_x - oStartX + iStartX;
+
+        if (i < pad_t) {
+          ip_y = pad_t * 2 - i;
+        } else if (i >= pad_t && i < iheight + pad_t) {
+          ip_y = i;
+        } else {
+          ip_y = (iheight + pad_t - 1) * 2 - i;
+        }
+        ip_y = ip_y - oStartY + iStartY;
+
+        real *src_p = goutput_p + k*owidth*oheight + i * owidth + j;
+        real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
+        *dest_p += *src_p;
+      }
+    }
+  }
+}
+
+void THNN_(SpatialReflectionPadding_updateGradInput)(THNNState *state,
+                                                     THTensor *input,
+                                                     THTensor *gradOutput,
+                                                     THTensor *gradInput,
+                                                     int pad_l, int pad_r,
+                                                     int pad_t, int pad_b)
+{
+  int dimw = 2;
+  int dimh = 1;
+  int dimslices = 0;
+  long nbatch = 1;
+  long nslices;
+  long iheight;
+  long iwidth;
+  long oheight;
+  long owidth;
+
+  if (input->nDimension == 4)
+  {
+    nbatch = input->size[0];
+    dimw++;
+    dimh++;
+    dimslices++;
+  }
+
+  /* sizes */
+  nslices = input->size[dimslices];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  oheight = iheight + pad_t + pad_b;
+  owidth  = iwidth + pad_l + pad_r;
+
+  THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
+             "gradOutput width unexpected. Expected: %d, Got: %d",
+             owidth, THTensor_(size)(gradOutput, dimw));
+  THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
+             "gradOutput height unexpected. Expected: %d, Got: %d",
+             oheight, THTensor_(size)(gradOutput, dimh));
+
+  /* get contiguous gradOutput */
+  gradOutput = THTensor_(newContiguous)(gradOutput);
+
+  /* resize */
+  THTensor_(resizeAs)(gradInput, input);
+  THTensor_(zero)(gradInput);
+
+  /* backprop */
+  if (input->nDimension == 3) {
+    THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+      THTensor_(data)(gradInput),
+      THTensor_(data)(gradOutput),
+      nslices,
+      iwidth, iheight,
+      owidth, oheight,
+      pad_l, pad_r,
+      pad_t, pad_b);
+  } else {
+    long p;
+#pragma omp parallel for private(p)
+    for (p = 0; p < nbatch; p++) {
+      THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+        THTensor_(data)(gradInput) + p * nslices * iheight * iwidth,
+        THTensor_(data)(gradOutput) + p * nslices * oheight * owidth,
+        nslices,
+        iwidth, iheight,
+        owidth, oheight,
+        pad_l, pad_r,
+        pad_t, pad_b);
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialReplicationPadding.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReplicationPadding.c
new file mode 100644
index 000000000..4e318aa70
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReplicationPadding.c
@@ -0,0 +1,260 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialReplicationPadding.c"
+#else
+
+static void THNN_(SpatialReplicationPadding_updateOutput_frame)(
+  real *input_p, real *output_p,
+  long nslices,
+  long iwidth, long iheight,
+  long owidth, long oheight,
+  int pad_l, int pad_r,
+  int pad_t, int pad_b)
+{
+  int iStartX = fmax(0, -pad_l);
+  int iStartY = fmax(0, -pad_t);
+  int oStartX = fmax(0, pad_l);
+  int oStartY = fmax(0, pad_t);
+
+  long k, ip_x, ip_y;
+#pragma omp parallel for private(k, ip_x, ip_y)
+  for (k = 0; k < nslices; k++)
+  {
+    long i, j;
+    for (i = 0; i < oheight; i++) {
+      for (j = 0; j < owidth; j++) {
+        if (j < pad_l) {
+          ip_x = pad_l;
+        } else if (j >= pad_l && j < iwidth + pad_l) {
+          ip_x = j;
+        } else {
+          ip_x = iwidth + pad_l - 1;
+        }
+        ip_x = ip_x - oStartX + iStartX;
+
+        if (i < pad_t) {
+          ip_y = pad_t;
+        } else if (i >= pad_t && i < iheight + pad_t) {
+          ip_y = i;
+        } else {
+          ip_y = iheight + pad_t - 1;
+        }
+        ip_y = ip_y - oStartY + iStartY;
+
+        real *dest_p = output_p + k*owidth*oheight + i * owidth + j;
+        real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
+        *dest_p = *src_p;
+      }
+    }
+  }
+}
+
+void THNN_(SpatialReplicationPadding_updateOutput)(THNNState *state,
+                                                   THTensor *input,
+                                                   THTensor *output,
+                                                   int pad_l, int pad_r,
+                                                   int pad_t, int pad_b)
+{
+  int dimw = 2;
+  int dimh = 1;
+  int dimslices = 0;
+  long nbatch = 1;
+  long nslices;
+  long iheight;
+  long iwidth;
+  long oheight;
+  long owidth;
+  real *input_data;
+  real *output_data;
+
+  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+                "3D or 4D (batch mode) tensor expected for input, but got: %s");
+
+  if (input->nDimension == 4)
+  {
+    nbatch = input->size[0];
+    dimw++;
+    dimh++;
+    dimslices++;
+  }
+
+  /* sizes */
+  nslices = input->size[dimslices];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  oheight = iheight + pad_t + pad_b;
+  owidth  = iwidth + pad_l + pad_r;
+
+  THArgCheck(owidth >= 1 || oheight >= 1, 2,
+             "input (H: %d, W: %d) is too small."
+ " Calculated output H: %d W: %d", + iheight, iwidth, oheight, owidth); + + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + /* resize output */ + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nslices, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + + THNN_(SpatialReplicationPadding_updateOutput_frame)(input_data, output_data, + nslices, + iwidth, iheight, + owidth, oheight, + pad_l, pad_r, + pad_t, pad_b); + } + else + { + long p; + + THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) + { + THNN_(SpatialReplicationPadding_updateOutput_frame)( + input_data+p*nslices*iwidth*iheight, + output_data+p*nslices*owidth*oheight, + nslices, + iwidth, iheight, + owidth, oheight, + pad_l, pad_r, + pad_t, pad_b); + } + } + + /* cleanup */ + THTensor_(free)(input); +} + +static void THNN_(SpatialReplicationPadding_updateGradInput_frame)( + real *ginput_p, real *goutput_p, + long nslices, + long iwidth, long iheight, + long owidth, long oheight, + int pad_l, int pad_r, + int pad_t, int pad_b) +{ + int iStartX = fmax(0, -pad_l); + int iStartY = fmax(0, -pad_t); + int oStartX = fmax(0, pad_l); + int oStartY = fmax(0, pad_t); + + long k, ip_x, ip_y; +#pragma omp parallel for private(k, ip_x, ip_y) + for (k = 0; k < nslices; k++) + { + long i, j; + for (i = 0; i < oheight; i++) { + for (j = 0; j < owidth; j++) { + if (j < pad_l) { + ip_x = pad_l; + } else if (j >= pad_l && j < iwidth + pad_l) { + ip_x = j; + } else { + ip_x = iwidth + pad_l - 1; + } + ip_x = ip_x - oStartX + iStartX; + + if (i < pad_t) { + ip_y = pad_t; + } else if (i >= pad_t && i < iheight + pad_t) { + ip_y = i; + } else { + ip_y = iheight + pad_t - 1; + } + ip_y = ip_y - oStartY + iStartY; + + real *src_p = goutput_p + k*owidth*oheight + i * owidth + j; + real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x; + *dest_p += *src_p; + } + } + } +} + +void THNN_(SpatialReplicationPadding_updateGradInput)(THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int pad_l, int pad_r, + int pad_t, int pad_b) +{ + int dimw = 2; + int dimh = 1; + int dimslices = 0; + long nbatch = 1; + long nslices; + long iheight; + long iwidth; + long oheight; + long owidth; + + if (input->nDimension == 4) + { + nbatch = input->size[0]; + dimw++; + dimh++; + dimslices++; + } + + /* sizes */ + nslices = input->size[dimslices]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + oheight = iheight + pad_t + pad_b; + owidth = iwidth + pad_l + pad_r; + + THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3, + "gradOutput width unexpected. Expected: %d, Got: %d", + owidth, THTensor_(size)(gradOutput, dimw)); + THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3, + "gradOutput height unexpected. 
Expected: %d, Got: %d", + oheight, THTensor_(size)(gradOutput, dimh)); + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* backprop */ + if (input->nDimension == 3) { + THNN_(SpatialReplicationPadding_updateGradInput_frame)( + THTensor_(data)(gradInput), + THTensor_(data)(gradOutput), + nslices, + iwidth, iheight, + owidth, oheight, + pad_l, pad_r, + pad_t, pad_b); + } else { + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) { + THNN_(SpatialReplicationPadding_updateGradInput_frame)( + THTensor_(data)(gradInput) + p * nslices * iheight * iwidth, + THTensor_(data)(gradOutput) + p * nslices * oheight * owidth, + nslices, + iwidth, iheight, + owidth, oheight, + pad_l, pad_r, + pad_t, pad_b); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialSubSampling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialSubSampling.c new file mode 100644 index 000000000..4c077bc64 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialSubSampling.c @@ -0,0 +1,302 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialSubSampling.c" +#else + +static inline void THNN_(SpatialSubSampling_shapeCheck)( + THTensor *input, + THTensor *gradOutput, + THTensor *weight, + int kW, int kH) { + int ndims = input->nDimension; + THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous"); + + int nInputPlane = THTensor_(size)(weight, 0); + + int dimw = 2; + int dimh = 1; + + long inputWidth; + long inputHeight; + + if (input->nDimension == 4) { + dimw++; + dimh++; + } + + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + + THArgCheck(input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes"); + THArgCheck(inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size"); +} + +void THNN_(SpatialSubSampling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + int kW, int kH, + int dW, int dH) +{ + THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous"); + + real *weight_data = THTensor_(data)(weight); + real *bias_data = THTensor_(data)(bias); + real *output_data; + real *input_data; + + int dimw = 2; + int dimh = 1; + long nbatch = 1; + + long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + + int nInputPlane = THTensor_(size)(weight,0); + + long k; + + THNN_(SpatialSubSampling_shapeCheck)(input, NULL, weight, kW, kH); + + if (input->nDimension == 4) { + nbatch = input->size[0]; + dimw++; + dimh++; + } + + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + outputWidth = (inputWidth - kW) / dW + 1; + outputHeight = (inputHeight - kH) / dH + 1; + + if (input->nDimension == 3) + THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth); + else + THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth); + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(k) + for(k = 0; k < nInputPlane; k++) + { + long p; + for(p = 0; p < nbatch; p++) + { + long xx, yy; + /* For all output pixels... 
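+       (illustrative aside: each output pixel computed here is
+       bias[k] + weight[k] * sum over a kW x kH window; with kW = kH = 2,
+       dW = dH = 2, weight[k] = 0.25 and bias[k] = 0 this reduces to 2x2
+       average pooling, e.g. the window {{1,3},{5,7}} yields 0.25*16 = 4)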
*/
+      real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
+      /* Get the weight for this output plane k */
+      real the_weight = weight_data[k];
+      /* Initialize to the bias */
+      real z = bias_data[k];
+      long i;
+      for(i = 0; i < outputWidth*outputHeight; i++)
+        ptr_output[i] = z;
+
+      for(yy = 0; yy < outputHeight; yy++)
+      {
+        for(xx = 0; xx < outputWidth; xx++)
+        {
+          /* Compute the sum over the kW x kH input window... */
+          real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
+          real sum = 0;
+          long kx, ky;
+
+          for(ky = 0; ky < kH; ky++)
+          {
+            for(kx = 0; kx < kW; kx++)
+              sum += ptr_input[kx];
+            ptr_input += inputWidth; /* next input line */
+          }
+          /* Update output */
+          *ptr_output++ += the_weight*sum;
+        }
+      }
+    }
+  }
+  THTensor_(free)(input);
+}
+
+void THNN_(SpatialSubSampling_updateGradInput)(
+    THNNState *state,
+    THTensor *input,
+    THTensor *gradOutput,
+    THTensor *gradInput,
+    THTensor *weight,
+    int kW, int kH,
+    int dW, int dH)
+{
+  THNN_(SpatialSubSampling_shapeCheck)(input, gradOutput, weight, kW, kH);
+
+  int dimw = 2;
+  int dimh = 1;
+  long nbatch = 1;
+
+  long inputWidth;
+  long inputHeight;
+  long outputWidth;
+  long outputHeight;
+
+  int nInputPlane = THTensor_(size)(weight,0);
+
+  real *weight_data;
+  real *gradOutput_data;
+  real *input_data, *gradInput_data;
+
+  long k;
+
+  if (input->nDimension == 4) {
+    nbatch = input->size[0];
+    dimw++;
+    dimh++;
+  }
+
+  inputWidth = input->size[dimw];
+  inputHeight = input->size[dimh];
+  outputWidth = (inputWidth - kW) / dW + 1;
+  outputHeight = (inputHeight - kH) / dH + 1;
+
+  weight_data = THTensor_(data)(weight);
+  gradOutput = THTensor_(newContiguous)(gradOutput);
+  gradOutput_data = THTensor_(data)(gradOutput);
+
+  input_data = THTensor_(data)(input);
+
+  THTensor_(resizeAs)(gradInput, input);
+  gradInput_data = THTensor_(data)(gradInput);
+
+#pragma omp parallel for private(k)
+  for(k = 0; k < nInputPlane; k++)
+  {
+    long p;
+    for(p = 0; p < nbatch; p++)
+    {
+      real the_weight = weight_data[k];
+      real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
+      long xx, yy;
+
+      real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
+      long i;
+      for(i=0; i<inputWidth*inputHeight; i++)
+        ptr_gi[i] = 0.0;
+
+      for(yy = 0; yy < outputHeight; yy++)
+      {
+        for(xx = 0; xx < outputWidth; xx++)
+        {
+          real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
+          real z = *ptr_gradOutput++ * the_weight;
+          long kx, ky;
+
+          for(ky = 0; ky < kH; ky++)
+          {
+            for(kx = 0; kx < kW; kx++)
+              ptr_gradInput[kx] += z;
+            ptr_gradInput += inputWidth;
+          }
+        }
+      }
+    }
+  }
+  THTensor_(free)(gradOutput);
+}
+
+void THNN_(SpatialSubSampling_accGradParameters)(
+    THNNState *state,
+    THTensor *input,
+    THTensor *gradOutput,
+    THTensor *gradWeight,
+    THTensor *gradBias,
+    int kW, int kH,
+    int dW, int dH,
+    accreal scale_)
+{
+  real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+  THNN_(SpatialSubSampling_shapeCheck)(input, gradOutput, gradWeight, kW, kH);
+
+  long nbatch = 1;
+  long dimw = 2;
+  long dimh = 1;
+
+  long inputWidth;
+  long inputHeight;
+  long outputWidth;
+  long outputHeight;
+
+  int nInputPlane = THTensor_(size)(gradWeight,0);
+
+  real *gradWeight_data;
+  real *gradBias_data;
+  real *gradOutput_data;
+  real *input_data;
+
+  long k;
+
+  if (input->nDimension == 4) {
+    dimw++;
+    dimh++;
+    nbatch =
input->size[0]; + } + + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + outputWidth = (inputWidth - kW) / dW + 1; + outputHeight = (inputHeight - kH) / dH + 1; + + gradWeight_data = THTensor_(data)(gradWeight); + gradBias_data = THTensor_(data)(gradBias); + gradOutput = THTensor_(newContiguous)(gradOutput); + gradOutput_data = THTensor_(data)(gradOutput); + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + +#pragma omp parallel for private(k) + for(k = 0; k < nInputPlane; k++) + { + long p; + for(p = 0; p < nbatch; p++) + { + real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight; + real sum; + long xx, yy; + long i; + + sum = 0; + for(i = 0; i < outputWidth*outputHeight; i++) + sum += ptr_gradOutput[i]; + gradBias_data[k] += scale*sum; + + sum = 0; + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW; + real z = *ptr_gradOutput++; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + sum += z * ptr_input[kx]; + ptr_input += inputWidth; + } + } + } + gradWeight_data[k] += scale*sum; + } + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingBilinear.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingBilinear.c new file mode 100644 index 000000000..8bc487ead --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingBilinear.c @@ -0,0 +1,174 @@ +// Adapted from interp.cpp from Caffe util by Pauline Luc +// Originally developed by George Papandreou + +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialUpSamplingBilinear.c" +#else + +static inline void THNN_(SpatialUpSamplingBilinear_shapeCheck) + (THTensor *input, THTensor *gradOutput, + int nBatch, int nChannels, + int inputHeight, int inputWidth, + int outputHeight, int outputWidth) { + THArgCheck(inputHeight > 0 && inputWidth > 0 + && outputHeight > 0 && outputWidth > 0, 2, + "input and output sizes should be greater than 0," + " but got input (H: %d, W: %d) output (H: %d, W: %d)", + inputHeight, inputWidth, outputHeight, outputWidth); + if (input != NULL) { + THNN_ARGCHECK(input->nDimension == 4, 2, input, + "4D input tensor expected but got: %s"); + } + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nBatch); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, nChannels); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth); + } +} + +void THNN_(SpatialUpSamplingBilinear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int outputHeight, + int outputWidth){ + + int nbatch = THTensor_(size)(input, 0); + int channels = THTensor_(size)(input, 1); + int inputHeight = THTensor_(size)(input, 2); + int inputWidth = THTensor_(size)(input, 3); + + THNN_(SpatialUpSamplingBilinear_shapeCheck) + (input, NULL, + nbatch, channels, + inputHeight, inputWidth, + outputHeight, outputWidth); + + input = THTensor_(newContiguous)(input); + THTensor_(resize4d)(output, + THTensor_(size)(input, 0), + THTensor_(size)(input, 1), + outputHeight, outputWidth); + THTensor_(zero)(output); + real *idata = THTensor_(data)(input); + real *odata = THTensor_(data)(output); + channels = nbatch * channels; + THAssert(inputHeight > 0 && inputWidth > 0 && outputHeight > 0 && 
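+           /* (illustrative aside with made-up sizes: upsampling 3 -> 5 rows
+              gives rheight = (3-1)/(5-1) = 0.5, so output row h2 = 3 lands at
+              h1r = 1.5, i.e. h1 = 1 with h1lambda = 0.5: an even blend of
+              input rows 1 and 2, the same weighting the main loop applies) */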
outputWidth > 0); + // special case: just copy + if (inputHeight == outputHeight && inputWidth == outputWidth) { + for (int h2 = 0; h2 < outputHeight; ++h2) { + const int h1 = h2; + for (int w2 = 0; w2 < outputWidth; ++w2) { + const int w1 = w2; + const real* pos1 = &idata[h1 * inputWidth + w1]; + real* pos2 = &odata[h2 * outputWidth + w2]; + for (int c = 0; c < channels; ++c) { + pos2[0] = pos1[0]; + pos1 += inputWidth * inputHeight; + pos2 += outputWidth * outputHeight; + } + } + } + return; + } + const float rheight =(outputHeight > 1) ? (float)(inputHeight - 1)/(outputHeight - 1) : 0.f; + const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1) / (outputWidth - 1) : 0.f; + for (int h2 = 0; h2 < outputHeight; ++h2) { + const float h1r = rheight * h2; + const int h1 = h1r; + const int h1p = (h1 < inputHeight - 1) ? 1 : 0; + const real h1lambda = h1r - h1; + const real h0lambda = (real)1. - h1lambda; + for (int w2 = 0; w2 < outputWidth; ++w2) { + const float w1r = rwidth * w2; + const int w1 = w1r; + const int w1p = (w1 < inputWidth - 1) ? 1 : 0; + const real w1lambda = w1r - w1; + const real w0lambda = (real)1. - w1lambda; + const real* pos1 = &idata[h1 * inputWidth + w1]; + real* pos2 = &odata[h2 * outputWidth + w2]; + for (int c = 0; c < channels; ++c) { + pos2[0] = h0lambda * (w0lambda * pos1[0]+ w1lambda * pos1[w1p]) + + h1lambda * (w0lambda * pos1[h1p * inputWidth] + + w1lambda * pos1[h1p * inputWidth + w1p]); + pos1 += inputWidth * inputHeight; + pos2 += outputWidth * outputHeight; + } + } + } + THTensor_(free)(input); +} + +void THNN_(SpatialUpSamplingBilinear_updateGradInput)( + THNNState *state, + THTensor *gradOutput, + THTensor *gradInput, + int nbatch, + int channels, + int inputHeight, + int inputWidth, + int outputHeight, + int outputWidth){ + + THNN_(SpatialUpSamplingBilinear_shapeCheck) + (NULL, gradOutput, + nbatch, channels, + inputHeight, inputWidth, + outputHeight, outputWidth); + + THTensor_(resize4d)(gradInput, nbatch, channels, inputHeight, inputWidth); + THTensor_(zero)(gradInput); + gradOutput = THTensor_(newContiguous)(gradOutput); + real *data1 = THTensor_(data)(gradInput); + real *data2 = THTensor_(data)(gradOutput); + channels = nbatch * channels; + + // special case: same-size matching grids + if (inputHeight == outputHeight && inputWidth == outputWidth) { + for (int h2 = 0; h2 < outputHeight; ++h2) { + const int h1 = h2; + for (int w2 = 0; w2 < outputWidth; ++w2) { + const int w1 = w2; + real* pos1 = &data1[h1 * inputWidth + w1]; + const real* pos2 = &data2[h2 * outputWidth + w2]; + for (int c = 0; c < channels; ++c) { + pos1[0] += pos2[0]; + pos1 += inputWidth * inputHeight; + pos2 += outputWidth * outputHeight; + } + } + } + return; + } + const float rheight =(outputHeight > 1) ? (float)(inputHeight - 1)/(outputHeight - 1) : 0.f; + const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1)/(outputWidth - 1) : 0.f; + for (int h2 = 0; h2 < outputHeight; ++h2) { + const float h1r = rheight * h2; + const int h1 = h1r; + const int h1p = (h1 < inputHeight - 1) ? 1 : 0; + const real h1lambda = h1r - h1; + const real h0lambda = (real)1. - h1lambda; + for (int w2 = 0; w2 < outputWidth; ++w2) { + const float w1r = rwidth * w2; + const int w1 = w1r; + const int w1p = (w1 < inputWidth - 1) ? 1 : 0; + const real w1lambda = w1r - w1; + const real w0lambda = (real)1. 
- w1lambda; + real* pos1 = &data1[h1 * inputWidth + w1]; + const real* pos2 = &data2[h2 * outputWidth + w2]; + for (int c = 0; c < channels; ++c) { + pos1[0] += h0lambda * w0lambda * pos2[0]; + pos1[w1p] += h0lambda * w1lambda * pos2[0]; + pos1[h1p * inputWidth] += h1lambda * w0lambda * pos2[0]; + pos1[h1p * inputWidth + w1p] += h1lambda * w1lambda * pos2[0]; + pos1 += inputWidth * inputHeight; + pos2 += outputWidth * outputHeight; + } + } + } + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingNearest.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingNearest.c new file mode 100644 index 000000000..b4699ff3e --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingNearest.c @@ -0,0 +1,199 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialUpSamplingNearest.c" +#else + + +static inline void THNN_(SpatialUpSamplingNearest_shapeCheck) + (THTensor *input, THTensor *gradOutput, + int scale_factor) { + THArgCheck(input != NULL, 2, "4D input tensor expected but got NULL"); + THArgCheck(scale_factor > 1, 4, + "scale_factor must be greater than 1, but got: %d", scale_factor); + THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input, + "3D or 4D input tensor expected but got: %s"); + if (input->nDimension == 3) { + int nChannels = THTensor_(size)(input, 0); + int inputHeight = THTensor_(size)(input, 1); + int inputWidth = THTensor_(size)(input, 2); + int outputHeight = inputHeight * scale_factor; + int outputWidth = inputWidth * scale_factor; + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, 3, 0, nChannels); + THNN_CHECK_DIM_SIZE(gradOutput, 3, 1, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, 3, 2, outputWidth); + } + } else { + int nBatch = THTensor_(size)(input, 0); + int nChannels = THTensor_(size)(input, 1); + int inputHeight = THTensor_(size)(input, 2); + int inputWidth = THTensor_(size)(input, 3); + int outputHeight = inputHeight * scale_factor; + int outputWidth = inputWidth * scale_factor; + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nBatch); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, nChannels); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth); + } + } +} + +void THNN_(SpatialUpSamplingNearest_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int scale_factor) +{ + THNN_(SpatialUpSamplingNearest_shapeCheck)(input, NULL, scale_factor); + int inputHeight = THTensor_(size)(input, input->nDimension-2); + int inputWidth = THTensor_(size)(input, input->nDimension-1); + int outputHeight = inputHeight * scale_factor; + int outputWidth = inputWidth * scale_factor; + + if (input->nDimension == 3) { + THTensor_(resize3d)(output, + THTensor_(size)(input, 0), + outputHeight, outputWidth); + } else { + THTensor_(resize4d)(output, + THTensor_(size)(input, 0), + THTensor_(size)(input, 1), + outputHeight, outputWidth); + } + + int dW = scale_factor; + int dH = scale_factor; + int xDim = input->nDimension-2; + int yDim = input->nDimension-1; + + // dims + int idim = input->nDimension; + int osz0 = output->size[0]; + int osz1 = output->size[1]; + int osz2 = output->size[2]; + int osz3 = 1; + if (idim > 3) { + osz3 = output->size[3]; + } + + // get strides + long *is = input->stride; + long *os = output->stride; + + // get raw pointers + real *pin = THTensor_(data)(input); + real *pout = THTensor_(data)(output); + + // perform the upsampling + int i0, i1, i2, i3, isrc, 
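+  /* (illustrative aside: each output index maps back to an input index by
+     integer division, iin = iout / scale_factor; with scale_factor = 2,
+     output columns 0..5 read input columns 0,0,1,1,2,2, so every input
+     pixel is replicated into a 2x2 block) */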
idst; + int iout[4]; // Output indices + int iin[4]; // Input indices + + for (i0 = 0; i0 < osz0; i0++) { + iout[0] = i0; + iin[0] = i0; + for (i1 = 0; i1 < osz1; i1++) { + iout[1] = i1; + iin[1] = i1; + for (i2 = 0; i2 < osz2; i2++) { + iout[2] = i2; + iin[2] = i2; + for (i3 = 0; i3 < osz3; i3++) { + iout[3] = i3; + iin[3] = i3; + + // set the indices for the upsampled dimensions + iin[xDim] = iout[xDim] / dW; + iin[yDim] = iout[yDim] / dH; + + idst = i0*os[0] + i1*os[1] + i2*os[2]; + isrc = iin[0]*is[0] + iin[1]*is[1] + iin[2]*is[2]; + if (idim > 3) { + idst += i3*os[3]; + isrc += iin[3]*is[3]; + } + + pout[idst] = pin[isrc]; + } + } + } + } +} + +void THNN_(SpatialUpSamplingNearest_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int scale_factor) +{ + THNN_(SpatialUpSamplingNearest_shapeCheck)(input, gradOutput, scale_factor); + THTensor_(resizeAs)(gradInput, input); + + int dW = scale_factor; + int dH = scale_factor; + int xDim = gradInput->nDimension-2; + int yDim = gradInput->nDimension-1; + + // dims + int idim = gradInput->nDimension; // Guaranteed to be between 3 and 5 + int isz0 = gradInput->size[0]; + int isz1 = gradInput->size[1]; + int isz2 = gradInput->size[2]; + int isz3 = 1; + if (idim > 3) { + isz3 = gradInput->size[3]; + } + + // get strides + long *is = gradInput->stride; + long *os = gradOutput->stride; + + // get raw pointers + real *pin = THTensor_(data)(gradInput); + real *pout = THTensor_(data)(gradOutput); + + // perform the upsampling + int i0, i1, i2, i3, isrc, idst, x, y; + int iin[4]; // Input indices + int iout[4]; // Output indices + + THTensor_(zero)(gradInput); + + for (i0 = 0; i0 < isz0; i0++) { + iin[0] = i0; + iout[0] = i0; + for (i1 = 0; i1 < isz1; i1++) { + iin[1] = i1; + iout[1] = i1; + for (i2 = 0; i2 < isz2; i2++) { + iin[2] = i2; + iout[2] = i2; + for (i3 = 0; i3 < isz3; i3++) { + iin[3] = i3; + iout[3] = i3; + + idst = i0*is[0] + i1*is[1] + i2*is[2]; + if (idim > 3) { + idst += i3*is[3]; + } + + // Now accumulate the gradients from gradOutput + for (y = 0; y < dH; y++) { + for (x = 0; x < dW; x++) { + iout[xDim] = dW * iin[xDim] + x; + iout[yDim] = dH * iin[yDim] + y; + isrc = iout[0]*os[0] + iout[1]*os[1] + iout[2]*os[2]; + if (idim > 3) { + isrc += iout[3]*os[3]; + } + pin[idst] += pout[isrc]; + } + } + } + } + } + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Sqrt.c b/contrib/lua-torch/nn/lib/THNN/generic/Sqrt.c new file mode 100644 index 000000000..174884e34 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/Sqrt.c @@ -0,0 +1,52 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Sqrt.c" +#else + +void THNN_(Sqrt_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal eps_) +{ + real eps = TH_CONVERT_ACCREAL_TO_REAL(eps_); + THTensor_(resizeAs)(output, input); + THTensor_(sqrt)(output, input); +} + +void THNN_(Sqrt_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output) +{ + THNN_CHECK_SHAPE(output, gradOutput); + THTensor_(resizeAs)(gradInput, input); + + if (output->nDimension == 1 || + !THTensor_(isContiguous)(output) || + !THTensor_(isContiguous)(gradOutput) || + !THTensor_(isContiguous)(gradInput)) + { + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, + *gradInput_data = (*output_data == 0.0) ? 
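+      /* d/dx sqrt(x) = 1/(2*sqrt(x)), so gradInput = 0.5 * gradOutput / output;
+         the zero check guards the division at output == 0 (for instance,
+         output = 3 with gradOutput = 6 gives gradInput = 1) */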
0.0 : (0.5 * (*gradOutput_data / *output_data)); + ); + } + else + { + real *gradOutput_data = THTensor_(data)(gradOutput); + real *gradInput_data = THTensor_(data)(gradInput); + real *output_data = THTensor_(data)(output); + long i; +#pragma omp parallel for private(i) + for(i = 0; i < THTensor_(nElement)(output); i++) + { + if (output_data[i] == 0.0) + gradInput_data[i] = 0.0; + else + gradInput_data[i] = 0.5 * (gradOutput_data[i] / output_data[i]); + } + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Square.c b/contrib/lua-torch/nn/lib/THNN/generic/Square.c new file mode 100644 index 000000000..aad0a911c --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/Square.c @@ -0,0 +1,59 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Square.c" +#else + +void THNN_(Square_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output) +{ + THTensor_(resizeAs)(output, input); + + if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output)) + { + TH_TENSOR_APPLY2(real, output, real, input, + *output_data = (*input_data) * (*input_data); + ); + } + else + { + real *output_data = THTensor_(data)(output); + real *input_data = THTensor_(data)(input); + long i; +#pragma omp parallel for private(i) + for (i = 0; i < THTensor_(nElement)(input); i++) + output_data[i] = input_data[i]*input_data[i]; + } +} + +void THNN_(Square_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput) +{ + THNN_CHECK_SHAPE(input, gradOutput); + THTensor_(resizeAs)(gradInput, input); + + if (input->nDimension == 1 || + !THTensor_(isContiguous)(input) || + !THTensor_(isContiguous)(gradOutput) || + !THTensor_(isContiguous)(gradInput)) + { + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, + *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data); + ); + } + else + { + real *gradOutput_data = THTensor_(data)(gradOutput); + real *gradInput_data = THTensor_(data)(gradInput); + real *input_data = THTensor_(data)(input); + long i; +#pragma omp parallel for private(i) + for (i = 0; i < THTensor_(nElement)(gradInput); i++) + gradInput_data[i] = 2.0 * gradOutput_data[i] * input_data[i]; + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/THNN.h b/contrib/lua-torch/nn/lib/THNN/generic/THNN.h new file mode 100644 index 000000000..76a28eb2d --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/THNN.h @@ -0,0 +1,1501 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/THNN.h" +#else + +TH_API void THNN_(Abs_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *output); // [OUT] Abs output +TH_API void THNN_(Abs_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. output + THTensor *gradInput); // [OUT] gradient w.r.t. input + +TH_API void THNN_(AbsCriterion_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *target, // tensor with target values + THTensor *output, // [OUT] a one-element tensor with loss + bool sizeAverage); // if true, the loss will be divided by batch size +TH_API void THNN_(AbsCriterion_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *target, // tensor with target values + THTensor *gradInput, // [OUT] gradient w.r.t. 
input + bool sizeAverage); // if true, the gradient will be normalized by batch size + +TH_API void THNN_(BCECriterion_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage, + THTensor *weights); // [OPTIONAL] +TH_API void THNN_(BCECriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage, + THTensor *weights); // [OPTIONAL] + +TH_API void THNN_(ClassNLLCriterion_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor (1D/2D) + THIndexTensor *target, // tensor containing indexes of target classes + THTensor *output, // [OUT] a one-element tensor with loss + bool sizeAverage, // if true, the loss will be normalized by batch size and class weights + THTensor *weights, // [OPTIONAL] class weights + THTensor *total_weight, // [BUFFER] + long ignore_index); // target index to ignore (loss = 0, gradInput = 0) +TH_API void THNN_(ClassNLLCriterion_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor (1D/2D) + THIndexTensor *target, // tensor containing indexes of target classes + THTensor *gradInput, // [OUT] gradient w.r.t. input + bool sizeAverage, // if true, the loss will be normalized by batch size and class weights + THTensor *weights, // [OPTIONAL] class weights + THTensor *total_weight, // [BUFFER] + long ignore_index); // target index to ignore (loss = 0, gradInput = 0) + +TH_API void THNN_(SpatialClassNLLCriterion_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor (4D) + THIndexTensor *target, // tensor containing indexes of target classes (3D) + THTensor *output, // [OUT] a one-element tensor with loss + bool sizeAverage, // if true, the loss will be normalized by batch size and class weights + THTensor *weights, // [OPTIONAL] class weights + THTensor *total_weight); // [BUFFER] +TH_API void THNN_(SpatialClassNLLCriterion_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor (4D) + THIndexTensor *target, // tensor containing indexes of target classes (3D) + THTensor *gradInput, // [OUT] gradient w.r.t. input + bool sizeAverage, // if true, the loss will be normalized by batch size and class weights + THTensor *weights, // [OPTIONAL] class weights + THTensor *total_weight); // [BUFFER] + +TH_API void THNN_(ELU_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *output, // [OUT] ELU output + accreal alpha, // an ELU parameter (as in paper) + bool inplace); // if true, modifies gradOutput and sets gradInput onto it (no additional memory is allocated) +TH_API void THNN_(ELU_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. output + THTensor *gradInput, // [OUT] gradient w.r.t. 
input + THTensor *output, // output from a forward pass + accreal alpha, // an ELU parameter (as in paper) + bool inplace); // if true, modifies gradOutput and sets gradInput onto it (no additional memory is allocated) + +TH_API void THNN_(DistKLDivCriterion_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *target, // target tensor + THTensor *output, // [OUT] a one-element tensor containing the loss + bool sizeAverage); // if true, the loss will be normalized **by total number of elements** +TH_API void THNN_(DistKLDivCriterion_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *target, // target tensor + THTensor *gradInput, // [OUT] gradient w.r.t. input + bool sizeAverage); // if true, the loss will be normalized **by total number of elements** + +TH_API void THNN_(GatedLinear_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *output, // [OUT] output tensor, half size of input along dimension dim + int dim); // dimension for halving operation +TH_API void THNN_(GatedLinear_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. module's output + THTensor *gradInput, // [OUT] gradient w.r.t. input + int dim); // dimension for halving operation + +// HardShrink outputs 0 on the interval (-lambda; lambda) and the original value otherwise. +TH_API void THNN_(HardShrink_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *output, // [OUT] output tensor + accreal lambda); // HardShrink parameter +TH_API void THNN_(HardShrink_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. module's output + THTensor *gradInput, // [OUT] gradient w.r.t. input + accreal lambda); // HardShrink parameter + +// HardTanh clamps the values to the interval [min_val; max_val]. +TH_API void THNN_(HardTanh_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *output, // [OUT] output tensor + accreal min_val, // lower threshold + accreal max_val, // upper threshold + bool inplace); +TH_API void THNN_(HardTanh_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. module's output + THTensor *gradInput, // [OUT] gradient w.r.t. the input + accreal min_val, // lower threshold + accreal max_val, // upper threshold + bool inplace); + +TH_API void THNN_(L1Cost_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *output); // [OUT] output tensor +TH_API void THNN_(L1Cost_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *gradOutput, // [OPTIONAL] gradient w.r.t. module's output + THTensor *gradInput); // [OUT] gradient w.r.t. the input + +TH_API void THNN_(LeakyReLU_updateOutput)( + THNNState *state, // library's state + THTensor *input, // [MODIFIED] input tensor + THTensor *output, // [OUT] output tensor + accreal negval, // negative part slope + bool inplace); // if true, modifies the input tensor and sets the output tensor on it (no additional memory is allocated) +TH_API void THNN_(LeakyReLU_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *gradOutput, // [MODIFIED] gradient w.r.t.
module's output + THTensor *gradInput, // [OUT] gradient w.r.t. the input + accreal negval, // negative part slope + bool inplace); // if true, modifies gradOutput and sets gradInput onto it (no additional memory is allocated) + +TH_API void THNN_(GRUFused_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *hidden, + THTensor *bias1, // [OPTIONAL] + THTensor *bias2, // [OPTIONAL] + THTensor *hx, + THTensor *output, + THTensor *storage); +TH_API void THNN_(GRUFused_updateGradInput)( + THNNState *state, + THTensor *gradInInput, + THTensor *gradInHidden, + THTensor *gradOutput, + THTensor *gradInputHx, + THTensor *storage); + +TH_API void THNN_(LSTMFused_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *hidden, + THTensor *bias1, // [OPTIONAL] + THTensor *bias2, // [OPTIONAL] + THTensor *cell, + THTensor *output, + THTensor *outputCell); +TH_API void THNN_(LSTMFused_updateGradInput)( + THNNState *state, + THTensor *storage, + THTensor *gradInGates, + THTensor *cx, + THTensor *cy, + THTensor *gradOutput, + THTensor *gradOutputCell, + THTensor *gradInputCx); + +TH_API void THNN_(LogSigmoid_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *output, // output tensor + THTensor *buffer); // [BUFFER] +TH_API void THNN_(LogSigmoid_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input + THTensor *gradOutput, // gradient w.r.t. module's output + THTensor *gradInput, // [OUT] gradient w.r.t. input + THTensor *buffer); // [BUFFER] + +TH_API void THNN_(LogSoftMax_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *output); // [OUT] output tensor +TH_API void THNN_(LogSoftMax_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. module's output + THTensor *gradInput, // [OUT] gradient w.r.t. input + THTensor *output); // module's output + +TH_API void THNN_(LookupTable_accGradParameters)( + THNNState *state, + THIndexTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THIntegerTensor *count, + THTensor *sorted, // [OPTIONAL] + THIndexTensor *indices, // [OPTIONAL] + bool scaleGradByFreq, + int paddingValue, + accreal scale); + +TH_API void THNN_(LookupTable_renorm)( + THNNState *state, // library's state + THIndexTensor *idx, // vector containing row indices (modified in function) + THTensor *weight, // 2D tensor whose rows will be renormalized + accreal maxNorm, // maximum norm + accreal normType); // the norm type (e.g. normType=2 for the 2-norm) + +TH_API void THNN_(MarginCriterion_updateOutput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *target, // target tensor (should contain only 1s and -1s) + THTensor *output, // [OUT] a one-element tensor containing the loss + bool sizeAverage, // if true, the loss is normalized by **total number of elements** + accreal margin); // a margin that is required for the loss to be 0 + +TH_API void THNN_(MarginCriterion_updateGradInput)( + THNNState *state, // library's state + THTensor *input, // input tensor + THTensor *target, // target tensor (should contain only 1s and -1s) + THTensor *gradInput, // [OUT] gradient w.r.t.
module's input + bool sizeAverage, // if true, the gradient is normalized by **total number of elements** + accreal margin); // a margin that is required for the loss to be 0 + +TH_API void THNN_(SoftMarginCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage); + +TH_API void THNN_(SoftMarginCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage); + +TH_API void THNN_(MSECriterion_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage); +TH_API void THNN_(MSECriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage); + +TH_API void THNN_(MultiLabelMarginCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *output, + THTensor *isTarget, + bool sizeAverage); +TH_API void THNN_(MultiLabelMarginCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *gradInput, + THTensor *isTarget, + bool sizeAverage); + +TH_API void THNN_(MultiMarginCriterion_updateOutput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *output, + bool sizeAverage, + int p, + THTensor* weights, // [OPTIONAL] + accreal margin); +TH_API void THNN_(MultiMarginCriterion_updateGradInput)( + THNNState *state, + THTensor *input, + THIndexTensor *target, + THTensor *gradInput, + bool sizeAverage, + int p, + THTensor *weights, // [OPTIONAL] + accreal margin); + +TH_API void THNN_(PReLU_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THIndex_t nOutputPlane); +TH_API void THNN_(PReLU_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THIndex_t nOutputPlane); +TH_API void THNN_(PReLU_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *gradWeight, + THTensor *gradWeightBuf, + THTensor *gradWeightBuf2, + THIndex_t nOutputPlane, + accreal scale); + +TH_API void THNN_(Linear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *addBuffer); +TH_API void THNN_(Linear_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight); +TH_API void THNN_(Linear_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *bias, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *addBuffer, + accreal scale); + +TH_API void THNN_(RReLU_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *noise, + accreal lower, + accreal upper, + bool train, + bool inplace, + THGenerator *generator); +TH_API void THNN_(RReLU_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *noise, + accreal lower, + accreal upper, + bool train, + bool inplace); + +TH_API void THNN_(Sigmoid_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output); +TH_API void THNN_(Sigmoid_updateGradInput)( + THNNState *state, + THTensor *input, // [OPTIONAL] + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output); + +TH_API void THNN_(SmoothL1Criterion_updateOutput)( + 
THNNState *state, + THTensor *input, + THTensor *target, + THTensor *output, + bool sizeAverage); +TH_API void THNN_(SmoothL1Criterion_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *target, + THTensor *gradInput, + bool sizeAverage); + +TH_API void THNN_(SoftMax_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output); +TH_API void THNN_(SoftMax_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output); + +TH_API void THNN_(SoftPlus_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal beta, + accreal threshold); +TH_API void THNN_(SoftPlus_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output, + accreal beta, + accreal threshold); + +TH_API void THNN_(SoftShrink_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal lambda); +TH_API void THNN_(SoftShrink_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + accreal lambda); + + +TH_API void THNN_(IndexLinear_updateOutput)( + THNNState *state, + THIndexTensor *keys, + long keysOffset, + THTensor *values, + THIndexTensor *sizes, + THIndexTensor *cumSumSizes, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *normalizedValues, + int train); +TH_API void THNN_(IndexLinear_accGradParameters)( + THNNState *state, + THIndexTensor *keys, + long keysOffset, + THTensor *values, + THIndexTensor *sizes, + THIndexTensor *cumSumSizes, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *weight, + THTensor *bias, + THTensor* valuesBuffer, + accreal weightDecay, + accreal scale); +TH_API void THNN_(IndexLinear_accUpdateGradParameters)( + THNNState *state, + THIndexTensor *keys, + long keysOffset, + THTensor *values, + THIndexTensor *sizes, + THIndexTensor *cumSumSizes, + THTensor *gradOutput, + THTensor *weight, + THTensor *bias, + accreal weightDecay, + accreal scale); +TH_API void THNN_(IndexLinear_updateParameters)( + THNNState *state, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *weight, + THTensor *bias, + THIndexTensor *runningKeys, + THIndexTensor *cumSumSizes, + long keysOffset, + accreal weightDecay, + accreal learningRate); + +TH_API void THNN_(SparseLinear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias); +TH_API void THNN_(SparseLinear_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *weight, + THTensor *bias, + accreal weightDecay, + accreal scale); +TH_API void THNN_(SparseLinear_zeroGradParameters)( + THNNState *state, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *lastInput); +TH_API void THNN_(SparseLinear_updateParameters)( + THNNState *state, + THTensor *weight, + THTensor *bias, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *lastInput, + accreal learningRate); +TH_API void THNN_(SparseLinear_legacyUpdateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias); +TH_API void THNN_(SparseLinear_legacyAccGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *weight, + THTensor *bias, + accreal weightDecay, + accreal scale); +TH_API void THNN_(SparseLinear_legacyZeroGradParameters)( + THNNState 
*state, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *lastInput); +TH_API void THNN_(SparseLinear_legacyUpdateParameters)( + THNNState *state, + THTensor *weight, + THTensor *bias, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *lastInput, + accreal learningRate); + +TH_API void THNN_(Sqrt_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal eps); +TH_API void THNN_(Sqrt_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output); + +TH_API void THNN_(Square_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output); +TH_API void THNN_(Square_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput); + +TH_API void THNN_(Tanh_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output); +TH_API void THNN_(Tanh_updateGradInput)( + THNNState *state, + THTensor *input, // [OPTIONAL] + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output); + +TH_API void THNN_(Threshold_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal threshold, + accreal val, + bool inplace); +TH_API void THNN_(Threshold_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + accreal threshold, + accreal val, + bool inplace); + +TH_API void THNN_(TemporalConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + int kW, int dW, + int inputFrameSize, + int outputFrameSize); +TH_API void THNN_(TemporalConvolution_updateGradInput)( + THNNState* state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + int kW, int dW); +TH_API void THNN_(TemporalConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + int kW, int dW, + accreal scale); +TH_API void THNN_(TemporalMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kW, int dW); +TH_API void THNN_(TemporalMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kW, int dW); +TH_API void THNN_(TemporalSubSampling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + int kW, int dW, + int inputFrameSize); +TH_API void THNN_(TemporalSubSampling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + int kW, int dW); +TH_API void THNN_(TemporalSubSampling_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + int kW, int dW, + accreal scale); + +TH_API void THNN_(TemporalRowConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + THTensor *fgradInput, + int kW, + int dW, + int padW, + bool featFirst); +TH_API void THNN_(TemporalRowConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, + int dW, + int padW, + bool featFirst); +TH_API void THNN_(TemporalRowConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor 
*gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + THTensor *fgradInput, + int kW, + int dW, + int padW, + bool featFirst, + accreal scale); + +TH_API void THNN_(BatchNormalization_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, // [OPTIONAL] + THTensor *bias, // [OPTIONAL] + THTensor *running_mean, + THTensor *running_var, + THTensor *save_mean, + THTensor *save_std, + bool train, + double momentum, + double eps); +TH_API void THNN_(BatchNormalization_backward)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, // [OPTIONAL] + THTensor *gradWeight, // [OPTIONAL] + THTensor *gradBias, // [OPTIONAL] + THTensor *weight, // [OPTIONAL] + THTensor *running_mean, + THTensor *running_var, + THTensor *save_mean, + THTensor *save_std, + bool train, + double scale, + double eps); + +TH_API void THNN_(SpatialConvolutionMap_updateOutput)( + THNNState *state, // library state + THTensor *input, // input tensor + THTensor *output, // [OUT] convolution output + THTensor *weight, // 3D weight tensor (connTable:size(1) x kH x kW) + THTensor *bias, // 1D bias tensor (nOutputPlane) + THTensor *connTable, // connection table + int nInputPlane, // number of input planes + int nOutputPlane, // number of output planes + int dW, int dH); // stride +TH_API void THNN_(SpatialConvolutionMap_updateGradInput)( + THNNState *state, // library state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. output + THTensor *gradInput, // [OUT] gradient w.r.t. input + THTensor *weight, // 3D weight tensor (connTable:size(1) x kH x kW) + THTensor *bias, // 1D bias tensor (nOutputPlane) + THTensor *connTable, // connection table + int nInputPlane, // number of input planes + int nOutputPlane, // number of output planes + int dW, int dH); // stride +TH_API void THNN_(SpatialConvolutionMap_accGradParameters)( + THNNState *state, // library state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. 
output + THTensor *gradWeight, // 3D gradWeight tensor (connTable:size(1) x kH x kW) + THTensor *gradBias, // 1D gradBias tensor (nOutputPlane) + THTensor *connTable, // connection table + int nInputPlane, // number of input planes + int nOutputPlane, // number of output planes + int dW, int dH, // stride + accreal scale); // scaling factor + +TH_API void THNN_(SpatialConvolutionMM_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, // [OPTIONAL] + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH); +TH_API void THNN_(SpatialConvolutionMM_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH); +TH_API void THNN_(SpatialConvolutionMM_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, // [OPTIONAL] + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + accreal scale); + +TH_API void THNN_(SpatialDepthWiseConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, // [OPTIONAL] + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH); +TH_API void THNN_(SpatialDepthWiseConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH); +TH_API void THNN_(SpatialDepthWiseConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, // [OPTIONAL] + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + accreal scale); + +TH_API void THNN_(SpatialConvolutionLocal_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + long inputWidth, long inputHeight, + long outputWidth, long outputHeight); +TH_API void THNN_(SpatialConvolutionLocal_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + long inputWidth, long inputHeight, + long outputWidth, long outputHeight); +TH_API void THNN_(SpatialConvolutionLocal_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + THTensor *fgradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + long inputWidth, long inputHeight, + long outputWidth, long outputHeight, + accreal scale); + +TH_API void THNN_(SpatialAdaptiveMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int owidth, int oheight); +TH_API void THNN_(SpatialAdaptiveMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices); + +TH_API void THNN_(SpatialAdaptiveAveragePooling_updateOutput)( + THNNState *state, + THTensor *input, + 
THTensor *output, + int owidth, int oheight); +TH_API void THNN_(SpatialAdaptiveAveragePooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput); + +TH_API void THNN_(SpatialAveragePooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int kW, int kH, + int dW, int dH, + int padW, int padH, + bool ceil_mode, + bool count_include_pad); +TH_API void THNN_(SpatialAveragePooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int kW, int kH, + int dW, int dH, + int padW, int padH, + bool ceil_mode, + bool count_include_pad); + +TH_API void THNN_(SpatialFractionalMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int outputW, int outputH, + int poolSizeW, int poolSizeH, + THIndexTensor *indices, + THTensor *randomSamples); +TH_API void THNN_(SpatialFractionalMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int outputW, int outputH, + int poolSizeW, int poolSizeH, + THIndexTensor *indices); + +TH_API void THNN_(SpatialFullConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, // [OPTIONAL] + THTensor *columns, + THTensor *ones, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int adjW, int adjH); +TH_API void THNN_(SpatialFullConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *gradColumns, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int adjW, int adjH); +TH_API void THNN_(SpatialFullConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, // [OPTIONAL] + THTensor *columns, + THTensor *ones, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int adjW, int adjH, + accreal scale); + +TH_API void THNN_(SpatialFullConvolutionMap_updateOutput)( + THNNState *state, // library state + THTensor *input, // input tensor + THTensor *output, // [OUT] convolution output + THTensor *weight, // 3D weight tensor (connTable:size(1) x kH x kW) + THTensor *bias, // 1D bias tensor (nOutputPlane) + THTensor *connTable, // connection table + int nInputPlane, // number of input planes + int nOutputPlane, // number of output planes + int dW, int dH); // stride +TH_API void THNN_(SpatialFullConvolutionMap_updateGradInput)( + THNNState *state, // library state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. output + THTensor *gradInput, // [OUT] gradient w.r.t. input + THTensor *weight, // 3D weight tensor (connTable:size(1) x kH x kW) + THTensor *bias, // 1D bias tensor (nOutputPlane) + THTensor *connTable, // connection table + int nInputPlane, // number of input planes + int nOutputPlane, // number of output planes + int dW, int dH); // stride +TH_API void THNN_(SpatialFullConvolutionMap_accGradParameters)( + THNNState *state, // library state + THTensor *input, // input tensor + THTensor *gradOutput, // gradient w.r.t. 
output + THTensor *gradWeight, // 3D gradWeight tensor (connTable:size(1) x kH x kW) + THTensor *gradBias, // 1D gradBias tensor (nOutputPlane) + THTensor *connTable, // connection table + int nInputPlane, // number of input planes + int nOutputPlane, // number of output planes + int dW, int dH, // stride + accreal scale); // scaling factor + +TH_API void THNN_(SpatialDilatedConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, // [OPTIONAL] + THTensor *columns, + THTensor *ones, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int dilationW, int dilationH); + +TH_API void THNN_(SpatialDilatedConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *gradColumns, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int dilationW, int dilationH); + +TH_API void THNN_(SpatialDilatedConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, // [OPTIONAL] + THTensor *columns, + THTensor *ones, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int dilationW, int dilationH, + accreal scale); + +TH_API void THNN_(SpatialMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kW, int kH, + int dW, int dH, + int padW, int padH, + bool ceil_mode); +TH_API void THNN_(SpatialMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kW, int kH, + int dW, int dH, + int padW, int padH, + bool ceil_mode); + +TH_API void THNN_(SpatialDilatedMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int dilationW, int dilationH, + bool ceil_mode); +TH_API void THNN_(SpatialDilatedMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int dilationW, int dilationH, + bool ceil_mode); + +TH_API void THNN_(SpatialMaxUnpooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int owidth, int oheight); +TH_API void THNN_(SpatialMaxUnpooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int owidth, int oheight); + +TH_API void THNN_(SpatialSubSampling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + int kW, int kH, + int dW, int dH); +TH_API void THNN_(SpatialSubSampling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + int kW, int kH, + int dW, int dH); +TH_API void THNN_(SpatialSubSampling_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + int kW, int kH, + int dW, int dH, + accreal scale); + +TH_API void THNN_(SpatialUpSamplingNearest_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int scale_factor); +TH_API void THNN_(SpatialUpSamplingNearest_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int scale_factor); + 
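(Aside: every declaration in this header is generic. THNN_(NAME) is a token-pasting macro, so each instantiation of the header for a concrete real type produces concrete symbols such as THNN_FloatSpatialMaxPooling_updateOutput. The sketch below is a minimal, self-contained illustration of that expansion mechanism; the flat float* signature is invented for the example, since the real entry points take THNNState and THTensor arguments as declared above.)

/* Minimal sketch of the generic-name expansion used by THNN.h.
   The concat macros mirror TH's TH_CONCAT_3; the tensor-free
   signature is illustrative only. */
#include <stdio.h>

#define TH_CONCAT_3_EXPAND(a, b, c) a##b##c
#define TH_CONCAT_3(a, b, c) TH_CONCAT_3_EXPAND(a, b, c)

/* Pretend we are inside the "float" instantiation of the generic file. */
#define Real Float
#define real float
#define THNN_(NAME) TH_CONCAT_3(THNN_, Real, NAME)

/* Expands to: void THNN_FloatSquare_updateOutput(...) */
void THNN_(Square_updateOutput)(const real *input, real *output, long n)
{
  long i;
  for (i = 0; i < n; i++)
    output[i] = input[i] * input[i];  /* out = in^2, as in Square.c */
}

int main(void)
{
  float in[3] = { -2.0f, 0.5f, 3.0f };
  float out[3];
  THNN_FloatSquare_updateOutput(in, out, 3);  /* call via the expanded name */
  printf("%g %g %g\n", out[0], out[1], out[2]);
  return 0;
}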
+TH_API void THNN_(SpatialUpSamplingBilinear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int outputHeight, + int outputWidth); +TH_API void THNN_(SpatialUpSamplingBilinear_updateGradInput)( + THNNState *state, + THTensor *gradOutput, + THTensor *gradInput, + int nbatch, + int nchannels, + int inputHeight, + int inputWidth, + int outputHeight, + int outputWidth); + +TH_API void THNN_(unfolded_acc)( + THTensor *finput, + THTensor *input, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int nInputPlane, + int inputWidth, int inputHeight, + int outputWidth, int outputHeight); +TH_API void THNN_(unfolded_copy)( + THTensor *finput, + THTensor *input, + int kW, int kH, + int dW, int dH, + int padW, int padH, + int nInputPlane, + int inputWidth, int inputHeight, + int outputWidth, int outputHeight); + +TH_API void THNN_(VolumetricAveragePooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int kT, int kW, int kH, + int dT, int dW, int dH); +TH_API void THNN_(VolumetricAveragePooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int kT, int kW, int kH, + int dT, int dW, int dH); + +TH_API void THNN_(VolumetricConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, // [OPTIONAL] + THTensor *finput, + THTensor *fgradInput, + int dT, int dW, int dH, + int pT, int pW, int pH); +TH_API void THNN_(VolumetricConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + int dT, int dW, int dH, + int pT, int pW, int pH); +TH_API void THNN_(VolumetricConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, // [OPTIONAL] + THTensor *finput, + THTensor *fgradInput, + int dT, int dW, int dH, + int pT, int pW, int pH, + accreal scale); + +TH_API void THNN_(VolumetricConvolutionMM_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, // [OPTIONAL] + THTensor *finput, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH); +TH_API void THNN_(VolumetricConvolutionMM_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH); +TH_API void THNN_(VolumetricConvolutionMM_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, // [OPTIONAL] + THTensor *finput, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH, + accreal scale); + +TH_API void THNN_(VolumetricFractionalMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int outputT, int outputW, int outputH, + int poolSizeT, int poolSizeW, int poolSizeH, + THIndexTensor *indices, + THTensor *randomSamples); +TH_API void THNN_(VolumetricFractionalMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int outputT, int outputW, int outputH, + int poolSizeT, int poolSizeW, int poolSizeH, + THIndexTensor *indices); + +TH_API void THNN_(VolumetricFullConvolution_updateOutput)( + THNNState *state, // library state + THTensor *input, // 4D or 5D 
(batch) tensor + THTensor *output, // [OUT] volumetric convolution output + THTensor *weight, // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW) + THTensor *bias, // [OPTIONAL] bias tensor (nOutputPlane) + THTensor *finput, // [OUT] internal columns buffer + THTensor *fgradInput, // [OUT] internal ones buffer + int dT, int dW, int dH, // stride of the convolution + int pT, int pW, int pH, // padding + int aT, int aW, int aH); // extra output adjustment +TH_API void THNN_(VolumetricFullConvolution_updateGradInput)( + THNNState *state, // library state + THTensor *input, // 4D or 5D (batch) tensor + THTensor *gradOutput, // gradient w.r.t. output + THTensor *gradInput, // [OUT] gradient w.r.t. input + THTensor *weight, // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW) + THTensor *finput, // internal columns buffer + THTensor *fgradInput, // internal ones buffer + int dT, int dW, int dH, // stride + int pT, int pW, int pH, // padding + int aT, int aW, int aH); // extra output adjustment +TH_API void THNN_(VolumetricFullConvolution_accGradParameters)( + THNNState *state, // library state + THTensor *input, // 4D or 5D (batch) tensor + THTensor *gradOutput, // gradient w.r.t. output + THTensor *gradWeight, // gradWeight tensor (nInputPlane x nOutputPlane x kT x kH x kW) + THTensor *gradBias, // [OPTIONAL] gradBias tensor (nOutputPlane) + THTensor *finput, // internal columns buffer + THTensor *fgradInput, // internal ones buffer + int dT, int dW, int dH, // stride + int pT, int pW, int pH, // padding + int aT, int aW, int aH, // extra output adjustment + accreal scale); // scaling factor + +TH_API void THNN_(VolumetricDilatedConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, // [OPTIONAL] + THTensor *columns, + THTensor *ones, + int kT, int kW, int kH, + int dT, int dW, int dH, + int padT, int padW, int padH, + int dilationT, int dilationW, int dilationH); + +TH_API void THNN_(VolumetricDilatedConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *gradColumns, + int kT, int kW, int kH, + int dT, int dW, int dH, + int padT, int padW, int padH, + int dilationT, int dilationW, int dilationH); + +TH_API void THNN_(VolumetricDilatedConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, // [OPTIONAL] + THTensor *columns, + THTensor *ones, + int kT, int kW, int kH, + int dT, int dW, int dH, + int padT, int padW, int padH, + int dilationT, int dilationW, int dilationH, + accreal scale); + +TH_API void THNN_(VolumetricMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH, + bool ceilMode); +TH_API void THNN_(VolumetricMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH, + bool ceilMode); + +TH_API void THNN_(VolumetricDilatedMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH, + int dilationT, int dilationW, int dilationH, + bool ceilMode); +TH_API void THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+ THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH, + int dilationT, int dilationW, int dilationH, + bool ceilMode); + +TH_API void THNN_(VolumetricMaxUnpooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int oT, int oW, int oH, + int dT, int dW, int dH, + int pT, int pW, int pH); +TH_API void THNN_(VolumetricMaxUnpooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int oT, int oW, int oH, + int dT, int dW, int dH, + int pT, int pW, int pH); + +TH_API void THNN_(SpatialReflectionPadding_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int pad_l, int pad_r, + int pad_t, int pad_b); + +TH_API void THNN_(SpatialReflectionPadding_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int pad_l, int pad_r, + int pad_t, int pad_b); + +TH_API void THNN_(SpatialReplicationPadding_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int pad_l, int pad_r, + int pad_t, int pad_b); + +TH_API void THNN_(SpatialReplicationPadding_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int pad_l, int pad_r, + int pad_t, int pad_b); + +TH_API void THNN_(VolumetricReplicationPadding_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int pleft, int pright, + int ptop, int pbottom, + int pfront, int pback); + +TH_API void THNN_(VolumetricReplicationPadding_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int pleft, int pright, + int ptop, int pbottom, + int pfront, int pback); + +TH_API void THNN_(VolumetricUpSamplingNearest_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int scale_factor); +TH_API void THNN_(VolumetricUpSamplingNearest_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int scale_factor); + +TH_API void THNN_(VolumetricUpSamplingTrilinear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int outputDepth, + int outputHeight, + int outputWidth); +TH_API void THNN_(VolumetricUpSamplingTrilinear_updateGradInput)( + THNNState *state, + THTensor *gradOutput, + THTensor *gradInput, + int nbatch, + int nchannels, + int inputDepth, + int inputHeight, + int inputWidth, + int outputDepth, + int outputHeight, + int outputWidth); + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Tanh.c b/contrib/lua-torch/nn/lib/THNN/generic/Tanh.c new file mode 100644 index 000000000..ecf0708c2 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/Tanh.c @@ -0,0 +1,49 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Tanh.c" +#else + +void THNN_(Tanh_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output) +{ + THTensor_(tanh)(output, input); +} + +void THNN_(Tanh_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *output) +{ + THNN_CHECK_SHAPE(output, gradOutput); + THTensor_(resizeAs)(gradInput, output); + + if (output->nDimension == 1 || + !THTensor_(isContiguous)(output) || + !THTensor_(isContiguous)(gradOutput) || + !THTensor_(isContiguous)(gradInput)) + { + TH_TENSOR_APPLY3(real, gradInput, 
real, gradOutput, real, output, + real z = *output_data; \ + *gradInput_data = *gradOutput_data * (1. - z*z); + ); + } + else + { + real* ptr_gradOutput = THTensor_(data)(gradOutput); + real* ptr_gradInput = THTensor_(data)(gradInput); + real* ptr_output = THTensor_(data)(output); + long i; + +#pragma omp parallel for private(i) + for (i = 0; i < THTensor_(nElement)(gradInput); i++) + { + real z = ptr_output[i]; + ptr_gradInput[i] = ptr_gradOutput[i] * (1. - z*z); + } + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalConvolution.c new file mode 100644 index 000000000..8cfd97d85 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalConvolution.c @@ -0,0 +1,398 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalConvolution.c" +#else + +static inline void THNN_(TemporalConvolution_shapeCheck)( + THNNState *state, + THTensor *input, + int kW, + int dW, + int *inputFrameSize) { + + THArgCheck(kW > 0, 9, + "kernel size should be greater than zero, but got kW: %d", kW); + THArgCheck(dW > 0, 11, + "stride should be greater than zero, but got dW: %d", dW); + + int dimS = 0; // sequence dimension + int dimF = 1; // feature dimension + + if (input->nDimension == 3) + { + dimS = 1; + dimF = 2; + } + THNN_ARGCHECK(input->nDimension == 2 || input->nDimension == 3, 2, input, + "2D or 3D (batch mode) tensor expected for input, but got: %s"); + if (inputFrameSize != NULL) { + THArgCheck(input->size[dimF] == *inputFrameSize, 2, + "invalid input frame size. Got: %d, Expected: %d", + input->size[dimF], *inputFrameSize); + } + THArgCheck(input->size[dimS] >= kW, 2, + "input sequence smaller than kernel size. Got: %d, Expected: %d", + input->size[dimS], kW); +} + +void THNN_(TemporalConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + int kW, + int dW, + int inputFrameSize, + int outputFrameSize) +{ + THTensor *outputWindow, *inputWindow; + int nInputFrame, nOutputFrame; + long k, i; + + int dimS = 0; // sequence dimension + int dimF = 1; // feature dimension + + if (input->nDimension == 3) + { + dimS = 1; + dimF = 2; + } + + THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous"); + THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous"); + THNN_(TemporalConvolution_shapeCheck) + (state, input, kW, dW, &inputFrameSize); + input = THTensor_(newContiguous)(input); + outputWindow = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + nInputFrame = input->size[dimS]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + if (input->nDimension == 2) + { + THTensor_(resize2d)(output, + nOutputFrame, + outputFrameSize); + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(outputWindow, output, 0, k); + THTensor_(copy)(outputWindow, bias); + } + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, input->storage, + input->storageOffset+k*dW*input->size[1], + nFrame, inputFrameStride*input->size[1], + kW*input->size[1], 1); + + THTensor_(setStorage2d)(outputWindow, output->storage, + output->storageOffset + k*output->size[1], + nFrame, outputFrameStride*output->size[1], + output->size[1], 1); + + THTensor *tweight = THTensor_(new)(); + 
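/* The two setStorage2d calls above view nFrame strided output frames and their non-overlapping kW-frame input windows as plain 2D matrices, so the addmm below computes all of them in one GEMM: outputWindow += inputWindow * weight^T */ +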
THTensor_(transpose)(tweight, weight, 0, 1); + THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, tweight); + THTensor_(free)(tweight); + } + } + else + { + THTensor *outputSample = THTensor_(new)(); + THTensor *inputSample = THTensor_(new)(); + int nBatchFrame = input->size[0]; + + THTensor_(resize3d)(output, + nBatchFrame, + nOutputFrame, + outputFrameSize); + + for(i = 0; i < nBatchFrame; i++) + { + THTensor_(select)(outputSample, output, 0, i); + THTensor_(select)(inputSample, input, 0, i); + long nOutputSampleFrame = nOutputFrame; + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(outputWindow, outputSample, 0, k); + THTensor_(copy)(outputWindow, bias); + } + + /* ouch */ + for(k = 0; nOutputSampleFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputSampleFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, inputSample->storage, + inputSample->storageOffset+k*dW*inputSample->size[1], + nFrame, inputFrameStride*inputSample->size[1], + kW*inputSample->size[1], 1); + + THTensor_(setStorage2d)(outputWindow, outputSample->storage, + outputSample->storageOffset + k*outputSample->size[1], + nFrame, outputFrameStride*outputSample->size[1], + outputSample->size[1], 1); + + THTensor *tweight = THTensor_(new)(); + THTensor_(transpose)(tweight, weight, 0, 1); + THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, tweight); + THTensor_(free)(tweight); + } + } + THTensor_(free)(outputSample); + THTensor_(free)(inputSample); + } + + THTensor_(free)(outputWindow); + THTensor_(free)(inputWindow); + THTensor_(free)(input); + +} + +void THNN_(TemporalConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + int kW, + int dW) +{ + long nInputFrame; + long nOutputFrame; + + THTensor *gradOutputWindow; + THTensor *gradInputWindow; + long k, i; + + int dimS = 0; // sequence dimension + int dimF = 1; // feature dimension + + if (gradOutput->nDimension == 3) + { + dimS = 1; + dimF = 2; + } + + THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous"); + THNN_(TemporalConvolution_shapeCheck)( + state, input, kW, dW, NULL); + nInputFrame = input->size[dimS]; + nOutputFrame = gradOutput->size[dimS]; + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + gradOutputWindow = THTensor_(new)(); + gradInputWindow = THTensor_(new)(); + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (gradOutput->nDimension == 2) + { + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage, + gradOutput->storageOffset + k*gradOutput->size[1], + nFrame, outputFrameStride*gradOutput->size[1], + gradOutput->size[1], 1); + + THTensor_(setStorage2d)(gradInputWindow, gradInput->storage, + gradInput->storageOffset+k*dW*gradInput->size[1], + nFrame, inputFrameStride*gradInput->size[1], + kW*gradInput->size[1], 1); + + THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight); + } + } + else + { + THTensor *gradOutputSample = THTensor_(new)(); + THTensor *gradInputSample = THTensor_(new)(); + int nBatchFrame = input->size[0]; + + for(i = 
0; i < nBatchFrame; i++) + { + THTensor_(select)(gradOutputSample, gradOutput, 0, i); + THTensor_(select)(gradInputSample, gradInput, 0, i); + int nOutputSampleFrame = nOutputFrame; + + /* ouch */ + for(k = 0; nOutputSampleFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputSampleFrame -= nFrame; + + THTensor_(setStorage2d)(gradOutputWindow, gradOutputSample->storage, + gradOutputSample->storageOffset + k*gradOutputSample->size[1], + nFrame, outputFrameStride*gradOutputSample->size[1], + gradOutputSample->size[1], 1); + + THTensor_(setStorage2d)(gradInputWindow, gradInputSample->storage, + gradInputSample->storageOffset+k*dW*gradInputSample->size[1], + nFrame, inputFrameStride*gradInputSample->size[1], + kW*gradInputSample->size[1], 1); + + THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight); + } + } + THTensor_(free)(gradOutputSample); + THTensor_(free)(gradInputSample); + } + + THTensor_(free)(gradOutputWindow); + THTensor_(free)(gradInputWindow); + THTensor_(free)(gradOutput); + THTensor_(free)(input); + +} + +void THNN_(TemporalConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + int kW, + int dW, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + long nInputFrame; + long nOutputFrame; + + THTensor *gradOutputWindow; + THTensor *inputWindow; + long k, i; + + int dimS = 0; // sequence dimension + int dimF = 1; // feature dimension + + if (gradOutput->nDimension == 3) + { + dimS = 1; + dimF = 2; + } + + THNN_(TemporalConvolution_shapeCheck)( + state, input, kW, dW, NULL); + nInputFrame = input->size[dimS]; + nOutputFrame = gradOutput->size[dimS]; + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + gradOutputWindow = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + if (input->nDimension == 2) + { + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(gradOutputWindow, gradOutput, 0, k); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow); + } + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, input->storage, + input->storageOffset+k*dW*input->size[1], + nFrame, inputFrameStride*input->size[1], + kW*input->size[1], 1); + + THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage, + gradOutput->storageOffset + k*gradOutput->size[1], + nFrame, outputFrameStride*gradOutput->size[1], + gradOutput->size[1], 1); + + THTensor *tgradOutputWindow = THTensor_(new)(); + THTensor_(transpose)(tgradOutputWindow, gradOutputWindow, 0, 1); + THTensor_(addmm)(gradWeight, 1, gradWeight, scale, tgradOutputWindow, inputWindow); + THTensor_(free)(tgradOutputWindow); + } + } + else + { + THTensor *gradOutputSample = THTensor_(new)(); + THTensor *inputSample = THTensor_(new)(); + int nBatchFrame = input->size[0]; + + for(i = 0; i < nBatchFrame; i++) + { + THTensor_(select)(gradOutputSample, gradOutput, 0, i); + THTensor_(select)(inputSample, input, 0, i); + int nOutputSampleFrame = nOutputFrame; + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(gradOutputWindow, gradOutputSample, 0, k); + 
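/* bias gradient: gradBias += scale * gradOutput, summed over every output frame of this sample */ +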
THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow); + } + + /* ouch */ + for(k = 0; nOutputSampleFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputSampleFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, inputSample->storage, + inputSample->storageOffset+k*dW*inputSample->size[1], + nFrame, inputFrameStride*inputSample->size[1], + kW*inputSample->size[1], 1); + + THTensor_(setStorage2d)(gradOutputWindow, gradOutputSample->storage, + gradOutputSample->storageOffset + k*gradOutputSample->size[1], + nFrame, outputFrameStride*gradOutputSample->size[1], + gradOutputSample->size[1], 1); + + THTensor *tgradOutputWindow = THTensor_(new)(); + THTensor_(transpose)(tgradOutputWindow, gradOutputWindow, 0, 1); + THTensor_(addmm)(gradWeight, 1, gradWeight, scale, tgradOutputWindow, inputWindow); + THTensor_(free)(tgradOutputWindow); + } + } + THTensor_(free)(gradOutputSample); + THTensor_(free)(inputSample); + } + + THTensor_(free)(gradOutputWindow); + THTensor_(free)(inputWindow); + THTensor_(free)(gradOutput); + THTensor_(free)(input); + +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalMaxPooling.c new file mode 100644 index 000000000..344c1b3fd --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalMaxPooling.c @@ -0,0 +1,283 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalMaxPooling.c" +#else + +static inline void THNN_(TemporalMaxPooling_shapeCheck)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THIndexTensor *indices, + int kW, + int dW) { + long niframe; + long framesize; + long noframe; + + int dimS = 0; // sequence dimension + int dimF = 1; // feature dimension + int ndims = input->nDimension; + + if (input->nDimension == 3) + { + dimS = 1; + dimF = 2; + } + + niframe = input->size[dimS]; + framesize = input->size[dimF]; + noframe = (niframe - kW) / dW + 1; + + THArgCheck(kW > 0, 5, + "kernel size should be greater than zero, but got kW: %d", kW); + THArgCheck(dW > 0, 6, + "stride should be greater than zero, but got dW: %d", dW); + + THNN_ARGCHECK(input->nDimension == 2 || input->nDimension == 3, 2, input, + "2D or 3D (batch mode) tensor expected for input, but got: %s"); + THArgCheck(input->size[dimS] >= kW, 2, + "input sequence smaller than kernel size. 
Got: %d, Expected: %d", + input->size[dimS], kW); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndims, dimS, noframe); + THNN_CHECK_DIM_SIZE(gradOutput, ndims, dimF, framesize); + } + if (indices != NULL) { + THNN_CHECK_DIM_SIZE_INDICES(indices, ndims, dimS, noframe); + THNN_CHECK_DIM_SIZE_INDICES(indices, ndims, dimF, framesize); + } +} + +void THNN_(TemporalMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kW, + int dW) +{ + long niframe; + long framesize; + long noframe; + + real *input_data; + real *output_data; + THIndex_t *indices_data; + + long t, y; + + int dimS = 0; // sequence dimension + int dimF = 1; // feature dimension + + THNN_(TemporalMaxPooling_shapeCheck)(state, input, NULL, NULL, kW, dW); + + if (input->nDimension == 3) + { + dimS = 1; + dimF = 2; + } + + /* sizes */ + niframe = input->size[dimS]; + framesize = input->size[dimF]; + noframe = (niframe - kW) / dW + 1; + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + if (input->nDimension == 2) + { + /* resize output */ + THTensor_(resize2d)(output, noframe, framesize); + + /* indices will contain index locations for each output point */ + THIndexTensor_(resize2d)(indices, noframe, framesize); + + /* get raw pointers */ + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + for(t = 0; t < noframe; t++) + { + real *ip = input_data + t*framesize*dW; + real *op = output_data + t*framesize; + THIndex_t *xp = indices_data + t*framesize; +#pragma omp parallel for private(y) + for(y = 0; y < framesize; y++) + { + /* compute local max: */ + long maxindex = -1; + real maxval = -THInf; + long x; + for(x = 0; x < kW; x++) + { + real val = ip[x*framesize+y]; + if (val > maxval) + { + maxval = val; + maxindex = x; + } + } + + /* set output to local max */ + op[y] = maxval; + xp[y] = (THIndex_t)maxindex; + } + } + } + else + { + /* number of batch frames */ + long nbframe = input->size[0]; + long i; + + /* resize output */ + THTensor_(resize3d)(output, nbframe, noframe, framesize); + + /* indices will contain index locations for each output point */ + THIndexTensor_(resize3d)(indices, nbframe, noframe, framesize); + + /* get raw pointers */ + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + for(i = 0; i < nbframe; i++) + { + real *inputSample_data = input_data + i*niframe*framesize; + real *outputSample_data = output_data + i*noframe*framesize; + THIndex_t *indicesSample_data = indices_data + i*noframe*framesize; + + for(t = 0; t < noframe; t++) + { + real *ip = inputSample_data + t*framesize*dW; + real *op = outputSample_data + t*framesize; + THIndex_t *xp = indicesSample_data + t*framesize; + +#pragma omp parallel for private(y) + for(y = 0; y < framesize; y++) + { + /* compute local max: */ + long maxindex = -1; + real maxval = -THInf; + long x; + for(x = 0; x < kW; x++) + { + real val = ip[x*framesize+y]; + if (val > maxval) + { + maxval = val; + maxindex = x; + } + } + + /* set output to local max */ + op[y] = maxval; + xp[y] = (THIndex_t)maxindex; + } + } + } + } + + /* cleanup */ + THTensor_(free)(input); + +} + +void THNN_(TemporalMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kW, + int dW) +{ + long niframe; + int noframe; + long framesize; + + real *gradInput_data; + real
*gradOutput_data; + THIndex_t *indices_data; + + long t, y; + + THNN_(TemporalMaxPooling_shapeCheck)(state, input, gradOutput, indices, kW, dW); + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize and zero */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + int dimS = 0; // sequence dimension + int dimF = 1; // feature dimension + + if (input->nDimension == 3) + { + dimS = 1; + dimF = 2; + } + /* sizes */ + niframe = input->size[dimS]; + noframe = gradOutput->size[dimS]; + framesize = gradOutput->size[dimF]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THIndexTensor_(data)(indices); + + if (input->nDimension == 2) + { + for(t = 0; t < noframe; t++) + { + real *gip = gradInput_data + t*framesize*dW; + real *gop = gradOutput_data + t*framesize; + THIndex_t *xp = indices_data + t*framesize; +#pragma omp parallel for private(y) + for(y = 0; y < framesize; y++) + { + /* compute local max: */ + long maxindex = (long)xp[y]; + if (maxindex != -1) + gip[maxindex*framesize+y] += gop[y]; + } + } + } + else + { + /* number of batch frames */ + long nbframe = input->size[0]; + long i; + + for(i = 0; i < nbframe; i++) + { + real *gradInputSample_data = gradInput_data + i*niframe*framesize; + real *gradOutputSample_data = gradOutput_data + i*noframe*framesize; + THIndex_t *indicesSample_data = indices_data + i*noframe*framesize; + + for(t = 0; t < noframe; t++) + { + real *gip = gradInputSample_data + t*framesize*dW; + real *gop = gradOutputSample_data + t*framesize; + THIndex_t *xp = indicesSample_data + t*framesize; +#pragma omp parallel for private(y) + for(y = 0; y < framesize; y++) + { + /* compute local max: */ + long maxindex = (long)xp[y]; + if (maxindex != -1) + gip[maxindex*framesize+y] += gop[y]; + } + } + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c new file mode 100644 index 000000000..e3ae41e22 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c @@ -0,0 +1,472 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalRowConvolution.c" +#else + +static inline void THNN_(TemporalRowConvolution_shapeCheck)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *weight, + THTensor *bias, + int kW, + int dW, + int padW) { + + THArgCheck(kW > 0, 5, + "kernel size should be greater than zero, but got kW: %d", kW); + THArgCheck(dW > 0, 6, + "stride should be greater than zero, but got dW: %d", dW); + THNN_ARGCHECK(weight->nDimension == 3, 3, weight, + "3D weight tensor expected, but got: %s"); + THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous"); + THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous"); + + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]); + } + + // we're always looking at (possibly batch) x feats x seq + int ndim = input->nDimension; + int dimF = 0; + int dimS = 1; + + if (ndim == 3) { + ++dimS; + ++dimF; + } + + THNN_ARGCHECK(ndim == 2 || ndim == 3, 1, input, + "2D or 3D (batch mode) input tensor expected, but got :%s"); + + long inputFrameSize = weight->size[0]; + long nInputFrame = input->size[dimS]; + long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; + + if (nOutputFrame < 1) { + THError("Given input 
size: (%d x %d). " + "Calculated output size: (%d x %d). Output size is too small", + inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame); + } + + THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame); + } +} + +static void THNN_(unfolded_acc_row)( + THTensor *finput, + THTensor *input, + int kW, + int dW, + int padW, + long inputFrameSize, + long nInputFrame, + long nOutputFrame) { + + size_t c; + real *input_data = THTensor_(data)(input); + real *finput_data = THTensor_(data)(finput); + +// #pragma omp parallel for private(c) + for (c = 0; c < inputFrameSize; c++) { + size_t kw, x; + long long ix = 0; + + for (kw = 0; kw < kW; kw++) { + real *src = finput_data + + c * (kW * nOutputFrame) + + kw * (nOutputFrame); + real *dst = input_data + c * (nInputFrame); + + ix = (long long)(kw); + if (dW == 1) { + real *dst_slice = dst + (size_t)(ix); + THVector_(cadd)(dst_slice, dst_slice, src, 1, nOutputFrame); + } else { + for (x = 0; x < nOutputFrame; x++) { + real *dst_slice = dst + (size_t)(ix + x * dW); + THVector_(cadd)(dst_slice, dst_slice, + src + (size_t)(x), 1, 1); + } + } + } + } +} + +static void THNN_(unfolded_copy_row)( + THTensor *finput, + THTensor *input, + int kW, + int dW, + int padW, + long inputFrameSize, + long nInputFrame, + long nOutputFrame) { + + long k; + real *input_data = THTensor_(data)(input); + real *finput_data = THTensor_(data)(finput); + +// #pragma omp parallel for private(k) + for (k = 0; k < inputFrameSize * kW; k++) { + size_t c = k / kW; + size_t rest = k % kW; + size_t kw = rest % kW; + size_t x; + long long ix; + real *dst = finput_data + c * (kW * nOutputFrame) + kw * (nOutputFrame); + real *src = input_data + c * (nInputFrame); + + ix = (long long)(kw); + if (dW == 1) { + memcpy(dst, src+(size_t)(ix), sizeof(real) * (nOutputFrame)); + } else { + for (x = 0; x < nOutputFrame; x++) { + memcpy(dst + (size_t)(x), src + (size_t)(ix + x * dW), + sizeof(real) * 1); + } + } + } +} + +static void THNN_(TemporalRowConvolution_updateOutput_frame)( + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + int kW, + int dW, + int padW, + long inputFrameSize, + long nInputFrame, + long nOutputFrame) { + + long i; + + THTensor *output3d = THTensor_(newWithStorage3d)( + output->storage, output->storageOffset, + inputFrameSize, -1, + 1, -1, + nOutputFrame, -1); + + THNN_(unfolded_copy_row)(finput, input, kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); + + THTensor_(zero)(output); + + if (bias != NULL) { + for (i = 0; i < inputFrameSize; i++) + THVector_(fill) + (output->storage->data + output->storageOffset + + output->stride[0] * i, + THTensor_(get1d)(bias, i), nOutputFrame); + } + + THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput); + + THTensor_(free)(output3d); +} + +void THNN_(TemporalRowConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + THTensor *fgradInput, // unused here but needed for Cuda + int kW, + int dW, + int padW, + bool featFirst) { + + int ndim = input->nDimension; + + THTensor *tinput; + if (!featFirst) { + tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); + input = THTensor_(newContiguous)(tinput); + } else { + input = THTensor_(newContiguous)(input); + } + + THNN_(TemporalRowConvolution_shapeCheck)( + state, input, NULL, 
weight, bias, kW, dW, padW); + + long inputFrameSize = weight->size[0]; + long nInputFrame = input->size[ndim - 1]; + long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; + + if (ndim == 2) { /* non-batch mode */ + + THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame); + THTensor_(resize2d)(output, inputFrameSize, nOutputFrame); + + THTensor_(zero)(finput); + THTensor_(zero)(output); + + THNN_(TemporalRowConvolution_updateOutput_frame) + (input, output, weight, bias, finput, + kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); + + } else { + long T = input->size[0]; + long t; + + THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame); + THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame); + + THTensor_(zero)(finput); + THTensor_(zero)(output); + +#pragma omp parallel for private(t) + for (t = 0; t < T; t++) { + THTensor *input_t = THTensor_(newSelect)(input, 0, t); + THTensor *output_t = THTensor_(newSelect)(output, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(TemporalRowConvolution_updateOutput_frame) + (input_t, output_t, weight, bias, finput_t, + kW, dW, padW, inputFrameSize, nInputFrame, nOutputFrame); + + THTensor_(free)(input_t); + THTensor_(free)(output_t); + THTensor_(free)(finput_t); + } + } + + if (!featFirst) { // NOTE: output will NOT be contiguous in this case + THTensor_(transpose)(output, output, ndim - 1, ndim - 2); + THTensor_(free)(tinput); + } + + THTensor_(free)(input); +} + +static void THNN_(TemporalRowConvolution_updateGradInput_frame)( + THTensor *gradInput, + THTensor *gradOutput, + THTensor *weight, + THTensor *fgradInput, + int kW, + int dW, + int padW, + long inputFrameSize, + long nInputFrame, + long nOutputFrame) { + + THTensor *gradOutput3d = THTensor_(newWithStorage3d)( + gradOutput->storage, gradOutput->storageOffset, + inputFrameSize, -1, + 1, -1, + nOutputFrame, -1); + + // weight: inputFrameSize x kW x 1 + // gradOutput3d: inputFrameSize x 1 x nOutputFrame + THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d); + // fgradInput: inputFrameSize x kW x nOutputFrame + THTensor_(free)(gradOutput3d); + + THTensor_(zero)(gradInput); + + THNN_(unfolded_acc_row)(fgradInput, gradInput, + kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); +} + +void THNN_(TemporalRowConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, + int dW, + int padW, + bool featFirst) { + + int ndim = input->nDimension; + + THTensor *tinput, *tgradOutput; + + if (!featFirst) { + tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); + tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2); + + input = THTensor_(newContiguous)(tinput); + gradOutput = THTensor_(newContiguous)(tgradOutput); + + } else { + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + } + + THNN_(TemporalRowConvolution_shapeCheck)(state, input, gradOutput, weight, + NULL, kW, dW, padW); + + long inputFrameSize = weight->size[0]; + long nInputFrame = input->size[ndim - 1]; + long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; + + THTensor_(resizeAs)(fgradInput, finput); + THTensor_(resizeAs)(gradInput, input); + + THTensor_(zero)(fgradInput); + THTensor_(zero)(gradInput); + + THTensor *tweight = THTensor_(new)(); + THTensor_(transpose)(tweight, weight, 1, 2); + + if (ndim == 2) { + 
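+ // Shape sketch for the non-batch path (derived from the helper's own
+ // comments): tweight is inputFrameSize x kW x 1, gradOutput is viewed as
+ // inputFrameSize x 1 x nOutputFrame, so the baddbmm inside the helper
+ // yields fgradInput of shape inputFrameSize x kW x nOutputFrame, which
+ // unfolded_acc_row then scatters back onto gradInput, undoing
+ // unfolded_copy_row.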
THNN_(TemporalRowConvolution_updateGradInput_frame) + (gradInput, gradOutput, tweight, fgradInput, + kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); + } else { + long T = input->size[0]; + long t; + +#pragma omp parallel for private(t) + for (t = 0; t < T; t++) { + + THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); + + THNN_(TemporalRowConvolution_updateGradInput_frame) + (gradInput_t, gradOutput_t, tweight, fgradInput_t, + kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); + + THTensor_(free)(gradInput_t); + THTensor_(free)(gradOutput_t); + THTensor_(free)(fgradInput_t); + } + } + + THTensor_(free)(tweight); + + if (!featFirst) { // NOTE: gradInput will NOT be contiguous in this case + + THTensor_(free)(tinput); + THTensor_(free)(tgradOutput); + + THTensor_(transpose)(gradInput, gradInput, ndim - 1, ndim - 2); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + +} + +static void THNN_(TemporalRowConvolution_accGradParameters_frame)( + THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, + THTensor *finput, real scale) { + + long i; + THTensor *gradOutput3d = THTensor_(newWithStorage3d)( + gradOutput->storage, gradOutput->storageOffset, + gradOutput->size[0], -1, + 1, -1, + gradOutput->size[1], -1); + + THTensor *tfinput = THTensor_(new)(); + THTensor_(transpose)(tfinput, finput, 1, 2); + // gradOutput3d: inputFrameSize x 1 x nOutputFrame + // finput: inputFrameSize x nOutputFrame x kW + THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, tfinput); + // gradWeight: inputFrameSize x 1 x kW + THTensor_(free)(tfinput); + + if (gradBias != NULL) { + for (i = 0; i < gradBias->size[0]; i++) { + long k; + real sum = 0; + real *data = gradOutput3d->storage->data + + gradOutput3d->storageOffset + + i * gradOutput3d->stride[0]; + for (k = 0; k < gradOutput3d->size[2]; k++) { + sum += data[k]; + } + (gradBias->storage->data + gradBias->storageOffset)[i] + += scale * sum; + } + } + + THTensor_(free)(gradOutput3d); + +} + +void THNN_(TemporalRowConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + THTensor *fgradInput, + int kW, + int dW, + int padW, + bool featFirst, + accreal scale_) { + + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + int ndim = input->nDimension; + + THTensor *tinput, *tgradOutput; + + if (!featFirst) { + tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); + tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2); + + input = THTensor_(newContiguous)(tinput); + gradOutput = THTensor_(newContiguous)(tgradOutput); + } else { + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + } + + THNN_(TemporalRowConvolution_shapeCheck) + (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW); + + long inputFrameSize = gradWeight->size[0]; + long nInputFrame = input->size[ndim - 1]; + long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; + + if (ndim == 2) { + THNN_(TemporalRowConvolution_accGradParameters_frame)( + gradOutput, gradWeight, gradBias, finput, scale); + } else { + long T = input->size[0]; + long t; + + for (t = 0; t < T; t++) { + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + 
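+ // Every time step accumulates into the same gradWeight/gradBias, which is
+ // presumably why this loop stays serial, unlike the OpenMP loop in
+ // updateOutput where each t writes a disjoint output slice.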
THNN_(TemporalRowConvolution_accGradParameters_frame)( + gradOutput_t, gradWeight, gradBias, finput_t, scale); + + THTensor_(free)(gradOutput_t); + THTensor_(free)(finput_t); + } + } + + if (!featFirst) { + THTensor_(free)(tinput); + THTensor_(free)(tgradOutput); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalSubSampling.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalSubSampling.c new file mode 100644 index 000000000..68f35e28a --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalSubSampling.c @@ -0,0 +1,156 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalSubSampling.c" +#else + +static inline void THNN_(TemporalSubSampling_shapeCheck)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + int kW, + int dW, + int *inputFrameSize) { + int nInputFrame, nOutputFrame; + + THArgCheck(kW > 0, 6, + "kernel size should be greater than zero, but got kW: %d", kW); + THArgCheck(dW > 0, 7, + "stride should be greater than zero, but got dW: %d", dW); + + THNN_ARGCHECK(input->nDimension == 2, 2, input, + "2D tensor expected for input, but got: %s"); + if (inputFrameSize != NULL) { + THArgCheck( input->size[1] == *inputFrameSize, 2, + "invalid input frame size. Got: %d, Expected: %d", + input->size[1], *inputFrameSize); + } + THArgCheck( input->size[0] >= kW, 2, + "input sequence smaller than kernel size. Got %d, Expected: %d", + input->size[0], kW); + + nInputFrame = input->size[0]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, input->nDimension, 0, nOutputFrame); + if (inputFrameSize != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, input->nDimension, 1, *inputFrameSize); + } + } +} + +void THNN_(TemporalSubSampling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + int kW, + int dW, + int inputFrameSize) +{ + THTensor *outputFrame, *inputWindow; + int nInputFrame, nOutputFrame; + long k; + + THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous"); + THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous"); + THNN_(TemporalSubSampling_shapeCheck)(state, input, NULL, kW, dW, &inputFrameSize); + + outputFrame = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + nInputFrame = input->size[0]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + THTensor_(resize2d)(output, + nOutputFrame, + inputFrameSize); + + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(narrow)(inputWindow, input, 0, k*dW, kW); + THTensor_(select)(outputFrame, output, 0, k); + THTensor_(sum)(outputFrame, inputWindow, 0, 1); + THTensor_(cmul)(outputFrame, outputFrame, weight); + if (bias) + THTensor_(cadd)(outputFrame, outputFrame, 1, bias); + } + + THTensor_(free)(outputFrame); + THTensor_(free)(inputWindow); +} + +void THNN_(TemporalSubSampling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + int kW, + int dW) +{ + + THTensor *gradOutputFrame; + THTensor *gradInputWindow, *buffer, *kwunit; + long k; + + THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous"); + THNN_(TemporalSubSampling_shapeCheck)(state, input, gradOutput, kW, dW, NULL); + + gradOutputFrame = THTensor_(new)(); + gradInputWindow = THTensor_(new)(); + buffer = THTensor_(new)(); + kwunit = THTensor_(newWithSize1d)(kW); + + THTensor_(fill)(kwunit, 1); + 
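+ /* kwunit is a kW-long vector of ones: the addr() call below adds the
+ rank-1 outer product of kwunit and buffer to each window, replicating
+ weight * gradOutput[k] across all kW rows that the forward pass summed
+ over -- the adjoint of the windowed sum. */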
THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(narrow)(gradInputWindow, gradInput, 0, k*dW, kW); + THTensor_(select)(gradOutputFrame, gradOutput, 0, k); + THTensor_(cmul)(buffer, weight, gradOutputFrame); + THTensor_(addr)(gradInputWindow, 1, gradInputWindow, 1, kwunit, buffer); + } + + THTensor_(free)(gradOutputFrame); + THTensor_(free)(gradInputWindow); + THTensor_(free)(buffer); + THTensor_(free)(kwunit); +} + +void THNN_(TemporalSubSampling_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + int kW, + int dW, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + THTensor *gradOutputFrame; + THTensor *inputWindow, *buffer; + long k; + + THNN_(TemporalSubSampling_shapeCheck)(state, input, gradOutput, kW, dW, NULL); + gradOutputFrame = THTensor_(new)(); + inputWindow = THTensor_(new)(); + buffer = THTensor_(new)(); + + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(narrow)(inputWindow, input, 0, k*dW, kW); + THTensor_(select)(gradOutputFrame, gradOutput, 0, k); + THTensor_(sum)(buffer, inputWindow, 0, 1); + THTensor_(addcmul)(gradWeight, gradWeight, scale, buffer, gradOutputFrame); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutputFrame); + } + + THTensor_(free)(gradOutputFrame); + THTensor_(free)(inputWindow); + THTensor_(free)(buffer); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Threshold.c b/contrib/lua-torch/nn/lib/THNN/generic/Threshold.c new file mode 100644 index 000000000..949c7a07c --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/Threshold.c @@ -0,0 +1,64 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Threshold.c" +#else + +void THNN_(Threshold_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + accreal threshold_, + accreal val_, + bool inplace) +{ + real threshold = TH_CONVERT_ACCREAL_TO_REAL(threshold_); + real val = TH_CONVERT_ACCREAL_TO_REAL(val_); + if (inplace) + { + TH_TENSOR_APPLY(real, input, + if (*input_data <= threshold) + *input_data = val; + ); + THTensor_(set)(output, input); + } + else + { + THTensor_(resizeAs)(output, input); + TH_TENSOR_APPLY2(real, output, real, input, + *output_data = (*input_data > threshold) ? 
*input_data : val; + ); + } +} + +void THNN_(Threshold_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + accreal threshold_, + accreal val_, + bool inplace) +{ + real threshold = TH_CONVERT_ACCREAL_TO_REAL(threshold_); + real val = TH_CONVERT_ACCREAL_TO_REAL(val_); + THNN_CHECK_NELEMENT(input, gradOutput); + if (inplace) + { + TH_TENSOR_APPLY2(real, gradOutput, real, input, + if ((*input_data) <= threshold) + *gradOutput_data = 0; + ); + THTensor_(set)(gradInput, gradOutput); + } + else + { + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, + if ((*input_data) > threshold) + *gradInput_data = *gradOutput_data; + else + *gradInput_data = 0; + ); + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricAveragePooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricAveragePooling.c new file mode 100644 index 000000000..91c870e6f --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricAveragePooling.c @@ -0,0 +1,373 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c" +#else + +static inline void THNN_(VolumetricAveragePooling_shapeCheck)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH) { + long nslices; + long itime; + long iheight; + long iwidth; + long otime; + long oheight; + long owidth; + int ndim = input->nDimension; + int dimN = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + + if (input->nDimension == 5) + { + dimN++; + dimt++; + dimh++; + dimw++; + } + + THArgCheck(kT > 0 && kW > 0 && kH > 0, 5, + "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", + kT, kH, kW); + THArgCheck(dT > 0 && dW > 0 && dH > 0, 8, + "stride should be greater than zero, but got dT: %d dH: %d dW: %d", + dT, dH, dW); + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + + THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH + && input->size[dimt] >= kT, 2, + "input image (T: %d H: %d W: %d) smaller than " + "kernel size (kT: %d kH: %d kW: %d)", + input->size[dimt], input->size[dimh], input->size[dimw], + kT, kH, kW); + + /* sizes */ + nslices = input->size[dimN]; + itime = input->size[dimt]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + otime = (itime - kT) / dT + 1; + oheight = (iheight - kH) / dH + 1; + owidth = (iwidth - kW) / dW + 1; + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimN, nslices); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, otime); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, oheight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, owidth); + } +} + +static void THNN_(VolumetricAveragePooling_updateOutput_frame)( + real *input_p, + real *output_p, + long nslices, + long itime, + long iwidth, + long iheight, + long otime, + long owidth, + long oheight, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + /* loop over output */ + long i, j, ti; + for (ti = 0; ti < otime; ti++) + { + for (i = 0; i < oheight; i++) + { + for (j = 0; j < owidth; j++) + { + /* local pointers */ + real *ip = input_p + k * itime * iwidth * iheight + + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW; + real *op = output_p + k * otime * owidth * oheight + + ti * owidth * oheight + i * owidth + j; + 
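+ /* ip points at the first element of the kT x kH x kW window that
+ produces output element (ti, i, j); the strides dT/dH/dW only move
+ the window origin, while the loops below always span the full kernel. */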
+ /* compute local sum: */ + real sum = 0.0; + int x, y, z; + + for (z=0; z < kT; z++) + { + for (y = 0; y < kH; y++) + { + for (x = 0; x < kW; x++) + { + sum += *(ip + z * iwidth * iheight + y * iwidth + x); + } + } + } + + /* set output to local average */ + *op = sum / (kT * kW * kH); + } + } + } + } +} + +void THNN_(VolumetricAveragePooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH) +{ + long nslices; + long itime; + long iheight; + long iwidth; + long otime; + long oheight; + long owidth; + real *input_data; + real *output_data; + + THNN_(VolumetricAveragePooling_shapeCheck)( + state, input, NULL, kT, kW, kH, + dT, dW, dH); + + int dimN = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + + if (input->nDimension == 5) + { + dimN++; + dimt++; + dimh++; + dimw++; + } + + /* sizes */ + nslices = input->size[dimN]; + itime = input->size[dimt]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + otime = (itime - kT) / dT + 1; + oheight = (iheight - kH) / dH + 1; + owidth = (iwidth - kW) / dW + 1; + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + if (input->nDimension == 4) /* non-batch mode */ + { + /* resize output */ + THTensor_(resize4d)(output, nslices, otime, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + + THNN_(VolumetricAveragePooling_updateOutput_frame)( + input_data, output_data, nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, + dT, dW, dH + ); + } + else /* batch mode */ + { + long p; + long nBatch = input->size[0]; + + long istride = nslices * itime * iwidth * iheight; + long ostride = nslices * otime * owidth * oheight; + + /* resize output */ + THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(p) + for (p=0; p < nBatch; p++) + { + THNN_(VolumetricAveragePooling_updateOutput_frame)( + input_data + p * istride, output_data + p * ostride, nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, + dT, dW, dH + ); + } + } + + /* cleanup */ + THTensor_(free)(input); +} + +static void THNN_(VolumetricAveragePooling_updateGradInput_frame)( + real *gradInput_p, + real *gradOutput_p, + long nslices, + long itime, + long iwidth, + long iheight, + long otime, + long owidth, + long oheight, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + /* loop over output */ + long i, j, ti; + for (ti = 0; ti < otime; ti++) + { + for (i = 0; i < oheight; i++) + { + for (j = 0; j < owidth; j++) + { + /* local pointers */ + real *ip = gradInput_p + k * itime * iwidth * iheight + + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW; + real *op = gradOutput_p + k * otime * owidth * oheight + + ti * owidth * oheight + i * owidth + j; + + /* scatter gradients out to footprint: */ + real val = *op / (kT * kW * kH); + int x,y,z; + for (z=0; z < kT; z++) + { + for (y = 0; y < kH; y++) + { + for (x = 0; x < kW; x++) + { + *(ip + z * iwidth * iheight + y * iwidth + x) += val; + } + } + } + } + } + } + } +} + +void THNN_(VolumetricAveragePooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH) +{ + int nslices; + int itime; + int iheight; + 
int iwidth; + int otime; + int oheight; + int owidth; + real *gradInput_data; + real *gradOutput_data; + + int dimN = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + + THNN_(VolumetricAveragePooling_shapeCheck)( + state, input, gradOutput, kT, kW, kH, + dT, dW, dH); + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (input->nDimension == 5) + { + dimN++; + dimt++; + dimh++; + dimw++; + } + + /* sizes */ + nslices = input->size[dimN]; + itime = input->size[dimt]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + otime = gradOutput->size[dimt]; + oheight = gradOutput->size[dimh]; + owidth = gradOutput->size[dimw]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + + /* backprop */ + if (input->nDimension == 4) /* non-batch mode */ + { + THNN_(VolumetricAveragePooling_updateGradInput_frame)( + gradInput_data, gradOutput_data, nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, + dT, dW, dH + ); + } + else /* batch mode */ + { + long p; + long nBatch = input->size[0]; + + long istride = nslices * itime * iwidth * iheight; + long ostride = nslices * otime * owidth * oheight; + +#pragma omp parallel for private(p) + for (p = 0; p < nBatch; p++) + { + THNN_(VolumetricAveragePooling_updateGradInput_frame)( + gradInput_data + p * istride, gradOutput_data + p * ostride, nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, + dT, dW, dH + ); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolution.c new file mode 100644 index 000000000..be1aa82e6 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolution.c @@ -0,0 +1,260 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricConvolution.c" +#else + +void THNN_(VolumetricConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, // only used by cuda impl + THTensor *fgradInput, // only used by cuda impl + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version + + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + + int dimt = 1; + int dimh = 2; + int dimw = 3; + + if (input->nDimension == 5) + { + dimt++; + dimh++; + dimw++; + } + + long nOutputPlane = weight->size[0]; + long kT = weight->size[2]; + long kH = weight->size[3]; + long kW = weight->size[4]; + long inputDepth = input->size[dimt]; + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long outputDepth = (inputDepth - kT) / dT + 1; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + THTensor *outn = THTensor_(new)(); + long i, j; + if (input->nDimension == 4) /* non-batch mode */ + { + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + + /* add bias */ + if (bias) { + for (i = 0; i < bias->size[0]; i++) + { + THTensor_(select)(outn, output, 0, i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } + } else { + 
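+ /* no bias: clear the accumulator, since conv3Dmv() below is called
+ with beta = 1.0 and adds onto whatever the output already holds */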
THTensor_(zero)(output); + } + + /* do convolutions */ + THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X"); + } + else /* batch mode */ + { + long nBatch = input->size[0]; + THTensor_(resize5d)(output, nBatch, nOutputPlane, outputDepth, outputHeight, outputWidth); + THTensor *inb = THTensor_(new)(); + THTensor *outb = THTensor_(new)(); + + /* loop over batches */ + for (j = 0; j < nBatch; j++) + { + THTensor_(select)(inb, input, 0, j); + THTensor_(select)(outb, output, 0, j); + + /* add bias */ + if (bias) { + for (i = 0; i < bias->size[0]; i++) + { + THTensor_(select)(outn, outb, 0, i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } + } else { + THTensor_(zero)(outb); + } + + /* do convolutions */ + THTensor_(conv3Dmv)(outb, 1.0, 1.0, inb, weight, dT, dH, dW, "V", "X"); + } + + THTensor_(free)(inb); + THTensor_(free)(outb); + } + THTensor_(free)(outn); +} + +void THNN_(VolumetricConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, // only used by cuda impl + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version + + THNN_ARGCHECK(weight->nDimension == 5, 4, weight, + "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor " + "expected for weight, but got: %s"); + + int nOutputPlane = (int)weight->size[0]; + + THNN_ARGCHECK(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3, + gradOutput, + "4D or 5D (batch mode) tensor expected for gradOutput, but got: %s"); + + int dimPlane = 0; + if (gradOutput->nDimension == 5) + { + dimPlane++; + } + + THArgCheck(nOutputPlane == gradOutput->size[dimPlane], 1, + "Number of output features is not equal to nOutputPlane" + ); + + /* gradient to input */ + THTensor *tweight = THTensor_(newTranspose)(weight, 0, 1); + if (gradOutput->nDimension == 4) /* non-batch mode */ + { + THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C"); + } + else /* batch mode */ + { + long nBatch = gradOutput->size[0]; + THTensor *ginpb = THTensor_(new)(); + THTensor *goutb = THTensor_(new)(); + long j; + + THTensor_(resize5d)(gradInput, + input->size[0], input->size[1], input->size[2], input->size[3], input->size[4] + ); + + /* loop over batches */ + for (j = 0; j < nBatch; j++) + { + THTensor_(select)(ginpb, gradInput, 0, j); + THTensor_(select)(goutb, gradOutput, 0, j); + THTensor_(conv3Dmv)(ginpb, 0.0, 1.0, goutb, tweight, dT, dH, dW, "F", "C"); + } + THTensor_(free)(ginpb); + THTensor_(free)(goutb); + } + + THTensor_(free)(tweight); +} + +void THNN_(VolumetricConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, // only used by cuda impl + THTensor *fgradInput, // only used by cuda impl + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version + + THNN_ARGCHECK(gradWeight->nDimension == 5, 4, gradWeight, + "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor " + "expected for gradWeight, but got: %s"); + + int nOutputPlane = (int)gradWeight->size[0]; + if (gradBias) { + THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5, + "gradBias tensor 
has wrong size" + ); + } + + long k; + real *gradBias_data; + THTensor *gradOutSlice; + int dimPlane = 0; + if (gradOutput->nDimension == 5) + { + dimPlane++; + } + + THArgCheck(nOutputPlane == gradOutput->size[dimPlane], 1, + "Number of output features is not equal to nOutputPlane" + ); + + if (gradOutput->nDimension == 4) /* non-batch mode */ + { + /* gradient to bias */ + if (gradBias) { + gradBias_data = THTensor_(data)(gradBias); + gradOutSlice = THTensor_(new)(); + for (k = 0; k < nOutputPlane; k++) + { + THTensor_(select)(gradOutSlice, gradOutput, 0, k); + gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice); + } + THTensor_(free)(gradOutSlice); + } + + /* gradient to kernels */ + THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW); + } + else /* batch mode */ + { + long nBatch = gradOutput->size[0]; + THTensor *inpb = THTensor_(new)(); + THTensor *goutb = THTensor_(new)(); + long j; + + /* loop over batches */ + for (j = 0; j < nBatch; j++) + { + THTensor_(select)(inpb, input, 0, j); + THTensor_(select)(goutb, gradOutput, 0, j); + + /* gradient to bias */ + if (gradBias) { + gradBias_data = THTensor_(data)(gradBias); + gradOutSlice = THTensor_(new)(); + for (k = 0; k < nOutputPlane; k++) + { + THTensor_(select)(gradOutSlice, goutb, 0, k); + gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice); + } + THTensor_(free)(gradOutSlice); + } + + /* gradient to kernels */ + THTensor_(conv3DRevger)(gradWeight, 1.0, scale, inpb, goutb, dT, dH, dW); + } + THTensor_(free)(inpb); + THTensor_(free)(goutb); + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolutionMM.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolutionMM.c new file mode 100644 index 000000000..00a121db6 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolutionMM.c @@ -0,0 +1,628 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricConvolutionMM.c" +#else + +static void inline THNN_(VolumetricConvolutionMM_shapeCheck)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *weight, + THTensor *bias, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) { + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, + "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW); + THArgCheck(dT > 0 && dW > 0 && dH > 0, 11, + "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW); + + int ndim = input->nDimension; + int dimf = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + + if (ndim == 5) + { + dimf++; + dimt++; + dimh++; + dimw++; + } + + long nInputPlane; + long inputDepth; + long inputHeight; + long inputWidth; + long nOutputPlane; + long outputDepth; + long outputHeight; + long outputWidth; + + nInputPlane = input->size[dimf]; + inputDepth = input->size[dimt]; + inputHeight = input->size[dimh]; + inputWidth = input->size[dimw]; + nOutputPlane = weight->size[0]; + outputDepth = (inputDepth + 2*pT - kT) / dT + 1; + outputHeight = (inputHeight + 2*pH - kH) / dH + 1; + outputWidth = (inputWidth + 2*pW - kW) / dW + 1; + + if (outputWidth < 1 || outputHeight < 1 || outputDepth < 1) + { + THError( + "Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). 
Output size is too small", + nInputPlane, inputDepth, inputHeight, inputWidth, + nOutputPlane, outputDepth, outputHeight, outputWidth + ); + } + + THArgCheck(weight->nDimension == 2 || weight->nDimension == 5, 4, + "weight tensor should be 2D or 5D - got %d", weight->nDimension); + + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]); + } + + THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, outputDepth); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + } +} + +static int THNN_(view_weight)(THTensor **_weight) +{ + THTensor *weight = *_weight; + if (weight->nDimension == 5) { + long s1 = weight->size[0]; + long s2 = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4]; + *_weight = THTensor_(newWithStorage2d)(weight->storage, weight->storageOffset, s1, -1, s2, -1); + return 1; + } + return 0; +} + +/* note: due to write issues, this one cannot be parallelized as well as unfolded_copy */ +static void THNN_(unfolded_acc_vol)( + THTensor *finput, + THTensor *input, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + int nInputPlane, + int inputDepth, + int inputWidth, + int inputHeight, + int outputDepth, + int outputWidth, + int outputHeight) +{ + int nip; + real *input_data = THTensor_(data)(input); + real *finput_data = THTensor_(data)(finput); + +//#pragma omp parallel for private(nip) + for (nip = 0; nip < nInputPlane; nip++) + { + int kt, kw, kh, t, y, x, it, ix, iy; + for (kt = 0; kt < kT; kt++) + { + for (kh = 0; kh < kH; kh++) + { + for (kw = 0; kw < kW; kw++) + { + real *src = finput_data + + nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth) + + kt * (kH*kW*outputDepth*outputHeight*outputWidth) + + kh * (kW*outputDepth*outputHeight*outputWidth) + + kw * (outputDepth*outputHeight*outputWidth); + + real *dst = input_data + nip*(inputDepth*inputHeight*inputWidth); + if (pT > 0 || pH > 0 || pW > 0) + { + for (t = 0; t < outputDepth; t++) + { + it = t*dT - pT + kt; + for (y = 0; y < outputHeight; y++) + { + iy = y*dH - pH + kh; + for (x = 0; x < outputWidth; x++) + { + ix = x*dW - pW + kw; + if (it < 0 || it >= inputDepth || iy < 0 || iy >= inputHeight || ix < 0 || ix >= inputWidth) + { + } + else + { + real *dst_slice = dst+it*inputHeight*inputWidth+iy*inputWidth+ix; + THVector_(cadd)(dst_slice, dst_slice, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1); + } + } + } + } + } + else + { + for (t = 0; t < outputDepth; t++) + { + it = t*dT + kt; + for (y = 0; y < outputHeight; y++) + { + iy = y*dH + kh; + for(x = 0; x < outputWidth; x++) + { + ix = x*dW + kw; + real *dst_slice = dst+it*inputHeight*inputWidth+iy*inputWidth+ix; + THVector_(cadd)(dst_slice, dst_slice, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1); + } + } + } + } + } + } + } + } +} + +static void THNN_(unfolded_copy_vol)( + THTensor *finput, + THTensor *input, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + int nInputPlane, + int inputDepth, + int inputWidth, + int inputHeight, + int outputDepth, + int outputWidth, + int outputHeight) +{ + long k; + real *input_data = THTensor_(data)(input); + real *finput_data = THTensor_(data)(finput); +// #pragma omp parallel for private(k) + for (k = 0; k < nInputPlane*kT*kH*kW; k++) + { + int nip = k / (kT*kH*kW); + int rest = k % (kT*kH*kW); 
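+ /* Decode the flat index k into (plane nip, kt, kh, kw). For example,
+ with kT = kH = kW = 2 and k = 13: nip = 13/8 = 1, rest = 5, then
+ kt = 5/4 = 1, kh = (5%4)/2 = 0 and kw = (5%4)%2 = 1. */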
+ int kt = rest / (kH*kW); + rest = rest % (kH*kW); + int kh = rest / kW; + int kw = rest % kW; + int t,x,y,it,ix,iy; + real *dst = finput_data + + nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth) + + kt * (kH*kW*outputDepth*outputHeight*outputWidth) + + kh * (kW*outputDepth*outputHeight*outputWidth) + + kw * (outputDepth*outputHeight*outputWidth); + real *src = input_data + nip*(inputDepth*inputHeight*inputWidth); + + if (pT > 0 || pH > 0 || pW > 0) + { + for (t = 0; t < outputDepth; t++) + { + it = t*dT - pT + kt; + for (y = 0; y < outputHeight; y++) + { + iy = y*dH - pH + kh; + for (x = 0; x < outputWidth; x++) + { + ix = x*dW - pW + kw; + if (it < 0 || it >= inputDepth || iy < 0 || iy >= inputHeight || ix < 0 || ix >= inputWidth) + memset(dst+t*outputHeight*outputWidth+y*outputWidth+x, 0, sizeof(real)*(1)); + else + memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1)); + } + } + } + } + else + { + for (t = 0; t < outputDepth; t++) + { + it = t*dT + kt; + for (y = 0; y < outputHeight; y++) + { + iy = y*dH + kh; + for(x = 0; x < outputWidth; x++) + { + ix = x*dW + kw; + memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1)); + } + } + } + } + } +} + +static void THNN_(VolumetricConvolutionMM_updateOutput_frame)( + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + long nInputPlane, + long inputDepth, + long inputWidth, + long inputHeight, + long nOutputPlane, + long outputDepth, + long outputWidth, + long outputHeight) +{ + long i; + THTensor *output2d; + + THNN_(unfolded_copy_vol)( + finput, input, + kT, kW, kH, + dT, dW, dH, + pT, pW, pH, + nInputPlane, + inputDepth, inputWidth, inputHeight, + outputDepth, outputWidth, outputHeight + ); + + output2d = THTensor_(newWithStorage2d)( + output->storage, output->storageOffset, nOutputPlane, -1, + outputDepth*outputHeight*outputWidth, -1 + ); + + if (bias) { + for (i = 0; i < nOutputPlane; i++) + { + THVector_(fill)( + output->storage->data+output->storageOffset+output->stride[0]*i, + THTensor_(get1d)(bias, i), + outputDepth*outputHeight*outputWidth + ); + } + } else { + THTensor_(zero)(output); + } + + THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput); + + THTensor_(free)(output2d); +} + +void THNN_(VolumetricConvolutionMM_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + int dimf = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + int freeWeight = 0; + + long nInputPlane; + long inputDepth; + long inputHeight; + long inputWidth; + long nOutputPlane; + long outputDepth; + long outputHeight; + long outputWidth; + + THNN_(VolumetricConvolutionMM_shapeCheck)( + state, input, NULL, weight, bias, + kT, kW, kH, dT, dW, dH, pT, pW, pH); + input = THTensor_(newContiguous)(input); + + if (input->nDimension == 5) + { + dimf++; + dimt++; + dimh++; + dimw++; + } + + nInputPlane = input->size[dimf]; + inputDepth = input->size[dimt]; + inputHeight = input->size[dimh]; + inputWidth = input->size[dimw]; + nOutputPlane = weight->size[0]; + outputDepth = (inputDepth + 2*pT - kT) / dT + 1; + outputHeight = (inputHeight + 2*pH - kH) / dH + 1; + outputWidth = (inputWidth + 2*pW - kW) / dW + 1; + + 
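+ /* standard convolution output arithmetic: e.g. inputDepth = 16, kT = 3,
+ pT = 1, dT = 1 gives outputDepth = (16 + 2*1 - 3)/1 + 1 = 16, while
+ dT = 2 would give (16 + 2 - 3)/2 + 1 = 8 (integer division) */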
freeWeight = THNN_(view_weight)(&weight); + + if (input->nDimension == 4) + { + THTensor_(resize2d)(finput, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth); + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + + THNN_(VolumetricConvolutionMM_updateOutput_frame)( + input, output, weight, bias, finput, + kT, kW, kH, + dT, dW, dH, + pT, pW, pH, + nInputPlane, inputDepth, inputWidth, inputHeight, + nOutputPlane, outputDepth, outputWidth, outputHeight + ); + } + else + { + long T = input->size[0]; + long t; + + THTensor_(resize3d)(finput, T, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth); + THTensor_(resize5d)(output, T, nOutputPlane, outputDepth, outputHeight, outputWidth); + +// #pragma omp parallel for private(t) + for (t = 0; t < T; t++) + { + THTensor *input_t = THTensor_(newSelect)(input, 0, t); + THTensor *output_t = THTensor_(newSelect)(output, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(VolumetricConvolutionMM_updateOutput_frame)( + input_t, output_t, weight, bias, finput_t, + kT, kW, kH, + dT, dW, dH, + pT, pW, pH, + nInputPlane, inputDepth, inputWidth, inputHeight, + nOutputPlane, outputDepth, outputWidth, outputHeight + ); + + THTensor_(free)(input_t); + THTensor_(free)(output_t); + THTensor_(free)(finput_t); + } + } + + THTensor_(free)(input); + if (freeWeight) + THTensor_(free)(weight); +} + +static void THNN_(VolumetricConvolutionMM_updateGradInput_frame)( + THTensor *gradInput, + THTensor *gradOutput, + THTensor *weight, + THTensor *fgradInput, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + THTensor *gradOutput2d = THTensor_(newWithStorage2d)( + gradOutput->storage, gradOutput->storageOffset, + gradOutput->size[0], -1, + gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1 + ); + + THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d); + THTensor_(free)(gradOutput2d); + + THTensor_(zero)(gradInput); + + THNN_(unfolded_acc_vol)( + fgradInput, gradInput, + kT, kW, kH, + dT, dW, dH, + pT, pW, pH, + gradInput->size[0], gradInput->size[1], gradInput->size[3], gradInput->size[2], + gradOutput->size[1], gradOutput->size[3], gradOutput->size[2] + ); +} + +void THNN_(VolumetricConvolutionMM_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + int nOutputPlane = (int)weight->size[0]; + + THNN_(VolumetricConvolutionMM_shapeCheck)( + state, input, gradOutput, weight, NULL, + kT, kW, kH, dT, dW, dH, pT, pW, pH); + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + int freeWeight = THNN_(view_weight)(&weight); + + THTensor_(resizeAs)(gradInput, input); + THTensor_(resizeAs)(fgradInput, finput); + // depending on the BLAS library, fgradInput (result tensor) might + // be left uninitialized on zero alpha, which might lead to weird behavior + // hence, to be safe, zero it + THTensor_(zero)(fgradInput); + THTensor *tweight = THTensor_(new)(); + THTensor_(transpose)(tweight, weight, 0, 1); + + if (input->nDimension == 4) + { + THNN_(VolumetricConvolutionMM_updateGradInput_frame)( + gradInput, gradOutput, tweight, fgradInput, + kT, kW, kH, + dT, dW, dH, + pT, pW, pH + ); + } + else + { + long T = input->size[0]; + long t; + +//#pragma omp parallel for private(t) + for (t = 0; t < T; 
t++) + { + THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); + + THNN_(VolumetricConvolutionMM_updateGradInput_frame)( + gradInput_t, gradOutput_t, tweight, fgradInput_t, + kT, kW, kH, + dT, dW, dH, + pT, pW, pH + ); + + THTensor_(free)(gradInput_t); + THTensor_(free)(gradOutput_t); + THTensor_(free)(fgradInput_t); + } + } + + THTensor_(free)(tweight); + THTensor_(free)(input); + THTensor_(free)(gradOutput); + if (freeWeight) + THTensor_(free)(weight); +} + +static void THNN_(VolumetricConvolutionMM_accGradParameters_frame)( + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + real scale) +{ + long i; + THTensor *gradOutput2d = THTensor_(newWithStorage2d)( + gradOutput->storage, gradOutput->storageOffset, + gradOutput->size[0], -1, + gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1 + ); + + THTensor *tfinput = THTensor_(new)(); + THTensor_(transpose)(tfinput, finput, 0, 1); + THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, tfinput); + THTensor_(free)(tfinput); + + if (gradBias) { + for (i = 0; i < gradBias->size[0]; i++) + { + long k; + real sum = 0; + real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0]; + for (k = 0; k < gradOutput2d->size[1]; k++) + sum += data[k]; + + (gradBias->storage->data + gradBias->storageOffset)[i] += scale * sum; + } + } + + THTensor_(free)(gradOutput2d); +} + +void THNN_(VolumetricConvolutionMM_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + int freeWeight; + int nOutputPlane = (int)gradWeight->size[0]; + + THNN_(VolumetricConvolutionMM_shapeCheck)( + state, input, gradOutput, gradWeight, gradBias, + kT, kW, kH, dT, dW, dH, pT, pW, pH); + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + freeWeight = THNN_(view_weight)(&gradWeight); + + if (input->nDimension == 4) // non-batch mode + { + THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale); + } + else // batch mode + { + long T = input->size[0]; + long t; + + for (t = 0; t < T; t++) + { + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale); + + THTensor_(free)(gradOutput_t); + THTensor_(free)(finput_t); + } + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + if (freeWeight) + THTensor_(free)(gradWeight); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedConvolution.c new file mode 100644 index 000000000..ca740f78e --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedConvolution.c @@ -0,0 +1,420 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricDilatedConvolution.c" +#else + +static inline void THNN_(VolumetricDilatedConvolution_shapeCheck)( + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int kT, int kH, int kW, int dT, int dH, int dW, + 
int padT, int padH, int padW, + int dilationT, int dilationH, int dilationW) { + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + THNN_ARGCHECK(weight->nDimension == 5, 4, weight, + "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor " + "expected for weight, but got: %s"); + THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, + "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW); + THArgCheck(dT > 0 && dW > 0 && dH > 0, 11, + "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW); + THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 15, + "dilation should be greater than zero, but got dilationT: %d, dilationH: %d, dilationW: %d", + dilationT, dilationH, dilationW); + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]); + } + + // Params + int ndim = input->nDimension; + int nInputPlane = weight->size[1]; + int nOutputPlane = weight->size[0]; + int dimf = 0; + int dimd = 1; + int dimh = 2; + int dimw = 3; + + if (ndim == 5) { + dimf++; + dimd++; + dimh++; + dimw++; + } + + long inputDepth = input->size[dimd]; + long inputHeight = input->size[dimh]; + long inputWidth = input->size[dimw]; + long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1; + long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1) + THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small", + nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth); + + THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + } +} + +void THNN_(VolumetricDilatedConvolution_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *columns, + THTensor *ones, + int kT, int kW, int kH, + int dT, int dW, int dH, + int padT, int padW, int padH, + int dilationT, int dilationW, int dilationH) +{ + THNN_(VolumetricDilatedConvolution_shapeCheck)( + input, NULL, weight, bias, + kT, kH, kW, dT, dH, dW, padT, padH, padW, + dilationT, dilationH, dilationW); + + // Params: + int nInputPlane = weight->size[1]; + int nOutputPlane = weight->size[0]; + + input = THTensor_(newContiguous)(input); + weight = THTensor_(newContiguous)(weight); + bias = bias ? 
THTensor_(newContiguous)(bias) : bias; + int batch = 1; + if (input->nDimension == 4) { + // Force batch + batch = 0; + THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]); + } + + long inputDepth = input->size[2]; + long inputHeight = input->size[3]; + long inputWidth = input->size[4]; + long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1; + long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1; + long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Resize output + THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth); + THTensor_(zero)(output); + + // Resize temporary columns + THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth); + + // Define a buffer of ones, for bias accumulation + // Note: this buffer can be shared with other modules, it only ever gets increased, + // and always contains ones. + if (ones->nDimension != 3 || + ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) { + // Resize plane and fill with ones... + THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth); + THTensor_(fill)(ones, 1); + } + + // Helpers + THTensor *input_n = THTensor_(new)(); + THTensor *output_n = THTensor_(new)(); + + // For each elt in batch, do: + for (int elt = 0; elt < batchSize; elt++) { + // Matrix multiply per output: + THTensor_(select)(input_n, input, 0, elt); + THTensor_(select)(output_n, output, 0, elt); + + // Do Bias first: + // M,N,K are dims of matrix A and B + long m_ = nOutputPlane; + long n_ = outputDepth * outputHeight * outputWidth; + long k_ = 1; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + if (bias) { + THBlas_(gemm)( + 't', 'n', + n_, m_, k_, + 1, + THTensor_(data)(ones), k_, + THTensor_(data)(bias), k_, + 0, + THTensor_(data)(output_n), n_ + ); + } else { + THTensor_(zero)(output_n); + } + + // Extract columns: + THNN_(vol2col)( + THTensor_(data)(input_n), + nInputPlane, inputDepth, inputHeight, inputWidth, + kT, kH, kW, padT, padH, padW, dT, dH, dW, + dilationT, dilationH, dilationW, + THTensor_(data)(columns) + ); + + // M,N,K are dims of matrix A and B + long m = nOutputPlane; + long n = columns->size[1]; + long k = nInputPlane*kT*kH*kW; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 'n', 'n', + n, m, k, + 1, + THTensor_(data)(columns), n, + THTensor_(data)(weight), k, + 1, + THTensor_(data)(output_n), n + ); + } + + // Free + THTensor_(free)(input_n); + THTensor_(free)(output_n); + + // Resize output + if (batch == 0) { + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(weight); + if (bias) THTensor_(free)(bias); +} + +void THNN_(VolumetricDilatedConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *gradColumns, + int kT, int kW, int kH, + int dT, int dW, int dH, + int padT, int padW, int padH, + int dilationT, int dilationW, int dilationH) +{ + THNN_(VolumetricDilatedConvolution_shapeCheck)( + input, gradOutput, weight, NULL, + kT, kH, kW, dT, dH, dW, padT, padH, padW, + dilationT, dilationH, 
dilationW); + + // Params + int nInputPlane = weight->size[1]; + int nOutputPlane = weight->size[0]; + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + weight = THTensor_(newContiguous)(weight); + + int batch = 1; + if (input->nDimension == 4) { + // Force batch + batch = 0; + THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]); + THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]); + } + + long inputDepth = input->size[2]; + long inputWidth = input->size[4]; + long inputHeight = input->size[3]; + long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1; + long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Resize output + THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth); + + // Resize temporary columns + THTensor_(resize2d)(gradColumns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth); + THTensor_(zero)(gradColumns); + + // Helpers + THTensor *gradInput_n = THTensor_(new)(); + THTensor *gradOutput_n = THTensor_(new)(); + + // For each elt in batch, do: + for (int elt = 0; elt < batchSize; elt ++) { + // Matrix multiply per sample: + THTensor_(select)(gradInput_n, gradInput, 0, elt); + THTensor_(select)(gradOutput_n, gradOutput, 0, elt); + + // M,N,K are dims of matrix A and B + long m = nInputPlane*kT*kW*kH; + long n = gradColumns->size[1]; + long k = nOutputPlane; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 'n', 't', + n, m, k, + 1, + THTensor_(data)(gradOutput_n), n, + THTensor_(data)(weight), m, + 0, + THTensor_(data)(gradColumns), n + ); + + // Unpack columns back into input: + THNN_(col2vol)( + THTensor_(data)(gradColumns), + nInputPlane, inputDepth, inputHeight, inputWidth, + kT, kH, kW, padT, padH, padW, dT, dH, dW, + dilationT, dilationH, dilationW, + THTensor_(data)(gradInput_n) + ); + } + + // Free + THTensor_(free)(gradInput_n); + THTensor_(free)(gradOutput_n); + + // Resize output + if (batch == 0) { + THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth); + THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth); + THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); +} + +void THNN_(VolumetricDilatedConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *columns, + THTensor *ones, + int kT, int kW, int kH, + int dT, int dW, int dH, + int padT, int padW, int padH, + int dilationT, int dilationW, int dilationH, + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + THNN_(VolumetricDilatedConvolution_shapeCheck)( + input, gradOutput, gradWeight, gradBias, + kT, kH, kW, dT, dH, dW, padT, padH, padW, + dilationT, dilationH, dilationW); + + // Params + int nInputPlane = gradWeight->size[1]; + int nOutputPlane = gradWeight->size[0]; + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + int batch = 1; + if (input->nDimension == 4) { + // Force batch + batch = 0; + 
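// A 4D (non-batch) input is viewed here as a 5D batch of size one; the original 4D shapes are restored once the per-sample loop has finished. + 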
THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]); + THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]); + } + + long inputDepth = input->size[2]; + long inputWidth = input->size[4]; + long inputHeight = input->size[3]; + long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1; + long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + // Batch size + input planes + long batchSize = input->size[0]; + + // Define a buffer of ones, for bias accumulation + if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) { + // Resize plane and fill with ones... + THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth); + THTensor_(fill)(ones, 1); + } + + // Resize temporary columns + THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth); + + // Helpers + THTensor *input_n = THTensor_(new)(); + THTensor *gradOutput_n = THTensor_(new)(); + + // For each elt in batch, do: + for (int elt = 0; elt < batchSize; elt ++) { + // Matrix mulitply per output: + THTensor_(select)(input_n, input, 0, elt); + THTensor_(select)(gradOutput_n, gradOutput, 0, elt); + + // Extract columns: + THNN_(vol2col)( + THTensor_(data)(input_n), + nInputPlane, inputDepth, inputHeight, inputWidth, + kT, kH, kW, padT, padH, padW, dT, dH, dW, + dilationT, dilationH, dilationW, + THTensor_(data)(columns) + ); + + // M,N,K are dims of matrix A and B + long m = nOutputPlane; + long n = nInputPlane*kT*kW*kH; + long k = columns->size[1]; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 't', 'n', + n, m, k, + scale, + THTensor_(data)(columns), k, + THTensor_(data)(gradOutput_n), k, + 1, + THTensor_(data)(gradWeight), n + ); + + // Do Bias: + // M,N,K are dims of matrix A and B + long m_ = nOutputPlane; + long k_ = outputDepth * outputHeight * outputWidth; + + // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices) + if (gradBias) { + THBlas_(gemv)( + 't', + k_, m_, + scale, + THTensor_(data)(gradOutput_n), k_, + THTensor_(data)(ones), 1, + 1, + THTensor_(data)(gradBias), 1 + ); + } + } + + // Free + THTensor_(free)(input_n); + THTensor_(free)(gradOutput_n); + + // Resize + if (batch == 0) { + THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth); + THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedMaxPooling.c new file mode 100644 index 000000000..66c0f9531 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedMaxPooling.c @@ -0,0 +1,515 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricDilatedMaxPooling.c" +#else + +static inline void THNN_(VolumetricDilatedMaxPooling_shapeCheck)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THIndexTensor *indices, + int kT, int kW, int kH, + int dT, int dW, int dH, + int pT, int pW, int pH, + int dilationT, int dilationW, int dilationH, + bool ceilMode) { + int ndim = input->nDimension; + int dimN = 0; + int dimt = 1; + int dimh = 2; + int dimw = 
3; + long nslices; + long itime; + long iheight; + long iwidth; + long otime; + long oheight; + long owidth; + + THArgCheck(kT > 0 && kW > 0 && kH > 0, 5, + "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", + kT, kH, kW); + THArgCheck(dT > 0 && dW > 0 && dH > 0, 8, + "stride should be greater than zero, but got dT: %d dH: %d dW: %d", + dT, dH, dW); + THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 14, + "dilation should be greater than 0, but got dilationT: %d dilationH: %d dilationW: %d", + dilationT, dilationH, dilationW); + + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + + if (input->nDimension == 5) + { + dimN++; + dimt++; + dimh++; + dimw++; + } + + THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2, + "pad should be smaller than half of kernel size, but got " + "kT: %d kW: %d, kH: %d, padT: %d, padW: %d, padH: %d", + kT, kW, kH, pT, pW, pH); + + nslices = input->size[dimN]; + itime = input->size[dimt]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + if (ceilMode) + { + otime = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1; + oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1; + owidth = (int)(ceil((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1; + } + else + { + otime = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1; + oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1; + owidth = (int)(floor((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1; + } + + if (pT || pW || pH) + { + // ensure that the last pooling starts inside the image + if ((otime - 1)*dT >= itime + pT) + --otime; + if ((oheight - 1)*dH >= iheight + pH) + --oheight; + if ((owidth - 1)*dW >= iwidth + pW) + --owidth; + } + + if (otime < 1 || owidth < 1 || oheight < 1) + THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). 
Output size is too small", + nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth); + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimN, nslices); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, otime); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, oheight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, owidth); + } + if (indices != NULL) { + THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimN, nslices); + THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimt, otime); + THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, oheight); + THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, owidth); + } +} + +static void THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)( + real *input_p, + real *output_p, + THIndex_t *indz_p, + long nslices, + long itime, + long iwidth, + long iheight, + long otime, + long owidth, + long oheight, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + int dilationT, + int dilationW, + int dilationH) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + /* loop over output */ + long i, j, ti; + for (ti = 0; ti < otime; ti++) + { + for (i = 0; i < oheight; i++) + { + for (j = 0; j < owidth; j++) + { + /* local pointers */ + + long start_t = ti * dT - pT; + long start_h = i * dH - pH; + long start_w = j * dW - pW; + + long kernel_t = fminf(kT, kT + start_t); + long kernel_h = fminf(kH, kH + start_h); + long kernel_w = fminf(kW, kW + start_w); + + while(start_t < 0) + start_t += dilationT; + while(start_h < 0) + start_h += dilationH; + while(start_w < 0) + start_w += dilationW; + + real *ip = input_p + k * itime * iwidth * iheight + + start_t * iwidth * iheight + start_h * iwidth + start_w; + real *op = output_p + k * otime * owidth * oheight + + ti * owidth * oheight + i * owidth + j; + THIndex_t *indzp = indz_p + k * otime * owidth * oheight + + ti * owidth * oheight + i * owidth + j; + + /* compute local max: */ + real maxval = -THInf; + int x,y,z; + int mx, my, mz; + mx = my = mz = -1; + + for (z = 0; z < kernel_t; z++) + { + for (y = 0; y < kernel_h; y++) + { + for (x = 0; x < kernel_w; x++) + { + if ((start_t + z * dilationT < itime) && (start_h + y * dilationH < iheight) && (start_w + x * dilationW < iwidth)) + { + real val = *(ip + z * dilationT * iwidth * iheight + y * dilationH * iwidth + x * dilationW); + if (val > maxval) + { + maxval = val; + // Store indices w.r.t the kernel dimension + mz = z + (kT - kernel_t); + my = y + (kH - kernel_h); + mx = x + (kW - kernel_w); + } + } + } + } + } + + // set max values + ((unsigned char*)(indzp))[0] = mz; + ((unsigned char*)(indzp))[1] = my; + ((unsigned char*)(indzp))[2] = mx; + ((unsigned char*)(indzp))[3] = 0; + + /* set output to local max */ + *op = maxval; + } + } + } + } +} + +void THNN_(VolumetricDilatedMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + int dilationT, + int dilationW, + int dilationH, + bool ceilMode) +{ + long nslices; + long itime; + long iheight; + long iwidth; + long otime; + long oheight; + long owidth; + real *input_data; + real *output_data; + THIndex_t *indices_data; + + + int dimN = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + + if (input->nDimension == 5) + { + dimN++; + dimt++; + dimh++; + dimw++; + } + + THNN_(VolumetricDilatedMaxPooling_shapeCheck)( + state, input, NULL, NULL, + kT, kW, kH, dT, dW, dH, + pT, pW, pH, dilationT, dilationW, 
dilationH, + ceilMode); + + /* sizes */ + nslices = input->size[dimN]; + itime = input->size[dimt]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + if (ceilMode) + { + otime = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1; + oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1; + owidth = (int)(ceil((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1; + } + else + { + otime = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1; + oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1; + owidth = (int)(floor((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1; + } + + if (pT || pW || pH) + { + // ensure that the last pooling starts inside the image + if ((otime - 1)*dT >= itime + pT) + --otime; + if ((oheight - 1)*dH >= iheight + pH) + --oheight; + if ((owidth - 1)*dW >= iwidth + pW) + --owidth; + } + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + if (input->nDimension == 4) /* non-batch mode */ + { + /* resize output */ + THTensor_(resize4d)(output, nslices, otime, oheight, owidth); + /* indices will contain ti,i,j uchar locations packed into float/double */ + THIndexTensor_(resize4d)(indices, nslices, otime, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)( + input_data, output_data, + indices_data, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, + dT, dW, dH, + pT, pW, pH, + dilationT, dilationW, dilationH + ); + } + else /* batch mode */ + { + long p; + long nBatch = input->size[0]; + + long istride = nslices * itime * iwidth * iheight; + long ostride = nslices * otime * owidth * oheight; + + /* resize output */ + THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth); + /* indices will contain ti,i,j locations for each output point */ + THIndexTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + +#pragma omp parallel for private(p) + for (p=0; p < nBatch; p++) + { + THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)( + input_data + p * istride, + output_data + p * ostride, + indices_data + p * ostride, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, + dT, dW, dH, + pT, pW, pH, + dilationT, dilationW, dilationH + ); + } + } + + /* cleanup */ + THTensor_(free)(input); +} + +static void THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)( + real *gradInput_p, + real *gradOutput_p, + THIndex_t *indz_p, + long nslices, + long itime, + long iwidth, + long iheight, + long otime, + long owidth, + long oheight, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + int dilationT, + int dilationW, + int dilationH) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + real *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight; + real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight; + THIndex_t *indz_p_k = indz_p + k * otime * owidth * oheight; + + /* calculate max points */ + long ti, i, j; + for (ti = 0; ti < otime; ti++) + { + for (i = 0; i < oheight; i++) + { + for (j = 0; j < owidth; j++) + { + /* retrieve position of max */ + THIndex_t * indzp = &indz_p_k[ti * oheight * owidth + 
i * owidth + j]; + long maxti = ((unsigned char*)(indzp))[0] * dilationT + ti * dT - pT; + long maxi = ((unsigned char*)(indzp))[1] * dilationH + i * dH - pH; + long maxj = ((unsigned char*)(indzp))[2] * dilationW + j * dW - pW; + + if (maxti != -1) { + /* update gradient */ + gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] += + gradOutput_p_k[ti * oheight * owidth + i * owidth + j]; + } + } + } + } + } +} + +void THNN_(VolumetricDilatedMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + int dilationT, + int dilationW, + int dilationH, + bool ceilMode) +{ + int nslices; + int itime; + int iheight; + int iwidth; + int otime; + int oheight; + int owidth; + real *gradInput_data; + real *gradOutput_data; + THIndex_t *indices_data; + + int dimN = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + + THNN_(VolumetricDilatedMaxPooling_shapeCheck)( + state, input, gradOutput, indices, + kT, kW, kH, dT, dW, dH, + pT, pW, pH, dilationT, dilationW, dilationH, + ceilMode); + + // TODO: gradOutput shape check + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (input->nDimension == 5) + { + dimN++; + dimt++; + dimh++; + dimw++; + } + + /* sizes */ + nslices = input->size[dimN]; + itime = input->size[dimt]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + otime = gradOutput->size[dimt]; + oheight = gradOutput->size[dimh]; + owidth = gradOutput->size[dimw]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THIndexTensor_(data)(indices); + + /* backprop */ + if (input->nDimension == 4) /* non-batch mode*/ + { + THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)( + gradInput_data, gradOutput_data, + indices_data, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + dT, dW, dH, + pT, pW, pH, + dilationT, dilationW, dilationH + ); + } + else /* batch mode */ + { + long p; + long nBatch = input->size[0]; + + long istride = nslices * itime * iwidth * iheight; + long ostride = nslices * otime * owidth * oheight; + +#pragma omp parallel for private(p) + for (p = 0; p < nBatch; p++) + { + THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)( + gradInput_data + p * istride, + gradOutput_data + p * ostride, + indices_data + p * ostride, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + dT, dW, dH, + pT, pW, pH, + dilationT, dilationW, dilationH + ); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFractionalMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFractionalMaxPooling.c new file mode 100644 index 000000000..236986bb9 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFractionalMaxPooling.c @@ -0,0 +1,279 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricFractionalMaxPooling.c" +#else + +static long* THNN_(VolumetricFractionalMaxPooling_generateIntervals)( + real sample, + long inputSize, + long outputSize, + int poolSize) { + real alpha = (real) (inputSize - poolSize) / (real) (outputSize - 1); + long* sequence = (long*) THAlloc(sizeof(long) * outputSize); + + long i; + for (i = 0; i < outputSize - 1; ++i) { + 
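// Window start for output cell i: floor((i + u) * alpha) - floor(u * alpha), where u = sample is drawn uniformly from [0, 1). With hypothetical sizes inputSize = 10, outputSize = 4, poolSize = 3 and u = 0.5, alpha = 7/3 and the starts are 0, 2, 4, with the last window pinned to inputSize - poolSize = 7 below, so every window of width poolSize stays inside the input. + 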
sequence[i] = + (long) ((i + sample) * alpha) - (long) (sample * alpha); + } + sequence[outputSize - 1] = inputSize - poolSize; + + return sequence; +} + +static void THNN_(VolumetricFractionalMaxPooling_updateOutput_frame)( + real* input, + real* output, + THIndex_t* indices, + real* randomSamples, + long numPlanes, + long inputT, long inputW, long inputH, + long outputT, long outputW, long outputH, + int poolSizeT, int poolSizeW, int poolSizeH) { + long plane; +#pragma omp parallel for private(plane) + for (plane = 0; plane < numPlanes; ++plane) { + /* each plane contains 3 random samples, one for T, one for W, and one for H */ + real* randomSamplesForPlane = randomSamples + plane * 3; + + /* Generate interval sequence */ + long* sequenceT = + THNN_(VolumetricFractionalMaxPooling_generateIntervals)( + randomSamplesForPlane[0], inputT, outputT, poolSizeT); + long* sequenceW = + THNN_(VolumetricFractionalMaxPooling_generateIntervals)( + randomSamplesForPlane[1], inputW, outputW, poolSizeW); + long* sequenceH = + THNN_(VolumetricFractionalMaxPooling_generateIntervals)( + randomSamplesForPlane[2], inputH, outputH, poolSizeH); + + /* loop over output */ + long h, w, t; + + real* inputForPlane = input + plane * inputT * inputW * inputH; + real* outputForPlane = output + plane * outputT * outputW * outputH; + THIndex_t* indicesForPlane = indices + plane * outputT * outputW * outputH; + + for (h = 0; h < outputH; ++h) { + long inputHStart = sequenceH[h]; + + for (w = 0; w < outputW; ++w) { + long inputWStart = sequenceW[w]; + + for (t = 0; t < outputT; ++t) { + long inputTStart = sequenceT[t]; + + real maxVal = -THInf; + long maxIndex = -1; + + long h2, w2, t2; + for (h2 = inputHStart; h2 < inputHStart + poolSizeH; ++h2) { + for (w2 = inputWStart; w2 < inputWStart + poolSizeW; ++w2) { + for (t2 = inputTStart; t2 < inputTStart + poolSizeT; ++t2) { + THAssert(h2 >= 0 && h2 < inputH); + THAssert(w2 >= 0 && w2 < inputW); + THAssert(t2 >= 0 && t2 < inputT); + + long planeIndex = h2 * inputW * inputT + w2 * inputT + t2; + real val = inputForPlane[planeIndex]; + if (val > maxVal) { + maxVal = val; + maxIndex = planeIndex; + } + } + } + } + + THAssert(maxVal != -THInf); + THAssert(maxIndex != -1); + + outputForPlane[h * outputW * outputT + w * outputT + t] = maxVal; + /* +1 to lua index */ + indicesForPlane[h * outputW * outputT + w * outputT + t] = maxIndex + TH_INDEX_BASE; + } + } + } + + THFree(sequenceT); + THFree(sequenceW); + THFree(sequenceH); + } +} + +void THNN_(VolumetricFractionalMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int outputT, int outputW, int outputH, + int poolSizeT, int poolSizeW, int poolSizeH, + THIndexTensor *indices, + THTensor *randomSamples) { + + long numBatch = 1; + int planeDim = 0; + int heightDim = 1; + int widthDim = 2; + int timeDim = 3; + + long numInputDims = THTensor_(nDimension)(input); + THNN_ARGCHECK(numInputDims == 4 || numInputDims == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + + if (numInputDims == 5) { + numBatch = THTensor_(size)(input, 0); + planeDim++; + heightDim++; + widthDim++; + timeDim++; + } + + /* sizes */ + long numPlanes = THTensor_(size)(input, planeDim); + long inputH = THTensor_(size)(input, heightDim); + long inputW = THTensor_(size)(input, widthDim); + long inputT = THTensor_(size)(input, timeDim); + + THArgCheck(outputH + poolSizeH - 1 < inputH, 9, + "poolSizeH (%d) too large relative to input height (%d)", + poolSizeH, inputH); + THArgCheck(outputW + poolSizeW 
- 1 < inputW, 8, + "poolSizeW (%d) too large relative to input width (%d)", + poolSizeW, inputW); + THArgCheck(outputT + poolSizeT - 1 < inputT, 7, + "poolSizeT (%d) too large relative to input time (%d)", + poolSizeT, inputT); + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + if (numInputDims == 4) { + /* resize output */ + THTensor_(resize4d)(output, numPlanes, outputH, outputW, outputT); + /* indices will contain the locations for each output point */ + THIndexTensor_(resize4d)(indices, numPlanes, outputH, outputW, outputT); + + THNN_(VolumetricFractionalMaxPooling_updateOutput_frame)( + THTensor_(data)(input), + THTensor_(data)(output), + THIndexTensor_(data)(indices), + THTensor_(data)(randomSamples), + numPlanes, inputT, inputW, inputH, + outputT, outputW, outputH, poolSizeT, poolSizeW, poolSizeH); + } else { + THTensor_(resize5d)(output, numBatch, numPlanes, outputH, outputW, outputT); + /* indices will contain the locations for each output point */ + THIndexTensor_(resize5d)(indices, numBatch, numPlanes, outputH, outputW, outputT); + + long batch; +#pragma omp parallel for private(batch) + for (batch = 0; batch < numBatch; ++batch) { + THNN_(VolumetricFractionalMaxPooling_updateOutput_frame)( + THTensor_(data)(input) + batch * numPlanes * inputH * inputW * inputT, + THTensor_(data)(output) + batch * numPlanes * outputH * outputW * outputT, + THIndexTensor_(data)(indices) + batch * numPlanes * outputH * outputW * outputT, + THTensor_(data)(randomSamples) + batch * numPlanes * 3, + numPlanes, inputT, inputW, inputH, + outputT, outputW, outputH, poolSizeT, poolSizeW, poolSizeH); + } + } + + /* cleanup */ + THTensor_(free)(input); +} + +static void THNN_(VolumetricFractionalMaxPooling_updateGradInput_frame)( + real* gradInput, + real* gradOutput, + THIndex_t* indices, + long numPlanes, + long inputT, long inputW, long inputH, + long outputT, long outputW, long outputH) { + long plane; +#pragma omp parallel for private(plane) + for (plane = 0; plane < numPlanes; plane++) { + real* gradInputForPlane = gradInput + plane * inputT * inputW * inputH; + real* gradOutputForPlane = gradOutput + plane * outputT * outputW * outputH; + THIndex_t* indicesForPlane = indices + plane * outputT * outputW * outputH; + + long h, w, t; + for (h = 0; h < outputH; ++h) { + for (w = 0; w < outputW; ++w) { + for (t = 0; t < outputT; ++t) { + long outputIndex = h * outputW * outputT + w * outputT + t; + long index = indicesForPlane[outputIndex] - TH_INDEX_BASE; + THAssert(index >= 0 && index < inputT * inputW * inputH); + + gradInputForPlane[index] += gradOutputForPlane[outputIndex]; + } + } + } + } +} + +void THNN_(VolumetricFractionalMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int outputT, int outputW, int outputH, + int poolSizeT, int poolSizeW, int poolSizeH, + THIndexTensor *indices) { + + long numBatch = 1; + int planeDim = 0; + int heightDim = 1; + int widthDim = 2; + int timeDim = 3; + + long numInputDims = THTensor_(nDimension)(input); + if (numInputDims == 5) { + numBatch = THTensor_(size)(input, 0); + planeDim = 1; + heightDim++; + widthDim++; + timeDim++; + } + + /* sizes */ + long numPlanes = THTensor_(size)(input, planeDim); + long inputH = THTensor_(size)(input, heightDim); + long inputW = THTensor_(size)(input, widthDim); + long inputT = THTensor_(size)(input, timeDim); + + THArgCheck(outputT == THTensor_(size)(gradOutput, timeDim), 3, + "gradOutput time unexpected"); + THArgCheck(outputW == 
THTensor_(size)(gradOutput, widthDim), 3, + "gradOutput width unexpected"); + THArgCheck(outputH == THTensor_(size)(gradOutput, heightDim), 3, + "gradOutput height unexpected"); + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* backprop */ + if (numInputDims == 4) { + THNN_(VolumetricFractionalMaxPooling_updateGradInput_frame)( + THTensor_(data)(gradInput), + THTensor_(data)(gradOutput), + THIndexTensor_(data)(indices), + numPlanes, inputT, inputW, inputH, outputT, outputW, outputH); + } else { + long batch; +#pragma omp parallel for private(batch) + for (batch = 0; batch < numBatch; ++batch) { + THNN_(VolumetricFractionalMaxPooling_updateGradInput_frame)( + THTensor_(data)(gradInput) + batch * numPlanes * inputH * inputW * inputT, + THTensor_(data)(gradOutput) + batch * numPlanes * outputH * outputW * outputT, + THIndexTensor_(data)(indices) + batch * numPlanes * outputH * outputW * outputT, + numPlanes, inputT, inputW, inputH, outputT, outputW, outputH); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFullConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFullConvolution.c new file mode 100644 index 000000000..c974fab50 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFullConvolution.c @@ -0,0 +1,541 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricFullConvolution.c" +#else + +static void THNN_(vol2col)( + const real *data_vol, const int channels, + const int depth, const int height, const int width, + const int kT, const int kH, const int kW, + const int pT, const int pH, const int pW, + const int dT, const int dH, const int dW, + const int dilationT, const int dilationH, const int dilationW, + real *data_col) +{ + int c, t, h, w; + int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1; + int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1; + int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1; + int channels_col = channels * kT * kH * kW; + for (c = 0; c < channels_col; ++c) + { + int w_offset = c % kW; + int h_offset = (c / kW) % kH; + int t_offset = (c / kW / kH) % kT; + int c_vol = c / kT / kH / kW; + for (t = 0; t < depth_col; ++t) + { + for (h = 0; h < height_col; ++h) + { + for (w = 0; w < width_col; ++w) + { + int t_pad = t * dT - pT + t_offset * dilationT; + int h_pad = h * dH - pH + h_offset * dilationH; + int w_pad = w * dW - pW + w_offset * dilationW; + if (t_pad >= 0 && t_pad < depth && + h_pad >= 0 && h_pad < height && + w_pad >= 0 && w_pad < width) + data_col[((c * depth_col + t) * height_col + h) * width_col + w] = + data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad]; + else + data_col[((c * depth_col + t) * height_col + h) * width_col + w] = 0; + } + } + } + } +} + +static void THNN_(col2vol)( + const real* data_col, const int channels, + const int depth, const int height, const int width, + const int kT, const int kH, const int kW, + const int pT, const int pH, const int pW, + const int dT, const int dH, const int dW, + const int dilationT, const int dilationH, const int dilationW, + real* data_vol) +{ + int c, t, h, w; + memset(data_vol, 0, sizeof(real) * depth * height * width * channels); + int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1; + int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) 
/ dH + 1; + int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1; + int channels_col = channels * kT * kH * kW; + for (c = 0; c < channels_col; ++c) + { + int w_offset = c % kW; + int h_offset = (c / kW) % kH; + int t_offset = (c / kW / kH) % kT; + int c_vol = c / kT / kH / kW; + for (t = 0; t < depth_col; ++t) + { + for (h = 0; h < height_col; ++h) + { + for (w = 0; w < width_col; ++w) + { + int t_pad = t * dT - pT + t_offset * dilationT; + int h_pad = h * dH - pH + h_offset * dilationH; + int w_pad = w * dW - pW + w_offset * dilationW; + if (t_pad >= 0 && t_pad < depth && + h_pad >= 0 && h_pad < height && + w_pad >= 0 && w_pad < width) + data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad] += + data_col[((c * depth_col + t) * height_col + h) * width_col + w]; + } + } + } + } +} + +static inline void THNN_(VolumetricFullConvolution_shapeCheck)( + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int dT, int dW, int dH, int pT, int pW, int pH, + int aT, int aW, int aH) { + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + // number of input & output planes and kernel size is indirectly defined by the weight tensor + THNN_ARGCHECK(weight->nDimension == 5, 4, weight, + "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor " + "expected for weight, but got: %s"); + THArgCheck(dT > 0 && dW > 0 && dH > 0, 11, + "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW); + THArgCheck(aT < dT && aW < dW && aH < dH, 15, + "output adjustment must be smaller than stride, but got " + "adjT: %d adjH: %d adjW: %d dT: %d dH: %d dW: %d", + aT, aH, aW, dT, dH, dW); + + int ndim = input->nDimension; + const int nInputPlane = (int)weight->size[0]; + const int nOutputPlane = (int)weight->size[1]; + const int kT = (int)weight->size[2]; + const int kH = (int)weight->size[3]; + const int kW = (int)weight->size[4]; + + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]); + } + + int dimf = 0; + int dimd = 1; + int dimh = 2; + int dimw = 3; + + if (ndim == 5) { + dimf++; + dimd++; + dimh++; + dimw++; + } + + const long inputWidth = input->size[dimw]; + const long inputHeight = input->size[dimh]; + const long inputDepth = input->size[dimd]; + const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW; + const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH; + const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT; + + if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1) + THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). 
Output size is too small", + nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth); + + THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); + } +} + +void THNN_(VolumetricFullConvolution_updateOutput)( + THNNState *state, + THTensor *input, // 4D or 5D (batch) tensor + THTensor *output, + THTensor *weight, // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW) + THTensor *bias, + THTensor *finput, // internal columns buffer + THTensor *fgradInput, // internal ones buffer + int dT, int dW, int dH, // stride of the convolution + int pT, int pW, int pH, // padding + int aT, int aW, int aH) // extra output adjustment +{ + THTensor *columns = finput; + THTensor *ones = fgradInput; + + THNN_(VolumetricFullConvolution_shapeCheck)( + input, NULL, weight, bias, + dT, dW, dH, pT, pW, pH, aT, aW, aH); + + const int nInputPlane = (int)weight->size[0]; + const int nOutputPlane = (int)weight->size[1]; + const int kT = (int)weight->size[2]; + const int kH = (int)weight->size[3]; + const int kW = (int)weight->size[4]; + + input = THTensor_(newContiguous)(input); + weight = THTensor_(newContiguous)(weight); + bias = bias ? THTensor_(newContiguous)(bias) : bias; + int batch = 1; + if (input->nDimension == 4) + { + // Force batch + batch = 0; + THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]); + } + + const long inputWidth = input->size[4]; + const long inputHeight = input->size[3]; + const long inputDepth = input->size[2]; + const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW; + const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH; + const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT; + + // Batch size + input planes + const long batchSize = input->size[0]; + + // Resize output + THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth); + + // Resize temporary columns + THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth); + THTensor_(zero)(columns); + + // Define a buffer of ones, for bias accumulation + // Note: this buffer can be shared with other modules, it only ever gets increased, + // and always contains ones. + if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) + { + // Resize plane and fill with ones... 
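+ // The bias gemm below computes output_n += bias * ones^T, i.e. it adds bias[p] to every one of the outputDepth*outputHeight*outputWidth locations of plane p, which is why this shared buffer only ever needs to grow.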
+ THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth); + THTensor_(fill)(ones, 1); + } + + // Helpers + THTensor *input_n = THTensor_(new)(); + THTensor *output_n = THTensor_(new)(); + + int elt; + // For each elt in batch, do: + for (elt = 0; elt < batchSize; ++elt) + { + // Matrix mulitply per output: + THTensor_(select)(input_n, input, 0, elt); + THTensor_(select)(output_n, output, 0, elt); + + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + const long m = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4]; + const long n = columns->size[1]; + const long k = weight->size[0]; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 'n', 't', + n, m, k, + 1, + THTensor_(data)(input_n), n, + THTensor_(data)(weight), m, + 0, + THTensor_(data)(columns), n + ); + + // Unpack columns back into input: + THNN_(col2vol)( + THTensor_(data)(columns), + nOutputPlane, outputDepth, outputHeight, outputWidth, + kT, kH, kW, + pT, pH, pW, + dT, dH, dW, + 1, 1, 1, + THTensor_(data)(output_n) + ); + + // Do Bias after: + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + const long m_ = nOutputPlane; + const long n_ = outputDepth * outputHeight * outputWidth; + const long k_ = 1; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + if (bias) { + THBlas_(gemm)( + 't', 'n', + n_, m_, k_, + 1, + THTensor_(data)(ones), k_, + THTensor_(data)(bias), k_, + 1, + THTensor_(data)(output_n), n_ + ); + } + } + + // Free + THTensor_(free)(input_n); + THTensor_(free)(output_n); + + // Resize output + if (batch == 0) + { + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(weight); + if (bias) THTensor_(free)(bias); +} + +void THNN_(VolumetricFullConvolution_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, // only used by cuda impl + int dT, int dW, int dH, // stride + int pT, int pW, int pH, // padding + int aT, int aW, int aH) // extra output adjustment +{ + THTensor *gradColumns = finput; + + // number of input & output planes and kernel size is indirectly defined by the weight tensor + THNN_(VolumetricFullConvolution_shapeCheck)( + input, gradOutput, weight, NULL, + dT, dW, dH, pT, pW, pH, aT, aW, aH); + + const int nInputPlane = (int)weight->size[0]; + const int nOutputPlane = (int)weight->size[1]; + const int kT = (int)weight->size[2]; + const int kH = (int)weight->size[3]; + const int kW = (int)weight->size[4]; + + input = THTensor_(newContiguous)(input); + weight = THTensor_(newContiguous)(weight); + gradOutput = THTensor_(newContiguous)(gradOutput); + + int batch = 1; + if (input->nDimension == 4) + { + // Force batch + batch = 0; + THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]); + THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]); + } + + const long inputWidth = input->size[4]; + const long inputHeight = input->size[3]; + const long inputDepth = input->size[2]; + const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW; + const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + 
aH; + const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT; + + // Batch size + input planes + const long batchSize = input->size[0]; + + // Resize output + THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth); + THTensor_(zero)(gradInput); + + // Resize temporary columns + THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth); + + // Helpers + THTensor *gradInput_n = THTensor_(new)(); + THTensor *gradOutput_n = THTensor_(new)(); + + int elt; + // For each elt in batch, do: + for (elt = 0; elt < batchSize; ++elt) + { + // Matrix mulitply per sample: + THTensor_(select)(gradInput_n, gradInput, 0, elt); + THTensor_(select)(gradOutput_n, gradOutput, 0, elt); + + // Extract columns: + THNN_(vol2col)( + THTensor_(data)(gradOutput_n), + nOutputPlane, outputDepth, outputHeight, outputWidth, + kT, kH, kW, + pT, pH, pW, + dT, dH, dW, + 1, 1, 1, + THTensor_(data)(gradColumns) + ); + + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + const long m = weight->size[0]; + const long n = gradColumns->size[1]; + const long k = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4]; + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 'n', 'n', + n, m, k, + 1, + THTensor_(data)(gradColumns), n, + THTensor_(data)(weight), k, + 0, + THTensor_(data)(gradInput_n), n + ); + } + + // Free + THTensor_(free)(gradInput_n); + THTensor_(free)(gradOutput_n); + + // Resize output + if (batch == 0) + { + THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth); + THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth); + THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); + THTensor_(free)(weight); +} + +void THNN_(VolumetricFullConvolution_accGradParameters)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + THTensor *fgradInput, + int dT, int dW, int dH, // stride + int pT, int pW, int pH, // padding + int aT, int aW, int aH, // extra output adjustment + accreal scale_) +{ + real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); + // number of input & output planes and kernel size is indirectly defined by the gradWeight tensor + THNN_(VolumetricFullConvolution_shapeCheck)( + input, gradOutput, gradWeight, gradBias, + dT, dW, dH, pT, pW, pH, aT, aW, aH); + + int nInputPlane = (int)gradWeight->size[0]; + int nOutputPlane = (int)gradWeight->size[1]; + int kT = (int)gradWeight->size[2]; + int kH = (int)gradWeight->size[3]; + int kW = (int)gradWeight->size[4]; + + THTensor *columns = finput; + THTensor *ones = fgradInput; + + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous"); + if (gradBias) + THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous"); + + int batch = 1; + if (input->nDimension == 4) + { + // Force batch + batch = 0; + THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]); + THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]); + } + + const long inputWidth = input->size[4]; + const long inputHeight = input->size[3]; + const 
long inputDepth = input->size[2]; + const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW; + const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH; + const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT; + + // Batch size + input planes + const long batchSize = input->size[0]; + + // Define a buffer of ones, for bias accumulation + if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) + { + // Resize plane and fill with ones... + THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth); + THTensor_(fill)(ones, 1); + } + + // Resize temporary columns + THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth); + + // Helpers + THTensor *input_n = THTensor_(new)(); + THTensor *gradOutput_n = THTensor_(new)(); + + int elt; + // For each elt in batch, do: + for (elt = 0; elt < batchSize; ++elt) + { + // Matrix mulitply per output: + THTensor_(select)(input_n, input, 0, elt); + THTensor_(select)(gradOutput_n, gradOutput, 0, elt); + + // Extract columns: + THNN_(vol2col)( + THTensor_(data)(gradOutput_n), nOutputPlane, + outputDepth, outputHeight, outputWidth, + kT, kH, kW, + pT, pH, pW, + dT, dH, dW, + 1, 1, 1, + THTensor_(data)(columns) + ); + + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + const long n = columns->size[0]; // nOutputPlane * kt * kh * kw + const long m = input_n->size[0]; // nInputPlane + const long k = columns->size[1]; // inputHeight * inputWidth + + // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) + THBlas_(gemm)( + 't', 'n', + n, m, k, + scale, + THTensor_(data)(columns), k, + THTensor_(data)(input_n), k, + 1, + THTensor_(data)(gradWeight), n + ); + + // Do Bias: + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + const long m_ = nOutputPlane; + const long k_ = outputDepth * outputHeight * outputWidth; + + // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices) + if (gradBias) { + THBlas_(gemv)( + 't', + k_, m_, + scale, + THTensor_(data)(gradOutput_n), k_, + THTensor_(data)(ones), 1, + 1, + THTensor_(data)(gradBias), 1 + ); + } + } + + // Free + THTensor_(free)(input_n); + THTensor_(free)(gradOutput_n); + + // Resize + if (batch == 0) + { + THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth); + THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth); + } + + THTensor_(free)(input); + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxPooling.c new file mode 100644 index 000000000..a3601e0b6 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxPooling.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c" +#else + +void THNN_(VolumetricMaxPooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + bool ceilMode) +{ + THNN_(VolumetricDilatedMaxPooling_updateOutput)( + state, input, output, indices, + kT, kW, kH, dT, dW, dH, + pT, pW, pH, 1, 1, 1, ceilMode); +} + +void THNN_(VolumetricMaxPooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + 
THIndexTensor *indices, + int kT, + int kW, + int kH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH, + bool ceilMode) +{ + THNN_(VolumetricDilatedMaxPooling_updateGradInput)( + state, input, gradOutput, gradInput, indices, + kT, kW, kH, dT, dW, dH, + pT, pW, pH, 1, 1, 1, ceilMode); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxUnpooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxUnpooling.c new file mode 100644 index 000000000..d9d9e5951 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxUnpooling.c @@ -0,0 +1,373 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricMaxUnpooling.c" +#else + +static inline void THNN_(VolumetricMaxUnpooling_shapeCheck)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THIndexTensor *indices, + int oT, + int oW, + int oH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + + THNN_CHECK_SHAPE_INDICES(input, indices); + + THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, + "stride should be greater than zero, but got dT: %d dH: %d dW: %d", + dT, dH, dW); + + int dimw = 3; + int dimh = 2; + int dimt = 1; + int dimn = 0; + + if (input->nDimension == 5) + { + dimt++; + dimw++; + dimh++; + dimn++; + } + int nslices = input->size[dimn]; + + if (gradOutput != NULL) { + if (oT != gradOutput->size[dimt] || oW != gradOutput->size[dimw] || oH != gradOutput->size[dimh]) + { + THError( + "Inconsistent gradOutput size. oT= %d, oH= %d, oW= %d, gradOutput: %dx%dx%d", + oT, oH, oW, gradOutput->size[dimt], gradOutput->size[dimh], gradOutput->size[dimw] + ); + } + + THNN_CHECK_DIM_SIZE(gradOutput, input->nDimension, dimn, nslices); + } +} + +static void THNN_(VolumetricMaxUnpooling_updateOutput_frame)( + real *input_p, + real *output_p, + THIndex_t *ind_p, + int nslices, + int iT, + int iW, + int iH, + int oT, + int oW, + int oH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + int k; + int has_error = 0; + THIndex_t error_index; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + int ti, i, j, maxz, maxy, maxx; + for (ti = 0; ti < iT; ti++) + { + for (i = 0; i < iH; i++) + { + for (j = 0; j < iW; j++) + { + int start_t = ti * dT - pT; + int start_h = i * dH - pH; + int start_w = j * dW - pW; + + real *input_p_k = input_p + k*iT*iW*iH + ti*iW*iH + i*iW + j; + THIndex_t *ind_p_k = ind_p + k*iT*iW*iH + ti*iW*iH + i*iW + j; + + maxz = ((unsigned char*)(ind_p_k))[0]; /* retrieve position of max */ + maxy = ((unsigned char*)(ind_p_k))[1]; + maxx = ((unsigned char*)(ind_p_k))[2]; + + THIndex_t idx = k*oT*oW*oH + oH*oW*(start_t+maxz) + oW*(start_h+maxy) + (start_w+maxx); + if (start_t+maxz<0 || start_h+maxy<0 || start_w+maxx<0 || start_t+maxz>=oT + || start_h+maxy>=oH || start_w+maxx>=oW) + { +#pragma omp critical + { + has_error = 1; + error_index = idx; + } + } else { + output_p[idx] = *input_p_k; /* update output */ + } + } + } + } + } + if (has_error) { + THError( + "found an invalid max index %ld (output volumes are of size %dx%dx%d)", + error_index, oT, oH, oW + ); + } +} + +void THNN_(VolumetricMaxUnpooling_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + THIndexTensor *indices, + int oT, + int oW, + int oH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + int dimw = 3; + int dimh = 2; + int dimt = 1; + int nbatch = 1; + int nslices; + int iT; 
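+ // iT/iH/iW are the sizes of the (already pooled) input; oT/oH/oW are the requested unpooled output sizes passed by the caller.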
+ int iH; + int iW; + real *input_data; + real *output_data; + THIndex_t *indices_data; + + THNN_(VolumetricMaxUnpooling_shapeCheck)( + state, input, NULL, indices, + oT, oW, oH, dT, dW, dH, pT, pW, pH); + + if (input->nDimension == 5) + { + nbatch = input->size[0]; + dimt++; + dimw++; + dimh++; + } + + /* sizes */ + nslices = input->size[dimt-1]; + iT = input->size[dimt]; + iH = input->size[dimh]; + iW = input->size[dimw]; + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + indices = THIndexTensor_(newContiguous)(indices); + + /* resize output */ + if (input->nDimension == 4) + { + THTensor_(resize4d)(output, nslices, oT, oH, oW); + THTensor_(zero)(output); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + THNN_(VolumetricMaxUnpooling_updateOutput_frame)( + input_data, output_data, + indices_data, + nslices, + iT, iW, iH, + oT, oW, oH, + dT, dW, dH, pT, pW, pH + ); + } + else + { + int p; + + THTensor_(resize5d)(output, nbatch, nslices, oT, oH, oW); + THTensor_(zero)(output); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THIndexTensor_(data)(indices); + + for (p = 0; p < nbatch; p++) + { + THNN_(VolumetricMaxUnpooling_updateOutput_frame)( + input_data+p*nslices*iT*iW*iH, + output_data+p*nslices*oT*oW*oH, + indices_data+p*nslices*iT*iW*iH, + nslices, + iT, iW, iH, + oT, oW, oH, + dT, dW, dH, + pT, pW, pH + ); + } + } + + /* cleanup */ + THTensor_(free)(input); + THIndexTensor_(free)(indices); +} + +static void THNN_(VolumetricMaxUnpooling_updateGradInput_frame)( + real *gradInput_p, + real *gradOutput_p, + THIndex_t *ind_p, + int nslices, + int iT, + int iW, + int iH, + int oT, + int oW, + int oH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + int k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + int ti, i, j, maxz, maxy, maxx; + for (ti = 0; ti < iT; ti++) + { + for (i = 0; i < iH; i++) + { + for (j = 0; j < iW; j++) + { + int start_t = ti * dT - pT; + int start_h = i * dH - pH; + int start_w = j * dW - pW; + + real *gradInput_p_k = gradInput_p + k*iT*iW*iH + ti*iW*iH + i*iW + j; + THIndex_t *ind_p_k = ind_p + k*iT*iW*iH + ti*iW*iH + i*iW + j; + + maxz = ((unsigned char*)(ind_p_k))[0]; /* retrieve position of max */ + maxy = ((unsigned char*)(ind_p_k))[1]; + maxx = ((unsigned char*)(ind_p_k))[2]; + + if (start_t+maxz<0 || start_h+maxy<0 || start_w+maxx<0 + || start_t+maxz>=oT || start_h+maxy>=oH || start_w+maxx>=oW) + { + THError( + "invalid max index z= %d, y= %d, x= %d, oT= %d, oW= %d, oH= %d", + start_t+maxz, start_h+maxy, start_w+maxx, oT, oW, oH + ); + } + *gradInput_p_k = gradOutput_p[k*oT*oW*oH + oH*oW*(start_t+maxz) + + oW*(start_h+maxy) + (start_w+maxx)]; /* update gradient */ + } + } + } + } +} + +void THNN_(VolumetricMaxUnpooling_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THIndexTensor *indices, + int oT, + int oW, + int oH, + int dT, + int dW, + int dH, + int pT, + int pW, + int pH) +{ + int dimw = 3; + int dimh = 2; + int dimt = 1; + int nbatch = 1; + int nslices; + int iT; + int iH; + int iW; + real *gradInput_data; + real *gradOutput_data; + THIndex_t *indices_data; + + THNN_(VolumetricMaxUnpooling_shapeCheck)( + state, input, gradOutput, indices, + oT, oW, oH, dT, dW, dH, pT, pW, pH); + + // TODO: check gradOutput shape + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + 
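// the byte-packed (z, y, x) argmax offsets are dereferenced directly below, so indices must be contiguous as well + 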
indices = THIndexTensor_(newContiguous)(indices); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (input->nDimension == 5) + { + nbatch = input->size[0]; + dimt++; + dimw++; + dimh++; + } + + /* sizes */ + nslices = input->size[dimt-1]; + iT = input->size[dimt]; + iH = input->size[dimh]; + iW = input->size[dimw]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THIndexTensor_(data)(indices); + + /* backprop */ + if (input->nDimension == 4) + { + THNN_(VolumetricMaxUnpooling_updateGradInput_frame)( + gradInput_data, gradOutput_data, + indices_data, + nslices, + iT, iW, iH, + oT, oW, oH, + dT, dW, dH, + pT, pW, pH + ); + } + else + { + int p; + for (p = 0; p < nbatch; p++) + { + THNN_(VolumetricMaxUnpooling_updateGradInput_frame)( + gradInput_data+p*nslices*iT*iW*iH, + gradOutput_data+p*nslices*oT*oW*oH, + indices_data+p*nslices*iT*iW*iH, + nslices, + iT, iW, iH, + oT, oW, oH, + dT, dW, dH, + pT, pW, pH + ); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); + THIndexTensor_(free)(indices); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricReplicationPadding.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricReplicationPadding.c new file mode 100644 index 000000000..4d8993ec2 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricReplicationPadding.c @@ -0,0 +1,357 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricReplicationPadding.c" +#else + +static inline void THNN_(VolumetricReplicationPadding_shapeCheck)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + int pleft, int pright, + int ptop, int pbottom, + int pfront, int pback) { + int dimw = 3; + int dimh = 2; + int dimd = 1; + int dimslices = 0; + long nslices; + long idepth; + long iheight; + long iwidth; + long odepth; + long oheight; + long owidth; + + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D (batch mode) tensor expected for input, but got: %s"); + + if (input->nDimension == 5) + { + dimw++; + dimh++; + dimd++; + dimslices++; + } + + /* sizes */ + nslices = input->size[dimslices]; + idepth = input->size[dimd]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + odepth = idepth + pfront + pback; + oheight = iheight + ptop + pbottom; + owidth = iwidth + pleft + pright; + + THArgCheck(owidth >= 1 || oheight >= 1 || odepth >= 1, 2, + "input (D: %d, H: %d, W: %d) is too small." + " Calculated output D: %d H: %d W: %d", + idepth, iheight, iwidth, odepth, oheight, owidth); + + if (gradOutput != NULL) { + THArgCheck(nslices == THTensor_(size)(gradOutput, dimslices), 3, + "gradOutput slices unexpected. Expected: %d, Got: %d", + nslices, THTensor_(size)(gradOutput, dimslices)); + THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3, + "gradOutput width unexpected. Expected: %d, Got: %d", + owidth, THTensor_(size)(gradOutput, dimw)); + THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3, + "gradOutput height unexpected. Expected: %d, Got: %d", + oheight, THTensor_(size)(gradOutput, dimh)); + THArgCheck(odepth == THTensor_(size)(gradOutput, dimd), 3, + "gradOutput depth unexpected. 
Expected: %d, Got: %d", + odepth, THTensor_(size)(gradOutput, dimd)); + } +} + +static void THNN_(VolumetricReplicationPadding_updateOutput_frame)( + real *input_p, real *output_p, + long nslices, + long iwidth, long iheight, long idepth, + long owidth, long oheight, long odepth, + int pleft, int pright, + int ptop, int pbottom, + int pfront, int pback) +{ + int iStartX = fmax(0, -pleft); + int iStartY = fmax(0, -ptop); + int iStartZ = fmax(0, -pfront); + int oStartX = fmax(0, pleft); + int oStartY = fmax(0, ptop); + int oStartZ = fmax(0, pfront); + + long k, ip_x, ip_y, ip_z; +#pragma omp parallel for private(k, ip_x, ip_y, ip_z) + for (k = 0; k < nslices; k++) { + long i, j, z; + for (z = 0; z < odepth; z++) { + for (i = 0; i < oheight; i++) { + for (j = 0; j < owidth; j++) { + if (j < pleft) { + ip_x = pleft; + } else if (j >= pleft && j < iwidth + pleft) { + ip_x = j; + } else { + ip_x = iwidth + pleft - 1; + } + ip_x = ip_x - oStartX + iStartX; + + if (i < ptop) { + ip_y = ptop; + } else if (i >= ptop && i < iheight + ptop) { + ip_y = i; + } else { + ip_y = iheight + ptop - 1; + } + ip_y = ip_y - oStartY + iStartY; + + if (z < pfront) { + ip_z = pfront; + } else if (z >= pfront && z < idepth + pfront) { + ip_z = z; + } else { + ip_z = idepth + pfront - 1; + } + ip_z = ip_z - oStartZ + iStartZ; + + real *dest_p = output_p + k * owidth * oheight * odepth + + z * owidth * oheight + i * owidth + j; + real *src_p = input_p + k * iwidth * iheight * idepth + + ip_z * iwidth * iheight + ip_y * iwidth + ip_x; + *dest_p = *src_p; + } + } + } + } +} + +void THNN_(VolumetricReplicationPadding_updateOutput)(THNNState *state, + THTensor *input, + THTensor *output, + int pleft, int pright, + int ptop, int pbottom, + int pfront, int pback) +{ + int dimw = 3; + int dimh = 2; + int dimd = 1; + int dimslices = 0; + long nbatch = 1; + long nslices; + long idepth; + long iheight; + long iwidth; + long odepth; + long oheight; + long owidth; + real *input_data; + real *output_data; + +THNN_(VolumetricReplicationPadding_shapeCheck)( + state, input, NULL, pleft, pright, + ptop, pbottom, pfront, pback); + + if (input->nDimension == 5) + { + nbatch = input->size[0]; + dimw++; + dimh++; + dimd++; + dimslices++; + } + + /* sizes */ + nslices = input->size[dimslices]; + idepth = input->size[dimd]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + odepth = idepth + pfront + pback; + oheight = iheight + ptop + pbottom; + owidth = iwidth + pleft + pright; + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + /* resize output */ + if (input->nDimension == 4) + { + THTensor_(resize4d)(output, nslices, odepth, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + + THNN_(VolumetricReplicationPadding_updateOutput_frame)( + input_data, output_data, nslices, iwidth, iheight, idepth, + owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront, + pback); + } + else + { + long p; + + THTensor_(resize5d)(output, nbatch, nslices, odepth, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) + { + THNN_(VolumetricReplicationPadding_updateOutput_frame)( + input_data + p * nslices * iwidth * iheight * idepth, + output_data + p * nslices * owidth * oheight * odepth, + nslices, + iwidth, iheight, idepth, + owidth, oheight, odepth, + pleft, pright, + ptop, pbottom, + pfront, pback); + } + } + + /* cleanup */ + 
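// release the contiguous copy of input created by newContiguous above + 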
THTensor_(free)(input); +} + +static void THNN_(VolumetricReplicationPadding_updateGradInput_frame)( + real *ginput_p, real *goutput_p, + long nslices, + long iwidth, long iheight, long idepth, + long owidth, long oheight, long odepth, + int pleft, int pright, + int ptop, int pbottom, + int pfront, int pback) +{ + int iStartX = fmax(0, -pleft); + int iStartY = fmax(0, -ptop); + int iStartZ = fmax(0, -pfront); + int oStartX = fmax(0, pleft); + int oStartY = fmax(0, ptop); + int oStartZ = fmax(0, pfront); + + long k, ip_x, ip_y, ip_z; +#pragma omp parallel for private(k, ip_x, ip_y, ip_z) + for (k = 0; k < nslices; k++) { + long i, j, z; + for (z = 0; z < odepth; z++) { + for (i = 0; i < oheight; i++) { + for (j = 0; j < owidth; j++) { + if (j < pleft) { + ip_x = pleft; + } else if (j >= pleft && j < iwidth + pleft) { + ip_x = j; + } else { + ip_x = iwidth + pleft - 1; + } + ip_x = ip_x - oStartX + iStartX; + + if (i < ptop) { + ip_y = ptop; + } else if (i >= ptop && i < iheight + ptop) { + ip_y = i; + } else { + ip_y = iheight + ptop - 1; + } + ip_y = ip_y - oStartY + iStartY; + + if (z < pfront) { + ip_z = pfront; + } else if (z >= pfront && z < idepth + pfront) { + ip_z = z; + } else { + ip_z = idepth + pfront - 1; + } + ip_z = ip_z - oStartZ + iStartZ; + + real *src_p = goutput_p + k * owidth * oheight * odepth + + z * owidth * oheight + i * owidth + j; + real *dest_p = ginput_p + k * iwidth * iheight * idepth + + ip_z * iwidth * iheight + ip_y * iwidth + ip_x; + *dest_p += *src_p; + } + } + } + } +} + +void THNN_(VolumetricReplicationPadding_updateGradInput)(THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int pleft, int pright, + int ptop, int pbottom, + int pfront, int pback) +{ + int dimw = 3; + int dimh = 2; + int dimd = 1; + int dimslices = 0; + long nbatch = 1; + long nslices; + long idepth; + long iheight; + long iwidth; + long odepth; + long oheight; + long owidth; + + if (input->nDimension == 5) + { + nbatch = input->size[0]; + dimw++; + dimh++; + dimd++; + dimslices++; + } + + /* sizes */ + nslices = input->size[dimslices]; + idepth = input->size[dimd]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + odepth = idepth + pfront + pback; + oheight = iheight + ptop + pbottom; + owidth = iwidth + pleft + pright; + + +THNN_(VolumetricReplicationPadding_shapeCheck)( + state, input, NULL, pleft, pright, + ptop, pbottom, pfront, pback); + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* backprop */ + if (input->nDimension == 4) { + THNN_(VolumetricReplicationPadding_updateGradInput_frame)( + THTensor_(data)(gradInput), + THTensor_(data)(gradOutput), + nslices, + iwidth, iheight, idepth, + owidth, oheight, odepth, + pleft, pright, + ptop, pbottom, + pfront, pback); + } else { + long p; +#pragma omp parallel for private(p) + for (p = 0; p < nbatch; p++) { + THNN_(VolumetricReplicationPadding_updateGradInput_frame)( + THTensor_(data)(gradInput) + p * nslices * idepth * iheight * iwidth, + THTensor_(data)(gradOutput) + p * nslices * odepth * oheight * owidth, + nslices, + iwidth, iheight, idepth, + owidth, oheight, odepth, + pleft, pright, + ptop, pbottom, + pfront, pback); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingNearest.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingNearest.c new file mode 
100644 index 000000000..9068fb58d --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingNearest.c @@ -0,0 +1,226 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricUpSamplingNearest.c" +#else + + +static inline void THNN_(VolumetricUpSamplingNearest_shapeCheck) + (THTensor *input, THTensor *gradOutput, + int scale_factor) { + THArgCheck(input != NULL, 2, "5D input tensor expected but got NULL"); + THArgCheck(scale_factor > 1, 4, + "scale_factor must be greater than 1, but got: %d", scale_factor); + THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input, + "4D or 5D input tensor expected but got: %s"); + if (input->nDimension == 4) { + int nChannels = THTensor_(size)(input, 0); + int inputDepth = THTensor_(size)(input, 1); + int inputHeight = THTensor_(size)(input, 2); + int inputWidth = THTensor_(size)(input, 3); + int outputDepth = inputDepth * scale_factor; + int outputHeight = inputHeight * scale_factor; + int outputWidth = inputWidth * scale_factor; + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nChannels); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, outputDepth); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth); + } + } else { + int nBatch = THTensor_(size)(input, 0); + int nChannels = THTensor_(size)(input, 1); + int inputDepth = THTensor_(size)(input, 2); + int inputHeight = THTensor_(size)(input, 3); + int inputWidth = THTensor_(size)(input, 4); + int outputDepth = inputDepth * scale_factor; + int outputHeight = inputHeight * scale_factor; + int outputWidth = inputWidth * scale_factor; + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch); + THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels); + THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth); + THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth); + } + } +} + +void THNN_(VolumetricUpSamplingNearest_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int scale_factor) +{ + THNN_(VolumetricUpSamplingNearest_shapeCheck)(input, NULL, scale_factor); + int inputDepth = THTensor_(size)(input, input->nDimension-3); + int inputHeight = THTensor_(size)(input, input->nDimension-2); + int inputWidth = THTensor_(size)(input, input->nDimension-1); + int outputDepth = inputDepth * scale_factor; + int outputHeight = inputHeight * scale_factor; + int outputWidth = inputWidth * scale_factor; + + if (input->nDimension == 4) { + THTensor_(resize4d)(output, + THTensor_(size)(input, 0), + outputDepth, outputHeight, outputWidth); + } else { + THTensor_(resize5d)(output, + THTensor_(size)(input, 0), + THTensor_(size)(input, 1), + outputDepth, outputHeight, outputWidth); + } + + int dT = scale_factor; + int dW = scale_factor; + int dH = scale_factor; + int xDim = input->nDimension-3; + int yDim = input->nDimension-2; + int zDim = input->nDimension-1; + + // dims + int idim = input->nDimension; + int osz0 = output->size[0]; + int osz1 = output->size[1]; + int osz2 = output->size[2]; + int osz3 = output->size[3]; + int osz4 = 1; + if (idim > 4) { + osz4 = output->size[4]; + } + + // get strides + long *is = input->stride; + long *os = output->stride; + + // get raw pointers + real *pin = THTensor_(data)(input); + real *pout = THTensor_(data)(output); + + // perform the upsampling + int i0, i1, i2, i3, i4, isrc, idst; + int iout[5]; // Output indices + int iin[5]; // Input indices + + for (i0 = 0; i0 < osz0; i0++) { + 
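/* nearest-neighbor upsampling: every output element is visited once and
+    mapped back to its source element by integer division on the three
+    upsampled dimensions (iin[dim] = iout[dim] / scale_factor, see below);
+    the remaining batch/feature dimensions copy through unchanged. */
+    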
iout[0] = i0; + iin[0] = i0; + for (i1 = 0; i1 < osz1; i1++) { + iout[1] = i1; + iin[1] = i1; + for (i2 = 0; i2 < osz2; i2++) { + iout[2] = i2; + iin[2] = i2; + for (i3 = 0; i3 < osz3; i3++) { + iout[3] = i3; + iin[3] = i3; + for (i4 = 0; i4 < osz4; i4++) { + iout[4] = i4; + iin[4] = i4; + + // set the indices for the upsampled dimensions + iin[xDim] = iout[xDim] / dW; + iin[yDim] = iout[yDim] / dH; + iin[zDim] = iout[zDim] / dT; + + idst = i0*os[0] + i1*os[1] + i2*os[2] + i3*os[3]; + isrc = iin[0]*is[0] + iin[1]*is[1] + iin[2]*is[2] + iin[3]*is[3]; + if (idim > 4) { + idst += i4*os[4]; + isrc += iin[4]*is[4]; + } + + pout[idst] = pin[isrc]; + } + } + } + } + } +} + +void THNN_(VolumetricUpSamplingNearest_updateGradInput)( + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + int scale_factor) +{ + THNN_(VolumetricUpSamplingNearest_shapeCheck)(input, gradOutput, scale_factor); + THTensor_(resizeAs)(gradInput, input); + + int dW = scale_factor; + int dH = scale_factor; + int dT = scale_factor; + int xDim = gradInput->nDimension-3; + int yDim = gradInput->nDimension-2; + int zDim = gradInput->nDimension-1; + + // dims + int idim = gradInput->nDimension; // Guaranteed to be between 3 and 5 + int isz0 = gradInput->size[0]; + int isz1 = gradInput->size[1]; + int isz2 = gradInput->size[2]; + int isz3 = gradInput->size[3]; + int isz4 = 1; + if (idim > 4) { + isz4 = gradInput->size[4]; + } + + // get strides + long *is = gradInput->stride; + long *os = gradOutput->stride; + + // get raw pointers + real *pin = THTensor_(data)(gradInput); + real *pout = THTensor_(data)(gradOutput); + + // perform the upsampling + int i0, i1, i2, i3, i4, isrc, idst, x, y, z; + int iin[5]; // Input indices + int iout[5]; // Output indices + + THTensor_(zero)(gradInput); + + for (i0 = 0; i0 < isz0; i0++) { + iin[0] = i0; + iout[0] = i0; + for (i1 = 0; i1 < isz1; i1++) { + iin[1] = i1; + iout[1] = i1; + for (i2 = 0; i2 < isz2; i2++) { + iin[2] = i2; + iout[2] = i2; + for (i3 = 0; i3 < isz3; i3++) { + iin[3] = i3; + iout[3] = i3; + + for (i4 = 0; i4 < isz4; i4++) { + iin[4] = i4; + iout[4] = i4; + + idst = i0*is[0] + i1*is[1] + i2*is[2] + i3*is[3]; + if (idim > 4) { + idst += i4*is[4]; + } + + // Now accumulate the gradients from gradOutput + for (z = 0; z < dT; z++) { + for (y = 0; y < dH; y++) { + for (x = 0; x < dW; x++) { + iout[xDim] = dW * iin[xDim] + x; + iout[yDim] = dH * iin[yDim] + y; + iout[zDim] = dT * iin[zDim] + z; + isrc = iout[0]*os[0] + iout[1]*os[1] + iout[2]*os[2] + iout[3]*os[3]; + if (idim > 4) { + isrc += iout[4]*os[4]; + } + pin[idst] += pout[isrc]; + } + } + } + } + } + } + } + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingTrilinear.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingTrilinear.c new file mode 100644 index 000000000..f2b04dba9 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingTrilinear.c @@ -0,0 +1,213 @@ +// Adapted from interp.cpp from Caffe util by Pauline Luc +// Originally developed by George Papandreou + +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricUpSamplingTrilinear.c" +#else + +static inline void THNN_(VolumetricUpSamplingTrilinear_shapeCheck) + (THTensor *input, THTensor *gradOutput, + int nBatch, int nChannels, + int inputDepth, int inputHeight, int inputWidth, + int outputDepth, int outputHeight, int outputWidth) { + THArgCheck(inputDepth > 0 && inputHeight > 0 && inputWidth > 0 + && outputDepth > 0 && outputHeight > 0 && outputWidth > 
0, 2, + "input and output sizes should be greater than 0," + " but got input (D: %d, H: %d, W: %d) output (D: %d, H: %d, W: %d)", + inputDepth, inputHeight, inputWidth, outputDepth, outputHeight, outputWidth); + if (input != NULL) { + THNN_ARGCHECK(input->nDimension == 5, 2, input, + "5D input tensor expected but got: %s"); + } + + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch); + THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels); + THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth); + THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight); + THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth); + } +} + +void THNN_(VolumetricUpSamplingTrilinear_updateOutput)( + THNNState *state, + THTensor *input, + THTensor *output, + int outputDepth, + int outputHeight, + int outputWidth){ + + int nbatch = THTensor_(size)(input, 0); + int channels = THTensor_(size)(input, 1); + int inputDepth = THTensor_(size)(input, 2); + int inputHeight = THTensor_(size)(input, 3); + int inputWidth = THTensor_(size)(input, 4); + + THNN_(VolumetricUpSamplingTrilinear_shapeCheck) + (input, NULL, + nbatch, channels, + inputDepth, inputHeight, inputWidth, + outputDepth, outputHeight, outputWidth); + + input = THTensor_(newContiguous)(input); + THTensor_(resize5d)(output, + THTensor_(size)(input, 0), + THTensor_(size)(input, 1), + outputDepth, outputHeight, outputWidth); + THTensor_(zero)(output); + real *idata = THTensor_(data)(input); + real *odata = THTensor_(data)(output); + channels = nbatch * channels; + THAssert(inputDepth > 0 && inputHeight > 0 && inputWidth > 0 && + outputDepth > 0 && outputHeight > 0 && outputWidth > 0); + // special case: just copy + if (inputDepth == outputDepth && inputHeight == outputHeight && inputWidth == outputWidth) { + for (int t2 = 0; t2 < outputDepth; ++t2) { + const int t1 = t2; + for (int h2 = 0; h2 < outputHeight; ++h2) { + const int h1 = h2; + for (int w2 = 0; w2 < outputWidth; ++w2) { + const int w1 = w2; + const real* pos1 = &idata[t1 * inputHeight * inputWidth + h1 * inputWidth + w1]; + real* pos2 = &odata[t2 * outputHeight * outputWidth + h2 * outputWidth + w2]; + for (int c = 0; c < channels; ++c) { + pos2[0] = pos1[0]; + pos1 += inputWidth * inputHeight * inputDepth; + pos2 += outputWidth * outputHeight * outputDepth; + } + } + } + } + return; + } + const float rdepth = (outputDepth > 1) ? (float)(inputDepth - 1)/(outputDepth - 1) : 0.f; + const float rheight = (outputHeight > 1) ? (float)(inputHeight - 1)/(outputHeight - 1) : 0.f; + const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1) / (outputWidth - 1) : 0.f; + for (int t2 = 0; t2 < outputDepth; ++t2) { + const float t1r = rdepth * t2; + const int t1 = t1r; + const int t1p = (t1 < inputDepth - 1) ? 1 : 0; + const real t1lambda = t1r - t1; + const real t0lambda = (real)1. - t1lambda; + for (int h2 = 0; h2 < outputHeight; ++h2) { + const float h1r = rheight * h2; + const int h1 = h1r; + const int h1p = (h1 < inputHeight - 1) ? 1 : 0; + const real h1lambda = h1r - h1; + const real h0lambda = (real)1. - h1lambda; + for (int w2 = 0; w2 < outputWidth; ++w2) { + const float w1r = rwidth * w2; + const int w1 = w1r; + const int w1p = (w1 < inputWidth - 1) ? 1 : 0; + const real w1lambda = w1r - w1; + const real w0lambda = (real)1. 
- w1lambda; + const real* pos1 = &idata[t1 * inputHeight * inputWidth + h1 * inputWidth + w1]; + real* pos2 = &odata[t2 * outputHeight * outputWidth + h2 * outputWidth + w2]; + for (int c = 0; c < channels; ++c) { + pos2[0] = t0lambda * (h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) + + h1lambda * (w0lambda * pos1[h1p * inputWidth] + + w1lambda * pos1[h1p * inputWidth + w1p])) + + t1lambda * (h0lambda * (w0lambda * pos1[t1p * inputHeight * inputWidth] + + w1lambda * pos1[t1p * inputHeight * inputWidth + + w1p]) + + h1lambda * (w0lambda * pos1[t1p * inputHeight * inputWidth + + h1p * inputWidth] + + w1lambda * pos1[t1p * inputHeight * inputWidth + + h1p * inputWidth + w1p])); + pos1 += inputWidth * inputHeight * inputDepth; + pos2 += outputWidth * outputHeight * outputDepth; + } + } + } + } + THTensor_(free)(input); +} + +void THNN_(VolumetricUpSamplingTrilinear_updateGradInput)( + THNNState *state, + THTensor *gradOutput, + THTensor *gradInput, + int nbatch, + int channels, + int inputDepth, + int inputHeight, + int inputWidth, + int outputDepth, + int outputHeight, + int outputWidth){ + + THNN_(VolumetricUpSamplingTrilinear_shapeCheck) + (NULL, gradOutput, + nbatch, channels, + inputDepth, inputHeight, inputWidth, + outputDepth, outputHeight, outputWidth); + + THTensor_(resize5d)(gradInput, nbatch, channels, inputDepth, inputHeight, inputWidth); + THTensor_(zero)(gradInput); + gradOutput = THTensor_(newContiguous)(gradOutput); + real *data1 = THTensor_(data)(gradInput); + real *data2 = THTensor_(data)(gradOutput); + channels = nbatch * channels; + + // special case: same-size matching grids + if (inputDepth == outputDepth && inputHeight == outputHeight && inputWidth == outputWidth) { + for (int t2 = 0; t2 < outputDepth; ++t2) { + const int t1 = t2; + for (int h2 = 0; h2 < outputHeight; ++h2) { + const int h1 = h2; + for (int w2 = 0; w2 < outputWidth; ++w2) { + const int w1 = w2; + real* pos1 = &data1[t1 * inputHeight * inputWidth + h1 * inputWidth + w1]; + const real* pos2 = &data2[t2 * outputHeight * outputWidth + h2 * outputWidth + w2]; + for (int c = 0; c < channels; ++c) { + pos1[0] += pos2[0]; + pos1 += inputWidth * inputHeight * inputDepth; + pos2 += outputWidth * outputHeight * outputDepth; + } + } + } + } + return; + } + const float rdepth = (outputDepth > 1) ? (float)(inputDepth - 1)/(outputDepth - 1) : 0.f; + const float rheight = (outputHeight > 1) ? (float)(inputHeight - 1)/(outputHeight - 1) : 0.f; + const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1)/(outputWidth - 1) : 0.f; + for (int t2 = 0; t2 < outputDepth; ++t2) { + const float t1r = rdepth * t2; + const int t1 = t1r; + const int t1p = (t1 < inputDepth - 1) ? 1 : 0; + const real t1lambda = t1r - t1; + const real t0lambda = (real)1. - t1lambda; + for (int h2 = 0; h2 < outputHeight; ++h2) { + const float h1r = rheight * h2; + const int h1 = h1r; + const int h1p = (h1 < inputHeight - 1) ? 1 : 0; + const real h1lambda = h1r - h1; + const real h0lambda = (real)1. - h1lambda; + for (int w2 = 0; w2 < outputWidth; ++w2) { + const float w1r = rwidth * w2; + const int w1 = w1r; + const int w1p = (w1 < inputWidth - 1) ? 1 : 0; + const real w1lambda = w1r - w1; + const real w0lambda = (real)1. 
- w1lambda; + real* pos1 = &data1[t1 * inputHeight * inputWidth + h1 * inputWidth + w1]; + const real* pos2 = &data2[t2 * outputHeight * outputWidth + h2 * outputWidth + w2]; + for (int c = 0; c < channels; ++c) { + pos1[0] += t0lambda * h0lambda * w0lambda * pos2[0]; + pos1[w1p] += t0lambda * h0lambda * w1lambda * pos2[0]; + pos1[h1p * inputWidth] += t0lambda * h1lambda * w0lambda * pos2[0]; + pos1[h1p * inputWidth + w1p] += t0lambda * h1lambda * w1lambda * pos2[0]; + pos1[t1p * inputHeight * inputWidth] += t1lambda * h0lambda * w0lambda * pos2[0]; + pos1[t1p * inputHeight * inputWidth + w1p] += t1lambda * h0lambda * w1lambda * pos2[0]; + pos1[t1p * inputHeight * inputWidth + h1p * inputWidth] += t1lambda * h1lambda * w0lambda * pos2[0]; + pos1[t1p * inputHeight * inputWidth + h1p * inputWidth + w1p] += t1lambda * h1lambda * w1lambda * pos2[0]; + pos1 += inputWidth * inputHeight * inputDepth; + pos2 += outputWidth * outputHeight * outputDepth; + } + } + } + } + THTensor_(free)(gradOutput); +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/generic/unfold.c b/contrib/lua-torch/nn/lib/THNN/generic/unfold.c new file mode 100644 index 000000000..14a73b567 --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/generic/unfold.c @@ -0,0 +1,166 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/unfold.c" +#else + +/* note: due to write issues, this one cannot be parallelized as well as unfolded_copy */ +void THNN_(unfolded_acc)( + THTensor *finput, + THTensor *input, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int nInputPlane, + int inputWidth, + int inputHeight, + int outputWidth, + int outputHeight) +{ + // This function assumes that + // outputHeight*dH does not overflow a long + // outputWidth*dW does not overflow a long + + int nip; + + real *input_data = THTensor_(data)(input); + real *finput_data = THTensor_(data)(finput); + +#pragma omp parallel for private(nip) + for(nip = 0; nip < nInputPlane; nip++) + { + int kw, kh, y, x; + long ix, iy; + for(kh = 0; kh < kH; kh++) + { + for(kw = 0; kw < kW; kw++) + { + real *src = finput_data + nip*((size_t)kH*kW*outputHeight*outputWidth) + kh*((size_t)kW*outputHeight*outputWidth) + kw*((size_t)outputHeight*outputWidth); + real *dst = input_data + nip*((size_t)inputHeight*inputWidth); + if (padW > 0 || padH > 0) { + int lpad,rpad; + for(y = 0; y < outputHeight; y++) { + iy = (long)y*dH - padH + kh; + if (iy < 0 || iy >= inputHeight) { + } else { + if (dW==1){ + ix = 0 - padW + kw; + lpad = fmaxf(0,padW-kw); + rpad = fmaxf(0,padW-(kW-kw-1)); + real *dst_slice = dst+(size_t)iy*inputWidth+ix+lpad; + THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+lpad, 1, outputWidth - lpad - rpad); /* note: THVector_add could handle 1 value better */ + } + else{ + for (x=0; x<outputWidth; x++){ + ix = (long)x*dW - padW + kw; + if (ix < 0 || ix >= inputWidth){ + }else{ + real *dst_slice = dst+(size_t)iy*inputWidth+ix; + THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+x, 1, 1); + } + } + } + } + } + } else { + for(y = 0; y < outputHeight; y++) { + iy = (long)y*dH + kh; + ix = 0 + kw; + if (dW == 1 ) { + real *dst_slice = dst+(size_t)iy*inputWidth+ix; + THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth, 1, outputWidth); /* note: THVector_add could handle 1 value better */ + }else{ + for(x = 0; x < outputWidth; x++) { + real *dst_slice = dst+(size_t)iy*inputWidth+ix+x*dW; + THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+x, 1, 1); + } + } + } + } + } + } + } +} + +void 
THNN_(unfolded_copy)( + THTensor *finput, + THTensor *input, + int kW, + int kH, + int dW, + int dH, + int padW, + int padH, + int nInputPlane, + int inputWidth, + int inputHeight, + int outputWidth, + int outputHeight) +{ + // This function assumes that + // kH*kW does not overflow an int + // nInputPlane*kH*kW does not overflow a long + // outputHeight*dH does not overflow a long + // outputWidth*dW does not overflow a long + + long k; + real *input_data = THTensor_(data)(input); + real *finput_data = THTensor_(data)(finput); + +#pragma omp parallel for private(k) + for(k = 0; k < (long)nInputPlane*kH*kW; k++) { + long nip = k / (kH*kW); + long rest = k % (kH*kW); + long kh = rest / kW; + long kw = rest % kW; + int x, y; + long ix, iy; + real *dst = finput_data + nip*((size_t)kH*kW*outputHeight*outputWidth) + kh*((size_t)kW*outputHeight*outputWidth) + kw*((size_t)outputHeight*outputWidth); + real *src = input_data + nip*((size_t)inputHeight*inputWidth); + if (padW > 0 || padH > 0) { + long lpad,rpad; + for(y = 0; y < outputHeight; y++) { + iy = (long)y*dH - padH + kh; + if (iy < 0 || iy >= inputHeight) { + memset(dst+(size_t)y*outputWidth, 0, sizeof(real)*outputWidth); + } else { + if (dW==1){ + ix = 0 - padW + kw; + lpad = fmaxf(0,padW-kw); + rpad = fmaxf(0,padW-(kW-kw-1)); + if (outputWidth-rpad-lpad <= 0) { + memset(dst+(size_t)y*outputWidth, 0, sizeof(real)*outputWidth); + } else { + if (lpad > 0) memset(dst+(size_t)y*outputWidth, 0, sizeof(real)*lpad); + memcpy(dst+(size_t)y*outputWidth+lpad, src+(size_t)iy*inputWidth+ix+lpad, sizeof(real)*(outputWidth-rpad-lpad)); + if (rpad > 0) memset(dst+(size_t)y*outputWidth + outputWidth - rpad, 0, sizeof(real)*rpad); + } + } + else{ + for (x=0; x<outputWidth; x++){ + ix = (long)x*dW - padW + kw; + if (ix < 0 || ix >= inputWidth) + memset(dst+(size_t)y*outputWidth+x, 0, sizeof(real)*1); + else + memcpy(dst+(size_t)y*outputWidth+x, src+(size_t)iy*inputWidth+ix, sizeof(real)*(1)); + } + } + } + } + } else { + for(y = 0; y < outputHeight; y++) { + iy = (long)y*dH + kh; + ix = 0 + kw; + if (dW == 1) + memcpy(dst+(size_t)y*outputWidth, src+(size_t)iy*inputWidth+ix, sizeof(real)*outputWidth); + else{ + for (x=0; x<outputWidth; x++) + memcpy(dst+(size_t)y*outputWidth+x, src+(size_t)iy*inputWidth+ix+(long)x*dW, sizeof(real)*(1)); + } + } + } + } +} + +#endif diff --git a/contrib/lua-torch/nn/lib/THNN/init.c b/contrib/lua-torch/nn/lib/THNN/init.c new file mode 100644 index 000000000..5c8c023dc --- /dev/null +++ b/contrib/lua-torch/nn/lib/THNN/init.c @@ -0,0 +1,280 @@ +#include "TH.h" +#include "THNN.h" + +#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) +#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) + +#define THNN_CHECK_SHAPE(I1, I2) \ + if (I1 != NULL && I2 != NULL && !THTensor_(isSameSizeAs)(I1, I2)) \ + { \ + THDescBuff s1 = THTensor_(sizeDesc)(I1); \ + THDescBuff s2 = THTensor_(sizeDesc)(I2); \ + THError(#I1 " and " #I2 " shapes do not match: " \ + #I1 " %s, " #I2 " %s", s1.str, s2.str); \ + } + +#define THNN_CHECK_SHAPE_INDICES(I1, I2) \ + THLongStorage *size2 = THLongTensor_newSizeOf(I2); \ + if (I1 != NULL && I2 != NULL && !THTensor_(isSize)(I1, size2)) \ + { \ + THDescBuff s1 = THTensor_(sizeDesc)(I1); \ + THDescBuff s2 = THLongTensor_sizeDesc(I2); \ + THLongStorage_free(size2); \ + THError(#I1 " and " #I2 " shapes do not match: " \ + #I1 " %s, " #I2 " %s", s1.str, s2.str); \ + } else { \ + THLongStorage_free(size2); \ + } + +#define THNN_CHECK_NELEMENT(I1, I2) \ + if (I1 != NULL && I2 != NULL ) { \ + ptrdiff_t n1 = 
THTensor_(nElement)(I1); \ + ptrdiff_t n2 = THTensor_(nElement)(I2); \ + if (n1 != n2) \ + { \ + THDescBuff s1 = THTensor_(sizeDesc)(I1); \ + THDescBuff s2 = THTensor_(sizeDesc)(I2); \ + THError(#I1 " and " #I2 " have different number of elements: " \ + #I1 "%s has %ld elements, while " \ + #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \ + } \ + } + +#define THNN_CHECK_DIM_SIZE(T, DIM, DIM_SIZE, SIZE) \ + if (THTensor_(nDimension)(T) != DIM || \ + THTensor_(size)(T, DIM_SIZE) != SIZE) { \ + THDescBuff s1 = THTensor_(sizeDesc)(T); \ + THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ + " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \ + } + +#define THNN_CHECK_DIM_SIZE_INDICES(T, DIM, DIM_SIZE, SIZE) \ + if (THIndexTensor_(nDimension)(T) != DIM || \ + THIndexTensor_(size)(T, DIM_SIZE) != SIZE) { \ + THDescBuff s1 = THIndexTensor_(sizeDesc)(T); \ + THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ + " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \ + } + +#define THNN_ARGCHECK(COND, ARG, T, FORMAT) \ + if (!(COND)) { \ + THDescBuff s1 = THTensor_(sizeDesc)(T); \ + THArgCheck(COND, ARG, FORMAT, s1.str); \ + } + +#include "generic/Abs.c" +#include "THGenerateFloatTypes.h" + +#include "generic/AbsCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/BCECriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/ClassNLLCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialClassNLLCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/DistKLDivCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/ELU.c" +#include "THGenerateFloatTypes.h" + +#include "generic/HardShrink.c" +#include "THGenerateFloatTypes.h" + +#include "generic/HardTanh.c" +#include "THGenerateFloatTypes.h" + +#include "generic/GatedLinearUnit.c" +#include "THGenerateFloatTypes.h" + +#include "generic/L1Cost.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LeakyReLU.c" +#include "THGenerateFloatTypes.h" + +#include "generic/FusedRNNKernel.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LogSigmoid.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LogSoftMax.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LookupTable.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MSECriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MarginCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MultiLabelMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MultiMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Linear.c" +#include "THGenerateFloatTypes.h" + +#include "generic/PReLU.c" +#include "THGenerateFloatTypes.h" + +#include "generic/RReLU.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Sigmoid.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SmoothL1Criterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftMax.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftPlus.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftShrink.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SparseLinear.c" +#include "THGenerateFloatTypes.h" + +#include "generic/IndexLinear.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Sqrt.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Square.c" +#include "THGenerateFloatTypes.h" + 
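+/* Note on the include pattern used throughout this file: each generic/*.c
+   is compiled once per floating-point type. THGenerateFloatTypes.h defines
+   real/Real as float/Float and then as double/Double, re-including the
+   preceding TH_GENERIC_FILE for each, so a function written here as
+   THNN_(Abs_updateOutput) is emitted as both THNN_FloatAbs_updateOutput
+   and THNN_DoubleAbs_updateOutput. */
+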
+#include "generic/Tanh.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Threshold.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalSubSampling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalRowConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/BatchNormalization.c" +#include "THGenerateFloatTypes.h" + +#include "generic/unfold.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolutionMap.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolutionMM.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialDepthWiseConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolutionLocal.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialFullConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialFullConvolutionMap.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialDilatedConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialAdaptiveMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialAdaptiveAveragePooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialAveragePooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialFractionalMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialDilatedMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialMaxUnpooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialSubSampling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialUpSamplingNearest.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialUpSamplingBilinear.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricAveragePooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricConvolutionMM.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricFullConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricDilatedConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricDilatedMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricFractionalMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricMaxUnpooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialReflectionPadding.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialReplicationPadding.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricReplicationPadding.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricUpSamplingNearest.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricUpSamplingTrilinear.c" +#include "THGenerateFloatTypes.h" + diff --git a/contrib/lua-torch/nn/mkdocs.yml b/contrib/lua-torch/nn/mkdocs.yml new file mode 100644 index 000000000..a37a34fb0 --- /dev/null +++ b/contrib/lua-torch/nn/mkdocs.yml @@ -0,0 +1,18 @@ +site_name: nn +theme : simplex +repo_url : https://github.com/torch/nn +use_directory_urls : false +markdown_extensions: [extra] +docs_dir : doc +pages: +- 
[index.md, Home] +- [module.md, Modules, Module Interface] +- [containers.md, Modules, Containers] +- [transfer.md, Modules, Transfer Functions] +- [simple.md, Modules, Simple Layers] +- [table.md, Modules, Table Layers] +- [convolution.md, Modules, Convolution Layers] +- [criterion.md, Criterion, Criterions] +- [overview.md, Additional Documentation, Overview] +- [training.md, Additional Documentation, Training] +- [testing.md, Additional Documentation, Testing] diff --git a/contrib/lua-torch/nn/test.lua b/contrib/lua-torch/nn/test.lua new file mode 100755 index 000000000..4e3f627fc --- /dev/null +++ b/contrib/lua-torch/nn/test.lua @@ -0,0 +1,8787 @@ +-- you can easily test specific units like this: +-- th -lnn -e "nn.test{'LookupTable'}" +-- th -lnn -e "nn.test{'LookupTable', 'Add'}" + +local mytester = torch.Tester() +local jac +local sjac + +local precision = 1e-5 +local expprecision = 1.1e-4 + +local nntest = torch.TestSuite() + +local function equal(t1, t2, msg) + if (torch.type(t1) == "table") then + for k, v in pairs(t2) do + equal(t1[k], t2[k], msg) + end + else + mytester:eq(t1, t2, 0.00001, msg) + end +end + + +--[[ Generate tests to exercise the tostring component of modules. ]] +local tostringTestModules = { + nnLinear = nn.Linear(1, 2), + nnReshape = nn.Reshape(10), + nnSpatialZeroPadding = nn.SpatialZeroPadding(1, 1, 1, 1)} +for test_name, component in pairs(tostringTestModules) do + nntest['tostring' .. test_name] = + function () + mytester:assert(tostring(component):find( + torch.type(component) .. '(', 1, true) ~= nil, + 'nn components should have a descriptive tostring' .. + ' beginning with the classname') + end +end + +function nntest.Add() + local inj_vals = {math.random(3,5), 1} -- Also test the inj = 1 spatial case + local ini = math.random(3,5) + local ink = math.random(3,5) + + for ind, inj in pairs(inj_vals) do + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Add(ini,inj,ink) + + -- 1D + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format('error on bias [%s]', t)) + end + + -- 2D + local nframe = math.random(50,70) + local input = torch.Tensor(nframe, ini,inj,ink):zero() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format('error on bias [%s]', t)) + end + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) + end +end + +function nntest.Bottle() + local ini = 2 + local inj = 3 + local ink = 4 + local out = 5 + local input = torch.Tensor(ini,inj,ink):normal() + local linear = nn.Linear(ink, out) + local module1 = nn.Bottle(linear) + local module2 = nn.Sequential() + module2:add(nn.View(ini*inj, ink)) + module2:add(linear) + module2:add(nn.View(ini, inj, out)) + local output1 = module1:forward(input) + local output2 = module2:forward(input) + mytester:eq(output1, output2, 0.0001, 'Bottle output not the same as Module') + + local shape = {4, 5, 6, 7, 8, 1, 3} + local input = torch.Tensor(table.unpack(shape)):normal() + local module = nn.Sequential() + module:add(nn.Squeeze(2)) + module:add(nn.Linear(3, 3)) + local module1 = nn.Bottle(module, 3, 2) + local outShape = {4, 5, 6, 7, 8, 3} + local module2 = nn.Sequential() + module2:add(nn.View(4*5*6*7*8, 1, 3)) + module2:add(module) + module2:add(nn.View(table.unpack(outShape))) + local output1 = module1:forward(input) + local grad = torch.Tensor(output1:size()):normal() + local gradOutput1 = module1:backward(input, grad):clone() + local output2 = module2:forward(input) + local gradOutput2 = module2:backward(input, grad):clone() + mytester:eq(output1, output2, 0.0001, 'Bottle output not the same as Module') + mytester:eq(gradOutput1, gradOutput2, 0.0001, 'Bottle gradOutput not the same as Module') +end + +function nntest.WeightNorm() + local input = torch.rand(10, 5) + + -- temporal convolution + local model = nn.WeightNorm(nn.TemporalConvolution(5, 20, 2, 1)) + local err = nn.Jacobian.testJacobianParameters(model, input, + model.bias, model.gradBias) + mytester:assert(err < precision, 'Temporal Convolution bias') + err = nn.Jacobian.testJacobianParameters(model, input, + model.g, model.gradG) + mytester:assert(err < precision, 'Temporal Convolution g') + err = nn.Jacobian.testJacobianParameters(model, input, + model.v, model.gradV) + mytester:assert(err < precision, 'Temporal Convolution v') + + -- linear + model = nn.WeightNorm(nn.Linear(5, 20)) + err = nn.Jacobian.testJacobianParameters(model, input, + model.bias, model.gradBias) + mytester:assert(err < precision, 'Linear bias') + err = nn.Jacobian.testJacobianParameters(model, input, model.g, model.gradG) + mytester:assert(err < precision, 'Linear g') + err = nn.Jacobian.testJacobianParameters(model, input, + model.v, model.gradV) + mytester:assert(err < precision, 'Linear v') + + -- euclidean with weight but no bias + input = torch.rand(10, 5) + model = nn.WeightNorm(nn.Euclidean(5, 20)) + err = nn.Jacobian.testJacobianParameters(model, input, model.g, model.gradG) + mytester:assert(err < precision, 'Euclidean g') + err = nn.Jacobian.testJacobianParameters(model, input, + model.v, model.gradV) + mytester:assert(err < precision, 'Euclidean v') + + -- spatial convolution with 4D weights + input = torch.rand(5, 10, 10) + model = nn.WeightNorm(nn.SpatialConvolution(5, 20, 2, 2, 3, 3, 1, 1), 2) + err = nn.Jacobian.testJacobianParameters(model, input, + model.bias, model.gradBias) + mytester:assert(err < precision, 'Spatial Convolution bias') + err = nn.Jacobian.testJacobianParameters(model, input, + model.g, model.gradG) + mytester:assert(err < precision, 'Spatial Convolution g') + err = nn.Jacobian.testJacobianParameters(model, input, + model.v, model.gradV) + mytester:assert(err < precision, 'Spatial Convolution v') + + -- linear save/load + model = nn.WeightNorm(nn.Linear(5, 20)) + input = torch.rand(10, 5) + local out = model:forward(input) + local modelr = 
torch.deserialize(torch.serialize(model))
+   local outr = modelr:forward(input)
+   mytester:assertTensorEq(out, outr)
+end
+
+function nntest.LinearWeightNorm()
+   local input = torch.rand(10, 5)
+   local model = nn.LinearWeightNorm(5, 20)
+
+   -- check gradient
+   local err = nn.Jacobian.testJacobianParameters(model, input, model.bias, model.gradBias)
+   mytester:assert(err < precision, 'bias')
+   err = nn.Jacobian.testJacobianParameters(model, input, model.g, model.gradG)
+   mytester:assert(err < precision, 'g')
+   err = nn.Jacobian.testJacobianParameters(model, input, model.v, model.gradV)
+   mytester:assert(err < precision, 'v')
+
+   -- check conversion functions
+   local linear = nn.Linear(5,20)
+   local wnFromLin = nn.LinearWeightNorm.fromLinear(linear)
+   local linFromWn = wnFromLin:toLinear()
+
+   local linOut = linear:forward(input)
+   local wnOut = wnFromLin:forward(input)
+   local linFromWnOut = linFromWn:forward(input)
+
+   mytester:assertTensorEq(linOut, wnOut, precision, "outputs are not equivalent")
+   mytester:assertTensorEq(wnOut, linFromWnOut, precision, "outputs are not equivalent")
+
+   -- check conversion with nobias
+   linear = nn.Linear(5,20,false)
+   wnFromLin = nn.LinearWeightNorm.fromLinear(linear)
+   linFromWn = wnFromLin:toLinear()
+
+   linOut = linear:forward(input)
+   wnOut = wnFromLin:forward(input)
+   linFromWnOut = linFromWn:forward(input)
+
+   mytester:assertTensorEq(linear.weight, wnFromLin.weight, precision, "weights are not equivalent")
+   mytester:assert(not wnFromLin.bias)
+   mytester:assert(not linear.bias)
+   mytester:assertTensorEq(linOut, wnOut, precision, "outputs are not equivalent")
+   mytester:assertTensorEq(wnOut, linFromWnOut, precision, "outputs are not equivalent")
+
+   -- check gradient with nobias
+   model = wnFromLin
+
+   err = nn.Jacobian.testJacobianParameters(model, input, model.g, model.gradG)
+   mytester:assert(err < precision, 'g')
+   err = nn.Jacobian.testJacobianParameters(model, input, model.v, model.gradV)
+   mytester:assert(err < precision, 'v')
+end
+
+function nntest.CAdd()
+   local function testBackwardPass(module, input, params, dparams)
+      local err = jac.testJacobian(module,input)
+      mytester:assertlt(err,precision, "error computing gradients w.r.t. inputs")
+
+      err = jac.testJacobianParameters(module, input, params, dparams)
+      mytester:assertlt(err,precision, "error computing gradients w.r.t. params")
+
+      err = jac.testJacobianUpdateParameters(module, input, module.bias)
+      mytester:assertlt(err,precision, "error in update using gradients w.r.t. parameters")
+
+      --Test all of the various update methods
+      for test, err in pairs(jac.testAllUpdate(module, input, "bias", "gradBias")) do
+         mytester:assertlt(err, precision, string.format("error on bias [%s]", test))
+      end
+   end
+
+   local function testModuleIO(module, input)
+      local fwdErr,bkwdErr = jac.testIO(module,input)
+      mytester:asserteq(fwdErr, 0, torch.typename(module) .. " - i/o forward err ")
+      mytester:asserteq(bkwdErr, 0, torch.typename(module) .. 
" - i/o backward err ") + end + + local function testCAddWithNonBatchedInput() + local channels = math.random(3,5) + local width = math.random(3,5) + local height = math.random(3,5) + + local input = torch.Tensor(channels, height, width):zero() + + --Per channel bias + local module = nn.CAdd(channels, 1, 1) + local params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + local output = module:forward(input) + mytester:assert(output:isSameSizeAs(input)) + + for i = 1, module.bias:view(-1):size(1) do + local bias = module.bias:view(-1)[i] + local result = output[i]:view(-1) + local expectedResult = torch.Tensor({bias}):expandAs(result) + mytester:assertTensorEq(result, expectedResult, precision) + end + + --Per row bias + module = nn.CAdd(1, height, 1) + params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + output = module:forward(input) + mytester:assert(output:isSameSizeAs(input)) + + for i = 1, module.bias:view(-1):size(1) do + local bias = module.bias:view(-1)[i] + local result = output[{{}, {i}, {}}]:contiguous():view(-1) + local expectedResult = torch.Tensor({bias}):expandAs(result) + mytester:assertTensorEq(result, expectedResult, precision) + end + + --Per column bias + module = nn.CAdd(1, 1, width) + params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + output = module:forward(input) + mytester:assert(output:isSameSizeAs(input)) + + for i = 1, module.bias:view(-1):size(1) do + local bias = module.bias:view(-1)[i] + local result = output[{{}, {}, {i}}]:contiguous():view(-1) + local expectedResult = torch.Tensor({bias}):expandAs(result) + mytester:assertTensorEq(result, expectedResult, precision) + end + + --Per input component bias + module = nn.CAdd(channels, height, width) + params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + output = module:forward(input) + + mytester:assert(output:isSameSizeAs(input)) + mytester:assert(module.bias:isSameSizeAs(input)) + mytester:assertTensorEq(module.bias, output, precision) + + testModuleIO(module, input) + end + + local function testCAddWithBatchedInput() + local batchSize = math.random(3,5) + local channels = math.random(3,5) + local width = math.random(3,5) + local height = math.random(3,5) + + local input = torch.Tensor(batchSize, channels, height, width):zero() + local module = nn.CAdd(batchSize, channels, height, width) + + --Per batch bias + local module = nn.CAdd(batchSize, 1, 1, 1) + local params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + local output = module:forward(input) + mytester:assert(output:isSameSizeAs(input)) + + for i = 1, module.bias:view(-1):size(1) do + local bias = module.bias:view(-1)[i] + local result = output[i]:view(-1) + local expectedResult = torch.Tensor({bias}):expandAs(result) + mytester:assertTensorEq(result, expectedResult, precision) + end + + --Per channel bias + module = nn.CAdd(1, channels, 1, 1) + params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + output = module:forward(input) + mytester:assert(output:isSameSizeAs(input)) + + for i = 1, module.bias:view(-1):size(1) do + local bias = module.bias:view(-1)[i] + local result = output[{{}, {i}, {}, {}}]:contiguous():view(-1) + local expectedResult = 
torch.Tensor({bias}):expandAs(result) + mytester:assertTensorEq(result, expectedResult, precision) + end + + --Per row bias + module = nn.CAdd(1, 1, height, 1) + params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + output = module:forward(input) + mytester:assert(output:isSameSizeAs(input)) + + for i = 1, module.bias:view(-1):size(1) do + local bias = module.bias:view(-1)[i] + local result = output[{{}, {}, {i}, {}}]:contiguous():view(-1) + local expectedResult = torch.Tensor({bias}):expandAs(result) + mytester:assertTensorEq(result, expectedResult, precision) + end + + --Per column bias + module = nn.CAdd(1, 1, 1, width) + params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + output = module:forward(input) + mytester:assert(output:isSameSizeAs(input)) + + for i = 1, module.bias:view(-1):size(1) do + local bias = module.bias:view(-1)[i] + local result = output[{{}, {}, {}, {i}}]:contiguous():view(-1) + local expectedResult = torch.Tensor({bias}):expandAs(result) + mytester:assertTensorEq(result, expectedResult, precision) + end + + --Per input component bias + module = nn.CAdd(batchSize, channels, height, width) + params, gradParams = module:getParameters() + + testBackwardPass(module, input, params, gradParams) + + input:zero() + output = module:forward(input) + + mytester:assert(output:isSameSizeAs(input)) + mytester:assert(module.bias:isSameSizeAs(input)) + mytester:assertTensorEq(module.bias, output, precision) + + testModuleIO(module, input) + end + + + local function testCAddWithLessDimsThanInput() + local input = torch.rand(4,5) + local module = nn.CAdd(5) + local params, gradParams = module:getParameters() + testBackwardPass(module, input, params, gradParams) + + input:zero() + local output = module:forward(input) + local expandedBias = module.bias:view(1,5):expand(4,5):clone() + mytester:assert(output:isSameSizeAs(input)) + mytester:assertTensorEq(expandedBias, output, precision) + + testModuleIO(module, input) + + input = torch.rand(4,5,6) + module = nn.CAdd(5,6) + params, gradParams = module:getParameters() + testBackwardPass(module, input, params, gradParams) + + input:zero() + local output = module:forward(input) + expandedBias = module.bias:view(1,5,6):expand(4,5,6):clone() + mytester:assert(output:isSameSizeAs(input)) + mytester:assertTensorEq(expandedBias, output, precision) + + testModuleIO(module, input) + end + + + testCAddWithNonBatchedInput() + testCAddWithBatchedInput() + testCAddWithLessDimsThanInput() +end + +function nntest.CMul() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local inl = math.random(3,5) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.CMul(1, ini, inj, ink, 1) + + -- 1D + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + -- 2D + local nframe = math.random(3,14) + local input = torch.randn(nframe, ini,inj,ink) + local output = 
module:forward(input) + local output2 = torch.cmul(input, module.weight:view(1,ini,inj,ink):expandAs(input)) + mytester:assertTensorEq(output2, output, 0.000001, 'CMul forward 2D err') + + module:zeroGradParameters() + local gradWeight = module.gradWeight:clone() + local gradInput = module:backward(input, output) + local gradInput2 = gradInput:clone():zero() + local outputView = output:view(input:size(1), -1) + gradInput2:view(input:size(1), -1):addcmul(1, module.weight:view(1,-1):expandAs(outputView), outputView) + mytester:assertTensorEq(gradInput2, gradInput, 0.000001, 'CMul updateGradInput 2D err') + mytester:assert(gradInput:isSameSizeAs(input), 'CMul gradInput 2D size err') + + local inputView = input:view(nframe, -1) + local gradWeightView = gradWeight:view(1, -1) + for i=1,nframe do + gradWeightView:addcmul(1, inputView[i], outputView[i]) + end + mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'CMul accGradParameters 2D err') + mytester:assert(module.weight:isSameSizeAs(module.gradWeight), 'CMul gradWeight size err') + + -- Expansion + input = torch.randn(nframe, ini,inj,ink,inl) + output = module:forward(input) + output2 = torch.cmul(input, module.weight:expandAs(input)) + mytester:assertTensorEq(output2, output, 0.000001, 'CMul forward expand err') + + module:zeroGradParameters() + gradWeight:zero() + gradInput = module:backward(input, output) + gradInput2 = gradInput:clone():zero() + gradInput2:addcmul(1, module.weight:expandAs(output), output) + mytester:assertTensorEq(gradInput2, gradInput, 0.000001, 'CMul updateGradInput expansion err') + mytester:assert(gradInput:isSameSizeAs(input), 'CMul gradInput expand size err') + + for i=1,nframe do + -- 4 is the [non-batch] singleton dim + gradWeight:add(torch.cmul(input[i], output[i]):sum(4)) + end + mytester:assertTensorEq(gradWeight:sum(5), module.gradWeight, 0.000001, 'CMul accGradParameters expand err') + mytester:assert(module.weight:isSameSizeAs(module.gradWeight), 'CMul accGradParameters expand size err') + + input:zero() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format('error on weight [%s]', t)) + end + + -- Non-contiguous input or gradOutput + local testModule = nn.CMul(4, 3, 5) + local testInput = torch.rand(10, 3, 5):resize(10, 1, 3, 5):expand(10, 4, 3, 5) + local testOutput = testModule:forward(testInput) + + mytester:assert(testOutput:isSameSizeAs(testInput), 'CMul non-contiguous forward err') + + local testGradOutput = torch.rand(10, 3, 5):resize(10, 1, 3, 5):expand(10, 4, 3, 5) + testOutput = testModule:forward(testInput) + local testGradInput = testModule:backward(testOutput, testGradOutput) + + mytester:assert(testGradInput:isSameSizeAs(testGradOutput), 'CMul non-contiguous backward err') + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision)
+end
+
+function nntest.Contiguous()
+   local module = nn.Contiguous()
+
+   -- Contiguous input
+   local input = torch.rand(30,20,10)
+   local output = module:forward(input)
+
+   mytester:assert(output:ne(input):sum() == 0, 'output not equal to input')
+
+   -- Make input non-contiguous
+   local input2 = output:transpose(1,2)
+   local output2 = module:forward(input2)
+
+   mytester:assert(output2:ne(output:contiguous()):sum() == 0, 'output not equal to input')
+end
+
+function nntest.Dropout()
+   local p = 0.2 --prob of dropping out a neuron
+   local input = torch.Tensor(1000):fill((1-p))
+   local module = nn.Dropout(p)
+   -- version 2
+   local output = module:forward(input)
+   mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+   local gradInput = module:backward(input, input)
+   mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+   -- test inplace version
+   local module = nn.Dropout(p,nil,true)
+   local output = module:forward(input:clone())
+   mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+   local gradInput = module:backward(input:clone(), input:clone())
+   mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+
+   -- version 1 (old nnx version)
+   local input = input:fill(1)
+   local module = nn.Dropout(p,true)
+   local output = module:forward(input)
+   mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+   local gradInput = module:backward(input, input)
+   mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.SpatialDropout()
+   local p = 0.2 --prob of dropping out a neuron
+   local w = math.random(1,5)
+   local h = math.random(1,5)
+   local nfeats = 1000
+   local input = torch.Tensor(nfeats, w, h):fill(1)
+   local module = nn.SpatialDropout(p)
+   module.train = true
+   local output = module:forward(input)
+   mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+   local gradInput = module:backward(input, input)
+   mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.SpatialDropoutBatch()
+   local p = 0.2 --prob of dropping out a neuron
+   local bsz = math.random(1,5)
+   local w = math.random(1,5)
+   local h = math.random(1,5)
+   local nfeats = 1000
+   local input = torch.Tensor(bsz, nfeats, w, h):fill(1)
+   local module = nn.SpatialDropout(p)
+   module.train = true
+   local output = module:forward(input)
+   mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+   local gradInput = module:backward(input, input)
+   mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.VolumetricDropout()
+   local p = 0.2 --prob of dropping out a neuron
+   local t = math.random(1,5)
+   local w = math.random(1,5)
+   local h = math.random(1,5)
+   local nfeats = 1000
+   local input = torch.Tensor(nfeats, t, w, h):fill(1)
+   local module = nn.VolumetricDropout(p)
+   module.train = true
+   local output = module:forward(input)
+   mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+   local gradInput = module:backward(input, input)
+   mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.VolumetricDropoutBatch()
+   local p = 0.2 --prob of dropping out a neuron
+   local bsz = math.random(1,5)
+   local t = math.random(1,5)
+   local w = math.random(1,5)
+   local h = math.random(1,5)
+   local nfeats = 1000
+   local input = torch.Tensor(bsz, nfeats, t, w, h):fill(1)
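+   -- With an all-ones input, VolumetricDropout samples its mask over whole
+   -- feature maps with keep probability (1-p), so across the 1000 maps the
+   -- output mean should settle near (1-p); the 0.05 tolerance below absorbs
+   -- the Monte-Carlo noise.
+ 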
local module = nn.VolumetricDropout(p)
+   module.train = true
+   local output = module:forward(input)
+   mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+   local gradInput = module:backward(input, input)
+   mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.ReLU()
+   local input = torch.randn(3,4)
+   local gradOutput = torch.randn(3,4)
+   local module = nn.ReLU()
+   local output = module:forward(input)
+   local output2 = input:clone():gt(input, 0):cmul(input)
+   mytester:assertTensorEq(output, output2, 0.000001, 'ReLU output')
+   local gradInput = module:backward(input, gradOutput)
+   local gradInput2 = input:clone():gt(input, 0):cmul(gradOutput)
+   mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ReLU gradInput')
+end
+
+function nntest.ReLU6()
+   for inplace = 0, 1 do
+      local input = torch.randn(3, 4):mul(6)
+      local gradOutput = torch.randn(3,4)
+      local module = nn.ReLU6(inplace == 1)
+      local output = module:forward(input:clone())
+      local gt = input:clone():gt(input, 0)
+      local lt = input:clone():lt(input, 6)
+      local output2 = gt:clone():cmul(lt):cmul(input)
+      output2:add(6, input:clone():gt(input, 6))
+      mytester:assertTensorEq(output, output2, 0.000001, 'ReLU6 output '..(inplace == 1 and '(inplace)' or '') )
+      local gradInput = module:backward(input, gradOutput:clone())
+      local gradInput2 = gt:clone():cmul(lt):cmul(gradOutput)
+      mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ReLU6 gradInput '..(inplace == 1 and '(inplace)' or '') )
+   end
+end
+
+function nntest.GatedLinearUnit()
+   local model = nn.GatedLinearUnit()
+   local t = torch.Tensor({{1, 1}, {2, 2}, {3, 3}})
+   local thalf = torch.Tensor():resizeAs(t):copy(t):narrow(2, 1, 1)
+   mytester:assertTensorEq(
+      thalf:cmul(torch.sigmoid(thalf)),
+      model:forward(t):resizeAs(thalf),
+      0.000001,
+      'Gated Linear output'
+   )
+   t = torch.Tensor({{1, 1, 1, 1}, {2, 2, 2, 2}, {3, 3, 3, 3}})
+   thalf = torch.Tensor():resizeAs(t):copy(t):narrow(2, 1, 2)
+   mytester:assertTensorEq(
+      thalf:cmul(torch.sigmoid(thalf)),
+      model:forward(t),
+      0.000001,
+      'Gated Linear Unit output'
+   )
+
+   local input = torch.rand(1, 10)
+   local err = jac.testJacobian(model, input)
+   mytester:assert(err < precision, 'Gated Linear gradient')
+
+   input = torch.rand(5, 10, 6)
+   model = nn.GatedLinearUnit(2)
+   err = jac.testJacobian(model, input)
+   mytester:assert(err < precision, 'Gated Linear gradient, non-default dim')
+
+   input = torch.rand(5, 10, 6)
+   model = nn.GatedLinearUnit(3)
+   err = jac.testJacobian(model, input)
+   mytester:assert(err < precision, 'Gated Linear gradient, non-default dim')
+
+   input = torch.rand(5, 10)
+   model = nn.Sequential()
+   model:add(nn.Linear(10, 10))
+   model:add(nn.GatedLinearUnit())
+   model:add(nn.ReLU())
+   model:add(nn.LogSoftMax())
+   err = jac.testJacobian(model, input)
+   mytester:assert(err < precision, 'Gated Linear gradient with other layers')
+end
+
+function nntest.CReLU()
+   local function _verifyCReLU(featureMaps, concatenatedFeatureMaps)
+      local rectifiedFeatureMaps = nn.ReLU():forward(featureMaps)
+      local rectifiedNegFeatureMaps = nn.ReLU():forward(-featureMaps)
+
+      mytester:asserteq(concatenatedFeatureMaps:size(1), featureMaps:size(1) * 2,
+                        "CReLU should double the number of feature maps")
+
+      for i = 1, rectifiedFeatureMaps:size(1) do
+         local found = false
+         for j = 1, concatenatedFeatureMaps:size(1) do
+            found = found or rectifiedFeatureMaps[i]:equal(concatenatedFeatureMaps[j])
+         end
+         mytester:assert(found, "Original (rectified) feature maps should be in 
+      end
+
+      for i = 1, rectifiedNegFeatureMaps:size(1) do
+         local found = false
+         for j = 1, concatenatedFeatureMaps:size(1) do
+            found = found or rectifiedNegFeatureMaps[i]:equal(concatenatedFeatureMaps[j])
+         end
+         mytester:assert(found, "The negative of the original (rectified) feature maps should be in the output of CReLU")
+      end
+   end
+
+   local model = nn.Sequential()
+   model:add(nn.SpatialConvolution(1, 3, 3, 3, 1, 1, 1, 1))
+
+   for _, inplace in pairs({true, false}) do
+      --batched
+      local crelu = nn.CReLU(3, inplace)
+      local input = torch.Tensor(2, 1, 20, 20):uniform()
+      local featureMaps = model:forward(input)
+      local concatenatedFeatureMaps = crelu:forward(featureMaps)
+      for i = 1, input:size(1) do
+         _verifyCReLU(featureMaps[i], concatenatedFeatureMaps[i])
+      end
+
+      --non-batched
+      local input = torch.Tensor(1, 20, 20):uniform()
+      local featureMaps = model:forward(input)
+      local concatenatedFeatureMaps = crelu:forward(featureMaps)
+      _verifyCReLU(featureMaps, concatenatedFeatureMaps)
+   end
+
+   --test gradients w.r.t input
+   local jac = nn.Jacobian
+
+   for _, inplace in pairs({true, false}) do
+      local crelu = nn.CReLU(3, inplace)
+      --batched
+      local input = torch.Tensor(2, 3, 20, 20):uniform()
+      local err = jac.testJacobian(crelu, input)
+      mytester:assertlt(err, precision, "error computing gradients w.r.t. inputs")
+
+      --I/O
+      local fwdErr,bkwdErr = jac.testIO(crelu,input)
+      mytester:asserteq(fwdErr, 0, torch.typename(crelu) .. " - i/o forward err ")
+      mytester:asserteq(bkwdErr, 0, torch.typename(crelu) .. " - i/o backward err ")
+
+      --non-batched
+      input = torch.Tensor(3, 20, 20):uniform()
+      err = jac.testJacobian(crelu,input)
+      mytester:assertlt(err, precision, "error computing gradients w.r.t. inputs")
+
+      --I/O
+      local fwdErr,bkwdErr = jac.testIO(crelu,input)
+      mytester:asserteq(fwdErr, 0, torch.typename(crelu) .. " - i/o forward err ")
+      mytester:asserteq(bkwdErr, 0, torch.typename(crelu) .. " - i/o backward err ")
+   end
+
+end
+
+function nntest.Exp()
+   local ini = math.random(3,5)
+   local inj = math.random(3,5)
+   local ink = math.random(3,5)
+   local input = torch.Tensor(ini,inj,ink):zero()
+   local module = nn.Exp()
+
+   local err = jac.testJacobian(module,input)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local ferr,berr = jac.testIO(module,input)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Log()
+   local ini = math.random(3,5)
+   local inj = math.random(3,5)
+   local ink = math.random(3,5)
+   local input = torch.Tensor(ini,inj,ink):zero()
+   local module = nn.Log()
+
+   local err = jac.testJacobian(module,input, 0.1, 10)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local ferr,berr = jac.testIO(module,input, 0.1, 10)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.HardTanh()
+   local ini = math.random(3,5)
+   local inj = math.random(3,5)
+   local ink = math.random(3,5)
+   local input = torch.Tensor(ink, inj, ini):zero()
+
+   local module = nn.HardTanh()
+
+   local err = jac.testJacobian(module, input)
+   mytester:assertlt(err, precision , 'error on state ')
+
+   local ferr, berr = jac.testIO(module, input)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+   -- test inclusive bounds -- HardTanh(1,inf) should behave like Threshold(1)
+   local input = torch.Tensor({1})
+   local gradOutput = torch.Tensor({1})
+   local gradOutputClone = gradOutput:clone()
+   local module = nn.HardTanh(1, math.huge, true)
+   local tanhGradInput = module:backward(input, gradOutput)
+
+   local input = input:clone()
+   local gradOutput = gradOutputClone
+   local module = nn.Threshold(1, 0, true)
+   local threshGradInput = module:backward(input, gradOutput)
+   mytester:assertTensorEq(tanhGradInput, threshGradInput, 0.000001, 'HardTanh gradInput')
+end
+
+function nntest.Clamp()
+   local ini = math.random(3,5)
+   local inj = math.random(3,5)
+   local ink = math.random(3,5)
+   local max_value = math.abs(math.random())
+   local min_value = -math.abs(math.random())
+   local input = torch.Tensor(ink, inj, ini):zero()
+
+   local module = nn.Clamp(min_value, max_value)
+
+   local err = jac.testJacobian(module, input)
+   mytester:assertlt(err, precision , 'error on state ')
+
+   local ferr, berr = jac.testIO(module, input)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Abs()
+   local ini = math.random(3,5)
+   local inj = math.random(3,5)
+   local ink = math.random(3,5)
+   local input = torch.Tensor(ink, inj, ini):zero()
+
+   local module = nn.Abs()
+
+   local err = jac.testJacobian(module, input)
+   mytester:assertlt(err, precision , 'error on state ')
+
+   local ferr, berr = jac.testIO(module, input)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Threshold()
+   local ini = math.random(3,5)
+   local inj = math.random(3,5)
+   local ink = math.random(3,5)
+   local input = torch.Tensor(ink, inj, ini):zero()
+
+   local module = nn.Threshold(torch.uniform(-2,2),torch.uniform(-2,2))
+
+   local err = nn.Jacobian.testJacobian(module, input)
+   mytester:assertlt(err, precision, 'error on state ')
+
+   local ferr, berr = nn.Jacobian.testIO(module, input)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.ELU()
+   local ini = math.random(3,5)
+   local inj = math.random(3,5)
+   local ink = math.random(3,5)
+   local input = torch.Tensor(ink, inj, ini):zero()
+
+   local module = nn.ELU(0.3)
+
+   local err = jac.testJacobian(module, input)
+   mytester:assertlt(err, precision , 'error on state ')
+
+   local ferr, berr = jac.testIO(module, input)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) ..
' - i/o backward err ', precision) +end + +function nntest.ELUIP() + local input = torch.randn(3,4) + local input2 = input:clone() + local gradOutput = torch.randn(3,4) + local gradOutput2 = gradOutput:clone() + + -- Compare in-place to not in-place + local module = nn.ELU(0.3, true) + local module2 = nn.ELU(0.3, false) + + local output = module:forward(input) + local output2 = module2:forward(input2) + mytester:assertTensorEq(output, output2, 0.000001, 'ELU output') + local gradInput = module:backward(input, gradOutput) + local gradInput2 = module2:backward(input2, gradOutput2) + mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ELU gradInput') +end + +function nntest.PReLU() + local ini = math.random(3,5) + local input = torch.Tensor(ini):zero() + + local module = nn.PReLU(ini) + + -- 1D + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + -- 2D + local nframe = math.random(1,7) + local input = torch.Tensor(nframe, ini):zero() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + -- 4D + local nframe = math.random(1,7) + local kW, kH = math.random(1,8), math.random(1,8) + local input = torch.Tensor(nframe, ini, kW, kH):zero() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision)
+end
+
+function nntest.RReLU()
+   local nframe = math.random(1,7)
+   local size = math.random(1,7)
+   local kW, kH = math.random(1,8), math.random(1,8)
+   local input = torch.Tensor(nframe, size, kW, kH):zero()
+
+   local l = 1/math.random(5,8)
+   local u = 1/math.random(3,5)
+
+   -- test in evaluation mode (not inplace), RReLU behaves like LeakyReLU
+   local module = nn.RReLU(l, u, false)
+   mytester:assert(module.train, 'default mode ')
+   module:evaluate()
+
+   -- gradient check
+   local err = jac.testJacobian(module, input)
+   mytester:assertlt(err, precision, 'error on state ')
+
+   -- IO
+   local ferr,berr = jac.testIO(module, input)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+   -- test training and evaluation mode
+   for _,train in ipairs({true,false}) do
+      -- test with separate output buffer and inplace
+      for _,inplace in ipairs({false,true}) do
+         module = nn.RReLU(l, u, inplace)
+         if train then
+            module:training()
+         else
+            module:evaluate()
+         end
+         input = torch.rand(nframe, size, kW, kH) - 0.5
+         input:storage()[1] = -1
+         local original_input = input:clone()
+         local output = module:forward(input)
+         mytester:assert(output:sign():eq(original_input:sign()):all(), 'sign flipped forward ')
+         local gradOutput = torch.ones(output:size())
+         local gradInput = module:backward(input, gradOutput)
+         mytester:assert(gradInput:gt(0):eq(input:ne(0)):all(), 'gradient ')
+         mytester:assert(gradInput:lt(1):eq(input:le(0)):all(), 'backward negative inputs ')
+         mytester:assert(gradInput:eq(1):eq(input:gt(0)):all(), 'backward positive inputs ')
+         if not train then
+            local err = math.abs(gradInput[input:le(0)]:mean()-(module.lower+module.upper)/2)
+            mytester:assertlt(err, precision, 'error on gradient ')
+         end
+
+         input = -torch.rand(1000)
+         module:forward(input) -- fill internal noise tensor
+         local g = module:backward(input, torch.ones(1000))
+         local err = math.abs(g[input:le(0)]:mean()-(module.lower+module.upper)/2)
+         mytester:assertlt(err, 0.05, 'mean deviation of gradient for negative inputs ')
+      end
+   end
+end
+
+function nntest.LeakyReLU()
+   local input = torch.randn(3,4)
+   local gradOutput = torch.randn(3,4)
+   local negval = math.random()
+   local module = nn.LeakyReLU(negval)
+   local output = module:forward(input)
+   local output2 = input:clone():gt(input, 0):cmul(input) + input:clone():le(input,0):cmul(input) * module.negval
+   mytester:assertTensorEq(output, output2, 0.000001, 'LeakyReLU output')
+   local gradInput = module:backward(input, gradOutput)
+   local gradInput2 = input:clone():gt(input, 0):cmul(gradOutput) + input:clone():le(input,0):cmul(gradOutput) * module.negval
+   mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'LeakyReLU gradInput')
+end
+
+function nntest.LeakyReLUIP()
+   local input = torch.randn(3,4)
+   local gradOutput = torch.randn(3,4)
+   local negval = math.random()
+   local module = nn.LeakyReLU(negval,true)
+   local output = input:clone():gt(input, 0):cmul(input) + input:clone():le(input,0):cmul(input) * module.negval
+   local output2 = module:forward(input)
+   mytester:assertTensorEq(output2, output, 0.000001, 'LeakyReLU output')
+   local gradInput = input:clone():gt(input, 0):cmul(gradOutput) + input:clone():le(input,0):cmul(gradOutput) * module.negval
+   local gradInput2 = module:backward(input, gradOutput)
+   mytester:assertTensorEq(gradInput2, gradInput, 0.000001, 'LeakyReLU gradInput')
+end
+
+function nntest.HardShrink()
+   local ini =
math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.HardShrink(math.random()/2) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.SoftShrink() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.SoftShrink(math.random()/2) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Power() + local in1 = torch.rand(5,7) + local module = nn.Power(2) + local out = module:forward(in1) + local err = out:dist(in1:cmul(in1)) + mytester:assertlt(err, 1e-15, torch.typename(module) .. ' - forward err ') + + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local pw = torch.uniform()*math.random(1,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Power(pw) + + local err = nn.Jacobian.testJacobian(module, input, 0.1, 2) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module,input, 0.1, 2) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Normalize() + -- compare forward against torch implementation + -- and check gradient + for _,p in pairs({1,2,3,4,1.5}) do + local ini = math.random(3,10) + local input = torch.randn(ini) + local module = nn.Normalize(p) + local out = module:forward(input) + local expected = torch.div(input,input:norm(p)) + mytester:assertTensorEq(out, expected, 1e-7, + torch.typename(module) ..' (' .. p ..') - forward err ') + + local err = jac.testJacobian(module, input, -2, 2) + mytester:assertlt(err, precision, 'error norm '..p..' on state ') + end + + -- batch mode + for _,p in pairs({1,2,3,4,torch.uniform()*math.random(1,10),math.huge}) do + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(inj, ini):zero() + + local module = nn.Normalize(p) + + local err = jac.testJacobian(module, input, -2, 2) + mytester:assertlt(err, precision, 'error norm '..p..' on state ') + end + + -- test IO correctness + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(inj, ini):zero() + + local module = nn.Normalize(2) + + local ferr, berr = jac.testIO(module,input, 0.1, 2) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + +end + +function nntest.Square() + local in1 = torch.rand(5,7) + local module = nn.Square() + local out = module:forward(in1) + local err = out:dist(in1:cmul(in1)) + mytester:assertlt(err, 1e-15, torch.typename(module) .. 
' - forward err ') + + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Square() + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Sqrt() + local in1 = torch.rand(5,7) + local module = nn.Sqrt() + local out = module:forward(in1) + local err = out:dist(in1:sqrt()) + mytester:assertlt(err, 1e-15, torch.typename(module) .. ' - forward err ') + + -- Test zero inputs; we will avoid a div-by-zero by setting to zero + local zin = torch.DoubleTensor(5, 7):zero() + module:forward(zin) + local zgradout = torch.rand(5, 7) + local zgradin = module:backward(zin, zgradout) + mytester:assertTensorEq(zgradin, torch.DoubleTensor(5, 7):zero(), 0.000001, "error in sqrt backward singularity") + + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Sqrt() + + local err = nn.Jacobian.testJacobian(module, input, 0.1, 2) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input, 0, 2) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Linear() + local ini = math.random(3,5) + local inj_vals = {math.random(3,5), 1} -- Also test the inj = 1 spatial case + local input = torch.Tensor(ini):zero() + + for ind, inj in pairs(inj_vals) do + local module = nn.Linear(ini,inj) + + local function jacTests(module) + -- 1D + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + end + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + if module.bias then + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update] ') + end + + nn.hessian.enable() + + local err = jac.testDiagHessianInput(module, input) + mytester:assertlt(err , precision, 'error on diagHessianInput') + + local err = jac.testDiagHessianWeight(module, input) + mytester:assertlt(err , precision, 'error on diagHessianWeight') + + if module.bias then + local err = jac.testDiagHessianBias(module, input) + mytester:assertlt(err , precision, 'error on diagHessianBias') + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + if module.bias then + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + end + + -- 2D + local nframe = math.random(50,70) + local input = torch.Tensor(nframe, ini):zero() + + 
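      -- Hedged aside: jac.testJacobian compares the module's analytic
+         -- Jacobian against a finite-difference estimate. A minimal manual
+         -- spot-check of one entry (eps, x0, y1, y2, fd are illustrative
+         -- names; for nn.Linear, d output[1][1] / d input[1][1] is weight[1][1]):
+         local eps = 1e-6
+         local x0 = input:clone():uniform()
+         local y1 = module:forward(x0):clone()
+         x0[1][1] = x0[1][1] + eps
+         local y2 = module:forward(x0):clone()
+         local fd = (y2[1][1] - y1[1][1]) / eps
+         mytester:assertlt(math.abs(fd - module.weight[1][1]), 1e-3, 'manual fd spot-check')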
+         local err = jac.testJacobian(module,input)
+         mytester:assertlt(err,precision, 'error on state ')
+
+         local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+         mytester:assertlt(err,precision, 'error on weight ')
+
+         if module.bias then
+            local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+            mytester:assertlt(err,precision, 'error on bias ')
+         end
+
+         local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+         mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+         if module.bias then
+            local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+            mytester:assertlt(err,precision, 'error on bias [direct update] ')
+         end
+
+         local err = jac.testDiagHessianInput(module, input)
+         mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+         local err = jac.testDiagHessianWeight(module, input)
+         mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+         if module.bias then
+            local err = jac.testDiagHessianBias(module, input)
+            mytester:assertlt(err , precision, 'error on diagHessianBias')
+         end
+
+         for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+            mytester:assertlt(err, precision, string.format(
+                               'error on weight [%s]', t))
+         end
+
+         if module.bias then
+            for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+               mytester:assertlt(err, precision, string.format(
+                                  'error on bias [%s]', t))
+            end
+         end
+
+         -- IO
+         local ferr,berr = jac.testIO(module,input)
+         mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+         mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+      end
+
+      jacTests(module)
+      module:noBias()
+      jacTests(module)
+      module.bias = torch.Tensor(inj):zero()
+      module.gradBias = torch.Tensor(inj):zero()
+      module:reset()
+      jacTests(module)
+   end -- for ind, inj in pairs(inj_vals) do
+end
+
+local function test_sparse_linear(inb, ini, inj, numNonzero)
+   local module = nn.SparseLinear(ini,inj, true)
+   local linear = nn.Linear(ini, inj)
+   linear.weight = module.weight:clone()
+   linear.bias = module.bias:clone()
+   module:zeroGradParameters()
+   linear:zeroGradParameters()
+
+   -- Create a random sparse vector
+   local input = {}
+   local nonsparse = torch.zeros(inb, ini)
+   for i=1,inb do
+      local nnz = math.random(1, 3) + numNonzero
+      local inds = torch.randperm(ini)[{{1,nnz}}]
+      input[i] = torch.Tensor(nnz, 2)
+      input[i]:select(2,1):copy(inds)
+      input[i]:select(2,2):copy(torch.rand(nnz))
+      nonsparse[i]:scatter(1, input[i]:select(2,1):long(), input[i]:select(2,2))
+   end
+   local gradOutput = torch.rand(inb, inj)
+
+   local cmps = {'weight', 'bias', 'gradWeight', 'gradBias'}
+
+   -- Check output wrt linear, non-batch
+   local actual = module:forward(input[1])
+   local expected = linear:forward(nonsparse[1])
+   local actualgi = module:backward(input[1], gradOutput[1])
+   local expectedgi = linear:backward(nonsparse[1], gradOutput[1])
+   module:updateParameters(1)
+   linear:updateParameters(1)
+   local err = (expected - actual):abs():max()
+   local gierr = (expectedgi - actualgi[1]:select(2,2)):abs():max()
+   mytester:assertle(err, precision, 'error on result')
+   mytester:assertle(gierr, precision, 'error on gradInput')
+
+   for _,var in ipairs(cmps) do
+      local err = (module[var] - linear[var]):abs():max()
+      mytester:assertle(err, precision, 'error on '..var)
+   end
+   module:zeroGradParameters()
+   linear:zeroGradParameters()
+
+   -- Check output wrt linear, batch
+   -- doing this n times checks for fast last input param updates
+   local test_n_times = function(ntimes)
+      local actual, expected, actualgi, expectedgi
+      for i=1, ntimes do
+         actual = module:forward(input)
+         expected = linear:forward(nonsparse)
+         actualgi = module:backward(input, gradOutput)
+         expectedgi = linear:backward(nonsparse, gradOutput)
+      end
+      module:updateParameters(1)
+      linear:updateParameters(1)
+      local err = (expected - actual):abs():max()
+      local gicheck = torch.Tensor():resizeAs(expectedgi)
+      for i=1,#actualgi do gicheck[i]:copy(actualgi[i]:select(2,2)) end
+      local gierr = (expectedgi - gicheck):abs():max()
+      mytester:assertle(err, precision, 'error on result with ntimes = '..ntimes)
+      mytester:assertle(gierr, precision, 'error on gradInput with ntimes = '..ntimes)
+
+      for _,var in ipairs(cmps) do
+         local err = (module[var] - linear[var]):abs():max()
+         mytester:assertle(err, precision, 'error on '..var..' with ntimes = '..ntimes)
+      end
+
+      module:zeroGradParameters()
+      linear:zeroGradParameters()
+      mytester:assertle(module.gradWeight:sum(), precision, 'error zeroing gradweight')
+      mytester:assertle(module.gradBias:sum(), precision, 'error zeroing gradbias')
+
+   end
+
+   test_n_times(1)
+   test_n_times(2)
+   test_n_times(3)
+
+   -- legacy batch mode
+   local batch = math.random(2,5)
+
+   local input = torch.Tensor(batch, numNonzero, 2):zero()
+   for k=1,batch do
+      local N = {}
+      for i = 1, ini do N[i] = i end
+      for i = 1, numNonzero do
+         local j = math.random(i,ini)
+         N[i], N[j] = N[j], N[i]
+      end
+      for i = 1, numNonzero do input[{k,i,1}] = N[i] end
+   end
+   local values = input:select(3,2)
+   values:copy(torch.rand(values:nElement())):mul(2):add(-1)
+
+   -- Check output
+   local actual = module:forward(input):clone()
+   local expected = torch.Tensor(batch, inj)
+   for k = 1, batch do
+      expected[k]:copy(module:forward(input[k]))
+   end
+   local err = (expected - actual):abs():max()
+   mytester:assertle(err, precision, 'error on batch result forward')
+end
+
+function nntest.SparseLinear()
+   local inb = math.random(5,10)
+   local ini = math.random(50,100)
+   local inj = math.random(5,10)
+   local numNonzero = math.random(3,5)
+
+   test_sparse_linear(inb, ini, inj, numNonzero)
+   -- Tests OMP parallelism
+   test_sparse_linear(1, 50000, 10, 20000)
+   test_sparse_linear(1000, 1000, 10, 100)
+end
+
+local function testIndexLinear(bsize, iSize, oSize, nnz)
+   local inb = bsize
+   local ini = iSize
+   local inj = oSize
+
+   local ilinear = nn.IndexLinear(ini,inj, true, nil, nil, nil, false)
+   local ilinear2 = nn.IndexLinear(ini,inj, true, nil, nil, nil, false)
+   local linear = nn.Linear(ini, inj)
+   ilinear.weight:zero()
+   ilinear.weight:copy(linear.weight:t():clone())
+   ilinear.bias = linear.bias:clone()
+   ilinear:zeroGradParameters()
+
+   ilinear2.weight:zero()
+   ilinear2.weight:copy(linear.weight:t():clone())
+   ilinear2.bias = linear.bias:clone()
+   ilinear2:zeroGradParameters()
+
+   linear:zeroGradParameters()
+
+   -- Create a random sparse vector
+   local input = {{},{}}
+   local flatInput = {torch.LongTensor(), torch.Tensor(), torch.LongTensor()}
+   local nonsparse = torch.zeros(inb, ini)
+   local sizes = flatInput[3]
+   sizes:resize(inb)
+   for i=1,inb do
+      sizes[i] = nnz
+      input[1][i] = torch.randperm(ini)[{{1,nnz}}]:long()
+      input[2][i] = torch.ones(nnz):uniform()
+      nonsparse[i]:scatter(1, input[1][i], input[2][i])
+   end
+   flatInput[1]:cat(input[1])
+   flatInput[2]:cat(input[2])
+
+   local gradOutput = torch.rand(inb, inj)
+   local cmps = {'weight', 'bias', 'gradBias'}
+   -- Check output wrt linear, non-batch
+   local actual =
ilinear:forward({input[1][1], input[2][1]}) + local actual2 = ilinear2:forward({input[1][1], input[2][1], flatInput[3][1]}) + local expected = linear:forward(nonsparse[1]) + + local actualgi = ilinear:backward({input[1][1], input[2][1]}, gradOutput[1]) + local actualgi2 = ilinear2:backward({input[1][1], input[2][1], flatInput[3][1]}, gradOutput[1]) + local expectedgi = linear:backward(nonsparse[1], gradOutput[1]) + + ilinear:updateParameters(1) + ilinear2:updateParameters(1) + linear:updateParameters(1) + + local err = (expected - actual):abs():max() + local err2 = (expected - actual2):abs():max() + + local gierr = (expectedgi - actualgi[2]):abs():max() + local gierr2 = (expectedgi - actualgi2[2]):abs():max() + + mytester:assertle(err, precision, 'error on result for tensor array') + mytester:assertle(gierr, precision, 'error on gradInput for tensor array') + + mytester:assertle(err2, precision, 'error on result for batched tensor') + mytester:assertle(gierr2, precision, 'error on gradInput for batched tensor') + + for _,var in ipairs(cmps) do + local err, err2 + if var == 'weight' then + err = (ilinear[var]:t() - linear[var]):abs():max() + err2 = (ilinear2[var]:t() - linear[var]):abs():max() + else + err = (ilinear[var] - linear[var]):abs():max() + err2 = (ilinear2[var] - linear[var]):abs():max() + end + mytester:assertle(err, precision, 'error on '..var..' for tensor array') + mytester:assertle(err2, precision, 'error on '..var..' for batched tensor') + end + ilinear:zeroGradParameters() + ilinear2:zeroGradParameters() + linear:zeroGradParameters() + + -- Check output wrt linear, batch + -- doing this n times checks for fast last input param updates + local test_n_times = function(ntimes) + local actual, expected, actualgi, expectedgi + for i=1, ntimes do + actual = ilinear:forward(input) + actual2 = ilinear2:forward(flatInput) + expected = linear:forward(nonsparse) + + actualgi = ilinear:backward(input, gradOutput) + actualgi2 = ilinear2:backward(flatInput, gradOutput) + expectedgi = linear:backward(nonsparse, gradOutput) + end + ilinear:updateParameters(1) + ilinear2:updateParameters(1) + linear:updateParameters(1) + + local err = (expected - actual):abs():max() + local err2 = (expected - actual2):abs():max() + + local gicheck = torch.Tensor():resizeAs(expectedgi) + local gicheck2 = actualgi2[2] + + for i=1,#actualgi[2] do + gicheck[i]:copy(actualgi[2][i]) + end + local gierr = (expectedgi - gicheck):abs():max() + local gierr2 = (expectedgi - gicheck2):abs():max() + + mytester:assertle(err, precision, 'error on result for tensor array with ntimes = '..ntimes) + mytester:assertle(err2, precision, 'error on result for batched tensor with ntimes = '..ntimes) + + mytester:assertle(gierr, precision, 'error on gradInput for tensor array with ntimes = '..ntimes) + mytester:assertle(gierr2, precision, 'error on gradInput for batched tensor with ntimes = '..ntimes) + + for _,var in ipairs(cmps) do + local err, err2 + if var == 'weight' then + err = (ilinear[var]:t() - linear[var]):abs():max() + err2 = (ilinear2[var]:t() - linear[var]):abs():max() + else + err = (ilinear[var] - linear[var]):abs():max() + err2 = (ilinear2[var] - linear[var]):abs():max() + end + mytester:assertle(err, precision, 'error on '..var..' for tensor array') + mytester:assertle(err2, precision, 'error on '..var..' 
for batched tensor') + end + + ilinear:zeroGradParameters() + ilinear2:zeroGradParameters() + linear:zeroGradParameters() + mytester:assertle(ilinear.gradBias:sum(), precision, 'error zeroing gradbias for tensor array') + mytester:assertle(ilinear2.gradBias:sum(), precision, 'error zeroing gradbias for batched tensor') + end + test_n_times(1) + test_n_times(2) + test_n_times(3) +end + +function nntest.IndexLinear() + testIndexLinear(4, 40 , 10, 30) + testIndexLinear(4, 40 , 500, 30) + testIndexLinear(4, 200000 , 5, 150000) + + local sizes = { + {osize = 1, isize = 10000, nnz = 10000, bsize = 16}, + {osize = 10, isize = 10000, nnz = 10000, bsize = 16}, + {osize = 100, isize = 10000, nnz = 10000, bsize = 16}, + + {osize = 1, isize = 10000, nnz = 200000, bsize = 1}, + {osize = 10, isize = 10000, nnz = 200000, bsize = 1}, + {osize = 100, isize = 10000, nnz = 200000, bsize = 1}, + + {osize = 1, isize = 10000, nnz = 200000, bsize = 2}, + {osize = 10, isize = 10000, nnz = 200000, bsize = 2}, + {osize = 100, isize = 10000, nnz = 200000, bsize = 2}, + } + + for i, lsizes in ipairs(sizes) do + -- Test multithreaded updates + local isize = lsizes.isize + local osize = lsizes.osize + local il = nn.IndexLinear(isize, osize) + local batch = {{},{}} + local idx = 100 + local nnz = lsizes.nnz + local bsize = lsizes.bsize + for i=1,bsize do + batch[1][i] = torch.LongTensor(nnz):fill(idx) + batch[2][i] = torch.DoubleTensor(nnz):fill(1) + end + local totalSize = bsize*nnz + local lr = 0.01 + -- Update the same index all over + local out = il:updateOutput(batch) + out:fill(1) + il:backwardUpdate(batch, out, lr) + il:backward(batch, out, 1) + il:updateParameters(lr) + for i=1,osize do + mytester:assertlt(math.abs(il.weight[idx][i] + totalSize * lr * 2), precision, 'parameters update was wrong.') + end + end +end + +function nntest.Bilinear() + + -- set up data: + local N = 10 + local D1 = 5 + local D2 = 4 + local K = 3 + local input = {torch.randn(N, D1), torch.randn(N, D2)} + local target = torch.randn(N, K) + + -- test forward + local module = nn.Bilinear(D1, D2, K) + local expected = torch.zeros(N,K) + for k = 1, K do + local temp = torch.mm(module.weight[k], input[2]:t()) + temp:cmul(input[1]:t()) + temp = temp:sum(1) + temp:add(module.bias[k]) + expected[{{},k}] = temp:view(-1) + end + local output = module:forward(input) + mytester:assertTensorEq(expected, output, 0.000001, 'Bilinear forward 2D err') + + -- For testing grads we'll follow the nn.DotProduct strategy of using a SplitTable + local input2 = torch.randn(2, N, D1) + local module2 = nn.Sequential() + module2:add(nn.SplitTable(1)) + module2:add(nn.ParallelTable():add(nn.Linear(D1,D1)):add(nn.Linear(D1,D2))) + module2:add(nn.Bilinear(D1, D2, K)) + module2:add(nn.Linear(K,1)) + + local err = jac.testJacobian(module2, input2) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module2, input2, module2:get(3).weight, module2:get(3).gradWeight) + mytester:assertlt(err, precision, 'error on weight ') + + local err = jac.testJacobianParameters(module2, input2, module2:get(3).bias, module2:get(3).gradBias) + mytester:assertlt(err, precision, 'error on bias ') + +end + +function nntest.PartialLinear() + + -- settings for experiment: + local N = 10 + local D = 5 + local K = 15 + + -- test forward-backward pass of module: + local module = nn.PartialLinear(D, K) + for sub_K = 1,K do + + -- get random test case: + local input = torch.randn(N, D) + local partition = torch.randperm(K):narrow(1, 1, sub_K) + + -- do 
forward-backward pass: + module:setPartition(partition) + module:forward(input) + mytester:asserteq(module.output:size(1), N) + mytester:asserteq(module.output:size(2), sub_K) + module:backward(input, torch.ones(N, sub_K)) + mytester:asserteq(module.gradInput:size(1), input:size(1)) + mytester:asserteq(module.gradInput:size(2), input:size(2)) + + -- do parameter update: + local lr = .01 + module:updateParameters(lr) + end + module:resetPartition() + + -- compare output with linear layer: + local module2 = nn.Linear(D, K) + module2.weight:copy(module.network:get(1):get(2).weight) + module2.bias:fill(0) + if module.bias then module2.bias:copy(module.bias) end + local input = torch.randn(N, D) + local diff = (module:forward(input) - module2:forward(input)):abs():sum() + mytester:assertlt(diff, 1e-7) + + -- gradient checks: + local sub_K = 5 + local partition = torch.randperm(K):narrow(1, 1, sub_K) + module:setPartition(partition) + local err = sjac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = sjac.testJacobianParameters(module, input, module.network:get(1):get(2).weight, module.network:get(1):get(2).gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = sjac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = sjac.testJacobianUpdateParameters(module, input, module.network:get(1):get(2).weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + local err = sjac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update] ') + + local ferr, berr = sjac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.Euclidean() + local ini = math.random(5,7) + local inj = math.random(5,7) + local input = torch.randn(ini) + local gradOutput = torch.randn(inj) + local module = nn.Euclidean(ini,inj) + local output = module:forward(input):clone() + + local output2 = torch.Tensor(inj):zero() + for o = 1,module.weight:size(2) do + output2[o] = input:dist(module.weight:select(2,o)) + end + mytester:assertTensorEq(output, output2, 0.000001, 'Euclidean forward 1D err') + + local input2 = torch.randn(8, ini) + input2[2]:copy(input) + local output2 = module:forward(input2) + mytester:assertTensorEq(output2[2], output, 0.000001, 'Euclidean forward 2D err') + + local output = module:forward(input):clone() + module:zeroGradParameters() + local gradInput = module:backward(input, gradOutput, 1):clone() + local gradInput2 = torch.zeros(ini) + local temp = input:clone() + for o = 1,module.weight:size(2) do + temp:copy(input) + temp:add(-1,module.weight:select(2,o)) + temp:mul(gradOutput[o]/output[o]) + gradInput2:add(temp) + end + mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'Euclidean updateGradInput 1D err') + + local gradWeight = module.gradWeight:clone():zero() + for o = 1,module.weight:size(2) do + temp:copy(module.weight:select(2,o)):add(-1,input) + temp:mul(gradOutput[o]/output[o]) + gradWeight:select(2,o):add(1, temp) + end + mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'Euclidean accGradParameters 1D err') + + local input2 = input:view(1, -1):repeatTensor(8, 1) + local gradOutput2 = gradOutput:view(1, -1):repeatTensor(8, 1) + local output2 = module:forward(input2) + module:zeroGradParameters() + local gradInput2 = module:backward(input2, gradOutput2, 1/8) + mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, 'Euclidean updateGradInput 2D err') + + mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'Euclidean accGradParameters 2D err') + + input:zero() + module.fastBackward = false + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision)
+end
+
+function nntest.WeightedEuclidean()
+   local ini = math.random(5,7)
+   local inj = math.random(5,7)
+   local input = torch.randn(ini)
+   local gradOutput = torch.randn(inj)
+   local module = nn.WeightedEuclidean(ini,inj)
+
+   local output = module:forward(input):clone()
+
+   local output2 = torch.Tensor(inj):zero()
+   local temp = input:clone()
+   for o = 1,module.weight:size(2) do
+      temp:copy(input):add(-1,module.weight:select(2,o))
+      temp:cmul(temp)
+      temp:cmul(module.diagCov:select(2,o)):cmul(module.diagCov:select(2,o))
+      output2[o] = math.sqrt(temp:sum())
+   end
+   mytester:assertTensorEq(output, output2, 0.000001, 'WeightedEuclidean forward 1D err')
+
+   local input2 = torch.randn(8, ini)
+   input2[2]:copy(input)
+   local output2 = module:forward(input2)
+   mytester:assertTensorEq(output2[2], output, 0.000001, 'WeightedEuclidean forward 2D err')
+
+   local output = module:forward(input):clone()
+   module:zeroGradParameters()
+   local gradInput = module:backward(input, gradOutput, 1):clone()
+   local gradInput2 = torch.zeros(ini)
+   for o = 1,module.weight:size(2) do
+      temp:copy(input)
+      temp:add(-1,module.weight:select(2,o))
+      temp:cmul(module.diagCov:select(2,o)):cmul(module.diagCov:select(2,o))
+      temp:mul(gradOutput[o]/output[o])
+      gradInput2:add(temp)
+   end
+   mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'WeightedEuclidean updateGradInput 1D err')
+
+   local gradWeight = module.gradWeight:clone():zero()
+   local gradDiagCov = module.gradDiagCov:clone():zero()
+   for o = 1,module.weight:size(2) do
+      if output[o] ~= 0 then
+         temp:copy(module.weight:select(2,o)):add(-1,input)
+         temp:cmul(module.diagCov:select(2,o)):cmul(module.diagCov:select(2,o))
+         temp:mul(gradOutput[o]/output[o])
+         gradWeight:select(2,o):add(temp)
+
+         temp:copy(module.weight:select(2,o)):add(-1,input)
+         temp:cmul(temp)
+         temp:cmul(module.diagCov:select(2,o))
+         temp:mul(gradOutput[o]/output[o])
+         gradDiagCov:select(2,o):add(temp)
+      end
+   end
+   mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'WeightedEuclidean accGradParameters gradWeight 1D err')
+   mytester:assertTensorEq(gradDiagCov, module.gradDiagCov, 0.000001, 'WeightedEuclidean accGradParameters gradDiagCov 1D err')
+
+   local input2 = input:view(1, -1):repeatTensor(8, 1)
+   local gradOutput2 = gradOutput:view(1, -1):repeatTensor(8, 1)
+   local output2 = module:forward(input2)
+   module:zeroGradParameters()
+   local gradInput2 = module:backward(input2, gradOutput2, 1/8)
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, 'WeightedEuclidean updateGradInput 2D err')
+
+   mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'WeightedEuclidean accGradParameters gradWeight 2D err')
+   mytester:assertTensorEq(gradDiagCov, module.gradDiagCov, 0.000001, 'WeightedEuclidean accGradParameters gradDiagCov 2D err')
+
+   input:zero()
+   module.fastBackward = false
+
+   local err = jac.testJacobian(module,input)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+   mytester:assertlt(err,precision, 'error on weight ')
+
+   local err = jac.testJacobianParameters(module, input, module.diagCov, module.gradDiagCov)
+   mytester:assertlt(err,precision, 'error on diagCov ')
+
+   local ferr,berr = jac.testIO(module,input)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+   input:zero()
+   module:zeroGradParameters()
+   local err = jac.testJacobian(module,input)
+   mytester:assertlt(err,precision, 'error on state ')
+
+   local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+   mytester:assertlt(err,precision, 'error on weight ')
+
+   local err = jac.testJacobianParameters(module, input, module.diagCov, module.gradDiagCov)
+   mytester:assertlt(err,precision, 'error on diagCov ')
+
+   local ferr,berr = jac.testIO(module,input2)
+   mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+   mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+local function criterionJacobianTest(cri, input, target)
+   local eps = 1e-6
+   local _ = cri:forward(input, target)
+   local dfdx = cri:backward(input, target)
+   -- for each input perturbation, do central difference
+   local centraldiff_dfdx = torch.Tensor():resizeAs(dfdx)
+   local input_s = input:storage()
+   local centraldiff_dfdx_s = centraldiff_dfdx:storage()
+   for i=1,input:nElement() do
+      -- f(xi + h)
+      input_s[i] = input_s[i] + eps
+      local fx1 = cri:forward(input, target)
+      -- f(xi - h)
+      input_s[i] = input_s[i] - 2*eps
+      local fx2 = cri:forward(input, target)
+      -- f'(xi) = (f(xi + h) - f(xi - h)) / 2h
+      local cdfx = (fx1 - fx2) / (2*eps)
+      -- store f' in appropriate place
+      centraldiff_dfdx_s[i] = cdfx
+      -- reset input[i]
+      input_s[i] = input_s[i] + eps
+   end
+
+   -- compare centraldiff_dfdx with :backward()
+   local err = (centraldiff_dfdx - dfdx):abs():max()
+   mytester:assertlt(err, precision, 'error in difference between central difference and :backward')
+end
+
+local function criterionJacobianTest1DTable(cri, input0, target)
+   -- assumes input0 is a tensor, which is split along the first dimension
+   -- into a table of 1D tensors before being fed to the criterion
+   local input = input0:split(1,1)
+   for i=1,#input do
+      input[i] = input[i][1]
+   end
+   local eps = 1e-6
+   local _ = cri:forward(input, target)
+   local dfdx = cri:backward(input, target)
+   -- for each input perturbation, do central difference
+   local centraldiff_dfdx = torch.Tensor():resizeAs(input0)
+   local input_s = input0:storage()
+   local centraldiff_dfdx_s = centraldiff_dfdx:storage()
+   for i=1,input0:nElement() do
+      -- f(xi + h)
+      input_s[i] = input_s[i] + eps
+      local fx1 = cri:forward(input, target)
+      -- f(xi - h)
+      input_s[i] = input_s[i] - 2*eps
+      local fx2 = cri:forward(input, target)
+      -- f'(xi) = (f(xi + h) - f(xi - h)) / 2h
+      local cdfx = (fx1 - fx2) / (2*eps)
+      -- store f' in appropriate place
+      centraldiff_dfdx_s[i] = cdfx
+      -- reset input[i]
+      input_s[i] = input_s[i] + eps
+   end
+   local centraldiff_dfdx_t = centraldiff_dfdx:split(1,1)
+   for i=1,#centraldiff_dfdx_t do
+      centraldiff_dfdx_t[i] = centraldiff_dfdx_t[i][1]
+   end
+   for i=1,#centraldiff_dfdx_t do
+      -- compare centraldiff_dfdx with :backward()
+      local err = (centraldiff_dfdx_t[i] - dfdx[i]):abs():max()
+      mytester:assertlt(err, precision, 'error in difference between central difference and :backward')
+   end
+end
+
+function nntest.SmoothL1Criterion()
+   local input = torch.rand(10)
+   local target = input:clone():add(torch.rand(10))
+   local cri = nn.SmoothL1Criterion()
+   criterionJacobianTest(cri, input, target)
+end
+
+function nntest.MSECriterion()
+   local input = torch.rand(10)
+   local target = input:clone():add(torch.rand(10))
+   local cri = nn.MSECriterion()
+   criterionJacobianTest(cri, input, target)
+end
+
+function nntest.SpatialAutoCropMSECriterion()
+   -- Tests the assumptions on input and target dimensions for the
+   -- 
nn.SpatialAutoCropMSECriterion criterion + local function testInputBounds() + for _, average in pairs({true, false}) do + local sMSE = nn.SpatialAutoCropMSECriterion(average) + + local input = torch.Tensor(3, 3, 3) + local target = torch.Tensor(4, 3, 3) + mytester:assertError(function() sMSE:forward(input, target) end, + "Target and input must have same number of channels") + + input = torch.Tensor(2, 4, 3, 3) + target = torch.Tensor(2, 3, 3, 3) + mytester:assertError(function() sMSE:forward(input, target) end, + "Target and input must have same number of channels") + + input = torch.Tensor(2, 3, 3, 3) + target = torch.Tensor(1, 3, 3, 3) + mytester:assertError(function() sMSE:forward(input, target) end, + "Target and input must have same batch size") + + input = torch.Tensor(2, 5, 5) + target = torch.Tensor(2, 5, 4) + mytester:assertError(function() sMSE:forward(input, target) end, + "input resolution must be smaller or equal to the spatial resolution of the target") + + input = torch.Tensor(1, 2, 5, 5) + target = torch.Tensor(1, 2, 4, 5) + mytester:assertError(function() sMSE:forward(input, target) end, + "input resolution must be smaller or equal to the spatial resolution of the target") + end + end + + -- Tests that the forward pass of nn.SpatialAutoCropMSECriterion + -- is equivalent to the forward pass of nn.MSECriterion with a pre-cropped target + local function testSpatialAutoCropMSECriterionBatched() + for _, average in pairs({true, false}) do + local sMSE = nn.SpatialAutoCropMSECriterion(average) + local MSE = nn.MSECriterion(average) + + local batchSize = math.random(1,10) + local channels = math.random(1,10) + local inputHeight = math.random(1, 50) + local inputWidth = math.random(1, 50) + local targetHeight = inputHeight + math.random(0,5) + local targetWidth = inputWidth + math.random(0,5) + + local input = torch.Tensor(batchSize, channels, inputHeight, inputWidth):uniform() + local target = torch.Tensor(batchSize, channels, targetHeight, targetWidth):uniform() + + local heightStartIdx = 1 + math.floor((targetHeight - inputHeight)/2.0) + local heightEndIdx = heightStartIdx + inputHeight - 1 + local widthStartIdx = 1 + math.floor((targetWidth - inputWidth)/2.0) + local widthEndIdx = widthStartIdx + inputWidth - 1 + + local croppedTarget = target[{{}, {}, {heightStartIdx, heightEndIdx}, {widthStartIdx, widthEndIdx}}] + + local sMSEOut = nn.SpatialAutoCropMSECriterion(average):forward(input, target) + local MSEOut = MSE:forward(input, croppedTarget) + mytester:asserteq(sMSEOut, MSEOut) + + local gradOutput = torch.Tensor():resizeAs(croppedTarget):uniform() + local sMSEGradInput = sMSE:backward(input, gradOutput) + local MSEGradInput = MSE:backward(input, gradOutput) + mytester:assertTensorEq(sMSEGradInput, MSEGradInput, 1e-7) + criterionJacobianTest(sMSE, input, gradOutput) + end + end + + local function testSpatialAutoCropMSECriterionNonBatched() + for _, average in pairs({true, false}) do + local sMSE = nn.SpatialAutoCropMSECriterion(average) + local MSE = nn.MSECriterion(average) + + local channels = math.random(1,10) + local inputHeight = math.random(1, 50) + local inputWidth = math.random(1, 50) + local targetHeight = inputHeight + math.random(0,5) + local targetWidth = inputWidth + math.random(0,5) + + local input = torch.Tensor(channels, inputHeight, inputWidth):uniform() + local target = torch.Tensor(channels, targetHeight, targetWidth):uniform() + + local heightStartIdx = 1 + math.floor((targetHeight - inputHeight)/2.0) + local heightEndIdx = heightStartIdx + inputHeight 
- 1
+         local widthStartIdx = 1 + math.floor((targetWidth - inputWidth)/2.0)
+         local widthEndIdx = widthStartIdx + inputWidth - 1
+
+         local croppedTarget = target[{{}, {heightStartIdx, heightEndIdx}, {widthStartIdx, widthEndIdx}}]
+
+         local sMSEOut = nn.SpatialAutoCropMSECriterion(average):forward(input, target)
+         local MSEOut = MSE:forward(input, croppedTarget)
+         mytester:asserteq(sMSEOut, MSEOut)
+
+         local gradOutput = torch.Tensor():resizeAs(croppedTarget):uniform()
+         local sMSEGradInput = sMSE:backward(input, gradOutput)
+         local MSEGradInput = MSE:backward(input, gradOutput)
+         mytester:assertTensorEq(sMSEGradInput, MSEGradInput, 1e-7)
+         criterionJacobianTest(sMSE, input, gradOutput)
+      end
+   end
+
+   testInputBounds()
+   testSpatialAutoCropMSECriterionBatched()
+   testSpatialAutoCropMSECriterionNonBatched()
+end
+
+function nntest.ClassSimplexCriterion()
+   local nClasses = torch.random(3,15)
+   local input = torch.rand(nClasses)
+   local target = torch.random(1,nClasses)
+   local cri = nn.ClassSimplexCriterion(nClasses)
+   criterionJacobianTest(cri, input, target)
+end
+
+
+function nntest.MarginCriterion()
+   local input = torch.rand(100)
+   local target = input:clone():add(torch.rand(100))
+   local cri = nn.MarginCriterion()
+   criterionJacobianTest(cri, input, target)
+end
+
+function nntest.SoftMarginCriterion()
+   local input = torch.rand(100)
+   local target = input:clone():add(torch.rand(100))
+   local cri = nn.SoftMarginCriterion()
+   criterionJacobianTest(cri, input, target)
+end
+
+function nntest.MultiMarginCriterion()
+   local input = torch.rand(100)
+   local target = math.random(1,100)
+   local cri = nn.MultiMarginCriterion(math.random(1,2), nil, 0.1)
+   criterionJacobianTest(cri, input, target)
+
+   local cri = nn.MultiMarginCriterion()
+   criterionJacobianTest(cri, input, target)
+
+   local cri = nn.MultiMarginCriterion(2)
+   criterionJacobianTest(cri, input, target)
+
+   local weights = torch.randn(100)
+   local cri = nn.MultiMarginCriterion(1, weights)
+   criterionJacobianTest(cri, input, target)
+end
+
+function nntest.MarginRankingCriterion()
+   local input = {torch.rand(1), torch.rand(1)}
+   local mrc = nn.MarginRankingCriterion()
+   local output = mrc:forward(input, 1)
+   local gradInput = mrc:backward(input, 1)
+   -- cast to float
+   local input2 = {input[1]:float(), input[2]:float()}
+   local mrc2 = mrc:clone():float()
+   local output2 = mrc2:forward(input2, 1)
+   local gradInput2 = mrc2:backward(input2, 1)
+   mytester:assert(math.abs(output2 - output) < 0.00001, "MRC:type() forward error")
+   mytester:assertTensorEq(gradInput[1]:float(), gradInput2[1], 0.00001, "MRC:type() backward error 1")
+   mytester:assert(torch.type(gradInput2[1]) == 'torch.FloatTensor', "MRC:type() error 1")
+   mytester:assertTensorEq(gradInput[2]:float(), gradInput2[2], 0.00001, "MRC:type() backward error 2")
+   mytester:assert(torch.type(gradInput2[2]) == 'torch.FloatTensor', "MRC:type() error 2")
+
+   -- batch, sizeAverage true, jacobian
+   local margin = math.random() * 2 - 1
+   local batch_size = math.random(1,10)
+   local crit = nn.MarginRankingCriterion(margin)
+   crit.sizeAverage = true
+   local v = torch.rand(2, batch_size)
+   local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1)
+   criterionJacobianTest1DTable(crit,v,t)
+
+   -- batch, sizeAverage false, jacobian
+   local margin = math.random() * 2 - 1
+   local crit = nn.MarginRankingCriterion(margin)
+   crit.sizeAverage = false
+   local v = torch.rand(2, batch_size)
+   local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1)
+   criterionJacobianTest1DTable(crit,v,t)
+end
+
+function nntest.ModuleCriterion()
+   local input = torch.randn(8,4)
+   local target = torch.randn(8,4)
+   local inputModule = nn.Tanh()
+   local criterion = nn.MSECriterion()
+   local mc = nn.ModuleCriterion(criterion, inputModule)
+
+   local err = mc:forward(input, target)
+   local gradInput = mc:backward(input, target)
+
+   local output = inputModule:forward(input)
+   local err2 = criterion:forward(output, target)
+   local gradOutput = criterion:backward(output, target)
+   local gradInput2 = inputModule:backward(input, gradOutput)
+
+   mytester:assert(err == err2, "ModuleCriterion forward err")
+   mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "ModuleCriterion backward err")
+end
+
+function nntest.MaskedSelect()
+   local input = torch.randn(4, 5)
+   local mask = torch.ByteTensor(4, 5):bernoulli()
+   local module = nn.MaskedSelect()
+   local out = module:forward({input, mask})
+   local err = out:dist(input:maskedSelect(mask))
+   mytester:assertlt(err, 1e-15, torch.typename(module) .. ' - forward err ')
+
+   local gradOut = torch.Tensor({20, 80})
+   input = torch.Tensor({{10, 20}, {30, 40}})
+   local inTarget = torch.Tensor({{20, 0}, {0, 80}})
+   local mask = torch.ByteTensor({{1, 0}, {0, 1}})
+   local module = nn.MaskedSelect()
+   module:forward({input, mask})
+   local gradIn = module:backward({input, mask}, gradOut)
+   mytester:assertTensorEq(inTarget, gradIn[1], 1e-15, torch.typename(module) .. ' - backward err ')
+end
+
+function nntest.ParallelCriterion()
+   local input = {torch.rand(2,10), torch.randn(2,10)}
+   local target = {torch.IntTensor{1,8}, torch.randn(2,10)}
+   local nll = nn.ClassNLLCriterion()
+   local mse = nn.MSECriterion()
+   local pc = nn.ParallelCriterion():add(nll, 0.5):add(mse)
+   local output = pc:forward(input, target)
+   local output2 = nll:forward(input[1], target[1])/2 + mse:forward(input[2], target[2])
+   mytester:assert(math.abs(output2 - output) < 0.00001, "ParallelCriterion forward error")
+   local gradInput2 = {nll:backward(input[1], target[1]):clone():div(2), mse:backward(input[2], target[2])}
+   local gradInput = pc:backward(input, target)
+   mytester:assertTensorEq(gradInput[1], gradInput2[1], 0.000001, "ParallelCriterion backward error 1")
+   mytester:assertTensorEq(gradInput[2], gradInput2[2], 0.000001, "ParallelCriterion backward error 2")
+
+   -- test type
+   pc:float()
+   gradInput[1], gradInput[2] = gradInput[1]:clone(), gradInput[2]:clone()
+   local input3 = {input[1]:float(), input[2]:float()}
+   local target3 = {target[1]:float(), target[2]:float()}
+   local output3 = pc:forward(input3, target3)
+   local gradInput3 = pc:backward(input3, target3)
+   mytester:assert(math.abs(output3 - output) < 0.00001, "ParallelCriterion forward error type")
+   mytester:assertTensorEq(gradInput[1]:float(), gradInput3[1], 0.000001, "ParallelCriterion backward error 1 type")
+   mytester:assertTensorEq(gradInput[2]:float(), gradInput3[2], 0.000001, "ParallelCriterion backward error 2 type")
+
+   -- test repeatTarget
+   local input = {torch.rand(2,10), torch.randn(2,10)}
+   local target = torch.randn(2,10)
+   local mse = nn.MSECriterion()
+   local pc = nn.ParallelCriterion(true):add(mse, 0.5):add(mse:clone())
+   local output = pc:forward(input, target)
+   local output2 = mse:forward(input[1], target)/2 + mse:forward(input[2], target)
+   mytester:assert(math.abs(output2 - output) < 0.00001, "ParallelCriterion repeatTarget forward error")
+   local gradInput = pc:backward(input, target)
+   local gradInput2 = {mse:backward(input[1], target):clone():div(2), mse:backward(input[2], target)}
+   mytester:assertTensorEq(gradInput[1], gradInput2[1], 0.000001, "ParallelCriterion repeatTarget backward error 1")
"ParallelCriterion repeatTarget backward error 1") + mytester:assertTensorEq(gradInput[2], gradInput2[2], 0.000001, "ParallelCriterion repeatTarget backward error 2") + + -- table input + local input = {torch.randn(2,10), {torch.rand(2,10), torch.randn(2,10)}} + local target = {torch.IntTensor{2,5}, {torch.IntTensor{1,8}, torch.randn(2,10)}} + local nll2 = nn.ClassNLLCriterion() + local nll = nn.ClassNLLCriterion() + local mse = nn.MSECriterion() + local pc = nn.ParallelCriterion():add(nll, 0.5):add(mse) + local pc2 = nn.ParallelCriterion():add(nll2, 0.4):add(pc) + local output = pc2:forward(input, target) + local output2 = nll2:forward(input[1], target[1])*0.4 + nll:forward(input[2][1], target[2][1])/2 + mse:forward(input[2][2], target[2][2]) + mytester:assert(math.abs(output2 - output) < 0.00001, "ParallelCriterion table forward error") + local gradInput2 = { + nll2:backward(input[1], target[1]):clone():mul(0.4), + {nll:backward(input[2][2], target[2][1]):clone():div(2), mse:backward(input[2][2], target[2][2])} + } + local gradInput = pc2:backward(input, target) + mytester:assertTensorEq(gradInput[1], gradInput2[1], 0.000001, "ParallelCriterion table backward error 1") + mytester:assertTensorEq(gradInput[2][1], gradInput2[2][1], 0.000001, "ParallelCriterion table backward error 2") + mytester:assertTensorEq(gradInput[2][2], gradInput2[2][2], 0.000001, "ParallelCriterion table backward error 3") +end + +function nntest.MultiCriterion() + local input = torch.rand(2,10) + local target = torch.IntTensor{1,8} + local nll = nn.ClassNLLCriterion() + local nll2 = nn.CrossEntropyCriterion() + local mc = nn.MultiCriterion():add(nll, 0.5):add(nll2) + local output = mc:forward(input, target) + local output2 = nll:forward(input, target)/2 + nll2:forward(input, target) + mytester:assert(math.abs(output2 - output) < 0.00001, "MultiCriterion forward error") + local gradInput = mc:backward(input, target) + local gradInput2 = nll:backward(input, target):clone():div(2):add(nll2:backward(input, target)) + mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "MultiCriterion backward error ") + + -- test type + mc:float() + gradInput = gradInput:clone() + local input3 = input:float() + local target3 = target:float() + local output3 = mc:forward(input3, target3) + local gradInput3 = mc:backward(input3, target3) + mytester:assert(math.abs(output3 - output) < 0.00001, "MultiCriterion forward error type") + mytester:assertTensorEq(gradInput:float(), gradInput3, 0.000001, "MultiCriterion backward error type") + + -- test table input + mc:double() + local input = {torch.randn(2,10), {torch.randn(2,10), torch.randn(2,10)}} + local target = {torch.IntTensor{1,8}, {torch.IntTensor{5,6}, torch.IntTensor{4,3}}} + local pnllc = nn.ParallelCriterion():add(nll):add(nn.ParallelCriterion():add(nll:clone()):add(nll:clone())) + local pnllc2 = nn.ParallelCriterion():add(nll2):add(nn.ParallelCriterion():add(nll2:clone()):add(nll2:clone())) + local mc = nn.MultiCriterion():add(pnllc, 0.5):add(pnllc2) + local output = mc:forward(input, target) + local output2 = pnllc:forward(input, target)/2 + pnllc2:forward(input, target) + mytester:assert(math.abs(output2 - output) < 0.00001, "MultiCriterion forward table error") + local gradInput = mc:backward(input, target) + local gradInput2 = pnllc:clone():backward(input, target) + local gradInput2b = pnllc2:backward(input, target) + gradInput2[1]:div(2):add(gradInput2b[1]) + gradInput2[2][1]:div(2):add(gradInput2b[2][1]) + gradInput2[2][2]:div(2):add(gradInput2b[2][2]) + 
mytester:assertTensorEq(gradInput[1], gradInput2[1], 0.000001, "MultiCriterion backward table 1 error ") + mytester:assertTensorEq(gradInput[2][1], gradInput2[2][1], 0.000001, "MultiCriterion backward table 2 error ") + mytester:assertTensorEq(gradInput[2][2], gradInput2[2][2], 0.000001, "MultiCriterion backward table 3 error ") +end + +function nntest.WeightedMSECriterion() + local input = torch.rand(10) + local target = input:clone():add(torch.rand(10)) + local cri = nn.WeightedMSECriterion(torch.rand(10)) + criterionJacobianTest(cri, input, target) +end + +function nntest.BCECriterion() + local eps = 1e-2 + local input = torch.rand(10)*(1-eps) + eps/2 + local target = torch.rand(10)*(1-eps) + eps/2 + local cri = nn.BCECriterion() + criterionJacobianTest(cri, input, target) + --with weights + local weights= torch.rand(10)*(1-eps) + eps/2 + local cri = nn.BCECriterion(weights) + criterionJacobianTest(cri, input, target) + -- with weights + batch + local bsz = 5 + local input = torch.rand(bsz, 10)*(1-eps) + eps/2 + local target = torch.rand(bsz, 10)*(1-eps) + eps/2 + criterionJacobianTest(cri, input, target) +end + +function nntest.DistKLDivCriterion() + local input = torch.rand(10) + local target = input:clone():add(torch.rand(10)) + local cri = nn.DistKLDivCriterion(true) -- sizeAverage = true + criterionJacobianTest(cri, input, target) + cri = nn.DistKLDivCriterion(false) -- sizeAverage = false + criterionJacobianTest(cri, input, target) +end + +function nntest.ClassNLLCriterion() + local batchsize = math.random(2,4) + local numLabels = math.random(5,10) + + local function testclassnll(input, target) + -- default ClassNLLCriterion + local cri = nn.ClassNLLCriterion() + criterionJacobianTest(cri, input, target) + + -- ClassNLLCriterion with weights + local weights = torch.rand(numLabels) + weights = weights / weights:sum() + cri = nn.ClassNLLCriterion(weights) + criterionJacobianTest(cri, input, target) + end + + -- input/target: 1D/number + testclassnll(torch.rand(numLabels), math.random(1,numLabels)) + -- input/target: 1D/1D + testclassnll(torch.rand(numLabels), torch.LongTensor(1):random(1, numLabels)) + -- input/target: 2D/1D + testclassnll(torch.rand(batchsize, numLabels), torch.LongTensor(batchsize):random(1,numLabels)) + -- test ignoreIndex + local ignoreIndex = -1 + local cri = nn.ClassNLLCriterion(nil, nil, ignoreIndex) + local input = torch.randn(numLabels) + local target = ignoreIndex + mytester:assert(cri:forward(input, target) == 0) + mytester:assert(cri:backward(input, target):abs():sum() == 0) + local input = torch.randn(batchsize, numLabels) + local target = torch.LongTensor(batchsize):random(1,numLabels) + target[1] = ignoreIndex + local output = cri:forward(input, target) + local gradInput = cri:backward(input, target):clone() + mytester:assert(gradInput[1]:abs():sum() == 0) + local input, target = input:sub(2,batchsize), target:sub(2,batchsize) + local output2 = cri:forward(input, target) + mytester:assert(math.abs(output2 - output) < 0.0000001) + local gradInput2 = cri:backward(input, target) + mytester:assertTensorEq(gradInput2, gradInput:sub(2,batchsize), 0.0000001) +end + +function nntest.SpatialClassNLLCriterion() + local numLabels = math.random(5,10) + local h = math.random(5, 20) + local w = math.random(5, 20) + local batchSize = math.random(1, 4) + local input = torch.rand(batchSize, numLabels, h, w) + local target = torch.Tensor(batchSize, h, w) + target:apply(function() return math.random(1, numLabels) end) + + -- default ClassNLLCriterion + local cri = 
nn.SpatialClassNLLCriterion() + criterionJacobianTest(cri, input, target) + + -- ClassNLLCriterion with weights + local weights = torch.rand(numLabels) + cri = nn.SpatialClassNLLCriterion(weights) + criterionJacobianTest(cri, input, target) + + -- check with ClassNLLCriterion + local spatial = nn.SpatialClassNLLCriterion(weights) + local regular = nn.ClassNLLCriterion(weights) + local spatial_out = spatial:forward(input, target) + local regular_out = regular:forward(input:permute(1, 3, 4, 2):contiguous():view(-1, numLabels), + target:view(-1)) + mytester:eq(spatial_out, regular_out, 1e-6, + "spatial and regular criterions give different results") +end + +function nntest.MultiLabelSoftMarginCriterion() + -- test w/o weights + + local cri = nn.MultiLabelSoftMarginCriterion() + + -- stochastic + local numLabels = math.random(5, 10) + local input = torch.randn(numLabels) + local target = torch.round(torch.rand(numLabels)) + criterionJacobianTest(cri, input, target) + + -- batch + local numLabels = math.random(5, 10) + local bsz = math.random(3, 7) + local input = torch.randn(bsz, numLabels) + local target = torch.round(torch.rand(bsz, numLabels)) + criterionJacobianTest(cri, input, target) + + -- test weights + + local numLabels = math.random(5, 10) + local weights = torch.randn(numLabels) + local cri = nn.MultiLabelSoftMarginCriterion(weights) + + -- stochastic + local input = torch.randn(numLabels) + local target = torch.round(torch.rand(numLabels)) + criterionJacobianTest(cri, input, target) + + -- batch + local bsz = math.random(3, 7) + local input = torch.randn(bsz, numLabels) + local target = torch.round(torch.rand(bsz, numLabels)) + criterionJacobianTest(cri, input, target) +end + +function nntest.CrossEntropyCriterion() + -- stochastic + local numLabels = math.random(5, 10) + local input = torch.zeros(numLabels) + local target = torch.random(1, numLabels) + + local cri = nn.CrossEntropyCriterion() + criterionJacobianTest(cri, input, target) + + -- batch + local numLabels = math.random(5,10) + local bsz = math.random(3, 7) + local input = torch.zeros(bsz, numLabels) + local target = torch.Tensor(bsz):random(1, numLabels) + + local cri = nn.CrossEntropyCriterion() + criterionJacobianTest(cri, input, target) + + -- with weights + local weights = torch.rand(numLabels) + weights = weights / weights:sum() + cri = nn.CrossEntropyCriterion(weights) + criterionJacobianTest(cri, input, target) + + -- verify nll.sizeAverage preservation + cri = nn.CrossEntropyCriterion(weights) + cri.nll.sizeAverage = false + criterionJacobianTest(cri, input, target) + mytester:eq(cri.nll.sizeAverage, false, + "ClassNLLCriterion.sizeAverage overwritten") + + -- verify nll.sizeAverage propagation + cri = nn.CrossEntropyCriterion(weights) + cri.sizeAverage = false + criterionJacobianTest(cri, input, target) + mytester:eq(cri.nll.sizeAverage, false, + "ClassNLLCriterion.sizeAverage not propagated") +end + +function nntest.LogSigmoid() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.LogSigmoid() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.LogSoftmax() + local ini = math.random(3,5) + local inj = math.random(3,5) + local input = torch.Tensor(ini,inj):zero() + local module = nn.LogSoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err, 1e-3, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- test logsoftmax when gradOutput is non-contiguous + local layer = nn.LogSoftMax() + layer:zeroGradParameters() + local input = torch.randn(4, 10) + local data = torch.randn(4, 20) + local gradOutput = data:narrow(2, 1, 10):fill(0) + local output = layer:forward(input) + local gradInput1 = layer:backward(input, gradOutput):clone() + local output = layer:forward(input) + gradOutput = gradOutput:clone() + local gradInput2 = layer:backward(input, gradOutput):clone() + + mytester:assertlt(gradInput1:add(-1, gradInput2):abs():max(), + 1e-10, + torch.typename(layer) + .. ' non-contiguous gradOutput check') + + + + +end + +function nntest.SpatialLogSoftMax() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local inl = math.random(3,5) + local input = torch.Tensor(inl, ink, inj, ini):zero() + local module = nn.SpatialLogSoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,expprecision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +-- function nntest.TemporalLogSoftmax() +-- local ini = math.random(10,20) +-- local inj = math.random(10,20) +-- local input = torch.Tensor(ini,inj):zero() +-- local module = nn.TemporalLogSoftMax() + +-- local err = jac.testJacobian(module,input) +-- mytester:assertlt(err,precision, 'error on state ') + +-- local ferr,berr = jac.testIO(module,input) +-- mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) +-- mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +-- end + +function nntest.Max() + -- 1D + local ini = math.random(3,7) + local input = torch.Tensor(ini):zero() + local module = nn.Max(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + -- negative dimension + local module = nn.Max(-1) + local input = torch.Tensor({1, 2, 3}) + local expected = torch.Tensor({3}) + local output = module:forward(input) + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + -- batch + local module = nn.Max(1, 1) + local input = torch.Tensor({{1, 2, 3},{4, 5, 6}}) + local expected = torch.Tensor({3, 6}) + local output = module:forward(input) + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + + -- 3D + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj*ink):zero() + local module = nn.Max(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.Min() + -- 1D + local ini = math.random(3,7) + local input = torch.Tensor(ini):zero() + local module = nn.Min(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + -- negative dimension + local module = nn.Min(-1) + local input = torch.Tensor({1, 2, 3}) + local expected = torch.Tensor({1}) + local output = module:forward(input) + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + -- batch + local module = nn.Min(1, 1) + local input = torch.Tensor({{1, 2, 3},{4, 5, 6}}) + local expected = torch.Tensor({1, 4}) + local output = module:forward(input) + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + + -- 3D + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj*ink):zero() + local module = nn.Min(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Mean() + -- 1D + local ini = math.random(3,7) + local input = torch.Tensor(ini):zero() + local module = nn.Mean(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + -- negative dimension + local module = nn.Mean(-1) + local input = torch.Tensor({1, 2, 3}) + local expected = torch.Tensor({2}) + local output = module:forward(input) + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + -- batch + local module = nn.Mean(1, 1) + local input = torch.Tensor({{1, 2, 3},{4, 5, 6}}) + local expected = torch.Tensor({2, 5}) + local output = module:forward(input) + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + + -- 3D + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Mean(torch.random(1,3)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Mul() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Mul() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.Sigmoid() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Sigmoid() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Softmax() + local ini = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,expprecision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.SpatialSoftMax() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local inl = math.random(3,5) + local input = torch.Tensor(inl, ink, inj, ini):zero() + local module = nn.SpatialSoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,expprecision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Softmin() + local ini = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftMin() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,expprecision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Softsign() + local ini = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftSign() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.SoftPlus() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.SoftPlus() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.SpatialSubtractiveNormalization_2dkernel() + local inputSize = math.random(6,9) + local kersize = 3 + local nbfeatures = math.random(3,5) + local kernel = torch.Tensor(kersize,kersize):fill(1) + local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize/2) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- test batch mode + local output = module:forward(input):clone() + local gradOutput = output:clone():uniform(0,1) + local gradInput = module:backward(input, gradOutput):clone() + local batchSize = 4 + local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2) + input2[2]:copy(input) + + local output2 = module:forward(input2) + local gradOutput2 = output2:clone():uniform(0,1) + gradOutput2[2]:copy(gradOutput) + local gradInput2 = module:backward(input2, gradOutput2) + + mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubtractiveNormalization 2d forward batch err") + mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialSubtractiveNormalization 2d backward batch err") + + local err = jac.testJacobian(module,input2) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input2) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + +end + +function nntest.SpatialSubtractiveNormalization_1dkernel() + local inputSize = math.random(6,9) + local kersize = 3 + local nbfeatures = math.random(3,5) + local kernel = torch.Tensor(kersize):fill(1) + local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize/2) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- test batch mode + local output = module:forward(input):clone() + local gradOutput = output:clone():uniform(0,1) + local gradInput = module:backward(input, gradOutput):clone() + local batchSize = 4 + local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2) + input2[2]:copy(input) + + local output2 = module:forward(input2) + local gradOutput2 = output2:clone():uniform(0,1) + gradOutput2[2]:copy(gradOutput) + local gradInput2 = module:backward(input2, gradOutput2) + + mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubtractiveNormalization 1d forward batch err") + mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialSubtractiveNormalization 1d backward batch err") + + local err = jac.testJacobian(module,input2) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input2) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) ..
' - i/o backward err ', precision) +end + +function nntest.SpatialDivisiveNormalization_2dkernel() + local inputSize = math.random(6,9) + local kersize = 3 + local nbfeatures = math.random(3,5) + local kernel = torch.Tensor(kersize,kersize):fill(1) + local module = nn.SpatialDivisiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize/2) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- test batch mode + local output = module:forward(input):clone() + local gradOutput = output:clone():uniform(0,1) + local gradInput = module:backward(input, gradOutput):clone() + local batchSize = 4 + local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2) + input2[2]:copy(input) + + local output2 = module:forward(input2) + local gradOutput2 = output2:clone():uniform(0,1) + gradOutput2[2]:copy(gradOutput) + local gradInput2 = module:backward(input2, gradOutput2) + + mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 2d forward batch err") + mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialDivisiveNormalization 2d backward batch err") + + local err = jac.testJacobian(module,input2) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input2) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.SpatialDivisiveNormalization_1dkernel() + local inputSize = math.random(6,9) + local kersize = 3 + local nbfeatures = math.random(3,5) + local kernel = torch.Tensor(kersize):fill(1) + local module = nn.SpatialDivisiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize/2) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- test batch mode + local output = module:forward(input):clone() + local gradOutput = output:clone():uniform(0,1) + local gradInput = module:backward(input, gradOutput):clone() + local batchSize = 4 + local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2) + input2[2]:copy(input) + + local output2 = module:forward(input2) + local gradOutput2 = output2:clone():uniform(0,1) + gradOutput2[2]:copy(gradOutput) + local gradInput2 = module:backward(input2, gradOutput2) + + mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 1d forward batch err") + mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialDivisiveNormalization 1d backward batch err") + + local err = jac.testJacobian(module,input2) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input2) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.SpatialContrastiveNormalization() + local inputSize = math.random(6,9) + local kersize = 3 + local nbfeatures = math.random(3,5) + local kernel = torch.Tensor(kersize,kersize):fill(1) + local module = nn.SpatialContrastiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize/2) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- test batch mode and type + local output = module:forward(input):clone() + local gradOutput = output:clone():uniform(0,1) + local gradInput = module:backward(input, gradOutput):clone() + local batchSize = 4 + local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2):float() + input2[2]:copy(input) + + module:float() -- type-cast + local output2 = module:forward(input2) + local gradOutput2 = output2:clone():uniform(0,1) + gradOutput2[2]:copy(gradOutput) + local gradInput2 = module:backward(input2, gradOutput2) + + mytester:assertTensorEq(output2[2], output:float(), 0.000002, "SpatialContrastiveNormalization 2d forward batch err") + mytester:assertTensorEq(gradOutput2[2], gradOutput:float(), 0.000002, "SpatialContrastiveNormalization 2d backward batch err") + + module:double() + input2 = input2:double() + local err = jac.testJacobian(module,input2) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input2) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.SpatialCrossMapLRN() + local inputSize = math.random(6,9) + local size = math.random(1,3)*2+1 + local nbfeatures = math.random(3,8) + local alpha = math.random(1,100)/100 + local beta = math.random(0,100)/100 + local k = math.random(1,3) + local module = nn.SpatialCrossMapLRN(size, alpha, beta, k) + local input = torch.rand(nbfeatures,inputSize,inputSize) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- test batch mode and type + local output = module:forward(input):clone() + local gradOutput = output:clone():uniform(0,1) + local gradInput = module:backward(input, gradOutput):clone() + local batchSize = 4 + local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize):float() + input2[2]:copy(input) + + module:float() -- type-cast + local output2 = module:forward(input2) + local gradOutput2 = output2:clone():uniform(0,1) + gradOutput2[2]:copy(gradOutput) + local gradInput2 = module:backward(input2, gradOutput2) + + mytester:assertTensorEq(output2[2], output:float(), 0.000001, "SpatialCrossMapLRN 2d forward batch err") + mytester:assertTensorEq(gradOutput2[2], gradOutput:float(), 0.000001, "SpatialCrossMapLRN 2d backward batch err") + + module:double() + input2 = input2:double() + local err = jac.testJacobian(module,input2) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input2) + mytester:eq(ferr, 0, torch.typename(module) .. 
' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + + +function nntest.SpatialConvolution() + local from = math.random(1,5) + local to = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(5,7) + local outj = math.random(5,7) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local function jacTests(module) + -- stochastic + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + end + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + if module.bias then + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + end + + nn.hessian.enable() + + local err = jac.testDiagHessianInput(module, input) + mytester:assertlt(err , precision, 'error on diagHessianInput') + + local err = jac.testDiagHessianWeight(module, input) + mytester:assertlt(err , precision, 'error on diagHessianWeight') + + if module.bias then + local err = jac.testDiagHessianBias(module, input) + mytester:assertlt(err , precision, 'error on diag HessianBias') + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + if module.bias then + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + end + + -- batch + + --verbose = true + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + end + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + if module.bias then + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + end + + local err = jac.testDiagHessianInput(module, input) + mytester:assertlt(err , precision, 'error on diagHessianInput') + + local err = jac.testDiagHessianWeight(module, input) + mytester:assertlt(err , precision, 'error on diagHessianWeight') + + if module.bias then + local err = jac.testDiagHessianBias(module, input) + 
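+      -- (asserted just below) these batch-mode diagonal-Hessian checks reuse
+      -- the nn.hessian.enable() call made in the single-sample pass above,
+      -- which is what lets jac.testDiagHessianInput/Weight/Bias run here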
mytester:assertlt(err , precision, 'error on diag HessianBias') + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + if module.bias then + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) + end + + jacTests(module) + module:noBias() + jacTests(module) + module.bias = torch.Tensor(module.nOutputPlane):zero() + module.gradBias = torch.Tensor(module.nOutputPlane):zero() + module:reset() + jacTests(module) + + local output = module:forward(input):clone() + local gradOutput = output:clone():normal() + local gradInput = module:backward(input, gradOutput):clone() + local bigWeight = module.weight.new(module.weight:nElement() * 4):fill(0/0) -- fill with nans + local newWeight = bigWeight:narrow(1, module.weight:nElement() * 3, module.weight:nElement()) + newWeight = newWeight:viewAs(module.weight):copy(module.weight) + module.weight = newWeight + local newOutput = module:forward(input) + local newGradInput = module:backward(input, gradOutput) + mytester:asserteq((newOutput - output):abs():max(), 0, + torch.typename(module) .. ' forward failure case in a getParameters setting ') + mytester:asserteq((newGradInput - gradInput):abs():max(), 0, + torch.typename(module) .. ' backward failure case in a getParameters setting ') + +end + +function nntest.SpatialConvolutionMM() + local from = math.random(2,5) + local to = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local di = math.random(1,4) + local dj = math.random(1,4) + local padW = math.random(0,2) + local padH = math.random(0,2) + local outi = math.random(5,9) + local outj = math.random(5,9) + local ini = (outi-1)*di+ki-padW*2 + local inj = (outj-1)*dj+kj-padH*2 + local module = nn.SpatialConvolutionMM(from, to, ki, kj, di, dj, padW, padH) + local input = torch.Tensor(from, inj, ini):zero() + + -- stochastic + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- batch + + --verbose = true + local batch = math.random(2,5) + + module = nn.SpatialConvolutionMM(from, to, ki, kj, di, dj, padW, padH) + input = torch.Tensor(batch,from,inj,ini):zero() + + local err = jac.testJacobian(module, input) +
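+   -- (asserted just below) the batch pass repeats the single-sample Jacobian
+   -- and parameter-update checks on a 4D batch input to exercise the batched
+   -- code path of SpatialConvolutionMM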
mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) + + -- non-contiguous + local input = torch.randn(batch,from,ini,inj):transpose(3,4) -- non-contiguous + local inputc = input:contiguous() -- contiguous + local output = module:forward(input):clone() + local outputc = module:forward(inputc):clone() + mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - contiguous err ') + local gradInput = module:backward(input, output):clone() + local gradInputc = module:backward(inputc, outputc):clone() + mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. 
' - contiguous err ') +end + +function nntest.SpatialConvolutionLocal() + local from = math.random(1,4) + local to = math.random(1,4) + local ki = math.random(1,3) + local kj = math.random(1,3) + local si = math.random(1,3) + local sj = math.random(1,3) + local outi = math.random(5,6) + local outj = math.random(5,6) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialConvolutionLocal(from, to, ini, inj, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + -- stochastic + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + nn.hessian.enable() + + local err = jac.testDiagHessianInput(module, input) + mytester:assertlt(err , precision, 'error on diagHessianInput') + + local err = jac.testDiagHessianWeight(module, input) + mytester:assertlt(err , precision, 'error on diagHessianWeight') + + local err = jac.testDiagHessianBias(module, input) + mytester:assertlt(err , precision, 'error on diag HessianBias') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- batch + + --verbose = true + local batch = math.random(2,5) + outi = math.random(4,6) + outj = math.random(4,6) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialConvolutionLocal(from, to, ini, inj, ki, kj, si, sj) + input = torch.Tensor(batch, from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + local err = jac.testDiagHessianInput(module, input) + mytester:assertlt(err , precision, 'error on diagHessianInput') + + local err = jac.testDiagHessianWeight(module, input) + mytester:assertlt(err , precision, 'error on diagHessianWeight') + + local err = jac.testDiagHessianBias(module, input) + mytester:assertlt(err , precision, 'error on diag HessianBias') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, 
input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) + + -- check against nn.SpatialConvolution + local conv = nn.SpatialConvolution(from, to, ki, kj, si, sj) + torch.repeatTensor(module.bias, conv.bias:view(to, 1, 1), 1, outj, outi) + torch.repeatTensor(module.weight, conv.weight:view(1, 1, from, to, ki, kj), outi, outj, 1, 1, 1, 1) + local input = torch.rand(batch, from, inj, ini) + local output = module:forward(input) + local outputConv = conv:forward(input) + local err = torch.dist(output, outputConv) + mytester:assertlt(err, precision, 'error checking against nn.SpatialConvolution') +end + +function nntest.SpatialFullConvolution() + local from = math.random(2,5) + local to = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local di = math.random(1,4) + local dj = math.random(1,4) + local padW = math.random(0,2) + local padH = math.random(0,2) + local outi = math.random(5,9) + local outj = math.random(5,9) + local adjW = (outi + padW*2 - ki) % di + local adjH = (outj + padH*2 - kj) % dj + local ini = math.floor((outi + padW*2 - ki)/di + 1) + local inj = math.floor((outj + padH*2 - kj)/dj + 1) + local module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH) + local input = torch.Tensor(from, inj, ini):zero() + + local function jacTests(module) + -- stochastic + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + end + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + if module.bias then + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + if module.bias then + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + end + + -- batch + + --verbose = true + local batch = math.random(2,5) + + module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH) + input = torch.Tensor(batch,from,inj,ini):zero() + + -- Check that the required output size matches the actual output size + local output = module:forward(input) + mytester:asserteq(output:size(3), outj, 'output height error') + mytester:asserteq(output:size(4), outi, 'output width error') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, 
module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + end + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + if module.bias then + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + if module.bias then + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) + end + + jacTests(module) + module:noBias() + jacTests(module) + module.bias = torch.Tensor(module.nOutputPlane):zero() + module.gradBias = torch.Tensor(module.nOutputPlane):zero() + module:reset() + jacTests(module) + + -- non-contiguous + local batch = math.random(2,5) + local input = torch.randn(batch,from,ini,inj):transpose(3,4) -- non-contiguous + local inputc = input:contiguous() -- contiguous + local output = module:forward(input) + local outputc = module:forward(inputc) + mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - contiguous err ') + local gradInput = module:backward(input, output) + local gradInputc = module:backward(inputc, outputc) + mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - contiguous err ') +end + +function nntest.SpatialFullConvolutionDualInput() + local from = math.random(2,5) + local to = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local di = math.random(1,4) + local dj = math.random(1,4) + local padW = math.random(0,2) + local padH = math.random(0,2) + local outi = math.random(5,9) + local outj = math.random(5,9) + local ini = math.floor((outi + padW*2 - ki)/di + 1) + local inj = math.floor((outj + padH*2 - kj)/dj + 1) + local adjW = (outi + 2 * padW - ki) % di + local adjH = (outj + 2 * padH - kj) % dj + local targetTensor = torch.Tensor(outj, outi):zero() + local input = torch.Tensor(from, inj, ini):zero() + + local module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH) + local moduleRef = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH) + moduleRef.weight:copy(module.weight) + moduleRef.bias:copy(module.bias) + + -- Check that the required output size matches the actual output size + -- when using the dual input mode + local output = module:forward({input, targetTensor}) + mytester:asserteq(output:size(2), outj, 'output height error') + mytester:asserteq(output:size(3), outi, 'output width error') + + -- Check that backward and forward match the reference module + local outputRef = moduleRef:forward(input) + mytester:asserteq(0, (output-outputRef):abs():max(), torch.typename(module) .. ' - output err ') + local gradOutput = outputRef:clone():uniform() + local gradInputRef = moduleRef:backward(input, gradOutput) + local gradInput = module:backward({input, targetTensor}, gradOutput) + mytester:asserteq(0, (gradInput[1]-gradInputRef):abs():max(), torch.typename(module) .. 
' - gradInput[1] err ') + + -- Check that gradInput[2] is the singleton tensor {0} + mytester:asserteq(gradInput[2]:storage():size(), 1, torch.typename(module) .. ' - gradInput[2] size err ') + mytester:asserteq(gradInput[2]:storage()[1], 0, torch.typename(module) .. ' - gradInput[2] value err ') +end + +function nntest.SpatialDilatedConvolution() + local from = math.random(1,5) + local to = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local di = math.random(1,4) + local dj = math.random(1,4) + local padW = math.random(0,2) + local padH = math.random(0,2) + local outi = math.random(5,9) + local outj = math.random(5,9) + local dilationW = math.random(1,10) + local dilationH = math.random(1,10) + local ini = (outi - 1) * di - 2 * padW + dilationW * (ki-1) + 1 + local inj = (outj - 1) * dj - 2 * padH + dilationH * (kj-1) + 1 + + local module = nn.SpatialDilatedConvolution(from, to, ki, kj, di, dj, padW, padH, dilationW, dilationH) + local input = torch.Tensor(from, inj, ini):zero() + + -- stochastic + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- batch + + --verbose = true + local batch = math.random(2,5) + + module = nn.SpatialDilatedConvolution(from, to, ki, kj, di, dj, padW, padH, dilationW, dilationH) + input = torch.Tensor(batch,from,inj,ini):zero() + + -- Check that the required output size matches the actual output size + local output = module:forward(input) + mytester:asserteq(output:size(3), outj, 'output height error') + mytester:asserteq(output:size(4), outi, 'output width error') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, 
string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) + + -- non-contiguous + local input = torch.randn(batch,from,ini,inj):transpose(3,4) -- non-contiguous + local inputc = input:contiguous() -- contiguous + local output = module:forward(input) + local outputc = module:forward(inputc) + mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - contiguous err ') + local gradInput = module:backward(input, output) + local gradInputc = module:backward(inputc, outputc) + mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - contiguous err ') +end + +function nntest.SpatialConvolutionMap() + local from = math.random(1,5) + local fanin = math.random(1, from) + local to = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local si = math.random(1,3) + local sj = math.random(1,3) + local outi = math.random(5,9) + local outj = math.random(5,9) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + + local module = nn.SpatialConvolutionMap(nn.tables.random(from, to, fanin), ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + nn.hessian.enable() + + local err = jac.testDiagHessianInput(module, input) + mytester:assertlt(err , precision, 'error on diagHessianInput') + + local err = jac.testDiagHessianWeight(module, input) + mytester:assertlt(err , precision, 'error on diagHessianWeight') + + local err = jac.testDiagHessianBias(module, input) + mytester:assertlt(err , precision, 'error on diag HessianBias') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. 
' - i/o backward err ', precision) + + + + -- batch + + --verbose = true + local batch = math.random(2,6) + module = nn.SpatialConvolutionMap(nn.tables.random(from, to, fanin), ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + local err = jac.testDiagHessianInput(module, input) + mytester:assertlt(err , precision, 'error on diagHessianInput') + + local err = jac.testDiagHessianWeight(module, input) + mytester:assertlt(err , precision, 'error on diagHessianWeight') + + local err = jac.testDiagHessianBias(module, input) + mytester:assertlt(err , precision, 'error on diag HessianBias') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.SpatialFullConvolutionMap() + local from = math.random(2,4) + local to = math.random(2,5) + local fanin = math.random(1, from) + local tt = nn.tables.random(from, to, fanin) + local ki = math.random(2,5) + local kj = math.random(2,5) + local si = math.random(1,3) + local sj = math.random(1,3) + local ini = math.random(5,7) + local inj = math.random(5,7) + local module = nn.SpatialFullConvolutionMap(tt, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + -- stochastic + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + nn.hessian.enable() + + local err = jac.testDiagHessianInput(module, input) + mytester:assertlt(err , precision, 'error on diagHessianInput') + + local err = jac.testDiagHessianWeight(module, input) + mytester:assertlt(err , precision, 'error on diagHessianWeight') + + local err = jac.testDiagHessianBias(module, input) + mytester:assertlt(err , precision, 'error on diag HessianBias') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.SpatialFullConvolutionCompare() + local from = math.random(2,4) + local to = math.random(2,5) + local tt = nn.tables.full(from, to) + local ki = math.random(2,5) + local kj = math.random(2,5) + local si = math.random(1,3) + local sj = math.random(1,3) + local ini = math.random(7,8) + local inj = math.random(7,8) + local module1 = nn.SpatialFullConvolutionMap(tt, ki, kj, si, sj) + local module2 = nn.SpatialFullConvolution(from, to, ki, kj, si, sj) + local input = torch.rand(from, inj, ini) + for k=1,tt:size(1) do + module1.weight[k]:copy(module2.weight[tt[k][1]][tt[k][2]]) + module1.bias:copy(module2.bias) + end + + local o1 = module1:updateOutput(input) + local o2 = module2:updateOutput(input) + mytester:assertlt(o1:dist(o2), precision, 'error on output') + + local go1 = torch.rand(o1:size()) + local go2 = go1:clone() + + local gi1= module1:updateGradInput(input,go1) + local gi2 = module2:updateGradInput(input,go2) + mytester:assertlt(gi1:dist(gi2), precision, 'error on gradInput') + + module1:zeroGradParameters() + module2:zeroGradParameters() + + module1:accGradParameters(input,go1) + module2:accGradParameters(input,go2) + for k=1,tt:size(1) do + mytester:assertlt(module1.gradWeight[k]:dist(module2.gradWeight[tt[k][1]][tt[k][2]]),precision,'error on gradWeight ' .. 
k) + end + mytester:assertlt(module1.gradBias:dist(module2.gradBias),precision,'error on gradBias ') +end + +local function batchcompare(smod, sin, plist) + local bs = torch.LongStorage(sin:dim()+1) + bs[1] = 1 + for i=1,sin:dim() do bs[i+1] = sin:size()[i] end + local bin = torch.Tensor(bs):copy(sin) + local bmod = smod:clone() + + local sout = smod:forward(sin):clone() + local bout = bmod:forward(bin):clone() + + local sgout = torch.randn(sout:size()) + local bgout = torch.Tensor(bout:size()) + bgout:copy(sgout) + + local sgin = smod:backward(sin, sgout) + local bgin = bmod:backward(bin, bgout) + + smod:accGradParameters(sin, sgout, 1) + bmod:accGradParameters(bin, bgout, 1) + + mytester:assertTensorEq(sout,bout:select(1,1), 1e-8, 'batchcompare error on output') + mytester:assertTensorEq(sgin,bgin:select(1,1), 1e-8, 'batchcompare error on gradInput') + + for i,v in pairs(plist) do + mytester:assertTensorEq(smod[v],bmod[v], 1e-8, 'batchcompare error on ' .. v) + end +end + +function nntest.SpatialConvolutionBatchCompare() + local from = math.random(1,5) + local to = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(5,9) + local outj = math.random(5,9) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + + local module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + module:zeroGradParameters() + local input = torch.randn(from,inj,ini) + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + +function nntest.SpatialFullConvolutionBatchCompare() + local from = math.random(1,5) + local to = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local si = math.random(1,4) + local sj = math.random(1,4) + local ini = math.random(5,9) + local inj = math.random(5,9) + + local module = nn.SpatialFullConvolution(from, to, ki, kj, si, sj) + module:zeroGradParameters() + local input = torch.randn(from, inj, ini) + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + + + +function nntest.SpatialSubSamplingBatchCompare() + local from = math.random(1,6) + local ki = math.random(1,5) + local kj = math.random(1,5) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(6,10) + local outj = math.random(6,10) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialSubSampling(from, ki, kj, si, sj) + module:zeroGradParameters() + local input = torch.randn(from,inj,ini)--torch.Tensor(from, inj, ini):zero() + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + +function nntest.SpatialSubSampling() + local from = math.random(1,6) + local ki = math.random(1,5) + local kj = math.random(1,5) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(6,10) + local outj = math.random(6,10) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialSubSampling(from, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, 
module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialSubSampling(from, ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'batch error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.SpatialMaxPooling() + for _,ceil_mode in pairs({true,false}) do + local from = math.random(1,5) + local ki = math.random(1,4) + local kj = math.random(1,4) + local si = math.random(1,3) + local sj = math.random(1,3) + local outi = math.random(4,5) + local outj = math.random(4,5) + local padW = math.min(math.random(0,1),math.floor(ki/2)) + local padH = math.min(math.random(0,1),math.floor(kj/2)) + local ini = (outi-1)*si+ki-2*padW + local inj = (outj-1)*sj+kj-2*padH + + local ceil_string = ceil_mode and 'ceil' or 'floor' + local module = nn.SpatialMaxPooling(ki,kj,si,sj,padW,padH) + if ceil_mode then module:ceil() else module:floor() end + local input = torch.rand(from,inj,ini) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- batch + local nbatch = math.random(2,5) + input = torch.rand(nbatch,from,inj,ini) + module = nn.SpatialMaxPooling(ki,kj,si,sj,padW,padH) + if ceil_mode then module:ceil() else module:floor() end + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error '..ceil_string..' 
mode on state (Batch)') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision) + end +end + +function nntest.SpatialMaxUnpooling() + for _,ceil_mode in pairs({true,false}) do + local from = math.random(1,5) + local ki = math.random(2,4) + local kj = math.random(2,4) + local si, sj = ki, kj + local outi = math.random(4,5) + local outj = math.random(4,5) + local padW = math.min(math.random(0,1),math.floor(ki/2)) + local padH = math.min(math.random(0,1),math.floor(kj/2)) + local ini = (outi-1)*si+ki-2*padW + local inj = (outj-1)*sj+kj-2*padH + + local ceil_string = ceil_mode and 'ceil' or 'floor' + local poolingModule = nn.SpatialMaxPooling(ki,kj,si,sj,padW,padH) + if ceil_mode then poolingModule:ceil() else poolingModule:floor() end + local module = nn.SpatialMaxUnpooling(poolingModule) + + local original = torch.rand(from,inj,ini) + local input = poolingModule:forward(original) + local output = module:forward(input) + + mytester:assert(output:isSameSizeAs(original),'SpatialMaxUnpooling output size err') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- batch + local nbatch = math.random(2,5) + original = torch.rand(nbatch,from,inj,ini) + input = poolingModule:forward(original) + output = module:forward(input) + + mytester:assert(output:isSameSizeAs(original),'SpatialMaxUnpooling batch output size err') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state (Batch)') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision) + end +end + +function nntest.SpatialDilatedMaxPooling() + for _,ceil_mode in pairs({true,false}) do + local from = math.random(1,5) + local ki = math.random(1,4) + local kj = math.random(1,4) + local si = math.random(1,3) + local sj = math.random(1,3) + local outi = math.random(4,5) + local outj = math.random(4,5) + local padW = math.min(math.random(0,1),math.floor(ki/2)) + local padH = math.min(math.random(0,1),math.floor(kj/2)) + local dilationW = math.random(1,5) + local dilationH = math.random(1,5) + local ini = (outi-1)*si+(dilationW*(ki-1)+1)-2*padW + local inj = (outj-1)*sj+(dilationH*(kj-1)+1)-2*padH + + local ceil_string = ceil_mode and 'ceil' or 'floor' + local module = nn.SpatialDilatedMaxPooling(ki,kj,si,sj,padW,padH,dilationW, dilationH) + if ceil_mode then module:ceil() else module:floor() end + local input = torch.rand(from,inj,ini) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') + + -- batch + local nbatch = math.random(2,5) + input = torch.rand(nbatch,from,inj,ini) + module = nn.SpatialDilatedMaxPooling(ki,kj,si,sj,padW,padH,dilationW,dilationH) + if ceil_mode then module:ceil() else module:floor() end + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state (Batch)') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ') + end +end + +function nntest.SpatialFractionalMaxPooling() + local batch = math.random(1, 3) + local plane = math.random(1, 3) + local outW = math.random(1, 7) + local outH = math.random(1, 7) + local poolSizeW = math.random(2, 4) + local poolSizeH = math.random(2, 4) + + local minInW = outW + poolSizeW + local minInH = outH + poolSizeH + + local inW = math.random(minInW, minInW + 6) + local inH = math.random(minInH, minInH + 6) + + -- fix the pooling regions so they aren't regenerated with every + -- forward(), so testJacobian can work properly + local module = + nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH, outW, outH) + :fixPoolingRegions() + local input = nil + if batch == 1 then + input = torch.Tensor(plane, inH, inW):zero() + else + input = torch.Tensor(batch, plane, inH, inW):zero() + end + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state') +end + +function nntest.SpatialFractionalMaxPooling_Ratio() + -- Fix a reduction ratio, and test with two different input sizes + local reductionRatioW = torch.uniform(0.4, 0.74) + local reductionRatioH = torch.uniform(0.4, 0.74) + + for tries = 1, 2 do + local batch = math.random(1, 3) + local plane = math.random(1, 3) + local poolSizeW = math.random(2, 3) + local poolSizeH = math.random(2, 3) + + local minInW = math.random(5, 8) + poolSizeW + local minInH = math.random(5, 8) + poolSizeH + + local inW = math.random(minInW, minInW + 6) + local inH = math.random(minInH, minInH + 6) + + -- fix the pooling regions so they aren't regenerated with every + -- forward(), so testJacobian can work properly + local module = + nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH, + reductionRatioW, reductionRatioH) + :fixPoolingRegions() + local input = nil + if batch == 1 then + input = torch.Tensor(plane, inH, inW):zero() + else + input = torch.Tensor(batch, plane, inH, inW):zero() + end + + -- Make sure that the output size is based on our ratio + local output = module:updateOutput(input) + if batch == 1 then + mytester:asserteq(output:size(3), math.floor(reductionRatioW * inW)) + mytester:asserteq(output:size(2), math.floor(reductionRatioH * inH)) + else + mytester:asserteq(output:size(4), math.floor(reductionRatioW * inW)) + mytester:asserteq(output:size(3), math.floor(reductionRatioH * inH)) + end + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state') + end +end + +function nntest.SpatialAveragePooling() + for _,count_include_pad in pairs({true,false}) do + for _,ceil_mode in pairs({true,false}) do + local from = math.random(1,5) + local ki = math.random(1,4) + local kj = math.random(1,4) + local si = math.random(1,3) + local sj = math.random(1,3) + local outi = math.random(4,5) + local outj = math.random(4,5) + local padW = math.min(math.random(0,1),math.floor(ki/2)) + local padH = math.min(math.random(0,1),math.floor(kj/2)) + local ini 
= (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ local mode_string = ceil_mode and 'ceil' or 'floor'
+
+ local module = nn.SpatialAveragePooling(ki, kj, si, sj, padW, padH)
+ if ceil_mode then module:ceil() else module:floor() end
+ if count_include_pad then
+ module:setCountIncludePad()
+ mode_string = mode_string .. ' - count include padding'
+ else
+ module:setCountExcludePad()
+ mode_string = mode_string .. ' - count exclude padding'
+ end
+ local input = torch.Tensor(from, inj, ini):uniform()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error'..mode_string..' on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local batch = math.random(2,5)
+ outi = math.random(4,5)
+ outj = math.random(4,5)
+ local padW = math.min(math.random(0,1),math.floor(ki/2))
+ local padH = math.min(math.random(0,1),math.floor(kj/2))
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ module = nn.SpatialAveragePooling(ki, kj, si, sj, padW, padH)
+ if ceil_mode then module:ceil() else module:floor() end
+ if count_include_pad then
+ module:setCountIncludePad()
+ else
+ module:setCountExcludePad()
+ end
+ input = torch.Tensor(batch,from,inj,ini):uniform()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error'..mode_string..' on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+
+ end
+ end
+ -- test against SpatialSubSampling: average pooling is equivalent to
+ -- subsampling with uniform weights 1/(ki*kj) and zero bias
+ local from = math.random(1,6)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(6,10)
+ local outj = math.random(6,10)
+ local padW = 0
+ local padH = 0
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ local module = nn.SpatialAveragePooling(ki, kj, si, sj, padW, padH)
+ local sap = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ sap.weight:fill(1.0/(ki*kj))
+ sap.bias:fill(0.0)
+
+ local input = torch.Tensor(from, inj, ini):uniform()
+
+ local output = module:forward(input)
+ local gradInput = module:backward(input, output)
+ local output2 = sap:forward(input)
+ local gradInput2 = sap:updateGradInput(input, output)
+
+ mytester:assertTensorEq(output, output2, 0.000001, torch.typename(module) .. ' forward err ')
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, torch.typename(module) .. 
' backward err ') + + -- test against SpatialSubSampling, batch mode + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + local padW = 0 + local padH = 0 + local ini = (outi-1)*si+ki-2*padW + local inj = (outj-1)*sj+kj-2*padH + + module = nn.SpatialAveragePooling(ki, kj, si, sj, padW, padH) + input = torch.Tensor(batch,from,inj,ini):uniform() + + local sap = nn.SpatialSubSampling(from, ki, kj, si, sj) + sap.weight:fill(1.0/(ki*kj)) + sap.bias:fill(0.0) + + local output = module:forward(input) + local gradInput = module:backward(input, output) + local output2 = sap:forward(input) + local gradInput2 = sap:updateGradInput(input, output) + + mytester:assertTensorEq(output, output2, 0.000001, torch.typename(module) .. ' forward err (Batch) ') + mytester:assertTensorEq(gradInput, gradInput2, 0.000001, torch.typename(module) .. ' backward err (Batch) ') + +end + +function nntest.SpatialAdaptiveMaxPooling() + local from = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local ini = math.random(1,16) + local inj = math.random(1,16) + + local module = nn.SpatialAdaptiveMaxPooling(ki,kj) + local input = torch.rand(from,ini,inj) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- batch + local nbatch = math.random(1,3) + input = torch.rand(nbatch,from,ini,inj) + module = nn.SpatialAdaptiveMaxPooling(ki,kj) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state (Batch) ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision) + + -- non-contiguous + + input = torch.rand(from,ini,inj):transpose(2,3) + module = nn.SpatialAdaptiveMaxPooling(ki,kj) + local inputc = input:contiguous() -- contiguous + local output = module:forward(input):clone() + local outputc = module:forward(inputc):clone() + mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ') + local gradInput = module:backward(input, output):clone() + local gradInputc = module:backward(inputc, outputc):clone() + mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ') + + -- non-contiguous batch + local nbatch = math.random(1,3) + input = torch.rand(nbatch,from,ini,inj):transpose(1,3):transpose(2,4) + local inputc = input:contiguous() -- contiguous + module = nn.SpatialAdaptiveMaxPooling(ki,kj) + + local output = module:forward(input):clone() + local outputc = module:forward(inputc):clone() + mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ') + local gradInput = module:backward(input, output):clone() + local gradInputc = module:backward(inputc, outputc):clone() + mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. 
' - batch non-contiguous err ') + +end + +function nntest.SpatialAdaptiveAveragePooling() + local from = math.random(1,5) + local ki = math.random(1,5) + local kj = math.random(1,5) + local ini = math.random(1,16) + local inj = math.random(1,16) + + local module = nn.SpatialAdaptiveAveragePooling(ki,kj) + local input = torch.rand(from,ini,inj) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- batch + local nbatch = math.random(1,3) + input = torch.rand(nbatch,from,ini,inj) + module = nn.SpatialAdaptiveAveragePooling(ki,kj) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state (Batch) ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision) + + -- non-contiguous + + input = torch.rand(from,ini,inj):transpose(2,3) + module = nn.SpatialAdaptiveAveragePooling(ki,kj) + local inputc = input:contiguous() -- contiguous + local output = module:forward(input):clone() + local outputc = module:forward(inputc):clone() + mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ') + local gradInput = module:backward(input, output):clone() + local gradInputc = module:backward(inputc, outputc):clone() + mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ') + + -- non-contiguous batch + local nbatch = math.random(1,3) + input = torch.rand(nbatch,from,ini,inj):transpose(1,3):transpose(2,4) + local inputc = input:contiguous() -- contiguous + module = nn.SpatialAdaptiveAveragePooling(ki,kj) + + local output = module:forward(input):clone() + local outputc = module:forward(inputc):clone() + mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ') + local gradInput = module:backward(input, output):clone() + local gradInputc = module:backward(inputc, outputc):clone() + mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ') + +end + +function nntest.SpatialLPPooling() + local fanin = math.random(1,4) + local osizex = math.random(1,4) + local osizey = math.random(1,4) + local p = 2 + local mx = math.random(2,6) + local my = math.random(2,6) + local dx = math.random(2,mx) + local dy = math.random(2,my) + local sizex = osizex*mx + local sizey = osizey*my + local module = nn.SpatialLPPooling(fanin,p,mx,my,dx,dy) + local input = torch.rand(fanin,sizey,sizex) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.Sum() + -- 1D + local ini = math.random(3,7) + local input = torch.Tensor(ini):zero() + local module = nn.Sum(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + -- negative dimension + local module = nn.Sum(-1) + local input = torch.Tensor({1, 2, 3}) + local expected = torch.Tensor({6}) + local output = module:forward(input) + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + + -- batch + local dimension = 1 + local module = nn.Sum(dimension, 1) + local input = torch.Tensor({{1, 2, 3},{4, 5, 6}}) + local expected = torch.Tensor({6, 15}) + local output = module:forward(input) + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err,precision, 'error on state ') + + -- mean + batch + local dimension = 1 + local module = nn.Sum(dimension, 1, true) + local input = torch.Tensor({{1, 2, 3},{4, 5, 6}}) + local expected = input:mean(dimension + 1) + local output = module:forward(input) + + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err,precision, 'error on state ') + + -- squeeze + local dimension = 1 + local module = nn.Sum(dimension, nil, nil, false) + local input = torch.Tensor({{1, 2, 3},{4, 5, 6}}) + local expected = torch.Tensor({5, 7, 9}):view(1, 3) + local output = module:forward(input) + + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + mytester:assert(output:isSameSizeAs(expected), 'sizes mismatch') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err,precision, 'error on state ') + + -- squeeze + batch + local dimension = 1 + local module = nn.Sum(dimension, 1, nil, false) + local input = torch.Tensor({{1, 2, 3},{4, 5, 6}}) + local expected = torch.Tensor({6, 15}):view(2, 1) + local output = module:forward(input) + + mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ') + mytester:assert(output:isSameSizeAs(expected), 'sizes mismatch') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err,precision, 'error on state ') + + -- 3D + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Sum(torch.random(1,3)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Tanh() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Tanh() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) +end + +function nntest.TemporalConvolution() + -- 1D + local from = math.random(1,5) + local to = math.random(1,5) + local ki = math.random(1,5) + local si = math.random(1,4) + local outi = math.random(5,7) + local ini = (outi-1)*si+ki + local module = nn.TemporalConvolution(from, to, ki,si) + local input = torch.Tensor(ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update]') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update]') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- 2D + local nBatchFrame = 4 + local input = torch.Tensor(nBatchFrame, ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update]') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update]') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. 
' - i/o backward err ', precision) + + -- 2D matches 1D + local output = module:forward(input):clone() + local outputGrad = torch.randn(output:size()) + local inputGrad = module:backward(input, outputGrad):clone() + + local input1D = input:select(1, 2) + local output1D = module:forward(input1D) + local outputGrad1D = outputGrad:select(1, 2) + local inputGrad1D = module:backward(input1D, outputGrad1D) + + mytester:assertTensorEq(output:select(1,2), output1D, 0.000001, 'error on 2D vs 1D forward)') + mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001, 'error on 2D vs 1D backward)') +end + +function nntest.TemporalDynamicKMaxPooling() + local features = math.random(5,10) + local seqLen = math.random(6,9) + local minK = math.random(3,6) + local factor = math.random(1,100)*0.01 + local nBatchFrame = math.random(2,4) + local module = nn.TemporalDynamicKMaxPooling(minK, factor) + + -- 1D + local input = torch.Tensor(seqLen, features) + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') + + -- 2D + local input = torch.Tensor(nBatchFrame, seqLen, features) + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') + + -- 2D matches 1D + local output = module:forward(input):clone() + local outputGrad = torch.randn(output:size()) + local inputGrad = module:backward(input, outputGrad):clone() + + local input1D = input:select(1, 2) + local output1D = module:forward(input1D) + local outputGrad1D = outputGrad:select(1, 2) + local inputGrad1D = module:backward(input1D, outputGrad1D) + + mytester:assertTensorEq(output:select(1,2), output1D, 0.000001, 'error on 2D vs 1D forward)') + mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001, 'error on 2D vs 1D backward)') + + +end + +function nntest.TemporalSubSampling() + local from = math.random(1,5) + local ki = math.random(1,6) + local si = math.random(1,4) + local outi = math.random(6,9) + local ini = (outi-1)*si+ki + local module = nn.TemporalSubSampling(from, ki, si) + local input = torch.Tensor(ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + 
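-- Note (added for clarity): jac.testIO serializes the module to disk and
+ -- back, then re-runs forward/backward on the restored copy; ferr and berr
+ -- are the maximum absolute differences against the original module, so
+ -- both are expected to come back as exactly zero.
+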
local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) +end + + +function nntest.TemporalRowConvolution() + if true then return end -- until this unit test is fixed... + local from = math.random(1,5) + local ki = math.random(1,5) + local si = math.random(1,2) + local outi = math.random(5,7) + local ini = (outi-1)*si+ki + + local function jacTest(module) + + local input + if module.featFirst then + input = torch.Tensor(from, ini):zero() + else + input = torch.Tensor(ini, from):zero() + end + + -- 1D + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, "error on state" ) + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err, precision, "error on weight ") + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err, precision, "error on bias ") + end + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err, precision, "error on weight [direct update] ") + + if module.bias then + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err, precision, "error on bias [direct update] ") + end + + for t, err in pairs(jac.testAllUpdate(module, input, "weight", "gradWeight")) do + mytester:assertlt(err, precision, string.format( + "error on weight [%s] ", t)) + end + + if module.bias then + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + "error on bias [%s] ", t)) + end + end + + -- 2D + local nBatchFrame = 4 + if module.featFirst then + input = torch.Tensor(nBatchFrame, from, ini):zero() + else + input = torch.Tensor(nBatchFrame, ini, from):zero() + end + + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, "error on state" ) + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err, precision, "error on weight ") + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err, precision, "error on bias ") + end + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err, precision, "error on weight [direct update] ") + + if module.bias then + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err, precision, "error on bias [direct update] ") + end + + for t, err in pairs(jac.testAllUpdate(module, input, "weight", "gradWeight")) do + mytester:assertlt(err, precision, string.format( + "error on weight [%s] ", t)) + end + + if module.bias then + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + "error on bias [%s] ", t)) + end + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. " - i/o forward err ", precision) + mytester:eq(0, berr, torch.typename(module) .. 
" - i/o forward err ", precision) + + -- 2D matches 1D + local output = module:forward(input):clone() + local outputGrad = torch.randn(output:size()) + local inputGrad = module:backward(input, outputGrad):clone() + + local input1D = input:select(1, 2) + local output1D = module:forward(input1D) + local outputGrad1D = outputGrad:select(1, 2) + local inputGrad1D = module:backward(input1D, outputGrad1D) + + mytester:assertTensorEq(output:select(1,2), output1D, 0.000001, + "error on 2D vs 1D forward") + mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001, + "error on 2D vs 1D backward") + end + + local module = nn.TemporalRowConvolution(from, ki, si) + jacTest(module) + module:noBias() + jacTest(module) + module.bias = torch.Tensor(module.inputFrameSize):zero() + module.gradBias = torch.Tensor(module.inputFrameSize):zero() + module:reset() + module.featFirst = true + jacTest(module) + module:noBias() + jacTest(module, true) +end + +function nntest.TemporalMaxPooling() + local from = math.random(2,4) + local ki = math.random(5,7) + local si = math.random(1,2) + local outi = math.random(30,40) + local ini = (outi-1)*si+ki + local module = nn.TemporalMaxPooling(ki, si) + local input = torch.Tensor(ini, from):zero() + + -- 1D + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) + + -- 2D + local nBatchFrame = 2 + local input = torch.Tensor(nBatchFrame, ini, from):zero() + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. 
' - i/o backward err ', precision) + + -- 2D matches 1D + local output = module:forward(input):clone() + local outputGrad = torch.randn(output:size()) + local inputGrad = module:backward(input, outputGrad):clone() + + local input1D = input:select(1, 2) + local output1D = module:forward(input1D) + local outputGrad1D = outputGrad:select(1, 2) + local inputGrad1D = module:backward(input1D, outputGrad1D) + + mytester:assertTensorEq(output:select(1,2), output1D, 0.000001, 'error on 2D vs 1D forward)') + mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001, 'error on 2D vs 1D backward)') +end + +function nntest.VolumetricFullConvolution_simple_test() + local module = nn.VolumetricFullConvolution(3, 1, 3, 3, 3, 3, 3, 3); + module.weight:fill(1); + module.bias:fill(0.1); + + local input = torch.Tensor(1, 3, 2, 2, 2):zero(); + for c = 1,3 do + input[1][c][1][1][1] = 1 + end + local output = module:forward(input) + for t = 1,6 do + for h = 1,6 do + for w = 1,6 do + if t <= 3 and h <= 3 and w <= 3 then + mytester:assertlt(output[1][1][t][h][w] - 3.1, precision, 'error on forward ') + else + mytester:assertlt(output[1][1][t][h][w] - 0.1, precision, 'error on forward ') + end + end + end + end + + module:zeroGradParameters() + local gradOut = torch.Tensor(1, 1, 6, 6, 6):fill(0.1); + local gradIn = module:backward(input, gradOut) + for t = 1,2 do + for h = 1,2 do + for w = 1,2 do + mytester:assertlt(gradIn[1][1][t][h][w] - 2.7, precision, + 'error on backward input gradients ') + end + end + end + + mytester:assertlt(module.gradBias[1] - 21.6, precision, + 'error on backward gradBias ') + for c = 1,3 do + for t = 1,3 do + for h = 1,3 do + for w = 1,3 do + mytester:assertlt(module.gradWeight[c][1][t][h][w] - 0.1, precision, + 'error on backward weight gradients ') + end + end + end + end +end + +function nntest.VolumetricFullConvolution() + local from = math.random(2,3) + local to = math.random(2,3) + local kt = math.random(3,4) + local ki = math.random(3,4) + local kj = ki + local st = math.random(1,3) + local si = math.random(1,3) + local sj = si + local int = math.random(3,4) + local ini = math.random(3,4) + local inj = math.random(3,4) + local bs = math.random(1, 6) + local module = nn.VolumetricFullConvolution(from, to, kt, ki, kj, st, si, sj) + + local input = torch.Tensor(bs, from, int, ini, inj):zero() + + local function jacTests(module) + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. 
' - i/o backward err ', precision) + end + + jacTests(module) + module:noBias() + jacTests(module) + module.bias = torch.Tensor(module.nOutputPlane):zero() + module.gradBias = torch.Tensor(module.nOutputPlane):zero() + module:reset() + jacTests(module) +end + +function nntest.VolumetricFullConvolutionDualInput() + local from = math.random(2,3) + local to = math.random(2,3) + local kt = math.random(3,4) + local ki = math.random(3,4) + local kj = math.random(3,4) + local dt = math.random(1,3) + local di = math.random(1,3) + local dj = math.random(1,3) + local padT = math.random(0,2) + local padW = math.random(0,2) + local padH = math.random(0,2) + local outt = math.random(5,9) + local outi = math.random(5,9) + local outj = math.random(5,9) + local int = math.floor((outt + padT*2 - kt)/dt + 1) + local ini = math.floor((outi + padW*2 - ki)/di + 1) + local inj = math.floor((outj + padH*2 - kj)/dj + 1) + local adjT = (outt + 2 * padT - kt) % dt + local adjW = (outi + 2 * padW - ki) % di + local adjH = (outj + 2 * padH - kj) % dj + local targetTensor = torch.Tensor(outt, outj, outi):zero() + local input = torch.Tensor(from, int, inj, ini):zero() + + local module = nn.VolumetricFullConvolution(from, to, kt, ki, kj, dt, di, dj, padT, padW, padH) + local moduleRef = nn.VolumetricFullConvolution(from, to, kt, ki, kj, dt, di, dj, padT, padW, padH, adjT, adjW, adjH) + moduleRef.weight:copy(module.weight) + moduleRef.bias:copy(module.bias) + + -- Check that the required output size matches the actual output size + -- when using the dual input mode + local output = module:forward({input, targetTensor}) + mytester:asserteq(output:size(2), outt, 'output depth error') + mytester:asserteq(output:size(3), outj, 'output height error') + mytester:asserteq(output:size(4), outi, 'output width error') + + -- Check that backward and forward match the reference module + local outputRef = moduleRef:forward(input) + mytester:asserteq(0, (output-outputRef):abs():max(), torch.typename(module) .. ' - output err ') + local gradOutput = outputRef:clone():uniform() + local gradInputRef = moduleRef:backward(input, gradOutput) + local gradInput = module:backward({input, targetTensor}, gradOutput) + mytester:asserteq(0, (gradInput[1]-gradInputRef):abs():max(), torch.typename(module) .. ' - gradInput[1] err ') + + -- Check that gradInput[2] is the singleton tensor {0} + mytester:asserteq(gradInput[2]:storage():size(), 1, torch.typename(module) .. ' - gradInput[2] size err ') + mytester:asserteq(gradInput[2]:storage()[1], 0, torch.typename(module) .. 
' - gradInput[2] value err ') +end + +function nntest.VolumetricConvolution() + local from = math.random(2,4) + local to = math.random(1,4) + local kt = math.random(1,4) + local ki = math.random(1,4) + local kj = math.random(1,4) + local st = math.random(1,3) + local si = math.random(1,3) + local sj = math.random(1,3) + local padT = math.random(0,2) + local padW = math.random(0,2) + local padH = math.random(0,2) + local outt = math.random(5,7) + local outi = math.random(5,7) + local outj = math.random(5,7) + local int = (outt-1)*st+kt-padT*2 + local ini = (outi-1)*si+ki-padW*2 + local inj = (outj-1)*sj+kj-padH*2 + local module = nn.VolumetricConvolution(from, to, kt, ki, kj, st, si, sj, padT, padW, padH) + local input = torch.Tensor(from, int, inj, ini):zero() + + local function jacTests(module) + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + if module.bias then + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + end + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + if module.bias then + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + if module.bias then + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + end + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. 
' - i/o backward err ', precision)
+ end
+
+ jacTests(module)
+ module:noBias()
+ jacTests(module)
+ module.bias = torch.Tensor(module.nOutputPlane):zero()
+ module.gradBias = torch.Tensor(module.nOutputPlane):zero()
+ module:reset()
+ jacTests(module)
+end
+
+function nntest.VolumetricDilatedConvolution()
+ local from = math.random(1,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local kk = math.random(1,5)
+ local di = math.random(1,4)
+ local dj = math.random(1,4)
+ local dk = math.random(1,4)
+ local padW = 0 -- math.random(0,2)
+ local padH = 0 -- math.random(0,2)
+ local padT = 0 -- math.random(0,2)
+ local outi = math.random(2,3)
+ local outj = math.random(2,5)
+ local outk = math.random(2,5)
+ local dilationW = math.random(1,3)
+ local dilationH = math.random(1,3)
+ local dilationT = math.random(1,3)
+ local ini = (outi - 1) * di - 2 * padW + dilationW * (ki-1) + 1
+ local inj = (outj - 1) * dj - 2 * padH + dilationH * (kj-1) + 1
+ local ink = (outk - 1) * dk - 2 * padT + dilationT * (kk-1) + 1
+
+ local module = nn.VolumetricDilatedConvolution(from, to, kk, ki, kj, dk, di, dj, padT, padW, padH, dilationT, dilationW, dilationH)
+ local input = torch.Tensor(from, ink, inj, ini):zero()
+
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,5)
+
+ module = nn.VolumetricDilatedConvolution(from, to, kk, ki, kj, dk, di, dj, padT, padW, padH, dilationT, dilationW, dilationH)
+ input = torch.Tensor(batch,from,ink,inj,ini):zero()
+
+ -- Check that the required output size matches the actual output size
+ local output = module:forward(input)
+ mytester:asserteq(output:size(3), outk, 'output depth error')
+ mytester:asserteq(output:size(4), outj, 'output height error')
+ mytester:asserteq(output:size(5), outi, 'output width error')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- non-contiguous
+ local input = torch.randn(batch,from,ink,ini,inj):transpose(4,5) -- non-contiguous
+ local inputc = input:contiguous() -- contiguous
+ local output = module:forward(input)
+ local outputc = module:forward(inputc)
+ mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
+ local gradInput = module:backward(input, output)
+ local gradInputc = module:backward(inputc, outputc)
+ mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
+end
+
+function nntest.VolumetricConvolutionBatchCompare()
+ local from = math.random(2,3)
+ local to = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local st = math.random(2,3)
+ local si = math.random(2,3)
+ local sj = math.random(2,3)
+ local padT = math.random(0,2)
+ local padW = math.random(0,2)
+ local padH = math.random(0,2)
+ local outt = math.random(3,4)
+ local outi = math.random(3,4)
+ local outj = math.random(3,4)
+ local int = (outt-1)*st+kt-padT*2
+ local ini = (outi-1)*si+ki-padW*2
+ local inj = (outj-1)*sj+kj-padH*2
+ local module = nn.VolumetricConvolution(from, to, kt, ki, kj, st, si, sj, padT, padW, padH)
+ module:zeroGradParameters()
+ local input = torch.randn(from, int, inj, ini)
+ batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
+end
+
+function nntest.VolumetricAveragePooling()
+ local from = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local st = math.random(2,3)
+ local si = math.random(2,3)
+ local sj = math.random(2,3)
+ local outt = math.random(3,4)
+ local outi = math.random(3,4)
+ local outj = math.random(3,4)
+ local int = (outt-1)*st+kt
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local nbatch = math.random(2,3)
+ module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj)
+ input = torch.Tensor(nbatch, from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state (Batch) ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err (Batch) ', precision) +end + +function nntest.VolumetricMaxPooling() + local from = math.random(2,3) + local kt = math.random(3,4) + local ki = math.random(3,4) + local kj = math.random(3,4) + local st = math.random(2,3) + local si = math.random(2,3) + local sj = math.random(2,3) + local outt = math.random(3,4) + local outi = math.random(3,4) + local outj = math.random(3,4) + local padT = math.min(math.random(0,2),math.floor(kt/2)) + local padW = math.min(math.random(0,2),math.floor(ki/2)) + local padH = math.min(math.random(0,2),math.floor(kj/2)) + local int = (outt-1)*st+kt-2*padT + local ini = (outi-1)*si+ki-2*padW + local inj = (outj-1)*sj+kj-2*padH + local module = nn.VolumetricMaxPooling(kt, ki, kj, st, si, sj, padT, padW, padH) + local input = torch.Tensor(from, int, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision) + + -- batch + local nbatch = math.random(2,3) + module = nn.VolumetricMaxPooling(kt, ki, kj, st, si, sj, padT, padW, padH) + input = torch.Tensor(nbatch, from, int, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state (Batch) ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision) +end + +function nntest.VolumetricDilatedMaxPooling() + for _,ceil_mode in pairs({true,false}) do + local from = math.random(2,3) + local kt = math.random(3,4) + local ki = math.random(3,4) + local kj = math.random(3,4) + local st = math.random(2,3) + local si = math.random(2,3) + local sj = math.random(2,3) + local outt = math.random(3,4) + local outi = math.random(3,4) + local outj = math.random(3,4) + local padT = math.min(math.random(0,1),math.floor(kt/2)) + local padW = math.min(math.random(0,1),math.floor(ki/2)) + local padH = math.min(math.random(0,1),math.floor(kj/2)) + local dilationT = math.random(1,3) + local dilationW = math.random(1,3) + local dilationH = math.random(1,3) + local int = (outt-1)*st+(dilationT*(kt-1)+1)-2*padT + local ini = (outi-1)*si+(dilationW*(ki-1)+1)-2*padW + local inj = (outj-1)*sj+(dilationH*(kj-1)+1)-2*padH + + local ceil_string = ceil_mode and 'ceil' or 'floor' + local module = nn.VolumetricDilatedMaxPooling(kt,ki,kj,st,si,sj,padT,padW,padH,dilationT,dilationW,dilationH) + if ceil_mode then module:ceil() else module:floor() end + local input = torch.rand(from,int,inj,ini) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') + + -- batch + local nbatch = math.random(2,5) + input = torch.rand(nbatch,from,int,inj,ini) + module = nn.VolumetricDilatedMaxPooling(kt,ki,kj,st,si,sj,padT,padW,padH,dilationT,dilationW,dilationH) + if ceil_mode then module:ceil() else module:floor() end + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error '..ceil_string..' 
mode on state (Batch)') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ') + end +end + +function nntest.VolumetricFractionalMaxPooling() + local batch = math.random(1, 3) + local plane = math.random(1, 3) + local outT = math.random(1, 7) + local outW = math.random(1, 7) + local outH = math.random(1, 7) + local poolSizeT = math.random(2, 4) + local poolSizeW = math.random(2, 4) + local poolSizeH = math.random(2, 4) + + local minInT = outT + poolSizeT + local minInW = outW + poolSizeW + local minInH = outH + poolSizeH + + local inT = math.random(minInT, minInT + 6) + local inW = math.random(minInW, minInW + 6) + local inH = math.random(minInH, minInH + 6) + + -- fix the pooling regions so they aren't regenerated with every + -- forward(), so testJacobian can work properly + local module = + nn.VolumetricFractionalMaxPooling(poolSizeT, poolSizeW, poolSizeH, outT, outW, outH) + :fixPoolingRegions() + local input = nil + if batch == 1 then + input = torch.Tensor(plane, inH, inW, inT):zero() + else + input = torch.Tensor(batch, plane, inH, inW, inT):zero() + end + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state') +end + +function nntest.VolumetricFractionalMaxPooling_Ratio() + -- Fix a reduction ratio, and test with two different input sizes + local reductionRatioT = torch.uniform(0.4, 0.74) + local reductionRatioW = torch.uniform(0.4, 0.74) + local reductionRatioH = torch.uniform(0.4, 0.74) + + for tries = 1, 2 do + local batch = math.random(1, 3) + local plane = math.random(1, 3) + local poolSizeT = math.random(2, 3) + local poolSizeW = math.random(2, 3) + local poolSizeH = math.random(2, 3) + + local minInT = math.random(5, 8) + poolSizeT + local minInW = math.random(5, 8) + poolSizeW + local minInH = math.random(5, 8) + poolSizeH + + local inT = math.random(minInT, minInT + 6) + local inW = math.random(minInW, minInW + 6) + local inH = math.random(minInH, minInH + 6) + + -- fix the pooling regions so they aren't regenerated with every + -- forward(), so testJacobian can work properly + local module = + nn.VolumetricFractionalMaxPooling(poolSizeT, poolSizeW, poolSizeH, + reductionRatioT, reductionRatioW, + reductionRatioH) + :fixPoolingRegions() + local input = nil + if batch == 1 then + input = torch.Tensor(plane, inH, inW, inT):zero() + else + input = torch.Tensor(batch, plane, inH, inW, inT):zero() + end + + -- Make sure that the output size is based on our ratio + local output = module:updateOutput(input) + if batch == 1 then + mytester:asserteq(output:size(4), math.floor(reductionRatioT * inT)) + mytester:asserteq(output:size(3), math.floor(reductionRatioW * inW)) + mytester:asserteq(output:size(2), math.floor(reductionRatioH * inH)) + else + mytester:asserteq(output:size(5), math.floor(reductionRatioT * inT)) + mytester:asserteq(output:size(4), math.floor(reductionRatioW * inW)) + mytester:asserteq(output:size(3), math.floor(reductionRatioH * inH)) + end + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state') + end +end + +function nntest.VolumetricMaxUnpooling() + local from = math.random(2,3) + local kt = math.random(3,4) + local ki = math.random(3,4) + local kj = math.random(3,4) + local st, si, sj = kt, ki, kj + local outt = math.random(3,4) + local outi = math.random(3,4) + local outj = math.random(3,4) 
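+ -- Note (added for clarity): the pads and input extents below invert the
+ -- pooling shape relation out = floor((in + 2*pad - k)/stride) + 1;
+ -- choosing in = (out-1)*stride + k - 2*pad makes the division exact, so
+ -- the unpooled output can be compared against the original size for size.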
+ local padT = math.min(math.random(0,2),math.floor(kt/2)) + local padW = math.min(math.random(0,2),math.floor(ki/2)) + local padH = math.min(math.random(0,2),math.floor(kj/2)) + local int = (outt-1)*st+kt-2*padT + local ini = (outi-1)*si+ki-2*padW + local inj = (outj-1)*sj+kj-2*padH + + local poolingModule = nn.VolumetricMaxPooling(kt, ki, kj, st, si, sj, padT, padW, padH) + local module = nn.VolumetricMaxUnpooling(poolingModule) + + local original = torch.rand(from,int,inj,ini) + local input = poolingModule:forward(original) + local output = module:forward(input) + mytester:assert(output:isSameSizeAs(original),'VolumetricMaxUnpooling output size err') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error ') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- batch + local nbatch = math.random(2,3) + original = torch.rand(nbatch,from,int,inj,ini) + input = poolingModule:forward(original) + output = module:forward(input) + + mytester:assert(output:isSameSizeAs(original),'VolumetricMaxUnpooling batch output size err') + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on Batch') + + local ferr, berr = jac.testIO(module, input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision) +end + +function nntest.VolumetricMaxPooling_boundary() + -- simple kernel 2x2x2 with striding 2x2x2 + local module = nn.VolumetricMaxPooling(2, 2, 2, 2, 2, 2):ceil() + local nip = math.random(3,256) + local input = torch.rand(nip, 2, 7, 7) + + -- do a forward pass + local output = module:forward(input) + + -- checking output size + mytester:asserteq(output:size(1), nip, 'wrong output channels') + mytester:asserteq(output:size(2), 1, 'wrong output temporal length') + mytester:asserteq(output:size(3), 4, 'wrong output height') + mytester:asserteq(output:size(4), 4, 'wrong output width') + + -- checking output signals at top right + for c = 1,nip do + local max_val = input[c][1][1][7] + for t = 1,2 do + for h = 1,2 do + max_val = math.max(max_val, input[c][t][h][7]) + end + end + mytester:asserteq(output[c][1][1][4], max_val, 'wrong forward execution') + end + -- checking output signals at bottom left + for c = 1,nip do + local max_val = input[c][1][7][1] + for t = 1,2 do + for w = 1,2 do + max_val = math.max(max_val, input[c][t][7][w]) + end + end + mytester:asserteq(output[c][1][4][1], max_val, 'wrong forward execution') + end + + -- check output signals at right bottom + for c = 1,nip do + local max_val = math.max(input[c][1][7][7], input[c][2][7][7]) + mytester:asserteq(output[c][1][4][4], max_val, 'wrong forward execution') + end + + + -- backward is supposed to be tested in nntest.VolumetricMaxPooling + -- This only tests the boundary cases +end + +function nntest.Module_getParameters_1() + local n = nn.Sequential() + n:add( nn.Linear(10,10) ) + local p = n:getParameters() + + mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'getParameters(): weights wrong') + mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'getParameters(): bias wrong') +end + +function nntest.Module_getParameters_2() + local n = nn.Sequential() + n:add( nn.Linear(10,10) ) + local _ = n:getParameters() + + n:add( nn.Linear(10,10) ) + 
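+ -- getParameters() flattens all parameters into a single contiguous vector, module by module: W1 in p[1..100], b1 in p[101..110], W2 in p[111..210], b2 in p[211..220]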
local p = n:getParameters() + + mytester:asserteq((p[{ {111,210} }] - n.modules[2].weight):norm(), 0, 'error when appending new module') + mytester:asserteq((p[{ {211,220} }] - n.modules[2].bias):norm(), 0, 'error when appending new module') +end + +function nntest.Module_getParameters_3() + local n = nn.Sequential() + n:add( nn.Linear(10,10) ) + n:add( n.modules[1]:clone() ) + local p = n:getParameters() + + mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'error when using cloning') + mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'error when using cloning') + + mytester:asserteq((p[{ {111,210} }] - n.modules[2].weight):norm(), 0, 'error when using cloning') + mytester:asserteq((p[{ {211,220} }] - n.modules[2].bias):norm(), 0, 'error when using cloning') + + mytester:asserteq((p[{ {111,210} }] - n.modules[1].weight):norm(), 0, 'error when using cloning') + mytester:asserteq((p[{ {211,220} }] - n.modules[1].bias):norm(), 0, 'error when using cloning') + + n:reset() + + mytester:assertgt((p[{ {111,210} }] - n.modules[1].weight):norm(), 0, 'error when using cloning') + mytester:assertgt((p[{ {211,220} }] - n.modules[1].bias):norm(), 0, 'error when using cloning') +end + +function nntest.Module_getParameters_4() + local n = nn.Sequential() + n:add( nn.Linear(10,10) ) + n:add( n.modules[1]:clone() ) + local _ = n:getParameters() + + n:add(nn.Linear(10,10)) + local p = n:getParameters() + + mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'error when using cloning') + mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'error when using cloning') + + mytester:asserteq((p[{ {111,210} }] - n.modules[2].weight):norm(), 0, 'error when using cloning') + mytester:asserteq((p[{ {211,220} }] - n.modules[2].bias):norm(), 0, 'error when using cloning') + + mytester:asserteq((p[{ {221,320} }] - n.modules[3].weight):norm(), 0, 'error when using cloning') + mytester:asserteq((p[{ {321,330} }] - n.modules[3].bias):norm(), 0, 'error when using cloning') + + mytester:asserteq(p:nElement(), 3*(10*10+10), 'error: incorrect number of elements in flat vector') +end + +function nntest.Module_getParameters_5() + local n = nn.Sequential() + n:add( nn.Linear(10,10) ) + n:add( n.modules[1]:clone('weight','bias','gradWeight','gradBias') ) + local p = n:getParameters() + + mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'error when using cloning+sharing') + mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'error when using cloning+sharing') + + mytester:asserteq((p[{ {1,100} }] - n.modules[2].weight):norm(), 0, 'error when using cloning+sharing') + mytester:asserteq((p[{ {101,110} }] - n.modules[2].bias):norm(), 0, 'error when using cloning+sharing') + + n:reset() + + mytester:asserteq((p[{ {1,100} }] - n.modules[2].weight):norm(), 0, 'error when using cloning+sharing') + mytester:asserteq((p[{ {101,110} }] - n.modules[2].bias):norm(), 0, 'error when using cloning+sharing') + + mytester:asserteq(p:nElement(), (10*10+10), 'error: incorrect number of elements in flat vector') +end + +function nntest.Module_getParameters_6() + local n = nn.Sequential() + n:add( nn.Linear(10,10) ) + n:add( n.modules[1]:clone('weight','bias','gradWeight','gradBias') ) + local _ = n:getParameters() + + n:add(nn.Linear(10,10)) + local p = n:getParameters() + + mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'error when using cloning+sharing') + mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 
'error when using cloning+sharing') + + mytester:asserteq((p[{ {1,100} }] - n.modules[2].weight):norm(), 0, 'error when using cloning+sharing') + mytester:asserteq((p[{ {101,110} }] - n.modules[2].bias):norm(), 0, 'error when using cloning+sharing') + + mytester:asserteq((p[{ {111,210} }] - n.modules[3].weight):norm(), 0, 'error when using cloning+sharing') + mytester:asserteq((p[{ {211,220} }] - n.modules[3].bias):norm(), 0, 'error when using cloning+sharing') + + mytester:asserteq(p:nElement(), 2*(10*10+10), 'error: incorrect number of elements in flat vector') +end + +function nntest.Module_getParameters_7() + local n = nn.Sequential() + n:add( nn.Linear(10,10) ) + n:add( n.modules[1]:clone('weight','bias','gradWeight','gradBias') ) + local _ = n:getParameters() + + n:add(nn.Linear(10,10)) + local _ = n:getParameters() + + local n1 = nn.Sequential() + n1:add( nn.Linear(10,10) ) + + local n2 = nn.Sequential() + n2:add( nn.Linear(10,10) ) + + local n = nn.Sequential() + n:add( n1 ) + n:add( n2 ) + + local _ = n:getParameters() + + local nf = nn.Sequential() + nf:add( n1 ) + nf:add( nn.Linear(10,1) ) + + local p = nf:getParameters() + + mytester:asserteq((p[{ {1,100} }] - n1.modules[1].weight):norm(), 0, 'error when using cloning+partial realloc') + mytester:asserteq((p[{ {101,110} }] - n1.modules[1].bias):norm(), 0, 'error when using cloning+partial realloc') + + mytester:asserteq((p[{ {111,120} }] - nf.modules[2].weight):norm(), 0, 'error when using cloning+partial realloc') + mytester:asserteq((p[{ {121,121} }] - nf.modules[2].bias):norm(), 0, 'error when using cloning+partial realloc') + + mytester:asserteq(p:nElement(), 121, 'error: incorrect number of elements in flat vector') +end + +function nntest.Module_getParameters_8() + local function makeMLP(nin, ns) + local net = nn.Sequential() + + for k,v in ipairs(ns) do + net:add(nn.Linear(nin, v)) + nin = v + end + local _,_ = net:getParameters() + return net + end + + local mlp1 = makeMLP(10, {10,10}) + local mlp2 = makeMLP(10, {10,10}) + + local net = nn.Sequential():add(mlp1:get(1)) + :add(mlp2:get(1)) + + -- clone the second MLP to ensure that the weights before calling getParameters are preserved + mlp2 = mlp2:clone() + + local p, _ = net:getParameters() + + mytester:asserteq((p[{ {1,100} }] - net.modules[1].weight):norm(), 0, 'error when using partial realloc') + mytester:asserteq((p[{ {111,210} }] - net.modules[2].weight):norm(), 0, 'error when using partial realloc') + -- check that the weights have the same values as before getParameters was called + mytester:asserteq((net.modules[1].weight - mlp1.modules[1].weight):norm(), 0, ' error when using partial realloc') + mytester:asserteq((net.modules[2].weight - mlp2.modules[1].weight):norm(), 0, ' error when using partial realloc') + +end + +function nntest.Module_getParameters_10() + -- tensors are non-contiguous but compact; they can be gathered + local L = nn.Linear(10,10) + L.weight = torch.Tensor(10,10):t():fill(1) + local tmp = torch.Tensor(10,10):fill(2) + L.bias = tmp:select(1,2) + local P = L:getParameters() + mytester:asserteq(L.weight:mean(), 1) + mytester:asserteq(L.bias:mean(), 2) + mytester:asserteq(L.weight:storage(), L.bias:storage()) + mytester:asserteq(P:nElement(), 110) + mytester:asserteq(P:storage():size(), 110) + mytester:assertlt(L.bias[{ {10} }]:storageOffset() - 1, L.bias:storage():size()) +end + +function nntest.Module_getParameters_11() + -- tensors are non-compact; they can't be gathered + local L = nn.Linear(10,10) + local tmp = 
torch.Tensor(10,10):fill(2) + L.bias = tmp:select(2,2) + local ok, err = pcall(L.getParameters, L) + mytester:assert(not ok) +end + +function nntest.Module_getParameters_12() + -- tensors are expanded (i.e. have stride 0) + local L = nn.Linear(10,10) + L.weight = torch.Tensor(10, 1):fill(1) + torch.expand(L.weight, 10, 10) + L.gradWeight = torch.Tensor(10, 1):fill(1) + torch.expand(L.gradWeight, 10, 10) + L.bias = torch.Tensor(10):fill(2) + local P = L:getParameters() + mytester:asserteq(L.weight:mean(), 1) + mytester:asserteq(L.bias:mean(), 2) + mytester:asserteq(L.weight:storage(), L.bias:storage()) + mytester:asserteq(P:nElement(), 20) + mytester:asserteq(P:storage():size(), 20) + mytester:assertlt(L.bias[{ {10} }]:storageOffset() - 1, L.bias:storage():size()) +end + +function nntest.Module_listModules() + local batchSize = 4 + local inputSize, outputSize = 7, 6 + local linear = nn.Linear(inputSize, outputSize) + local tanh = nn.Tanh() + local reshape = nn.Reshape(outputSize/2, 2) + local mlp3 = nn.Sequential() + mlp3:add(linear) + mlp3:add(tanh) + mlp3:add(reshape) + + local mlp2 = nn.Sequential() + local view = nn.View(outputSize) + local linear2 = nn.Linear(outputSize, inputSize) + local tanh2 = nn.Tanh() + mlp2:add(mlp3) + mlp2:add(view) + mlp2:add(linear2) + mlp2:add(tanh2) + + local concat = nn.ConcatTable() + local id = nn.Identity() + concat:add(mlp2) + concat:add(id) + local mlp = nn.Sequential() + local add = nn.CAddTable() + mlp:add(concat) + mlp:add(add) + + local modules2 = {mlp, concat, mlp2, mlp3, linear, tanh, reshape, view, linear2, tanh2, id, add} + local modules = mlp:listModules() + + mytester:assert(#modules2 == #modules, 'missing modules error') + + for i,module in ipairs(modules) do + mytester:assert(torch.type(module) == torch.type(modules2[i]), 'module error') + end +end + +function nntest.PairwiseDistance() + -- Note: testJacobian doesn't support table inputs, and rather than re-write + -- it so that it does, I'll just use a split table module on the input. + -- I assume both SplitTable and Sequential do not have bugs, otherwise this + -- test will break. + for p = 1,4 do -- test a few Lp norms + -- TEST CASE 1: non-batch input, same code path but includes a resize + local ini = math.random(3,5) + local input = torch.Tensor(2, ini):zero() + local module = nn.Sequential() + module:add(nn.SplitTable(1)) + module:add(nn.PairwiseDistance(p)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err, 1e-4, ' error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module)..' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module)..' - i/o backward err ') + + -- Also check that the forward prop result is correct. + input = torch.rand(2, ini) + err = torch.dist(input:select(1,1), input:select(1,2), p) - + module:forward(input)[1] + mytester:assertlt(err,precision, ' error on non-batch fprop ') + + -- TEST CASE 2: batch input + local inj = math.random(3,5) + input = torch.Tensor(2, inj, ini):zero() + + -- (Rebuild the module to avoid correlated tests) + module = nn.Sequential() + module:add(nn.SplitTable(1)) + module:add(nn.PairwiseDistance(p)) + + err = jac.testJacobian(module,input) + mytester:assertlt(err, 1e-4, ' error on state ') + + -- Also check that the forward prop result is correct. 
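+ -- each row of the batch output should equal the Lp distance between the corresponding rows of the two split inputs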
+ -- manually calculate each distance separately + local inputa = torch.rand(inj,ini) + local inputb = torch.rand(inj,ini) + local dist_manual = torch.Tensor(inj) + for i=1, inputa:size(1) do + dist_manual[i] = torch.dist(inputa:select(1,i), inputb:select(1,i),p) + end + -- compare the distances to the module's fprop + local dist = module:forward(torch.cat(inputa,inputb,1):resize(2,inj,ini)) + err = dist - dist_manual + mytester:assertlt(err:norm(), precision, torch.typename(module) .. + ' error on batch fprop ') + end +end + +function nntest.Index() + local net = nn.Index(1) + + -- test 1D + local input = {torch.Tensor{10, 20, 30}, torch.LongTensor{1, 2, 2, 3}} + local output = net:forward(input) + equal(output, torch.Tensor{10, 20, 20, 30}, "error in 1D forward pass") + + local gradOutput = torch.Tensor{1, 1, 1, 3 } + local gradInput = net:backward(input, gradOutput) + equal(gradInput[1], torch.Tensor{1, 2, 3}, "error in 1D backward pass") + + -- test 2D + local input = {torch.Tensor{{10, 20}, {30, 40}}, torch.LongTensor{1, 1}} + local output = net:forward(input) + equal(output, torch.Tensor{{10, 20}, {10, 20}}, "error in 2D forward pass") + + local gradOutput = torch.Tensor{{1, 2}, {1, 2}} + local gradInput = net:backward(input, gradOutput) + equal(gradInput[1], torch.Tensor{{2, 4}, {0, 0}}, "error in 2D backward pass") + + -- test clearState + local m = nn.Index(1) + local tensor = torch.Tensor(10, 3) + local indices = torch.LongTensor{ 2,3,4} + + m:clearState() + m:forward({tensor, indices}) + m:backward({tensor,indices}, torch.rand(3,3)) + +end + +function nntest.Squeeze() + local input = torch.Tensor(2,1,3):zero() + local module = nn.Squeeze() + equal(module:forward(input), input:squeeze(), "error in forward pass") + local output = input:squeeze() + equal(module:backward(input, output), input, "error in backward pass") + + -- testing the dimension option: + local input = torch.Tensor(2,1,1,3):zero() + local module = nn.Squeeze(2) + equal(module:forward(input), input:squeeze(2), "error in forward pass with dimension") + local output = input:squeeze(2) + equal(module:backward(input, output), input, "error in backward pass with dimension") + + -- with batch + local input = torch.Tensor(2,1,1,3):zero() + local module = nn.Squeeze(2, 3) + equal(module:forward(input), input:squeeze(3), "error in forward pass with dimension") + local output = input:squeeze(3) + equal(module:backward(input, output), input, "error in backward pass with dimension") + + + -- ... 
of size one + local input = torch.Tensor(1,1,1,3):zero() + local module = nn.Squeeze(2, 3) + equal(module:forward(input), input:squeeze(3), "error in forward pass with dimension") + local output = input:squeeze(3) + equal(module:backward(input, output), input, "error in backward pass with dimension") +end + +function nntest.Unsqueeze() + local function assertInputOutputSize(inputSize, outputSize, tf) + local input = torch.Tensor(table.unpack(inputSize)):zero() + local output = torch.Tensor(table.unpack(outputSize)):zero() + local gradInput = input:clone() + local gradOutput = output:clone() + equal(tf:forward(input), output, "error in forward pass") + equal(tf:backward(input, gradOutput), gradInput, "error in backward pass") + end + + local function test_normal() + -- insert dim 1 at head + local inputSize, outputSize = {2,3,4}, {1, 2,3,4} + local pos = 1 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos)) + + -- insert dim 1 at tail + local inputSize, outputSize = {2,3,4}, {2,3,4, 1} + local pos = 4 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos)) + + -- insert dim 1 in between + local inputSize, outputSize = {2,3,4}, {2, 1, 3,4} + local pos = 2 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos)) + end + + local function test_batchmode() + -- batch mode: insert dim 1 at head + local inputSize, outputSize = {5, 2, 3, 4}, {5, 1, 2, 3, 4} + local pos = 1 + local numInputDims = 3 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos, numInputDims)) + + -- batch mode: insert dim 1 at tail + local inputSize, outputSize = {5, 2, 3, 4}, {5, 2, 3, 4, 1} + local pos = 4 + local numInputDims = 3 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos, numInputDims)) + + -- batch mode: insert dim 1 in between + local inputSize, outputSize = {5, 2, 3, 4}, {5, 2, 1, 3, 4} + local pos = 2 + local numInputDims = 3 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos, numInputDims)) + end + + local function test_sizeone() + local inputSize, outputSize = {1,1,3,1}, {1,1, 1, 3,1} + local pos = 3 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos)) + + local inputSize, outputSize = {1,1,3,2}, {1,1,3,2, 1} + local pos = 3 + local numInputDims = 2 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos, numInputDims)) + end + + local function test_sizestrange() + local inputSize, outputSize = {2}, {2,1} + local pos = 2 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos)) + + local inputSize, outputSize = {1}, {1, 1} + local pos = 1 + assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos)) + end + + test_normal() + test_batchmode() + test_sizeone() + test_sizestrange() +end + +function nntest.LookupTable() + local totalIndex = math.random(6,9) + local nIndex = math.random(3,5) + local entry_size = math.random(2,5) + + local function dotest(module, input, minval, maxval) + local output = module:forward(input) + module:backwardUpdate(input, output, 0.1) + input:zero() + + -- 1D + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight, minval, maxval) + mytester:assertlt(err,precision, '1D error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight, minval, maxval) + mytester:assertlt(err,precision, '1D error on weight [direct update] ') + + module.gradWeight:zero() + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + '1D error on weight [%s]', t)) + end + + -- 
2D + local nframe = math.random(2,5) + local input = torch.IntTensor(nframe, nIndex):zero() + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight, minval, maxval) + mytester:assertlt(err,precision, '2D error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight, minval, maxval) + mytester:assertlt(err,precision, '2D error on weight [direct update] ') + + module.gradWeight:zero() + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + '2D error on weight [%s]', t)) + end + + -- IO + module.gradInput = torch.Tensor(3,4):zero() --fixes an error + local ferr,berr = jac.testIO(module,input,minval,maxval) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- accUpdate + module:accUpdateOnly() + mytester:assert(not module.gradWeight, 'gradWeight is nil') + module:float() + local output = module:forward(input) + module:backwardUpdate(input, output, 0.1) + end + -- test without padding + local input = torch.randperm(totalIndex):narrow(1,1,nIndex):int() + local module = nn.LookupTable(totalIndex, entry_size) + dotest(module, input, 1, totalIndex) + -- test with padding set to 1, but no padding in inputs + local input = torch.randperm(totalIndex):narrow(1,1,nIndex):int() + local module = nn.LookupTable(totalIndex, entry_size, 1) + dotest(module, input, 2, totalIndex) + -- test whether padding weights remain unchanged + local paddingValue = math.random(totalIndex) + local module = nn.LookupTable(totalIndex, entry_size, paddingValue) + local padw = module.weight:select(1,paddingValue):fill(1) + local padw_sum = padw:sum() + local input = torch.IntTensor(nIndex) + for i = 1, 100 do + input:apply( + function() -- set randomly half of the input as padding + if torch.random(2) == 1 then return paddingValue end + return torch.random(totalIndex) + end) + local y = module:updateOutput(input) + module:updateGradInput(input, y) + module:accUpdateGradParameters(input, y, 0.1) + end + local err = padw_sum - padw:sum() + mytester:assertlt(err,precision, 'padding update error ') + -- test whether the weights changes accordingly when maxNorm is not nil + local all_index = torch.randperm(totalIndex):int() + -- input can have duplicates + local input = torch.repeatTensor(all_index:narrow(1,1,nIndex), 2) + local maxNorm = math.random() + for _, normType in ipairs{1, 2, math.random()} do + local module = nn.LookupTable(totalIndex, entry_size, 0, maxNorm, normType) + local oriW = module.weight:clone() + local output = module:updateOutput(input) + -- check output is of small norm + for j = 1,output:size(1) do + local norm = torch.norm(output:select(1, j), normType) + if norm > maxNorm then + local err = norm - maxNorm; + mytester:assertlt(math.abs(err), precision, string.format( + 'output after renorm exceeds maxNorm=[%f] with normType=[%f]', maxNorm, normType)) + end + end + -- check the update of the module.weight + for j = 1,totalIndex do + local k = all_index[j] + if j <= nIndex then -- k is an index in "input" + local norm = torch.norm(module.weight:select(1, k), normType) + local oriNorm = torch.norm(oriW:select(1, k), normType) + if oriNorm > maxNorm then + local err = norm - maxNorm + mytester:assertlt(math.abs(err), precision, 'unexpected norm after renorm') + else + local err = norm - oriNorm + mytester:assertlt(math.abs(err), precision, 
'unexpected norm after renorm') + else -- k is not an index in "input" + local err = module.weight:select(1,k):sum() - oriW:select(1,k):sum() + mytester:assertlt(math.abs(err), precision, 'unexpected changes in weight after renorm') + end + end + end +end + +function nntest.AddConstant() + local nbatch = torch.random(3, 5) + local f = torch.random(3, 5) + local h = torch.random(7,9) + local w = torch.random(7,9) + local input = torch.rand(nbatch, f, h, w):mul(20):add(-10) -- [-10, 10] + + local constant = torch.randn(1):squeeze() + local mod = nn.AddConstant(constant) + + -- Test FPROP + local output = mod:forward(input) + local delta = output - input + mytester:assertlt(delta:add(-constant):abs():max(), precision, 'fprop error') + + -- Test BPROP + local err = jac.testJacobian(mod, input) + mytester:assertlt(err, precision, 'bprop error ') + + -- inplace comparisons + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local constant = torch.uniform()*math.random(1,10) + + local input1 = torch.rand(ink, inj, ini) + local input2 = input1:clone() + + local module1 = nn.AddConstant(constant,true) + local module2 = nn.AddConstant(constant) + + local gradOutput1 = torch.rand(ink, inj, ini) + local gradOutput2 = gradOutput1:clone() + + local out1 = module1:forward(input1) + local out2 = module2:forward(input2) + + mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) .. + ' - in-place forward err ') + + local gradInput1 = module1:backward(input1, gradOutput1) + local gradInput2 = module2:backward(input2, gradOutput2) + + mytester:asserteq(0, (gradInput1-gradInput2):abs():max(), + torch.typename(module1) .. ' - in-place backward err ') + + local input1 = torch.rand(ink, inj, ini) + local input2 = input1:clone() + + module1:forward(input1) + module1:backward(module1.output,torch.rand(input1:size())) + + local err = (input1-input2):abs():max() + mytester:asserteq(err, 0, torch.typename(module1) .. + ' - inplace input change err ') + + local module3 = nn.AddConstant(torch.Tensor{1,2,3}) + local out3 = module3:forward(torch.Tensor{-1,-2,-3}) + mytester:asserteq(0, out3:abs():max(), torch.typename(module3) .. + ' - tensor constant forward err ') + local module4 = nn.AddConstant(torch.Tensor{1,2,3}) + local out4 = module4:forward(torch.Tensor{{-1,-2,-3},{-1,-2,-3}}) + mytester:asserteq(0, out4:abs():max(), torch.typename(module4) .. 
+ ' - batch tensor constant forward err ') +end + +function nntest.MulConstant() + local nbatch = torch.random(3, 5) + local f = torch.random(3, 5) + local h = torch.random(7,9) + local w = torch.random(7,9) + local input = torch.rand(nbatch, f, h, w):mul(20):add(-10) -- [-10, 10] + + local constant = torch.randn(1):squeeze() + local mod = nn.MulConstant(constant) + + -- Test FPROP + local output = mod:forward(input) + local scale = output:clone():cdiv(input) + mytester:assertlt(scale:add(-constant):abs():max(), precision, 'fprop error') + + -- Test BPROP + local err = jac.testJacobian(mod, input) + mytester:assertlt(err, precision, 'bprop error ') + + -- inplace comparisons + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local constant = torch.uniform()*math.random(1,10) + + local input1 = torch.rand(ink, inj, ini) + local input2 = input1:clone() + + local module1 = nn.MulConstant(constant,true) + local module2 = nn.MulConstant(constant) + + local gradOutput1 = torch.rand(ink, inj, ini) + local gradOutput2 = gradOutput1:clone() + + local out1 = module1:forward(input1) + local out2 = module2:forward(input2) + + mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) .. + ' - in-place forward err ') + + local gradInput1 = module1:backward(input1, gradOutput1) + local gradInput2 = module2:backward(input2, gradOutput2) + + mytester:asserteq(0, (gradInput1-gradInput2):abs():max(), + torch.typename(module1) .. ' - in-place backward err ') + + local input1 = torch.rand(ink, inj, ini) + local input2 = input1:clone() + + module1:forward(input1) + module1:backward(module1.output,torch.rand(input1:size())) + + local err = (input1-input2):abs():max() + mytester:assertalmosteq(err, 0, 1e-15, torch.typename(module1) .. 
+ ' - inplace input change err ') +end + +function nntest.Copy() + local input = torch.randn(3,4):double() + local c = nn.Copy('torch.DoubleTensor', 'torch.FloatTensor') + local output = c:forward(input) + mytester:assert(torch.type(output) == 'torch.FloatTensor', 'copy forward type err') + mytester:assertTensorEq(output, input:float(), 0.000001, 'copy forward value err') + local gradInput = c:backward(input, output) + mytester:assert(torch.type(gradInput) == 'torch.DoubleTensor', 'copy backward type err') + mytester:assertTensorEq(gradInput, input, 0.000001, 'copy backward value err') + c.dontCast = true + c:double() + mytester:assert(torch.type(c.output) == 'torch.FloatTensor', 'copy forward type err') +end + +function nntest.CMaxTable() + local input1 = torch.Tensor{{1,3},{2,4}} + local input2 = torch.Tensor{{4,2},{3,1}} + local input = {input1, input2} + local module = nn.CMaxTable() + local err1 = torch.add(module:forward(input), -1, torch.Tensor{{4,3},{3,4}}) + mytester:assertalmosteq(err1:abs():max(), 0, 1e-15, "CMaxTable forward call") + local gradOutputs = torch.Tensor{5,6,7,8} + local gradInputs = module:backward(input, gradOutputs) + local err2 = torch.add(gradInputs[1], -1, torch.Tensor{{0,6},{0,8}}) + local err3 = torch.add(gradInputs[2], -1, torch.Tensor{{5,0},{7,0}}) + mytester:assertalmosteq(err2:abs():max(), 0, 1e-15, "CMaxTable backward call") + mytester:assertalmosteq(err3:abs():max(), 0, 1e-15, "CMaxTable backward call") +end + +function nntest.CMinTable() + local input1 = torch.Tensor{{1,3},{2,4}} + local input2 = torch.Tensor{{4,2},{3,1}} + local input = {input1, input2} + local module = nn.CMinTable() + local err1 = torch.add(module:forward(input), -1, torch.Tensor{{1,2},{2,1}}) + mytester:assertalmosteq(err1:abs():max(), 0, 1e-15, "CMinTable forward call") + local gradOutputs = torch.Tensor{5,6,7,8} + local gradInputs = module:backward(input, gradOutputs) + local err2 = torch.add(gradInputs[1], -1, torch.Tensor{{5,0},{7,0}}) + local err3 = torch.add(gradInputs[2], -1, torch.Tensor{{0,6},{0,8}}) + mytester:assertalmosteq(err2:abs():max(), 0, 1e-15, "CMinTable backward call") + mytester:assertalmosteq(err3:abs():max(), 0, 1e-15, "CMinTable backward call") +end + +function nntest.JoinTable() + local tensor = torch.rand(3,4,5) + local input = {tensor, tensor} + local module + for d = 1,tensor:dim() do + module = nn.JoinTable(d) + mytester:asserteq(module:forward(input):size(d), tensor:size(d)*2, "dimension " .. d) + end + + -- Minibatch + local tensor = torch.rand(3,4,5) + local input = {tensor, tensor} + local module + for d = 1,tensor:dim()-1 do + module = nn.JoinTable(d, 2) + mytester:asserteq(module:forward(input):size(d+1), tensor:size(d+1)*2, "dimension " .. d) + end +end + +function nntest.SplitTable() + local input = torch.randn(3,4,5) + local module + for d = 1,input:dim() do + module = nn.SplitTable(d) + mytester:asserteq(#module:forward(input), input:size(d), "dimension " .. d) + end + + -- Minibatch + local input = torch.randn(3,4,5) + local module + for d = 1,input:dim()-1 do + module = nn.SplitTable(d, 2) + mytester:asserteq(#module:forward(input), input:size(d+1), "dimension " .. 
d) + end + + -- Negative indices + local module = nn.SplitTable(-3) + local input = torch.randn(3,4,5) + mytester:asserteq(#module:forward(input), 3, "negative index") + local input = torch.randn(2,3,4,5) + mytester:asserteq(#module:forward(input), 3, "negative index (minibatch)") +end + +function nntest.Select() + -- Test negative Select + local input = torch.Tensor{{4,6,7}, {8,0,1}} + mytester:asserteq(nn.Select(1,-1):forward(input)[1], 8, "negative index") + mytester:asserteq(nn.Select(1,-1):forward(input)[2], 0, "negative index") + mytester:asserteq(nn.Select(1,-2):forward(input)[2], 6, "negative index") + mytester:asserteq(nn.Select(-1,-1):forward(input)[1], 7, "negative dim + negative index") + mytester:asserteq(nn.Select(-1,-1):forward(input)[2], 1, "negative dim + negative index") +end + +function nntest.SelectTable() + local input = { + torch.rand(3,4,5), torch.rand(3,4,5), + {torch.rand(3,4,5)}, + {torch.rand(3,4,5), {torch.rand(3,4,5)}} + } + local gradOutputs = { + torch.rand(3,4,5), torch.rand(3,4,5), + {torch.rand(3,4,5)}, + {torch.rand(3,4,5), {torch.rand(3,4,5)}} + } + local zeros = { + torch.Tensor(3,4,5):zero(), torch.Tensor(3,4,5):zero(), + {torch.Tensor(3,4,5):zero()}, + {torch.Tensor(3,4,5):zero(), {torch.Tensor(3,4,5):zero()}} + } + local nonIdx = {2,3,4,1} + local module + for idx = 1,#input do + module = nn.SelectTable(idx) + local output = module:forward(input) + equal(output, input[idx], "output dimension " .. idx) + local gradInput = module:backward(input, gradOutputs[idx]) + equal(gradInput[idx], gradOutputs[idx], "gradInput[idx] dimension " .. idx) + equal(gradInput[nonIdx[idx]], zeros[nonIdx[idx]], "gradInput[nonIdx] dimension " .. idx) + end + + -- test negative index + local idx = -2 + module = nn.SelectTable(idx) + local output = module:forward(input) + equal(output, input[#input+idx+1], "output dimension " .. idx) + local gradInput = module:backward(input, gradOutputs[#input+idx+1]) + equal(gradInput[#input+idx+1], gradOutputs[#input+idx+1], "gradInput[idx] dimension " .. idx) + equal(gradInput[nonIdx[#input+idx+1]], zeros[nonIdx[#input+idx+1]], "gradInput[nonIdx] dimension " .. idx) + + -- test typecast + local idx = #input + module = nn.SelectTable(idx) + module:float() + local output = module:forward(input) + equal(output, input[idx], "type output") + local gradInput = module:backward(input, gradOutputs[idx]) + equal(gradInput[idx], gradOutputs[idx], "gradInput[idx] dimension " .. idx) + equal(gradInput[nonIdx[idx]], zeros[nonIdx[idx]], "gradInput[nonIdx] dimension " .. 
idx) + + -- test on differently sized sub-input tables given consecutively + local input1 = { + torch.rand(3,4,5), + {torch.rand(3,4,5), torch.rand(3,4,5), torch.rand(3,4,5)} + } + local input2 = { + torch.rand(3,4,5), + {torch.rand(3,4,5), torch.rand(3,4,5)} + } + + module = nn.SelectTable(1) + local output = module:forward(input1) + equal(output, input1[1], "output dimension 1") + local gradInput = module:backward(input1, output) + mytester:assert(#gradInput == #input1, "Table lengths") + mytester:assert(#gradInput[2] == #input1[2], "Sub-Table lengths") + output = module:forward(input2) + equal(output, input2[1], "output dimension 1") + gradInput = module:backward(input2, output) + mytester:assert(#gradInput == #input2, "Table lengths") + mytester:assert(#gradInput[2] == #input2[2], "Sub-Table lengths") + + -- test on tables of increasing size + local input1 = {torch.rand(3,4,5), torch.rand(3,4,5)} + local input2 = {torch.rand(3,4,5), torch.rand(3,4,5), torch.rand(3,4,5)} + local gradOutput1 = torch.randn(3,4,5) + local gradOutput2 = torch.randn(3,4,5) + + local module1 = nn.SelectTable(-1) + local output1 = module1:forward(input1):clone() + local output2 = module1:forward(input2) + local gradInput_ = module1:backward(input1, gradOutput1) + local gradInput1 = {} + for k,v in ipairs(gradInput_) do gradInput1[k] = v:clone() end + local gradInput2 = module1:backward(input2, gradOutput2) + + local module3 = nn.SelectTable(-1) + local module4 = nn.SelectTable(-1) + local output3 = module3:forward(input1) + local output4 = module4:forward(input2) + local gradInput3 = module3:backward(input1, gradOutput1) + local gradInput4 = module4:backward(input2, gradOutput2) + + equal(output1, output3, "output 1 and 3") + equal(output2, output4, "output 2 and 4") + equal(gradInput1, gradInput3, "gradInput 1 and 3") + equal(gradInput2, gradInput4, "gradInput 2 and 4") +end + +function nntest.MixtureTable() + -- 2D + -- expertInput is a Table: + local expertInput = torch.randn(5,3,6) + local gradOutput = torch.randn(5,6) + local input = { + torch.rand(5,3), + {expertInput:select(2,1), expertInput:select(2,2), expertInput:select(2,3)} + } + local module = nn.MixtureTable() + local output = module:forward(input) + local output2 = torch.cmul(input[1]:view(5,3,1):expand(5,3,6), expertInput):sum(2):squeeze(2) + mytester:assertTensorEq(output, output2, 0.000001, "mixture output") + local gradInput = module:backward(input, gradOutput) + local gradOutput2 = torch.view(gradOutput, 5, 1, 6):expandAs(expertInput) + local gaterGradInput2 = torch.cmul(gradOutput2, expertInput):sum(3):select(3,1) + mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture gater gradInput") + local expertGradInput2 = torch.cmul(input[1]:view(5,3,1):expand(5,3,6), gradOutput:view(5,1,6):expand(5,3,6)) + for i, expertGradInput in ipairs(gradInput[2]) do + mytester:assertTensorEq(expertGradInput, expertGradInput2:select(2,i), 0.000001, "mixture expert "..i.." 
gradInput") + end + -- expertInput is a Tensor: + local input = {input[1], expertInput} + local module = nn.MixtureTable(2) + local output = module:forward(input) + mytester:assertTensorEq(output, output2, 0.000001, "mixture2 output") + local gradInput = module:backward(input, gradOutput) + mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture2 gater gradInput") + mytester:assertTensorEq(gradInput[2], expertGradInput2, 0.000001, "mixture2 expert gradInput") + + -- 3D + local expertInput = torch.randn(5,6,3,2) + local gradOutput = torch.randn(5,6,2) + -- expertInput is a Table: + local input = { + torch.rand(5,3), + {expertInput:select(3,1), expertInput:select(3,2), expertInput:select(3,3)} + } + local module = nn.MixtureTable() + local output = module:forward(input) + local output2 = torch.cmul(input[1]:view(5,1,3,1):expand(5,6,3,2), expertInput):sum(3):squeeze(3) + mytester:assertTensorEq(output, output2, 0.000001, "mixture3 output") + local gradInput = module:backward(input, gradOutput) + local gradOutput2 = torch.view(gradOutput,5,6,1,2):expandAs(expertInput) + local gaterGradInput2 = torch.cmul(gradOutput2, expertInput):sum(4):select(4,1):sum(2):select(2,1) + mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture3 gater gradInput") + local expertGradInput2 = torch.cmul(input[1]:view(5,1,3,1):expand(5,6,3,2), gradOutput2) + for i, expertGradInput in ipairs(gradInput[2]) do + mytester:assertTensorEq(expertGradInput, expertGradInput2:select(3,i), 0.000001, "mixture3 expert "..i.." gradInput") + end + -- expertInput is a Tensor + local input = {input[1], expertInput} + local module = nn.MixtureTable(3) + local output = module:forward(input) + mytester:assertTensorEq(output, output2, 0.000001, "mixture4 output") + local gradInput = module:backward(input, gradOutput) + mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture4 gater gradInput") + mytester:assertTensorEq(gradInput[2], expertGradInput2, 0.000001, "mixture4 expert gradInput") + + -- 1D + -- expertInput is a Table: + local expertInput = torch.randn(3,6) + local gradOutput = torch.randn(6) + local input = { + torch.rand(3), + {expertInput:select(1,1), expertInput:select(1,2), expertInput:select(1,3)} + } + local module = nn.MixtureTable() + local output = module:forward(input) + local output2 = torch.cmul(input[1]:view(3,1):expand(3,6), expertInput):sum(1):squeeze(1) + mytester:assertTensorEq(output, output2, 0.000001, "mixture5 output") + local gradInput = module:backward(input, gradOutput) + local gradOutput2 = torch.view(gradOutput, 1, 6):expandAs(expertInput) + local gaterGradInput2 = torch.cmul(gradOutput2, expertInput):sum(2):select(2,1) + mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture5 gater gradInput") + local expertGradInput2 = torch.cmul(input[1]:view(3,1):expand(3,6), gradOutput:view(1,6):expand(3,6)) + for i, expertGradInput in ipairs(gradInput[2]) do + mytester:assertTensorEq(expertGradInput, expertGradInput2:select(1,i), 0.000001, "mixture5 expert "..i.." 
gradInput") + end + -- test type-cast + module:float() + local input2 = { + input[1]:float(), + {input[2][1]:float(), input[2][2]:float(), input[2][3]:float()} + } + local output = module:forward(input2) + mytester:assertTensorEq(output, output2:float(), 0.000001, "mixture5B output") + local gradInput = module:backward(input2, gradOutput:float()) + mytester:assertTensorEq(gradInput[1], gaterGradInput2:float(), 0.000001, "mixture5B gater gradInput") + for i, expertGradInput in ipairs(gradInput[2]) do + mytester:assertTensorEq(expertGradInput, expertGradInput2:select(1,i):float(), 0.000001, "mixture5B expert "..i.." gradInput") + end + -- expertInput is a Tensor: + local input = {input[1], expertInput} + local module = nn.MixtureTable(1) + local output = module:forward(input) + mytester:assertTensorEq(output, output2, 0.000001, "mixture6 output") + local gradInput = module:backward(input, gradOutput) + mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture6 gater gradInput") + mytester:assertTensorEq(gradInput[2], expertGradInput2, 0.000001, "mixture6 expert gradInput") + -- test type-cast: + module:float() + local input2 = {input[1]:float(), expertInput:float()} + local output = module:forward(input2) + mytester:assertTensorEq(output, output2:float(), 0.000001, "mixture6B output") + local gradInput = module:backward(input2, gradOutput:float()) + mytester:assertTensorEq(gradInput[1], gaterGradInput2:float(), 0.000001, "mixture6B gater gradInput") + mytester:assertTensorEq(gradInput[2], expertGradInput2:float(), 0.000001, "mixture6B expert gradInput") + + --2D gater, 1D expert + -- expertInput is a Table: + local expertInput = torch.randn(5,3) + local gradOutput = torch.randn(5) + local input = { + torch.rand(5,3), + {expertInput:select(2,1), expertInput:select(2,2), expertInput:select(2,3)} + } + local module = nn.MixtureTable() + local output = module:forward(input) + local output2 = torch.cmul(input[1], expertInput):sum(2):squeeze(2) + mytester:assertTensorEq(output, output2, 0.000001, "mixture7 output") + local gradInput = module:backward(input, gradOutput) + local gradOutput2 = torch.view(gradOutput, 5, 1):expandAs(expertInput) + local gaterGradInput2 = torch.cmul(gradOutput2, expertInput) + mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture7 gater gradInput") + local expertGradInput2 = torch.cmul(input[1], gradOutput:view(5,1):expand(5,3)) + for i, expertGradInput in ipairs(gradInput[2]) do + mytester:assertTensorEq(expertGradInput, expertGradInput2:select(2,i), 0.000001, "mixture7 expert "..i.." 
gradInput") + end +end + +function nntest.Narrow() + -- check basic narrow functionality #1 + local input = torch.rand(9, 4, 14) + local output = input:narrow(1, 3, 5) + local gradOutput = torch.rand(5, 4, 14) + local gradInput = torch.zeros(9, 4, 14) + gradInput:narrow(1, 3, 5):copy(gradOutput) + local module1 = nn.Narrow(1, 3, 5) + local output1 = module1:forward(input) + local gradInput1 = module1:backward(input, gradOutput) + local module2 = nn.Narrow(1, 3, -3) + local output2 = module2:forward(input) + local gradInput2 = module2:backward(input, gradOutput) + mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #1 output err") + mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #1 gradInput err") + mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #1 negative output err") + mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #1 negative gradInput err") + + -- check basic narrow functionality #2 + local input = torch.rand(3, 10, 4) + local output = input:narrow(2, 5, 3) + local gradOutput = torch.rand(3, 3, 4) + local gradInput = torch.zeros(3, 10, 4) + gradInput:narrow(2, 5, 3):copy(gradOutput) + local module1 = nn.Narrow(2, 5, 3) + local output1 = module1:forward(input) + local gradInput1 = module1:backward(input, gradOutput) + local module2 = nn.Narrow(2, 5, -4) + local output2 = module2:forward(input) + local gradInput2 = module2:backward(input, gradOutput) + mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #2 output err") + mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #2 gradInput err") + mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #2 negative output err") + mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #2 negative gradInput err") + + -- check basic narrow functionality #3 + local input = torch.rand(6, 11, 7) + local output = input:narrow(3, 1, 1) + local gradOutput = torch.rand(6, 11, 1) + local gradInput = torch.zeros(6, 11, 7) + gradInput:narrow(3, 1, 1):copy(gradOutput) + local module1 = nn.Narrow(3, 1, 1) + local output1 = module1:forward(input) + local gradInput1 = module1:backward(input, gradOutput) + local module2 = nn.Narrow(3, 1, -7) + local output2 = module2:forward(input) + local gradInput2 = module2:backward(input, gradOutput) + mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #3 output err") + mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #3 gradInput err") + mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #3 negative output err") + mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #3 negative gradInput err") + + -- check basic narrow functionality #4 + local input = torch.rand(3, 10, 4) + local output = input:narrow(2, 5, 3) + local gradOutput = torch.rand(3, 3, 4) + local gradInput = torch.zeros(3, 10, 4) + gradInput:narrow(2, 5, 3):copy(gradOutput) + local module1 = nn.Narrow(-2, 5, 3) + local output1 = module1:forward(input) + local gradInput1 = module1:backward(input, gradOutput) + local module2 = nn.Narrow(-2, 5, -4) + local output2 = module2:forward(input) + local gradInput2 = module2:backward(input, gradOutput) + mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #4 output err") + mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #4 gradInput err") + mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #4 negative output err") + mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #4 negative gradInput err") + + -- check narrow negative offset + local input = 
torch.rand(3, 10, 4) + local output = input:narrow(2, 1, 3) + local gradOutput = torch.rand(3, 3, 4) + local gradInput = torch.zeros(3, 10, 4) + gradInput:narrow(2, 1, 3):copy(gradOutput) + local module1 = nn.Narrow(2, -1, 7) + local output1 = module1:forward(input) + local gradInput1 = module1:backward(input, gradOutput) + local module2 = nn.Narrow(2, 1, 3) + local output2 = module2:forward(input) + local gradInput2 = module2:backward(input, gradOutput) + mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #5 output err") + mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #5 gradInput err") + mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #5 negative output err") + mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #5 negative gradInput err") +end + +function nntest.NarrowTable() + local input = torch.randn(3,10,4) + local gradOutput = torch.randn(3,3,4) + local nt = nn.NarrowTable(5,3) + local seq = nn.Sequential() + seq:add(nn.SplitTable(1,2)) + seq:add(nt) + seq:add(nn.JoinTable(1,1)) + seq:add(nn.Reshape(3,3,4)) + local seq2 = nn.Narrow(2,5,3) + local output = seq:forward(input) + local gradInput = seq:backward(input, gradOutput) + local output2 = seq2:forward(input) + local gradInput2 = seq2:backward(input, gradOutput) + mytester:assertTensorEq(output, output2, 0.0000001, "NarrowTable output err") + mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "NarrowTable gradInput err") + + -- now try it with a smaller input + local input = input:narrow(2, 1, 8) + local output = seq:forward(input) + local gradInput = seq:backward(input, gradOutput) + local output2 = seq2:forward(input) + local gradInput2 = seq2:backward(input, gradOutput) + mytester:assertTensorEq(output, output2, 0.0000001, "NarrowTable small output err") + mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "NarrowTable small gradInput err") + + -- test type-cast + local input = input:float() + local gradOutput = gradOutput:float() + seq:float() + seq2:float() + local output = seq:forward(input) + local gradInput = seq:backward(input, gradOutput) + local output2 = seq2:forward(input) + local gradInput2 = seq2:backward(input, gradOutput) + mytester:assertTensorEq(output, output2, 0.0000001, "NarrowTable output float err") + mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "NarrowTable gradInput float err") +end + +function nntest.View() + local input = torch.rand(10) + local template = torch.rand(5,2) + local target = template:size():totable() + local module = nn.View(template:size()) + mytester:assertTableEq(module:forward(input):size():totable(), target, "Error in forward (1)") + local module = nn.View(table.unpack(target)) + mytester:assertTableEq(module:forward(input):size():totable(), target, "Error in forward (2)") + + -- Minibatch + local minibatch = torch.rand(5,10) + mytester:asserteq(module:forward(minibatch):size(1), + minibatch:size(1), + "Error in minibatch dimension") + mytester:asserteq(module:forward(minibatch):nElement(), + minibatch:nElement(), + "Error in minibatch nElement") + local module = nn.View(-1):setNumInputDims(1) + mytester:asserteq(module:forward(minibatch):size(1), + minibatch:size(1), + "Error in minibatch dimension with size -1") + mytester:asserteq(module:forward(minibatch):nElement(), + minibatch:nElement(), + "Error in minibatch nElement with size -1") + + -- another setNumInputDims case + local minibatch = torch.rand(5,4,10) + local module = nn.View(-1):setNumInputDims(2) + 
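+ -- setNumInputDims(2) declares the non-batch input as 2D, so the leading dimension of a 3D input is treated as a batch dimension and preserved in the output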
mytester:asserteq(module:forward(minibatch):size(1), + minibatch:size(1), + "Error in minibatch dimension with size -1") + + -- another setNumInputDims case + local minibatch = torch.rand(2,5,4,10) + local module = nn.View(4,-1):setNumInputDims(2) + local out = module:forward(minibatch) + mytester:asserteq(out:size(1), minibatch:size(1)*minibatch:size(2), + "Error in minibatch dimension with size -1") + mytester:asserteq(out:size(2), minibatch:size(3), + "Error in minibatch dimension with size -1") + mytester:asserteq(out:size(3), minibatch:size(4), + "Error in minibatch dimension with size -1") + + -- Minibatch Generalization + local minibatch = torch.rand(5,2,6) + local module = nn.View(6) + mytester:asserteq( + module:forward(minibatch):size(1), + minibatch:size(1)*minibatch:size(2), + "Error in minibatch generalization dimension") + mytester:asserteq( + module:forward(minibatch):nElement(), + minibatch:nElement(), + "Error in minibatch generalization nElement") +end + +function nntest.Reshape() + local input = torch.rand(10) + local template = torch.rand(5,2) + local target = template:size():totable() + local module = nn.Reshape(template:size()) + mytester:assertTableEq(module:forward(input):size():totable(), target, "Error in forward (1)") + local module = nn.Reshape(table.unpack(target)) + mytester:assertTableEq(module:forward(input):size():totable(), target, "Error in forward (2)") + + -- Minibatch + local minibatch = torch.rand(5,10) + mytester:asserteq(module:forward(minibatch):size(1), + minibatch:size(1), + "Error in minibatch dimension") + mytester:asserteq(module:forward(minibatch):nElement(), + minibatch:nElement(), + "Error in minibatch nElement") +end + +-- Define a test for SpatialUpSamplingNearest +function nntest.SpatialUpSamplingNearest() + local scale = torch.random(2,4) + for dim = 3,4 do + local m = nn.SpatialUpSamplingNearest(scale) + + -- Create a randomly sized dimD vector + local shape = {} + for i = 1, dim do + table.insert(shape, torch.random(2, 2+dim-1)) + end + + -- Check that the gradient is correct by using finite differences + local input = torch.Tensor(table.unpack(shape)):zero() + + local err = jac.testJacobian(m, input) + mytester:assertlt(err, precision, ' error on state ') + + local ferr, berr = jac.testIO(m, input) + mytester:asserteq(ferr, 0, torch.typename(m)..' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(m)..' - i/o backward err ') + end +end + +function nntest.SpatialUpSamplingBilinear() + for scale=2,4 do + for dim = 3,4 do + local m = nn.SpatialUpSamplingBilinear(scale) + + -- Create a randomly sized dimD vector + local shape = {} + for i = 1, dim do + table.insert(shape, torch.random(2, 2+dim-1)) + end + + -- Check that the gradient is correct by using finite differences + local input = torch.DoubleTensor(table.unpack(shape)):normal() + + local err = jac.testJacobian(m, input) + mytester:assertlt(err, precision, ' error on state ') + + local ferr, berr = jac.testIO(m, input) + mytester:asserteq(ferr, 0, torch.typename(m)..' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(m)..' 
- i/o backward err ') + end + end +end + +function nntest.Concat() + local input = torch.randn(4, 2) + local num_modules = math.random(2, 5) + local linears = {} + for i = 1,num_modules do + linears[i] = nn.Linear(2,5) + end + + local m = nn.Concat(1) + for _,module in ipairs(linears) do + m:add(module) + module:zeroGradParameters() + module.weight:fill(1) + module.bias:fill(0) + end + mytester:asserteq(m:size(), num_modules) + + local output = m:forward(input) + local output2 = input:sum(2):expand(4, 5):repeatTensor(num_modules, 1) + mytester:assertTensorEq(output2, output, 0.000001, 'Concat forward err') + + local gradInput = m:backward(input, torch.ones(output2:size())) + local gradInput2 = torch.ones(4, 2):fill(num_modules * 5) + mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'Concat backward err (gradInput)') + + local gradWeight = input:sum(1):expand(5, 2) + for _,module in ipairs(linears) do + mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'Concat backward err (gradWeight)') + end +end + +function nntest.Parallel() + local input = torch.randn(3, 4, 5) + local m = nn.Parallel(1,3) + m:add(nn.View(4,5,1)) + m:add(nn.View(4,5,1)) + m:add(nn.View(4,5,1)) + + local output = m:forward(input) + local output2 = input:transpose(1,3):transpose(1,2) + mytester:assertTensorEq(output2, output, 0.000001, 'Parallel forward err') + + local gradInput = m:backward(input, output2) + mytester:assertTensorEq(gradInput, input, 0.000001, 'Parallel backward err') +end + +function nntest.ParallelTable() + local input = torch.randn(3, 4, 5) + local p = nn.ParallelTable() + p:add(nn.View(4,5,1)) + p:add(nn.View(4,5,1)) + p:add(nn.View(4,5,1)) + local m = nn.Sequential() + m:add(nn.SplitTable(1)) + m:add(p) + m:add(nn.JoinTable(3)) + + local output = m:forward(input) + local output2 = input:transpose(1,3):transpose(1,2) + mytester:assertTensorEq(output2, output, 0.000001, 'ParallelTable forward err') + + local gradInput = m:backward(input, output2) + mytester:assertTensorEq(gradInput, input, 0.000001, 'ParallelTable backward err') +end + +function nntest.ConcatTable() + -- Test tensor input + local input = torch.rand(5, 5, 5) + local m = nn.Sequential() + + local concat = nn.ConcatTable() + concat:add(nn.Identity()) + + m:add(concat) -- Output of concat is a table of length 1 + m:add(nn.JoinTable(1)) -- jac needs a tensor output + + local err = jac.testJacobian(m, input) + mytester:assertlt(err, precision, ' error on state ') + + local ferr, berr = jac.testIO(m, input) + mytester:asserteq(ferr, 0, torch.typename(m)..' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(m)..' 
- i/o backward err ') + + -- Now test a table input + local input = { + torch.randn(3,4):float(), torch.randn(3,4):float(), {torch.randn(3,4):float()} + } + local _gradOutput = { + torch.randn(3,3,4):float(), torch.randn(3,3,4):float(), torch.randn(3,3,4):float() + } + local gradOutput = { + {_gradOutput[1][1], _gradOutput[2][1], {_gradOutput[3][1]}}, + {_gradOutput[1][2], _gradOutput[2][2], {_gradOutput[3][2]}}, + {_gradOutput[1][3], _gradOutput[2][3], {_gradOutput[3][3]}} + } + local module = nn.ConcatTable() + module:add(nn.Identity()) + module:add(nn.Identity()) + module:add(nn.Identity()) + module:float() + + local output = module:forward(input) + local output2 = {input, input, input} + equal(output2, output, "ConcatTable table output") + local gradInput = module:backward(input, gradOutput) + local gradInput2 = {_gradOutput[1]:sum(1):squeeze(1), _gradOutput[2]:sum(1):squeeze(1), {_gradOutput[3]:sum(1):squeeze(1)}} + equal(gradInput, gradInput2, "ConcatTable table gradInput") + + -- test outputs for variable length inputs + local test = nn.ConcatTable() + test:add(nn.Identity()) + test:add(nn.Identity()) + + local x = {torch.randn(5), torch.randn(5)} + local y = {torch.randn(5)} + + local o1 = #(test:forward(x)) + local go1 = #(test:backward(x, {x, x})) + local o2 = #(test:forward(y)) + local go2 = #(test:backward(y, {y, y})) + mytester:assert(o1 == 2, "ConcatTable table variable length") + mytester:assert(go1 == 2, "ConcatTable table variable length") + mytester:assert(o2 == 2, "ConcatTable table variable length") + mytester:assert(go2 == 1, "ConcatTable table variable length") +end + +function nntest.MapTable() + local map = nn.MapTable(nn.Linear(10,5)) + local lin = map:get(1):clone() + + -- ParallelTable with clones as reference + local parallel = nn.ParallelTable() + parallel:add(lin) + parallel:add(lin:clone('weight','bias')) + parallel:add(lin:clone('weight','bias')) + + local input = {torch.rand(10), torch.rand(10), torch.rand(10)} + local gradOutput = {torch.ones(5), torch.ones(5), torch.ones(5)} + + local outputM = map:forward(input) + local outputP = parallel:forward(input) + mytester:assertTensorEq(outputM[1], outputP[1]) + mytester:assertTensorEq(outputM[2], outputP[2]) + mytester:assertTensorEq(outputM[3], outputP[3]) + mytester:assert(map:size() == #input) + + map:zeroGradParameters() + parallel:zeroGradParameters() + local gradInputM = map:backward(input, gradOutput) + local gradInputP = parallel:backward(input, gradOutput) + mytester:assertTensorEq(gradInputM[1], gradInputP[1]) + mytester:assertTensorEq(gradInputM[2], gradInputP[2]) + mytester:assertTensorEq(gradInputM[3], gradInputP[3]) + + map:updateParameters(1) + parallel:updateParameters(1) + mytester:assertTensorEq(map:get(1).weight, parallel:get(1).weight, 0.00001) + + local output = map:forward({input[1], input[2], input[3], input[3]}) + mytester:assert(#output == 4) + local output = map:forward({input[1], input[2]}) + mytester:assert(#output == 2) + + map:resize(10) + mytester:assert(map:size() == 10) + map:resize(4) + mytester:assert(map:size() == 4) + mytester:assert(torch.pointer(map:get(4).weight:storage()) + == torch.pointer(map:get(1).weight:storage())) + map:clearState() + mytester:assert(map:size() == 1) + + -- check if gradients are correctly reset + -- share weights and gradients + map = nn.MapTable(nn.Linear(10,5)) + map:forward(input) + _, gradParams = map:getParameters() + gradParams:uniform() + map:zeroGradParameters() + mytester:assertlt(gradParams:sum(),precision) + + -- check if gradients are 
correctly reset + -- do not share weights and gradients + map = nn.MapTable(nn.Linear(10,5),false) + map:forward(input) + _, gradParams = map:getParameters() + gradParams:uniform() + map:zeroGradParameters() + mytester:assertlt(gradParams:sum(),precision) +end + +function nntest.FlattenTable() + -- Create a nested table. Obviously we can't even stochastically test + -- the space of all possible nested tables (it's infinite), but here is a + -- hand-coded one that covers all the cases we need: + local input = { + torch.rand(1), + { + torch.rand(2), + { + torch.rand(3) + }, + }, + torch.rand(4) + } + local gradOutput = { + torch.rand(1), + torch.rand(2), + torch.rand(3), + torch.rand(4) + } + + -- Check the FPROP + local m = nn.FlattenTable() + local output = m:forward(input) + mytester:assert(#output == 4, torch.typename(m)..' - fprop err ') + -- This is ugly, but check that the mapping from input to output is correct + mytester:assert(output[1] == input[1]) + mytester:assert(output[2] == input[2][1]) + mytester:assert(output[3] == input[2][2][1]) + mytester:assert(output[4] == input[3]) + + -- Check the BPROP + local gradInput = m:backward(input, gradOutput) + -- Again, check that the mapping is correct + mytester:assert(gradOutput[1] == gradInput[1]) + mytester:assert(gradOutput[2] == gradInput[2][1]) + mytester:assert(gradOutput[3] == gradInput[2][2][1]) + mytester:assert(gradOutput[4] == gradInput[3]) + + -- More ugliness: FlattenTable doesn't rebuild the table every updateOutput + -- call, so we need to make sure that modifications to the input are + -- detected correctly (and that the table is correctly rebuilt). + -- CASE 1: Nothing changes so the output table shouldn't be redefined + local old_input_map = m.input_map + local old_output = m.output + local _ = m:forward(input) + mytester:assert(old_input_map == m.input_map and old_output == m.output) + + -- CASE 2: An element is added to the input table + old_input_map = m.input_map + old_output = m.output + input[2][#(input[2])+1] = torch.rand(5) + m:forward(input) + mytester:assert(old_input_map ~= m.input_map and old_output ~= m.output) + + -- CASE 3: An element is removed from the input table + old_input_map = m.input_map + old_output = m.output + input[#input] = nil + m:forward(input) + mytester:assert(old_input_map ~= m.input_map and old_output ~= m.output) + + -- At this point further testing is not necessary I think, but just to be + -- consistent: perform a jacobian test by using SplitTable and JoinTable + -- elements + m = nn.Sequential() + local par = nn.ParallelTable() + par:add(nn.SplitTable(1)) + par:add(nn.SplitTable(1)) + m:add(nn.SplitTable(1)) + m:add(par) -- this will create a nested table + m:add(nn.FlattenTable()) -- This will flatten the nested table + m:add(nn.JoinTable(1)) -- Finally, this will create a 1D tensor + + input = torch.Tensor(2,2,2) + local err = jac.testJacobian(m, input) + mytester:assertlt(err, precision, 'error on bprop ') +end + +function nntest.L1Penalty() + local weight = 1 + local sizeAverage = false + local m = nn.L1Penalty(weight, sizeAverage, false) + + local input = torch.rand(2,10):add(-0.5) + input[1][1] = 0 + + local _ = m:forward(input) + local grad = m:backward(input, torch.ones(input:size())) + + local err = input:clone():abs():sum()*weight - m.loss + mytester:assertlt(math.abs(err), precision, 'error on fprop ') + + local true_grad = (input:gt(0):typeAs(grad) + + input:lt(0):typeAs(grad):mul(-1)):mul(weight) + mytester:assertlt((true_grad - grad):abs():max(), precision, + 'error 
on bprop ') + + -- Note: We cannot use the Jacobian test for this Module since the backward + -- gradient cannot be estimated using finite differences (i.e., the loss + -- during BPROP is not included in the FPROP output) +end + +function nntest.L1Cost() + local input = torch.rand(10) * 2 - 1 + local m = nn.L1Cost() + local output = m:forward(input) + local err = output - torch.abs(input):sum() + mytester:assertalmosteq(err, 0, 1e-15, 'L1Cost forward') +end + +function nntest.DepthConcat() + local outputSize = torch.IntTensor{5,6,7,8} + local input = torch.randn(2,3,12,12) + local gradOutput = torch.randn(2, outputSize:sum(), 12, 12) + local concat = nn.DepthConcat(2) + concat:add(nn.SpatialConvolutionMM(3, outputSize[1], 1, 1, 1, 1)) --> 2, 5, 12, 12 + concat:add(nn.SpatialConvolutionMM(3, outputSize[2], 3, 3, 1, 1)) --> 2, 6, 10, 10 + concat:add(nn.SpatialConvolutionMM(3, outputSize[3], 4, 4, 1, 1)) --> 2, 7, 9, 9 + concat:add(nn.SpatialConvolutionMM(3, outputSize[4], 5, 5, 1, 1)) --> 2, 8, 8, 8 + concat:zeroGradParameters() + -- forward/backward + local outputConcat = concat:forward(input) + local gradInputConcat = concat:backward(input, gradOutput) + -- the spatial dims are the largest, the nFilters is the sum + local output = torch.Tensor(2, outputSize:sum(), 12, 12):zero() -- zero for padding + local narrows = { {{},{1,5},{},{}}, {{},{6,11},{2,11},{2,11}}, {{},{12,18},{2,10},{2,10}}, {{},{19,26},{3,10},{3,10}} } + local gradInput = input:clone():zero() + for i=1,4 do + local conv = concat:get(i) + local gradWeight = conv.gradWeight:clone() + conv:zeroGradParameters() + output[narrows[i]]:copy(conv:forward(input)) + gradInput:add(conv:backward(input, gradOutput[narrows[i]])) + mytester:assertTensorEq(gradWeight, conv.gradWeight, 0.000001, "Error in DepthConcat:accGradParameters for conv "..i) + end + mytester:assertTensorEq(output, outputConcat, 0.000001, "Error in DepthConcat:updateOutput") + mytester:assertTensorEq(gradInput, gradInputConcat, 0.000001, "Error in DepthConcat:updateGradInput") +end + +function nntest.MV() + local mv = nn.MV(false) + local outdim = torch.random(10,20) + local indim = torch.random(10,20) + local M = torch.randn(outdim, indim) + local V = torch.randn(indim) + + -- Test forward pass. + local output = mv:forward({M, V}) + mytester:assertTableEq(output:size():totable(), {outdim}, + 'Output has wrong dimensionality') + mytester:assertTensorEq(output, M * V, 1e-10, + 'Wrong output') + + -- Test backward pass. + local gradOutput = torch.randn(outdim) + local gradInput = mv:backward({M, V}, gradOutput) + mytester:assert(#gradInput == 2, 'gradInput must be table of size 2') + local gradM, gradV = table.unpack(gradInput) + mytester:assertTableEq(gradM:size():totable(), M:size():totable(), + 'Gradient for input M has wrong size') + mytester:assertTableEq(gradV:size():totable(), V:size():totable(), + 'Gradient for input V has wrong size') + mytester:assertTensorEq(gradM, torch.ger(gradOutput, V), 1e-10, + 'Wrong gradient for input M') + -- d/dV(j) (sum_i A(i,j)V(j)) = A(i,j), so gradV = M:t() * gradOutput + mytester:assertTensorEq(gradV, M:t() * gradOutput, 1e-10, + 'Wrong gradient for input V') +end + +function nntest.BatchMVNoTranspose() + local mv = nn.MV() + local outdim = torch.random(10,20) + local indim = torch.random(10,20) + for bSize = 1, 11, 5 do + local M = torch.randn(bSize, outdim, indim) + local V = torch.randn(bSize, indim) + + -- Test forward pass. 
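+ -- A minimal sketch of the identities the batched checks below exercise,
+ -- for a single pair (M, V) with upstream gradient g (illustrative names,
+ -- not part of the test):
+ --   out   = M * V            -- forward: y = M v
+ --   gradM = torch.ger(g, V)  -- dL/dM is the outer product g v^T
+ --   gradV = M:t() * g        -- dL/dV = M^T g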
+ local output = mv:forward({M, V}) + mytester:assertTableEq(output:size():totable(), {bSize, outdim}, + 'Output has wrong dimensionality') + for i = 1, bSize do + mytester:assertTensorEq(output[i], M[i] * V[i], 1e-10, + 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + + -- Test backward pass. + local gradOutput = torch.randn(bSize, outdim) + local gradInput = mv:backward({M, V}, gradOutput) + mytester:assert(#gradInput == 2, 'gradInput must be table of size 2') + local gradM, gradV = table.unpack(gradInput) + mytester:assertTableEq(gradM:size():totable(), M:size():totable(), + 'Gradient for input M has wrong size') + mytester:assertTableEq(gradV:size():totable(), V:size():totable(), + 'Gradient for input V has wrong size') + for i = 1, bSize do + mytester:assertTensorEq(gradM[i], torch.ger(gradOutput[i], V[i]), 1e-10, + 'Gradient for input M wrong for bSize = ' .. bSize .. ' and i = ' .. i) + mytester:assertTensorEq(gradV[i], M[i]:t() * gradOutput[i], 1e-10, + 'Gradient for input V wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + end +end + +function nntest.BatchMVTranspose() + local mv = nn.MV(true) + local outdim = torch.random(10,20) + local indim = torch.random(10,20) + for bSize = 1, 11, 5 do + local M = torch.randn(bSize, indim, outdim) + local V = torch.randn(bSize, indim) + + -- Test forward pass. + local output = mv:forward({M, V}) + mytester:assertTableEq(output:size():totable(), {bSize, outdim}, + 'Output has wrong dimensionality') + for i = 1, bSize do + mytester:assertTensorEq(output[i], M[i]:t() * V[i], 1e-10, + 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + + -- Test backward pass. + local gradOutput = torch.randn(bSize, outdim) + local gradInput = mv:backward({M, V}, gradOutput) + mytester:assert(#gradInput == 2, 'gradInput must be table of size 2') + local gradM, gradV = table.unpack(gradInput) + mytester:assertTableEq(gradM:size():totable(), M:size():totable(), + 'Gradient for input M has wrong size') + mytester:assertTableEq(gradV:size():totable(), V:size():totable(), + 'Gradient for input V has wrong size') + for i = 1, bSize do + mytester:assertTensorEq(gradM[i], torch.ger(V[i], gradOutput[i]), 1e-10, + 'Gradient for input M wrong for bSize = ' .. bSize .. ' and i = ' .. i) + mytester:assertTensorEq(gradV[i], M[i] * gradOutput[i], 1e-10, + 'Gradient for input V wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + end +end + +local function createMatrixInputSizes() + local M = torch.random(10, 20) + local N = torch.random(10, 20) + local P = torch.random(10, 20) + return M, N, P +end + +function nntest.MM() + local mm = nn.MM(false, true) + local M, N, P = createMatrixInputSizes() + local A = torch.randn(M, N) + local B = torch.randn(P, N) + + -- Test forward pass. + local output = mm:forward({A, B}) + mytester:assertTableEq(output:size():totable(), {M, P}, + 'Output has wrong dimensionality') + mytester:assertTensorEq(output, A * B:t(), 1e-10, + 'Wrong output') + + -- Test backward pass. 
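+ -- Sketch of where the expected gradients come from, for C = A * B:t()
+ -- with upstream gradient G: C(i,k) = sum_j A(i,j) B(k,j), hence
+ -- dL/dA(i,j) = sum_k G(i,k) B(k,j), i.e. gradA = G * B, and by the
+ -- symmetric argument gradB = G:t() * A.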
+ local gradOutput = torch.randn(M, P) + local gradInput = mm:backward({A, B}, gradOutput) + mytester:assert(#gradInput == 2, 'gradInput must be table of size 2') + local gradA, gradB = table.unpack(gradInput) + mytester:assertTableEq(gradA:size():totable(), A:size():totable(), + 'Gradient for input A has wrong size') + mytester:assertTableEq(gradB:size():totable(), B:size():totable(), + 'Gradient for input B has wrong size') + mytester:assertTensorEq(gradA, gradOutput * B, 1e-10, + 'Wrong gradient for input A') + mytester:assertTensorEq(gradB, gradOutput:t() * A, 1e-10, + 'Wrong gradient for input B') +end + +function nntest.BatchMMNoTranspose() + local mm = nn.MM() + local M, N, P = createMatrixInputSizes() + for bSize = 1, 11, 5 do + local A = torch.randn(bSize, M, N) + local B = torch.randn(bSize, N, P) + + -- Test forward pass. + local output = mm:forward({A, B}) + mytester:assertTableEq(output:size():totable(), {bSize, M, P}, + 'Output has wrong dimensionality') + for i = 1, bSize do + mytester:assertTensorEq(output[i], A[i] * B[i], 1e-10, + 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + + -- Test backward pass. + local gradOutput = torch.randn(bSize, M, P) + local gradInput = mm:backward({A, B}, gradOutput) + mytester:assert(#gradInput == 2, 'gradInput must be table of size 2') + local gradA, gradB = table.unpack(gradInput) + mytester:assertTableEq(gradA:size():totable(), A:size():totable(), + 'Gradient for input A has wrong size') + mytester:assertTableEq(gradB:size():totable(), B:size():totable(), + 'Gradient for input B has wrong size') + for i = 1, bSize do + mytester:assertTensorEq(gradA[i], gradOutput[i] * B[i]:t(), 1e-10, + 'Gradient for input A wrong for bSize = ' .. bSize .. ' and i = ' .. i) + mytester:assertTensorEq(gradB[i], A[i]:t() * gradOutput[i], 1e-10, + 'Gradient for input B wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + end +end + +function nntest.BatchMMTransposeA() + local mm = nn.MM(true, false) + local M, N, P = createMatrixInputSizes() + for bSize = 1, 11, 5 do + local A = torch.randn(bSize, N, M) + local B = torch.randn(bSize, N, P) + + -- Test forward pass. + local output = mm:forward({A, B}) + mytester:assertTableEq(output:size():totable(), {bSize, M, P}, + 'Output has wrong dimensionality') + for i = 1, bSize do + mytester:assertTensorEq(output[i], A[i]:t() * B[i], 1e-10, + 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + + -- Test backward pass. + local gradOutput = torch.randn(bSize, M, P) + local gradInput = mm:backward({A, B}, gradOutput) + mytester:assert(#gradInput == 2, 'gradInput must be table of size 2') + local gradA, gradB = table.unpack(gradInput) + mytester:assertTableEq(gradA:size():totable(), A:size():totable(), + 'Gradient for input A has wrong size') + mytester:assertTableEq(gradB:size():totable(), B:size():totable(), + 'Gradient for input B has wrong size') + for i = 1, bSize do + mytester:assertTensorEq(gradA[i], B[i] * gradOutput[i]:t(), 1e-10, + 'Gradient for input A wrong for bSize = ' .. bSize .. ' and i = ' .. i) + mytester:assertTensorEq(gradB[i], A[i] * gradOutput[i], 1e-10, + 'Gradient for input B wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + end +end + +function nntest.BatchMMTransposeB() + local mm = nn.MM(false, true) + local M, N, P = createMatrixInputSizes() + for bSize = 1, 11, 5 do + local A = torch.randn(bSize, M, N) + local B = torch.randn(bSize, P, N) + + -- Test forward pass. 
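+ -- Per batch element, output[i] = A[i] * B[i]:t(); the expected gradients
+ -- checked below (gradA[i] = G[i] * B[i], gradB[i] = G[i]:t() * A[i]) are
+ -- the unbatched transB closed forms applied slice by slice.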
+ local output = mm:forward({A, B}) + mytester:assertTableEq(output:size():totable(), {bSize, M, P}, + 'Output has wrong dimensionality') + for i = 1, bSize do + mytester:assertTensorEq(output[i], A[i] * B[i]:t(), 1e-10, + 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + + -- Test backward pass. + local gradOutput = torch.randn(bSize, M, P) + local gradInput = mm:backward({A, B}, gradOutput) + mytester:assert(#gradInput == 2, 'gradInput must be table of size 2') + local gradA, gradB = table.unpack(gradInput) + mytester:assertTableEq(gradA:size():totable(), A:size():totable(), + 'Gradient for input A has wrong size') + mytester:assertTableEq(gradB:size():totable(), B:size():totable(), + 'Gradient for input B has wrong size') + for i = 1, bSize do + mytester:assertTensorEq(gradA[i], gradOutput[i] * B[i], 1e-10, + 'Gradient for input A wrong for bSize = ' .. bSize .. ' and i = ' .. i) + mytester:assertTensorEq(gradB[i], gradOutput[i]:t() * A[i], 1e-10, + 'Gradient for input B wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + end +end + +function nntest.BatchMMTransposeBoth() + local mm = nn.MM(true, true) + local M, N, P = createMatrixInputSizes() + for bSize = 1, 11, 5 do + local A = torch.randn(bSize, N, M) + local B = torch.randn(bSize, P, N) + + -- Test forward pass. + local output = mm:forward({A, B}) + mytester:assertTableEq(output:size():totable(), {bSize, M, P}, + 'Output has wrong dimensionality') + for i = 1, bSize do + mytester:assertTensorEq(output[i], A[i]:t() * B[i]:t(), 1e-10, + 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + + -- Test backward pass. + local gradOutput = torch.randn(bSize, M, P) + local gradInput = mm:backward({A, B}, gradOutput) + mytester:assert(#gradInput == 2, 'gradInput must be table of size 2') + local gradA, gradB = table.unpack(gradInput) + mytester:assertTableEq(gradA:size():totable(), A:size():totable(), + 'Gradient for input A has wrong size') + mytester:assertTableEq(gradB:size():totable(), B:size():totable(), + 'Gradient for input B has wrong size') + for i = 1, bSize do + mytester:assertTensorEq(gradA[i], B[i]:t() * gradOutput[i]:t(), 1e-10, + 'Gradient for input A wrong for bSize = ' .. bSize .. ' and i = ' .. i) + mytester:assertTensorEq(gradB[i], gradOutput[i]:t() * A[i]:t(), 1e-10, + 'Gradient for input B wrong for bSize = ' .. bSize .. ' and i = ' .. i) + end + end +end + +function nntest.DotProduct() + local indim = math.random(1,10) + + -- test 1D forward + local input = {torch.rand(indim),torch.rand(indim)} + local module = nn.DotProduct() + local expected = input[1]:dot(input[2]) + local output = module:forward(input) + mytester:assertlt(math.abs(expected-output[1]), precision, 'error on forward ') + + -- check gradients + -- Note: testJacobian doesn't support table inputs, and rather than re-write + -- it so that it does, I'll just use a split table module on the input. + -- I assume both SplitTable and Sequential do not have bugs, otherwise this + -- test will break. + local input = torch.rand(2,indim) + local module = nn.Sequential() + module:add(nn.SplitTable(1)) + module:add(nn.DotProduct()) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. 
' - i/o backward err ', precision) + + -- batch + -- rebuild module to avoid correlated tests + local module = nn.Sequential() + module:add(nn.SplitTable(1)) + module:add(nn.DotProduct()) + + local nframes = math.random(1,10) + local indim = math.random(1,10) + local input = torch.rand(2,nframes,indim) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'batch error on state ') +end + +function nntest.CosineDistance() + local indim = math.random(1,10) + local input = {torch.rand(indim),torch.rand(indim)} + + -- check forward against previous implementation + local module = nn.CosineDistance() + + local w1 = input[1]:dot(input[2]) + local w2 = math.sqrt(input[1]:dot(input[1])) + local w3 = math.sqrt(input[2]:dot(input[2])) + local output_old = w1/w2/w3 + + local output = module:forward(input) + + mytester:assertlt(math.abs(output_old-output[1]),precision,'error on forward ') + + + -- check gradients + -- Note: testJacobian doesn't support table inputs, and rather than re-write + -- it so that it does, I'll just use a split table module on the input. + -- I assume both SplitTable and Sequential do not have bugs, otherwise this + -- test will break. + local input = torch.rand(2,indim) + local module = nn.Sequential() + module:add(nn.SplitTable(1)) + module:add(nn.CosineDistance()) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + + -- batch + -- rebuild module to avoid correlated tests + local module = nn.Sequential() + module:add(nn.SplitTable(1)) + module:add(nn.CosineDistance()) + + local nframes = math.random(1,10) + local indim = math.random(1,10) + local input = torch.rand(2,nframes,indim) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'batch error on state ') + +end + +function nntest.CosineEmbeddingCriterion() + local v1 = torch.Tensor{1, 0} + local v2 = torch.Tensor{0.5, math.sqrt(3)*0.5} + + -- v1 and v2 are 60 degrees apart, so cos = 0.5 < margin = 0.6; with target -1 + -- the loss and hence the gradients are zero + local crit = nn.CosineEmbeddingCriterion(0.6) + local output = crit:forward({v1, v2}, -1) -- must be called before backward + local grads = crit:backward({v1, v2}, -1) + + local zero = torch.Tensor(2):zero() + equal(grads[1], zero, 'gradient should be zero') + equal(grads[2], zero, 'gradient should be zero') + + -- check jacobians + local margin = math.random()*2-1 + local dim = 5 + local batch_size = 1 + local crit = nn.CosineEmbeddingCriterion(margin) + local v = torch.rand(2,dim) + criterionJacobianTest1DTable(crit,v,1) + criterionJacobianTest1DTable(crit,v,-1) + + -- batch with hand-computed values + local v1 = torch.Tensor{{1, 0}, {0.5, math.sqrt(3)*0.5}} + local v2 = torch.Tensor{{0.5, math.sqrt(3)*0.5}, {1, 0}} + + local t = torch.Tensor{-1,-1} + local crit = nn.CosineEmbeddingCriterion(0.6) + local output = crit:forward({v1, v2}, t) -- must be called before backward + local grads = crit:backward({v1, v2}, t) + + local zero = torch.Tensor(2,2):zero() + equal(grads[1], zero, 'gradient should be zero') + equal(grads[2], zero, 'gradient should be zero') + + -- batch, sizeAverage true, jacobian + local margin = math.random()*2-1 + local dim = 5 + local batch_size = 2 + local crit = nn.CosineEmbeddingCriterion(margin) + crit.sizeAverage = true + local v = torch.rand(2,batch_size,dim) + local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1) + 
criterionJacobianTest1DTable(crit,v,t) + + -- batch, sizeAverage false, jacobian + local margin = math.random()*2-1 + local crit = nn.CosineEmbeddingCriterion(margin) + crit.sizeAverage = false + local v = torch.rand(2,batch_size,dim) + local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1) + criterionJacobianTest1DTable(crit,v,t) +end + +function nntest.HingeEmbeddingCriterion() + local x = torch.Tensor{0.3,2.1,1.8,0} + local y = torch.Tensor{1,-1,-1,1} + local expgrads = torch.Tensor{1,0,-1,1} / 4 + + -- with margin 2: per-sample loss is x for y = 1 and max(0, 2 - x) for y = -1, + -- so the averaged output is (0.3 + 0 + 0.2 + 0) / 4 + local crit = nn.HingeEmbeddingCriterion(2) + local output = crit:forward(x, y) -- must be called before backward + local grads = crit:backward(x, y) + + mytester:assert(math.abs(output - (0.3 + 0.2) / 4) < 1e-10) + equal(grads, expgrads) +end + +function nntest.Replicate() + local vector = torch.rand(3) + + local r1 = nn.Replicate(2, 1) + local r2 = nn.Replicate(2, 2) + + local vOutput1 = r1:forward(vector):clone() + local vOutput2 = r2:forward(vector):clone() + + local expected1 = torch.zeros(2, 3) + local expected2 = torch.zeros(3, 2) + expected1:select(1, 1):copy(vector) + expected1:select(1, 2):copy(vector) + expected2:select(2, 1):copy(vector) + expected2:select(2, 2):copy(vector) + + mytester:assertTensorEq(vOutput1, expected1, precision, 'Wrong tiling of data when replicating vector.') + mytester:assertTensorEq(vOutput2, expected2, precision, 'Wrong tiling of data when replicating vector.') + + -- batch mode + local vector = torch.rand(4,3) + + local r1 = nn.Replicate(2, 1, 1) + local r2 = nn.Replicate(2, 2, 1) + + local vOutput1 = r1:forward(vector):clone() + local vOutput2 = r2:forward(vector):clone() + + local expected1 = torch.zeros(4, 2, 3) + local expected2 = torch.zeros(4, 3, 2) + expected1:select(2, 1):copy(vector) + expected1:select(2, 2):copy(vector) + expected2:select(3, 1):copy(vector) + expected2:select(3, 2):copy(vector) + + mytester:assertTensorEq(vOutput1, expected1, precision, 'Wrong tiling of data when replicating batch vector.') + mytester:assertTensorEq(vOutput2, expected2, precision, 'Wrong tiling of data when replicating batch vector.') +end + +local function testBatchNormalization(moduleName, dim, k) + local planes = torch.random(1,k) + local size = { torch.random(2, k), planes } + for i=1,dim do + table.insert(size, torch.random(1,k)) + end + local input = torch.zeros(table.unpack(size)):uniform() + + local function jacTests(module, input, affine) + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + if affine then + local err = jac.testJacobianParameters(module, input, + module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, + module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format('error on bias [%s]', t)) + end + end + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:eq(ferr, 0, 
torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) + end + + local module = nn[moduleName](planes) + module:training() + jacTests(module, input, true) + module:evaluate() + jacTests(module, input, true) + jacTests(module, input[1], true) + + -- batch norm without affine transform + module = nn[moduleName](planes, 1e-5, 0.1, false) + module:training() + jacTests(module, input, false) + module:evaluate() + jacTests(module, input, false) + jacTests(module, input[1], false) +end + +function nntest.BatchNormalization() + testBatchNormalization('BatchNormalization', 0, 20) +end + +function nntest.SpatialBatchNormalization() + testBatchNormalization('SpatialBatchNormalization', 2, 6) +end + +function nntest.VolumetricBatchNormalization() + testBatchNormalization('VolumetricBatchNormalization', 3, 4) +end + +function nntest.GradientReversal() + local ini = math.random(3,5) + local inj = math.random(3,5) + local ink = math.random(3,5) + local input = torch.Tensor(ini,inj,ink):zero() + -- Two GradientReversal layers should cancel each other out + local module = nn.Sequential() + module:add(nn.GradientReversal()) + module:add(nn.GradientReversal()) + + local err = jac.testJacobian(module,input, 0.1, 10) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input, 0.1, 10) + mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision) + mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision) +end + +function nntest.Padding() + local fanin = math.random(1,3) + local sizex = math.random(4,16) + local sizey = math.random(4,16) + local pad = math.random(-3,3) + local index = math.random(1, fanin) + local val = torch.randn(1):squeeze() + local module = nn.Padding(1, pad, 3, val, index) + local input = torch.rand(fanin,sizey,sizex) + local size = input:size():totable() + size[1] = size[1] + math.abs(pad) + + local output = module:forward(input) + mytester:assertTableEq(size, output:size():totable(), 0.00001, "Padding size error") + + local gradInput = module:backward(input, output) + mytester:assertTensorEq(gradInput, input, 0.00001, "Padding backward error") +end + +function nntest.addSingletonDimension() + local dims = torch.random(5) + local size = torch.LongTensor(dims):random(10) + local perm = torch.randperm(dims):totable() + local tensor = torch.Tensor(table.unpack(size:totable())):uniform():permute(table.unpack(perm)) + size = torch.gather(size, 1, torch.LongTensor(perm)) + + local firstDim = nn.utils.addSingletonDimension(tensor) + mytester:assertTableEq(firstDim:size():totable(), {1, table.unpack(size:totable())}, + "wrong size for singleton dimension 1") + mytester:assertTensorEq(firstDim[1], tensor, 0, + "wrong content for singleton dimension 1") + + local dim = torch.random(dims + 1) + local result = nn.utils.addSingletonDimension(tensor, dim) + local resultSize = size:totable() + table.insert(resultSize, dim, 1) + mytester:assertTableEq(result:size():totable(), resultSize, + "wrong size for random singleton dimension") + mytester:assertTensorEq(result:select(dim, 1), tensor, 0, + "wrong content for random singleton dimension") + + mytester:assertError(function() nn.utils.addSingletonDimension(tensor, dims + 2) end, + "invalid dimension not detected") + + -- passing output tensor as argument + local resultArg = torch.Tensor() + local resultR = nn.utils.addSingletonDimension(resultArg, tensor, dim) + 
mytester:eq(resultArg:size():totable(), resultSize, + 'wrong size for random singleton dimension '.. + 'when the result is passed as argument') + mytester:eq(resultArg, result, 'wrong content for random singleton dimension '.. + 'when the result is passed as argument') + + mytester:eq(resultR == resultArg, true, + 'new tensor is created when it should use the provided tensor') +end + +function nntest.SpatialReflectionPadding() + local batch = math.random(1,3) + local plane = math.random(1,3) + local sizeY = math.random(7,16) + local sizeX = math.random(7,16) + local padL = math.random(-3,3) + local padR = math.random(-3,3) + local padT = math.random(-3,3) + local padB = math.random(-3,3) + local jac = nn.Jacobian + local layer = nn.SpatialReflectionPadding(padL, padR, padT, padB) + local input = torch.rand(batch, plane, sizeY, sizeX) + local err = jac.testJacobian(layer, input) + mytester:assertalmosteq(err, 0.0, 1e-7) +end + +function nntest.SpatialReplicationPadding() + local batch = math.random(1,3) + local plane = math.random(1,3) + local sizeY = math.random(7,16) + local sizeX = math.random(7,16) + local padL = math.random(-3,3) + local padR = math.random(-3,3) + local padT = math.random(-3,3) + local padB = math.random(-3,3) + local jac = nn.Jacobian + local layer = nn.SpatialReplicationPadding(padL, padR, padT, padB) + local input = torch.rand(batch, plane, sizeY, sizeX) + local err = jac.testJacobian(layer, input) + mytester:assertalmosteq(err, 0.0, 1e-7) +end + +function nntest.VolumetricReplicationPadding() + for batch = 0, 1 do + local nbatch + if batch == 1 then + nbatch = math.random(1,3) + end + local plane = math.random(1,3) + local sizeZ = math.random(1,4) + local sizeY = math.random(7,11) + local sizeX = math.random(7,11) + local padLeft = math.random(-3,3) + local padRight = math.random(-3,3) + local padTop = math.random(-3,3) + local padBottom = math.random(-3,3) + local padFront = 3 -- sizeZ can be as small as 1, so front/back padding must stay non-negative + local padBack = 3 + local jac = nn.Jacobian + local layer = + nn.VolumetricReplicationPadding(padLeft, padRight, padTop, + padBottom, padFront, padBack) + local input + if batch == 1 then + input = torch.rand(nbatch, plane, sizeZ, sizeY, sizeX) + else + input = torch.rand(plane, sizeZ, sizeY, sizeX) + end + local err = jac.testJacobian(layer, input) + mytester:assertalmosteq(err, 0.0, 1e-7) + end +end + +function nntest.PixelShuffle() + -- Checks whether a given tensor has the specified size + local function tensorHasSize(tensor, size) + local tensorSize = tensor:size() + + if tensorSize:size() ~= #size then + return false + end + for i,v in ipairs(size) do + if tensorSize[i] ~= size[i] then + return false + end + end + return true + end + + --Verifies that the output is the input re-shuffled as per Eq 4. in + -- "Real-Time Single Image and Video Super-Resolution Using an Efficient + -- Sub-Pixel Convolutional Neural Network", Shi et al. + -- @param - the input, low-resolution image of shape [1, c, h, w] + -- @param - the output, super-resolved image of shape [1, c/(upscaleFactor^2), h*upscaleFactor, w*upscaleFactor] + -- @param - upscale factor of the super resolution + -- @returns true if output complies with Eq 4. 
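+ -- A worked instance of the mapping verified below (illustrative, r = 2,
+ -- first output channel): channelIdx = 2*((h-1) % 2) + ((w-1) % 2) + 1, so
+ -- output positions (h,w) = (1,1),(1,2),(2,1),(2,2) read from input
+ -- channels 1,2,3,4 respectively, all at the same input cell
+ -- (heightIdx, widthIdx) = (floor((h-1)/2)+1, floor((w-1)/2)+1).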
+ local function verifyPixelShuffle(_input, _output, upscaleFactor) + local input = _input + local output = _output + + if input:nDimension() == 3 then + input = input:view(1, input:size(1), input:size(2), input:size(3)) + output = output:view(1, output:size(1), output:size(2), output:size(3)) + end + + for c = 1, output:size(2) do + for h = 1, output:size(3) do + for w = 1, output:size(4) do + local heightIdx = torch.floor((h - 1)/upscaleFactor) + 1 + local widthIdx = torch.floor((w - 1)/upscaleFactor) + 1 + --c does not need to be (c - 1) as it starts at 1 not zero + local channelIdx = upscaleFactor * ((h-1) % upscaleFactor) + ((w-1) % upscaleFactor) + 1 + (c-1)*upscaleFactor*upscaleFactor + + mytester:assertTensorEq(output[{{}, {c}, {h}, {w}}], input[{{}, {channelIdx}, {heightIdx}, {widthIdx}}], + string.format("output at location (%d, %d, %d) is incorrect", c, h, w)) + end + end + end + return true + end + + -- Checks the nn.PixelShuffle layer's forward pass. It checks that it + -- re-arranges input pixels correctly according to Eq. 4 of + -- "Real-Time Single Image and Video Super-Resolution Using an Efficient + -- Sub-Pixel Convolutional Neural Network", Shi et al. + -- This function tests for multiple batch sizes, multiple channels and multiple input dimensions (square) + -- It also tests for normal tensors (un-batched) + local function testPixelShuffleUpdateOutput() + --Test with batched input + for h = 1, 3 do + local batchSize = torch.round(torch.uniform(1, 3)) + for i = 1, 3 do + local upscaleFactor = torch.round(torch.uniform(2,5)) + local pixelShuffle = nn.PixelShuffle(upscaleFactor) + for j = 1, 3 do + local channels = torch.round(torch.uniform(1, 4)) + for k = 1, 3 do + + local inputDim = torch.round(torch.uniform(5, 10)) + local input = torch.Tensor(batchSize, channels * upscaleFactor * upscaleFactor, inputDim, inputDim) + input:uniform() + + local output = pixelShuffle:forward(input) + local expectedOutputDim = inputDim * upscaleFactor + mytester:assert(tensorHasSize(output, {batchSize, channels, expectedOutputDim, expectedOutputDim}), + string.format("Output tensor should have size (%d, %d, %d, %d) not %s", batchSize, channels, expectedOutputDim, expectedOutputDim, tostring(output:size()))) + verifyPixelShuffle(input, output, upscaleFactor) + end + end + end + end + + --Test with non-batched input + local inputDim = torch.round(torch.uniform(5, 10)) + local channels = torch.round(torch.uniform(1, 4)) + local upscaleFactor = torch.round(torch.uniform(2,5)) + + local input = torch.Tensor(channels * upscaleFactor * upscaleFactor, inputDim, inputDim) + input:uniform() + + local pixelShuffle = nn.PixelShuffle(upscaleFactor) + local output = pixelShuffle:forward(input) + local expectedOutputDim = inputDim * upscaleFactor + mytester:assert(tensorHasSize(output, {channels, expectedOutputDim, expectedOutputDim}), + string.format("Output tensor should have size (%d, %d, %d) not %s", channels, expectedOutputDim, expectedOutputDim, tostring(output:size()))) + + verifyPixelShuffle(input, output, upscaleFactor) + end + + -- Checks the nn.PixelShuffle layer's backward pass. It checks that it + -- essentially performs the inverse of Eq 4. in + -- "Real-Time Single Image and Video Super-Resolution Using an Efficient + -- Sub-Pixel Convolutional Neural Network", Shi et al. 
+ -- This function tests for multiple batch sizes, multiple channels and multiple input dimensions (square) + -- It also tests for normal tensors (un-batched) + local function testPixelShuffleUpdateGradInput() + --Test with batched input + for h = 1, 3 do + local batchSize = torch.round(torch.uniform(1, 3)) + for i = 1, 3 do + local upscaleFactor = torch.round(torch.uniform(2,5)) + local pixelShuffle = nn.PixelShuffle(upscaleFactor) + for j = 1, 3 do + local channels = torch.round(torch.uniform(1, 4)) + for k = 1, 3 do + local inputDim = torch.round(torch.uniform(5, 10)) + local input = torch.Tensor(batchSize, channels * upscaleFactor * upscaleFactor, inputDim, inputDim) + + input:uniform() + + local output = pixelShuffle:forward(input) + --here we treat output as the same as gradOutput as they have the same shape + local reconstructedInput = pixelShuffle:backward(input, output) + mytester:assertTensorEq(reconstructedInput, input, 0) + end + end + end + end + + --Test with non-batched input + local inputDim = torch.round(torch.uniform(5, 10)) + local channels = torch.round(torch.uniform(1, 4)) + local upscaleFactor = torch.round(torch.uniform(2,5)) + local input = torch.Tensor(channels * upscaleFactor * upscaleFactor, inputDim, inputDim) + input:uniform() + + local pixelShuffle = nn.PixelShuffle(upscaleFactor) + local output = pixelShuffle:forward(input) + --here we treat output as the same as gradOutput as they have the same shape + local reconstructedInput = pixelShuffle:backward(input, output) + mytester:assertTensorEq(reconstructedInput, input, 0) + + local err = jac.testJacobian(pixelShuffle, input) + mytester:assertlt(err,precision, "error computing gradients w.r.t. inputs") + end + + local function testModuleIO() + --Test with non-batched input + local inputDim = torch.round(torch.uniform(5, 10)) + local channels = torch.round(torch.uniform(1, 4)) + local upscaleFactor = torch.round(torch.uniform(2,5)) + local input = torch.Tensor(channels * upscaleFactor * upscaleFactor, inputDim, inputDim):uniform() + local pixelShuffle = nn.PixelShuffle(upscaleFactor) + + local fwdErr,bkwdErr = jac.testIO(pixelShuffle,input) + mytester:asserteq(fwdErr, 0, torch.typename(pixelShuffle) .. " - i/o forward err ") + mytester:asserteq(bkwdErr, 0, torch.typename(pixelShuffle) .. 
" - i/o backward err ") + end + + testPixelShuffleUpdateOutput() + testPixelShuffleUpdateGradInput() + testModuleIO() +end + +function nntest.Typecast() + local function make_network() + local seq = nn.Sequential() + seq:add(nn.Linear(15, 10)) + seq:add(nn.Linear(15, 10)) + seq.modules[1].bias:fill(1) + seq.modules[2].bias:fill(2) + return seq + end + + -- make sure that the typecasts aren't nops + assert(torch.getdefaulttensortype() == 'torch.DoubleTensor') + + -- basic net + local net = make_network() + net.modules[1].empty_tensor = torch.Tensor() + net:float() + assert(net.modules[1].bias:type() == 'torch.FloatTensor', + net.modules[1].bias:type()) + assert(net.modules[1].empty_tensor:type() == 'torch.FloatTensor') + assert(net.modules[1].bias ~= net.modules[2].bias) + net.modules[1].bias:fill(3) + assert(net.modules[1].bias[1] == 3) + assert(net.modules[2].bias[1] == 2) + + -- shared tensors remain shared + local net = make_network() + net.modules[2].bias = net.modules[1].bias + net:float() + assert(net.modules[1].bias:type() == 'torch.FloatTensor') + assert(net.modules[1].bias == net.modules[2].bias) + assert(net.modules[1].bias[1] == 1) + + -- shared storages remain shared + local net = make_network() + net.modules[2].bias:set(net.modules[1].bias) + local net = net:float() + assert(net.modules[1].bias:type() == 'torch.FloatTensor') + assert(net.modules[1].bias ~= net.modules[2].bias) + net.modules[1].bias:fill(3) + assert(net.modules[1].bias[1] == 3) + assert(net.modules[2].bias[1] == 3) + + -- tricky: overlapping views on the same storage are preserved + local net = make_network() + local overlap_storage = torch.Tensor(15):fill(1) + net.modules[1].bias = overlap_storage:narrow(1, 1, 10) + net.modules[2].bias = overlap_storage:narrow(1, 6, 10) + net:float() + assert(net.modules[1].bias:type() == 'torch.FloatTensor') + assert(net.modules[1].bias ~= net.modules[2].bias) + net.modules[1].bias:fill(3) + assert(net.modules[1].bias[1] == 3) + assert(net.modules[2].bias[1] == 3) + assert(net.modules[2].bias[6] == 1) -- only the first 5 elements overlapped + + -- check recursiveType on a table + local net1 = make_network() + local net2 = make_network() + net2.modules[1].bias:set(net1.modules[1].bias) + net1:float() + net2:float() + net1.modules[1].bias:fill(3) + assert(net2.modules[1].bias[1] == 1) + + local net1 = make_network() + local net2 = make_network() + net2.modules[1].bias:set(net1.modules[1].bias) + + local tensorCache = {} + net1:type('torch.FloatTensor', tensorCache) + net2:type('torch.FloatTensor', tensorCache) + net1.modules[1].bias:fill(3) + assert(net2.modules[1].bias[1] == 3) + + local net1 = make_network() + local net2 = make_network() + net2.modules[1].bias:set(net1.modules[1].bias) + + nn.utils.recursiveType({net1, net2}, 'torch.FloatTensor') + net1.modules[1].bias:fill(3) + assert(net2.modules[1].bias[1] == 3) + + -- smoke test some modules with custom type methods + local custom_type_modules = { + nn.MixtureTable(3), + nn.ConcatTable(), + nn.Copy(), + nn.Copy(nil, nil, nil, true), + nn.SpatialContrastiveNormalization(), + nn.DotProduct(), + nn.PairwiseDistance(1), + nn.SpatialDivisiveNormalization(), + nn.SpatialSubtractiveNormalization() + } + for _, module in ipairs(custom_type_modules) do + module:float() + end +end + +function nntest.Module_apply() + local s = nn.Sequential() + s:add(nn.Linear(10,10)) + local s2 = nn.Sequential() + s2:add(nn.Linear(10,5)) + s:add(s2) + s:add(nn.Tanh()) + + local seen = 0 + s:apply(function(module) + if torch.type(module) == 
'nn.Linear' then + module.bias:resize(20) + seen = seen + 1 + end + end) + mytester:asserteq(seen, 2) + mytester:asserteq(s.modules[1].bias:size(1), 20) + mytester:asserteq(s2.modules[1].bias:size(1), 20) +end + +function nntest.Module_replace() + -- test replace in container + local s = nn.Sequential() + s:add(nn.Linear(10,10)) + s:add(nn.Sigmoid()) + s:replace(function(module) + return torch.type(module) == 'nn.Sigmoid' and nn.Tanh() or module + end) + -- test replace of a single module + local single = nn.Tanh() + local replaced = single:replace(function(module) + return torch.type(module) == 'nn.Tanh' and nn.Sigmoid() or module + end) + mytester:asserteq(torch.type(s:get(2)), 'nn.Tanh', 'replace in container') + mytester:asserteq(torch.type(replaced), 'nn.Sigmoid', 'replace in single module') +end + +function nntest.Cosine() + local inputSize = 4 + local outputSize = 5 + + -- test 1D + local input = torch.randn(inputSize) + local gradOutput = torch.randn(outputSize) + local cosine = nn.Cosine(inputSize,outputSize) + local output = cosine:forward(input) + local inputNorm = input:norm()+1e-12 + local weight2 = cosine.weight[2] + local output2 = torch.dot(weight2, input)/((weight2:norm()+1e-12)*inputNorm) + mytester:assert(math.abs(output2 - output[2]) < 0.000001,"Cosine output 1D err weight[2]") + local output2 = torch.mv(cosine.weight, input) + output2:cdiv(cosine.weight:norm(2,2)+1e-12):div(inputNorm) + mytester:assertTensorEq(output, output2, 0.000001, "Cosine output 1D err") + local gradInput = cosine:updateGradInput(input, gradOutput) + local gradInput2 = gradInput:clone():zero() + for j=1,outputSize do + local w_j = cosine.weight[j] + local nw_j = w_j:norm()+1e-12 + for i=1,inputSize do + local w_ij = w_j[i] + local grad_i = (w_ij/(inputNorm*nw_j)) + grad_i = grad_i - (output[j]*input[i]/(inputNorm*inputNorm)) + grad_i = grad_i * gradOutput[j] + gradInput2[i] = gradInput2[i] + grad_i + end + end + mytester:assertTensorEq(gradInput2, gradInput, 0.000001, "Cosine gradInput 1D err") + cosine:zeroGradParameters() + cosine:accGradParameters(input, gradOutput, 1) + local gradWeight2 = cosine.weight:clone():zero() + for j=1,outputSize do + local w_j = cosine.weight[j] + local nw_j = w_j:norm()+1e-12 + for i=1,inputSize do + local w_ij = w_j[i] + local gW_ij = (gradOutput[j]/nw_j) * ( ( input[i] / inputNorm ) - (output[j] * w_ij / nw_j) ) + gradWeight2[{j,i}] = gW_ij + end + end + mytester:assertTensorEq(cosine.gradWeight, gradWeight2, 0.000001, "Cosine gradWeight 1D err") + + -- test 2D + local batchSize = 3 + local input = torch.randn(batchSize, inputSize) + local gradOutput = torch.randn(batchSize, outputSize) + cosine:zeroGradParameters() + local cosine2 = cosine:clone() + local output = cosine:forward(input) + local output2 = cosine2:forward(input[2]) + mytester:assertTensorEq(output[2], output2, 0.000001, "Cosine output 2D err") + local gradInput = cosine:backward(input, gradOutput) + + local gradInput2 = gradInput:clone():zero() + for i=1,batchSize do + cosine2:forward(input[i]) + gradInput2[i]:copy(cosine2:backward(input[i], gradOutput[i])) + end + mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "Cosine gradInput 2D err") + mytester:assertTensorEq(cosine.gradWeight, cosine2.gradWeight, 0.000001, "Cosine gradWeight 2D err") +end + +function nntest.DistanceRatioCriterion() + local sizeAverage = true + local crit = nn.DistanceRatioCriterion(sizeAverage) + local X = torch.rand(32,1):fill(1) + local Y = torch.rand(32,1):fill(1) + + -- Unit Test updateOutput + 
local loss = crit:forward({X, Y}) + local trueLoss = 1 + math.log(math.exp(-1) + math.exp(-1)) + assert(math.abs(loss - trueLoss) < 0.000001, + "DistanceRatioCriterion forward incorrect output") + + -- Unit Test updateGradInput + local dxdy = crit:backward({X, Y}) + local dx = dxdy[1] + local dy = dxdy[2] + assert(math.abs(dx:sum() - 0.5) < 0.000001, + "DistanceRatioCriterion backward (dx) incorrect output") + assert(math.abs(dy:sum() + 0.5) < 0.000001, + "DistanceRatioCriterion backward (dy) incorrect output") +end + +function nntest.ErrorHandling() + local l = nn.Linear(1, 1) + local p = nn.Parallel(1, 1):add(l) + local c = nn.Concat(1):add(p) + local model = nn.Sequential():add(nn.Identity()):add(c):add(nn.Identity()) + local function errmsg(module, i) + return 'In ' .. i .. ' module of ' .. torch.type(module) .. ':\n' + end + local expected_err = errmsg(model, 2) .. errmsg(c, 1) .. errmsg(p, 1) + mytester:assertErrorObj( + function() + model:forward(torch.randn(1,2,2)) + end, + function(err) + return err:find(expected_err) and err:find('size mismatch') + end, + "Failure expected or bad error message (missing information or reason)" + ) +end + +function nntest.GPU() + -- this is a placeholder to let you know that the nn.GPU unit test + -- is located in cunn package. +end + +function nntest.Profile() + local mx_overhead = 0.05 + local print_every = 3 + local net = nn.Profile(nn.Linear(3,4), print_every) + local input, gradOutput = torch.randn(1, 3), torch.randn(1, 4) + local output, gradInput = net:forward(input), net:backward(input, gradOutput) + mytester:assertTensorEq(net.modules[1].output, output, 0.000001) + mytester:assertTensorEq(net.modules[1].gradInput, gradInput, 0.000001) +end + +function nntest.NaN() + local _ = require 'moses' + local input = torch.randn(2,3) + local gradOutput = torch.randn(2,4) + local lin = nn.Linear(3,4) + lin:zeroGradParameters() + local nan = nn.NaN(lin) + mytester:assert(nan.id == 1) + -- test that it works when no NaNs are present + local output = nan:forward(input):clone() + local gradInput = nan:backward(input, gradOutput):clone() + local gradWeight = lin.gradWeight:clone() + local gradBias = lin.gradBias:clone() + lin:zeroGradParameters() + local output2 = lin:forward(input) + local gradInput2 = lin:backward(input, gradOutput) + mytester:assertTensorEq(output, output2, 0.000001) + mytester:assertTensorEq(gradInput, gradInput2, 0.000001) + mytester:assertTensorEq(gradWeight, lin.gradWeight, 0.000001) + mytester:assertTensorEq(gradBias, lin.gradBias, 0.000001) + -- test with some NaNs + input:zero():log():log() + local sum = input:sum() + mytester:assert(_.isNaN(sum)) + mytester:assert(not pcall(function() nan:forward(input) end)) + lin.bias:fill(sum) + input = torch.randn(2,3) + mytester:assert(not pcall(function() nan:forward(input) end)) + lin.bias:uniform(0,1) + gradOutput:fill(sum) + mytester:assert(not pcall(function() nan:backward(input, gradOutput) end)) + gradOutput:uniform(0,1) + lin.gradBias:fill(sum) + mytester:assert(not pcall(function() nan:backward(input, gradOutput) end)) +end + +function nntest.DontCast() + local input = torch.randn(3,4) + local gradOutput = torch.randn(3,2) + local linear = nn.Linear(4,2):float() + local mlp = nn.DontCast(linear, true) + linear:zeroGradParameters() + local linear = linear:clone() + local output = mlp:forward(input) + local gradInput = mlp:backward(input, gradOutput) + mytester:assert(torch.type(output) == 'torch.DoubleTensor') + mytester:assert(torch.type(gradInput) == 'torch.DoubleTensor') + local 
output2 = linear:forward(input:float()) + local gradInput2 = linear:backward(input:float(), gradOutput:float()) + mytester:assertTensorEq(output:float(), output2, 0.000001) + mytester:assertTensorEq(gradInput:float(), gradInput2, 0.000001) + local mlp3 = nn.DontCast(linear:clone()) + mlp3:zeroGradParameters() + local output3 = mlp3:forward(input:float()) + local gradInput3 = mlp3:backward(input:float(), gradOutput:float()) + mytester:assert(torch.type(output3) == 'torch.FloatTensor') + mytester:assert(torch.type(gradInput3) == 'torch.FloatTensor') + mytester:assertTensorEq(output3, output2, 0.000001) + mytester:assertTensorEq(gradInput3, gradInput2, 0.000001) + + mlp:float() + local output4 = mlp:forward(input:float()) + local gradInput4 = mlp:backward(input:float(), gradOutput:float()) + mytester:assert(torch.type(output4) == 'torch.FloatTensor') + mytester:assert(torch.type(gradInput4) == 'torch.FloatTensor') + mytester:assertTensorEq(output3, output4, 0.000001) + mytester:assertTensorEq(gradInput3, gradInput4, 0.000001) + mlp:double() + mytester:assert(torch.type(linear.output) == 'torch.FloatTensor') + local output = mlp:forward(input) + local gradInput = mlp:backward(input, gradOutput) + mytester:assert(torch.type(output4) == 'torch.FloatTensor') + mytester:assert(torch.type(gradInput4) == 'torch.FloatTensor') + mytester:assertTensorEq(output3, output:float(), 0.000001) + mytester:assertTensorEq(gradInput3, gradInput:float(), 0.000001) + + -- test table inputs/outputs + local input = {torch.randn(3,4), torch.randn(3,4)} + local gradOutput = {torch.randn(3,2), torch.randn(3,2)} + local linear = nn.ParallelTable():add(nn.Linear(4,2)):add(nn.Linear(4,2)):float() + local mlp = nn.DontCast(linear, true) + linear:zeroGradParameters() + local linear = linear:clone() + local output = mlp:forward(input) + local gradInput = mlp:backward(input, gradOutput) + mytester:assert(torch.type(output[1]) == 'torch.DoubleTensor') + mytester:assert(torch.type(gradInput[1]) == 'torch.DoubleTensor') + mytester:assert(torch.type(output[2]) == 'torch.DoubleTensor') + mytester:assert(torch.type(gradInput[2]) == 'torch.DoubleTensor') + local _ = require 'moses' + local finput = _.map(input, function(k,v) return v:float() end) + local foutput = _.map(output, function(k,v) return v:float() end) + local fgradInput = _.map(gradInput, function(k,v) return v:float() end) + local fgradOutput = _.map(gradOutput, function(k,v) return v:float() end) + local output2 = linear:forward(finput) + local gradInput2 = linear:backward(finput, fgradOutput) + mytester:assertTensorEq(foutput[1], output2[1], 0.000001) + mytester:assertTensorEq(foutput[2], output2[2], 0.000001) + mytester:assertTensorEq(fgradInput[1], gradInput2[1], 0.000001) + mytester:assertTensorEq(fgradInput[2], gradInput2[2], 0.000001) + local mlp3 = nn.DontCast(linear:clone()) + mlp3:zeroGradParameters() + local output3 = mlp3:forward(finput) + local gradInput3 = mlp3:backward(finput, fgradOutput) + mytester:assert(torch.type(output3[1]) == 'torch.FloatTensor') + mytester:assert(torch.type(gradInput3[1]) == 'torch.FloatTensor') + mytester:assert(torch.type(output3[2]) == 'torch.FloatTensor') + mytester:assert(torch.type(gradInput3[2]) == 'torch.FloatTensor') + mytester:assertTensorEq(output3[1], output2[1], 0.000001) + mytester:assertTensorEq(gradInput3[1], gradInput2[1], 0.000001) + mytester:assertTensorEq(output3[2], output2[2], 0.000001) + mytester:assertTensorEq(gradInput3[2], gradInput2[2], 0.000001) + mlp:float() + local output4 = mlp:forward(finput) + 
local gradInput4 = mlp:backward(finput, fgradOutput) + mytester:assert(torch.type(output4[1]) == 'torch.FloatTensor') + mytester:assert(torch.type(gradInput4[1]) == 'torch.FloatTensor') + mytester:assert(torch.type(output4[2]) == 'torch.FloatTensor') + mytester:assert(torch.type(gradInput4[2]) == 'torch.FloatTensor') + mytester:assertTensorEq(output3[1], output4[1], 0.000001) + mytester:assertTensorEq(gradInput3[1], gradInput4[1], 0.000001) + mytester:assertTensorEq(output3[2], output4[2], 0.000001) + mytester:assertTensorEq(gradInput3[2], gradInput4[2], 0.000001) + mlp:double() + mytester:assert(torch.type(linear.output) == 'table') + mytester:assert(torch.type(linear.output[1]) == 'torch.FloatTensor') + mytester:assert(torch.type(linear.output[2]) == 'torch.FloatTensor') + local output = mlp:forward(input) + local gradInput = mlp:backward(input, gradOutput) + mytester:assertTensorEq(output3[1], output[1]:float(), 0.000001) + mytester:assertTensorEq(gradInput3[1], gradInput[1]:float(), 0.000001) +end + +function nntest.SpatialDepthWiseConvolution() + local epsilon = 0.00001 + + local SC = nn.SpatialConvolution + local SDWC = nn.SpatialDepthWiseConvolution + + local function spatialDepthWiseConv( + nInputPlane, multiplier, kernel, stride, padding, inputSize, weight, bias + ) + local conv = SDWC(nInputPlane, multiplier, kernel, kernel, stride, stride, padding, padding) + conv.weight = weight + conv.bias = bias + return conv + end + + -- Utility spatialDepthWiseConv_util() function -------------------------------- + -- By Alfredo Canziani, alfredo.canziani@gmail.com ----------------------------- + local function spatialDepthWiseConv_util( + nInputPlane, multiplier, kernel, stride, padding, inputSize, weight, bias + ) + + local conv = nn.Sequential() + conv:add(nn.Contiguous()) + conv:add(nn.View(-1, 1, inputSize, inputSize)) + conv:add(SC(1, multiplier, kernel, kernel, stride, stride, padding, padding)) + + local depthWiseConv = nn.Parallel(2, 2) + for channel = 1, nInputPlane do + local tempConv = conv:clone() + tempConv:get(3).weight = weight:narrow(2, channel, 1):clone() + tempConv:get(3).bias = bias:select(2, channel):clone() + depthWiseConv:add(tempConv) + end + depthWiseConv:add(nn.Contiguous()) + return depthWiseConv + end + + local n = 3 -- nInputPlane + local s = 28 -- input height and width + local b = 3 -- batch size + local m = 4 -- multiplier + local k = 3 -- kernel size + local p = 1 -- padding + local st = 1 -- stride + + local testBatch = 1e3 -- number of repetitions + + local X = torch.rand(b, n, s, s) -- b x n x s x s = 3x3x28x28 images + local weight = torch.rand(m, n, k, k) -- weight + local bias = torch.rand(m, n) -- bias + + local model = spatialDepthWiseConv(n, m, k, st, p, s, weight, bias) + local model_util = spatialDepthWiseConv_util(n, m, k, st, p, s, weight, bias) + + local Y_util = model_util:forward(X) + local Y = model:forward(X) + + local abs_diff = Y_util:clone():csub(Y):abs() + mytester:assert(torch.all(abs_diff:lt(epsilon))) +end + +function nntest.Constant() + local input = torch.randn(20,3,7) + local gradOutput = torch.randn(20,30,6) + local value = torch.randn(30,6) + local const = nn.Constant(value:clone(), 2) + local output = const:forward(input) + local gradInput = const:backward(input, output) + local output2 = value:view(1,30,6):expand(20,30,6) + mytester:assertTensorEq(output2, output, 0.000001, "Constant forward err") + mytester:assertTensorEq(gradInput, input:zero(), 0.000001, "Constant backward err") +end + +function nntest.WhiteNoise() + local input = 
torch.zeros(3, 28, 28) + local addNoise = nn.WhiteNoise() + local output = addNoise:forward(input) + local meanValue = output:mean() + local stdValue = output:std() + mytester:assert(meanValue > -0.01 and meanValue < 0.01) + mytester:assert(stdValue < 0.15 and stdValue >= 0) + + -- Evaluate + addNoise:evaluate() + output = addNoise:forward(input) + meanValue = output:mean() + stdValue = output:std() + mytester:assert(meanValue == 0) + mytester:assert(stdValue == 0) + + -- backprop + addNoise:training() + local gradOutput = torch.rand(3, 28, 28) + local gradInput = addNoise:updateGradInput(input, gradOutput) + mytester:assertTensorEq(gradOutput, gradInput, 0.000001, "WhiteNoise backward err") +end + +function nntest.OneHot() + local nClass = 10 + + -- batch mode + local batchSize = 3 + local input = torch.LongTensor(batchSize):random(1, nClass) + local gradOutput = torch.randn(batchSize, nClass) + + local oh = nn.OneHot(nClass) + + local output = oh:forward(input) + local output2 = torch.Tensor(batchSize, nClass):zero() + local eye = torch.eye(nClass) + output2:index(eye, 1, input) + mytester:assertTensorEq(output, output2, 0.000001, "OneHot forward batch err") + mytester:assert(output:dim() == 2) + + -- non-batch mode (number input) + local num = 3 + local output3 = torch.zeros(nClass) + output3[num] = 1.0 + mytester:assertTensorEq(oh:forward(num), output3, 0.000001, "OneHot forward number err") + + local gradInput = oh:backward(input, gradOutput) + mytester:assertTensorEq(gradInput, input:double():zero(), 0.000001, "OneHot backward batch err") + + if pcall(function() require 'cunn' end) then + oh:cuda() + + -- test with long input + local output = oh:forward(input) + mytester:assert(torch.type(output) == 'torch.CudaTensor') + mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot forward batch long-cuda err") + + -- test with cuda input + local input = input:cuda() + gradOutput = gradOutput:cuda() + + local output = oh:forward(input) + mytester:assert(torch.type(output) == 'torch.CudaTensor') + mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot forward batch cuda err") + + local gradInput2 = oh:backward(input, gradOutput) + mytester:assertTensorEq(gradInput, gradInput2:double(), 0.000001, "OneHot backward batch err") + cutorch.synchronize() + + -- non-batch mode (number input) + mytester:assertTensorEq(oh:forward(num), output3:cuda(), 0.000001, "OneHot forward number err") + end + + -- multi-dimensional input + local inputSize = 2 + local input = torch.LongTensor(batchSize, inputSize):random(1, nClass) + local gradOutput = torch.randn(batchSize, inputSize, nClass) + + local oh = nn.OneHot(nClass, 2) + + local output = oh:forward(input) + local output2 = torch.Tensor(batchSize*inputSize, nClass):zero() + local eye = torch.eye(nClass) + output2:index(eye, 1, input:view(-1)) + output2:resize(batchSize, inputSize, nClass) + mytester:assertTensorEq(output, output2, 0.000001, "OneHot 2d forward batch err") + mytester:assert(output:dim() == 3) + + local gradInput = oh:backward(input, gradOutput) + mytester:assertTensorEq(gradInput, input:double():zero(), 0.000001, "OneHot 2d backward batch err") + + if pcall(function() require 'cunn' end) then + oh:cuda() + + -- test with long input + local output = oh:forward(input) + mytester:assert(torch.type(output) == 'torch.CudaTensor') + mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot 2d forward batch long-cuda err") + + -- test with cuda input + local input = input:cuda() + gradOutput = gradOutput:cuda() 
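+ -- The CPU reference above builds one-hot codes with an identity-matrix
+ -- lookup; as a general sketch (assuming a LongTensor of class indices):
+ --   local codes = torch.eye(nClass):index(1, input:view(-1))
+ -- one row of the identity per index, reshaped back afterwards.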
+ + local output = oh:forward(input) + mytester:assert(torch.type(output) == 'torch.CudaTensor') + mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot 2d forward batch cuda err") + + local gradInput2 = oh:backward(input, gradOutput) + mytester:assertTensorEq(gradInput, gradInput2:double(), 0.000001, "OneHot 2d backward batch err") + + local benchmark = false + if benchmark then + local input = torch.FloatTensor(50, 50):random(1,65):cuda() + + local oh = nn.OneHot(65):cuda() + + oh:forward(input) + cutorch.synchronize() + local a = torch.Timer() + for i=1,10 do + oh:forward(input) + end + cutorch.synchronize() + local gputime = a:time().real + + oh:float() + input = input:float() + oh:forward(input) + a = torch.Timer() + for i=1,10 do + oh:forward(input) + end + local cputime = a:time().real + print("Onehot GPU vs CPU time", gputime, cputime) + end + end +end + +function nntest.ZeroGrad() + local input = torch.randn(3,4) + local zg = nn.ZeroGrad() + local output = zg:forward(input) + mytester:assertTensorEq(input, output, 0.00000001) + local gradInput = zg:backward(input, input) + local gradInput2 = gradInput:clone():zero() + mytester:assertTensorEq(gradInput, gradInput2, 0.0000001) +end + +function nntest.ZipTable() + -- input : { {a1,a2}, {b1,b2}, {c1,c2} } + -- output : { {a1,b1,c1}, {a2,b2,c2} } + local z = nn.ZipTable() + local input = { + {torch.randn(3,4), torch.randn(3,4)}, + {torch.randn(3,4), torch.randn(3,4)}, + {torch.randn(3,4), torch.randn(3,4)} + } + local output = z:forward(input) + mytester:assert(#output == 2, "ZipTable #output") + mytester:assert(#(output[1]) == 3, "ZipTable #output[1]") + mytester:assertTensorEq(input[1][1], output[1][1], 0.000001, "ZipTable input11") + mytester:assertTensorEq(input[1][2], output[2][1], 0.000001, "ZipTable input12") + mytester:assertTensorEq(input[3][2], output[2][3], 0.000001, "ZipTable input32") + local gradInput = z:backward(input, output) + mytester:assert(#gradInput == 3, "ZipTable #gradInput") + mytester:assert(#(gradInput[1]) == 2, "ZipTable #gradInput[1]") + mytester:assertTensorEq(input[1][1], gradInput[1][1], 0.000001, "ZipTable gradInput11") + mytester:assertTensorEq(input[1][2], gradInput[1][2], 0.000001, "ZipTable gradInput12") + mytester:assertTensorEq(input[3][2], gradInput[3][2], 0.000001, "ZipTable gradInput32") +end + +function nntest.ZipTableOneToMany() + -- input : { v, {a,b,c} } + -- output : { {v,a}, {v,b}, {v,c} } + local z = nn.ZipTableOneToMany() + local input = { torch.randn(3), { torch.randn(4), torch.rand(4), torch.rand(4) } } + local output = z:forward(input) + mytester:assert(#output == 3, "ZipTableOneToMany #output") + mytester:assert(#(output[1]) == 2, "ZipTableOneToMany #output[1]") + mytester:assert(#(output[2]) == 2, "ZipTableOneToMany #output[2]") + mytester:assert(#(output[3]) == 2, "ZipTableOneToMany #output[3]") + mytester:assertTensorEq(input[1], output[1][1], 0.000001, "ZipTableOneToMany input1 output11") + mytester:assertTensorEq(input[1], output[2][1], 0.000001, "ZipTableOneToMany input1 output21") + mytester:assertTensorEq(input[1], output[3][1], 0.000001, "ZipTableOneToMany input1 output31") + mytester:assertTensorEq(input[2][1], output[1][2], 0.000001, "ZipTableOneToMany input21") + mytester:assertTensorEq(input[2][2], output[2][2], 0.000001, "ZipTableOneToMany input22") + mytester:assertTensorEq(input[2][3], output[3][2], 0.000001, "ZipTableOneToMany input23") + local gradInput = z:backward(input, output) + mytester:assert(#gradInput == 2, "ZipTableOneToMany #gradInput") + 
mytester:assert(#(gradInput[2]) == 3, "ZipTableOneToMany #gradInput[2]")
+   mytester:assertTensorEq(input[2][1], gradInput[2][1], 0.000001, "ZipTableOneToMany gradInput21")
+   mytester:assertTensorEq(input[2][2], gradInput[2][2], 0.000001, "ZipTableOneToMany gradInput22")
+   mytester:assertTensorEq(input[2][3], gradInput[2][3], 0.000001, "ZipTableOneToMany gradInput23")
+   mytester:assertTensorEq(torch.mul(input[1], 3), gradInput[1], 0.000001, "ZipTableOneToMany gradInput1")
+end
+
+function nntest.Collapse()
+   local c = nn.Collapse(3)
+   local input = torch.randn(8,3,4,5)
+   local output = c:forward(input)
+   mytester:assertTensorEq(input:view(8,-1), output, 0.000001, "Collapse:forward")
+   local gradInput = c:backward(input, output)
+   mytester:assertTensorEq(gradInput, input, 0.000001, "Collapse:backward")
+   mytester:assertTableEq(gradInput:size():totable(), input:size():totable(), 0.000001, "Collapse:backward size")
+   local input2 = input:transpose(1,4)
+   local output2 = c:forward(input2)
+   mytester:assertTensorEq(input2:contiguous():view(5,-1), output2, 0.000001, "Collapse:forward non-contiguous")
+   local gradInput2 = c:backward(input2, output2)
+   mytester:assertTensorEq(gradInput2, input2, 0.000001, "Collapse:backward non-contiguous")
+   mytester:assertTableEq(gradInput2:size():totable(), input2:size():totable(), 0.000001, "Collapse:backward size non-contiguous")
+end
+
+function nntest.Convert()
+   -- batch mode
+   local c = nn.Convert('bchw', 'chwb')
+   local input = torch.randn(8,3,5,5)
+   local output = c:forward(input)
+   local output2 = input:transpose(1,4):transpose(1,3):transpose(1,2)
+   mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd bchw->chwb")
+   local gradInput = c:backward(input, output)
+   mytester:assertTensorEq(gradInput, input, 0.000001, "Convert bwd bchw->chwb")
+   local c = nn.Convert('bchw', 'bf')
+   local output = c:forward(input)
+   local output2 = input:view(8,-1)
+   mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd bchw->bf")
+   c:float()
+   local output = c:forward(input:float())
+   mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type()")
+   local output = c:forward(input)
+   mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() double->float")
+   -- non-batch mode
+   local c = nn.Convert('chw', 'hwc')
+   local input = torch.randn(3,5,5)
+   local output = c:forward(input)
+   local output2 = input:transpose(1,3):transpose(1,2)
+   mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd chw->hwc non-batch")
+   local gradInput = c:backward(input, output)
+   mytester:assertTensorEq(gradInput, input, 0.000001, "Convert bwd chw->hwc non-batch")
+   local c = nn.Convert('chw', 'f')
+   local output = c:forward(input)
+   local output2 = input:view(-1)
+   mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd chw->f non-batch")
+   c:float()
+   local output = c:forward(input:float())
+   mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() non-batch")
+   local output = c:forward(input)
+   mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() double->float non-batch")
+end
+
+function nntest.CAddTensorTable()
+   -- input : { v, {a,b,c} }
+   -- output : { v+a, v+b, v+c }
+   local z = nn.CAddTensorTable()
+   local input = { torch.randn(3), { torch.randn(3), torch.rand(3), torch.rand(3) } }
+   local output = z:forward(input)
+   mytester:assert(#output == 3, "CAddTensorTable #output")
+   mytester:assertTensorEq(input[1]+input[2][1], output[1], 0.00001, "CAddTensorTable input21 output1")
+   mytester:assertTensorEq(input[1]+input[2][2], output[2], 0.00001, "CAddTensorTable input22 output2")
+   mytester:assertTensorEq(input[1]+input[2][3], output[3], 0.00001, "CAddTensorTable input23 output3")
+   local gradInput = z:backward(input, output)
+   mytester:assert(#gradInput == 2, "CAddTensorTable #gradInput")
+   mytester:assert(#(gradInput[2]) == 3, "CAddTensorTable #gradInput[2]")
+   mytester:assertTensorEq(output[1], gradInput[2][1], 0.000001, "CAddTensorTable gradInput21")
+   mytester:assertTensorEq(output[2], gradInput[2][2], 0.000001, "CAddTensorTable gradInput22")
+   mytester:assertTensorEq(output[3], gradInput[2][3], 0.000001, "CAddTensorTable gradInput23")
+   mytester:assertTensorEq(output[1]+output[2]+output[3], gradInput[1], 0.000001, "CAddTensorTable gradInput1")
+end
+
+-- Unit Test Kmeans layer
+function nntest.Kmeans()
+   local k = 3
+   local dim = 5
+   local batchSize = 200
+   local input = torch.Tensor(batchSize, dim)
+   for i=1, batchSize do
+      input[i]:fill(torch.random(1, k))
+   end
+
+   local verbose = false
+
+   local attempts = 10
+   local iter = 100
+   local bestLoss = 100000000
+   local bestKm = nil
+   local tempLoss = 0
+   local learningRate = 1
+
+   local initTypes = {'random', 'kmeans++'}
+   local useCudas = {false}
+   if pcall(function() require 'cunn' end) then
+      useCudas[2] = true
+   end
+   for _, initType in pairs(initTypes) do
+      for _, useCuda in pairs(useCudas) do
+
+         if useCuda then
+            input = input:cuda()
+         else
+            input = input:double()
+         end
+
+         local timer = torch.Timer()
+         for j=1, attempts do
+            local km = nn.Kmeans(k, dim)
+            if useCuda then km:cuda() end
+
+            if initType == 'kmeans++' then
+               km:initKmeansPlus(input)
+            else
+               km:initRandom(input)
+            end
+
+            for i=1, iter do
+               km:zeroGradParameters()
+
+               km:forward(input)
+               -- Kmeans ignores gradOutput in backward, so only the input is passed
+               km:backward(input)
+
+               -- Gradient descent
+               km.weight:add(-learningRate, km.gradWeight)
+               tempLoss = km.loss
+            end
+            if verbose then print("Attempt Loss " .. j ..": " .. tempLoss) end
+            if tempLoss < bestLoss then
+               bestLoss = tempLoss
+            end
+            if (initType == 'kmeans++' and bestLoss < 0.00001) or (initType == 'random' and bestLoss < 500) then
+               break
+            end
+         end
+         if verbose then
+            print("InitType: " .. initType .. " useCuda: " .. tostring(useCuda))
+            print("Best Loss: " .. bestLoss)
+            print("Total time: " ..
timer:time().real) + end + if initType == 'kmeans++' then + mytester:assert(bestLoss < 0.00001, "Kmeans++ error ("..(useCuda and 'cuda' or 'double')..")") + else + mytester:assert(bestLoss < 500, "Kmeans error ("..(useCuda and 'cuda' or 'double')..")") + end + end + end +end + +mytester:add(nntest) + +jac = nn.Jacobian +sjac = nn.SparseJacobian +function nn.test(tests, seed) + -- Limit number of threads since everything is small + local nThreads = torch.getnumthreads() + torch.setnumthreads(1) + -- randomize stuff + local seed = seed or (1e5 * torch.tic()) + print('Seed: ', seed) + math.randomseed(seed) + torch.manualSeed(seed) + mytester:run(tests) + torch.setnumthreads(nThreads) + return mytester +end + +function nn.testTHNN(tests, seed) + require 'test.LinearTHNN' + nn.Linear = nn.LinearTHNN + return nn.test(tests,seed) +end diff --git a/contrib/lua-torch/nn/utils.lua b/contrib/lua-torch/nn/utils.lua new file mode 100644 index 000000000..17b52afb3 --- /dev/null +++ b/contrib/lua-torch/nn/utils.lua @@ -0,0 +1,223 @@ +nn.utils = {} + +-- oops; someone forgot to add torch.Storage.type +-- TODO replace with torch.Storage.type when implemented +local function torch_Storage_type(self, type) + local current = torch.typename(self) + if not type then return current end + if type ~= current then + local new = torch.getmetatable(type).new() + if self:size() > 0 then + new:resize(self:size()):copy(self) + end + return new + else + return self + end +end + +-- tensorCache maintains a list of all tensors and storages that have been +-- converted (recursively) by calls to recursiveType() and type(). +-- It caches conversions in order to preserve sharing semantics +-- i.e. if two tensors share a common storage, then type conversion +-- should preserve that. +-- +-- You can preserve sharing semantics across multiple networks by +-- passing tensorCache between the calls to type, e.g. +-- +-- > tensorCache = {} +-- > net1:type('torch.CudaTensor', tensorCache) +-- > net2:type('torch.CudaTensor', tensorCache) +-- > nn.utils.recursiveType(anotherTensor, 'torch.CudaTensor', tensorCache) +-- +-- Implementation note: to make Lua table lookup behave correctly, +-- tensor keys are stored as actual tensor objects, while storage +-- keys are stored as the pointers themselves (as numbers). 
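+--
+-- A minimal sketch of the sharing guarantee (illustrative only; it assumes
+-- nothing beyond the stock torch tensor/storage constructors):
+--
+-- > local s = torch.DoubleStorage(4):fill(0)
+-- > local a = torch.DoubleTensor(s)  -- first view of s
+-- > local b = torch.DoubleTensor(s)  -- second view of s
+-- > local cache = {}
+-- > local fa = nn.utils.recursiveType(a, 'torch.FloatTensor', cache)
+-- > local fb = nn.utils.recursiveType(b, 'torch.FloatTensor', cache)
+-- > fa:fill(1)
+-- > print(fb[1]) -- prints 1: fa and fb still share a single (float) storage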
+function nn.utils.recursiveType(param, type, tensorCache) + tensorCache = tensorCache or {} + + if torch.type(param) == 'table' then + for k, v in pairs(param) do + param[k] = nn.utils.recursiveType(v, type, tensorCache) + end + elseif torch.isTypeOf(param, 'nn.Module') or + torch.isTypeOf(param, 'nn.Criterion') then + param:type(type, tensorCache) + elseif torch.isTensor(param) then + if torch.typename(param) ~= type then + local newparam + if tensorCache[param] then + newparam = tensorCache[param] + else + newparam = torch.Tensor():type(type) + local storageType = type:gsub('Tensor','Storage') + if param:storage() then + local storage_key = torch.pointer(param:storage()) + if not tensorCache[storage_key] then + tensorCache[storage_key] = torch_Storage_type( + param:storage(), storageType) + end + assert(torch.type(tensorCache[storage_key]) == storageType) + newparam:set( + tensorCache[storage_key], + param:storageOffset(), + param:size(), + param:stride() + ) + end + tensorCache[param] = newparam + end + assert(torch.type(newparam) == type) + param = newparam + end + end + return param +end + +function nn.utils.recursiveResizeAs(t1,t2) + if torch.type(t2) == 'table' then + t1 = (torch.type(t1) == 'table') and t1 or {t1} + for key,_ in pairs(t2) do + t1[key], t2[key] = nn.utils.recursiveResizeAs(t1[key], t2[key]) + end + for key,_ in pairs(t1) do + if not t2[key] then + t1[key] = nil + end + end + elseif torch.isTensor(t2) then + t1 = torch.isTensor(t1) and t1 or t2.new() + t1:resize(t2:size()) + else + error("expecting nested tensors or tables. Got ".. + torch.type(t1).." and "..torch.type(t2).." instead") + end + return t1, t2 +end + +function nn.utils.recursiveFill(t2, val) + if torch.type(t2) == 'table' then + for key,_ in pairs(t2) do + t2[key] = nn.utils.recursiveFill(t2[key], val) + end + elseif torch.isTensor(t2) then + t2:fill(val) + else + error("expecting tensor or table thereof. Got " + ..torch.type(t2).." instead") + end + return t2 +end + +function nn.utils.recursiveAdd(t1, val, t2) + if not t2 then + assert(val, "expecting at least two arguments") + t2 = val + val = 1 + end + val = val or 1 + if torch.type(t2) == 'table' then + t1 = (torch.type(t1) == 'table') and t1 or {t1} + for key,_ in pairs(t2) do + t1[key], t2[key] = nn.utils.recursiveAdd(t1[key], val, t2[key]) + end + elseif torch.isTensor(t1) and torch.isTensor(t2) then + t1:add(val, t2) + else + error("expecting nested tensors or tables. Got ".. + torch.type(t1).." and "..torch.type(t2).." instead") + end + return t1, t2 +end + +function nn.utils.recursiveCopy(t1,t2,async) + if torch.type(t2) == 'table' then + t1 = (torch.type(t1) == 'table') and t1 or {t1} + for key,_ in pairs(t2) do + t1[key], t2[key] = nn.utils.recursiveCopy(t1[key], t2[key], async) + end + elseif torch.isTensor(t2) then + t1 = torch.isTensor(t1) and t1 or t2.new() + t1:resize(t2:size()) + if async then + t1:copyAsync(t2) + else + t1:copy(t2) + end + else + error("expecting nested tensors or tables. Got ".. + torch.type(t1).." and "..torch.type(t2).." instead") + end + return t1, t2 +end + +function nn.utils.addSingletonDimension(...) + local view, t, dim + if select('#',...) < 3 then + t, dim = select(1,...) + else + view, t, dim = select(1,...) + assert(torch.isTensor(view), + "output tensor expected, got " .. type(view)) + end + + assert(torch.isTensor(t), "input tensor expected") + dim = dim or 1 + assert(dim > 0 and dim <= (t:dim() + 1), "invalid dimension: " .. dim + .. '. Tensor is of ' .. t:dim() .. 
' dimensions.')
+
+   view = view or t.new()
+   local size = torch.LongStorage(t:dim() + 1)
+   local stride = torch.LongStorage(t:dim() + 1)
+
+   for d = 1, dim - 1 do
+      size[d] = t:size(d)
+      stride[d] = t:stride(d)
+   end
+   size[dim] = 1
+   stride[dim] = 1
+   for d = dim + 1, t:dim() + 1 do
+      size[d] = t:size(d - 1)
+      stride[d] = t:stride(d - 1)
+   end
+
+   view:set(t:storage(), t:storageOffset(), size, stride)
+   return view
+end
+
+-- returns output as a view of input with the given sizes; if input is not
+-- contiguous, its data is first copied into output and the view is taken there
+function nn.utils.contiguousView(output, input, ...)
+   output = output or input.new()
+   if input:isContiguous() then
+      output:view(input, ...)
+   else
+      output:resize(input:size())
+      output:copy(input)
+      output:view(output, ...)
+   end
+   return output
+end
+
+-- go over specified fields and clear them. accepts
+-- nn.utils.clear(self, {'_buffer', '_buffer2'}) and
+-- nn.utils.clear(self, '_buffer', '_buffer2')
+function nn.utils.clear(self, ...)
+   local arg = {...}
+   if #arg > 0 and type(arg[1]) == 'table' then
+      arg = arg[1]
+   end
+   local function clear(f)
+      if self[f] then
+         if torch.isTensor(self[f]) then
+            self[f]:set()
+         elseif type(self[f]) == 'table' then
+            self[f] = {}
+         else
+            self[f] = nil
+         end
+      end
+   end
+   for i,v in ipairs(arg) do clear(v) end
+   return self
+end
+
+table.unpack = table.unpack or unpack
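+
+-- A hypothetical usage sketch for addSingletonDimension (not part of the API;
+-- shown only to illustrate that the returned view shares the input's storage):
+--
+-- > local t = torch.randn(4, 5)
+-- > local v = nn.utils.addSingletonDimension(t, 2)
+-- > print(v:size()) -- 4x1x5
+-- > v:fill(0)       -- writes through to t, since no data was copied
+-- > print(t:sum())  -- 0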