+local Module = torch.class('nn.Module') -- class table for 'nn.Module'; the methods below are defined on it
+-- Initializer: gives the module empty `gradInput` and `output` tensors and
+-- records the tensor type of `output` in `_type`.
+function Module:__init()
+   self.gradInput = torch.Tensor()
+   self.output = torch.Tensor()
+   self._type = self.output:type()
+end
+-- Returns two parallel tables: the module's parameter tensors and the
+-- matching gradient tensors, built from whichever of `weight` / `bias`
+-- (and `gradWeight` / `gradBias`) are set on this module.
+-- Returns nothing when the module has neither `weight` nor `bias`.
+function Module:parameters()
+   if self.weight and self.bias then
+      return {self.weight, self.bias}, {self.gradWeight, self.gradBias}
+   elseif self.weight then
+      return {self.weight}, {self.gradWeight}
+   elseif self.bias then
+      return {self.bias}, {self.gradBias}
+   else
+      return
+   end
+end
+-- Base implementation of the forward computation: ignores `input` and
+-- returns `self.output` unchanged.
+-- NOTE(review): presumably meant to be overridden by concrete modules.
+function Module:updateOutput(input)
+   return self.output
+end
+-- Forward pass entry point; delegates to `updateOutput(input)` and returns
+-- its result.
+function Module:forward(input)
+   return self:updateOutput(input)
+end
+-- Backward pass entry point.
+-- Calls `updateGradInput(input, gradOutput)` and then
+-- `accGradParameters(input, gradOutput, scale)`; `scale` defaults to 1.
+-- Returns `self.gradInput`.
+function Module:backward(input, gradOutput, scale)
+   scale = scale or 1
+   self:updateGradInput(input, gradOutput)
+   self:accGradParameters(input, gradOutput, scale)
+   return self.gradInput
+end
+-- Backward pass that also applies the update: calls
+-- `updateGradInput(input, gradOutput)` followed by
+-- `accUpdateGradParameters(input, gradOutput, lr)`.
+-- Returns `self.gradInput`.
+function Module:backwardUpdate(input, gradOutput, lr)
+   self:updateGradInput(input, gradOutput)
+   self:accUpdateGradParameters(input, gradOutput, lr)
+   return self.gradInput
+end
+-- Base implementation: ignores both arguments and returns `self.gradInput`
+-- unchanged.
+-- NOTE(review): presumably meant to be overridden by concrete modules.
+function Module:updateGradInput(input, gradOutput)
+   return self.gradInput
+end
+-- Base implementation: intentionally does nothing and returns nothing.
+-- NOTE(review): presumably overridden by modules that own parameters.
+function Module:accGradParameters(input, gradOutput, scale)
+end
+-- Dispatches the accumulate-and-update step: uses the shared variant when
+-- `self.shared` is truthy (set by `share`), otherwise the default variant.
+function Module:accUpdateGradParameters(input, gradOutput, lr)
+   if self.shared then
+      self:sharedAccUpdateGradParameters(input, gradOutput, lr)
+   else
+      self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+   end
+end
+-- Applies a gradient step directly to the parameters.
+-- `gradWeight` / `gradBias` are temporarily pointed at `weight` / `bias`, so
+-- the call to `accGradParameters` with scale `-lr` accumulates into the
+-- parameters themselves; the original gradient fields are restored afterwards.
+function Module:defaultAccUpdateGradParameters(input, gradOutput, lr)
+   local gradWeight = self.gradWeight
+   local gradBias = self.gradBias
+   self.gradWeight = self.weight
+   self.gradBias = self.bias
+   self:accGradParameters(input, gradOutput, -lr)
+   self.gradWeight = gradWeight
+   self.gradBias = gradBias
+end
+-- Update path used when `self.shared` is set: if `parameters()` returns
+-- anything, zero the gradients, accumulate them with scale 1 and then call
+-- `updateParameters(lr)`. Does nothing for modules without parameters.
+function Module:sharedAccUpdateGradParameters(input, gradOutput, lr)
+   if self:parameters() then
+      self:zeroGradParameters()
+      self:accGradParameters(input, gradOutput, 1)
+      self:updateParameters(lr)
+   end
+end
+-- Calls `:zero()` on every gradient tensor returned by `parameters()`.
+-- Does nothing when `parameters()` returns no gradient table.
+function Module:zeroGradParameters()
+   local _, gradParams = self:parameters()
+   if gradParams then
+      for i = 1, #gradParams do
+         gradParams[i]:zero()
+      end
+   end
+end
+-- For each parameter tensor returned by `parameters()`, calls
+-- `params[i]:add(-learningRate, gradParams[i])`.
+-- Does nothing when `parameters()` returns no parameter table.
+function Module:updateParameters(learningRate)
+   local params, gradParams = self:parameters()
+   if params then
+      for i = 1, #params do
+         params[i]:add(-learningRate, gradParams[i])
+      end
+   end
+end
+-- Sets the `train` flag to true.
+function Module:training()
+   self.train = true
+end
+-- Sets the `train` flag to false.
+function Module:evaluate()
+   self.train = false
+end
+-- Makes the named fields of this module share data with the same fields of
+-- `mlp`.
+-- For every field name in `...` that is non-nil on `self`, calls
+-- `self[name]:set(mlp[name])` and marks both modules with `shared = true`.
+-- Names that are nil on `self` are skipped. Returns `self`.
+function Module:share(mlp, ...)
+   local arg = {...}
+   for _, v in ipairs(arg) do
+      if self[v] ~= nil then
+         self[v]:set(mlp[v])
+         self.shared = true
+         mlp.shared = true
+      end
+   end
+   return self
+end
+-- Builds a replacement `write(self, file)` serializer for use during clone.
+-- The field names passed in `...` are treated as shared: each such field must
+-- be a tensor and is written as `v.new()` (a fresh tensor made by the same
+-- constructor, so its contents are not serialized). All other fields are
+-- written as they are.
+local function sharedWrite(...)
+   local arg = {...}
+   local shared = {}
+   for _, v in ipairs(arg) do
+      shared[v] = true
+   end
+   return function(self, file)
+      local object = {}
+      for k, v in pairs(self) do
+         if shared[k] then
+            assert(torch.isTensor(v), 'Shared parameters have to be Tensors')
+            -- placeholder only; `clone` re-points this field via `share`
+            object[k] = v.new()
+         else
+            object[k] = v
+         end
+      end
+      file:writeObject(object)
+   end
+end
+-- Returns a copy of this module, made by serializing it to an in-memory
+-- binary file and reading it back.
+-- When field names are given in `...`, those fields are written as
+-- placeholders by `sharedWrite` and then re-pointed at this module's tensors
+-- with `clone:share(self, ...)`.
+function Module:clone(...)
+   local oldWrite = nn.Module.write
+   nn.Module.write = sharedWrite(...)
+   local f = torch.MemoryFile("rw"):binary()
+   -- Run the round-trip under pcall so the patched writer is always restored
+   -- and the memory file always closed, even if (de)serialization raises.
+   local ok, clone = pcall(function()
+      f:writeObject(self)
+      f:seek(1)
+      return f:readObject()
+   end)
+   f:close()
+   nn.Module.write = oldWrite
+   if not ok then
+      -- on failure `clone` holds the error value; re-raise it unchanged
+      error(clone, 0)
+   end
+   if select('#',...) > 0 then
+      clone:share(self,...)
+   end
+   return clone
+end
+-- Getter/setter for the module's tensor type.
+-- With no `type` argument, returns the recorded `_type`.
+-- Otherwise passes every field of the module through
+-- `nn.utils.recursiveType(value, type, tensorCache)`, records the new type in
+-- `_type` and returns `self`. `tensorCache` defaults to a fresh table.
+function Module:type(type, tensorCache)
+   if not type then
+      return self._type
+   end
+
+   tensorCache = tensorCache or {}
+
+   -- find all tensors and convert them
+   for key,param in pairs(self) do
+      self[key] = nn.utils.recursiveType(param, type, tensorCache)
+   end
+
+   self._type = type
+   return self
+end
+-- Shorthand for `self:type('torch.FloatTensor', ...)`.
+function Module:float(...)
+   return self:type('torch.FloatTensor',...)
+end
+-- Shorthand for `self:type('torch.DoubleTensor', ...)`.
+function Module:double(...)
+   return self:type('torch.DoubleTensor',...)
+end
+-- Shorthand for `self:type('torch.CudaTensor', ...)`.
+function Module:cuda(...)
+   return self:type('torch.CudaTensor',...)
+end
+-- Base implementation: intentionally does nothing.
+-- NOTE(review): presumably overridden by modules that (re)initialize
+-- parameters.
+function Module:reset()
+end
+-- Serializer: copies every field of the module into a plain table and writes
+-- that table with `file:writeObject`.
+function Module:write(file)
+   -- Write all values in the object as a table.
+   local object = {}
+   for k, v in pairs(self) do
+      object[k] = v
+   end
+   file:writeObject(object)
+end
+-- Deserializer: reads one table with `file:readObject` and copies each of its
+-- fields onto this module.
+function Module:read(file)
+   local object = file:readObject()
+   for k, v in pairs(object) do
+      self[k] = v
+   end
+end
+-- This function is not easy to understand. It works as follows:
+-- - gather all parameter tensors for this module (and children);
+-- count all parameter values (floats)
+-- - create one ginormous memory area (Storage object) with room for all
+-- parameters
+-- - remap each parameter tensor to point to an area within the ginormous
+-- Storage, and copy it there
+-- It has the effect of making all parameters point to the same memory area,
+-- which is then returned.
+-- The purpose is to allow operations over all parameters (such as momentum
+-- updates and serialization), but it assumes that all parameters are of
+-- the same type (and, in the case of CUDA, on the same device), which
+-- is not always true. Use apply() to iterate over this module and
+-- children instead.
+-- Module._flattenTensorBuffer can be used by other packages (e.g. cunn)
+-- to specify the type of temporary buffers. For example, the temporary
+-- buffers for CudaTensor could be FloatTensor, to avoid GPU memory usage.
+-- TODO: This logically belongs to torch.Tensor, not nn.
+Module._flattenTensorBuffer = {} -- looked up in flatten() by torch.type of the first parameter; value is the constructor for its temporary buffers
+-- Packs the given tensors into one flat tensor and re-points each of them at
+-- its slice of the flat tensor's storage.
+--
+-- parameters: array of tensors (may be nil or empty).
+-- Returns the flat tensor; an empty `torch.Tensor()` when there is nothing to
+-- flatten.
+-- Raises an assertion error if the storages contain unreferenced elements
+-- ("holes") and some parameter tensor is not compact.
+function Module.flatten(parameters)
+
+   -- returns true if tensor occupies a contiguous region of memory (no holes)
+   local function isCompact(tensor)
+      local sortedStride, perm = torch.sort(
+            torch.LongTensor(tensor:nDimension()):set(tensor:stride()), 1, true)
+      local sortedSize = torch.LongTensor(tensor:nDimension()):set(
+            tensor:size()):index(1, perm)
+      local nRealDim = torch.clamp(sortedStride, 0, 1):sum()
+      sortedStride = sortedStride:narrow(1, 1, nRealDim):clone()
+      sortedSize   = sortedSize:narrow(1, 1, nRealDim):clone()
+      -- view the same storage with dimensions ordered by decreasing stride
+      local t = tensor.new():set(tensor:storage(), 1,
+                                 sortedSize:storage(),
+                                 sortedStride:storage())
+      return t:isContiguous()
+   end
+
+   if not parameters or #parameters == 0 then
+      return torch.Tensor()
+   end
+   local Tensor = parameters[1].new
+   local TmpTensor = Module._flattenTensorBuffer[torch.type(parameters[1])] or Tensor
+
+   -- 1. construct the set of all unique storages referenced by parameter tensors
+   local storages = {}
+   local nParameters = 0
+   local parameterMeta = {}
+   for k = 1,#parameters do
+      local param = parameters[k]
+      local storage = parameters[k]:storage()
+      local storageKey = torch.pointer(storage)
+
+      if not storages[storageKey] then
+         storages[storageKey] = {storage, nParameters}
+         nParameters = nParameters + storage:size()
+      end
+
+      parameterMeta[k] = {storageOffset = param:storageOffset() +
+                                          storages[storageKey][2],
+                          size          = param:size(),
+                          stride        = param:stride()}
+   end
+
+   -- 2. construct a single tensor that will hold all the parameters
+   local flatParameters = TmpTensor(nParameters):zero()
+
+   -- 3. determine if there are elements in the storage that none of the
+   --    parameter tensors reference ('holes')
+   local tensorsCompact = true
+   for k = 1,#parameters do
+      local meta = parameterMeta[k]
+      local tmp = TmpTensor():set(
+         flatParameters:storage(), meta.storageOffset, meta.size, meta.stride)
+      tmp:fill(1)
+      tensorsCompact = tensorsCompact and isCompact(tmp)
+   end
+
+   local maskParameters  = flatParameters:byte():clone()
+   local compactOffsets  = flatParameters:long():cumsum(1)
+   local nUsedParameters = compactOffsets[-1]
+
+   -- 4. copy storages into the flattened parameter tensor
+   for _, storageAndOffset in pairs(storages) do
+      -- plain indexing instead of table.unpack, which Lua 5.1 / LuaJIT only
+      -- provide as the global `unpack`
+      local storage, offset = storageAndOffset[1], storageAndOffset[2]
+      flatParameters[{{offset+1,offset+storage:size()}}]:copy(Tensor():set(storage))
+   end
+
+   -- 5. allow garbage collection
+   storages = nil
+   for k = 1,#parameters do
+       parameters[k]:set(Tensor())
+   end
+
+   -- 6. compact the flattened parameters if there were holes
+   if nUsedParameters ~= nParameters then
+      assert(tensorsCompact,
+         "Cannot gather tensors that are not compact")
+
+      flatParameters = TmpTensor(nUsedParameters):copy(
+            flatParameters:maskedSelect(maskParameters))
+      for k = 1,#parameters do
+        parameterMeta[k].storageOffset =
+              compactOffsets[parameterMeta[k].storageOffset]
+      end
+   end
+
+   if TmpTensor ~= Tensor then
+      flatParameters = Tensor(flatParameters:nElement()):copy(flatParameters)
+   end
+
+   -- 7. fix up the parameter tensors to point at the flattened parameters
+   for k = 1,#parameters do
+      parameters[k]:set(flatParameters:storage(),
+          parameterMeta[k].storageOffset,
+          parameterMeta[k].size,
+          parameterMeta[k].stride)
+   end
+
+   return flatParameters
+end
+-- Flattens this module's parameters and gradient parameters with
+-- `Module.flatten` and returns the two flat tensors `p, g`.
+-- Asserts that both have the same number of elements and that every
+-- parameter / gradient pair has the same storage offset.
+function Module:getParameters()
+   -- get parameters
+   local parameters,gradParameters = self:parameters()
+   local p, g = Module.flatten(parameters), Module.flatten(gradParameters)
+   assert(p:nElement() == g:nElement(),
+      'check that you are sharing parameters and gradParameters')
+   if parameters then
+      for i=1,#parameters do
+         assert(parameters[i]:storageOffset() == gradParameters[i]:storageOffset(),
+            'misaligned parameter at ' .. tostring(i))
+      end
+   end
+   return p, g
+end
+-- Call handler: always runs `forward(input)`.
+-- When `gradOutput` is given it also runs `backward(input, gradOutput)` and
+-- returns `self.output, self.gradInput`; otherwise returns `self.output`.
+function Module:__call__(input, gradOutput)
+   self:forward(input)
+   if gradOutput then
+      self:backward(input, gradOutput)
+      return self.output, self.gradInput
+   else
+      return self.output
+   end
+end
+-- Run a callback (called with the module as an argument) in preorder over this
+-- module and its children.
+-- Children are the entries of `self.modules`, when that field is set.
+function Module:apply(callback)
+   callback(self)
+
+   if self.modules then
+      for _, module in ipairs(self.modules) do
+         module:apply(callback)
+      end
+   end
+end
+-- Collects every module in this subtree whose `torch.typename` equals
+-- `typename`.
+-- Returns two parallel arrays: the matching modules and, for each one, the
+-- module it was reached from (the `container` argument for this module
+-- itself, which defaults to `self`).
+function Module:findModules(typename, container)
+   container = container or self
+   local nodes = {}
+   local containers = {}
+   local mod_type = torch.typename(self)
+   if mod_type == typename then
+      nodes[#nodes+1] = self
+      containers[#containers+1] = container
+   end
+   -- Recurse on nodes with 'modules'
+   if (self.modules ~= nil) then
+      if (torch.type(self.modules) == 'table') then
+         for i = 1, #self.modules do
+            local child = self.modules[i]
+            local cur_nodes, cur_containers =
+               child:findModules(typename, self)
+            assert(#cur_nodes == #cur_containers,
+               'Internal error: incorrect return length')  -- This shouldn't happen
+            -- add the list items from our child to our list (ie return a
+            -- flattened table of the return nodes).
+            for j = 1, #cur_nodes do
+               nodes[#nodes+1] = cur_nodes[j]
+               containers[#containers+1] = cur_containers[j]
+            end
+         end
+      end
+   end
+   return nodes, containers
+end
+-- returns a list of modules
+-- The list starts with this module and is followed by the flattened results
+-- of calling `listModules` on each entry of `self.modules`.
+function Module:listModules()
+   -- appends `from` to `to`; when `from` is a table its items are appended
+   -- recursively
+   local function tinsert(to, from)
+      if torch.type(from) == 'table' then
+         for i=1,#from do
+            tinsert(to,from[i])
+         end
+      else
+         table.insert(to,from)
+      end
+   end
+   -- include self first
+   local modules = {self}
+   if self.modules then
+      for i=1,#self.modules do
+         local modulas = self.modules[i]:listModules()
+         if modulas then
+            tinsert(modules,modulas)
+         end
+      end
+   end
+   return modules
+end
+-- Returns the result of `nn.utils.clear(self, 'output', 'gradInput')`.
+function Module:clearState()
+   return nn.utils.clear(self, 'output', 'gradInput')
+end
+-- similar to apply, recursively goes over network and calls
+-- a callback function which returns a new module replacing the old one
+-- The callback runs on this module first. Each entry of `self.modules` is
+-- then overwritten with the result of calling `replace` on it. Returns the
+-- callback's result for this module.
+function nn.Module:replace(callback)
+   local out = callback(self)
+   if self.modules then
+      for i, module in ipairs(self.modules) do
+         self.modules[i] = module:replace(callback)
+      end
+   end
+   return out
+end