author    Vsevolod Stakhov <vsevolod@highsecure.ru>    2018-05-23 18:14:15 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru>    2018-05-23 18:14:15 +0100
commit    714eb56e1760fdfb26afccde92664d3a2f1e8435 (patch)
tree      84d1399acbb92f852b4bd64f9ea5412680b0c6ab /contrib/lua-torch/nn
parent    220a51ff68013dd668a45b78c60a7b8bfc10f074 (diff)
download  rspamd-714eb56e1760fdfb26afccde92664d3a2f1e8435.tar.gz
          rspamd-714eb56e1760fdfb26afccde92664d3a2f1e8435.zip
[Minor] Move lua contrib libraries to lua- prefix
Diffstat (limited to 'contrib/lua-torch/nn')
-rw-r--r-- contrib/lua-torch/nn/.gitignore | 2
-rw-r--r-- contrib/lua-torch/nn/.luacheckrc | 13
-rw-r--r-- contrib/lua-torch/nn/.travis.yml | 56
-rw-r--r-- contrib/lua-torch/nn/Abs.lua | 22
-rw-r--r-- contrib/lua-torch/nn/AbsCriterion.lua | 32
-rw-r--r-- contrib/lua-torch/nn/Add.lua | 66
-rw-r--r-- contrib/lua-torch/nn/AddConstant.lua | 50
-rw-r--r-- contrib/lua-torch/nn/BCECriterion.lua | 64
-rw-r--r-- contrib/lua-torch/nn/BatchNormalization.lua | 213
-rw-r--r-- contrib/lua-torch/nn/Bilinear.lua | 163
-rw-r--r-- contrib/lua-torch/nn/Bottle.lua | 71
-rw-r--r-- contrib/lua-torch/nn/CAdd.lua | 127
-rw-r--r-- contrib/lua-torch/nn/CAddTable.lua | 36
-rw-r--r-- contrib/lua-torch/nn/CAddTensorTable.lua | 43
-rw-r--r-- contrib/lua-torch/nn/CDivTable.lua | 26
-rw-r--r-- contrib/lua-torch/nn/CMakeLists.txt | 14
-rw-r--r-- contrib/lua-torch/nn/CMaxTable.lua | 46
-rw-r--r-- contrib/lua-torch/nn/CMinTable.lua | 46
-rw-r--r-- contrib/lua-torch/nn/CMul.lua | 166
-rw-r--r-- contrib/lua-torch/nn/CMulTable.lua | 55
-rw-r--r-- contrib/lua-torch/nn/CONTRIBUTING.md | 136
-rw-r--r-- contrib/lua-torch/nn/COPYRIGHT.txt | 36
-rw-r--r-- contrib/lua-torch/nn/CReLU.lua | 57
-rw-r--r-- contrib/lua-torch/nn/CSubTable.lua | 26
-rw-r--r-- contrib/lua-torch/nn/Clamp.lua | 5
-rw-r--r-- contrib/lua-torch/nn/ClassNLLCriterion.lua | 82
-rw-r--r-- contrib/lua-torch/nn/ClassSimplexCriterion.lua | 118
-rw-r--r-- contrib/lua-torch/nn/Collapse.lua | 30
-rw-r--r-- contrib/lua-torch/nn/Concat.lua | 158
-rw-r--r-- contrib/lua-torch/nn/ConcatTable.lua | 118
-rw-r--r-- contrib/lua-torch/nn/Constant.lua | 36
-rw-r--r-- contrib/lua-torch/nn/Container.lua | 149
-rwxr-xr-x contrib/lua-torch/nn/Contiguous.lua | 21
-rw-r--r-- contrib/lua-torch/nn/Convert.lua | 245
-rw-r--r-- contrib/lua-torch/nn/Copy.lua | 42
-rw-r--r-- contrib/lua-torch/nn/Cosine.lua | 175
-rw-r--r-- contrib/lua-torch/nn/CosineDistance.lua | 116
-rw-r--r-- contrib/lua-torch/nn/CosineEmbeddingCriterion.lua | 142
-rw-r--r-- contrib/lua-torch/nn/Criterion.lua | 64
-rw-r--r-- contrib/lua-torch/nn/CriterionTable.lua | 17
-rw-r--r-- contrib/lua-torch/nn/CrossEntropyCriterion.lua | 42
-rw-r--r-- contrib/lua-torch/nn/Decorator.lua | 47
-rw-r--r-- contrib/lua-torch/nn/DepthConcat.lua | 116
-rw-r--r-- contrib/lua-torch/nn/DistKLDivCriterion.lua | 34
-rw-r--r-- contrib/lua-torch/nn/DistanceRatioCriterion.lua | 142
-rw-r--r-- contrib/lua-torch/nn/DontCast.lua | 124
-rw-r--r-- contrib/lua-torch/nn/DotProduct.lua | 61
-rw-r--r-- contrib/lua-torch/nn/Dropout.lua | 70
-rw-r--r-- contrib/lua-torch/nn/ELU.lua | 45
-rw-r--r-- contrib/lua-torch/nn/ErrorMessages.lua | 19
-rw-r--r-- contrib/lua-torch/nn/Euclidean.lua | 197
-rw-r--r-- contrib/lua-torch/nn/Exp.lua | 9
-rw-r--r-- contrib/lua-torch/nn/FlattenTable.lua | 106
-rw-r--r-- contrib/lua-torch/nn/GPU.lua | 273
-rw-r--r-- contrib/lua-torch/nn/GatedLinearUnit.lua | 27
-rw-r--r-- contrib/lua-torch/nn/GradientReversal.lua | 32
-rw-r--r-- contrib/lua-torch/nn/HardShrink.lua | 25
-rw-r--r-- contrib/lua-torch/nn/HardTanh.lua | 37
-rw-r--r-- contrib/lua-torch/nn/HingeEmbeddingCriterion.lua | 43
-rw-r--r-- contrib/lua-torch/nn/Identity.lua | 30
-rw-r--r-- contrib/lua-torch/nn/Index.lua | 32
-rw-r--r-- contrib/lua-torch/nn/IndexLinear.lua | 398
-rw-r--r-- contrib/lua-torch/nn/Jacobian.lua | 389
-rw-r--r-- contrib/lua-torch/nn/JoinTable.lua | 74
-rw-r--r-- contrib/lua-torch/nn/Kmeans.lua | 215
-rw-r--r-- contrib/lua-torch/nn/L1Cost.lua | 30
-rw-r--r-- contrib/lua-torch/nn/L1HingeEmbeddingCriterion.lua | 41
-rw-r--r-- contrib/lua-torch/nn/L1Penalty.lua | 42
-rw-r--r-- contrib/lua-torch/nn/LayerNormalization.lua | 27
-rw-r--r-- contrib/lua-torch/nn/LeakyReLU.lua | 41
-rw-r--r-- contrib/lua-torch/nn/Linear.lua | 122
-rwxr-xr-x contrib/lua-torch/nn/LinearWeightNorm.lua | 168
-rw-r--r-- contrib/lua-torch/nn/Log.lua | 20
-rw-r--r-- contrib/lua-torch/nn/LogSigmoid.lua | 27
-rw-r--r-- contrib/lua-torch/nn/LogSoftMax.lua | 19
-rw-r--r-- contrib/lua-torch/nn/LookupTable.lua | 166
-rw-r--r-- contrib/lua-torch/nn/MM.lua | 92
-rw-r--r-- contrib/lua-torch/nn/MSECriterion.lua | 32
-rw-r--r-- contrib/lua-torch/nn/MV.lua | 82
-rw-r--r-- contrib/lua-torch/nn/MapTable.lua | 119
-rw-r--r-- contrib/lua-torch/nn/MarginCriterion.lua | 31
-rw-r--r-- contrib/lua-torch/nn/MarginRankingCriterion.lua | 75
-rw-r--r-- contrib/lua-torch/nn/MaskedSelect.lua | 71
-rw-r--r-- contrib/lua-torch/nn/Max.lua | 66
-rw-r--r-- contrib/lua-torch/nn/Maxout.lua | 13
-rw-r--r-- contrib/lua-torch/nn/Mean.lua | 14
-rw-r--r-- contrib/lua-torch/nn/Min.lua | 66
-rw-r--r-- contrib/lua-torch/nn/MixtureTable.lua | 165
-rw-r--r-- contrib/lua-torch/nn/Module.lua | 429
-rw-r--r-- contrib/lua-torch/nn/ModuleCriterion.lua | 44
-rw-r--r-- contrib/lua-torch/nn/Mul.lua | 38
-rw-r--r-- contrib/lua-torch/nn/MulConstant.lua | 41
-rw-r--r-- contrib/lua-torch/nn/MultiCriterion.lua | 40
-rw-r--r-- contrib/lua-torch/nn/MultiLabelMarginCriterion.lua | 41
-rw-r--r-- contrib/lua-torch/nn/MultiLabelSoftMarginCriterion.lua | 86
-rw-r--r-- contrib/lua-torch/nn/MultiMarginCriterion.lua | 64
-rw-r--r-- contrib/lua-torch/nn/NaN.lua | 72
-rw-r--r-- contrib/lua-torch/nn/Narrow.lua | 45
-rw-r--r-- contrib/lua-torch/nn/NarrowTable.lua | 43
-rw-r--r-- contrib/lua-torch/nn/Normalize.lua | 150
-rw-r--r-- contrib/lua-torch/nn/OneHot.lua | 69
-rw-r--r-- contrib/lua-torch/nn/PReLU.lua | 52
-rw-r--r-- contrib/lua-torch/nn/Padding.lua | 65
-rw-r--r-- contrib/lua-torch/nn/PairwiseDistance.lua | 91
-rw-r--r-- contrib/lua-torch/nn/Parallel.lua | 116
-rw-r--r-- contrib/lua-torch/nn/ParallelCriterion.lua | 41
-rw-r--r-- contrib/lua-torch/nn/ParallelTable.lua | 58
-rw-r--r-- contrib/lua-torch/nn/PartialLinear.lua | 114
-rw-r--r-- contrib/lua-torch/nn/PixelShuffle.lua | 111
-rw-r--r-- contrib/lua-torch/nn/Power.lua | 22
-rw-r--r-- contrib/lua-torch/nn/PrintSize.lua | 36
-rw-r--r-- contrib/lua-torch/nn/Profile.lua | 55
-rw-r--r-- contrib/lua-torch/nn/README.md | 21
-rw-r--r-- contrib/lua-torch/nn/RReLU.lua | 50
-rw-r--r-- contrib/lua-torch/nn/ReLU.lua | 5
-rw-r--r-- contrib/lua-torch/nn/ReLU6.lua | 32
-rw-r--r-- contrib/lua-torch/nn/Replicate.lua | 57
-rw-r--r-- contrib/lua-torch/nn/Reshape.lua | 72
-rw-r--r-- contrib/lua-torch/nn/Select.lua | 24
-rw-r--r-- contrib/lua-torch/nn/SelectTable.lua | 71
-rw-r--r-- contrib/lua-torch/nn/Sequential.lua | 122
-rw-r--r-- contrib/lua-torch/nn/Sigmoid.lua | 19
-rw-r--r-- contrib/lua-torch/nn/SmoothL1Criterion.lua | 32
-rw-r--r-- contrib/lua-torch/nn/SoftMarginCriterion.lua | 24
-rw-r--r-- contrib/lua-torch/nn/SoftMax.lua | 19
-rw-r--r-- contrib/lua-torch/nn/SoftMin.lua | 31
-rw-r--r-- contrib/lua-torch/nn/SoftPlus.lua | 35
-rw-r--r-- contrib/lua-torch/nn/SoftShrink.lua | 25
-rw-r--r-- contrib/lua-torch/nn/SoftSign.lua | 20
-rw-r--r-- contrib/lua-torch/nn/SparseJacobian.lua | 277
-rw-r--r-- contrib/lua-torch/nn/SparseLinear.lua | 242
-rw-r--r-- contrib/lua-torch/nn/SpatialAdaptiveAveragePooling.lua | 35
-rw-r--r-- contrib/lua-torch/nn/SpatialAdaptiveMaxPooling.lua | 46
-rw-r--r-- contrib/lua-torch/nn/SpatialAutoCropMSECriterion.lua | 74
-rw-r--r-- contrib/lua-torch/nn/SpatialAveragePooling.lua | 93
-rw-r--r-- contrib/lua-torch/nn/SpatialBatchNormalization.lua | 35
-rw-r--r-- contrib/lua-torch/nn/SpatialClassNLLCriterion.lua | 81
-rw-r--r-- contrib/lua-torch/nn/SpatialContrastiveNormalization.lua | 36
-rw-r--r-- contrib/lua-torch/nn/SpatialConvolution.lua | 155
-rw-r--r-- contrib/lua-torch/nn/SpatialConvolutionLocal.lua | 188
-rw-r--r-- contrib/lua-torch/nn/SpatialConvolutionMM.lua | 139
-rw-r--r-- contrib/lua-torch/nn/SpatialConvolutionMap.lua | 154
-rw-r--r-- contrib/lua-torch/nn/SpatialCrossMapLRN.lua | 153
-rw-r--r-- contrib/lua-torch/nn/SpatialDepthWiseConvolution.lua | 139
-rw-r--r-- contrib/lua-torch/nn/SpatialDilatedConvolution.lua | 80
-rw-r--r-- contrib/lua-torch/nn/SpatialDilatedMaxPooling.lua | 67
-rw-r--r-- contrib/lua-torch/nn/SpatialDivisiveNormalization.lua | 136
-rw-r--r-- contrib/lua-torch/nn/SpatialDropout.lua | 55
-rw-r--r-- contrib/lua-torch/nn/SpatialFractionalMaxPooling.lua | 165
-rw-r--r-- contrib/lua-torch/nn/SpatialFullConvolution.lua | 219
-rw-r--r-- contrib/lua-torch/nn/SpatialFullConvolutionMap.lua | 91
-rw-r--r-- contrib/lua-torch/nn/SpatialLPPooling.lua | 43
-rw-r--r-- contrib/lua-torch/nn/SpatialLogSoftMax.lua | 19
-rw-r--r-- contrib/lua-torch/nn/SpatialMaxPooling.lua | 94
-rw-r--r-- contrib/lua-torch/nn/SpatialMaxUnpooling.lua | 45
-rw-r--r-- contrib/lua-torch/nn/SpatialReflectionPadding.lua | 51
-rw-r--r-- contrib/lua-torch/nn/SpatialReplicationPadding.lua | 51
-rw-r--r-- contrib/lua-torch/nn/SpatialSoftMax.lua | 19
-rw-r--r-- contrib/lua-torch/nn/SpatialSubSampling.lua | 79
-rw-r--r-- contrib/lua-torch/nn/SpatialSubtractiveNormalization.lua | 115
-rw-r--r-- contrib/lua-torch/nn/SpatialUpSamplingBilinear.lua | 139
-rw-r--r-- contrib/lua-torch/nn/SpatialUpSamplingNearest.lua | 59
-rw-r--r-- contrib/lua-torch/nn/SpatialZeroPadding.lua | 104
-rw-r--r-- contrib/lua-torch/nn/SplitTable.lua | 43
-rw-r--r-- contrib/lua-torch/nn/Sqrt.lua | 26
-rw-r--r-- contrib/lua-torch/nn/Square.lua | 22
-rw-r--r-- contrib/lua-torch/nn/Squeeze.lua | 40
-rw-r--r-- contrib/lua-torch/nn/StochasticGradient.lua | 62
-rw-r--r-- contrib/lua-torch/nn/Sum.lua | 67
-rw-r--r-- contrib/lua-torch/nn/THNN.lua | 140
-rw-r--r-- contrib/lua-torch/nn/Tanh.lua | 19
-rw-r--r-- contrib/lua-torch/nn/TanhShrink.lua | 20
-rw-r--r-- contrib/lua-torch/nn/TemporalConvolution.lua | 73
-rw-r--r-- contrib/lua-torch/nn/TemporalDynamicKMaxPooling.lua | 65
-rw-r--r-- contrib/lua-torch/nn/TemporalMaxPooling.lua | 44
-rw-r--r-- contrib/lua-torch/nn/TemporalRowConvolution.lua | 120
-rw-r--r-- contrib/lua-torch/nn/TemporalSubSampling.lua | 64
-rw-r--r-- contrib/lua-torch/nn/Threshold.lua | 51
-rw-r--r-- contrib/lua-torch/nn/Transpose.lua | 35
-rw-r--r-- contrib/lua-torch/nn/Unsqueeze.lua | 52
-rw-r--r-- contrib/lua-torch/nn/View.lua | 96
-rw-r--r-- contrib/lua-torch/nn/VolumetricAveragePooling.lua | 54
-rw-r--r-- contrib/lua-torch/nn/VolumetricBatchNormalization.lua | 4
-rw-r--r-- contrib/lua-torch/nn/VolumetricConvolution.lua | 169
-rw-r--r-- contrib/lua-torch/nn/VolumetricDilatedConvolution.lua | 84
-rw-r--r-- contrib/lua-torch/nn/VolumetricDilatedMaxPooling.lua | 71
-rw-r--r-- contrib/lua-torch/nn/VolumetricDropout.lua | 55
-rw-r--r-- contrib/lua-torch/nn/VolumetricFractionalMaxPooling.lua | 175
-rw-r--r-- contrib/lua-torch/nn/VolumetricFullConvolution.lua | 225
-rw-r--r-- contrib/lua-torch/nn/VolumetricMaxPooling.lua | 102
-rw-r--r-- contrib/lua-torch/nn/VolumetricMaxUnpooling.lua | 56
-rw-r--r-- contrib/lua-torch/nn/VolumetricReplicationPadding.lua | 58
-rw-r--r-- contrib/lua-torch/nn/WeightNorm.lua | 208
-rw-r--r-- contrib/lua-torch/nn/WeightedEuclidean.lua | 244
-rw-r--r-- contrib/lua-torch/nn/WeightedMSECriterion.lua | 45
-rw-r--r-- contrib/lua-torch/nn/WhiteNoise.lua | 40
-rw-r--r-- contrib/lua-torch/nn/ZeroGrad.lua | 14
-rw-r--r-- contrib/lua-torch/nn/ZipTable.lua | 34
-rw-r--r-- contrib/lua-torch/nn/ZipTableOneToMany.lua | 37
-rw-r--r-- contrib/lua-torch/nn/hessian.lua | 391
-rwxr-xr-x contrib/lua-torch/nn/init.lua | 221
-rw-r--r-- contrib/lua-torch/nn/lib/CMakeLists.txt | 5
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/CMakeLists.txt | 47
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/README.md | 32
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/THNN.h | 33
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/Abs.c | 28
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/AbsCriterion.c | 40
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/BCECriterion.c | 66
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/BatchNormalization.c | 149
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/ClassNLLCriterion.c | 163
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/DistKLDivCriterion.c | 44
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/ELU.c | 54
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/FusedRNNKernel.c | 55
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/GatedLinearUnit.c | 73
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/HardShrink.c | 42
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/HardTanh.c | 133
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/IndexLinear.c | 742
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/L1Cost.c | 38
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/LeakyReLU.c | 57
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/Linear.c | 114
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/LogSigmoid.c | 36
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c | 137
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/LookupTable.c | 225
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/MSECriterion.c | 45
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/MarginCriterion.c | 47
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/MultiLabelMarginCriterion.c | 184
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/MultiMarginCriterion.c | 168
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/PReLU.c | 207
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/RReLU.c | 132
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/Sigmoid.c | 28
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SmoothL1Criterion.c | 49
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SoftMarginCriterion.c | 44
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SoftMax.c | 150
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SoftPlus.c | 47
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SoftShrink.c | 42
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SparseLinear.c | 564
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveAveragePooling.c | 258
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveMaxPooling.c | 274
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialAveragePooling.c | 329
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialClassNLLCriterion.c | 131
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionLocal.c | 367
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMM.c | 377
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMap.c | 277
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialDepthWiseConvolution.c | 528
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedConvolution.c | 408
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedMaxPooling.c | 401
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialFractionalMaxPooling.c | 253
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c | 462
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolutionMap.c | 222
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxPooling.c | 44
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxUnpooling.c | 234
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c | 260
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialReplicationPadding.c | 260
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialSubSampling.c | 302
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingBilinear.c | 174
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingNearest.c | 199
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/Sqrt.c | 52
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/Square.c | 59
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/THNN.h | 1501
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/Tanh.c | 49
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/TemporalConvolution.c | 398
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/TemporalMaxPooling.c | 283
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c | 472
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/TemporalSubSampling.c | 156
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/Threshold.c | 64
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricAveragePooling.c | 373
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolution.c | 260
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolutionMM.c | 628
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedConvolution.c | 420
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedMaxPooling.c | 515
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricFractionalMaxPooling.c | 279
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricFullConvolution.c | 541
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxPooling.c | 50
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxUnpooling.c | 373
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricReplicationPadding.c | 357
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingNearest.c | 226
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingTrilinear.c | 213
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/generic/unfold.c | 166
-rw-r--r-- contrib/lua-torch/nn/lib/THNN/init.c | 280
-rw-r--r-- contrib/lua-torch/nn/mkdocs.yml | 18
-rwxr-xr-x contrib/lua-torch/nn/test.lua | 8787
-rw-r--r-- contrib/lua-torch/nn/utils.lua | 223
282 files changed, 43109 insertions, 0 deletions
diff --git a/contrib/lua-torch/nn/.gitignore b/contrib/lua-torch/nn/.gitignore
new file mode 100644
index 000000000..e0fa91eda
--- /dev/null
+++ b/contrib/lua-torch/nn/.gitignore
@@ -0,0 +1,2 @@
+build/
+THNN_h.lua
diff --git a/contrib/lua-torch/nn/.luacheckrc b/contrib/lua-torch/nn/.luacheckrc
new file mode 100644
index 000000000..3d358e9c0
--- /dev/null
+++ b/contrib/lua-torch/nn/.luacheckrc
@@ -0,0 +1,13 @@
+-- -*- mode: lua; -*-
+std = "luajit"
+
+globals = {
+ "torch",
+ "nn",
+ "include",
+}
+
+unused_args = false
+
+
+files['test.lua'].redefined = false
diff --git a/contrib/lua-torch/nn/.travis.yml b/contrib/lua-torch/nn/.travis.yml
new file mode 100644
index 000000000..1d10e0fb5
--- /dev/null
+++ b/contrib/lua-torch/nn/.travis.yml
@@ -0,0 +1,56 @@
+language: c
+compiler:
+ - gcc
+ - clang
+cache:
+ directories:
+ - $HOME/OpenBlasInstall
+sudo: false
+env:
+ - TORCH_LUA_VERSION=LUAJIT21
+ - TORCH_LUA_VERSION=LUA51
+ - TORCH_LUA_VERSION=LUA52
+addons:
+ apt:
+ packages:
+ - cmake
+ - gfortran
+ - gcc-multilib
+ - gfortran-multilib
+ - liblapack-dev
+ - build-essential
+ - gcc
+ - g++
+ - curl
+ - cmake
+ - libreadline-dev
+ - git-core
+ - libqt4-core
+ - libqt4-gui
+ - libqt4-dev
+ - libjpeg-dev
+ - libpng-dev
+ - ncurses-dev
+ - imagemagick
+ - libzmq3-dev
+ - gfortran
+ - unzip
+ - gnuplot
+ - gnuplot-x11
+before_script:
+- export ROOT_TRAVIS_DIR=$(pwd)
+- export INSTALL_PREFIX=~/torch/install
+- ls $HOME/OpenBlasInstall/lib || (cd /tmp/ && git clone https://github.com/xianyi/OpenBLAS.git -b master && cd OpenBLAS && (make NO_AFFINITY=1 -j$(getconf _NPROCESSORS_ONLN) 2>/dev/null >/dev/null) && make PREFIX=$HOME/OpenBlasInstall install)
+- git clone https://github.com/torch/distro.git ~/torch --recursive
+- cd ~/torch && git submodule update --init --recursive
+- mkdir build && cd build
+- export CMAKE_LIBRARY_PATH=$HOME/OpenBlasInstall/include:$HOME/OpenBlasInstall/lib:$CMAKE_LIBRARY_PATH
+- cmake .. -DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" -DCMAKE_BUILD_TYPE=Release -DWITH_${TORCH_LUA_VERSION}=ON
+- make && make install
+- cd $ROOT_TRAVIS_DIR
+- export LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH
+script:
+- ${INSTALL_PREFIX}/bin/luarocks make rocks/nn-scm-1.rockspec
+- export PATH=${INSTALL_PREFIX}/bin:$PATH
+- export TESTLUA=$(which luajit lua | head -n 1)
+- ${TESTLUA} -lnn -e "t=nn.test(); if t.errors[1] then os.exit(1) end"
diff --git a/contrib/lua-torch/nn/Abs.lua b/contrib/lua-torch/nn/Abs.lua
new file mode 100644
index 000000000..b32b64f79
--- /dev/null
+++ b/contrib/lua-torch/nn/Abs.lua
@@ -0,0 +1,22 @@
+local Abs, parent = torch.class('nn.Abs', 'nn.Module')
+
+function Abs:__init()
+ parent.__init(self)
+end
+
+function Abs:updateOutput(input)
+ input.THNN.Abs_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function Abs:updateGradInput(input, gradOutput)
+ input.THNN.Abs_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata()
+ )
+ return self.gradInput
+end
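
A minimal usage sketch for nn.Abs (illustrative, not part of this commit; assumes torch and nn are installed). The module is stateless, so forward and backward can be called directly:

require 'nn'

local m = nn.Abs()
local x = torch.Tensor{-1, 2, -3}
local y = m:forward(x)                    -- y = [1, 2, 3]
-- the gradient of |x| is sign(x), so gx = [-1, 1, -1]
local gx = m:backward(x, torch.ones(3))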
diff --git a/contrib/lua-torch/nn/AbsCriterion.lua b/contrib/lua-torch/nn/AbsCriterion.lua
new file mode 100644
index 000000000..65e2f8ae1
--- /dev/null
+++ b/contrib/lua-torch/nn/AbsCriterion.lua
@@ -0,0 +1,32 @@
+local AbsCriterion, parent = torch.class('nn.AbsCriterion', 'nn.Criterion')
+
+function AbsCriterion:__init(sizeAverage)
+ parent.__init(self)
+ if sizeAverage ~= nil then
+ self.sizeAverage = sizeAverage
+ else
+ self.sizeAverage = true
+ end
+end
+
+function AbsCriterion:updateOutput(input, target)
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.AbsCriterion_updateOutput(
+ input:cdata(),
+ target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function AbsCriterion:updateGradInput(input, target)
+ input.THNN.AbsCriterion_updateGradInput(
+ input:cdata(),
+ target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Add.lua b/contrib/lua-torch/nn/Add.lua
new file mode 100644
index 000000000..d071a15b3
--- /dev/null
+++ b/contrib/lua-torch/nn/Add.lua
@@ -0,0 +1,66 @@
+local Add, parent = torch.class('nn.Add', 'nn.Module')
+
+function Add:__init(inputSize,scalar)
+ parent.__init(self)
+
+ local size = inputSize
+ if scalar then size=1 end
+ self.scalar = scalar
+ self.bias = torch.Tensor(size)
+ self.gradBias = torch.Tensor(size)
+
+ self._ones = torch.Tensor{1}
+
+ self:reset()
+end
+
+function Add:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.bias:size(1))
+ end
+
+ self.bias:uniform(-stdv, stdv)
+end
+
+function Add:updateOutput(input)
+ self.output:resizeAs(input):copy(input)
+ if self.scalar then
+ self.output:add(self.bias[1]);
+ else
+ if input:isSameSizeAs(self.bias) then
+ self.output:add(self.bias)
+ else
+ local batchSize = input:size(1)
+ if self._ones:size(1) ~= batchSize then
+ self._ones:resize(batchSize):fill(1)
+ end
+ local bias = self.bias:view(-1)
+ local output = self.output:view(batchSize, -1)
+ output:addr(1, self._ones, bias)
+ end
+ end
+ return self.output
+end
+
+function Add:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+ return self.gradInput
+ end
+end
+
+function Add:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ if self.gradBias:size(1) == 1 then
+ self.gradBias[1] = self.gradBias[1] + scale*gradOutput:sum();
+ else
+ if input:isSameSizeAs(self.bias) then
+ self.gradBias:add(scale, gradOutput)
+ else
+ local gradOutput = gradOutput:view(input:size(1), -1)
+ self.gradBias:view(-1):addmv(scale, gradOutput:t(), self._ones)
+ end
+ end
+end
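
An illustrative sketch of nn.Add in its two modes (assumed usage, not part of this commit): a learned per-element bias vector, or a single shared scalar bias:

require 'nn'

local add = nn.Add(5)                     -- bias is a learned vector of 5 elements
local y1 = add:forward(torch.zeros(5))    -- output equals add.bias

local addScalar = nn.Add(5, true)         -- scalar mode: one bias shared by all elements
local y2 = addScalar:forward(torch.zeros(2, 5))  -- every entry equals addScalar.bias[1]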
diff --git a/contrib/lua-torch/nn/AddConstant.lua b/contrib/lua-torch/nn/AddConstant.lua
new file mode 100644
index 000000000..b686d719c
--- /dev/null
+++ b/contrib/lua-torch/nn/AddConstant.lua
@@ -0,0 +1,50 @@
+local AddConstant, parent = torch.class('nn.AddConstant', 'nn.Module')
+
+function AddConstant:__init(constant_scalar,ip)
+ parent.__init(self)
+ self.constant_scalar = constant_scalar
+
+ -- default for inplace is false
+ self.inplace = ip or false
+ if (ip and type(ip) ~= 'boolean') then
+ error('in-place flag must be boolean')
+ end
+end
+
+function AddConstant:updateOutput(input)
+ assert(type(self.constant_scalar) == 'number' or
+ (torch.isTensor(self.constant_scalar) and input:nDimension() <= 2 and
+ input:size(input:nDimension()) == self.constant_scalar:size(1)),
+ 'input is not scalar or doesn\'t match the dimension of the constant!')
+ local tmp
+ if torch.isTensor(self.constant_scalar) and input:nDimension() == 2 then
+ local nOutput = self.constant_scalar:size(1)
+ tmp = self.constant_scalar.new()
+ tmp:resize(1,nOutput)
+ tmp:copy(self.constant_scalar)
+ tmp = tmp:expand(input:size(1),nOutput)
+ else
+ tmp = self.constant_scalar
+ end
+ if self.inplace then
+ input:add(tmp)
+ self.output:set(input)
+ else
+ self.output:resizeAs(input)
+ self.output:copy(input)
+ self.output:add(tmp)
+ end
+ return self.output
+end
+
+function AddConstant:updateGradInput(input, gradOutput)
+ if self.inplace then
+ self.gradInput:set(gradOutput)
+ -- restore previous input value
+ input:add(-self.constant_scalar)
+ else
+ self.gradInput:resizeAs(gradOutput)
+ self.gradInput:copy(gradOutput)
+ end
+ return self.gradInput
+end
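
A usage sketch for nn.AddConstant (illustrative): the second constructor argument requests in-place operation, which avoids an extra allocation but overwrites the input tensor, as the updateGradInput above has to undo:

require 'nn'

local m = nn.AddConstant(2)               -- out-of-place: input left untouched
local x = torch.Tensor{1, 2, 3}
local y = m:forward(x)                    -- y = [3, 4, 5], x unchanged

local mi = nn.AddConstant(2, true)        -- in-place: the output is the shifted input itself
local z = mi:forward(x)                   -- x now holds [3, 4, 5]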
diff --git a/contrib/lua-torch/nn/BCECriterion.lua b/contrib/lua-torch/nn/BCECriterion.lua
new file mode 100644
index 000000000..8bb5f8178
--- /dev/null
+++ b/contrib/lua-torch/nn/BCECriterion.lua
@@ -0,0 +1,64 @@
+local THNN = require 'nn.THNN'
+local BCECriterion, parent = torch.class('nn.BCECriterion', 'nn.Criterion')
+
+function BCECriterion:__init(weights, sizeAverage)
+ parent.__init(self)
+ if sizeAverage ~= nil then
+ self.sizeAverage = sizeAverage
+ else
+ self.sizeAverage = true
+ end
+ if weights ~= nil then
+ assert(weights:dim() == 1, "weights input should be 1-D Tensor")
+ self.weights = weights
+ end
+end
+
+
+function BCECriterion:__len()
+ return self.weights and #self.weights or 0
+end
+
+function BCECriterion:updateOutput(input, target)
+ -- - log(input) * target - log(1 - input) * (1 - target)
+ assert( input:nElement() == target:nElement(),
+ "input and target size mismatch")
+ self.output_tensor = self.output_tensor or input.new(1)
+
+ local weights = self.weights
+ if weights ~= nil and target:dim() ~= 1 then
+ weights = self.weights:view(1, target:size(2)):expandAs(target)
+ end
+
+ input.THNN.BCECriterion_updateOutput(
+ input:cdata(),
+ target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage,
+ THNN.optionalTensor(weights)
+ )
+
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function BCECriterion:updateGradInput(input, target)
+ -- - (target - input) / ( input (1 - input) )
+ assert( input:nElement() == target:nElement(),
+ "input and target size mismatch")
+
+ local weights = self.weights
+ if weights ~= nil and target:dim() ~= 1 then
+ weights = self.weights:view(1, target:size(2)):expandAs(target)
+ end
+
+ input.THNN.BCECriterion_updateGradInput(
+ input:cdata(),
+ target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage,
+ THNN.optionalTensor(weights)
+ )
+
+ return self.gradInput
+end
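
An illustrative sketch of nn.BCECriterion (assumed usage, not part of this commit): inputs must be probabilities in (0, 1), typically produced by nn.Sigmoid, and targets are 0/1 labels:

require 'nn'

local crit = nn.BCECriterion()
local p = torch.Tensor{0.9, 0.2, 0.7}     -- predicted probabilities
local t = torch.Tensor{1, 0, 1}           -- binary targets
-- loss = mean of -log(p) on positives and -log(1-p) on negatives
local loss = crit:forward(p, t)
local grad = crit:backward(p, t)          -- dloss/dp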
diff --git a/contrib/lua-torch/nn/BatchNormalization.lua b/contrib/lua-torch/nn/BatchNormalization.lua
new file mode 100644
index 000000000..8dfc576b3
--- /dev/null
+++ b/contrib/lua-torch/nn/BatchNormalization.lua
@@ -0,0 +1,213 @@
+--[[
+ This file implements Batch Normalization as described in the paper:
+ "Batch Normalization: Accelerating Deep Network Training
+ by Reducing Internal Covariate Shift"
+ by Sergey Ioffe, Christian Szegedy
+
+ This implementation is useful for inputs NOT coming from convolution layers.
+ For convolution layers, use nn.SpatialBatchNormalization.
+
+ The operation implemented is:
+ y = ( ( x - mean(x) ) / standard-deviation(x) ) * gamma + beta
+ where gamma and beta are learnable parameters.
+
+ The learning of gamma and beta is optional.
+
+ Usage:
+ with learnable parameters: nn.BatchNormalization(N [,eps] [,momentum])
+ where N = dimensionality of input
+ without learnable parameters: nn.BatchNormalization(N [,eps] [,momentum], false)
+
+ eps is a small value added to the standard-deviation to avoid divide-by-zero.
+ Defaults to 1e-5
+
+ During training, this layer keeps a running estimate of its computed mean and std.
+ The running estimate is kept with a default momentum of 0.1 (unless overridden).
+ At test time, this running mean/std is used to normalize.
+]]--
+local BN,parent = torch.class('nn.BatchNormalization', 'nn.Module')
+local THNN = require 'nn.THNN'
+
+BN.__version = 2
+
+-- expected dimension of input
+BN.nDim = 2
+
+function BN:__init(nOutput, eps, momentum, affine)
+ parent.__init(self)
+ assert(nOutput and type(nOutput) == 'number',
+ 'Missing argument #1: dimensionality of input. ')
+ assert(nOutput ~= 0, 'To set affine=false call BatchNormalization'
+ .. '(nOutput, eps, momentum, false) ')
+ if affine ~= nil then
+ assert(type(affine) == 'boolean', 'affine has to be true/false')
+ self.affine = affine
+ else
+ self.affine = true
+ end
+ self.eps = eps or 1e-5
+ self.train = true
+ self.momentum = momentum or 0.1
+ self.running_mean = torch.zeros(nOutput)
+ self.running_var = torch.ones(nOutput)
+
+ if self.affine then
+ self.weight = torch.Tensor(nOutput)
+ self.bias = torch.Tensor(nOutput)
+ self.gradWeight = torch.Tensor(nOutput)
+ self.gradBias = torch.Tensor(nOutput)
+ self:reset()
+ end
+end
+
+function BN:reset()
+ if self.weight then
+ self.weight:uniform()
+ end
+ if self.bias then
+ self.bias:zero()
+ end
+ self.running_mean:zero()
+ self.running_var:fill(1)
+end
+
+function BN:checkInputDim(input)
+ local iDim = input:dim()
+ assert(iDim == self.nDim or
+ (iDim == self.nDim - 1 and self.train == false), string.format(
+ 'only mini-batch supported (%dD tensor), got %dD tensor instead',
+ self.nDim, iDim))
+ local featDim = (iDim == self.nDim - 1) and 1 or 2
+ assert(input:size(featDim) == self.running_mean:nElement(), string.format(
+ 'got %d-feature tensor, expected %d',
+ input:size(featDim), self.running_mean:nElement()))
+end
+
+local function makeContiguous(self, input, gradOutput)
+ if not input:isContiguous() then
+ self._input = self._input or input.new()
+ self._input:resizeAs(input):copy(input)
+ input = self._input
+ end
+ if gradOutput then
+ if not gradOutput:isContiguous() then
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ gradOutput = self._gradOutput
+ end
+ end
+ return input, gradOutput
+end
+
+local function makeBatch(self, input)
+ local iDim = input:dim()
+ if self.train == false and iDim == self.nDim - 1 then
+ return nn.utils.addSingletonDimension(input, input, 1)
+ else
+ return input
+ end
+end
+
+function BN:updateOutput(input)
+ self:checkInputDim(input)
+
+ input = makeContiguous(self, input)
+ input = makeBatch(self, input)
+
+ self.save_mean = self.save_mean or input.new()
+ self.save_mean:resizeAs(self.running_mean)
+ self.save_std = self.save_std or input.new()
+ self.save_std:resizeAs(self.running_var)
+
+ input.THNN.BatchNormalization_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ THNN.optionalTensor(self.weight),
+ THNN.optionalTensor(self.bias),
+ self.running_mean:cdata(),
+ self.running_var:cdata(),
+ self.save_mean:cdata(),
+ self.save_std:cdata(),
+ self.train,
+ self.momentum,
+ self.eps)
+
+ return self.output
+end
+
+local function backward(self, input, gradOutput, scale, gradInput, gradWeight, gradBias)
+ self:checkInputDim(input)
+ self:checkInputDim(gradOutput)
+ assert(self.save_mean and self.save_std, 'must call :updateOutput() first')
+
+ input, gradOutput = makeContiguous(self, input, gradOutput)
+ input = makeBatch(self, input)
+ gradOutput = makeBatch(self, gradOutput)
+
+ scale = scale or 1
+ if gradInput then
+ gradInput:resizeAs(gradOutput)
+ end
+
+ input.THNN.BatchNormalization_backward(
+ input:cdata(),
+ gradOutput:cdata(),
+ THNN.optionalTensor(gradInput),
+ THNN.optionalTensor(gradWeight),
+ THNN.optionalTensor(gradBias),
+ THNN.optionalTensor(self.weight),
+ self.running_mean:cdata(),
+ self.running_var:cdata(),
+ self.save_mean:cdata(),
+ self.save_std:cdata(),
+ self.train,
+ scale,
+ self.eps)
+
+ return self.gradInput
+end
+
+function BN:backward(input, gradOutput, scale)
+ return backward(self, input, gradOutput, scale, self.gradInput, self.gradWeight, self.gradBias)
+end
+
+function BN:updateGradInput(input, gradOutput)
+ return backward(self, input, gradOutput, 1, self.gradInput)
+end
+
+function BN:accGradParameters(input, gradOutput, scale)
+ return backward(self, input, gradOutput, scale, nil, self.gradWeight, self.gradBias)
+end
+
+function BN:read(file, version)
+ parent.read(self, file)
+ if version < 2 then
+ if self.running_std then
+ self.running_var = self.running_std:pow(-2):add(-self.eps)
+ self.running_std = nil
+ end
+ end
+end
+
+function BN:clearState()
+ -- first 5 buffers are not present in the current implementation,
+ -- but we keep them for cleaning old saved models
+ nn.utils.clear(self, {
+ 'buffer',
+ 'buffer2',
+ 'centered',
+ 'std',
+ 'normalized',
+ '_input',
+ '_gradOutput',
+ 'save_mean',
+ 'save_std',
+ })
+ return parent.clearState(self)
+end
+
+function BN:__tostring__()
+ return string.format('%s (%dD) (%d)', torch.type(self), self.nDim, self.running_mean:nElement())
+end
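
A usage sketch (illustrative, not from the commit): nn.BatchNormalization normalizes with batch statistics during training and with the stored running estimates after :evaluate():

require 'nn'

local bn = nn.BatchNormalization(10)      -- 10 input features, affine by default
local x = torch.randn(32, 10)             -- mini-batch of 32

bn:training()
local yTrain = bn:forward(x)              -- batch statistics; updates running_mean/running_var

bn:evaluate()
local yTest = bn:forward(x)               -- uses the stored running estimates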
diff --git a/contrib/lua-torch/nn/Bilinear.lua b/contrib/lua-torch/nn/Bilinear.lua
new file mode 100644
index 000000000..9350b03ec
--- /dev/null
+++ b/contrib/lua-torch/nn/Bilinear.lua
@@ -0,0 +1,163 @@
+local Bilinear, parent = torch.class('nn.Bilinear', 'nn.Module')
+
+local function isint(x) return type(x) == 'number' and x == math.floor(x) end
+function Bilinear:__assertInput(input)
+ assert(input and type(input) == 'table' and #input == 2,
+ 'input should be a table containing two data Tensors')
+ assert(input[1]:nDimension() == 2 and input[2]:nDimension() == 2,
+ 'input Tensors should be two-dimensional')
+ assert(input[1]:size(1) == input[2]:size(1),
+ 'input Tensors should have the same number of rows (instances)')
+ assert(input[1]:size(2) == self.weight:size(2),
+ 'dimensionality of first input is erroneous')
+ assert(input[2]:size(2) == self.weight:size(3),
+ 'dimensionality of second input is erroneous')
+end
+function Bilinear:__assertInputGradOutput(input, gradOutput)
+ assert(input[1]:size(1) == gradOutput:size(1),
+ 'number of rows in gradOutput does not match input')
+ assert(gradOutput:size(2) == self.weight:size(1),
+ 'number of columns in gradOutput does not match output size of layer')
+end
+
+function Bilinear:__init(inputSize1, inputSize2, outputSize, bias)
+
+ -- assertions:
+ assert(self and inputSize1 and inputSize2 and outputSize,
+ 'should specify inputSize1 and inputSize2 and outputSize')
+ assert(isint(inputSize1) and isint(inputSize2) and isint(outputSize),
+ 'inputSize1 and inputSize2 and outputSize should be integer numbers')
+ assert(inputSize1 > 0 and inputSize2 > 0 and outputSize > 0,
+ 'inputSize1 and inputSize2 and outputSize should be positive numbers')
+
+ -- set up model:
+ parent.__init(self)
+ local bias = ((bias == nil) and true) or bias
+ self.weight = torch.Tensor(outputSize, inputSize1, inputSize2)
+ self.gradWeight = torch.Tensor(outputSize, inputSize1, inputSize2)
+ if bias then
+ self.bias = torch.Tensor(outputSize)
+ self.gradBias = torch.Tensor(outputSize)
+ end
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+ self:reset()
+end
+
+function Bilinear:reset(stdv)
+ assert(self)
+ if stdv then
+ assert(stdv and type(stdv) == 'number' and stdv > 0,
+ 'standard deviation should be a positive number')
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1 / math.sqrt(self.weight:size(2))
+ end
+ self.weight:uniform(-stdv, stdv)
+ if self.bias then self.bias:uniform(-stdv, stdv) end
+ return self
+end
+
+function Bilinear:updateOutput(input)
+ assert(self)
+ self:__assertInput(input)
+
+ -- set up buffer:
+ self.buff2 = self.buff2 or input[1].new()
+ self.buff2:resizeAs(input[2])
+
+ -- compute output scores:
+ self.output:resize(input[1]:size(1), self.weight:size(1))
+ for k = 1,self.weight:size(1) do
+ torch.mm(self.buff2, input[1], self.weight[k])
+ self.buff2:cmul(input[2])
+ torch.sum(self.output:narrow(2, k, 1), self.buff2, 2)
+ end
+ if self.bias then
+ self.output:add(
+ self.bias:reshape(1, self.bias:nElement()):expandAs(self.output)
+ )
+ end
+ return self.output
+end
+
+function Bilinear:updateGradInput(input, gradOutput)
+ assert(self)
+ if self.gradInput then
+ self:__assertInputGradOutput(input, gradOutput)
+
+ if #self.gradInput == 0 then
+ for i = 1, 2 do self.gradInput[i] = input[1].new() end
+ end
+
+ -- compute d output / d input:
+ self.gradInput[1]:resizeAs(input[1]):fill(0)
+ self.gradInput[2]:resizeAs(input[2]):fill(0)
+
+
+ -- do first slice of weight tensor (k = 1)
+ self.gradInput[1]:mm(input[2], self.weight[1]:t())
+ self.gradInput[1]:cmul(gradOutput:narrow(2,1,1):expand(self.gradInput[1]:size(1),
+ self.gradInput[1]:size(2)))
+ self.gradInput[2]:addmm(1, input[1], self.weight[1])
+ self.gradInput[2]:cmul(gradOutput:narrow(2,1,1):expand(self.gradInput[2]:size(1),
+ self.gradInput[2]:size(2)))
+
+ -- do remaining slices of weight tensor
+ if self.weight:size(1) > 1 then
+ self.buff1 = self.buff1 or input[1].new()
+ self.buff1:resizeAs(input[1])
+
+ for k = 2, self.weight:size(1) do
+ self.buff1:mm(input[2], self.weight[k]:t())
+ self.buff1:cmul(gradOutput:narrow(2,k,1):expand(self.gradInput[1]:size(1),
+ self.gradInput[1]:size(2)))
+ self.gradInput[1]:add(self.buff1)
+
+ self.buff2:mm(input[1], self.weight[k])
+ self.buff2:cmul(gradOutput:narrow(2,k,1):expand(self.gradInput[2]:size(1),
+ self.gradInput[2]:size(2)))
+ self.gradInput[2]:add(self.buff2)
+ end
+ end
+ return self.gradInput
+ end
+end
+
+function Bilinear:accGradParameters(input, gradOutput, scale)
+ local scale = scale or 1
+ self:__assertInputGradOutput(input, gradOutput)
+ assert(scale and type(scale) == 'number' and scale >= 0)
+
+ -- make sure we have buffer:
+ self.buff1 = self.buff1 or input[1].new()
+ self.buff1:resizeAs(input[1])
+
+ -- accumulate parameter gradients:
+ for k = 1,self.weight:size(1) do
+ torch.cmul(
+ self.buff1, input[1], gradOutput:narrow(2, k, 1):expandAs(input[1])
+ )
+ self.gradWeight[k]:addmm(self.buff1:t(), input[2])
+ end
+ if self.bias then self.gradBias:add(scale, gradOutput:sum(1)) end
+end
+
+function Bilinear:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ -- we do not need to accumulate parameters when sharing:
+ self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+end
+
+function Bilinear:__tostring__()
+ return torch.type(self) ..
+ string.format(
+ '(%dx%d -> %d) %s',
+ self.weight:size(2), self.weight:size(3), self.weight:size(1),
+ (self.bias == nil and ' without bias' or '')
+ )
+end
+
+function Bilinear:clearState()
+ if self.buff2 then self.buff2:set() end
+ if self.buff1 then self.buff1:set() end
+ return parent.clearState(self)
+end
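
A sketch of nn.Bilinear usage (illustrative): the input is a table of two 2D tensors with the same number of rows, and output unit k computes x1^T W[k] x2 plus an optional bias:

require 'nn'

local m = nn.Bilinear(5, 4, 3)            -- inputSize1=5, inputSize2=4, outputSize=3
local x1 = torch.randn(8, 5)              -- 8 instances
local x2 = torch.randn(8, 4)
local y = m:forward{x1, x2}               -- 8x3 output
local g = m:backward({x1, x2}, torch.randn(8, 3))  -- table {dL/dx1, dL/dx2}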
diff --git a/contrib/lua-torch/nn/Bottle.lua b/contrib/lua-torch/nn/Bottle.lua
new file mode 100644
index 000000000..6dee432f5
--- /dev/null
+++ b/contrib/lua-torch/nn/Bottle.lua
@@ -0,0 +1,71 @@
+local Bottle, parent = torch.class("nn.Bottle", "nn.Decorator")
+local unpack = unpack or table.unpack
+
+function Bottle:__init(module, nInputDim, nOutputDim)
+ parent.__init(self, module)
+ self.nInputDim = nInputDim or 2
+ self.nOutputDim = nOutputDim or self.nInputDim
+ self.dimDelta = self.nInputDim - self.nOutputDim
+ -- Used to reshape the gradients
+ self.inShape = torch.Tensor(self.nInputDim)
+ self.outShape = torch.Tensor(self.nOutputDim)
+end
+
+function Bottle:updateOutput(input)
+ -- first batchDims dimensions will be fused
+ local batchDims = input:dim() - self.nInputDim + 1
+ -- see if bottle is required
+ if batchDims > 1 then
+ -- bottle the first dims
+ local inSize = torch.LongTensor(input:size())
+ local squeezeSize = inSize[{{1, batchDims - 1}}]:prod()
+ self.inShape:copy(inSize[{{batchDims, input:dim()}}])
+ self.inShape[{{1}}]:mul(squeezeSize)
+ -- Forward with the module's dimension
+ local newInput = input:view(unpack(self.inShape:totable()))
+ local output = self.modules[1]:updateOutput(newInput)
+ assert(output:dim() == self.nOutputDim,
+ "Wrong number of output dims on module. Expected: " ..
+ self.nOutputDim .. ' but got ' ..
+ tostring(output and output:dim()))
+ self.outShape:copy(torch.LongTensor(output:size()))
+ if math.abs(self.dimDelta) > 0 then
+ inSize:resize(inSize:size(1) - self.dimDelta)
+ end
+ inSize[{{batchDims, inSize:size(1)}}]:copy(self.outShape)
+ inSize[{{batchDims}}]:div(squeezeSize)
+ -- unbottle
+ self.output:set(output:view(unpack(torch.totable(inSize))))
+ else
+ self.output:set(self.modules[1]:updateOutput(input))
+ end
+ return self.output
+end
+
+function Bottle:updateGradInput(input, gradOutput)
+ if input:dim() > self.nInputDim then
+ local input_ = input:view(unpack(self.inShape:totable()))
+ local gradOutput_ = gradOutput:view(unpack(self.outShape:totable()))
+ self.modules[1]:updateGradInput(input_, gradOutput_)
+ if self.modules[1].gradInput then
+ self.gradInput:set(self.modules[1].gradInput:viewAs(input))
+ else
+ self.gradInput = nil
+ end
+ else
+ if self.modules[1].gradInput then
+ self.gradInput:set(self.modules[1]:updateGradInput(input, gradOutput))
+ else
+ self.gradInput = nil
+ end
+ end
+ return self.gradInput
+end
+
+function Bottle:accGradParameters(input, gradOutput, scale)
+ if input:dim() > self.nInputDim then
+ input = input:view(unpack(self.inShape:totable()))
+ gradOutput = gradOutput:view(unpack(self.outShape:totable()))
+ end
+ self.modules[1]:accGradParameters(input, gradOutput, scale)
+end
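
An illustrative sketch of nn.Bottle: it fuses the leading batch dimensions so a module expecting nInputDim-dimensional input (2 by default, as for nn.Linear) can be applied to higher-dimensional tensors:

require 'nn'

local m = nn.Bottle(nn.Linear(10, 3))     -- Linear expects 2D input
local x = torch.randn(4, 5, 10)           -- 3D input: a 4x5 'batch' of 10-vectors
local y = m:forward(x)                    -- viewed as 20x10 internally; y is 4x5x3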
diff --git a/contrib/lua-torch/nn/CAdd.lua b/contrib/lua-torch/nn/CAdd.lua
new file mode 100644
index 000000000..1d7b45726
--- /dev/null
+++ b/contrib/lua-torch/nn/CAdd.lua
@@ -0,0 +1,127 @@
+local CAdd, parent = torch.class("nn.CAdd", "nn.Module")
+
+function CAdd:__init(...)
+ parent.__init(self)
+
+ local arg = {...}
+
+ self.size = torch.LongStorage()
+ local n = #arg
+ if n == 1 and torch.type(arg[1]) == 'torch.LongStorage' then
+ self.size:resize(#arg[1]):copy(arg[1])
+ else
+ self.size:resize(n)
+ for i=1,n do
+ self.size[i] = arg[i]
+ end
+ end
+
+ self.bias = torch.Tensor(self.size)
+ self.gradBias = torch.Tensor(self.size)
+
+ self.output:resize(self.size)
+
+ self:reset()
+end
+
+function CAdd:reset(stdv)
+ if stdv then
+ --std of uniform distribution on interval [-a,a] = a/sqrt(3)
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1.0/math.sqrt(self.bias:nElement())
+ end
+ self.bias:uniform(-stdv,stdv)
+end
+
+function CAdd:updateOutput(input)
+ self._output = self._output or input.new()
+ self._bias = self._bias or input.new()
+ self._expand = self._expand or input.new()
+ self._repeat = self._repeat or input.new()
+
+ self.output:resizeAs(input):copy(input)
+ if input:nElement() == self.bias:nElement() then
+ self.output:add(self.bias)
+ else
+ if self.bias:dim() == input:dim() then
+ self._output:set(self.output)
+ self._bias:set(self.bias)
+ else
+ local batchSize = input:size(1)
+ self._output:view(self.output, batchSize, -1)
+ self._bias:view(self.bias, 1, -1)
+ end
+
+ self._expand:expandAs(self._bias, self._output)
+
+ --expandAs uses stride 0 and self._expand is not contiguous
+ --cuda ops may assume contiguous input
+ if torch.type(input) == 'torch.CudaTensor' then
+ self._repeat:resizeAs(self._expand):copy(self._expand)
+ self._output:add(self._repeat)
+ else
+ self._output:add(self._expand)
+ end
+ end
+
+ return self.output
+end
+
+function CAdd:updateGradInput(input, gradOutput)
+ self.gradInput = self.gradInput or input.new()
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+
+ return self.gradInput
+end
+
+function CAdd:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+
+ self._gradBias = self._gradBias or gradOutput.new()
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._repeat = self._repeat or gradOutput.new()
+
+ if self.bias:nElement() == gradOutput:nElement() then
+ self.gradBias:add(scale, gradOutput)
+ else
+ if self.bias:dim() == gradOutput:dim() then
+ self._gradBias:set(self.gradBias)
+ self._gradOutput:set(gradOutput)
+ else
+ local batchSize = input:size(1)
+ self._gradBias:view(self.gradBias, 1, -1)
+ self._gradOutput:view(gradOutput, batchSize, -1)
+ end
+
+ self._gradBias:expandAs(self._gradBias, self._gradOutput)
+
+ --expandAs uses stride 0 and self._gradBias is not contiguous
+ --cuda ops may assume contiguous input
+ if torch.type(self._gradBias) == 'torch.CudaTensor' then
+ self._repeat:resizeAs(self._gradBias):copy(self._gradBias)
+ self._repeat:add(scale, self._gradOutput)
+ self._gradBias:copy(self._repeat)
+ else
+ self._gradBias:add(scale, self._gradOutput)
+ end
+ end
+end
+
+function CAdd:type(type, tensorCache)
+ if type then
+ self:clearState()
+ end
+ return parent.type(self, type, tensorCache)
+end
+
+function CAdd:clearState()
+ nn.utils.clear(self, {
+ '_gradBias',
+ '_expand',
+ '_output',
+ '_bias',
+ '_repeat'
+ })
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/CAddTable.lua b/contrib/lua-torch/nn/CAddTable.lua
new file mode 100644
index 000000000..79deb7e9b
--- /dev/null
+++ b/contrib/lua-torch/nn/CAddTable.lua
@@ -0,0 +1,36 @@
+local CAddTable, parent = torch.class('nn.CAddTable', 'nn.Module')
+
+function CAddTable:__init(ip)
+ parent.__init(self)
+ self.inplace = ip
+ self.gradInput = {}
+end
+
+function CAddTable:updateOutput(input)
+ if self.inplace then
+ self.output:set(input[1])
+ else
+ self.output:resizeAs(input[1]):copy(input[1])
+ end
+ for i=2,#input do
+ self.output:add(input[i])
+ end
+ return self.output
+end
+
+function CAddTable:updateGradInput(input, gradOutput)
+ for i=1,#input do
+ self.gradInput[i] = self.gradInput[i] or input[1].new()
+ if self.inplace then
+ self.gradInput[i]:set(gradOutput)
+ else
+ self.gradInput[i]:resizeAs(input[i]):copy(gradOutput)
+ end
+ end
+
+ for i=#input+1, #self.gradInput do
+ self.gradInput[i] = nil
+ end
+
+ return self.gradInput
+end
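
A usage sketch for nn.CAddTable (illustrative): it sums a table of same-sized tensors; constructing it with true selects the in-place variant:

require 'nn'

local m = nn.CAddTable()
local a = torch.Tensor{1, 2}
local b = torch.Tensor{10, 20}
local y = m:forward{a, b}                 -- y = [11, 22]
local g = m:backward({a, b}, torch.ones(2))  -- each g[i] is a copy of gradOutput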
diff --git a/contrib/lua-torch/nn/CAddTensorTable.lua b/contrib/lua-torch/nn/CAddTensorTable.lua
new file mode 100644
index 000000000..16efe4450
--- /dev/null
+++ b/contrib/lua-torch/nn/CAddTensorTable.lua
@@ -0,0 +1,43 @@
+
+local CAddTensorTable, parent = torch.class('nn.CAddTensorTable', 'nn.Module')
+
+function CAddTensorTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+end
+
+-- input is a table with 2 entries. input[1] is the vector to be added.
+-- input[2] is the table to which we add the vector
+function CAddTensorTable:updateOutput(input)
+ local currentOutput = {}
+ for i=1,#input[2] do
+ currentOutput[i] = currentOutput[i] or input[1].new()
+ currentOutput[i]:resizeAs(input[1])
+ currentOutput[i]:copy(input[2][i])
+ currentOutput[i]:add(input[1])
+ end
+ for i = #input[2]+1, #currentOutput do
+ currentOutput[i] = nil
+ end
+ self.output = currentOutput
+ return self.output
+end
+
+function CAddTensorTable:updateGradInput(input, gradOutput)
+ self.gradInput[1] = self.gradInput[1] or input[1].new()
+ self.gradInput[1]:resizeAs(input[1])
+ self.gradInput[1]:copy(gradOutput[1])
+ for i=2, #input[2] do
+ self.gradInput[1]:add(gradOutput[i])
+ end
+ self.gradInput[2] = self.gradInput[2] or {}
+ for i=1,#input[2] do
+ self.gradInput[2][i] = self.gradInput[2][i] or input[1].new()
+ self.gradInput[2][i]:resizeAs(input[1])
+ self.gradInput[2][i]:copy(gradOutput[i])
+ end
+ for i=#input[2]+1, #self.gradInput[2] do
+ self.gradInput[2][i] = nil
+ end
+ return self.gradInput
+end
\ No newline at end of file
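
An illustrative sketch of nn.CAddTensorTable: input[1] is a vector and input[2] a table of tensors of the same size; the vector is added to every entry of the table:

require 'nn'

local m = nn.CAddTensorTable()
local v = torch.Tensor{1, 1, 1}
local t = {torch.Tensor{1, 2, 3}, torch.Tensor{10, 20, 30}}
local out = m:forward{v, t}               -- {[2, 3, 4], [11, 21, 31]}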
diff --git a/contrib/lua-torch/nn/CDivTable.lua b/contrib/lua-torch/nn/CDivTable.lua
new file mode 100644
index 000000000..bf044c9af
--- /dev/null
+++ b/contrib/lua-torch/nn/CDivTable.lua
@@ -0,0 +1,26 @@
+
+local CDivTable, parent = torch.class('nn.CDivTable', 'nn.Module')
+
+function CDivTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+end
+
+function CDivTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ self.output:cdiv(input[2])
+ return self.output
+end
+
+function CDivTable:updateGradInput(input, gradOutput)
+ self.gradInput[1] = self.gradInput[1] or input[1].new()
+ self.gradInput[2] = self.gradInput[2] or input[1].new()
+ self.gradInput[1]:resizeAs(input[1]):copy(gradOutput):cdiv(input[2])
+ self.gradInput[2]:resizeAs(input[2]):zero():addcdiv(-1,self.gradInput[1],input[2]):cmul(input[1])
+
+ for i=#input+1, #self.gradInput do
+ self.gradInput[i] = nil
+ end
+
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/CMakeLists.txt b/contrib/lua-torch/nn/CMakeLists.txt
new file mode 100644
index 000000000..cebddfbfc
--- /dev/null
+++ b/contrib/lua-torch/nn/CMakeLists.txt
@@ -0,0 +1,14 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
+CMAKE_POLICY(VERSION 2.6)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../torch7/lib/TH)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/../torch7/lib/TH)
+ADD_SUBDIRECTORY(lib)
+
+FILE(STRINGS lib/THNN/generic/THNN.h THNN_headers NEWLINE_CONSUME)
+FILE(WRITE THNN_h.lua "return [[")
+FILE(APPEND THNN_h.lua ${THNN_headers})
+FILE(APPEND THNN_h.lua "]]")
+
+FILE(GLOB luasrc *.lua)
+
+ADD_TORCH_PACKAGE(nn "" "${luasrc}")
diff --git a/contrib/lua-torch/nn/CMaxTable.lua b/contrib/lua-torch/nn/CMaxTable.lua
new file mode 100644
index 000000000..845e38d23
--- /dev/null
+++ b/contrib/lua-torch/nn/CMaxTable.lua
@@ -0,0 +1,46 @@
+local CMaxTable, parent = torch.class('nn.CMaxTable', 'nn.Module')
+
+function CMaxTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+ self.maxIdx = torch.Tensor()
+ self.mask = torch.Tensor()
+ self.maxVals = torch.Tensor()
+ self.gradMaxVals = torch.Tensor()
+end
+
+function CMaxTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ self.maxIdx:resizeAs(input[1]):fill(1)
+ for i=2,#input do
+ self.maskByteTensor = self.maskByteTensor or
+ (torch.type(self.output) == 'torch.CudaTensor' and
+ torch.CudaByteTensor() or torch.ByteTensor())
+ self.mask:gt(input[i], self.output)
+ self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+ self.maxIdx:maskedFill(self.maskByteTensor, i)
+ self.maxVals:maskedSelect(input[i], self.maskByteTensor)
+ self.output:maskedCopy(self.maskByteTensor, self.maxVals)
+ end
+ return self.output
+end
+
+function CMaxTable:updateGradInput(input, gradOutput)
+ for i=1,#input do
+ self.gradInput[i] = self.gradInput[i] or input[i].new()
+ self.gradInput[i]:resizeAs(input[i]):fill(0.0)
+ self.maskByteTensor = self.maskByteTensor or
+ (torch.type(self.output) == 'torch.CudaTensor' and
+ torch.CudaByteTensor() or torch.ByteTensor())
+ self.mask:eq(self.maxIdx, i)
+ self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+ self.gradMaxVals:maskedSelect(gradOutput, self.maskByteTensor)
+ self.gradInput[i]:maskedCopy(self.maskByteTensor, self.gradMaxVals)
+ end
+
+ for i=#input+1, #self.gradInput do
+ self.gradInput[i] = nil
+ end
+
+ return self.gradInput
+end
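
A usage sketch for nn.CMaxTable (illustrative): it takes the element-wise maximum over a table of same-sized tensors, and in backward routes each gradient entry only to the input that won the max:

require 'nn'

local m = nn.CMaxTable()
local a = torch.Tensor{1, 5}
local b = torch.Tensor{3, 2}
local y = m:forward{a, b}                 -- y = [3, 5]
local g = m:backward({a, b}, torch.ones(2))  -- g[1] = [0, 1], g[2] = [1, 0]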
diff --git a/contrib/lua-torch/nn/CMinTable.lua b/contrib/lua-torch/nn/CMinTable.lua
new file mode 100644
index 000000000..25b9a19a2
--- /dev/null
+++ b/contrib/lua-torch/nn/CMinTable.lua
@@ -0,0 +1,46 @@
+local CMinTable, parent = torch.class('nn.CMinTable', 'nn.Module')
+
+function CMinTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+ self.minIdx = torch.Tensor()
+ self.mask = torch.Tensor()
+ self.minVals = torch.Tensor()
+ self.gradMaxVals = torch.Tensor()
+end
+
+function CMinTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ self.minIdx:resizeAs(input[1]):fill(1)
+ for i=2,#input do
+ self.maskByteTensor = self.maskByteTensor or
+ (torch.type(self.output) == 'torch.CudaTensor' and
+ torch.CudaByteTensor() or torch.ByteTensor())
+ self.mask:lt(input[i], self.output)
+ self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+ self.minIdx:maskedFill(self.maskByteTensor, i)
+ self.minVals:maskedSelect(input[i], self.maskByteTensor)
+ self.output:maskedCopy(self.maskByteTensor, self.minVals)
+ end
+ return self.output
+end
+
+function CMinTable:updateGradInput(input, gradOutput)
+ for i=1,#input do
+ self.gradInput[i] = self.gradInput[i] or input[i].new()
+ self.gradInput[i]:resizeAs(input[i]):fill(0.0)
+ self.maskByteTensor = self.maskByteTensor or
+ (torch.type(self.output) == 'torch.CudaTensor' and
+ torch.CudaByteTensor() or torch.ByteTensor())
+ self.mask:eq(self.minIdx, i)
+ self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+ self.gradMaxVals:maskedSelect(gradOutput, self.maskByteTensor)
+ self.gradInput[i]:maskedCopy(self.maskByteTensor, self.gradMaxVals)
+ end
+
+ for i=#input+1, #self.gradInput do
+ self.gradInput[i] = nil
+ end
+
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/CMul.lua b/contrib/lua-torch/nn/CMul.lua
new file mode 100644
index 000000000..890169761
--- /dev/null
+++ b/contrib/lua-torch/nn/CMul.lua
@@ -0,0 +1,166 @@
+local CMul, parent = torch.class('nn.CMul', 'nn.Module')
+
+function CMul:__init(...)
+ parent.__init(self)
+
+ local arg = {...}
+
+ self.size = torch.LongStorage()
+ local n = #arg
+ if n == 1 and torch.type(arg[1]) == 'torch.LongStorage' then
+ self.size:resize(#arg[1]):copy(arg[1])
+ else
+ self.size:resize(n)
+ for i=1,n do
+ self.size[i] = arg[i]
+ end
+ end
+
+ self.weight = torch.Tensor(self.size)
+ self.gradWeight = torch.Tensor(self.size)
+
+ self.output:resize(self.size)
+
+ self:reset()
+end
+
+function CMul:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:nElement())
+ end
+ self.weight:uniform(-stdv,stdv)
+end
+
+function CMul:updateOutput(input)
+ -- lazy-initialize
+ self._output = self._output or input.new()
+ self._weight = self._weight or input.new()
+ self._expand = self._expand or input.new()
+ self._repeat = self._repeat or input.new()
+
+ self.output:resizeAs(input):copy(input)
+ if input:nElement() == self.weight:nElement() then
+ self._output:view(self.output, -1)
+ self._weight:view(self.weight, -1)
+
+ self._output:cmul(self._weight)
+ else
+ if self.weight:dim() == input:dim() then
+ self._output:set(self.output)
+ self._weight:set(self.weight)
+ else
+ local batchSize = input:size(1)
+ self._output:view(self.output, batchSize, -1)
+ self._weight:view(self.weight, 1, -1)
+ end
+
+ self._expand:expandAs(self._weight, self._output)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ self._repeat:resizeAs(self._expand):copy(self._expand)
+ self._output:cmul(self._repeat)
+ else
+ self._output:cmul(self._expand)
+ end
+ end
+
+ return self.output
+end
+
+function CMul:updateGradInput(input, gradOutput)
+ if not self.gradInput then
+ return
+ end
+
+ self._gradOutput = self._gradOutput or input.new()
+ self._gradInput = self._gradInput or input.new()
+
+ self.gradInput:resizeAs(input):zero()
+ if self.weight:nElement() == gradOutput:nElement() then
+ self.gradInput:addcmul(1, self.weight, gradOutput)
+ else
+ if self.weight:dim() == input:dim() then
+ nn.utils.contiguousView(self._gradOutput, gradOutput, gradOutput:size())
+ nn.utils.contiguousView(self._gradInput, self.gradInput, self.gradInput:size())
+ self._weight:set(self.weight)
+ else
+ local batchSize = input:size(1)
+ nn.utils.contiguousView(self._gradOutput, gradOutput, batchSize, -1)
+ nn.utils.contiguousView(self._gradInput, self.gradInput, batchSize, -1)
+ self._weight:view(self.weight, 1, -1)
+ end
+
+ self._expand:expandAs(self._weight, self._gradOutput)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ self._repeat:resizeAs(self._expand):copy(self._expand)
+ self._gradInput:addcmul(1, self._repeat, self._gradOutput)
+ else
+ self._gradInput:addcmul(1, self._expand, self._gradOutput)
+ end
+ end
+
+ return self.gradInput
+end
+
+function CMul:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+
+ self._input = self._input or input.new()
+ self._gradWeight = self._gradWeight or input.new()
+ self._sum = self._sum or input.new()
+
+ if self.weight:nElement() == gradOutput:nElement() then
+ self.gradWeight:addcmul(scale, input, gradOutput)
+ else
+ if self.weight:dim() == input:dim() then
+ nn.utils.contiguousView(self._input, input, input:size())
+ nn.utils.contiguousView(self._gradOutput, gradOutput, gradOutput:size())
+ self._gradWeight:set(self.gradWeight)
+
+ self._repeat:cmul(self._input, self._gradOutput)
+ local sumInto = self._sum
+ local sumFrom = self._repeat
+ for i=1,self.weight:dim() do
+ if self.weight:size(i) ~= input:size(i) then
+ sumInto:sum(sumFrom, i)
+ sumInto = sumFrom
+ sumFrom = sumFrom == self._repeat and self._sum or self._repeat
+ end
+ end
+ self._gradWeight:add(scale, sumFrom)
+ else
+ local batchSize = input:size(1)
+ nn.utils.contiguousView(self._input, input, batchSize, -1)
+ nn.utils.contiguousView(self._gradOutput, gradOutput, batchSize, -1)
+ self._gradWeight:view(self.gradWeight, 1, -1)
+
+ self._repeat:cmul(self._input, self._gradOutput)
+ self._sum:sum(self._repeat, 1)
+ self._gradWeight:add(scale, self._sum)
+ end
+
+ end
+end
+
+function CMul:type(type, tensorCache)
+ if type then
+ self:clearState()
+ end
+ return parent.type(self, type, tensorCache)
+end
+
+function CMul:clearState()
+ nn.utils.clear(self, {
+ '_input',
+ '_output',
+ '_weight',
+ '_gradWeight',
+ '_expand',
+ '_repeat',
+ '_sum',
+ })
+ return parent.clearState(self)
+end
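
An illustrative sketch of nn.CMul: a learned component-wise scaling, broadcast over the batch dimension when the input has more elements than the weight:

require 'nn'

local m = nn.CMul(5)                      -- 5 learnable scale factors
local x = torch.ones(2, 5)                -- batch of 2
local y = m:forward(x)                    -- each row equals m.weight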
diff --git a/contrib/lua-torch/nn/CMulTable.lua b/contrib/lua-torch/nn/CMulTable.lua
new file mode 100644
index 000000000..b47378e83
--- /dev/null
+++ b/contrib/lua-torch/nn/CMulTable.lua
@@ -0,0 +1,55 @@
+
+local CMulTable, parent = torch.class('nn.CMulTable', 'nn.Module')
+
+function CMulTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+end
+
+function CMulTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ for i=2,#input do
+ self.output:cmul(input[i])
+ end
+ return self.output
+end
+
+function CMulTable:updateGradInput_efficient(input, gradOutput)
+ self.tout = self.tout or input[1].new()
+ self.tout:resizeAs(self.output)
+ for i=1,#input do
+ self.gradInput[i] = self.gradInput[i] or input[1].new()
+ self.gradInput[i]:resizeAs(input[i]):copy(gradOutput)
+ self.tout:copy(self.output):cdiv(input[i])
+ self.gradInput[i]:cmul(self.tout)
+ end
+
+ for i=#input+1, #self.gradInput do
+ self.gradInput[i] = nil
+ end
+
+ return self.gradInput
+end
+
+function CMulTable:updateGradInput(input, gradOutput)
+ for i=1,#input do
+ self.gradInput[i] = self.gradInput[i] or input[1].new()
+ self.gradInput[i]:resizeAs(input[i]):copy(gradOutput)
+ for j=1,#input do
+ if i~=j then
+ self.gradInput[i]:cmul(input[j])
+ end
+ end
+ end
+
+ for i=#input+1, #self.gradInput do
+ self.gradInput[i] = nil
+ end
+
+ return self.gradInput
+end
+
+function CMulTable:clearState()
+ if self.tout then self.tout:set() end
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/CONTRIBUTING.md b/contrib/lua-torch/nn/CONTRIBUTING.md
new file mode 100644
index 000000000..cc800154e
--- /dev/null
+++ b/contrib/lua-torch/nn/CONTRIBUTING.md
@@ -0,0 +1,136 @@
+# Contributing to Torch7 Core (torch7, nn, cutorch, cunn)
+
+Thanks a lot! There are plenty of ways you can help!
+
+Please take a moment to review this document in order to make the contribution
+process easy and effective for everyone involved.
+
+Following these guidelines helps to communicate that you respect the time of
+the developers managing and developing this open source project. In return,
+they should reciprocate that respect in addressing your issue or assessing
+patches and features.
+
+
+## Using the issue tracker
+
+The [issue tracker](https://github.com/torch/nn/issues) is
+the preferred channel for [bug reports](#bugs), [feature requests](#features)
+and [submitting pull requests](#pull-requests), but please respect the following
+restrictions:
+
+* Please **do not** use the issue tracker for personal support requests (use
+ [mailing-list](http://groups.google.com/forum/#!forum/torch7)).
+
+* Please **do not** open issues regarding the code in a torch package
+  outside the core. For example, don't open issues about the
+  REPL in the nn issue tracker; use the trepl issue tracker for that.
+
+<a name="bugs"></a>
+## Bug reports
+
+A bug is a _demonstrable problem_ that is caused by the code in the repository.
+Good bug reports are extremely helpful - thank you!
+
+Guidelines for bug reports:
+
+1. **Use the GitHub issue search** &mdash; check if the issue has already been
+ reported.
+
+2. **Check if the issue has been fixed** &mdash; try to reproduce it using the
+ latest `master` or development branch in the repository.
+
+3. **Isolate the problem** &mdash; ideally create a reduced test case,
+   preferably within 100 lines of code.
+
+A good bug report shouldn't leave others needing to chase you up for more
+information. Please try to be as detailed as possible in your report. What is
+your environment? What steps will reproduce the issue? On what OS do you
+experience the problem? What would you expect the outcome to be? All these
+details will help people fix any potential bugs.
+
+<a name="features"></a>
+## Feature requests
+
+Feature requests are welcome. Keep in mind that Torch is community-developed
+and the maintainers are not exclusively torch developers. The purpose of a
+feature request is to make others who may want to implement the feature
+aware of the interest in it.
+
+
+<a name="pull-requests"></a>
+## Pull requests
+
+Good pull requests - patches, improvements, new features - are a fantastic
+help. They should remain focused in scope **and avoid containing unrelated
+commits.**
+
+**Please ask first** before embarking on any significant pull request (e.g.
+implementing features, refactoring code, porting to a different language),
+otherwise you risk spending a lot of time working on something that the
+project's developers might not want to merge into the project.
+
+Please adhere to the coding conventions used throughout a project (indentation,
+accurate comments, etc.) and any other requirements (such as test coverage).
+
+Adhering to the following process is the best way to get your work
+included in the project:
+
+1. [Fork](https://help.github.com/articles/fork-a-repo) the project, clone your
+ fork, and configure the remotes:
+
+ ```bash
+ # Clone your fork of the repo into the current directory
+ git clone https://github.com/<your-username>/nn.git
+ # Navigate to the newly cloned directory
+ cd nn
+ # Assign the original repo to a remote called "upstream"
+ git remote add upstream https://github.com/torch/nn.git
+ ```
+
+2. If you cloned a while ago, get the latest changes from upstream:
+
+ ```bash
+ git checkout master
+ git pull upstream master
+ ```
+
+3. Create a new topic branch (off the main project development branch) to
+ contain your feature, change, or fix:
+
+ ```bash
+ git checkout -b <topic-branch-name>
+ ```
+
+4. Commit your changes in logical chunks. Please try to adhere to these [git commit
+   message guidelines](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
+   Use Git's [interactive rebase](https://help.github.com/articles/about-git-rebase)
+   feature to tidy up your commits before making them public. This helps us keep the
+   commit history clean and in logical blocks as torch grows.
+ For example:
+ - If you are adding a new function or a module, keep the module + tests + doc
+ to a single commit unless logically warranted.
+ - If you are fixing a bug, keep the bugfix to a single commit unless logically warranted.
+
+5. Locally merge (or rebase) the upstream development branch into your topic branch:
+
+ ```bash
+ git pull [--rebase] upstream master
+ ```
+
+6. Push your topic branch up to your fork:
+
+ ```bash
+ git push origin <topic-branch-name>
+ ```
+
+7. [Open a Pull Request](https://help.github.com/articles/using-pull-requests/)
+ with a clear title and description.
+
+**IMPORTANT**: By submitting a patch, you agree to allow the project owners to
+license your work under the terms of the BSD License.
+
+## Development workflow tips
+
+* While you are changing Lua files, you can simply symlink the cloned nn directory to `~/torch/install/share/lua/5.1/nn` so that any change is reflected in the current install, without constantly having to run `luarocks make rocks/*`.
+* If you are changing C files, then, after every change, run `luarocks make rocks/*`.
+* To test, you can just use: `th -lnn -e "nn.test()"`.
diff --git a/contrib/lua-torch/nn/COPYRIGHT.txt b/contrib/lua-torch/nn/COPYRIGHT.txt
new file mode 100644
index 000000000..bc002b78a
--- /dev/null
+++ b/contrib/lua-torch/nn/COPYRIGHT.txt
@@ -0,0 +1,36 @@
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
+Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
+Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+3. Neither the names of Deepmind Technologies, NYU, NEC Laboratories America
+ and IDIAP Research Institute nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/contrib/lua-torch/nn/CReLU.lua b/contrib/lua-torch/nn/CReLU.lua
new file mode 100644
index 000000000..8da6e7974
--- /dev/null
+++ b/contrib/lua-torch/nn/CReLU.lua
@@ -0,0 +1,57 @@
+local CReLU, parent = torch.class('nn.CReLU', 'nn.Sequential')
+
+-- Implements the CReLU activation function as described by
+-- W. Shang et al. in "Understanding and Improving Convolutional Neural Networks
+-- via Concatenated Rectified Linear Units"
+function CReLU:__init(nInputDims, inplace)
+ parent.__init(self)
+ self.nInputDims = nInputDims
+ self.inplace = inplace or false
+
+ local concatTable = nn.ConcatTable()
+ concatTable:add(nn.Identity())
+ concatTable:add(nn.MulConstant(-1))
+ self:add(concatTable)
+ self:add(nn.JoinTable(2))
+ self:add(nn.ReLU(self.inplace))
+end
+
+function CReLU:updateOutput(input)
+ local input_
+ local batched = input:dim() == (self.nInputDims + 1)
+ if not batched then
+ input_ = input:view(1, -1)
+ else
+ input_ = input:view(input:size(1), -1)
+ end
+ parent.updateOutput(self, input_)
+ local osize = input:size()
+ if not batched then
+ osize[1] = osize[1] * 2
+ else
+ osize[2] = osize[2] * 2
+ end
+ self.output:resize(osize)
+ return self.output
+end
+
+function CReLU:backward(input, gradOutput)
+ return self:updateGradInput(input, gradOutput)
+end
+
+function CReLU:updateGradInput(input, gradOutput)
+ local batched = input:dim() == (self.nInputDims + 1)
+ if not batched then
+ parent.updateGradInput(self, input:view(1, -1), gradOutput:view(1, -1))
+ else
+ parent.updateGradInput(self, input:view(input:size(1), -1),
+ gradOutput:view(input:size(1), -1))
+ end
+
+ self.gradInput:resizeAs(input)
+ return self.gradInput
+end
+
+function CReLU:__tostring__()
+ return "CReLU()"
+end
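+
+-- A minimal usage sketch: CReLU doubles the feature dimension. For a batch
+-- of chw samples (nInputDims = 3):
+--   local crelu = nn.CReLU(3)
+--   local y = crelu:forward(torch.randn(8, 4, 5, 5)) -- y is 8x8x5x5:
+--   -- channels 1-4 hold relu(x), channels 5-8 hold relu(-x)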
diff --git a/contrib/lua-torch/nn/CSubTable.lua b/contrib/lua-torch/nn/CSubTable.lua
new file mode 100644
index 000000000..eb7492055
--- /dev/null
+++ b/contrib/lua-torch/nn/CSubTable.lua
@@ -0,0 +1,26 @@
+
+local CSubTable, parent = torch.class('nn.CSubTable', 'nn.Module')
+
+function CSubTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+end
+
+function CSubTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ self.output:add(-1,input[2])
+ return self.output
+end
+
+function CSubTable:updateGradInput(input, gradOutput)
+ self.gradInput[1] = self.gradInput[1] or input[1].new()
+ self.gradInput[2] = self.gradInput[2] or input[1].new()
+ self.gradInput[1]:resizeAs(input[1]):copy(gradOutput)
+ self.gradInput[2]:resizeAs(input[2]):copy(gradOutput):mul(-1)
+
+ for i=#input+1, #self.gradInput do
+ self.gradInput[i] = nil
+ end
+
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Clamp.lua b/contrib/lua-torch/nn/Clamp.lua
new file mode 100644
index 000000000..36397a157
--- /dev/null
+++ b/contrib/lua-torch/nn/Clamp.lua
@@ -0,0 +1,5 @@
+local Clamp, Parent = torch.class('nn.Clamp', 'nn.HardTanh')
+
+function Clamp:__init(min_value, max_value)
+ Parent.__init(self, min_value, max_value)
+end
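+
+-- Usage sketch: a thin alias over nn.HardTanh, e.g.
+--   nn.Clamp(-0.5, 0.5):forward(x) -- clamps each element of x to [-0.5, 0.5]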
diff --git a/contrib/lua-torch/nn/ClassNLLCriterion.lua b/contrib/lua-torch/nn/ClassNLLCriterion.lua
new file mode 100644
index 000000000..dae0e6685
--- /dev/null
+++ b/contrib/lua-torch/nn/ClassNLLCriterion.lua
@@ -0,0 +1,82 @@
+local THNN = require 'nn.THNN'
+local ClassNLLCriterion, parent = torch.class('nn.ClassNLLCriterion', 'nn.Criterion')
+
+function ClassNLLCriterion:__init(weights, sizeAverage, ignoreIndex)
+ parent.__init(self)
+ self.sizeAverage = (sizeAverage == nil) and true or sizeAverage
+ self.ignoreIndex = ignoreIndex or -100 -- this target index will be ignored
+ if weights then
+ assert(weights:dim() == 1, "weights input should be 1-D Tensor")
+ self.weights = weights
+ end
+
+ self.output_tensor = torch.zeros(1)
+ self.total_weight_tensor = torch.ones(1)
+ self.target = torch.zeros(1):long()
+end
+
+function ClassNLLCriterion:__len()
+ if (self.weights) then
+ return #self.weights
+ else
+ return 0
+ end
+end
+
+function ClassNLLCriterion:updateOutput(input, target)
+ if type(target) == 'number' then
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
+ else
+ self.target = self.target:long()
+ end
+ self.target:resize(1)
+ self.target[1] = target
+ elseif torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.target = torch.CudaLongTensor and target:cudaLong() or target
+ else
+ self.target = target:long()
+ end
+
+ input.THNN.ClassNLLCriterion_updateOutput(
+ input:cdata(),
+ self.target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage,
+ THNN.optionalTensor(self.weights),
+ self.total_weight_tensor:cdata(),
+ self.ignoreIndex
+ )
+ self.output = self.output_tensor[1]
+ return self.output, self.total_weight_tensor[1]
+end
+
+function ClassNLLCriterion:updateGradInput(input, target)
+ if type(target) == 'number' then
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
+ else
+ self.target = self.target:long()
+ end
+ self.target:resize(1)
+ self.target[1] = target
+ elseif torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.target = torch.CudaLongTensor and target:cudaLong() or target
+ else
+ self.target = target:long()
+ end
+
+ self.gradInput:resizeAs(input):zero()
+
+ input.THNN.ClassNLLCriterion_updateGradInput(
+ input:cdata(),
+ self.target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage,
+ THNN.optionalTensor(self.weights),
+ self.total_weight_tensor:cdata(),
+ self.ignoreIndex
+ )
+
+ return self.gradInput
+end
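+
+-- A minimal usage sketch (the input holds log-probabilities, e.g. from
+-- nn.LogSoftMax, and the target holds class indices):
+--   local crit = nn.ClassNLLCriterion()
+--   local logp = nn.LogSoftMax():forward(torch.randn(4, 10))
+--   local loss = crit:forward(logp, torch.LongTensor{1, 5, 2, 10})
+--   local grad = crit:backward(logp, torch.LongTensor{1, 5, 2, 10})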
diff --git a/contrib/lua-torch/nn/ClassSimplexCriterion.lua b/contrib/lua-torch/nn/ClassSimplexCriterion.lua
new file mode 100644
index 000000000..9cabc011f
--- /dev/null
+++ b/contrib/lua-torch/nn/ClassSimplexCriterion.lua
@@ -0,0 +1,118 @@
+local ClassSimplexCriterion, parent
+ = torch.class('nn.ClassSimplexCriterion', 'nn.MSECriterion')
+
+--[[
+ This file implements a criterion for multi-class classification.
+ It learns an embedding per class, where each class' embedding
+ is a point on an (N-1)-dimensional simplex, where N is
+ the number of classes.
+ For example usage of this class, look at doc/criterion.md
+
+ Reference: http://arxiv.org/abs/1506.08230
+
+]]--
+
+
+--[[
+ function regsplex(n):
+ regsplex returns the coordinates of the vertices of a
+ regular simplex centered at the origin.
+ The Euclidean norms of the vectors specifying the vertices are
+ all equal to 1. The input n is the dimension of the vectors;
+ the simplex has n+1 vertices.
+
+ input:
+ n -- dimension of the vectors specifying the vertices of the simplex
+
+ output:
+ a -- tensor dimensioned (n+1,n) whose rows are
+ vectors specifying the vertices
+
+ reference:
+ http://en.wikipedia.org/wiki/Simplex#Cartesian_coordinates_for_regular_n-dimensional_simplex_in_Rn
+--]]
+local function regsplex(n)
+ local a = torch.zeros(n+1,n)
+
+ for k = 1,n do
+ -- determine the last nonzero entry in the vector for the k-th vertex
+ if k==1 then a[k][k] = 1 end
+ if k>1 then a[k][k] = math.sqrt( 1 - a[{ {k},{1,k-1} }]:norm()^2 ) end
+
+ -- fill the k-th coordinates for the vectors of the remaining vertices
+ local c = (a[k][k]^2 - 1 - 1/n) / a[k][k]
+ a[{ {k+1,n+1},{k} }]:fill(c)
+ end
+
+ return a
+end
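+
+-- For instance, regsplex(2) returns the three vertices of an equilateral
+-- triangle on the unit circle (up to rounding):
+--   ( 1.0,  0.000)
+--   (-0.5,  0.866)
+--   (-0.5, -0.866)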
+
+
+function ClassSimplexCriterion:__init(nClasses)
+ parent.__init(self)
+ assert(nClasses and nClasses > 1 and nClasses == (nClasses -(nClasses % 1)),
+ "Required positive integer argument nClasses > 1")
+ self.nClasses = nClasses
+
+ -- embedding the simplex in a space of dimension strictly greater than
+ -- the minimum possible (nClasses-1) is critical for effective training.
+ local simp = regsplex(nClasses - 1)
+ self.simplex = torch.cat(simp,
+ torch.zeros(simp:size(1), nClasses -simp:size(2)),
+ 2)
+ self._target = torch.Tensor(nClasses)
+end
+
+-- handle target being either a number (a single class index) or a
+-- 1D tensor of class indices (a batch); each index is mapped to the
+-- corresponding simplex vertex
+local function transformTarget(self, target)
+ if torch.type(target) == 'number' then
+ self._target:resize(self.nClasses)
+ self._target:copy(self.simplex[target])
+ elseif torch.isTensor(target) then
+ assert(target:dim() == 1, '1D tensors only!')
+ local nSamples = target:size(1)
+ self._target:resize(nSamples, self.nClasses)
+ for i=1,nSamples do
+ self._target[i]:copy(self.simplex[target[i]])
+ end
+ end
+end
+
+function ClassSimplexCriterion:updateOutput(input, target)
+ transformTarget(self, target)
+ assert(input:nElement() == self._target:nElement())
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.MSECriterion_updateOutput(
+ input:cdata(),
+ self._target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function ClassSimplexCriterion:updateGradInput(input, target)
+ assert(input:nElement() == self._target:nElement())
+ input.THNN.MSECriterion_updateGradInput(
+ input:cdata(),
+ self._target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage
+ )
+ return self.gradInput
+end
+
+function ClassSimplexCriterion:getPredictions(input)
+ if input:dim() == 1 then
+ input = input:view(1, -1)
+ end
+ return torch.mm(input, self.simplex:t())
+end
+
+function ClassSimplexCriterion:getTopPrediction(input)
+ local prod = self:getPredictions(input)
+ local _, maxs = prod:max(prod:nDimension())
+ return maxs:view(-1)
+end
diff --git a/contrib/lua-torch/nn/Collapse.lua b/contrib/lua-torch/nn/Collapse.lua
new file mode 100644
index 000000000..a088608ca
--- /dev/null
+++ b/contrib/lua-torch/nn/Collapse.lua
@@ -0,0 +1,30 @@
+local Collapse, parent = torch.class('nn.Collapse', 'nn.Module')
+
+-- collapses non-batch dims
+function Collapse:__init(nInputDim)
+ parent.__init(self)
+ self.nInputDim = nInputDim
+end
+
+function Collapse:updateOutput(input)
+ if not input:isContiguous() then
+ self._input = self._input or input.new()
+ self._input:resize(input:size()):copy(input)
+ input = self._input
+ end
+ if input:dim() > self.nInputDim then
+ self.output:view(input,input:size(1),-1)
+ else
+ self.output:view(input,-1)
+ end
+ return self.output
+end
+
+function Collapse:updateGradInput(input, gradOutput)
+ self.gradInput:view(gradOutput, input:size())
+ return self.gradInput
+end
+
+function Collapse:clearState()
+ self._input = nil
+end
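+
+-- Usage sketch: with nInputDim = 3, a 4d batch collapses to 2d and a
+-- single 3d sample collapses to 1d:
+--   nn.Collapse(3):forward(torch.randn(8, 3, 4, 5)) -- size 8x60
+--   nn.Collapse(3):forward(torch.randn(3, 4, 5))    -- size 60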
diff --git a/contrib/lua-torch/nn/Concat.lua b/contrib/lua-torch/nn/Concat.lua
new file mode 100644
index 000000000..d7e3ee711
--- /dev/null
+++ b/contrib/lua-torch/nn/Concat.lua
@@ -0,0 +1,158 @@
+local Concat, parent = torch.class('nn.Concat', 'nn.Container')
+
+function Concat:__init(dimension)
+ parent.__init(self)
+ self.outputSize = torch.LongStorage()
+ self.dimension = dimension
+end
+
+function Concat:updateOutput(input)
+ self.outputSize = self.outputSize or torch.LongStorage()
+
+ local outs = {}
+ for i=1,#self.modules do
+ local currentOutput = self:rethrowErrors(self.modules[i], i, 'updateOutput', input)
+ outs[i] = currentOutput
+ if i == 1 then
+ self.outputSize:resize(currentOutput:dim()):copy(currentOutput:size())
+ else
+ self.outputSize[self.dimension] = self.outputSize[self.dimension] + currentOutput:size(self.dimension)
+ end
+ end
+ self.output:resize(self.outputSize)
+
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = outs[i]
+ self.output:narrow(self.dimension, offset, currentOutput:size(self.dimension)):copy(currentOutput)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ return self.output
+end
+
+local function retable(t1, t2, f)
+ for k, v in ipairs(t2) do
+ if (torch.type(v) == "table") then
+ t1[k] = retable(t1[k] or {}, t2[k], f)
+ else
+ f(t1, k, v)
+ end
+ end
+ for i=#t2+1, #t1 do
+ t1[i] = nil
+ end
+ return t1
+end
+
+local function backward(self, method, input, gradOutput, scale)
+ local isTable = torch.type(input) == 'table'
+ local wasTable = torch.type(self.gradInput) == 'table'
+ scale = scale or 1
+
+ if isTable then
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local currentGradInput = self:rethrowErrors(module, i, method, input,
+ gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), scale)
+ if torch.type(currentGradInput) ~= 'table' then
+ error"currentGradInput is not a table!"
+ end
+ if #input ~= #currentGradInput then
+ error("table size mismatch: "..#input.." ~= "..#currentGradInput)
+ end
+ if i == 1 then
+ self.gradInput = wasTable and self.gradInput or {}
+ retable(self.gradInput, currentGradInput,
+ function(t, k, v)
+ t[k] = t[k] or v:clone()
+ t[k]:resizeAs(v)
+ t[k]:copy(v)
+ end
+ )
+ else
+ retable(self.gradInput, currentGradInput,
+ function(t, k, v)
+ if t[k] then
+ t[k]:add(v)
+ else
+ t[k] = v:clone()
+ end
+ end
+ )
+ end
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ else
+ self.gradInput = (not wasTable) and self.gradInput:resizeAs(input) or input:clone()
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local currentGradInput = self:rethrowErrors(module, i, method, input,
+ gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), scale)
+ if currentGradInput then -- if the module does not produce a gradInput (for example first layer), then ignore it and move on.
+ if i==1 then
+ self.gradInput:copy(currentGradInput)
+ else
+ self.gradInput:add(currentGradInput)
+ end
+ end
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ end
+ return self.gradInput
+end
+
+function Concat:updateGradInput(input, gradOutput)
+ return backward(self, 'updateGradInput', input, gradOutput)
+end
+
+function Concat:backward(input, gradOutput, scale)
+ return backward(self, 'backward', input, gradOutput, scale)
+end
+
+function Concat:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ self:rethrowErrors(module, i, 'accGradParameters', input,
+ gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)),
+ scale)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+end
+
+function Concat:accUpdateGradParameters(input, gradOutput, lr)
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ self:rethrowErrors(module, i, 'accUpdateGradParameters',
+ input,
+ gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)),
+ lr)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+end
+
+function Concat:__tostring__()
+ local tab = ' '
+ local line = '\n'
+ local next = ' |`-> '
+ local lastNext = ' `-> '
+ local ext = ' | '
+ local extlast = ' '
+ local last = ' ... -> '
+ local str = torch.type(self)
+ str = str .. ' {' .. line .. tab .. 'input'
+ for i=1,#self.modules do
+ if i == #self.modules then
+ str = str .. line .. tab .. lastNext .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. extlast)
+ else
+ str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. ext)
+ end
+ end
+ str = str .. line .. tab .. last .. 'output'
+ str = str .. line .. '}'
+ return str
+end
diff --git a/contrib/lua-torch/nn/ConcatTable.lua b/contrib/lua-torch/nn/ConcatTable.lua
new file mode 100644
index 000000000..742719344
--- /dev/null
+++ b/contrib/lua-torch/nn/ConcatTable.lua
@@ -0,0 +1,118 @@
+local ConcatTable, parent = torch.class('nn.ConcatTable', 'nn.Container')
+
+function ConcatTable:__init()
+ parent.__init(self)
+ self.modules = {}
+ self.output = {}
+end
+
+function ConcatTable:updateOutput(input)
+ for i=1,#self.modules do
+ self.output[i] = self:rethrowErrors(self.modules[i], i, 'updateOutput', input)
+ end
+ return self.output
+end
+
+local function retable(t1, t2, f)
+ for k, v in ipairs(t2) do
+ if (torch.type(v) == "table") then
+ t1[k] = retable(t1[k] or {}, t2[k], f)
+ else
+ f(t1, k, v)
+ end
+ end
+ for i=#t2+1, #t1 do
+ t1[i] = nil
+ end
+ return t1
+end
+
+local function backward(self, method, input, gradOutput, scale)
+ local isTable = torch.type(input) == 'table'
+ local wasTable = torch.type(self.gradInput) == 'table'
+ if isTable then
+ for i,module in ipairs(self.modules) do
+ local currentGradInput = self:rethrowErrors(module, i, method, input, gradOutput[i], scale)
+ if torch.type(currentGradInput) ~= 'table' then
+ error"currentGradInput is not a table!"
+ end
+ if #input ~= #currentGradInput then
+ error("table size mismatch: "..#input.." ~= "..#currentGradInput)
+ end
+ if i == 1 then
+ self.gradInput = wasTable and self.gradInput or {}
+ retable(self.gradInput, currentGradInput,
+ function(t, k, v)
+ t[k] = t[k] or v:clone()
+ t[k]:resize(v:size())
+ t[k]:copy(v)
+ end
+ )
+ else
+ retable(self.gradInput, currentGradInput,
+ function(t, k, v)
+ if t[k] then
+ t[k]:add(v)
+ else
+ t[k] = v:clone()
+ end
+ end
+ )
+ end
+ end
+ else
+ self.gradInput = (not wasTable) and self.gradInput or input:clone()
+ for i,module in ipairs(self.modules) do
+ local currentGradInput = self:rethrowErrors(module, i, method, input, gradOutput[i], scale)
+ if i == 1 then
+ self.gradInput:resize(currentGradInput:size()):copy(currentGradInput)
+ else
+ self.gradInput:add(currentGradInput)
+ end
+ end
+ end
+ return self.gradInput
+end
+
+function ConcatTable:updateGradInput(input, gradOutput)
+ return backward(self, 'updateGradInput', input, gradOutput)
+end
+
+function ConcatTable:backward(input, gradOutput, scale)
+ return backward(self, 'backward', input, gradOutput, scale)
+end
+
+function ConcatTable:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ for i,module in ipairs(self.modules) do
+ self:rethrowErrors(module, i, 'accGradParameters', input, gradOutput[i], scale)
+ end
+end
+
+function ConcatTable:accUpdateGradParameters(input, gradOutput, lr)
+ for i,module in ipairs(self.modules) do
+ self:rethrowErrors(module, i, 'accUpdateGradParameters', input, gradOutput[i], lr)
+ end
+end
+
+function ConcatTable:__tostring__()
+ local tab = ' '
+ local line = '\n'
+ local next = ' |`-> '
+ local lastNext = ' `-> '
+ local ext = ' | '
+ local extlast = ' '
+ local last = ' ... -> '
+ local str = torch.type(self)
+ str = str .. ' {' .. line .. tab .. 'input'
+ for i=1,#self.modules do
+ if i == #self.modules then
+ str = str .. line .. tab .. lastNext .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. extlast)
+ else
+ str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. ext)
+ end
+ end
+ str = str .. line .. tab .. last .. 'output'
+ str = str .. line .. '}'
+ return str
+end
diff --git a/contrib/lua-torch/nn/Constant.lua b/contrib/lua-torch/nn/Constant.lua
new file mode 100644
index 000000000..07773feb2
--- /dev/null
+++ b/contrib/lua-torch/nn/Constant.lua
@@ -0,0 +1,36 @@
+------------------------------------------------------------------------
+--[[ Constant ]]--
+-- Outputs a constant value given an input.
+-- If nInputDim is specified, uses the input to determine the size of
+-- the batch. The value is then replicated over the batch.
+-- You can use this with nn.ConcatTable() to append constant inputs to
+-- an input: nn.ConcatTable():add(nn.Constant(v)):add(nn.Identity()).
+------------------------------------------------------------------------
+local Constant, parent = torch.class("nn.Constant", "nn.Module")
+
+function Constant:__init(value, nInputDim)
+ self.value = value
+ if torch.type(self.value) == 'number' then
+ self.value = torch.Tensor{self.value}
+ end
+ assert(torch.isTensor(self.value), "Expecting number or tensor at arg 1")
+ self.nInputDim = nInputDim
+ parent.__init(self)
+end
+
+function Constant:updateOutput(input)
+ if self.nInputDim and input:dim() > self.nInputDim then
+ local vsize = self.value:size():totable()
+ self.output:resize(input:size(1), table.unpack(vsize))
+ local value = self.value:view(1, table.unpack(vsize))
+ self.output:copy(value:expand(self.output:size()))
+ else
+ self.output:resize(self.value:size()):copy(self.value)
+ end
+ return self.output
+end
+
+function Constant:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input):zero()
+ return self.gradInput
+end
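+
+-- Usage sketch: with nInputDim set, the constant is replicated over the batch:
+--   local m = nn.Constant(torch.Tensor{1, 2, 3}, 1)
+--   m:forward(torch.randn(10))    -- size 3: the value itself
+--   m:forward(torch.randn(4, 10)) -- size 4x3: one copy of the value per sample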
diff --git a/contrib/lua-torch/nn/Container.lua b/contrib/lua-torch/nn/Container.lua
new file mode 100644
index 000000000..7e264bab9
--- /dev/null
+++ b/contrib/lua-torch/nn/Container.lua
@@ -0,0 +1,149 @@
+-- This is code common to container modules, which are collections of
+-- smaller constituent modules like Parallel, Sequential, etc.
+local Container, parent = torch.class('nn.Container', 'nn.Module')
+
+function Container:__init(...)
+ parent.__init(self, ...)
+ self.modules = {}
+end
+
+function Container:add(module)
+ table.insert(self.modules, module)
+ return self
+end
+
+function Container:get(index)
+ return self.modules[index]
+end
+
+function Container:size()
+ return #self.modules
+end
+
+-- Check if passing arguments through xpcall is supported in this Lua interpreter.
+local _, XPCALL_ARGS = xpcall(function(x) return x ~= nil end, function() end, 1)
+local TRACEBACK_WARNING = "WARNING: If you see a stack trace below, it doesn't point to the place where this error occurred. Please use only the one above."
+-- module argument can be retrieved with moduleIndex, but code is cleaner when
+-- it has to be specified anyway.
+function Container:rethrowErrors(module, moduleIndex, funcName, ...)
+ assert(module == self.modules[moduleIndex],
+ "mismatch between moduleIndex and self.modules in rethrowErrors")
+ local function handleError(err)
+ -- This will be executed only in the first container that handles the error.
+ if not err:find(TRACEBACK_WARNING) then
+ local traceback = debug.traceback()
+ -- Remove this handler from the stack
+ local _, first_line_end = traceback:find('^.-\n')
+ local _, second_line_end = traceback:find('^.-\n.-\n')
+ traceback = traceback:sub(1, first_line_end) .. traceback:sub(second_line_end+1)
+ err = err .. '\n' .. traceback .. '\n\n' .. TRACEBACK_WARNING
+ else
+ -- Remove file path
+ err = err:sub(err:find('\n')+1)
+ end
+ local msg = string.format('In %d module of %s:',
+ moduleIndex, torch.type(self))
+ -- Preceding newline has to be here, because Lua will prepend a file path.
+ err = '\n' .. msg .. '\n' .. err
+ return err
+ end
+
+ -- Lua 5.1 doesn't support passing arguments through xpcall, so they have to
+ -- be passed via a closure. This incurs some overhead, so it's better not to
+ -- make it the default.
+ local ok, ret, noret
+ if not XPCALL_ARGS then
+ local args = {...}
+ local unpack = unpack or table.unpack
+ ok, ret, noret = xpcall(function()
+ return module[funcName](module, unpack(args))
+ end,
+ handleError)
+ else
+ ok, ret, noret = xpcall(module[funcName], handleError, module, ...)
+ end
+ assert(noret == nil, "rethrowErrors supports only one return argument")
+
+ if not ok then error(ret) end
+ return ret
+end
+
+function Container:applyToModules(func)
+ for _, module in ipairs(self.modules) do
+ func(module)
+ end
+end
+
+function Container:zeroGradParameters()
+ self:applyToModules(function(module) module:zeroGradParameters() end)
+end
+
+function Container:updateParameters(learningRate)
+ self:applyToModules(function(module) module:updateParameters(learningRate) end)
+end
+
+function Container:training()
+ self:applyToModules(function(module) module:training() end)
+ parent.training(self)
+end
+
+function Container:evaluate()
+ self:applyToModules(function(module) module:evaluate() end)
+ parent.evaluate(self)
+end
+
+function Container:share(mlp, ...)
+ for i=1,#self.modules do
+ self.modules[i]:share(mlp.modules[i], ...);
+ end
+ return self
+end
+
+function Container:reset(stdv)
+ self:applyToModules(function(module) module:reset(stdv) end)
+end
+
+function Container:parameters()
+ local function tinsert(to, from)
+ if type(from) == 'table' then
+ for i=1,#from do
+ tinsert(to,from[i])
+ end
+ else
+ table.insert(to,from)
+ end
+ end
+ local w = {}
+ local gw = {}
+ for i=1,#self.modules do
+ local mw,mgw = self.modules[i]:parameters()
+ if mw then
+ tinsert(w,mw)
+ tinsert(gw,mgw)
+ end
+ end
+ return w,gw
+end
+
+function Container:clearState()
+ -- don't call set because it might reset referenced tensors
+ local function clear(f)
+ if self[f] then
+ if torch.isTensor(self[f]) then
+ self[f] = self[f].new()
+ elseif type(self[f]) == 'table' then
+ self[f] = {}
+ else
+ self[f] = nil
+ end
+ end
+ end
+ clear('output')
+ clear('gradInput')
+ if self.modules then
+ for i,module in pairs(self.modules) do
+ module:clearState()
+ end
+ end
+ return self
+end
diff --git a/contrib/lua-torch/nn/Contiguous.lua b/contrib/lua-torch/nn/Contiguous.lua
new file mode 100755
index 000000000..f9974ce5a
--- /dev/null
+++ b/contrib/lua-torch/nn/Contiguous.lua
@@ -0,0 +1,21 @@
+local Contiguous, parent = torch.class('nn.Contiguous', 'nn.Module')
+
+function Contiguous:updateOutput(input)
+ if not input:isContiguous() then
+ if self.output:storage() == input:storage() then self.output:set() end
+ self.output:resizeAs(input):copy(input)
+ else
+ self.output:set(input)
+ end
+ return self.output
+end
+
+function Contiguous:updateGradInput(input, gradOutput)
+ if not gradOutput:isContiguous() then
+ if self.gradInput:storage() == gradOutput:storage() then self.gradInput:set() end
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+ else
+ self.gradInput:set(gradOutput)
+ end
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Convert.lua b/contrib/lua-torch/nn/Convert.lua
new file mode 100644
index 000000000..855338dd6
--- /dev/null
+++ b/contrib/lua-torch/nn/Convert.lua
@@ -0,0 +1,245 @@
+------------------------------------------------------------------------
+--[[ nn.Convert ]]--
+-- Module to convert between different data formats
+-- nn.Convert('bchw', 'bf') or nn.Convert('chw', 'f')
+-- Automatically converts input to same type as self.output
+-- Simplest use is for automatic input type conversions: nn.Convert()
+------------------------------------------------------------------------
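+-- Usage sketch (shapes shown for illustration):
+--   nn.Convert('bchw', 'bf'):forward(torch.randn(2, 3, 4, 5)) -- size 2x60
+--   nn.Convert('bf', 'fb'):forward(torch.randn(2, 10))        -- size 10x2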
+local _ = require 'moses'
+local Convert, parent = torch.class("nn.Convert", "nn.Container")
+
+function Convert:__init(inputShape, outputShape)
+ if outputShape and not inputShape then
+ error"Expecting non-nil arg 1 when arg 2 is provided"
+ end
+ inputShape = inputShape or 'b*'
+ outputShape = outputShape or inputShape
+ self.inputShape = inputShape:find('b') and inputShape or ('b'..inputShape)
+ self.outputShape = outputShape:find('b') and outputShape or ('b'..outputShape)
+ self.inputBatchDim = self.inputShape:find('b')
+ self.outputBatchDim = self.outputShape:find('b')
+ if self.inputShape == 'b*' or self.outputShape == 'b*' then
+ assert(self.inputShape == 'b*' and self.outputShape == 'b*', 'Both or neither shapes must be b*')
+ self.nInputDim = -1
+ self.nOutputDim = -1
+ self.transposition = true
+ else
+ -- number of dims in batch mode
+ self.nInputDim = #self.inputShape
+ self.nOutputDim = #self.outputShape
+ -- is the outputShape just a transposition of the inputShape?
+ if self.nInputDim == self.nOutputDim then
+ self.transposition = true
+ for i=1,self.nInputDim do
+ if not self.outputShape:find(self.inputShape:sub(i,i)) then
+ self.transposition = false
+ break
+ end
+ end
+ end
+ end
+ parent.__init(self)
+end
+
+-- post-initialization
+function Convert:buildConverter(input)
+ if self.transposition then
+ self.converter = self:transpose(self.outputShape)
+ else
+ if (torch.type(self[self.outputShape]) ~= 'function') then
+ error(string.format("Unrecognized conversion of shape %s to %s", self.inputShape, self.outputShape))
+ end
+ self.converter = self[self.outputShape](self, input)
+ end
+ assert(torch.isTensor(self.output), "Expecting Tensor output")
+
+ self.converter:type(torch.type(self.output))
+
+ self.modules[1] = self.converter
+end
+
+function Convert:updateOutput(input)
+ assert(torch.isTensor(input), "expecting Tensor")
+ if not torch.isTypeOf(input, torch.type(self.output)) then
+ -- handle different input type
+ self._input = self._input or self.output.new()
+ self._input:resize(input:size()):copy(input)
+ input = self._input
+ end
+ self.batchMode = true
+ if input:dim() < self.nInputDim then
+ -- handle non-batch mode
+ local inputSize = input:size():totable()
+ table.insert(inputSize, self.inputBatchDim, 1)
+ self.__input = self.__input or input.new()
+ self.__input:set(input):resize(table.unpack(inputSize))
+ input = self.__input
+ self.batchMode = false
+ end
+ if not self.converter then
+ self:buildConverter(input)
+ end
+
+ self.output = self.converter:updateOutput(input)
+
+ if not self.batchMode then
+ local outputSize = self.output:size():totable()
+ table.remove(outputSize, self.outputBatchDim)
+ self.__output = self.__output or self.output.new()
+ self.__output:set(self.output):resize(table.unpack(outputSize))
+ self.output = self.__output
+ end
+ return self.output
+end
+
+function Convert:updateGradInput(input, gradOutput)
+ local input_ = input
+ input = self._input or input
+ if not self.batchMode then
+ input = self.__input
+ self.__gradOutput = self.__gradOutput or gradOutput.new()
+ self.__gradOutput:set(gradOutput):resize(self.converter.output:size())
+ gradOutput = self.__gradOutput
+ end
+
+ local gradInput = self.converter:updateGradInput(input, gradOutput)
+
+ if not self.batchMode then
+ self.__gradInput = self.__gradInput or gradInput.new()
+ self.__gradInput:set(gradInput):resize(input_:size())
+ gradInput = self.__gradInput
+ end
+ if self._input then
+ self._gradInput = self._gradInput or input.new()
+ self._gradInput:resize(input:size()):copy(gradInput)
+ self.gradInput = self._gradInput
+ else
+ self.gradInput = gradInput
+ end
+
+ return self.gradInput
+end
+
+function Convert:accGradParameters(input, gradOutput, scale)
+ input = self.batchMode and self.__input or self._input or input
+ gradOutput = self.batchMode and self.__gradOutput or gradOutput
+ self.converter:accGradParameters(input, gradOutput, scale)
+end
+
+function Convert:accUpdateGradParameters(input, gradOutput, lr)
+ input = self.batchMode and self.__input or self._input or input
+ gradOutput = self.batchMode and self.__gradOutput or gradOutput
+ self.converter:accUpdateGradParameters(input, gradOutput, lr)
+end
+
+-- batch feature
+function Convert:bf(input)
+ local b_pos = self:findAxis('b', self.inputShape)
+ local dim = #self.inputShape
+ if self.inputShape == 'bt' then
+ error"Conversion of shape bt to bf not supported: open an issue on github"
+ end
+ -- was b
+ if dim == 1 then
+ return nn.Reshape(1)
+ end
+ -- was b...
+ local modula
+ if b_pos ~= 1 then
+ modula = nn.Transpose({1, b_pos})
+ end
+ if dim > 2 then
+ local transpose = modula
+ local sampleSize = input:select(self:findAxis('b'),1):nElement()
+ local reshape = nn.Reshape(sampleSize)
+ if transpose then
+ modula = nn.Sequential()
+ modula:add(transpose)
+ modula:add(reshape)
+ else
+ modula = reshape
+ end
+ end
+ return modula or nn.Identity()
+end
+
+-- each example is a scalar; batch is a vector
+function Convert:b(input)
+ local b_pos = self:findAxis('b')
+ if self.inputShape == 'bt' or self.inputShape == 'tb' then
+ local t_pos = self:findAxis('t')
+ -- select first set of classes
+ return nn.Select(t_pos, 1)
+ elseif self.inputShape == 'bf' or self.inputShape == 'fb' then
+      -- this won't work as expected with size(f) > 1
+ local f_pos = self:findAxis('f')
+ if input:size(f_pos) > 1 then
+ error("Cannot convert shape "..self.inputShape.." to b when feature > 1")
+ end
+ return nn.Select(f_pos, 1)
+ else
+ error("Cannot convert shape "..self.inputShape.." to shape b")
+ end
+end
+
+-- returns the current shape of the data
+function Convert:default()
+ return nn.Identity()
+end
+
+-- multi-class (batch target)
+function Convert:bt()
+ local b_pos = self:findAxis('b')
+ local modula
+ if self.inputShape == 'b' then
+ modula = nn.Reshape(1)
+ else
+ error("cannot convert shape '"..self.inputShape.."' to bt")
+ end
+ return modula
+end
+
+-- a generic function for transposing shape axes
+function Convert:transpose(newShape)
+ if newShape == self.inputShape then
+ return nn.Identity()
+ end
+ local inputShape = {}
+ for i=1,#self.inputShape do
+ table.insert(inputShape, self.inputShape:sub(i,i))
+ end
+ local transpositions = {}
+ for i=1,#newShape do
+ local j = _.indexOf(inputShape, newShape:sub(i,i))
+ if i ~= j then
+ local char = inputShape[i]
+ inputShape[i] = inputShape[j]
+ inputShape[j] = char
+ table.insert(transpositions, {j, i})
+ end
+ end
+ return nn.Transpose(table.unpack(transpositions))
+end
+
+function Convert:findAxis(axis_char, shape, silent)
+ shape = shape or self.inputShape
+ local axis_pos = shape:find(axis_char)
+ if (not silent) and (not axis_pos) then
+ error("Provided shape '"..shape.."' has no axis '"..axis_char.."'", 2)
+ end
+ return axis_pos
+end
+
+function Convert:clearState()
+ self._input = nil
+ self._gradInput = nil
+ self.__input = nil
+ self.__output = nil
+ self.__gradInput = nil
+ self.__gradOutput = nil
+end
+
+function Convert:type(type)
+ self:clearState()
+ return parent.type(self, type)
+end
diff --git a/contrib/lua-torch/nn/Copy.lua b/contrib/lua-torch/nn/Copy.lua
new file mode 100644
index 000000000..9f83cf9b4
--- /dev/null
+++ b/contrib/lua-torch/nn/Copy.lua
@@ -0,0 +1,42 @@
+local Copy, parent = torch.class('nn.Copy', 'nn.Module')
+
+function Copy:__init(intype, outtype, forceCopy, dontCast)
+ intype = intype or torch.Tensor.__typename
+ outtype = outtype or torch.Tensor.__typename
+
+ self.dontCast = dontCast
+
+ parent.__init(self)
+ self.gradInput = torch.getmetatable(intype).new()
+ self.output = torch.getmetatable(outtype).new()
+
+ if (not forceCopy) and intype == outtype then
+
+ self.updateOutput = function(self, input)
+ self.output:set(input)
+ return input
+ end
+
+ self.updateGradInput = function(self, input, gradOutput)
+ self.gradInput:set(gradOutput)
+ return gradOutput
+ end
+ end
+end
+
+function Copy:updateOutput(input)
+ self.output:resize(input:size()):copy(input)
+ return self.output
+end
+
+function Copy:updateGradInput(input, gradOutput)
+ self.gradInput:resize(gradOutput:size()):copy(gradOutput)
+ return self.gradInput
+end
+
+function Copy:type(type, tensorCache)
+ if type and self.dontCast then
+ return self
+ end
+ return parent.type(self, type, tensorCache)
+end
diff --git a/contrib/lua-torch/nn/Cosine.lua b/contrib/lua-torch/nn/Cosine.lua
new file mode 100644
index 000000000..19a9cba82
--- /dev/null
+++ b/contrib/lua-torch/nn/Cosine.lua
@@ -0,0 +1,175 @@
+local Cosine, parent = torch.class('nn.Cosine', 'nn.Module')
+
+function Cosine:__init(inputSize,outputSize)
+ parent.__init(self)
+
+ self.weight = torch.Tensor(outputSize,inputSize)
+ self.gradWeight = torch.Tensor(outputSize,inputSize)
+
+ self:reset()
+end
+
+function Cosine:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(1))
+ end
+ self.weight:uniform(-stdv, stdv)
+end
+
+function Cosine:updateOutput(input)
+ local inputSize = self.weight:size(2)
+ local outputSize = self.weight:size(1)
+
+ self._weightNorm = self._weightNorm or self.weight.new()
+ self._inputNorm = self._inputNorm or self.weight.new()
+
+ -- y_j = (w_j * x) / ( || w_j || * || x || )
+
+ self._weightNorm:norm(self.weight,2,2):add(1e-12)
+ if input:dim() == 1 then
+ self.output:resize(outputSize):zero()
+ self.output:addmv(1, self.weight, input)
+ self.__norm = input:norm()+1e-12
+ self.output:cdiv(self._weightNorm:view(outputSize)):div(self.__norm)
+ elseif input:dim() == 2 then
+ local batchSize = input:size(1)
+ local nElement = self.output:nElement()
+ self.output:resize(batchSize, outputSize)
+ if self.output:nElement() ~= nElement then
+ self.output:zero()
+ end
+ self.output:addmm(0, self.output, 1, input, self.weight:t())
+
+ self._inputNorm:norm(input,2,2):add(1e-12)
+ self.output:cdiv(self._weightNorm:view(1,outputSize):expandAs(self.output))
+ self.output:cdiv(self._inputNorm:expandAs(self.output))
+ else
+ error('input must be vector or matrix')
+ end
+
+ return self.output
+end
+
+function Cosine:updateGradInput(input, gradOutput)
+ if not self.gradInput then
+ return
+ end
+
+ local inputSize = self.weight:size(2)
+ local outputSize = self.weight:size(1)
+
+ --[[
+ dy_j w_ji x_i
+ ---- = ------------------- - y_j ---------
+ dx_i || w_j || * || x || || x ||^2
+ --]]
+
+ local nElement = self.gradInput:nElement()
+ self.gradInput:resizeAs(input)
+ if self.gradInput:nElement() ~= nElement then
+ self.gradInput:zero()
+ end
+
+ if input:dim() == 1 then
+ self._weight = self._weight or input.new()
+ self._weight:resizeAs(self.weight):copy(self.weight)
+ self._weight:cdiv(self._weightNorm:expandAs(self.weight))
+ self._weight:div(self.__norm)
+ self._weight:addr(1, self._weight, -1/(self.__norm*self.__norm), self.output, input)
+ self.gradInput:addmv(0, 1, self._weight:t(), gradOutput)
+ elseif input:dim() == 2 then
+ local inputNorm = self._inputNorm:expandAs(input)
+ local weightNorm = self._weightNorm:view(1,outputSize):expandAs(gradOutput)
+
+ self.gradInput:copy(input):cdiv(inputNorm)
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ self._gradOutput:cmul(self.output)
+ self._sum = self._sum or input.new()
+ self._sum:sum(self._gradOutput, 2)
+ self.gradInput:cmul(self._sum:expandAs(input))
+
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ self._gradOutput:cdiv(weightNorm)
+ self.gradInput:addmm(-1, self.gradInput, 1, self._gradOutput, self.weight)
+
+ self.gradInput:cdiv(inputNorm)
+ end
+
+ return self.gradInput
+end
+
+function Cosine:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ local inputSize = self.weight:size(2)
+ local outputSize = self.weight:size(1)
+
+ --[[
+ dy_j x_i w_ji
+ ----- = ------------------- - y_j -----------
+ dw_ji || w_j || * || x || || w_j ||^2
+ --]]
+
+ if input:dim() == 1 then
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ local weightNorm = self._weightNorm:view(outputSize)
+ self._gradOutput:cdiv(weightNorm)
+ self.gradWeight:addr(scale/self.__norm, self._gradOutput, input)
+
+ self._gradOutput:cdiv(weightNorm)
+ self._gradOutput:cmul(self.output)
+ self._weight = self._weight or self.weight.new()
+      self._weight:resizeAs(self.weight):copy(self.weight)
+ self._weight:cmul(self._gradOutput:view(outputSize, 1):expandAs(self.weight))
+ self.gradWeight:add(-1, self._weight)
+ elseif input:dim() == 2 then
+ self._weight = self._weight or self.weight.new()
+ self._weight:resizeAs(self.weight):copy(self.weight)
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ self._gradOutput:cmul(self.output)
+ self._sum = self._sum or input.new()
+ self._sum:sum(self._gradOutput, 1)
+ local grad = self._sum[1]
+ grad:cdiv(self._weightNorm:select(2,1))
+ self._weight:cmul(grad:view(outputSize,1):expandAs(self._weight))
+
+ local input_ = self._gradOutput
+ input_:resizeAs(input):copy(input)
+ input_:cdiv(self._inputNorm:expandAs(input))
+ self._weight:addmm(-1, self._weight, 1, gradOutput:t(), input_)
+
+ self._weight:cdiv(self._weightNorm:expandAs(self._weight))
+ self.gradWeight:add(self._weight)
+ else
+ error"1D or 2D input expected"
+ end
+end
+
+function Cosine:type(type, tensorCache)
+ if type then
+ -- prevent premature memory allocations
+ self._input = nil
+ self._weight = nil
+ self._inputNorm = nil
+ self._weightNorm = nil
+ self._gradOutput = nil
+ self._sum = nil
+ end
+ return parent.type(self, type, tensorCache)
+end
+
+function Cosine:clearState()
+ nn.utils.clear(self, {
+ '_input',
+ '_weight',
+ '_gradOutput',
+ '_sum',
+ '_inputNorm',
+ '_weightNorm',
+ })
+ return parent.clearState(self)
+end
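+
+-- Usage sketch: each output unit is the cosine similarity between the input
+-- and one weight row, so outputs lie in [-1, 1]:
+--   local m = nn.Cosine(10, 5)
+--   m:forward(torch.randn(4, 10)) -- size 4x5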
diff --git a/contrib/lua-torch/nn/CosineDistance.lua b/contrib/lua-torch/nn/CosineDistance.lua
new file mode 100644
index 000000000..fe4e4b9f5
--- /dev/null
+++ b/contrib/lua-torch/nn/CosineDistance.lua
@@ -0,0 +1,116 @@
+local CosineDistance, parent = torch.class('nn.CosineDistance', 'nn.Module')
+
+function CosineDistance:__init()
+ parent.__init(self)
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+end
+
+local function makeContiguous(self, input1, input2)
+ if not input1:isContiguous() then
+ self._input1 = self._input1 or input1.new()
+ self._input1:resizeAs(input1):copy(input1)
+ input1 = self._input1
+ end
+ if not input2:isContiguous() then
+ self._input2 = self._input2 or input2.new()
+ self._input2:resizeAs(input2):copy(input2)
+ input2 = self._input2
+ end
+ return input1, input2
+end
+
+function CosineDistance:updateOutput(input)
+ local input1, input2 = input[1], input[2]
+
+ input1, input2 = makeContiguous(self, input1, input2)
+
+ if input1:dim() == 1 then
+ input1 = input1:view(1,-1)
+ input2 = input2:view(1,-1)
+ end
+
+ if not self.buffer then
+ self.buffer = input1.new()
+ self.w1 = input1.new()
+ self.w22 = input1.new()
+ self.w = input1.new()
+ self.w32 = input1.new()
+ self.ones = input1.new()
+ end
+
+ self.buffer:cmul(input1,input2)
+ self.w1:sum(self.buffer,2)
+
+ local epsilon = 1e-12
+ self.buffer:cmul(input1,input1)
+ self.w22:sum(self.buffer,2):add(epsilon)
+ self.ones:resizeAs(self.w22):fill(1)
+ self.w22:cdiv(self.ones, self.w22)
+ self.w:resizeAs(self.w22):copy(self.w22)
+
+ self.buffer:cmul(input2,input2)
+ self.w32:sum(self.buffer,2):add(epsilon)
+ self.w32:cdiv(self.ones, self.w32)
+ self.w:cmul(self.w32)
+ self.w:sqrt()
+
+ self.output:cmul(self.w1,self.w)
+ self.output:resize(input1:size(1))
+
+ return self.output
+end
+
+function CosineDistance:updateGradInput(input, gradOutput)
+ local v1 = input[1]
+ local v2 = input[2]
+ local not_batch = false
+
+ v1, v2 = makeContiguous(self, v1, v2)
+
+ if v1:dim() == 1 then
+ v1 = v1:view(1,-1)
+ v2 = v2:view(1,-1)
+ not_batch = true
+ end
+
+ if #self.gradInput ~= 2 then
+ self.gradInput[1] = self.gradInput[1] or v1.new()
+ self.gradInput[2] = self.gradInput[2] or v1.new()
+ end
+
+ local gw1 = self.gradInput[1]
+ local gw2 = self.gradInput[2]
+ gw1:resizeAs(v1):copy(v2)
+ gw2:resizeAs(v1):copy(v1)
+
+ self.buffer:cmul(self.w1,self.w22)
+ gw1:addcmul(-1,self.buffer:expandAs(v1),v1)
+ gw1:cmul(self.w:expandAs(v1))
+
+ self.buffer:cmul(self.w1,self.w32)
+ gw2:addcmul(-1,self.buffer:expandAs(v1),v2)
+ gw2:cmul(self.w:expandAs(v1))
+
+ local go = gradOutput:view(-1,1):expandAs(v1)
+ gw1:cmul(go)
+ gw2:cmul(go)
+
+ if not_batch then
+ self.gradInput[1]:resize(gw1:size(2))
+ self.gradInput[2]:resize(gw2:size(2))
+ end
+
+ return self.gradInput
+end
+
+function CosineDistance:clearState()
+ nn.utils.clear(self, {
+ 'buffer',
+ 'w1',
+ 'w22',
+ 'w',
+ 'w32',
+ 'ones',
+ })
+ return parent.clearState(self)
+end
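+
+-- Usage sketch: takes a table of two tensors and returns their row-wise
+-- cosine similarity:
+--   local d = nn.CosineDistance()
+--   d:forward({torch.randn(4, 10), torch.randn(4, 10)}) -- size 4, in [-1, 1]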
diff --git a/contrib/lua-torch/nn/CosineEmbeddingCriterion.lua b/contrib/lua-torch/nn/CosineEmbeddingCriterion.lua
new file mode 100644
index 000000000..d55e03130
--- /dev/null
+++ b/contrib/lua-torch/nn/CosineEmbeddingCriterion.lua
@@ -0,0 +1,142 @@
+local CosineEmbeddingCriterion, parent = torch.class('nn.CosineEmbeddingCriterion', 'nn.Criterion')
+
+function CosineEmbeddingCriterion:__init(margin)
+ parent.__init(self)
+ margin = margin or 0
+ self.margin = margin
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+ self.sizeAverage = true
+end
+
+function CosineEmbeddingCriterion:updateOutput(input,y)
+
+ local input1, input2 = input[1], input[2]
+
+ -- keep backward compatibility
+ if type(y) == 'number' then
+ self._y = self._y or input1.new(1)
+ self._y[1] = y
+ y = self._y
+ end
+
+ if input1:dim() == 1 then
+ input1 = input1:view(1,-1)
+ input2 = input2:view(1,-1)
+ end
+
+ if not self.buffer then
+ self.buffer = input1.new()
+ self.w1 = input1.new()
+ self.w22 = input1.new()
+ self.w = input1.new()
+ self.w32 = input1.new()
+ self._outputs = input1.new()
+ -- comparison operators behave differently from cuda/c implementations
+ if input1:type() == 'torch.CudaTensor' then
+ self._idx = input1.new()
+ else
+ self._idx = torch.ByteTensor()
+ end
+ end
+
+ self.buffer:cmul(input1,input2)
+ self.w1:sum(self.buffer,2)
+
+ local epsilon = 1e-12
+ self.buffer:cmul(input1,input1)
+ self.w22:sum(self.buffer,2):add(epsilon)
+ -- self._outputs is also used as a temporary buffer
+ self._outputs:resizeAs(self.w22):fill(1)
+ self.w22:cdiv(self._outputs, self.w22)
+ self.w:resizeAs(self.w22):copy(self.w22)
+
+ self.buffer:cmul(input2,input2)
+ self.w32:sum(self.buffer,2):add(epsilon)
+ self.w32:cdiv(self._outputs, self.w32)
+ self.w:cmul(self.w32)
+ self.w:sqrt()
+
+ self._outputs:cmul(self.w1,self.w)
+ self._outputs = self._outputs:select(2,1)
+
+ y.eq(self._idx,y,-1)
+ self._outputs[self._idx] = self._outputs[self._idx]:add(-self.margin):cmax(0)
+ y.eq(self._idx,y,1)
+ self._outputs[self._idx] = self._outputs[self._idx]:mul(-1):add(1)
+
+ self.output = self._outputs:sum()
+
+ if self.sizeAverage then
+ self.output = self.output/y:size(1)
+ end
+
+ return self.output
+end
+
+function CosineEmbeddingCriterion:updateGradInput(input, y)
+
+ local v1 = input[1]
+ local v2 = input[2]
+ local not_batch = false
+
+ -- keep backward compatibility
+ if type(y) == 'number' then
+      self._y = self._y or v1.new(1)
+ self._y[1] = y
+ y = self._y
+ end
+
+ if v1:dim() == 1 then
+ v1 = v1:view(1,-1)
+ v2 = v2:view(1,-1)
+ not_batch = true
+ end
+
+ local gw1 = self.gradInput[1]
+ local gw2 = self.gradInput[2]
+ gw1:resizeAs(v1):copy(v2)
+ gw2:resizeAs(v1):copy(v1)
+
+ self.buffer:cmul(self.w1,self.w22)
+ gw1:addcmul(-1,self.buffer:expandAs(v1),v1)
+ gw1:cmul(self.w:expandAs(v1))
+
+ self.buffer:cmul(self.w1,self.w32)
+ gw2:addcmul(-1,self.buffer:expandAs(v1),v2)
+ gw2:cmul(self.w:expandAs(v1))
+
+ -- self._idx = self._outputs <= 0
+ y.le(self._idx,self._outputs,0)
+ self._idx = self._idx:view(-1,1):expand(gw1:size())
+ gw1[self._idx] = 0
+ gw2[self._idx] = 0
+
+ y.eq(self._idx,y,1)
+ self._idx = self._idx:view(-1,1):expand(gw2:size())
+ gw1[self._idx] = gw1[self._idx]:mul(-1)
+ gw2[self._idx] = gw2[self._idx]:mul(-1)
+
+ if self.sizeAverage then
+ gw1:div(y:size(1))
+ gw2:div(y:size(1))
+ end
+
+ if not_batch then
+ self.gradInput[1]:resize(gw1:size(2))
+ self.gradInput[2]:resize(gw2:size(2))
+ end
+
+ return self.gradInput
+end
+
+function CosineEmbeddingCriterion:type(type)
+ self._idx = nil
+ parent.type(self,type)
+ -- comparison operators behave differently from cuda/c implementations
+ if type == 'torch.CudaTensor' then
+ self._idx = torch.CudaTensor()
+ else
+ self._idx = torch.ByteTensor()
+ end
+ return self
+end
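+
+-- Usage sketch: y holds +1 for pairs that should be similar and -1 for
+-- pairs whose cosine similarity should not exceed the margin:
+--   local crit = nn.CosineEmbeddingCriterion(0.1)
+--   local loss = crit:forward({torch.randn(4, 10), torch.randn(4, 10)},
+--                             torch.Tensor{1, -1, 1, -1})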
diff --git a/contrib/lua-torch/nn/Criterion.lua b/contrib/lua-torch/nn/Criterion.lua
new file mode 100644
index 000000000..e48f06876
--- /dev/null
+++ b/contrib/lua-torch/nn/Criterion.lua
@@ -0,0 +1,64 @@
+local Criterion = torch.class('nn.Criterion')
+
+function Criterion:__init()
+ self.gradInput = torch.Tensor()
+ self.output = 0
+end
+
+function Criterion:updateOutput(input, target)
+end
+
+function Criterion:forward(input, target)
+ return self:updateOutput(input, target)
+end
+
+function Criterion:backward(input, target)
+ return self:updateGradInput(input, target)
+end
+
+function Criterion:updateGradInput(input, target)
+end
+
+function Criterion:clone()
+ local f = torch.MemoryFile("rw"):binary()
+ f:writeObject(self)
+ f:seek(1)
+ local clone = f:readObject()
+ f:close()
+ return clone
+end
+
+function Criterion:type(type, tensorCache)
+ assert(type, 'Criterion: must provide a type to convert to')
+ -- find all tensors and convert them
+ for key,param in pairs(self) do
+ self[key] = nn.utils.recursiveType(param, type, tensorCache)
+ end
+ return self
+end
+
+function Criterion:float()
+ return self:type('torch.FloatTensor')
+end
+
+function Criterion:double()
+ return self:type('torch.DoubleTensor')
+end
+
+function Criterion:cuda()
+ return self:type('torch.CudaTensor')
+end
+
+function Criterion:cudaHalf()
+ return self:type('torch.CudaHalfTensor')
+end
+
+function Criterion:cudaDouble()
+ return self:type('torch.CudaDoubleTensor')
+end
+
+function Criterion:__call__(input, target)
+ self.output = self:forward(input, target)
+ self.gradInput = self:backward(input, target)
+ return self.output, self.gradInput
+end
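+
+-- The __call__ sugar above runs forward and backward in one step:
+--   local loss, gradInput = criterion(input, target)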
diff --git a/contrib/lua-torch/nn/CriterionTable.lua b/contrib/lua-torch/nn/CriterionTable.lua
new file mode 100644
index 000000000..14f67bd39
--- /dev/null
+++ b/contrib/lua-torch/nn/CriterionTable.lua
@@ -0,0 +1,17 @@
+local CriterionTable, parent = torch.class('nn.CriterionTable', 'nn.Module')
+
+function CriterionTable:__init(criterion)
+ parent.__init(self)
+ self.criterion = criterion
+ self.gradInput = {criterion.gradInput}
+end
+
+function CriterionTable:updateOutput(input)
+ self.output = self.criterion:updateOutput(table.unpack(input))
+ return self.output
+end
+
+function CriterionTable:updateGradInput(input, gradOutput)
+ self.criterion:updateGradInput(table.unpack(input))
+ return self.gradInput
+end
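+
+-- Editor's sketch: wraps a criterion so that it takes a single {input, target}
+-- table, e.g.
+--   local m = nn.CriterionTable(nn.MSECriterion())
+--   local err = m:forward{torch.randn(3), torch.randn(3)}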
diff --git a/contrib/lua-torch/nn/CrossEntropyCriterion.lua b/contrib/lua-torch/nn/CrossEntropyCriterion.lua
new file mode 100644
index 000000000..2f72cf87f
--- /dev/null
+++ b/contrib/lua-torch/nn/CrossEntropyCriterion.lua
@@ -0,0 +1,42 @@
+local CrossEntropyCriterion, Criterion = torch.class('nn.CrossEntropyCriterion', 'nn.Criterion')
+
+function CrossEntropyCriterion:__init(weights, sizeAverage)
+ Criterion.__init(self)
+ self.lsm = nn.LogSoftMax()
+ self.nll = nn.ClassNLLCriterion(weights, sizeAverage)
+ self.sizeAverage = self.nll.sizeAverage
+ self.oldSizeAverage = self.sizeAverage
+end
+
+function CrossEntropyCriterion:updateOutput(input, target)
+ input = input:squeeze()
+ target = type(target) == 'number' and target or target:squeeze()
+ -- only propagate if value has changed to preserve old behavior
+ -- of setting nll.sizeAverage directly
+ if self.sizeAverage ~= self.oldSizeAverage then
+ self.nll.sizeAverage = self.sizeAverage
+ end
+ self.lsm:updateOutput(input)
+ self.nll:updateOutput(self.lsm.output, target)
+ self.output = self.nll.output
+ self.oldSizeAverage = self.sizeAverage
+ return self.output
+end
+
+function CrossEntropyCriterion:updateGradInput(input, target)
+ local size = input:size()
+ input = input:squeeze()
+ target = type(target) == 'number' and target or target:squeeze()
+ -- only propagate if value has changed to preserve old behavior
+ -- of setting nll.sizeAverage directly
+ if self.sizeAverage ~= self.oldSizeAverage then
+ self.nll.sizeAverage = self.sizeAverage
+ end
+ self.nll:updateGradInput(self.lsm.output, target)
+ self.lsm:updateGradInput(input, self.nll.gradInput)
+ self.gradInput:view(self.lsm.gradInput, size)
+ self.oldSizeAverage = self.sizeAverage
+ return self.gradInput
+end
+
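+-- Editor's sketch: equivalent to nn.LogSoftMax followed by
+-- nn.ClassNLLCriterion (sizes are illustrative):
+--   local crit = nn.CrossEntropyCriterion()
+--   local input = torch.randn(4, 10)          -- batch of 4, 10 classes
+--   local target = torch.LongTensor{1, 5, 2, 10}
+--   local loss = crit:forward(input, target)
+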
+return nn.CrossEntropyCriterion
diff --git a/contrib/lua-torch/nn/Decorator.lua b/contrib/lua-torch/nn/Decorator.lua
new file mode 100644
index 000000000..05fb4db92
--- /dev/null
+++ b/contrib/lua-torch/nn/Decorator.lua
@@ -0,0 +1,47 @@
+local Decorator, parent = torch.class("nn.Decorator", "nn.Container")
+
+function Decorator:__init(module)
+ parent.__init(self)
+ -- so that it can be handled like a Container
+ self.modules[1] = module
+end
+
+function Decorator:updateOutput(input)
+ self.output = self.modules[1]:updateOutput(input)
+ return self.output
+end
+
+function Decorator:updateGradInput(input, gradOutput)
+ self.gradInput = self.modules[1]:updateGradInput(input, gradOutput)
+ return self.gradInput
+end
+
+function Decorator:accGradParameters(input, gradOutput, scale)
+ self.modules[1]:accGradParameters(input, gradOutput, scale)
+end
+
+function Decorator:accUpdateGradParameters(input, gradOutput, lr)
+ self.modules[1]:accUpdateGradParameters(input, gradOutput, lr)
+end
+
+function Decorator:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ self.modules[1]:sharedAccUpdateGradParameters(input, gradOutput, lr)
+end
+
+function Decorator:__tostring__()
+ if self.modules[1].__tostring__ then
+ return torch.type(self) .. ' @ ' .. self.modules[1]:__tostring__()
+ else
+ return torch.type(self) .. ' @ ' .. torch.type(self.modules[1])
+ end
+end
+
+-- useful for multiple inheritance
+function Decorator.decorate(class)
+ class.updateOutput = nn.Decorator.updateOutput
+ class.updateGradInput = nn.Decorator.updateGradInput
+ class.accGradParameters = nn.Decorator.accGradParameters
+ class.accUpdateGradParameters = nn.Decorator.accUpdateGradParameters
+ class.sharedAccUpdateGradParameters = nn.Decorator.sharedAccUpdateGradParameters
+ class.__tostring__ = nn.Decorator.__tostring__
+end
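+
+-- Editor's sketch of decorate() for multiple inheritance; the class name is
+-- hypothetical:
+--   local MyDecorator = torch.class('nn.MyDecorator', 'nn.Container')
+--   nn.Decorator.decorate(MyDecorator)
+--   -- nn.MyDecorator instances now forward calls to self.modules[1]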
diff --git a/contrib/lua-torch/nn/DepthConcat.lua b/contrib/lua-torch/nn/DepthConcat.lua
new file mode 100644
index 000000000..f64a90eb8
--- /dev/null
+++ b/contrib/lua-torch/nn/DepthConcat.lua
@@ -0,0 +1,116 @@
+------------------------------------------------------------------------
+--[[ DepthConcat ]]--
+-- Concatenates the outputs of convolution modules along the depth dimension
+-- (nOutputFrame). This is used to implement the DepthConcat layer
+-- of the "Going deeper with convolutions" paper:
+-- http://arxiv.org/pdf/1409.4842v1.pdf
+-- The normal Concat Module can't be used since the spatial dimensions
+-- of tensors to be concatenated may have different values. To deal with
+-- this, we select the largest spatial dimensions and add zero-padding
+-- around the smaller dimensions.
+------------------------------------------------------------------------
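+--
+-- Editor's sketch (layer sizes are illustrative):
+--   local dc = nn.DepthConcat(1)              -- depth dim of a CxHxW input
+--   dc:add(nn.SpatialConvolution(3, 16, 3, 3))
+--   dc:add(nn.SpatialConvolution(3, 16, 5, 5))
+--   -- the smaller 5x5-conv output is zero-padded to the larger spatial
+--   -- size before both are stacked along the depth dimension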
+local DepthConcat, _ = torch.class('nn.DepthConcat', 'nn.Concat')
+
+function DepthConcat:windowNarrow(output, currentOutput, offset)
+ local outputWindow = output:narrow(self.dimension, offset, currentOutput:size(self.dimension))
+ for dim=1,self.outputSize:size(1) do
+ local currentSize = currentOutput:size(dim)
+ if dim ~= self.dimension and self.outputSize[dim] ~= currentSize then
+ -- 5x5 vs 3x3 -> start = [(5-3)/2] + 1 = 2 (1 pad each side)
+ -- 9x9 vs 5x5 -> start = [(9-5)/2] + 1 = 3 (2 pad each side)
+ -- 9x9 vs 4x4 -> start = [(9-4)/2] + 1 = 3.5 (2 pad, 3 pad)
+ local start = math.floor(((self.outputSize[dim] - currentSize) / 2) + 1)
+ outputWindow = outputWindow:narrow(dim, start, currentSize)
+ end
+ end
+ return outputWindow
+end
+
+function DepthConcat:updateOutput(input)
+ self.outputSize = self.outputSize or torch.LongStorage()
+
+ local outs = {}
+ for i=1,#self.modules do
+ local currentOutput = self:rethrowErrors(self.modules[i], i, 'updateOutput', input)
+ outs[i] = currentOutput
+ if i == 1 then
+ self.outputSize:resize(currentOutput:dim()):copy(currentOutput:size())
+ else
+ self.outputSize[self.dimension] = self.outputSize[self.dimension] + currentOutput:size(self.dimension)
+ for dim=1,self.outputSize:size(1) do
+ if dim ~= self.dimension then
+ -- take the maximum size (shouldn't change anything for batch dim)
+ self.outputSize[dim] = math.max(self.outputSize[dim], currentOutput:size(dim))
+ end
+ end
+ end
+ end
+ self.output:resize(self.outputSize):zero() --zero for padding
+
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = outs[i]
+ local outputWindow = self:windowNarrow(self.output, currentOutput, offset)
+ outputWindow:copy(currentOutput)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ return self.output
+end
+
+function DepthConcat:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input)
+
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local gradOutputWindow = self:windowNarrow(gradOutput, currentOutput, offset)
+ local currentGradInput = self:rethrowErrors(module, i, 'updateGradInput', input, gradOutputWindow)
+ if i==1 then
+ self.gradInput:copy(currentGradInput)
+ else
+ self.gradInput:add(currentGradInput)
+ end
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ return self.gradInput
+end
+
+function DepthConcat:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local gradOutputWindow = self:windowNarrow(gradOutput, currentOutput, offset)
+ self:rethrowErrors(module, i, 'accGradParameters', input, gradOutputWindow, scale)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+end
+
+function DepthConcat:backward(input, gradOutput, scale)
+ self.gradInput:resizeAs(input)
+
+ scale = scale or 1
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local gradOutputWindow = self:windowNarrow(gradOutput, currentOutput, offset)
+ local currentGradInput = self:rethrowErrors(module, i, 'backward', input, gradOutputWindow)
+ if i==1 then
+ self.gradInput:copy(currentGradInput)
+ else
+ self.gradInput:add(currentGradInput)
+ end
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ return self.gradInput
+end
+
+function DepthConcat:accUpdateGradParameters(input, gradOutput, lr)
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local gradOutputWindow = self:windowNarrow(gradOutput, currentOutput, offset)
+ self:rethrowErrors(module, i, 'accUpdateGradParameters', input, gradOutputWindow, lr)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+end
diff --git a/contrib/lua-torch/nn/DistKLDivCriterion.lua b/contrib/lua-torch/nn/DistKLDivCriterion.lua
new file mode 100644
index 000000000..bfad57567
--- /dev/null
+++ b/contrib/lua-torch/nn/DistKLDivCriterion.lua
@@ -0,0 +1,34 @@
+local DistKLDivCriterion, parent = torch.class('nn.DistKLDivCriterion', 'nn.Criterion')
+
+function DistKLDivCriterion:__init()
+ parent.__init(self)
+ self.sizeAverage = true
+end
+
+function DistKLDivCriterion:updateOutput(input, target)
+ assert(input:dim() == target:dim() and
+ torch.LongTensor(input:size()):eq(torch.LongTensor(target:size())):all(),
+ 'input and target should have the same size')
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.DistKLDivCriterion_updateOutput(
+ input:cdata(),
+ target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function DistKLDivCriterion:updateGradInput(input, target)
+ assert(input:dim() == target:dim() and
+ torch.LongTensor(input:size()):eq(torch.LongTensor(target:size())):all(),
+ 'input and target should have the same size')
+ input.THNN.DistKLDivCriterion_updateGradInput(
+ input:cdata(),
+ target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/DistanceRatioCriterion.lua b/contrib/lua-torch/nn/DistanceRatioCriterion.lua
new file mode 100644
index 000000000..6b79d0620
--- /dev/null
+++ b/contrib/lua-torch/nn/DistanceRatioCriterion.lua
@@ -0,0 +1,142 @@
+--[[
+   Probabilistic criterion for a triplet Siamese model for learning embeddings.
+ Ref: https://arxiv.org/pdf/1610.00243.pdf
+
+ loss = -log( exp(-X) / ( exp(-X) + exp(-Y) ) )
+ where
+ X : Distance between similar samples
+ Y : Distance between dissimilar samples
+
+   The loss can be broken down into the following log expansion
+
+ loss = -log( exp(-X) ) - (-log( exp(-X) + exp(-Y) ))
+ = -log( exp(-X) ) + log( exp(-X) + exp(-Y) )
+ = -(-X) + log( exp(-X) + exp(-Y) )
+ = X + log( exp(-X) + exp(-Y) )
+
+ Gradients:
+ dLoss/dX = 1 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-X)
+ = 1 - exp(-X) / (exp(-X) + exp(-Y))
+
+ dLoss/dY = 0 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-Y)
+ = -exp(-Y) / (exp(-X) + exp(-Y))
+
+--]]
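+
+-- Editor's sketch (names are illustrative): X and Y would typically be the
+-- two distance outputs of a triplet network.
+--   local crit = nn.DistanceRatioCriterion(true)
+--   local X = torch.rand(8) -- distances between similar pairs
+--   local Y = torch.rand(8) -- distances between dissimilar pairs
+--   local loss = crit:forward({X, Y})
+--   local gradX, gradY = unpack(crit:backward({X, Y}))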
+
+local DistanceRatioCriterion, parent = torch.class('nn.DistanceRatioCriterion',
+ 'nn.Criterion')
+
+function DistanceRatioCriterion:__init(sizeAverage)
+ parent.__init(self)
+ if sizeAverage ~= nil then
+ self.sizeAverage = sizeAverage
+ else
+ self.sizeAverage = true
+ end
+end
+
+-- Forward
+--[[
+-- X : Distance between similar samples
+-- Y : Distance between dissimilar samples
+ loss = -log( exp(-X) ) - (-log( exp(-X) + exp(-Y) ))
+ = -log( exp(-X) ) + log( exp(-X) + exp(-Y) )
+ = -(-X) + log( exp(-X) + exp(-Y) )
+ = X + log( exp(-X) + exp(-Y) )
+--]]
+function DistanceRatioCriterion:updateOutput(input)
+ assert(#input == 2, "Invalid number of inputs")
+
+ local X = input[1]
+ local Y = input[2]
+
+   assert(X:nElement() == Y:nElement(), "Number of distances doesn't match.")
+ assert(X:size(1) == Y:size(1), "Invalid distances' size.")
+
+ -- Compute exp(-X) and exp(-Y)
+ self._expMinusX = self._expMinusX or X.new()
+ self._expMinusY = self._expMinusY or Y.new()
+
+ -- Compute ( exp(-X) + exp(-Y) )
+ self._expMinusX:resizeAs(X):copy(X):mul(-1):exp()
+ self._expMinusY:resizeAs(Y):copy(Y):mul(-1):exp()
+
+   self._sumExpMinusXY = self._sumExpMinusXY or X.new()
+ self._sumExpMinusXY:resizeAs(self._expMinusX):copy(self._expMinusX)
+ :add(self._expMinusY)
+
+ -- Compute log( exp(-X) + exp(-Y) )
+ self._logSumExpMinusXY = self._logSumExpMinusXY or self._sumExpMinusXY.new()
+ self._logSumExpMinusXY:resizeAs(self._sumExpMinusXY)
+ :copy(self._sumExpMinusXY):log()
+
+   -- Compute X + log( exp(-X) + exp(-Y) )
+ self.loss = self.loss or self._logSumExpMinusXY.new()
+ self.loss:resizeAs(X):copy(X):add(self._logSumExpMinusXY)
+
+ if self.sizeAverage then
+ return self.loss:sum()/X:size(1)
+ else
+ return self.loss:sum()
+ end
+end
+
+-- Backward
+--[[
+-- X : Distance between similar samples
+-- Y : Distance between dissimilar samples
+
+ Gradients:
+ dLoss/dX = 1 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-X)
+ = 1 - exp(-X) / (exp(-X) + exp(-Y))
+
+ dLoss/dY = 0 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-Y)
+ = -exp(-Y) / (exp(-X) + exp(-Y))
+
+--]]
+function DistanceRatioCriterion:updateGradInput(input)
+ assert(#input == 2, "Invalid number of inputs")
+ local X = input[1]
+ local Y = input[2]
+   assert(X:nElement() == Y:nElement(), "Number of distances doesn't match.")
+ assert(X:size(1) == Y:size(1), "Invalid distances' size.")
+
+ -- dLoss/dX
+ -- -exp(-X)
+ self.dX = self.dX or X.new()
+ self.dX:resizeAs(self._expMinusX):copy(self._expMinusX):mul(-1)
+
+ -- -exp(-X) / (exp(-X) + exp(-Y))
+ self.dX:cdiv(self._sumExpMinusXY)
+
+ -- 1 - exp(-X) / (exp(-X) + exp(-Y))
+ self.dX:add(1)
+
+ -- dLoss/dY
+ -- -exp(-Y)
+ self.dY = self.dY or Y.new()
+ self.dY:resizeAs(self._expMinusY):copy(self._expMinusY):mul(-1)
+
+ -- -exp(-Y) / (exp(-X) + exp(-Y))
+ self.dY:cdiv(self._sumExpMinusXY)
+
+ if self.sizeAverage then
+ self.dX:div(X:size(1))
+ self.dY:div(X:size(1))
+ end
+
+ return {self.dX, self.dY}
+end
+
+function DistanceRatioCriterion:type(type, tensorCache)
+ if type then
+ self._expMinusX = nil
+ self._expMinusY = nil
+ self._sumExpMinusXY = nil
+ self._logSumExpMinusXY = nil
+ self.loss = nil
+ self.dX = nil
+ self.dY = nil
+ end
+ return parent.type(self, type, tensorCache)
+end
diff --git a/contrib/lua-torch/nn/DontCast.lua b/contrib/lua-torch/nn/DontCast.lua
new file mode 100644
index 000000000..b89f5436b
--- /dev/null
+++ b/contrib/lua-torch/nn/DontCast.lua
@@ -0,0 +1,124 @@
+local DontCast, parent = torch.class("nn.DontCast", "nn.Decorator")
+
+-- utility functions
+
+local function recursiveTypeCopy(dst, src, type_str)
+ if torch.type(src) == 'table' then
+ dst = (torch.type(dst) == 'table') and dst or {}
+ for k, v in pairs(src) do
+ dst[k] = recursiveTypeCopy(dst[k], v, type_str)
+ end
+ elseif torch.isTensor(src) then
+ dst = (torch.type(dst) == type_str) and dst or torch.getmetatable(type_str).new()
+ dst:resize(src:size())
+ if src:nElement() > 0 then
+ dst:copy(src)
+ end
+ end
+ return dst
+end
+
+local function tableTensorType(src)
+ if type(src) == 'table' then
+ local type_str, found
+ for k,v in pairs(src) do
+ type_str, found = tableTensorType(v)
+ if found then
+ return type_str, true
+ end
+ end
+ return type_str, found
+ else
+ return torch.type(src), torch.isTensor(src)
+ end
+end
+
+-- DontCast methods and constructor
+
+function DontCast:__init(module, castin, castout, moduleType)
+ parent.__init(self, module)
+ self.castin = castin
+ self.castout = (castout == nil) and castin or castout
+ self.moduleType = moduleType
+ if (self.castin or self.castout) and not self.moduleType then
+ local moduleType, found = tableTensorType(module.output)
+ if found then
+ self.moduleType = moduleType
+ else
+ moduleType, found = tableTensorType(module:parameters())
+ if found then
+ self.moduleType = moduleType
+ else
+ error"Cannot extrapolate moduleType. Provide constructor argument 4"
+ end
+ end
+ end
+end
+
+function DontCast:updateOutput(input)
+ if self.castin and tableTensorType(input) ~= self.moduleType then
+ self._input = recursiveTypeCopy(self._input, input, self.moduleType)
+ input = self._input
+ end
+
+ local output = self.modules[1]:updateOutput(input)
+
+ if self.castout then
+ self.output = recursiveTypeCopy(self.output, output, tableTensorType(self.output))
+ else
+ self.output = output
+ end
+ return self.output
+end
+
+function DontCast:updateGradInput(input, gradOutput)
+ if self.castin and tableTensorType(input) ~= self.moduleType then
+ input = self._input
+ end
+ if self.castout and tableTensorType(gradOutput) ~= self.moduleType then
+ self._gradOutput = recursiveTypeCopy(self._gradOutput, gradOutput, self.moduleType)
+ gradOutput = self._gradOutput
+ end
+
+ local gradInput = self.modules[1]:updateGradInput(input, gradOutput)
+
+ if self.castin then
+ self.gradInput = recursiveTypeCopy(self.gradInput, gradInput, tableTensorType(self.gradInput))
+ else
+ self.gradInput = gradInput
+ end
+ return self.gradInput
+end
+
+function DontCast:accGradParameters(input, gradOutput, scale)
+ if self.castin and tableTensorType(input) ~= self.moduleType then
+ input = self._input
+ end
+ if self.castout and tableTensorType(gradOutput) ~= self.moduleType then
+ gradOutput = self._gradOutput
+ end
+
+ self.modules[1]:accGradParameters(input, gradOutput, scale)
+end
+
+function DontCast:accUpdateGradParameters(input, gradOutput, lr)
+ if self.castin and tableTensorType(input) ~= self.moduleType then
+ input = self._input
+ end
+ if self.castout and tableTensorType(gradOutput) ~= self.moduleType then
+ gradOutput = self._gradOutput
+ end
+
+ self.modules[1]:accUpdateGradParameters(input, gradOutput, lr)
+end
+
+-- dont cast (the essence thereof)
+function DontCast:type(type)
+ if self.castout and tableTensorType(self.output) ~= type then
+ self.output = recursiveTypeCopy(nil, self.output, type)
+ end
+ if self.castin and tableTensorType(self.gradInput) ~= type then
+ self.gradInput = recursiveTypeCopy(nil, self.gradInput, type)
+ end
+ return self
+end
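+
+-- Editor's sketch: keep the inner module in float while the rest of the
+-- network uses another type (sizes are illustrative):
+--   local dc = nn.DontCast(nn.Linear(3, 4):float(), true, true, 'torch.FloatTensor')
+--   dc:double()                                  -- the inner Linear stays float
+--   local out = dc:forward(torch.randn(2, 3))    -- double in, double out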
diff --git a/contrib/lua-torch/nn/DotProduct.lua b/contrib/lua-torch/nn/DotProduct.lua
new file mode 100644
index 000000000..ccd347e6b
--- /dev/null
+++ b/contrib/lua-torch/nn/DotProduct.lua
@@ -0,0 +1,61 @@
+local DotProduct, parent = torch.class('nn.DotProduct', 'nn.Module')
+
+function DotProduct:__init()
+ parent.__init(self)
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+end
+
+function DotProduct:updateOutput(input)
+ local input1, input2 = input[1], input[2]
+ if input1:dim() == 1 then
+ -- convert non batch input to batch input
+ input1 = input1:view(1,-1)
+ input2 = input2:view(1,-1)
+ end
+ if not self.buffer then
+ self.buffer = input1.new()
+ end
+ self.buffer:cmul(input1, input2)
+ self.output:sum(self.buffer, 2)
+ self.output:resize(input1:size(1))
+ return self.output
+end
+
+function DotProduct:updateGradInput(input, gradOutput)
+ local v1 = input[1]
+ local v2 = input[2]
+ local not_batch = false
+
+ if #self.gradInput ~= 2 then
+ self.gradInput[1] = self.gradInput[1] or input[1].new()
+ self.gradInput[2] = self.gradInput[2] or input[2].new()
+ end
+
+ if v1:dim() == 1 then
+ v1 = v1:view(1,-1)
+ v2 = v2:view(1,-1)
+ not_batch = true
+ end
+
+ local gw1 = self.gradInput[1]
+ local gw2 = self.gradInput[2]
+ gw1:resizeAs(v1):copy(v2)
+ gw2:resizeAs(v2):copy(v1)
+
+ local go = gradOutput:view(-1,1):expandAs(v1)
+ gw1:cmul(go)
+ gw2:cmul(go)
+
+ if not_batch then
+ -- unbatch gradInput
+ self.gradInput[1]:set(gw1:select(1,1))
+ self.gradInput[2]:set(gw2:select(1,1))
+ end
+
+ return self.gradInput
+end
+
+function DotProduct:clearState()
+ if self.buffer then self.buffer:set() end
+ return parent.clearState(self)
+end
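+
+-- Editor's sketch: row-wise dot product of two batches (sizes illustrative):
+--   local dot = nn.DotProduct()
+--   local a, b = torch.randn(4, 5), torch.randn(4, 5)
+--   local out = dot:forward{a, b} -- tensor of 4 dot products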
diff --git a/contrib/lua-torch/nn/Dropout.lua b/contrib/lua-torch/nn/Dropout.lua
new file mode 100644
index 000000000..15f2f4699
--- /dev/null
+++ b/contrib/lua-torch/nn/Dropout.lua
@@ -0,0 +1,70 @@
+local Dropout, Parent = torch.class('nn.Dropout', 'nn.Module')
+
+function Dropout:__init(p,v1,inplace,stochasticInference)
+ Parent.__init(self)
+ self.p = p or 0.5
+ self.train = true
+ self.inplace = inplace
+ self.stochastic_inference = stochasticInference or false
+ -- version 2 scales output during training instead of evaluation
+ self.v2 = not v1
+ if self.p >= 1 or self.p < 0 then
+ error('<Dropout> illegal percentage, must be 0 <= p < 1')
+ end
+ self.noise = torch.Tensor()
+end
+
+function Dropout:updateOutput(input)
+ if self.inplace then
+ self.output:set(input)
+ else
+ self.output:resizeAs(input):copy(input)
+ end
+ if self.p > 0 then
+ if self.train or self.stochastic_inference then
+ self.noise:resizeAs(input)
+ self.noise:bernoulli(1-self.p)
+ if self.v2 then
+ self.noise:div(1-self.p)
+ end
+ self.output:cmul(self.noise)
+ elseif not self.v2 then
+ self.output:mul(1-self.p)
+ end
+ end
+ return self.output
+end
+
+function Dropout:updateGradInput(input, gradOutput)
+ if self.inplace then
+ self.gradInput:set(gradOutput)
+ else
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+ end
+ if self.train then
+ if self.p > 0 then
+ self.gradInput:cmul(self.noise) -- simply mask the gradients with the noise vector
+ end
+ else
+ if not self.v2 and self.p > 0 then
+ self.gradInput:mul(1-self.p)
+ end
+ end
+ return self.gradInput
+end
+
+function Dropout:setp(p)
+ self.p = p
+end
+
+function Dropout:__tostring__()
+ return string.format('%s(%f)', torch.type(self), self.p)
+end
+
+
+function Dropout:clearState()
+ if self.noise then
+ self.noise:set()
+ end
+ return Parent.clearState(self)
+end
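+
+-- Editor's sketch: with the default v2 behaviour, scaling happens during
+-- training and evaluation is a plain copy:
+--   local drop = nn.Dropout(0.4)
+--   drop:training()
+--   local y1 = drop:forward(torch.ones(5)) -- ~60% of entries kept, scaled by 1/0.6
+--   drop:evaluate()
+--   local y2 = drop:forward(torch.ones(5)) -- values pass through unchanged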
diff --git a/contrib/lua-torch/nn/ELU.lua b/contrib/lua-torch/nn/ELU.lua
new file mode 100644
index 000000000..48a6caa2c
--- /dev/null
+++ b/contrib/lua-torch/nn/ELU.lua
@@ -0,0 +1,45 @@
+local ELU, parent = torch.class('nn.ELU', 'nn.Module')
+
+--[[
+ Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter
+ Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
+ http://arxiv.org/pdf/1511.07289.pdf
+--]]
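+
+-- Editor's note: elementwise, f(x) = x for x > 0 and alpha * (exp(x) - 1)
+-- for x <= 0.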
+
+function ELU:__init(alpha, inplace)
+ parent.__init(self)
+ self.alpha = alpha or 1
+ assert(type(self.alpha) == 'number')
+ self.inplace = inplace or false
+ assert(type(self.inplace) == 'boolean')
+end
+
+function ELU:updateOutput(input)
+ local inplace = self.inplace or false
+
+ input.THNN.ELU_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.alpha,
+ inplace
+ )
+ return self.output
+end
+
+function ELU:updateGradInput(input, gradOutput)
+ local inplace = self.inplace or false
+
+ input.THNN.ELU_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata(),
+ self.alpha,
+ inplace
+ )
+ return self.gradInput
+end
+
+function ELU:__tostring__()
+ return string.format('%s (alpha:%f)', torch.type(self), self.alpha)
+end
diff --git a/contrib/lua-torch/nn/ErrorMessages.lua b/contrib/lua-torch/nn/ErrorMessages.lua
new file mode 100644
index 000000000..a5cbed053
--- /dev/null
+++ b/contrib/lua-torch/nn/ErrorMessages.lua
@@ -0,0 +1,19 @@
+
+local mt = {
+ __index = function(table, key)
+ error("nn."..key.." is only supported for Float or Double Tensors.")
+ end
+}
+
+local tensors = {
+ torch.ByteTensor,
+ torch.CharTensor,
+ torch.ShortTensor,
+ torch.IntTensor,
+ torch.LongTensor,
+}
+
+for _, t in ipairs(tensors) do
+ t.nn = {}
+ setmetatable(t.nn, mt)
+end
diff --git a/contrib/lua-torch/nn/Euclidean.lua b/contrib/lua-torch/nn/Euclidean.lua
new file mode 100644
index 000000000..509feff50
--- /dev/null
+++ b/contrib/lua-torch/nn/Euclidean.lua
@@ -0,0 +1,197 @@
+local Euclidean, parent = torch.class('nn.Euclidean', 'nn.Module')
+
+function Euclidean:__init(inputSize,outputSize)
+ parent.__init(self)
+
+ self.weight = torch.Tensor(inputSize,outputSize)
+ self.gradWeight = torch.Tensor(inputSize,outputSize)
+
+ -- state
+ self.gradInput:resize(inputSize)
+ self.output:resize(outputSize)
+
+ self.fastBackward = true
+
+ self:reset()
+end
+
+function Euclidean:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(1))
+ end
+ if nn.oldSeed then
+ for i=1,self.weight:size(2) do
+ self.weight:select(2, i):apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ end
+ else
+ self.weight:uniform(-stdv, stdv)
+ end
+end
+
+local function view(res, src, ...)
+ local args = {...}
+ if src:isContiguous() then
+ res:view(src, table.unpack(args))
+ else
+ res:reshape(src, table.unpack(args))
+ end
+end
+
+function Euclidean:updateOutput(input)
+ -- lazy initialize buffers
+ self._input = self._input or input.new()
+ self._weight = self._weight or self.weight.new()
+ self._expand = self._expand or self.output.new()
+ self._expand2 = self._expand2 or self.output.new()
+ self._repeat = self._repeat or self.output.new()
+ self._repeat2 = self._repeat2 or self.output.new()
+
+ local inputSize, outputSize = self.weight:size(1), self.weight:size(2)
+
+ -- y_j = || w_j - x || = || x - w_j ||
+ if input:dim() == 1 then
+ view(self._input, input, inputSize, 1)
+ self._expand:expandAs(self._input, self.weight)
+ self._repeat:resizeAs(self._expand):copy(self._expand)
+ self._repeat:add(-1, self.weight)
+ self.output:norm(self._repeat, 2, 1)
+ self.output:resize(outputSize)
+ elseif input:dim() == 2 then
+ local batchSize = input:size(1)
+
+ view(self._input, input, batchSize, inputSize, 1)
+ self._expand:expand(self._input, batchSize, inputSize, outputSize)
+ -- make the expanded tensor contiguous (requires lots of memory)
+ self._repeat:resizeAs(self._expand):copy(self._expand)
+
+ self._weight:view(self.weight, 1, inputSize, outputSize)
+ self._expand2:expandAs(self._weight, self._repeat)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ -- requires lots of memory, but minimizes cudaMallocs and loops
+ self._repeat2:resizeAs(self._expand2):copy(self._expand2)
+ self._repeat:add(-1, self._repeat2)
+ else
+ self._repeat:add(-1, self._expand2)
+ end
+
+ self.output:norm(self._repeat, 2, 2)
+ self.output:resize(batchSize, outputSize)
+ else
+ error"1D or 2D input expected"
+ end
+
+ return self.output
+end
+
+function Euclidean:updateGradInput(input, gradOutput)
+ if not self.gradInput then
+ return
+ end
+
+ self._div = self._div or input.new()
+ self._output = self._output or self.output.new()
+ self._gradOutput = self._gradOutput or input.new()
+ self._expand3 = self._expand3 or input.new()
+
+ if not self.fastBackward then
+ self:updateOutput(input)
+ end
+
+ local inputSize, outputSize = self.weight:size(1), self.weight:size(2)
+
+ --[[
+ dy_j -2 * (w_j - x) x - w_j
+ ---- = --------------- = -------
+ dx 2 || w_j - x || y_j
+ --]]
+
+ -- to prevent div by zero (NaN) bugs
+ self._output:resizeAs(self.output):copy(self.output):add(0.0000001)
+ view(self._gradOutput, gradOutput, gradOutput:size())
+ self._div:cdiv(gradOutput, self._output)
+ if input:dim() == 1 then
+ self._div:resize(1, outputSize)
+ self._expand3:expandAs(self._div, self.weight)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ self._repeat2:resizeAs(self._expand3):copy(self._expand3)
+ self._repeat2:cmul(self._repeat)
+ else
+ self._repeat2:cmul(self._repeat, self._expand3)
+ end
+
+ self.gradInput:sum(self._repeat2, 2)
+ self.gradInput:resizeAs(input)
+ elseif input:dim() == 2 then
+ local batchSize = input:size(1)
+
+ self._div:resize(batchSize, 1, outputSize)
+ self._expand3:expand(self._div, batchSize, inputSize, outputSize)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ self._repeat2:resizeAs(self._expand3):copy(self._expand3)
+ self._repeat2:cmul(self._repeat)
+ else
+ self._repeat2:cmul(self._repeat, self._expand3)
+ end
+
+ self.gradInput:sum(self._repeat2, 3)
+ self.gradInput:resizeAs(input)
+ else
+ error"1D or 2D input expected"
+ end
+
+ return self.gradInput
+end
+
+function Euclidean:accGradParameters(input, gradOutput, scale)
+ local inputSize, outputSize = self.weight:size(1), self.weight:size(2)
+ scale = scale or 1
+
+ --[[
+ dy_j 2 * (w_j - x) w_j - x
+ ---- = --------------- = -------
+ dw_j 2 || w_j - x || y_j
+ --]]
+ -- assumes a preceding call to updateGradInput
+ if input:dim() == 1 then
+ self.gradWeight:add(-scale, self._repeat2)
+ elseif input:dim() == 2 then
+ self._sum = self._sum or input.new()
+ self._sum:sum(self._repeat2, 1)
+ self._sum:resize(inputSize, outputSize)
+ self.gradWeight:add(-scale, self._sum)
+ else
+ error"1D or 2D input expected"
+ end
+end
+
+function Euclidean:type(type, tensorCache)
+ if type then
+ -- prevent premature memory allocations
+ self:clearState()
+ end
+ return parent.type(self, type, tensorCache)
+end
+
+function Euclidean:clearState()
+ nn.utils.clear(self, {
+ '_input',
+ '_output',
+ '_gradOutput',
+ '_weight',
+ '_div',
+ '_sum',
+ '_expand',
+ '_expand2',
+ '_expand3',
+ '_repeat',
+ '_repeat2',
+ })
+ return parent.clearState(self)
+end
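+
+-- Editor's sketch: each output j is the distance || w_j - x || (sizes are
+-- illustrative):
+--   local e = nn.Euclidean(5, 3)
+--   local out = e:forward(torch.randn(5)) -- 3 distances, one per weight column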
diff --git a/contrib/lua-torch/nn/Exp.lua b/contrib/lua-torch/nn/Exp.lua
new file mode 100644
index 000000000..f41569026
--- /dev/null
+++ b/contrib/lua-torch/nn/Exp.lua
@@ -0,0 +1,9 @@
+local Exp = torch.class('nn.Exp', 'nn.Module')
+
+function Exp:updateOutput(input)
+ return self.output:exp(input)
+end
+
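+-- Since d/dx exp(x) = exp(x), the gradient reuses the cached forward output.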
+function Exp:updateGradInput(input, gradOutput)
+ return self.gradInput:cmul(self.output, gradOutput)
+end
diff --git a/contrib/lua-torch/nn/FlattenTable.lua b/contrib/lua-torch/nn/FlattenTable.lua
new file mode 100644
index 000000000..1c182557c
--- /dev/null
+++ b/contrib/lua-torch/nn/FlattenTable.lua
@@ -0,0 +1,106 @@
+local FlattenTable, parent = torch.class('nn.FlattenTable', 'nn.Module')
+
+function FlattenTable:__init()
+ parent.__init(self)
+
+ self.output = {}
+ self.input_map = {}
+ self.gradInput = {}
+end
+
+-- Recursive function to flatten a table (output is a table)
+local function flatten(output, input)
+ local input_map -- has the same structure as input, but stores the
+ -- indices to the corresponding output
+ if type(input) == 'table' then
+ input_map = {}
+ -- forward DFS order
+ for i = 1, #input do
+ input_map[#input_map+1] = flatten(output, input[i])
+ end
+ else
+ input_map = #output + 1
+ output[input_map] = input -- append the tensor
+ end
+ return input_map
+end
+
+-- Recursive function to check if we need to rebuild the output table
+local function checkMapping(output, input, input_map)
+ if input_map == nil or output == nil or input == nil then
+ return false
+ end
+ if type(input) == 'table' then
+ if type(input_map) ~= 'table' then
+ return false
+ end
+ if #input ~= #input_map then
+ return false
+ end
+ -- forward DFS order
+ for i = 1, #input do
+ local ok = checkMapping(output, input[i], input_map[i])
+ if not ok then
+ return false
+ end
+ end
+ return true
+ else
+ if type(input_map) ~= 'number' then
+ return false
+ end
+ return output[input_map] == input
+ end
+end
+
+-- During BPROP we have to build a gradInput with the same shape as the
+-- input. This is a recursive function to build up a gradInput
+local function inverseFlatten(gradOutput, input_map)
+ if type(input_map) == 'table' then
+ local gradInput = {}
+ for i = 1, #input_map do
+ gradInput[#gradInput + 1] = inverseFlatten(gradOutput, input_map[i])
+ end
+ return gradInput
+ else
+ return gradOutput[input_map]
+ end
+end
+
+function FlattenTable:updateOutput(input)
+ assert(type(input) == 'table', 'input must be a table')
+   -- to avoid rebuilding the flattened table on every updateOutput call
+ -- we will do a DFS pass over the existing output table and the inputs to
+ -- see if it needs to be rebuilt.
+ if not checkMapping(self.output, input, self.input_map) then
+ self.output = {}
+ self.input_map = flatten(self.output, input)
+ end
+ return self.output
+end
+
+function FlattenTable:updateGradInput(input, gradOutput)
+ assert(type(input) == 'table', 'input must be a table')
+   assert(type(gradOutput) == 'table', 'gradOutput must be a table')
+ -- If the input changes between the updateOutput and updateGradInput call,
+ -- then we may have to rebuild the input_map! However, let's assume that
+ -- the input_map is valid and that forward has already been called.
+
+ -- However, we should check that the gradInput is valid:
+ if not checkMapping(gradOutput, self.gradInput, self.input_map) then
+ self.gradInput = inverseFlatten(gradOutput, self.input_map)
+ end
+
+ return self.gradInput
+end
+
+function FlattenTable:type(type, tensorCache)
+ -- This function just stores references so we don't need to do any type
+ -- conversions. Just force the tables to be empty.
+ self:clearState()
+end
+
+function FlattenTable:clearState()
+ self.input_map = {}
+ return parent.clearState(self)
+end
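+
+-- Editor's sketch: nested tables of tensors are flattened in DFS order:
+--   local ft = nn.FlattenTable()
+--   local t1, t2, t3 = torch.rand(2), torch.rand(2), torch.rand(2)
+--   local out = ft:forward{t1, {t2, {t3}}} -- {t1, t2, t3}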
diff --git a/contrib/lua-torch/nn/GPU.lua b/contrib/lua-torch/nn/GPU.lua
new file mode 100644
index 000000000..758618d8b
--- /dev/null
+++ b/contrib/lua-torch/nn/GPU.lua
@@ -0,0 +1,273 @@
+------------------------------------------------------------------------
+--[[ GPU ]]--
+-- Decorates a module such that its parameters are
+-- hosted on a specified GPU device.
+-- The operations are also executed on that device.
+-- Arguments input and gradOutput are converted to the specified device
+-- before being fed to the decorated module.
+-- Returned output is on the specified outdevice (defaults to device).
+-- Returned gradInput is allocated on the same device as the input.
+-- The unit test is located in cunn.
+------------------------------------------------------------------------
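+--
+-- Editor's sketch (requires cutorch; the device ids and sizes are
+-- illustrative):
+--   local m = nn.GPU(nn.Linear(100, 100), 2) -- parameters live on device 2
+--   m:cuda()
+--   local output = m:forward(torch.CudaTensor(16, 100)) -- input is copied to device 2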
+local GPU, parent = torch.class("nn.GPU", "nn.Container")
+
+function GPU:__init(module, device, outdevice)
+ parent.__init(self)
+ assert(torch.type(device) == 'number')
+ self.device = device
+ self.outdevice = outdevice or device
+
+ assert(torch.isTypeOf(module, 'nn.Module'))
+ self.modules[1] = module
+
+ if module:type():find('torch%.Cuda.*Tensor') then
+ self:type(module:type())
+ end
+end
+
+function GPU.recursiveModuleDevice(obj, device)
+ if type(obj) == 'table' and not torch.isTypeOf(obj, 'nn.GPU') and not obj.__noGPU__ then
+ for k,v in pairs(obj) do
+ obj[k] = GPU.recursiveModuleDevice(v, device)
+ end
+ elseif torch.type(obj):match('torch.Cuda.*Tensor') then
+ if obj:getDevice() ~= device then
+ obj = obj:clone() -- this will reallocate it to device
+ local newdevice = obj:getDevice()
+ -- when nElement() == 0 newdevice is 0
+ assert(newdevice == device or newdevice == 0)
+ end
+ end
+ assert(obj ~= nil)
+ return obj
+end
+
+-- set the device of the decorated module
+function GPU:setDevice(device)
+ self.device = device or self.device
+
+ assert(self.modules[1])
+ self.modules[1] = cutorch.withDevice(self.device, function()
+ return self.recursiveModuleDevice(self.modules[1], self.device)
+ end)
+ return self
+end
+
+-- when proto is a device number, returns a dst whose every element is on that device
+-- otherwise, if proto is a table/tensor, makes sure dst is identical to src, yet on the same device as proto
+function GPU.recursiveSetDevice(dst, src, proto)
+ local device, prototable
+ if torch.isTensor(proto) then
+ device = proto:getDevice()
+ elseif torch.type(proto) == 'number' then
+ device = proto
+ elseif torch.type(proto) == 'table' then
+ prototable = true
+ else
+ error"Expecting number, table or tensor for arg 3 (proto)"
+ end
+ if torch.type(src) == 'table' then
+ dst = torch.type(dst) == 'table' and dst or {}
+ for k,v in ipairs(src) do
+ dst[k] = GPU.recursiveSetDevice(dst[k], v, prototable and proto[k] or device)
+ end
+ for k=#src+1,#dst do
+ dst[k] = nil
+ end
+ elseif torch.type(src):match('torch.Cuda.*Tensor') and src:getDevice() ~= device and src:getDevice() ~= 0 then
+ if not (torch.type(dst):match('torch.Cuda.*Tensor') and dst:getDevice() == device) then
+ dst = src.new()
+ end
+ cutorch.withDevice(device, function() dst:resizeAs(src):copy(src) end)
+ else
+ dst = src
+ end
+ return dst
+end
+
+function GPU:updateOutput(input)
+ if self._type:find('torch%.Cuda.*Tensor') then
+ self._input = self.recursiveSetDevice(self._input, input, self.device)
+
+ local output = cutorch.withDevice(self.device, function()
+ return self.modules[1]:updateOutput(self._input)
+ end)
+
+ if self.device ~= self.outdevice then
+ self.output = self.recursiveSetDevice(self.output, output, self.outdevice)
+ else
+ self.output = output
+ end
+ else
+ self.output = self.modules[1]:updateOutput(input)
+ end
+
+ return self.output
+end
+
+function GPU:updateGradInput(input, gradOutput)
+ if self._type:find('torch%.Cuda.*Tensor') then
+ self._gradOutput = self.recursiveSetDevice(self._gradOutput, gradOutput, self.device)
+
+ local gradInput = cutorch.withDevice(self.device, function()
+ return self.modules[1]:updateGradInput(self._input, self._gradOutput)
+ end)
+
+ self.gradInput = self.recursiveSetDevice(self.gradInput, gradInput, input)
+ else
+ self.gradInput = self.modules[1]:updateGradInput(input, gradOutput)
+ end
+
+ return self.gradInput
+end
+
+function GPU:accGradParameters(input, gradOutput, scale)
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function()
+ self.modules[1]:accGradParameters(self._input, self._gradOutput, scale)
+ end)
+ else
+ self.modules[1]:accGradParameters(input, gradOutput, scale)
+ end
+end
+
+function GPU:apply(callback)
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.apply(self, callback) end)
+ else
+ parent.apply(self, callback)
+ end
+end
+
+function GPU:type(type, typecache)
+ if type and type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.type(self, type, typecache) end)
+ self:setDevice()
+ else
+ self.output = nil
+ self.gradInput = nil
+ self._input = nil
+ self._gradOutput = nil
+ parent.type(self, type, typecache)
+ end
+ return self
+end
+
+function GPU:clearState()
+ nn.utils.clear(self, 'output', 'gradInput')
+ self._input = nil
+ self._gradOutput = nil
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.clearState(self) end)
+ else
+ parent.clearState(self)
+ end
+end
+
+function GPU:zeroGradParameters()
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.zeroGradParameters(self) end)
+ else
+ parent.zeroGradParameters(self)
+ end
+end
+
+function GPU:updateParameters(lr)
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.updateParameters(self, lr) end)
+ else
+ parent.updateParameters(self, lr)
+ end
+end
+
+function GPU:training()
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.training(self) end)
+ else
+ parent.training(self)
+ end
+end
+
+function GPU:evaluate()
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.evaluate(self) end)
+ else
+ parent.evaluate(self)
+ end
+end
+
+function GPU:share(mlp, ...)
+ local args = {...}
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.share(self, mlp, unpack(args)) end)
+ else
+ parent.share(self, mlp, unpack(args))
+ end
+ return self
+end
+
+function GPU:reset(...)
+ local args = {...}
+ if self._type:find('torch%.Cuda.*Tensor') then
+ cutorch.withDevice(self.device, function() parent.reset(self, unpack(args)) end)
+ else
+ parent.reset(self, unpack(args))
+ end
+ return self
+end
+
+function GPU:clone(...)
+ local args = {...}
+ if self._type:find('torch%.Cuda.*Tensor') then
+ return cutorch.withDevice(self.device, function() parent.clone(self, unpack(args)) end)
+ else
+ return parent.clone(self, unpack(args))
+ end
+end
+
+function GPU:write(file)
+ -- Write all values in the object as a table.
+ local object = {}
+ for k, v in pairs(self) do
+ object[k] = v
+ end
+ local header = {self._type, self.device}
+ file:writeObject(header)
+ file:writeObject(object)
+end
+
+function GPU:read(file)
+ local header = file:readObject()
+ local object
+ if header[1] and header[1]:find('torch%.Cuda.*Tensor') then
+ local device = header[2]
+ if device > cutorch.getDeviceCount() then
+ print"Warning : model was saved with more devices than available on current host."
+ print"Attempting to load module onto device 1"
+ device = 1
+ end
+ object = cutorch.withDevice(device, function() return file:readObject() end)
+ else
+ object = file:readObject()
+ end
+
+ for k, v in pairs(object) do
+ self[k] = v
+ end
+end
+
+function GPU:__tostring__()
+ if self.modules[1].__tostring__ then
+ return torch.type(self) .. '(' .. self.device ..') @ ' .. self.modules[1]:__tostring__()
+ else
+ return torch.type(self) .. '(' .. self.device ..') @ ' .. torch.type(self.modules[1])
+ end
+end
+
+function GPU:accUpdateGradParameters(input, gradOutput, lr)
+ error("Not Implemented for "..torch.type(self))
+end
+
+function GPU:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ error("Not Implemented for "..torch.type(self))
+end
diff --git a/contrib/lua-torch/nn/GatedLinearUnit.lua b/contrib/lua-torch/nn/GatedLinearUnit.lua
new file mode 100644
index 000000000..5273abfd4
--- /dev/null
+++ b/contrib/lua-torch/nn/GatedLinearUnit.lua
@@ -0,0 +1,27 @@
+local GatedLinearUnit, parent = torch.class('nn.GatedLinearUnit', 'nn.Module')
+
+function GatedLinearUnit:__init(dim)
+ parent.__init(self)
+ self.dim = dim
+end
+
+function GatedLinearUnit:updateOutput(input)
+ local dim = self.dim or input:dim()
+ input.THNN.GatedLinear_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ dim
+ )
+ return self.output
+end
+
+function GatedLinearUnit:updateGradInput(input, gradOutput)
+ local dim = self.dim or input:dim()
+ input.THNN.GatedLinear_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ dim
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/GradientReversal.lua b/contrib/lua-torch/nn/GradientReversal.lua
new file mode 100644
index 000000000..c08b1dfb0
--- /dev/null
+++ b/contrib/lua-torch/nn/GradientReversal.lua
@@ -0,0 +1,32 @@
+local GradientReversal, parent = torch.class('nn.GradientReversal', 'nn.Module')
+
+GradientReversal.__version = 2
+
+function GradientReversal:__init(lambda)
+ lambda = lambda or 1
+ parent.__init(self)
+ self.lambda = lambda
+end
+
+function GradientReversal:setLambda(lambda)
+ self.lambda = lambda
+end
+
+function GradientReversal:updateOutput(input)
+ self.output:set(input)
+ return self.output
+end
+
+function GradientReversal:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(gradOutput)
+ self.gradInput:copy(gradOutput)
+ self.gradInput:mul(-self.lambda)
+ return self.gradInput
+end
+
+function GradientReversal:read(file, version)
+ parent.read(self, file)
+ if version < 2 then
+ self.lambda = 1
+ end
+end
diff --git a/contrib/lua-torch/nn/HardShrink.lua b/contrib/lua-torch/nn/HardShrink.lua
new file mode 100644
index 000000000..85ff5909c
--- /dev/null
+++ b/contrib/lua-torch/nn/HardShrink.lua
@@ -0,0 +1,25 @@
+local HardShrink, parent = torch.class('nn.HardShrink', 'nn.Module')
+
+function HardShrink:__init(lam)
+ parent.__init(self)
+ self.lambda = lam or 0.5
+end
+
+function HardShrink:updateOutput(input)
+ input.THNN.HardShrink_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.lambda
+ )
+ return self.output
+end
+
+function HardShrink:updateGradInput(input, gradOutput)
+ input.THNN.HardShrink_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.lambda
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/HardTanh.lua b/contrib/lua-torch/nn/HardTanh.lua
new file mode 100644
index 000000000..07cfc6255
--- /dev/null
+++ b/contrib/lua-torch/nn/HardTanh.lua
@@ -0,0 +1,37 @@
+local HardTanh, parent = torch.class('nn.HardTanh', 'nn.Module')
+
+function HardTanh:__init(min_value, max_value, inplace)
+ parent.__init(self)
+ self.min_val = min_value or -1
+ self.max_val = max_value or 1
+ self.inplace = inplace or false
+ if (inplace and type(inplace) ~= 'boolean') then
+ error('in-place flag must be boolean')
+ end
+ assert(self.max_val>self.min_val, 'max_value must be larger than min_value')
+end
+
+function HardTanh:updateOutput(input)
+ self.min_val = self.min_val or -1
+ self.max_val = self.max_val or 1
+ input.THNN.HardTanh_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.min_val,
+ self.max_val,
+ self.inplace or false
+ )
+ return self.output
+end
+
+function HardTanh:updateGradInput(input, gradOutput)
+ input.THNN.HardTanh_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.min_val,
+ self.max_val,
+ self.inplace or false
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/HingeEmbeddingCriterion.lua b/contrib/lua-torch/nn/HingeEmbeddingCriterion.lua
new file mode 100644
index 000000000..13ad00f19
--- /dev/null
+++ b/contrib/lua-torch/nn/HingeEmbeddingCriterion.lua
@@ -0,0 +1,43 @@
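+-- Editor's note: per element the loss is x_i when y_i == 1 and
+-- max(0, margin - x_i) when y_i == -1; with sizeAverage it is divided
+-- by the number of elements.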
+local HingeEmbeddingCriterion, parent = torch.class('nn.HingeEmbeddingCriterion', 'nn.Criterion')
+
+function HingeEmbeddingCriterion:__init(margin)
+ parent.__init(self)
+ self.margin = margin or 1
+ self.sizeAverage = true
+end
+
+function HingeEmbeddingCriterion:updateOutput(input,y)
+ self.buffer = self.buffer or input.new()
+ if not torch.isTensor(y) then
+ self.ty = self.ty or input.new():resize(1)
+ self.ty[1]=y
+ y=self.ty
+ end
+
+ self.buffer:resizeAs(input):copy(input)
+ self.buffer[torch.eq(y, -1)] = 0
+ self.output = self.buffer:sum()
+
+ self.buffer:fill(self.margin):add(-1, input)
+ self.buffer:cmax(0)
+ self.buffer[torch.eq(y, 1)] = 0
+ self.output = self.output + self.buffer:sum()
+
+ if (self.sizeAverage == nil or self.sizeAverage == true) then
+ self.output = self.output / input:nElement()
+ end
+
+ return self.output
+end
+
+function HingeEmbeddingCriterion:updateGradInput(input, y)
+ if not torch.isTensor(y) then self.ty[1]=y; y=self.ty end
+ self.gradInput:resizeAs(input):copy(y)
+ self.gradInput[torch.cmul(torch.eq(y, -1), torch.gt(input, self.margin))] = 0
+
+ if (self.sizeAverage == nil or self.sizeAverage == true) then
+ self.gradInput:mul(1 / input:nElement())
+ end
+
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Identity.lua b/contrib/lua-torch/nn/Identity.lua
new file mode 100644
index 000000000..5e6ccb624
--- /dev/null
+++ b/contrib/lua-torch/nn/Identity.lua
@@ -0,0 +1,30 @@
+local Identity, _ = torch.class('nn.Identity', 'nn.Module')
+
+function Identity:updateOutput(input)
+ self.output = input
+ return self.output
+end
+
+
+function Identity:updateGradInput(input, gradOutput)
+ self.gradInput = gradOutput
+ return self.gradInput
+end
+
+function Identity:clearState()
+ -- don't call set because it might reset referenced tensors
+ local function clear(f)
+ if self[f] then
+ if torch.isTensor(self[f]) then
+ self[f] = self[f].new()
+ elseif type(self[f]) == 'table' then
+ self[f] = {}
+ else
+ self[f] = nil
+ end
+ end
+ end
+ clear('output')
+ clear('gradInput')
+ return self
+end
diff --git a/contrib/lua-torch/nn/Index.lua b/contrib/lua-torch/nn/Index.lua
new file mode 100644
index 000000000..6aa429708
--- /dev/null
+++ b/contrib/lua-torch/nn/Index.lua
@@ -0,0 +1,32 @@
+local Index, parent = torch.class('nn.Index', 'nn.Module')
+
+function Index:__init(dimension)
+ parent.__init(self)
+ self.dimension = dimension
+ self.gradInput = {self.gradInput, self.gradInput.new()}
+end
+
+function Index:updateOutput(input)
+ local t = input[1]
+ local index = input[2]
+ self.output:index(t, self.dimension, index)
+ return self.output
+end
+
+function Index:updateGradInput(input, gradOutput)
+ local t = input[1]
+ local index = input[2]
+
+ self.gradInput[2]:resize(index:size()):zero()
+ local gradInput = self.gradInput[1] -- no gradient for the index variable
+ gradInput:resizeAs(t):zero()
+ gradInput:indexAdd(self.dimension, index, gradOutput)
+ return self.gradInput
+end
+
+function Index:clearState()
+ self.gradInput[1]:set()
+ self.gradInput[2]:set()
+ self.output:set()
+ return self
+end
diff --git a/contrib/lua-torch/nn/IndexLinear.lua b/contrib/lua-torch/nn/IndexLinear.lua
new file mode 100644
index 000000000..928e5d3f2
--- /dev/null
+++ b/contrib/lua-torch/nn/IndexLinear.lua
@@ -0,0 +1,398 @@
+local ffi = require 'ffi'
+local IndexLinear, parent = torch.class('nn.IndexLinear', 'nn.Module')
+
+
+
+function IndexLinear:__init(inputSize, outputSize, doGradInput, keysOffset, weight, bias, normalize)
+ parent.__init(self)
+
+   -- We need 3 extra parameters per feature
+   -- if we normalize:
+   -- * The max-abs value
+   -- * The inverse of the max-abs value
+   -- * The per-feature bias
+   -- We keep an extra placeholder for future per-feature learning-rate manipulation.
+   -- So it's 4 total.
+ self.normalize = normalize and 4 or 0
+
+ -- This is important to keep the possibility of sharing a weight
+ -- directly, without having to allocate it first.
+   -- The reason is that these weights can be very large.
+ self.weight = weight or torch.Tensor(inputSize, outputSize + self.normalize):zero()
+ self.bias = bias or torch.Tensor(outputSize):zero()
+ self.inputSize = self.weight and self.weight:size(1) or inputSize
+ self.outputSize = self.weight and (self.weight:size(2)-self.normalize) or outputSize
+
+ -- gradWeight is not initialized as we're doing dense gradient accumulation
+ -- This is more efficient and avoids allocating a giant useless gradWeight
+ self.gradWeight = torch.Tensor()
+
+ -- gradBias still works the same as it's already dense
+ self.gradBias = torch.Tensor(self.outputSize):zero()
+
+ -- Buffers
+ self.gradWeightBuffer = torch.Tensor()
+ self.valuesBuffer = torch.Tensor()
+ self.normalizedValues = torch.Tensor()
+
+ -- That is used to accumulate keys and gradWeight
+ -- when doing gradients accumulations
+ self.running = {
+ cumSumSizes = {},
+ keys = {},
+ gradWeight = {},
+ counter = 1,
+ }
+
+ -- self.sizes, self.cumSumSizes are calculated on the CPU even when using CUDA.
+ -- These two tables make it easier to resize these buffers instead of re-allocating them.
+ -- self.*Cache[1] always contains values on CPU.
+ -- If CUDA is being used, self.*Cache[2] contains values on GPU.
+ self.sizesCache = {}
+ self.cumSumSizesCache = {}
+
+ -- A few options
+ self.weightDecay = 0
+ self.doGradInput = doGradInput or false
+   self.offset = keysOffset and keysOffset-1 or -1 -- the C backend adds self.offset to the keys (default -1 maps 1-based keys to 0-based indices)
+end
+
+-- Reset all the parameters needed
+-- for normalization to 0
+function IndexLinear:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(2))
+ end
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv):mul(0.000001)
+ if self.normalize and self.normalize > 0 then
+ self.weight[{{}, {1,self.normalize}}]:zero()
+ end
+end
+
+function IndexLinear:reshapeInput(input)
+ assert(type(input) == 'table')
+
+ local ninputs = 0
+ for _, v in ipairs(input) do
+ ninputs = ninputs + 1
+ end
+
+ assert(ninputs == 2 or ninputs == 3)
+
+ -- If format is:
+ -- {
+ -- torch.LongTensor(size1+size2+...+sizeN), -- concatenated batch of keys
+ -- torch.Tensor(size1+size2+...+sizeN), -- concatenated batch of values
+ -- torch.LongTensor(N), -- keys/values sizes (values are {size1, ..., sizeN})
+ -- }
+ if ninputs == 3 then
+ local fkeys = input[1]
+ local fvals = input[2]
+ local fsizes = torch.isTensor(input[3]) and input[3] or fkeys.new{input[3]}
+ assert(fkeys:nElement() == fvals:nElement(), 'Keys and values should be of same size')
+ assert(fkeys:dim() == 1, 'Keys and values should be 1D')
+ self.isFlat = true
+ self.noBatch = false
+ return fkeys, fvals, fsizes
+ end
+
+ local keys = input[1]
+ local values = input[2]
+ local lkeys, lvalues
+
+ -- If format is:
+ -- {
+ -- { torch.LongTensor(size1), torch.LongTensor(size2), ..., torch.LongTensor(sizeN) }, -- batch of keys
+ -- { torch.Tensor(size1), torch.Tensor(size2), ..., torch.Tensor(sizeN) }, -- batch of values,
+ -- }
+ if type(keys) == 'table' and type(values) == 'table' then
+ lkeys, lvalues = keys, values
+ self.isFlat = false
+ self.noBatch = false
+
+ -- If format is not a batch:
+ -- {
+ -- torch.LongTensor(size1), -- keys
+ -- torch.Tensor(size1), -- values,
+ -- }
+ elseif torch.isTensor(keys) and torch.isTensor(values) then
+ lkeys, lvalues = {keys}, {values}
+ self.isFlat = false
+ self.noBatch = true
+ else
+ error('Wrong input format.')
+ end
+
+ for i=1,#lkeys do
+ assert(lvalues[i]:dim() == 1 and lkeys[i]:dim() == 1, "keys and values should be 1D")
+ end
+
+ return lkeys, lvalues
+end
+
+function IndexLinear:longTensor(...)
+ if (self:type() == 'torch.CudaTensor') then
+ return torch.CudaLongTensor(...)
+ else
+ return torch.LongTensor(...)
+ end
+end
+
+function IndexLinear:flattenInputs(input)
+ local lkeys, lvalues, sizes = self:reshapeInput(input)
+
+ local counter = self.running.counter
+
+ -- Ensure everything is of the right type
+ local isCuda = (self:type() == 'torch.CudaTensor')
+ self.running.keys[counter] = self.running.keys[counter] or self:longTensor()
+ self.keys = self.running.keys[counter]
+
+ if self.isFlat then
+ self.values = self.values or lvalues.new()
+ self.sizes = self.sizes or self:longTensor()
+
+ self.keys:resize(lkeys:size()):copy(lkeys)
+ self.values:resize(lvalues:size()):copy(lvalues)
+ self.sizes = sizes
+ self.cumSumSizes = self.cumSumSizes or self.sizes.new()
+ self.cumSumSizes:cumsum(self.sizes)
+ else
+ self.values = self.values or lvalues[1].new()
+
+ self.lkeys = lkeys
+ self.lvalues = lvalues
+ local batchSize = #self.lkeys
+
+ self.sizesCache[1] = self.sizesCache[1] or torch.LongTensor(batchSize)
+ self.cumSumSizesCache[1] = self.cumSumSizesCache[1] or torch.LongTensor(batchSize)
+
+ self.sizes = self.sizesCache[1]
+ self.cumSumSizes = self.cumSumSizesCache[1]
+
+ self.sizes:resize(batchSize)
+ self.cumSumSizes:resize(batchSize)
+
+ for i = 1,batchSize do
+ self.sizes[i] = self.lkeys[i]:size(1)
+ end
+ self.cumSumSizes:cumsum(self.sizes)
+
+ self.keys:cat(self.lkeys, 1)
+ self.values:cat(self.lvalues, 1)
+
+ if isCuda then
+ -- Get the GPU cache
+ self.sizesCache[2] = self.sizesCache[2] or torch.CudaLongTensor()
+ self.cumSumSizesCache[2] = self.cumSumSizesCache[2] or torch.CudaLongTensor()
+
+ self.sizes = self.sizesCache[2]
+ self.cumSumSizes = self.cumSumSizesCache[2]
+
+ -- Resize and copy to GPU
+ self.sizes:resize(batchSize):copy(self.sizesCache[1])
+ self.cumSumSizes:resize(batchSize):copy(self.cumSumSizesCache[1])
+ end
+ end
+ self.running.cumSumSizes[counter] = self.cumSumSizes
+end
+
+function IndexLinear:updateOutput(input)
+
+ self:flattenInputs(input)
+
+ self.values.THNN.IndexLinear_updateOutput(
+ self.keys:cdata(),
+ self.offset,
+ self.values:cdata(),
+ self.sizes:cdata(),
+ self.cumSumSizes:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.normalizedValues:cdata(),
+ self.train and 1 or 0
+ )
+
+ if self.noBatch then
+ self.output:resize(self.output:size(2))
+ end
+ return self.output
+end
+
+function IndexLinear:accUpdateGradParameters(input, gradOutput, scale)
+ self.values.THNN.IndexLinear_accUpdateGradParameters(
+ self.keys:cdata(),
+ self.offset,
+ self.normalize > 0 and self.normalizedValues:cdata() or self.values:cdata(),
+ self.sizes:cdata(),
+ self.cumSumSizes:cdata(),
+ gradOutput:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.weightDecay or 0,
+ scale or 1
+ )
+end
+
+function IndexLinear:accGradParameters(input, gradOutput, scale)
+
+ local counter = self.running.counter
+
+ -- Same as the running.keys in the updateOutput function,
+ -- get a table of dense running.gradWeight
+ self.running.gradWeight[counter] = self.running.gradWeight[counter] or self.values.new()
+ self.values.THNN.IndexLinear_accGradParameters(
+ self.keys:cdata(),
+ self.offset,
+ self.normalize > 0 and self.normalizedValues:cdata() or self.values:cdata(),
+ self.sizes:cdata(),
+ self.cumSumSizes:cdata(),
+ gradOutput:cdata(),
+ self.running.gradWeight[counter]:cdata(),
+ self.gradBias:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.valuesBuffer:cdata(),
+ self.weightDecay or 0,
+ scale or 1
+ )
+
+ -- Increment the running counter to create a new buffer
+   -- if we don't flush them in zeroGradParameters
+ self.running.counter = self.running.counter + 1
+end
+
+function IndexLinear:updateGradInput(input, gradOutput)
+ self.gradInput = {}
+ -- Revamped from nn.SparseLinear.updateGradInput
+ if self.doGradInput and self.normalize > 0 then
+ error('updateGradInput is not implemented in max-normalize mode')
+ end
+
+ local ini = self.weight:size(1)
+
+ if self.doGradInput then
+ local gi = gradOutput.new()
+ if gradOutput:dim() == 1 then
+ gi:resize(self.weight:size(1))
+ gi:mv(self.weight,gradOutput)
+ gi:resize(1, self.weight:size(1))
+ elseif gradOutput:dim() == 2 then
+ gi:resize(gradOutput:size(1), self.weight:size(1))
+ gi:mm(gradOutput, self.weight:t())
+ end
+
+ local indices = self.running.keys[1].new(ini):range(1, ini)
+
+ if self.isFlat then
+ self.gradInput[1] = torch.repeatTensor(indices, gi:size(1), 1)
+ self.gradInput[2] = gi
+ else
+ self.gradInput[1] = {}
+ self.gradInput[2] = {}
+ for i = 1,gi:size(1) do
+ self.gradInput[1][i] = self.running.keys[1].new(ini)
+ self.gradInput[1][i]:copy(indices)
+ self.gradInput[2][i] = gradOutput.new(ini)
+ self.gradInput[2][i]:copy(gi[i])
+ end
+ end
+ end
+
+ if self.noBatch then
+ if self.isFlat then
+ self.gradInput = {self.gradInput[1]:resize(ini), self.gradInput[2]:resize(ini)}
+ else
+ self.gradInput = {self.gradInput[1][1], self.gradInput[2][1]}
+ end
+ end
+ return self.gradInput
+end
+
+function IndexLinear:updateParameters(lr)
+ local counter = self.running.counter
+ if counter > 1 then
+ if counter == 2 then
+ self.updateKeys = self.running.keys[1]
+ self.gradWeight = self.running.gradWeight[1]
+ else
+ self.updateKeysBuffer = self.updateKeysBuffer or self:longTensor()
+ local lkeys = {}
+ local lgweights = {}
+ local totalSize = 0
+ local lCumSumSizes = {}
+ for i=1,counter-1 do
+ lkeys[i] = self.running.keys[i]
+ -- Change layout to take advantage of the 1-D contiguous torch.cat
+ lgweights[i] = self.running.gradWeight[i]:contiguous()
+ lgweights[i]:resize(lgweights[i]:nElement())
+ lCumSumSizes[i] = totalSize + self.running.cumSumSizes[i]
+ totalSize = totalSize + lkeys[i]:size(1)
+ end
+
+ self.updateKeysBuffer:cat(lkeys, 1)
+ self.gradWeightBuffer:cat(lgweights, 1)
+ self.cumSumSizes:cat(lCumSumSizes, 1)
+ self.gradWeightBuffer:resize(totalSize, self.outputSize)
+ self.gradWeight = self.gradWeightBuffer
+ self.updateKeys = self.updateKeysBuffer
+ end
+ self.values.THNN.IndexLinear_updateParameters(
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.updateKeys:cdata(),
+ self.cumSumSizes:cdata(),
+ self.offset,
+ self.weightDecay or 0,
+ lr or error('You must specify a learning rate')
+ )
+ end
+end
+
+function IndexLinear:zeroGradParameters()
+ -- No need to do anything here as gradWeight is dense
+ self.gradBias:zero()
+
+   -- The code below would reset the smart scaling
+   -- parameters for each feature every time
+   -- zeroGradParameters is called.
+   -- TODO: decide what to do with this code.
+   -- NB: it must stay commented out in sync with the corresponding
+   -- code in lib/THNN/generic/IndexLinear.c, in the accUpdateGradParameters function.
+
+ --[[
+ local w = self.weight:select(2, 3)
+ if self.updateKeys and self.updateKeys:nElement() > 0 then
+ self.updateKeysBuffer:resizeAs(self.updateKeys):copy(self.updateKeys):add(self.offset+1)
+ w:indexFill(1, self.updateKeysBuffer, 0)
+ end
+ ]]--
+ self.running.counter = 1
+end
+
+function IndexLinear:parameters()
+ return {self.weight, self.bias}, {self.running, self.gradBias}
+end
+
+function IndexLinear:clearState()
+ self.running.keys = {}
+ self.running.gradWeight = {}
+ self.keys = nil
+ self.zerokeys = nil
+ self.updateKeys = nil
+ self.values = nil
+ self.sizes = nil
+ self.lkeys = {}
+ self.lvalues = {}
+ self.gradWeightBuffer = self.gradWeightBuffer.new()
+ self.valuesBuffer = self.valuesBuffer.new()
+ self.updateKeysBuffer = nil
+ self.values = nil
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/Jacobian.lua b/contrib/lua-torch/nn/Jacobian.lua
new file mode 100644
index 000000000..4f728b18c
--- /dev/null
+++ b/contrib/lua-torch/nn/Jacobian.lua
@@ -0,0 +1,389 @@
+nn.Jacobian = {}
+
+function nn.Jacobian.backward(module, input, param, dparam)
+ local doparam = 0
+ if param then
+ doparam = 1
+ end
+ param = param or input
+ -- output deriv
+ module:forward(input)
+ local dout = module.output.new():resizeAs(module.output)
+ -- 1D view
+ local sdout = module.output.new(dout:storage(),1,dout:nElement())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero()
+
+ for i=1,sdout:nElement() do
+ dout:zero()
+ sdout[i] = 1
+ module:zeroGradParameters()
+ local din = module:updateGradInput(input, dout)
+ module:accGradParameters(input, dout)
+ if doparam == 1 then
+ jacobian:select(2,i):copy(dparam)
+ else
+ jacobian:select(2,i):copy(din)
+ end
+ end
+ return jacobian
+end
+
+function nn.Jacobian.backwardUpdate(module, input, param)
+
+ -- output deriv
+ module:forward(input)
+ local dout = module.output.new():resizeAs(module.output)
+ -- 1D view
+ local sdout = module.output.new(dout:storage(),1,dout:nElement())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero()
+
+ -- original param
+ local params = module:parameters()
+ local origparams = {}
+ for j=1,#params do
+ table.insert(origparams, params[j]:clone())
+ end
+
+ for i=1,sdout:nElement() do
+ for j=1,#params do
+ params[j]:copy(origparams[j])
+ end
+ dout:zero()
+ sdout[i] = 1
+ module:updateGradInput(input, dout)
+ module:accUpdateGradParameters(input, dout, 1)
+ jacobian:select(2,i):copy(param)
+ end
+
+ for j=1,#params do
+ params[j]:copy(origparams[j])
+ end
+
+ return jacobian
+end
+
+function nn.Jacobian.forward(module, input, param, perturbation)
+ param = param or input
+ -- perturbation amount
+ perturbation = perturbation or 1e-6
+ -- 1D view of input
+   local sin = param.new(param):resize(param:nElement())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement())
+
+ local outa = torch.Tensor(jacobian:size(2))
+ local outb = torch.Tensor(jacobian:size(2))
+
+ for i=1,sin:nElement() do
+ local orig = sin[i]
+ sin[i] = orig - perturbation
+ outa:copy(module:forward(input))
+ sin[i] = orig + perturbation
+ outb:copy(module:forward(input))
+ sin[i] = orig
+
+ outb:add(-1,outa):div(2*perturbation)
+ jacobian:select(1,i):copy(outb)
+ end
+
+ return jacobian
+end
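+
+-- Note: the loop above forms the central difference
+-- (f(x+h) - f(x-h)) / (2h) for each element of param,
+-- filling one row of the numerical Jacobian per element.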
+
+function nn.Jacobian.backwardDiagHessian(module, input, diagHessianParamName)
+ -- Compute the second derivatives (diagonal Hessian elements)
+ -- by backpropagation (using the code from hessian.lua).
+ --
+ -- This function computes the diagonal Hessian elements of the following function:
+ --
+ -- F(x_1, x_2, ..., x_n) = y_1^2/2 + y_2^2/2 + ... + y_m^2/2,
+ --
+ -- where
+ -- x_1, ..., x_n are the input values and parameters of the given module,
+ -- y_1, ..., y_m are the output values of the given module.
+ --
+ -- All x_i and y_i values are scalars here. In other words,
+ -- x_1, ..., x_n denote the scalar elements of the module input tensor,
+ -- the scalar elements of module.weight,
+ -- and the scalar elements of module.bias;
+ -- y_1, ..., y_m are the scalar elements of the module output tensor.
+ --
+ -- The diagonal Hessian elements of F are computed with respect to
+ -- the module input values and parameters (x_1, .., x_n).
+ --
+ -- The function F is chosen for its convenient properties:
+ --
+ -- dF / dy_i = y_i,
+ -- d^2F / dy_i^2 = 1.
+ --
+ -- In other words, the diagonal Hessian elements of F with respect
+ -- to the module OUTPUT values (y_1, ... y_m) are equal to 1.
+ --
+ -- Because of that, computing the diagonal Hessian elements of F
+ -- with respect to the module INPUT values and PARAMETERS (x_1, ..., x_n)
+ -- can be done by calling updateDiagHessianInput() and accDiagHessianParameters()
+ -- using a tensor of ones as diagHessianOutput.
+
+ module:forward(input)
+ local diagHessianOutput = module.output.new():resizeAs(module.output):fill(1)
+
+ module.diagHessianWeight:zero()
+ module.diagHessianBias:zero()
+ module:updateDiagHessianInput(input, diagHessianOutput)
+ module:accDiagHessianParameters(input, diagHessianOutput)
+
+ return module[diagHessianParamName]
+end
+
+function nn.Jacobian.linearModuleDiagHessian(module, input, gradParamName)
+ -- Compute the second derivatives (diagonal Hessian elements)
+ -- from the first derivatives for the given module
+ -- (without using the code from hessian.lua).
+ --
+ -- The given module is assumed to be linear with respect to its inputs and weights
+ -- (like nn.Linear, nn.SpatialConvolution, etc.)
+ --
+ -- This function computes the diagonal Hessian elements of the following function:
+ --
+ -- F(x_1, x_2, ..., x_n) = y_1^2/2 + y_2^2/2 + ... + y_m^2/2.
+ --
+   -- (See the comment for nn.Jacobian.backwardDiagHessian() for an explanation.)
+ --
+ -- The first derivatives of F with respect to
+ -- the module inputs and parameters (x_1, ..., x_n) are:
+ --
+ -- dF / dx_i = \sum_k (dF / dy_k) (dy_k / dx_i).
+ --
+ -- The second derivatives are:
+ --
+ -- d^2F / dx_i = \sum_k [(d^2F / dy_k^2) (dy_k / dx_i)^2 + (dF / dy_k) (d^2y_k / dx_i^2)].
+ --
+ -- The second derivatives of F with respect to the module outputs (y_1, ..., y_m)
+ -- are equal to 1, so:
+ --
+ -- d^2F / dx_i = \sum_k [(dy_k / dx_i)^2 + (dF / dy_k) (d^2y_k / dx_i^2)].
+ --
+ -- Assuming the linearity of module outputs (y_1, ..., y_m)
+ -- with respect to module inputs and parameters (x_1, ..., x_n),
+ -- we have (d^2y_k / dx_i^2) = 0,
+ -- and the expression finally becomes:
+ --
+ -- d^2F / dx_i = \sum_k (dy_k / dx_i)^2.
+ --
+ -- The first derivatives (dy_k / dx_i) are computed by normal backpropagation,
+ -- using updateGradInput() and accGradParameters().
+
+ local gradParam = module[gradParamName]
+
+ local diagHessian = gradParam.new():resize(gradParam:nElement()):zero()
+
+ module:forward(input)
+ local gradOutput = module.output.new():resizeAs(module.output)
+ local gradOutput1D = gradOutput:view(gradOutput:nElement())
+
+ for i=1,gradOutput:nElement() do
+ gradOutput1D:zero()
+ gradOutput1D[i] = 1
+ module.gradWeight:zero()
+ if module.bias then
+ module.gradBias:zero()
+ end
+ module:updateGradInput(input, gradOutput)
+ module:accGradParameters(input, gradOutput)
+ diagHessian:addcmul(gradParam, gradParam)
+ end
+
+ return diagHessian
+end
+
+function nn.Jacobian.forwardUpdate(module, input, param, perturbation)
+ -- perturbation amount
+ perturbation = perturbation or 1e-6
+ -- 1D view of input
+   local sin = param.new(param):resize(param:nElement())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement())
+
+ local outa = torch.Tensor(jacobian:size(2))
+ local outb = torch.Tensor(jacobian:size(2))
+
+ for i=1,sin:nElement() do
+ local orig = sin[i]
+ sin[i] = orig - perturbation
+ outa:copy(module:forward(input))
+ sin[i] = orig + perturbation
+ outb:copy(module:forward(input))
+ sin[i] = orig
+
+ outb:add(-1,outa):div(2*perturbation)
+ jacobian:select(1,i):copy(outb)
+ jacobian:select(1,i):mul(-1)
+ jacobian:select(1,i):add(sin[i])
+ end
+ return jacobian
+end
+
+function nn.Jacobian.testJacobian(module, input, minval, maxval, perturbation)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:copy(torch.rand(input:nElement()):mul(inrange):add(minval))
+ local jac_fprop = nn.Jacobian.forward(module, input, input, perturbation)
+ local jac_bprop = nn.Jacobian.backward(module, input)
+ local error = jac_fprop-jac_bprop
+ return error:abs():max()
+end
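+
+-- A minimal gradient-check sketch (module, input size and tolerance are
+-- illustrative):
+--
+--   local module = nn.Linear(10, 5)
+--   local input = torch.Tensor(10) -- testJacobian fills it with random values
+--   local err = nn.Jacobian.testJacobian(module, input)
+--   assert(err < 1e-6, 'analytic and numerical Jacobians disagree')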
+
+function nn.Jacobian.testJacobianParameters(module, input, param, dparam, minval, maxval, perturbation)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:copy(torch.rand(input:nElement()):mul(inrange):add(minval))
+ param:copy(torch.rand(param:nElement()):mul(inrange):add(minval))
+ local jac_bprop = nn.Jacobian.backward(module, input, param, dparam)
+ local jac_fprop = nn.Jacobian.forward(module, input, param, perturbation)
+ local error = jac_fprop - jac_bprop
+ return error:abs():max()
+end
+
+function nn.Jacobian.testJacobianUpdateParameters(module, input, param, minval, maxval, perturbation)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:copy(torch.rand(input:nElement()):mul(inrange):add(minval))
+ param:copy(torch.rand(param:nElement()):mul(inrange):add(minval))
+ local params_bprop = nn.Jacobian.backwardUpdate(module, input, param)
+ local params_fprop = nn.Jacobian.forwardUpdate(module, input, param, perturbation)
+
+ local error = params_fprop - params_bprop
+ return error:abs():max()
+end
+
+function nn.Jacobian.testDiagHessian(module, input, gradParamName, diagHessianParamName, minval, maxval)
+ -- Compute the diagonal Hessian elements for the same function in two different ways,
+ -- then compare the results and return the difference.
+
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:copy(torch.rand(input:nElement()):mul(inrange):add(minval))
+ module:initDiagHessianParameters()
+ local h_bprop = nn.Jacobian.backwardDiagHessian(module, input, diagHessianParamName)
+ local h_linearmodule = nn.Jacobian.linearModuleDiagHessian(module, input, gradParamName)
+ local error = h_bprop - h_linearmodule
+ return error:abs():max()
+end
+
+function nn.Jacobian.testDiagHessianInput(module, input, minval, maxval)
+ return nn.Jacobian.testDiagHessian(module, input, 'gradInput', 'diagHessianInput', minval, maxval)
+end
+
+function nn.Jacobian.testDiagHessianWeight(module, input, minval, maxval)
+ return nn.Jacobian.testDiagHessian(module, input, 'gradWeight', 'diagHessianWeight', minval, maxval)
+end
+
+function nn.Jacobian.testDiagHessianBias(module, input, minval, maxval)
+ return nn.Jacobian.testDiagHessian(module, input, 'gradBias', 'diagHessianBias', minval, maxval)
+end
+
+function nn.Jacobian.testIO(module,input, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ local inputclone = input:clone()
+
+ -- run module
+ module:forward(input)
+ local go = module.output:clone():copy(torch.rand(module.output:nElement()):mul(inrange):add(minval))
+ local goclone = go:clone()
+ module:zeroGradParameters()
+ module:updateGradInput(input,go)
+ module:accGradParameters(input,go)
+
+ local fo = module.output:clone()
+ local bo = module.gradInput:clone()
+
+ -- write module
+ local filename = os.tmpname()
+ local f = torch.DiskFile(filename, 'w'):binary()
+ -- call clearState and check that it returns itself
+ assert(module == module:clearState(),'clearState did not return self')
+ f:writeObject(module)
+ f:close()
+ -- read module
+ local m = torch.DiskFile(filename):binary():readObject()
+ m:forward(inputclone)
+ m:zeroGradParameters()
+ m:updateGradInput(inputclone,goclone)
+ m:accGradParameters(inputclone,goclone)
+ -- cleanup
+ os.remove(filename)
+
+ local fo2 = m.output:clone()
+ local bo2 = m.gradInput:clone()
+
+ local errf = fo - fo2
+ local errb = bo - bo2
+ return errf:abs():max(), errb:numel() == 0 and 0 or errb:abs():max()
+end
+
+function nn.Jacobian.testAllUpdate(module, input, weight, gradWeight)
+ local gradOutput
+ local lr = torch.uniform(0.1, 1)
+ local errors = {}
+
+ -- accGradParameters
+ local maccgp = module:clone()
+ local weightc = maccgp[weight]:clone()
+ maccgp:forward(input)
+ gradOutput = torch.rand(maccgp.output:size())
+ maccgp:zeroGradParameters()
+ maccgp:updateGradInput(input, gradOutput)
+ maccgp:accGradParameters(input, gradOutput)
+ maccgp:updateParameters(lr)
+ errors["accGradParameters"] = (weightc-maccgp[gradWeight]*lr-maccgp[weight]):norm()
+
+ -- accUpdateGradParameters
+ local maccugp = module:clone()
+ maccugp:forward(input)
+ maccugp:updateGradInput(input, gradOutput)
+ maccugp:accUpdateGradParameters(input, gradOutput, lr)
+ errors["accUpdateGradParameters"] = (maccugp[weight]-maccgp[weight]):norm()
+
+ -- shared, accGradParameters
+ local macsh1 = module:clone()
+ local macsh2 = module:clone()
+ macsh2:share(macsh1, weight)
+ macsh1:forward(input)
+ macsh2:forward(input)
+ macsh1:zeroGradParameters()
+ macsh2:zeroGradParameters()
+ macsh1:updateGradInput(input, gradOutput)
+ macsh2:updateGradInput(input, gradOutput)
+ macsh1:accGradParameters(input, gradOutput)
+ macsh2:accGradParameters(input, gradOutput)
+ macsh1:updateParameters(lr)
+ macsh2:updateParameters(lr)
+ local err = (weightc-maccgp[gradWeight]*(lr*2)-macsh1[weight]):norm()
+ err = err + (weightc-maccgp[gradWeight]*(lr*2)-macsh2[weight]):norm()
+ errors["accGradParameters [shared]"] = err
+
+ -- shared, accUpdateGradParameters
+ local macshu1 = module:clone()
+ local macshu2 = module:clone()
+ macshu2:share(macshu1, weight)
+ macshu1:forward(input)
+ macshu2:forward(input)
+ macshu1:updateGradInput(input, gradOutput)
+ macshu2:updateGradInput(input, gradOutput)
+ macshu1:accUpdateGradParameters(input, gradOutput, lr)
+ macshu2:accUpdateGradParameters(input, gradOutput, lr)
+ err = (weightc-maccgp[gradWeight]*(lr*2)-macshu1[weight]):norm()
+ err = err + (weightc-maccgp[gradWeight]*(lr*2)-macshu2[weight]):norm()
+ errors["accUpdateGradParameters [shared]"] = err
+
+ return errors
+end
diff --git a/contrib/lua-torch/nn/JoinTable.lua b/contrib/lua-torch/nn/JoinTable.lua
new file mode 100644
index 000000000..6ab68e189
--- /dev/null
+++ b/contrib/lua-torch/nn/JoinTable.lua
@@ -0,0 +1,74 @@
+local JoinTable, parent = torch.class('nn.JoinTable', 'nn.Module')
+
+function JoinTable:__init(dimension, nInputDims)
+ parent.__init(self)
+ self.size = torch.LongStorage()
+ self.dimension = dimension
+ self.gradInput = {}
+ self.nInputDims = nInputDims
+end
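+
+-- Usage sketch (shapes are illustrative): joining two tensors along the
+-- second dimension.
+--
+--   local m = nn.JoinTable(2)
+--   local a, b = torch.rand(4, 3), torch.rand(4, 5)
+--   local out = m:forward({a, b})              -- size 4x8
+--   local gi = m:backward({a, b}, out:clone()) -- table of 4x3 and 4x5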
+
+function JoinTable:_getPositiveDimension(input)
+ local dimension = self.dimension
+ if dimension < 0 then
+ dimension = input[1]:dim() + dimension + 1
+ elseif self.nInputDims and input[1]:dim()==(self.nInputDims+1) then
+ dimension = dimension + 1
+ end
+ return dimension
+end
+
+function JoinTable:updateOutput(input)
+ local dimension = self:_getPositiveDimension(input)
+
+ for i=1,#input do
+ local currentOutput = input[i]
+ if i == 1 then
+ self.size:resize(currentOutput:dim()):copy(currentOutput:size())
+ else
+ self.size[dimension] = self.size[dimension]
+ + currentOutput:size(dimension)
+ end
+ end
+ self.output:resize(self.size)
+
+ local offset = 1
+ for i=1,#input do
+ local currentOutput = input[i]
+ self.output:narrow(dimension, offset,
+ currentOutput:size(dimension)):copy(currentOutput)
+ offset = offset + currentOutput:size(dimension)
+ end
+ return self.output
+end
+
+function JoinTable:updateGradInput(input, gradOutput)
+ local dimension = self:_getPositiveDimension(input)
+
+ for i=1,#input do
+ if self.gradInput[i] == nil then
+ self.gradInput[i] = input[i].new()
+ end
+ self.gradInput[i]:resizeAs(input[i])
+ end
+
+ -- clear out invalid gradInputs
+ for i=#input+1, #self.gradInput do
+ self.gradInput[i] = nil
+ end
+
+ local offset = 1
+ for i=1,#input do
+ local currentOutput = input[i]
+ local currentGradInput = gradOutput:narrow(dimension, offset,
+ currentOutput:size(dimension))
+ self.gradInput[i]:copy(currentGradInput)
+ offset = offset + currentOutput:size(dimension)
+ end
+ return self.gradInput
+end
+
+function JoinTable:type(type, tensorCache)
+ self.gradInput = {}
+ return parent.type(self, type, tensorCache)
+end
diff --git a/contrib/lua-torch/nn/Kmeans.lua b/contrib/lua-torch/nn/Kmeans.lua
new file mode 100644
index 000000000..56066b63d
--- /dev/null
+++ b/contrib/lua-torch/nn/Kmeans.lua
@@ -0,0 +1,215 @@
+-- Online (Hard) Kmeans layer.
+local Kmeans, parent = torch.class('nn.Kmeans', 'nn.Module')
+
+function Kmeans:__init(k, dim, scale)
+ parent.__init(self)
+ self.k = k
+ self.dim = dim
+
+   -- scale for the online kmeans update
+ self.scale = scale
+
+ assert(k > 0, "Clusters cannot be 0 or negative.")
+ assert(dim > 0, "Dimensionality cannot be 0 or negative.")
+
+ -- Kmeans centers -> self.weight
+ self.weight = torch.Tensor(self.k, self.dim)
+
+ self.gradWeight = torch.Tensor(self.weight:size())
+ self.loss = 0 -- within cluster error of the last forward
+
+ self.clusterSampleCount = torch.Tensor(self.k)
+
+ self:reset()
+end
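+
+-- Usage sketch for one online update (names and sizes are illustrative):
+--
+--   local km = nn.Kmeans(4, 2)   -- 4 centers in 2 dimensions
+--   local x = torch.rand(16, 2)  -- minibatch of 16 samples
+--   km:initRandom(x)             -- or km:initKmeansPlus(x)
+--   km:zeroGradParameters()
+--   km:forward(x)                -- per-sample center index; sets km.loss
+--   km:backward(x, km.output)    -- gradOutput is ignored here
+--   km:updateParameters(1.0)     -- move centers toward their assigned samples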
+
+-- Reset
+function Kmeans:reset(stdev)
+ stdev = stdev or 1
+ self.weight:uniform(-stdev, stdev)
+end
+
+-- Initialize Kmeans weight with random samples from input.
+function Kmeans:initRandom(input)
+ local inputDim = input:nDimension()
+ assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.")
+
+ local noOfSamples = input:size(1)
+ local dim = input:size(2)
+ assert(dim == self.dim, "Dimensionality of input and weight don't match.")
+   assert(noOfSamples >= self.k, "Need at least k samples for initialization.")
+
+ local indices = torch.zeros(self.k)
+ indices:random(1, noOfSamples)
+
+ for i=1, self.k do
+ self.weight[i]:copy(input[indices[i]])
+ end
+end
+
+-- Initialize using Kmeans++
+function Kmeans:initKmeansPlus(input, p)
+ self.p = p or self.p or 0.95
+   assert(self.p>=0 and self.p<=1, "p value must be between 0 and 1.")
+
+ local inputDim = input:nDimension()
+ assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.")
+ local noOfSamples = input:size(1)
+
+ local pcount = math.ceil((1-self.p)*noOfSamples)
+ if pcount <= 0 then pcount = 1 end
+
+ local initializedK = 1
+ self.weight[initializedK]:copy(input[torch.random(noOfSamples)])
+ initializedK = initializedK + 1
+
+ local clusters = self.weight.new()
+ local clusterDistances = self.weight.new()
+ local temp = self.weight.new()
+ local expandedSample = self.weight.new()
+ local distances = self.weight.new()
+ distances:resize(noOfSamples):fill(math.huge)
+ local maxScores = self.weight.new()
+ local maxIndx = self.weight.new()
+
+ for k=initializedK, self.k do
+ clusters = self.weight[{{initializedK-1, initializedK-1}}]
+ for i=1, noOfSamples do
+ temp:expand(input[{{i}}], 1, self.dim)
+ expandedSample:resize(temp:size()):copy(temp)
+
+ -- Squared Euclidean distance
+ expandedSample:add(-1, clusters)
+ clusterDistances:norm(expandedSample, 2, 2)
+ clusterDistances:pow(2)
+ distances[i] = math.min(clusterDistances:min(), distances[i])
+ end
+ maxScores, maxIndx = distances:sort(true)
+ local tempIndx = torch.random(pcount)
+ local indx = maxIndx[tempIndx]
+ self.weight[initializedK]:copy(input[indx])
+ initializedK = initializedK + 1
+ end
+end
+
+local function isCudaTensor(tensor)
+ local typename = torch.typename(tensor)
+   if typename and typename:find('torch%.Cuda.*Tensor') then
+ return true
+ end
+ return false
+end
+
+-- Kmeans updateOutput (forward)
+function Kmeans:updateOutput(input)
+ local inputDim = input:nDimension()
+ assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.")
+
+ local batchSize = input:size(1)
+ local dim = input:size(2)
+ assert(dim == self.dim, "Dimensionality of input and weight don't match.")
+
+ assert(input:isContiguous(), "Input is not contiguous.")
+
+ -- a sample copied k times to compute distance between sample and weight
+ self._expandedSamples = self._expandedSamples or self.weight.new()
+
+ -- distance between a sample and weight
+ self._clusterDistances = self._clusterDistances or self.weight.new()
+
+ self._temp = self._temp or input.new()
+ self._tempExpanded = self._tempExpanded or input.new()
+
+ -- Expanding inputs
+ self._temp:view(input, 1, batchSize, self.dim)
+ self._tempExpanded:expand(self._temp, self.k, batchSize, self.dim)
+ self._expandedSamples:resize(self.k, batchSize, self.dim)
+ :copy(self._tempExpanded)
+
+ -- Expanding weights
+ self._tempWeight = self._tempWeight or self.weight.new()
+ self._tempWeightExp = self._tempWeightExp or self.weight.new()
+   self._expandedWeight = self._expandedWeight or self.weight.new()
+ self._tempWeight:view(self.weight, self.k, 1, self.dim)
+ self._tempWeightExp:expand(self._tempWeight, self._expandedSamples:size())
+ self._expandedWeight:resize(self.k, batchSize, self.dim)
+ :copy(self._tempWeightExp)
+
+ -- x-c
+ self._expandedSamples:add(-1, self._expandedWeight)
+ -- Squared Euclidean distance
+ self._clusterDistances:norm(self._expandedSamples, 2, 3)
+ self._clusterDistances:pow(2)
+ self._clusterDistances:resize(self.k, batchSize)
+
+ self._minScore = self._minScore or self.weight.new()
+ self._minIndx = self._minIndx or (isCudaTensor(input) and torch.CudaLongTensor() or torch.LongTensor())
+ self._minScore:min(self._minIndx, self._clusterDistances, 1)
+ self._minIndx:resize(batchSize)
+
+ self.output:resize(batchSize):copy(self._minIndx)
+ self.loss = self._minScore:sum()
+
+ return self.output
+end
+
+-- Kmeans has its own criterion, hence gradInput is all zeros
+function Kmeans:updateGradInput(input, gradOutput)
+ self.gradInput:resize(input:size()):zero()
+
+ return self.gradInput
+end
+
+-- We define the kmeans update rule as c -> c + scale * 1/n * sum_i (x-c),
+-- where n is the number of samples assigned to center c.
+-- Since training applies gradient descent, gradWeight holds the negative
+-- of this update.
+function Kmeans:accGradParameters(input, gradOutput, scale)
+ local scale = self.scale or scale or 1
+   assert(scale > 0, "Scale has to be positive.")
+
+ -- Update cluster sample count
+ local batchSize = input:size(1)
+ self._cscAdder = self._cscAdder or self.weight.new()
+ self._cscAdder:resize(batchSize):fill(1)
+ self.clusterSampleCount:zero()
+ self.clusterSampleCount:indexAdd(1, self._minIndx, self._cscAdder)
+
+ -- scale * (x[k]-c[k]) where k is nearest cluster to x
+ self._gradWeight = self._gradWeight or self.gradWeight.new()
+ self._gradWeight:index(self.weight, 1, self._minIndx)
+ self._gradWeight:mul(-1)
+ self._gradWeight:add(input)
+ self._gradWeight:mul(-scale)
+
+ self._gradWeight2 = self._gradWeight2 or self.gradWeight.new()
+ self._gradWeight2:resizeAs(self.gradWeight):zero()
+ self._gradWeight2:indexAdd(1, self._minIndx, self._gradWeight)
+
+ -- scale/n * sum_i (x-c)
+ self._ccounts = self._ccounts or self.clusterSampleCount.new()
+ self._ccounts:resize(self.k):copy(self.clusterSampleCount)
+ self._ccounts:add(0.0000001) -- prevent division by zero errors
+
+ self._gradWeight2:cdiv(self._ccounts:view(self.k,1):expandAs(self.gradWeight))
+
+ self.gradWeight:add(self._gradWeight2)
+end
+
+function Kmeans:clearState()
+   -- release temporary buffers; they are lazily re-created on the next forward
+ self._expandedSamples = nil
+ self._clusterDistances = nil
+ self._temp = nil
+ self._tempExpanded = nil
+ self._tempWeight = nil
+ self._tempWeightExp = nil
+ self._expandedWeight = nil
+ self._minScore = nil
+ self._minIndx = nil
+ self._cscAdder = nil
+end
+
+function Kmeans:type(type, tensorCache)
+ self:clearState()
+ return parent.type(self, type, tensorCache)
+end
diff --git a/contrib/lua-torch/nn/L1Cost.lua b/contrib/lua-torch/nn/L1Cost.lua
new file mode 100644
index 000000000..6b58e0ec9
--- /dev/null
+++ b/contrib/lua-torch/nn/L1Cost.lua
@@ -0,0 +1,30 @@
+local THNN = require 'nn.THNN'
+local L1Cost, parent = torch.class('nn.L1Cost','nn.Criterion')
+
+function L1Cost:__init()
+ parent.__init(self)
+end
+
+function L1Cost:updateOutput(input)
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.L1Cost_updateOutput(
+ input:cdata(),
+ self.output_tensor:cdata()
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function L1Cost:updateGradInput(input)
+ input.THNN.L1Cost_updateGradInput(
+ input:cdata(),
+ THNN.NULL,
+ self.gradInput:cdata()
+ )
+ return self.gradInput
+end
+
+function L1Cost:clearState()
+ if self.output_tensor then self.output_tensor:set() end
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/L1HingeEmbeddingCriterion.lua b/contrib/lua-torch/nn/L1HingeEmbeddingCriterion.lua
new file mode 100644
index 000000000..6957278f5
--- /dev/null
+++ b/contrib/lua-torch/nn/L1HingeEmbeddingCriterion.lua
@@ -0,0 +1,41 @@
+local L1HingeEmbeddingCriterion, parent = torch.class('nn.L1HingeEmbeddingCriterion', 'nn.Criterion')
+
+function L1HingeEmbeddingCriterion:__init(margin)
+ parent.__init(self)
+ margin = margin or 1
+ self.margin = margin
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+end
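+
+-- The loss is ||x1 - x2||_1 when y == 1, and max(0, margin - ||x1 - x2||_1)
+-- when y == -1. Worked example (values illustrative):
+--
+--   local crit = nn.L1HingeEmbeddingCriterion(1)
+--   crit:forward({torch.Tensor{0, 0}, torch.Tensor{0.3, 0.4}}, -1)
+--   -- max(0, 1 - 0.7) = 0.3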
+
+function L1HingeEmbeddingCriterion:updateOutput(input,y)
+ self.output=input[1]:dist(input[2],1);
+ if y == -1 then
+ self.output = math.max(0,self.margin - self.output);
+ end
+ return self.output
+end
+
+
+local function mathsign(t)
+ if t>0 then return 1; end
+ if t<0 then return -1; end
+ return 2*torch.random(2)-3;
+end
+
+function L1HingeEmbeddingCriterion:updateGradInput(input, y)
+ self.gradInput[1]:resizeAs(input[1])
+ self.gradInput[2]:resizeAs(input[2])
+ self.gradInput[1]:copy(input[1])
+ self.gradInput[1]:add(-1, input[2])
+ local dist = self.gradInput[1]:norm(1);
+ self.gradInput[1]:apply(mathsign) -- L1 gradient
+ if y == -1 then -- just to avoid a mul by 1
+ if dist > self.margin then
+ self.gradInput[1]:zero()
+ else
+ self.gradInput[1]:mul(-1)
+ end
+ end
+ self.gradInput[2]:zero():add(-1, self.gradInput[1])
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/L1Penalty.lua b/contrib/lua-torch/nn/L1Penalty.lua
new file mode 100644
index 000000000..9ee6b35ff
--- /dev/null
+++ b/contrib/lua-torch/nn/L1Penalty.lua
@@ -0,0 +1,42 @@
+local L1Penalty, parent = torch.class('nn.L1Penalty','nn.Module')
+
+-- This module acts as an L1 regularizer on a latent state: it adds the
+-- gradient of the L1 loss to [gradOutput]. The [input] is copied unchanged
+-- to the [output].
+
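+-- A usage sketch (layer sizes and penalty weight are illustrative):
+-- sparsifying the code layer of an autoencoder.
+--
+--   local net = nn.Sequential()
+--      :add(nn.Linear(100, 20))
+--      :add(nn.L1Penalty(1e-3)) -- contributes 1e-3 * ||code||_1 to the loss
+--      :add(nn.Linear(20, 100))
+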
+function L1Penalty:__init(l1weight, sizeAverage, provideOutput)
+ parent.__init(self)
+ self.l1weight = l1weight
+ self.sizeAverage = sizeAverage or false
+ if provideOutput == nil then
+ self.provideOutput = true
+ else
+ self.provideOutput = provideOutput
+ end
+end
+
+function L1Penalty:updateOutput(input)
+ local m = self.l1weight
+ if self.sizeAverage == true then
+ m = m/input:nElement()
+ end
+ local loss = m*input:norm(1)
+ self.loss = loss
+ self.output = input
+ return self.output
+end
+
+function L1Penalty:updateGradInput(input, gradOutput)
+ local m = self.l1weight
+ if self.sizeAverage == true then
+ m = m/input:nElement()
+ end
+
+ self.gradInput:resizeAs(input):copy(input):sign():mul(m)
+
+ if self.provideOutput == true then
+ self.gradInput:add(gradOutput)
+ end
+
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/LayerNormalization.lua b/contrib/lua-torch/nn/LayerNormalization.lua
new file mode 100644
index 000000000..722d7c802
--- /dev/null
+++ b/contrib/lua-torch/nn/LayerNormalization.lua
@@ -0,0 +1,27 @@
+-- Reference: https://arxiv.org/pdf/1607.06450.pdf (Section 3)
+
+local LayerNormalization, parent = torch.class('nn.LayerNormalization', 'nn.Sequential')
+function LayerNormalization:__init(nOutput, bias, eps, affine)
+ parent.__init(self)
+ eps = eps or 1e-10
+ affine = (affine == nil) and true or affine
+ bias = bias or 0
+
+ self:add(nn.ConcatTable()
+ :add(nn.Identity())
+ :add(nn.Sequential()
+ :add(nn.Mean(1, 1))
+ :add(nn.Replicate(nOutput,1,1))))
+ :add(nn.CSubTable())
+ :add(nn.Normalize(2, eps))
+ :add(nn.MulConstant(torch.sqrt(nOutput)))
+
+ if affine then
+ local biasTransform = nn.Add(nOutput, false)
+ biasTransform.bias:fill(bias)
+ local gainTransform = nn.CMul(nOutput)
+ gainTransform.weight:fill(1.)
+ self:add(gainTransform)
+ self:add(biasTransform)
+ end
+end
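+
+-- Note on the construction above: after the mean is subtracted,
+-- nn.Normalize(2, eps) followed by nn.MulConstant(sqrt(nOutput)) divides by
+-- the biased standard deviation, since ||x - mean||_2 = sqrt(nOutput) * std.
+-- Usage sketch (sizes are illustrative):
+--
+--   local ln = nn.LayerNormalization(256)
+--   local y = ln:forward(torch.rand(32, 256)) -- row-wise normalization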
diff --git a/contrib/lua-torch/nn/LeakyReLU.lua b/contrib/lua-torch/nn/LeakyReLU.lua
new file mode 100644
index 000000000..56b7f2542
--- /dev/null
+++ b/contrib/lua-torch/nn/LeakyReLU.lua
@@ -0,0 +1,41 @@
+local LeakyReLU, parent = torch.class('nn.LeakyReLU','nn.Module')
+
+function LeakyReLU:__init(negval,ip)
+ parent.__init(self)
+ if type(negval) == 'boolean' then
+      -- a single boolean argument is the in-place flag
+      ip = negval
+ self.negval = 1/100
+ else
+ self.negval = negval or (1/100)
+ end
+ -- default for inplace is false
+ self.inplace = ip or false
+ if self.negval < 0 then
+ self.inplace = false
+ end
+end
+
+function LeakyReLU:updateOutput(input)
+ input.THNN.LeakyReLU_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.negval,
+ self.inplace
+ )
+ return self.output
+end
+
+function LeakyReLU:updateGradInput(input, gradOutput)
+ input.THNN.LeakyReLU_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.negval,
+ self.inplace
+ )
+ return self.gradInput
+end
+
+function LeakyReLU:__tostring__()
+ return torch.type(self) .. string.format('(%g)', self.negval)
+end
diff --git a/contrib/lua-torch/nn/Linear.lua b/contrib/lua-torch/nn/Linear.lua
new file mode 100644
index 000000000..09b5979ce
--- /dev/null
+++ b/contrib/lua-torch/nn/Linear.lua
@@ -0,0 +1,122 @@
+local Linear, parent = torch.class('nn.Linear', 'nn.Module')
+
+function Linear:__init(inputSize, outputSize, bias)
+ parent.__init(self)
+ local bias = ((bias == nil) and true) or bias
+ self.weight = torch.Tensor(outputSize, inputSize)
+ self.gradWeight = torch.Tensor(outputSize, inputSize)
+ if bias then
+ self.bias = torch.Tensor(outputSize)
+ self.gradBias = torch.Tensor(outputSize)
+ end
+ self:reset()
+end
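+
+-- Usage sketch (sizes are illustrative):
+--
+--   local m = nn.Linear(10, 5)              -- y = W * x + b
+--   local y1 = m:forward(torch.rand(10))    -- 1D input -> size 5
+--   local y2 = m:forward(torch.rand(8, 10)) -- batch of 8 -> size 8x5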
+
+function Linear:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function Linear:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(2))
+ end
+ if nn.oldSeed then
+ for i=1,self.weight:size(1) do
+ self.weight:select(1, i):apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ end
+ if self.bias then
+ for i=1,self.bias:nElement() do
+ self.bias[i] = torch.uniform(-stdv, stdv)
+ end
+ end
+ else
+ self.weight:uniform(-stdv, stdv)
+ if self.bias then self.bias:uniform(-stdv, stdv) end
+ end
+ return self
+end
+
+function Linear:updateAddBuffer(input)
+ local nframe = input:size(1)
+ self.addBuffer = self.addBuffer or input.new()
+ if self.addBuffer:nElement() ~= nframe then
+ self.addBuffer:resize(nframe):fill(1)
+ end
+end
+
+function Linear:updateOutput(input)
+ if input:dim() == 1 then
+ self.output:resize(self.weight:size(1))
+ if self.bias then self.output:copy(self.bias) else self.output:zero() end
+ self.output:addmv(1, self.weight, input)
+ elseif input:dim() == 2 then
+ local nframe = input:size(1)
+ local nElement = self.output:nElement()
+ self.output:resize(nframe, self.weight:size(1))
+ if self.output:nElement() ~= nElement then
+ self.output:zero()
+ end
+ self:updateAddBuffer(input)
+ self.output:addmm(0, self.output, 1, input, self.weight:t())
+ if self.bias then self.output:addr(1, self.addBuffer, self.bias) end
+ else
+ error('input must be vector or matrix')
+ end
+
+ return self.output
+end
+
+function Linear:updateGradInput(input, gradOutput)
+ if self.gradInput then
+
+ local nElement = self.gradInput:nElement()
+ self.gradInput:resizeAs(input)
+ if self.gradInput:nElement() ~= nElement then
+ self.gradInput:zero()
+ end
+ if input:dim() == 1 then
+ self.gradInput:addmv(0, 1, self.weight:t(), gradOutput)
+ elseif input:dim() == 2 then
+ self.gradInput:addmm(0, 1, gradOutput, self.weight)
+ end
+
+ return self.gradInput
+ end
+end
+
+function Linear:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ if input:dim() == 1 then
+ self.gradWeight:addr(scale, gradOutput, input)
+ if self.bias then self.gradBias:add(scale, gradOutput) end
+ elseif input:dim() == 2 then
+ self.gradWeight:addmm(scale, gradOutput:t(), input)
+ if self.bias then
+ -- update the size of addBuffer if the input is not the same size as the one we had in last updateGradInput
+ self:updateAddBuffer(input)
+ self.gradBias:addmv(scale, gradOutput:t(), self.addBuffer)
+ end
+ end
+end
+
+function Linear:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ -- we do not need to accumulate parameters when sharing:
+ self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+end
+
+function Linear:clearState()
+ if self.addBuffer then self.addBuffer:set() end
+ return parent.clearState(self)
+end
+
+function Linear:__tostring__()
+ return torch.type(self) ..
+ string.format('(%d -> %d)', self.weight:size(2), self.weight:size(1)) ..
+ (self.bias == nil and ' without bias' or '')
+end
diff --git a/contrib/lua-torch/nn/LinearWeightNorm.lua b/contrib/lua-torch/nn/LinearWeightNorm.lua
new file mode 100755
index 000000000..a712f5535
--- /dev/null
+++ b/contrib/lua-torch/nn/LinearWeightNorm.lua
@@ -0,0 +1,168 @@
+local LinearWeightNorm, parent = torch.class('nn.LinearWeightNorm', 'nn.Linear')
+
+function LinearWeightNorm:__init(inputSize, outputSize, bias, eps)
+ nn.Module.__init(self) -- Skip nn.Linear constructor
+
+ local bias = ((bias == nil) and true) or bias
+
+ self.eps = eps or 1e-16
+
+ self.outputSize = outputSize
+ self.inputSize = inputSize
+
+ self.v = torch.Tensor(outputSize, inputSize)
+ self.gradV = torch.Tensor(outputSize, inputSize)
+
+ self.weight = torch.Tensor(outputSize, inputSize)
+
+ self.g = torch.Tensor(outputSize,1)
+ self.gradG = torch.Tensor(outputSize,1)
+
+ self.norm = torch.Tensor(outputSize,1)
+ self.scale = torch.Tensor(outputSize,1)
+
+ if bias then
+ self.bias = torch.Tensor(outputSize)
+ self.gradBias = torch.Tensor(outputSize)
+ end
+
+ self:reset()
+end
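+
+-- The module keeps the weight-normalized parametrization
+-- weight = g * v / ||v||_2 (applied row-wise), training g and v instead of
+-- weight directly. Conversion sketch (sizes are illustrative):
+--
+--   local wn = nn.LinearWeightNorm(10, 5)
+--   local wn2 = nn.LinearWeightNorm.fromLinear(nn.Linear(10, 5))
+--   local lin = wn2:toLinear() -- collapse back to a plain nn.Linear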
+
+function LinearWeightNorm:evaluate()
+ if self.train ~= false then
+ self:updateWeightMatrix()
+ end
+
+ parent.evaluate(self)
+end
+
+function LinearWeightNorm:initFromWeight(weight)
+ weight = weight or self.weight
+
+ self.g:norm(weight,2,2):clamp(self.eps,math.huge)
+ self.v:copy(weight)
+
+ return self
+end
+
+function LinearWeightNorm.fromLinear(linear)
+ local module = nn.LinearWeightNorm(linear.weight:size(2), linear.weight:size(1), torch.isTensor(linear.bias))
+ module.weight:copy(linear.weight)
+ module:initFromWeight()
+
+ if linear.bias then
+ module.bias:copy(linear.bias)
+ end
+
+ return module
+end
+
+function LinearWeightNorm:toLinear()
+ self:updateWeightMatrix()
+
+ local module = nn.Linear(self.inputSize, self.outputSize, torch.isTensor(self.bias))
+
+ module.weight:copy(self.weight)
+ if self.bias then
+ module.bias:copy(self.bias)
+ end
+
+ return module
+end
+
+function LinearWeightNorm:parameters()
+ if self.bias then
+ return {self.v, self.g, self.bias}, {self.gradV, self.gradG, self.gradBias}
+ else
+ return {self.v, self.g}, {self.gradV, self.gradG}
+ end
+end
+
+function LinearWeightNorm:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1 / math.sqrt(self.inputSize)
+ end
+
+ self.weight:uniform(-stdv,stdv)
+ self:initFromWeight()
+
+ if self.bias then
+ self.bias:uniform(-stdv,stdv)
+ end
+end
+
+function LinearWeightNorm:updateWeightMatrix()
+ if self.norm:dim() == 0 then self.norm:resizeAs(self.g) end
+ if self.scale:dim() == 0 then self.scale:resizeAs(self.g) end
+ if self.weight:dim() == 0 then self.weight:resizeAs(self.v) end
+
+ self.norm:norm(self.v,2,2):clamp(self.eps,math.huge)
+ self.scale:cdiv(self.g,self.norm)
+ self.weight:cmul(self.v,self.scale:expandAs(self.v))
+end
+
+function LinearWeightNorm:updateOutput(input)
+ if self.train ~= false then
+ self:updateWeightMatrix()
+ end
+
+ return parent.updateOutput(self, input)
+end
+
+function LinearWeightNorm:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ if input:dim() == 1 then
+ self.gradV:addr(scale, gradOutput, input)
+ if self.bias then self.gradBias:add(scale, gradOutput) end
+ elseif input:dim() == 2 then
+ self.gradV:addmm(scale, gradOutput:t(), input)
+ if self.bias then
+ -- update the size of addBuffer if the input is not the same size as the one we had in last updateGradInput
+ self:updateAddBuffer(input)
+ self.gradBias:addmv(scale, gradOutput:t(), self.addBuffer)
+ end
+ end
+
+ local scale = self.scale:expandAs(self.v)
+ local norm = self.norm:expandAs(self.v)
+
+ self.weight:cmul(self.gradV,self.v):cdiv(norm)
+ self.gradG:sum(self.weight,2)
+
+ self.gradV:cmul(scale)
+
+ self.weight:cmul(self.v,scale):cdiv(norm)
+ self.weight:cmul(self.gradG:expandAs(self.weight))
+
+ self.gradV:add(-1,self.weight)
+end
+
+function LinearWeightNorm:defaultAccUpdateGradParameters(input, gradOutput, lr)
+ local gradV = self.gradV
+ local gradG = self.gradG
+ local gradBias = self.gradBias
+
+ self.gradV = self.v
+ self.gradG = self.g
+ self.gradBias = self.bias
+
+ self:accGradParameters(input, gradOutput, -lr)
+
+ self.gradV = gradV
+ self.gradG = gradG
+ self.gradBias = gradBias
+end
+
+function LinearWeightNorm:clearState()
+ nn.utils.clear(self, 'weight', 'norm', 'scale')
+ return parent.clearState(self)
+end
+
+function LinearWeightNorm:__tostring__()
+ return torch.type(self) ..
+ string.format('(%d -> %d)', self.inputSize, self.outputSize) ..
+ (self.bias == nil and ' without bias' or '')
+end \ No newline at end of file
diff --git a/contrib/lua-torch/nn/Log.lua b/contrib/lua-torch/nn/Log.lua
new file mode 100644
index 000000000..e8f236bfb
--- /dev/null
+++ b/contrib/lua-torch/nn/Log.lua
@@ -0,0 +1,20 @@
+local Log, parent = torch.class('nn.Log', 'nn.Module')
+
+function Log:__init()
+ parent.__init(self)
+end
+
+function Log:updateOutput(input)
+ self.output:resizeAs(input)
+ self.output:copy(input)
+ self.output:log()
+ return self.output
+end
+
+function Log:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input)
+ self.gradInput:fill(1)
+ self.gradInput:cdiv(input)
+ self.gradInput:cmul(gradOutput)
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/LogSigmoid.lua b/contrib/lua-torch/nn/LogSigmoid.lua
new file mode 100644
index 000000000..cab848f4d
--- /dev/null
+++ b/contrib/lua-torch/nn/LogSigmoid.lua
@@ -0,0 +1,27 @@
+local LogSigmoid, parent = torch.class('nn.LogSigmoid', 'nn.Module')
+
+function LogSigmoid:updateOutput(input)
+ self.buffer = self.buffer or input.new()
+ input.THNN.LogSigmoid_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.buffer:cdata()
+ )
+ return self.output
+end
+
+function LogSigmoid:updateGradInput(input, gradOutput)
+ input.THNN.LogSigmoid_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.buffer:cdata()
+ )
+ return self.gradInput
+end
+
+function LogSigmoid:clearState()
+ if self.buffer then self.buffer:set() end
+ return parent.clearState(self)
+end
+
diff --git a/contrib/lua-torch/nn/LogSoftMax.lua b/contrib/lua-torch/nn/LogSoftMax.lua
new file mode 100644
index 000000000..37c8acae4
--- /dev/null
+++ b/contrib/lua-torch/nn/LogSoftMax.lua
@@ -0,0 +1,19 @@
+local LogSoftMax = torch.class('nn.LogSoftMax', 'nn.Module')
+
+function LogSoftMax:updateOutput(input)
+ input.THNN.LogSoftMax_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function LogSoftMax:updateGradInput(input, gradOutput)
+ input.THNN.LogSoftMax_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata()
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/LookupTable.lua b/contrib/lua-torch/nn/LookupTable.lua
new file mode 100644
index 000000000..6cffc6c3e
--- /dev/null
+++ b/contrib/lua-torch/nn/LookupTable.lua
@@ -0,0 +1,166 @@
+local THNN = require 'nn.THNN'
+local LookupTable, parent = torch.class('nn.LookupTable', 'nn.Module')
+
+LookupTable.__version = 4
+
+function LookupTable:__init(nIndex, nOutput, paddingValue, maxNorm, normType)
+ parent.__init(self)
+
+ self.weight = torch.Tensor(nIndex, nOutput)
+ self.gradWeight = torch.Tensor(nIndex, nOutput):zero()
+ self.paddingValue = paddingValue or 0
+ self.maxNorm = maxNorm or nil
+ self.normType = normType or nil
+
+ self:reset()
+end
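+
+-- Usage sketch (vocabulary and embedding sizes are illustrative):
+--
+--   local lt = nn.LookupTable(1000, 64)                   -- 1000 rows, 64 dims
+--   local e1 = lt:forward(torch.LongTensor{5, 17, 99})    -- 3x64
+--   local e2 = lt:forward(torch.LongTensor(2, 3):fill(1)) -- 2x3x64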
+
+function LookupTable:backCompatibility()
+ self._count = self._count or torch.IntTensor()
+ self._input = self._input or torch.LongTensor()
+
+ if not self.shouldScaleGradByFreq then
+ self.shouldScaleGradByFreq = false
+ end
+end
+
+function LookupTable:accUpdateOnly()
+ self.gradWeight = nil
+ return self
+end
+
+function LookupTable:setPadding(paddingValue)
+ self.paddingValue = paddingValue
+ return self
+end
+
+function LookupTable:setMaxNorm(maxNorm)
+ self.maxNorm = maxNorm
+ return self
+end
+
+function LookupTable:setNormType(normType)
+ self.normType = normType
+ return self
+end
+
+function LookupTable:scaleGradByFreq()
+ self.shouldScaleGradByFreq = true
+ return self
+end
+
+function LookupTable:reset(stdv)
+ stdv = stdv or 1
+ self.weight:normal(0, stdv)
+end
+
+function LookupTable:makeInputContiguous(input)
+ -- make sure input is a contiguous torch.LongTensor
+ if (not input:isContiguous()) or torch.type(input) ~= torch.type(self._input) then
+ self.copiedInput = true
+ self._input:resize(input:size()):copy(input)
+ return self._input
+ end
+ self.copiedInput = false
+ return input
+end
+
+function LookupTable:updateOutput(input)
+ self:backCompatibility()
+ self:renorm(input)
+ input = self:makeInputContiguous(input)
+ if input:dim() == 1 then
+ self.output:index(self.weight, 1, input)
+ elseif input:dim() == 2 then
+ self.output:index(self.weight, 1, input:view(-1))
+ self.output = self.output:view(input:size(1), input:size(2), self.weight:size(2))
+ else
+ error("input must be a vector or matrix")
+ end
+ return self.output
+end
+
+function LookupTable:updateGradInput(input, gradOutput)
+   -- the input can be of any type (in the forward pass it is
+   -- converted to a LongTensor anyway), so we need to allocate
+   -- new memory each time the user changes the input type
+ if torch.type(self.gradInput) ~= torch.type(input) then
+ self.gradInput = input.new()
+ end
+ if not self.gradInput:isSameSizeAs(input) then
+ self.gradInput:resizeAs(input):zero()
+ end
+ return self.gradInput
+end
+
+function LookupTable:accGradParameters(input, gradOutput, scale)
+ self:backCompatibility()
+ input = self.copiedInput and self._input or input
+ if input:dim() == 2 then
+ input = input:view(-1)
+ elseif input:dim() ~= 1 then
+ error("input must be a vector or matrix")
+ end
+
+ self.gradWeight.THNN.LookupTable_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ self._count:cdata(),
+ THNN.optionalTensor(self._sorted),
+ THNN.optionalTensor(self._indices),
+ self.shouldScaleGradByFreq or false,
+ self.paddingValue or 0,
+ scale or 1
+ )
+end
+
+function LookupTable:renorm(input)
+ if not self.maxNorm then
+ return
+ end
+   -- copy input into _input, so _input is contiguous.
+ -- The copied _input will be modified in the C code.
+ self._input:resize(input:size()):copy(input)
+ local row_idx = self._input
+ if row_idx:dim() == 2 then
+ row_idx = row_idx:view(-1)
+ elseif row_idx:dim() ~= 1 then
+ error("input must be a vector or matrix")
+ end
+ -- "row_idx" and "weight" will be modified in the C code
+ self.weight.THNN.LookupTable_renorm(
+ row_idx:cdata(),
+ self.weight:cdata(),
+ self.maxNorm,
+ self.normType or 2
+ )
+end
+
+function LookupTable:type(type, tensorCache)
+ parent.type(self, type, tensorCache)
+
+ if type and type:find('torch%.Cuda.*Tensor') then
+ -- CUDA uses _sorted and _indices temporary tensors
+ self._sorted = torch.CudaLongTensor and torch.CudaLongTensor.new() or torch.CudaTensor.new()
+ self._indices = torch.CudaLongTensor and torch.CudaLongTensor.new() or torch.CudaTensor.new()
+ self._count = torch.CudaLongTensor and torch.CudaLongTensor.new() or torch.CudaTensor.new()
+ self._input = torch.CudaLongTensor and torch.CudaLongTensor.new() or torch.CudaTensor.new()
+ else
+ -- self._count and self._input should only be converted if using Cuda
+ self._count = torch.IntTensor()
+ self._input = torch.LongTensor()
+ end
+
+ return self
+end
+
+function LookupTable:clearState()
+ nn.utils.clear(self, '_count', '_input')
+ return parent.clearState(self)
+end
+
+function LookupTable:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ -- we do not need to accumulate parameters when sharing:
+ self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+end
diff --git a/contrib/lua-torch/nn/MM.lua b/contrib/lua-torch/nn/MM.lua
new file mode 100644
index 000000000..cc978c8cb
--- /dev/null
+++ b/contrib/lua-torch/nn/MM.lua
@@ -0,0 +1,92 @@
+--[[ Module to perform matrix multiplication on two minibatch inputs,
+ producing a minibatch.
+]]
+
+local MM, parent = torch.class('nn.MM', 'nn.Module')
+
+--[[ The constructor takes two optional boolean arguments, specifying whether
+   to transpose the first and/or the second input matrix before performing
+   the multiplication.
+]]
+function MM:__init(transA, transB)
+ parent.__init(self)
+
+ self.transA = transA or false
+ self.transB = transB or false
+
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+end
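+
+-- Usage sketch (batch and matrix sizes are illustrative):
+--
+--   local mm = nn.MM(false, true)  -- computes a * b:t() per batch entry
+--   local a, b = torch.rand(5, 3, 4), torch.rand(5, 6, 4)
+--   local out = mm:forward({a, b}) -- batched product, size 5x3x6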
+
+function MM:updateOutput(input)
+ assert(#input == 2, 'input must be a pair of minibatch matrices')
+ local a, b = table.unpack(input)
+ assert(a:nDimension() == 2 or a:nDimension() == 3, 'input tensors must be 2D or 3D')
+
+ if a:nDimension() == 2 then
+ assert(b:nDimension() == 2, 'second input tensor must be 2D')
+
+ if self.transA then a = a:t() end
+ if self.transB then b = b:t() end
+ assert(a:size(2) == b:size(1), 'matrix sizes do not match')
+
+ self.output:resize(a:size(1), b:size(2))
+ self.output:mm(a, b)
+ else
+ assert(b:nDimension() == 3, 'second input tensor must be 3D')
+ assert(a:size(1) == b:size(1), 'inputs must contain the same number of minibatches')
+
+ if self.transA then a = a:transpose(2, 3) end
+ if self.transB then b = b:transpose(2, 3) end
+ assert(a:size(3) == b:size(2), 'matrix sizes do not match')
+
+ self.output:resize(a:size(1), a:size(2), b:size(3))
+ self.output:bmm(a, b)
+ end
+
+ return self.output
+end
+
+function MM:updateGradInput(input, gradOutput)
+ self.gradInput[1] = self.gradInput[1] or input[1].new()
+ self.gradInput[2] = self.gradInput[2] or input[2].new()
+
+ assert(#input == 2, 'input must be a pair of tensors')
+ local a, b = table.unpack(input)
+ self.gradInput[1]:resizeAs(a)
+ self.gradInput[2]:resizeAs(b)
+
+ assert(gradOutput:nDimension() == 2 or gradOutput:nDimension() == 3, 'arguments must be a 2D or 3D Tensor')
+
+ local h_dim, w_dim, f
+ if gradOutput:nDimension() == 2 then
+ assert(a:nDimension() == 2, 'first input tensor must be 2D')
+ assert(b:nDimension() == 2, 'second input tensor must be 2D')
+
+ h_dim, w_dim = 1, 2
+ f = "mm"
+ else
+ assert(a:nDimension() == 3, 'first input tensor must be 3D')
+ assert(b:nDimension() == 3, 'second input tensor must be 3D')
+
+ h_dim, w_dim = 2, 3
+ f = "bmm"
+ end
+
+ if self.transA == self.transB then
+ a = a:transpose(h_dim, w_dim)
+ b = b:transpose(h_dim, w_dim)
+ end
+
+ if self.transA then
+ self.gradInput[1][f](self.gradInput[1], b, gradOutput:transpose(h_dim, w_dim))
+ else
+ self.gradInput[1][f](self.gradInput[1], gradOutput, b)
+ end
+
+ if self.transB then
+ self.gradInput[2][f](self.gradInput[2], gradOutput:transpose(h_dim, w_dim), a)
+ else
+ self.gradInput[2][f](self.gradInput[2], a, gradOutput)
+ end
+
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/MSECriterion.lua b/contrib/lua-torch/nn/MSECriterion.lua
new file mode 100644
index 000000000..d38beb6bf
--- /dev/null
+++ b/contrib/lua-torch/nn/MSECriterion.lua
@@ -0,0 +1,32 @@
+local MSECriterion, parent = torch.class('nn.MSECriterion', 'nn.Criterion')
+
+function MSECriterion:__init(sizeAverage)
+ parent.__init(self)
+ if sizeAverage ~= nil then
+ self.sizeAverage = sizeAverage
+ else
+ self.sizeAverage = true
+ end
+end
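+
+-- loss(x, t) = sum_i (x[i] - t[i])^2, divided by x:nElement() when
+-- sizeAverage is true. Worked example:
+--
+--   nn.MSECriterion():forward(torch.Tensor{0, 1}, torch.Tensor{0, 0}) -- 0.5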
+
+function MSECriterion:updateOutput(input, target)
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.MSECriterion_updateOutput(
+ input:cdata(),
+ target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function MSECriterion:updateGradInput(input, target)
+ input.THNN.MSECriterion_updateGradInput(
+ input:cdata(),
+ target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/MV.lua b/contrib/lua-torch/nn/MV.lua
new file mode 100644
index 000000000..a00478ef6
--- /dev/null
+++ b/contrib/lua-torch/nn/MV.lua
@@ -0,0 +1,82 @@
+--[[ Module to perform matrix-vector multiplication on two minibatch inputs,
+producing a minibatch.
+]]
+
+local MV, parent = torch.class('nn.MV', 'nn.Module')
+
+-- Backward compatibility
+local unpack = unpack or table.unpack
+
+function MV:__init(trans)
+ parent.__init(self)
+
+ self.trans = trans or false
+   assert(type(self.trans) == 'boolean', "argument must be a boolean: whether to transpose the matrix before multiplication")
+
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+end
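+
+-- Usage sketch (batch and matrix sizes are illustrative):
+--
+--   local mv = nn.MV()
+--   local M, v = torch.rand(5, 3, 4), torch.rand(5, 4)
+--   local out = mv:forward({M, v}) -- batched M * v, size 5x3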
+
+function MV:updateOutput(input)
+ assert(#input == 2, 'input must be a pair of minibatch matrices')
+ local M, v = unpack(input)
+ assert(M:nDimension() == 2 or M:nDimension() == 3, 'input matrix must be 2D or 3D')
+ assert(v:nDimension() == 1 or v:nDimension() == 2, 'input vector must be 1D or 2D')
+
+ if M:nDimension() == 2 then
+ assert(v:nDimension() == 1, 'vector must be 1D')
+
+ if self.trans then M = M:transpose(1,2) end
+      assert(M:size(2) == v:size(1), 'matrix column count and vector length do not match')
+
+ self.output:resize(M:size(1))
+ self.output:mv(M, v)
+ else
+ assert(v:nDimension() == 2, 'vector must be 2D (batch dimension)')
+ assert(M:size(1) == v:size(1), 'inputs must contain the same number of minibatches')
+
+ if self.trans then M = M:transpose(2,3) end
+      assert(M:size(3) == v:size(2), 'matrix column count and vector length do not match')
+
+ self.output:resize(M:size(1), M:size(2), 1)
+ self.output:bmm(M, v:view(v:size(1), v:size(2), 1)):resize(M:size(1), M:size(2))
+ end
+
+ return self.output
+end
+
+function MV:updateGradInput(input, gradOutput)
+ assert(#input == 2, 'input must be a pair of tensors')
+ local M, v = unpack(input)
+ self.gradInput[1]:resizeAs(M)
+ self.gradInput[2]:resizeAs(v)
+
+ assert(gradOutput:nDimension() == 1 or gradOutput:nDimension() == 2, 'arguments must be a 1D or 2D Tensor')
+
+ if gradOutput:nDimension() == 2 then
+      assert(M:nDimension() == 3, 'matrix must be 3D (batched)')
+ assert(v:nDimension() == 2, 'vector must be 2D (batched)')
+ local bdim = M:size(1)
+ local odim = M:size(2)
+ local idim = M:size(3)
+
+ if self.trans then
+ self.gradInput[1]:bmm(v:view(bdim, odim, 1), gradOutput:view(bdim, 1, idim))
+ self.gradInput[2]:view(bdim, odim, 1):bmm(M, gradOutput:view(bdim, idim, 1))
+ else
+ self.gradInput[1]:bmm(gradOutput:view(bdim, odim, 1), v:view(bdim, 1, idim))
+ self.gradInput[2]:view(bdim, idim, 1):bmm(M:transpose(2,3), gradOutput:view(bdim, odim, 1))
+ end
+ else
+ assert(M:nDimension() == 2, 'matrix must be 2D')
+ assert(v:nDimension() == 1, 'vector must be 1D')
+
+ if self.trans then
+ self.gradInput[1]:ger(v, gradOutput)
+ self.gradInput[2] = M * gradOutput
+ else
+ self.gradInput[1]:ger(gradOutput, v)
+ self.gradInput[2] = M:t() * gradOutput
+ end
+ end
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/MapTable.lua b/contrib/lua-torch/nn/MapTable.lua
new file mode 100644
index 000000000..c79f1ea1d
--- /dev/null
+++ b/contrib/lua-torch/nn/MapTable.lua
@@ -0,0 +1,119 @@
+local MapTable, parent = torch.class('nn.MapTable', 'nn.Container')
+
+function MapTable:__init(module, shared)
+ parent.__init(self)
+ self.shared = (shared == nil) and true or shared
+ self.sharedparams = {'weight', 'bias', 'gradWeight', 'gradBias'}
+ self.output = {}
+ self.gradInput = {}
+ self:add(module)
+end
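+
+-- Usage sketch (sizes are illustrative): the same module, with shared
+-- weights by default, is applied to every entry of the input table.
+--
+--   local map = nn.MapTable(nn.Linear(10, 3))
+--   local out = map:forward({torch.rand(10), torch.rand(10)}) -- two size-3 outputs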
+
+function MapTable:_extend(n)
+ self.sharedparams = self.sharedparams or {'weight', 'bias', 'gradWeight', 'gradBias'}
+ self.modules[1] = self.module
+ for i = 2, n do
+ if not self.modules[i] then
+ if self.shared then
+ self.modules[i] = self.module:clone(table.unpack(self.sharedparams))
+ else
+ self.modules[i] = self.module:clone()
+ end
+ end
+ end
+end
+
+function MapTable:resize(n)
+ self:_extend(n)
+ for i = n + 1, #self.modules do
+ -- It's not clear why this clearState call is necessary, but it fixes
+ -- https://github.com/torch/nn/issues/1141 .
+ self.modules[i]:clearState()
+ self.modules[i] = nil
+ end
+end
+
+function MapTable:add(module)
+ assert(not self.module, 'Single module required')
+ self.module = module
+ self.modules[1] = self.module
+ return self
+end
+
+function MapTable:updateOutput(input)
+ self.output = {}
+ self:_extend(#input)
+ for i = 1, #input do
+ self.output[i] = self:rethrowErrors(self.modules[i], i, 'updateOutput', input[i])
+ end
+ return self.output
+end
+
+function MapTable:updateGradInput(input, gradOutput)
+ self.gradInput = {}
+ self:_extend(#input)
+ for i = 1, #input do
+ self.gradInput[i] = self:rethrowErrors(self.modules[i], i, 'updateGradInput', input[i], gradOutput[i])
+ end
+ return self.gradInput
+end
+
+function MapTable:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ self:_extend(#input)
+ for i = 1, #input do
+ self:rethrowErrors(self.modules[i], i, 'accGradParameters', input[i], gradOutput[i], scale)
+ end
+end
+
+function MapTable:accUpdateGradParameters(input, gradOutput, lr)
+ lr = lr or 1
+ self:_extend(#input)
+ for i = 1, #input do
+ self:rethrowErrors(self.modules[i], i, 'accUpdateGradParameters', input[i], gradOutput[i], lr)
+ end
+end
+
+function MapTable:zeroGradParameters()
+ if self.module then
+ if self.shared then
+ self.module:zeroGradParameters()
+ else
+ parent.zeroGradParameters(self)
+ end
+ end
+end
+
+function MapTable:updateParameters(learningRate)
+ if self.module then
+ if self.shared then
+ self.module:updateParameters(learningRate)
+ else
+ parent.updateParameters(self, learningRate)
+ end
+ end
+end
+
+function MapTable:clearState()
+ for i = 2, #self.modules do
+ -- It's not clear why this clearState call is necessary, but it fixes
+ -- https://github.com/torch/nn/issues/1141 .
+ self.modules[i]:clearState()
+ self.modules[i] = nil
+ end
+ parent.clearState(self)
+end
+
+function MapTable:__tostring__()
+ local tab = ' '
+ local line = '\n'
+ local extlast = ' '
+ local str = torch.type(self)
+ if self.module then
+ str = str .. ' {' .. line .. tab
+ str = str .. tostring(self.module):gsub(line, line .. tab .. extlast) .. line .. '}'
+ else
+ str = str .. ' { }'
+ end
+ return str
+end
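
MapTable applies a single module to every entry of an input table, lazily cloning it as the table grows; by default the clones share weight, bias, gradWeight and gradBias. A minimal usage sketch (sizes are illustrative, not from the diff):

    require 'nn'
    local map = nn.MapTable(nn.Linear(10, 3))      -- clones share parameters by default
    local out = map:forward({torch.randn(10), torch.randn(10)})
    -- out is a table of two 3-dimensional tensors produced by the same weights
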
diff --git a/contrib/lua-torch/nn/MarginCriterion.lua b/contrib/lua-torch/nn/MarginCriterion.lua
new file mode 100644
index 000000000..1ab8ad784
--- /dev/null
+++ b/contrib/lua-torch/nn/MarginCriterion.lua
@@ -0,0 +1,31 @@
+local MarginCriterion, parent = torch.class('nn.MarginCriterion', 'nn.Criterion')
+
+function MarginCriterion:__init(margin)
+ parent.__init(self)
+ self.sizeAverage = true
+ self.margin = margin or 1
+end
+
+function MarginCriterion:updateOutput(input, target)
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.MarginCriterion_updateOutput(
+ input:cdata(),
+ target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage,
+ self.margin
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function MarginCriterion:updateGradInput(input, target)
+ input.THNN.MarginCriterion_updateGradInput(
+ input:cdata(),
+ target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage,
+ self.margin
+ )
+ return self.gradInput
+end
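
MarginCriterion is the element-wise hinge loss max(0, margin - y*x) for targets in {-1, 1}, averaged over elements when sizeAverage is true (the default). A small worked sketch, not part of the upstream file:

    require 'nn'
    local crit = nn.MarginCriterion()              -- margin defaults to 1
    local input  = torch.Tensor{0.5, -0.2}
    local target = torch.Tensor{1, -1}
    local loss = crit:forward(input, target)
    -- (max(0, 1 - 0.5) + max(0, 1 - 0.2)) / 2 = 0.65
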
diff --git a/contrib/lua-torch/nn/MarginRankingCriterion.lua b/contrib/lua-torch/nn/MarginRankingCriterion.lua
new file mode 100644
index 000000000..844d905d5
--- /dev/null
+++ b/contrib/lua-torch/nn/MarginRankingCriterion.lua
@@ -0,0 +1,75 @@
+local MarginRankingCriterion, parent = torch.class('nn.MarginRankingCriterion', 'nn.Criterion')
+
+function MarginRankingCriterion:__init(margin)
+ parent.__init(self)
+ margin=margin or 1
+ self.margin = margin
+ self.gradInput = {torch.Tensor(1), torch.Tensor(1)}
+ self.sizeAverage = true
+end
+
+function MarginRankingCriterion:updateOutput(input, y)
+ if torch.type(y) == 'number' then -- non-batch mode
+ self.output = math.max(0, -y * (input[1][1] - input[2][1]) + self.margin)
+ else
+ self._output = self._output or input[1]:clone()
+ self._output:resizeAs(input[1])
+ self._output:copy(input[1])
+
+ self._output:add(-1, input[2])
+ self._output:mul(-1):cmul(y)
+ self._output:add(self.margin)
+
+ self._output:cmax(0)
+
+ self.output = self._output:sum()
+
+ if self.sizeAverage then
+ self.output = self.output/y:size(1)
+ end
+ end
+
+ return self.output
+end
+
+function MarginRankingCriterion:updateGradInput(input, y)
+ if torch.type(y) == 'number' then -- non-batch mode
+ local dist = -y * (input[1][1] - input[2][1]) + self.margin
+ if dist < 0 then
+ self.gradInput[1][1] = 0;
+ self.gradInput[2][1] = 0;
+ else
+ self.gradInput[1][1] = -y
+ self.gradInput[2][1] = y
+ end
+ else
+ self.dist = self.dist or input[1].new()
+ self.dist = self.dist:resizeAs(input[1]):copy(input[1])
+ local dist = self.dist
+
+ dist:add(-1, input[2])
+ dist:mul(-1):cmul(y)
+ dist:add(self.margin)
+
+ self.mask = self.mask or input[1].new()
+ self.mask = self.mask:resizeAs(input[1]):copy(dist)
+ local mask = self.mask
+
+ mask:ge(dist, 0)
+
+ self.gradInput[1]:resize(dist:size())
+ self.gradInput[2]:resize(dist:size())
+
+ self.gradInput[1]:copy(mask)
+ self.gradInput[1]:mul(-1):cmul(y)
+ self.gradInput[2]:copy(mask)
+ self.gradInput[2]:cmul(y)
+
+ if self.sizeAverage then
+ self.gradInput[1]:div(y:size(1))
+ self.gradInput[2]:div(y:size(1))
+ end
+
+ end
+ return self.gradInput
+end
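
MarginRankingCriterion scores a pair (x1, x2) with max(0, -y*(x1 - x2) + margin), where y = 1 asks x1 to rank above x2 and y = -1 the reverse; the code above handles both a scalar y (non-batch) and a tensor y (batch). A non-batch sketch for illustration:

    require 'nn'
    local crit = nn.MarginRankingCriterion(0.5)
    local x1, x2 = torch.Tensor{0.9}, torch.Tensor{0.1}
    local loss = crit:forward({x1, x2}, 1)
    -- max(0, -(0.9 - 0.1) + 0.5) = 0: the pair is already ranked with enough margin
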
diff --git a/contrib/lua-torch/nn/MaskedSelect.lua b/contrib/lua-torch/nn/MaskedSelect.lua
new file mode 100644
index 000000000..c3f7834e1
--- /dev/null
+++ b/contrib/lua-torch/nn/MaskedSelect.lua
@@ -0,0 +1,71 @@
+local unpack = unpack or table.unpack
+
+local MaskedSelect, parent = torch.class('nn.MaskedSelect', 'nn.Module')
+
+--[[ Initializes the index and gradient buffers used by maskedSelect. ]]
+function MaskedSelect:__init()
+ parent.__init(self)
+ self._maskIndices = torch.LongTensor()
+ self._maskIndexBuffer = torch.LongTensor()
+ self._maskIndexBufferCPU = torch.FloatTensor()
+ self._gradBuffer = torch.Tensor()
+ self._gradMask = torch.ByteTensor()
+end
+
+--[[ Performs maskedSelect operation. ]]
+function MaskedSelect:updateOutput(input)
+ local input, mask = unpack(input)
+ self.output:maskedSelect(input, mask)
+ return self.output
+end
+
+--[[ Reverse maps unmasked gradOutput back to gradInput. ]]
+function MaskedSelect:updateGradInput(input, gradOutput)
+ local input, mask = unpack(input)
+ if input:type() == 'torch.CudaTensor' then
+ self._maskIndexBufferCPU:range(1, mask:nElement()):resize(mask:size())
+ self._maskIndexBuffer:resize(
+ self._maskIndexBufferCPU:size()):copy(self._maskIndexBufferCPU)
+ else
+ self._maskIndexBuffer:range(1, mask:nElement()):resize(mask:size())
+ end
+ self._maskIndices:maskedSelect(self._maskIndexBuffer, mask)
+ self._gradBuffer:resize(input:nElement()):zero()
+ self._gradBuffer:scatter(1, self._maskIndices, gradOutput)
+ self._gradBuffer:resize(input:size())
+ self.gradInput = {self._gradBuffer,
+ self._gradMask:resize(mask:size()):fill(0)}
+ return self.gradInput
+end
+
+function MaskedSelect:type(type, tensorCache)
+ if not type then
+ return self._type
+ end
+ self._gradBuffer = self._gradBuffer:type(type)
+ self.gradInput = self.gradInput:type(type)
+ self.output = self.output:type(type)
+
+ -- These casts apply when switching between cuda/non-cuda types
+ if type ~= 'torch.CudaTensor' then
+ self._maskIndexBuffer = self._maskIndexBuffer:long()
+ self._maskIndices = self._maskIndices:long()
+ self._gradMask = self._gradMask:byte()
+   else
+ self._maskIndexBuffer = self._maskIndexBuffer:cuda()
+ self._maskIndices = self._maskIndices:cuda()
+ self._gradMask = self._gradMask:cuda()
+ end
+ self._type = type
+ return self
+end
+
+function MaskedSelect:clearState()
+ return nn.utils.clear(self, {'output',
+ 'gradInput',
+ '_maskIndexBuffer',
+ '_maskIndexBufferCPU',
+ '_maskIndices',
+ '_gradBuffer',
+ '_gradMask'})
+end
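
MaskedSelect flattens out the entries of the input selected by a byte mask; its backward scatters the incoming gradient back to exactly those positions and leaves zeros elsewhere. A minimal sketch (illustrative only):

    require 'nn'
    local ms = nn.MaskedSelect()
    local input = torch.randn(2, 2)
    local mask = torch.ByteTensor{{1, 0}, {0, 1}}
    local out = ms:forward({input, mask})          -- 1D tensor of the 2 selected entries
    local grads = ms:backward({input, mask}, torch.Tensor{1, 1})
    -- grads[1] holds 1s at the masked positions and 0s elsewhere; grads[2] is all zeros
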
diff --git a/contrib/lua-torch/nn/Max.lua b/contrib/lua-torch/nn/Max.lua
new file mode 100644
index 000000000..8273e808c
--- /dev/null
+++ b/contrib/lua-torch/nn/Max.lua
@@ -0,0 +1,66 @@
+local Max, parent = torch.class('nn.Max', 'nn.Module')
+
+function Max:__init(dimension, nInputDims)
+ parent.__init(self)
+ dimension = dimension or 1
+ self.dimension = dimension
+ -- do not assign default value to nInputDims or it will break backward compatibility
+ self.nInputDims = nInputDims
+end
+
+function Max:_getPositiveDimension(input)
+ local dimension = self.dimension
+ if dimension < 0 then
+ dimension = input:dim() + dimension + 1
+ elseif self.nInputDims and input:dim()==(self.nInputDims+1) then
+ dimension = dimension + 1
+ end
+ return dimension
+end
+
+function Max:_lazyInit()
+ self._output = self._output or self.output.new()
+ if not self._indices then
+ if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
+ self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
+ else
+ self._indices = torch.LongTensor()
+ end
+ end
+end
+
+function Max:updateOutput(input)
+ self:_lazyInit()
+ local dimension = self:_getPositiveDimension(input)
+ torch.max(self._output, self._indices, input, dimension)
+ if input:dim() > 1 then
+ self.output:set(self._output:select(dimension, 1))
+ else
+ self.output:set(self._output)
+ end
+ return self.output
+end
+
+function Max:updateGradInput(input, gradOutput)
+ self:_lazyInit()
+ local dimension = self:_getPositiveDimension(input)
+ local gradOutputView
+ if input:dim() > 1 then
+ gradOutputView = nn.utils.addSingletonDimension(gradOutput, dimension)
+ else
+ gradOutputView = gradOutput
+ end
+ self.gradInput:resizeAs(input):zero():scatter(dimension, self._indices, gradOutputView)
+ return self.gradInput
+end
+
+function Max:type(type, tensorCache)
+ self._indices = nil
+ parent.type(self, type, tensorCache)
+ return self
+end
+
+function Max:clearState()
+ nn.utils.clear(self, '_indices', '_output')
+ return parent.clearState(self)
+end
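
Max reduces one dimension by its maximum, remembering the argmax indices so the backward pass can scatter the gradient only to the winning entries. A small worked sketch, not part of the upstream file:

    require 'nn'
    local m = nn.Max(1)                 -- max over dimension 1
    local x = torch.Tensor{2, 7, 5}
    local y = m:forward(x)              -- one-element tensor holding 7
    local gx = m:backward(x, torch.Tensor{1})
    -- gx = {0, 1, 0}: only the argmax receives gradient
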
diff --git a/contrib/lua-torch/nn/Maxout.lua b/contrib/lua-torch/nn/Maxout.lua
new file mode 100644
index 000000000..a797a9f43
--- /dev/null
+++ b/contrib/lua-torch/nn/Maxout.lua
@@ -0,0 +1,13 @@
+-- Reference: http://jmlr.org/proceedings/papers/v28/goodfellow13.pdf
+
+local Maxout, parent = torch.class('nn.Maxout', 'nn.Sequential')
+
+function Maxout:__init(inputSize, outputSize, maxoutNumber, preprocess)
+ parent.__init(self)
+ self:add(nn.Linear(inputSize, outputSize * maxoutNumber))
+ self:add(nn.View(maxoutNumber, outputSize):setNumInputDims(1))
+ if preprocess then
+ self:add(preprocess)
+ end
+ self:add(nn.Max(1, 2))
+end
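
Maxout wires the construction from the referenced paper as Linear -> View -> Max: each output unit takes the maximum over maxoutNumber linear pieces. A minimal sketch with made-up sizes:

    require 'nn'
    local m = nn.Maxout(10, 4, 3)          -- 10 inputs, 4 outputs, 3 pieces per output
    local y = m:forward(torch.randn(10))   -- size 4; each entry is a max over 3 linear responses
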
diff --git a/contrib/lua-torch/nn/Mean.lua b/contrib/lua-torch/nn/Mean.lua
new file mode 100644
index 000000000..8087ac95e
--- /dev/null
+++ b/contrib/lua-torch/nn/Mean.lua
@@ -0,0 +1,14 @@
+local Mean, parent = torch.class('nn.Mean', 'nn.Sum')
+
+--[[
+
+This module is kept only for backward compatibility.
+
+Please use "nn.Sum(dimension, nInputDims, sizeAverage)" instead.
+
+]]--
+
+
+function Mean:__init(dimension, nInputDims)
+ parent.__init(self, dimension, nInputDims, true)
+end
diff --git a/contrib/lua-torch/nn/Min.lua b/contrib/lua-torch/nn/Min.lua
new file mode 100644
index 000000000..3a3e4a802
--- /dev/null
+++ b/contrib/lua-torch/nn/Min.lua
@@ -0,0 +1,66 @@
+local Min, parent = torch.class('nn.Min', 'nn.Module')
+
+function Min:__init(dimension, nInputDims)
+ parent.__init(self)
+ dimension = dimension or 1
+ self.dimension = dimension
+ -- do not assign default value to nInputDims or it will break backward compatibility
+ self.nInputDims = nInputDims
+end
+
+function Min:_getPositiveDimension(input)
+ local dimension = self.dimension
+ if dimension < 0 then
+ dimension = input:dim() + dimension + 1
+ elseif self.nInputDims and input:dim()==(self.nInputDims+1) then
+ dimension = dimension + 1
+ end
+ return dimension
+end
+
+function Min:_lazyInit()
+ self._output = self._output or self.output.new()
+ if not self._indices then
+ if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
+ self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
+ else
+ self._indices = torch.LongTensor()
+ end
+ end
+end
+
+function Min:updateOutput(input)
+ self:_lazyInit()
+ local dimension = self:_getPositiveDimension(input)
+ torch.min(self._output, self._indices, input, dimension)
+ if input:dim() > 1 then
+ self.output:set(self._output:select(dimension, 1))
+ else
+ self.output:set(self._output)
+ end
+ return self.output
+end
+
+function Min:updateGradInput(input, gradOutput)
+ self:_lazyInit()
+ local dimension = self:_getPositiveDimension(input)
+ local gradOutputView
+ if input:dim() > 1 then
+ gradOutputView = nn.utils.addSingletonDimension(gradOutput, dimension)
+ else
+ gradOutputView = gradOutput
+ end
+ self.gradInput:resizeAs(input):zero():scatter(dimension, self._indices, gradOutputView)
+ return self.gradInput
+end
+
+function Min:type(type, tensorCache)
+ self._indices = nil
+ parent.type(self, type, tensorCache)
+ return self
+end
+
+function Min:clearState()
+ nn.utils.clear(self, '_indices', '_output')
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/MixtureTable.lua b/contrib/lua-torch/nn/MixtureTable.lua
new file mode 100644
index 000000000..dbe19742f
--- /dev/null
+++ b/contrib/lua-torch/nn/MixtureTable.lua
@@ -0,0 +1,165 @@
+local MixtureTable, parent = torch.class('nn.MixtureTable', 'nn.Module')
+
+function MixtureTable:__init(dim)
+ parent.__init(self)
+ self.dim = dim
+ self.size = torch.LongStorage()
+ self.batchSize = 0
+ self.size2 = torch.LongStorage()
+ self.backwardSetup = false
+ self.gradInput = {}
+end
+
+function MixtureTable:updateOutput(input)
+ local gaterInput, expertInputs = table.unpack(input)
+
+ -- buffers
+ self._gaterView = self._gaterView or input[1].new()
+ self._expert = self._expert or input[1].new()
+ self._expertView = self._expertView or input[1].new()
+
+ self.dimG = 2
+ local batchSize = gaterInput:size(1)
+ if gaterInput:dim() < 2 then
+ self.dimG = 1
+ self.dim = self.dim or 1
+ batchSize = 1
+ end
+ self.dim = self.dim or 2
+
+ if self.table or torch.type(expertInputs) == 'table' then
+ -- expertInputs is a Table :
+ self.table = true
+ if gaterInput:size(self.dimG) ~= #expertInputs then
+ error"Should be one gater output per expert"
+ end
+ local expertInput = expertInputs[1]
+ self.size:resize(expertInput:dim()+1):fill(1)
+ if self.dimG > 1 then
+ self.size[1] = gaterInput:size(1)
+ end
+ self.size[self.dim] = gaterInput:size(self.dimG)
+ self.output:resizeAs(expertInput)
+ self.batchSize = batchSize
+ self._gaterView:view(gaterInput, self.size)
+ self.output:zero()
+      -- multiply-accumulate each expert input with its corresponding gater output
+ for i,expertInput in ipairs(expertInputs) do
+ local gate = self._gaterView:select(self.dim,i):expandAs(expertInput)
+ self.output:addcmul(expertInput, gate)
+ end
+ else
+ -- expertInputs is a Tensor :
+ self.size:resize(expertInputs:dim()):fill(1)
+ if self.dimG > 1 then
+ self.size[1] = gaterInput:size(1)
+ end
+ self.size[self.dim] = gaterInput:size(self.dimG)
+ self.output:resizeAs(expertInputs:select(self.dim, 1))
+ self.batchSize = batchSize
+ self._gaterView:view(gaterInput, self.size)
+ self._expert:cmul(self._gaterView:expandAs(expertInputs), expertInputs)
+ self.output:sum(self._expert, self.dim)
+ self.output:resizeAs(expertInputs:select(self.dim, 1))
+ end
+
+ return self.output
+end
+
+function MixtureTable:updateGradInput(input, gradOutput)
+ local gaterInput, expertInputs = table.unpack(input)
+ nn.utils.recursiveResizeAs(self.gradInput, input)
+ local gaterGradInput, expertGradInputs = table.unpack(self.gradInput)
+
+ -- buffers
+ self._sum = self._sum or input[1].new()
+ self._expertView2 = self._expertView2 or input[1].new()
+ self._expert2 = self._expert2 or input[1].new()
+
+ if self.table then
+ for i,expertInput in ipairs(expertInputs) do
+ local expertGradInput = expertGradInputs[i] or expertInput:clone()
+ expertGradInput:resizeAs(expertInput)
+ expertGradInputs[i] = expertGradInput
+ end
+ gaterGradInput:resizeAs(gaterInput)
+
+ -- Clear invalid gradients
+ if #expertGradInputs > #expertInputs then
+ for i=#expertInputs+1, #expertGradInputs do
+ expertGradInputs[i] = nil
+ end
+ end
+
+ -- like CMulTable, but with broadcasting
+ for i,expertGradInput in ipairs(expertGradInputs) do
+ -- gater updateGradInput
+ self._expert:cmul(gradOutput, expertInputs[i])
+ if self.dimG == 1 then
+ self._expertView:view(self._expert, -1)
+ else
+ self._expertView:view(self._expert, gradOutput:size(1), -1)
+ end
+ self._sum:sum(self._expertView, self.dimG)
+ if self.dimG == 1 then
+ gaterGradInput[i] = self._sum:select(self.dimG,1)
+ else
+ gaterGradInput:select(self.dimG,i):copy(self._sum:select(self.dimG,1))
+ end
+
+ -- expert updateGradInput
+ local gate = self._gaterView:select(self.dim,i):expandAs(expertGradInput)
+ expertGradInput:cmul(gate, gradOutput)
+ end
+ else
+ self.size2:resize(expertInputs:dim())
+ self.size2:copy(expertInputs:size())
+ self.size2[self.dim] = 1
+ gaterGradInput:resizeAs(gaterInput)
+
+ -- gater updateGradInput
+ self._expertView:view(gradOutput, self.size2)
+ local gradOutput = self._expertView:expandAs(expertInputs)
+ self._expert:cmul(gradOutput, expertInputs)
+ local expert = self._expert:transpose(self.dim, self.dimG)
+ if not expert:isContiguous() then
+ self._expert2:resizeAs(expert)
+ self._expert2:copy(expert)
+ expert = self._expert2
+ end
+ if self.dimG == 1 then
+ self._expertView2:view(expert, gaterInput:size(1), -1)
+ else
+ self._expertView2:view(expert, gaterInput:size(1), gaterInput:size(2), -1)
+ end
+ gaterGradInput:sum(self._expertView2, self.dimG+1)
+ gaterGradInput:resizeAs(gaterInput)
+
+ -- expert updateGradInput
+ expertGradInputs:cmul(self._gaterView:expandAs(expertInputs), gradOutput)
+ end
+
+ return self.gradInput
+end
+
+function MixtureTable:type(type, tensorCache)
+ self._gaterView = nil
+ self._expert = nil
+ self._expertView = nil
+ self._sum = nil
+ self._expert2 = nil
+ self._expertView2 = nil
+ return parent.type(self, type, tensorCache)
+end
+
+function MixtureTable:clearState()
+ nn.utils.clear(self, {
+ '_gaterView',
+ '_expert',
+ '_expertView',
+ '_sum',
+ '_expert2',
+ '_expertView2',
+ })
+ return parent.clearState(self)
+end
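
MixtureTable blends expert outputs with gater weights, output = sum_i g[i] * expert_i, accepting the experts either as a table or stacked in a tensor. A small worked sketch for the table form (values are illustrative):

    require 'nn'
    local mix = nn.MixtureTable()
    local gater = torch.Tensor{{0.7, 0.3}}                        -- batch of 1, two experts
    local experts = {torch.Tensor{{1, 1}}, torch.Tensor{{3, 5}}}
    local out = mix:forward({gater, experts})
    -- 0.7*{1,1} + 0.3*{3,5} = {1.6, 2.2}
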
diff --git a/contrib/lua-torch/nn/Module.lua b/contrib/lua-torch/nn/Module.lua
new file mode 100644
index 000000000..3debc5789
--- /dev/null
+++ b/contrib/lua-torch/nn/Module.lua
@@ -0,0 +1,429 @@
+local Module = torch.class('nn.Module')
+
+function Module:__init()
+ self.gradInput = torch.Tensor()
+ self.output = torch.Tensor()
+ self._type = self.output:type()
+end
+
+function Module:parameters()
+ if self.weight and self.bias then
+ return {self.weight, self.bias}, {self.gradWeight, self.gradBias}
+ elseif self.weight then
+ return {self.weight}, {self.gradWeight}
+ elseif self.bias then
+ return {self.bias}, {self.gradBias}
+ else
+ return
+ end
+end
+
+function Module:updateOutput(input)
+ return self.output
+end
+
+function Module:forward(input)
+ return self:updateOutput(input)
+end
+
+function Module:backward(input, gradOutput, scale)
+ scale = scale or 1
+ self:updateGradInput(input, gradOutput)
+ self:accGradParameters(input, gradOutput, scale)
+ return self.gradInput
+end
+
+function Module:backwardUpdate(input, gradOutput, lr)
+ self:updateGradInput(input, gradOutput)
+ self:accUpdateGradParameters(input, gradOutput, lr)
+ return self.gradInput
+end
+
+function Module:updateGradInput(input, gradOutput)
+ return self.gradInput
+end
+
+function Module:accGradParameters(input, gradOutput, scale)
+end
+
+function Module:accUpdateGradParameters(input, gradOutput, lr)
+ if self.shared then
+ self:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ else
+ self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+ end
+end
+
+function Module:defaultAccUpdateGradParameters(input, gradOutput, lr)
+ local gradWeight = self.gradWeight
+ local gradBias = self.gradBias
+ self.gradWeight = self.weight
+ self.gradBias = self.bias
+ self:accGradParameters(input, gradOutput, -lr)
+ self.gradWeight = gradWeight
+ self.gradBias = gradBias
+end
+
+function Module:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ if self:parameters() then
+ self:zeroGradParameters()
+ self:accGradParameters(input, gradOutput, 1)
+ self:updateParameters(lr)
+ end
+end
+
+function Module:zeroGradParameters()
+ local _,gradParams = self:parameters()
+ if gradParams then
+ for i=1,#gradParams do
+ gradParams[i]:zero()
+ end
+ end
+end
+
+function Module:updateParameters(learningRate)
+ local params, gradParams = self:parameters()
+ if params then
+ for i=1,#params do
+ params[i]:add(-learningRate, gradParams[i])
+ end
+ end
+end
+
+function Module:training()
+ self.train = true
+end
+
+function Module:evaluate()
+ self.train = false
+end
+
+function Module:share(mlp, ...)
+ local arg = {...}
+ for i,v in ipairs(arg) do
+ if self[v] ~= nil then
+ self[v]:set(mlp[v])
+ self.shared = true
+ mlp.shared = true
+ end
+ end
+ return self
+end
+
+local function sharedWrite(...)
+ local arg = {...}
+ local shared = {}
+ for i,v in ipairs(arg) do
+ shared[v] = true
+ end
+ return function(self, file)
+ local object = {}
+ for k, v in pairs(self) do
+ if shared[k] then
+ assert(torch.isTensor(v), 'Shared parameters have to be Tensors')
+ object[k] = v.new()
+ else
+ object[k] = v
+ end
+ end
+ file:writeObject(object)
+ end
+end
+
+function Module:clone(...)
+ local oldWrite = nn.Module.write
+ nn.Module.write = sharedWrite(...)
+
+ local f = torch.MemoryFile("rw"):binary()
+ f:writeObject(self)
+ f:seek(1)
+ local clone = f:readObject()
+ f:close()
+
+ nn.Module.write = oldWrite
+
+ if select('#',...) > 0 then
+ clone:share(self,...)
+ end
+ return clone
+end
+
+function Module:type(type, tensorCache)
+ if not type then
+ return self._type
+ end
+
+ tensorCache = tensorCache or {}
+
+ -- find all tensors and convert them
+ for key,param in pairs(self) do
+ self[key] = nn.utils.recursiveType(param, type, tensorCache)
+ end
+
+ self._type = type
+ return self
+end
+
+function Module:float(...)
+ return self:type('torch.FloatTensor',...)
+end
+
+function Module:double(...)
+ return self:type('torch.DoubleTensor',...)
+end
+
+function Module:cuda(...)
+ return self:type('torch.CudaTensor',...)
+end
+
+function Module:reset()
+end
+
+function Module:write(file)
+ -- Write all values in the object as a table.
+ local object = {}
+ for k, v in pairs(self) do
+ object[k] = v
+ end
+ file:writeObject(object)
+end
+
+function Module:read(file)
+ local object = file:readObject()
+ for k, v in pairs(object) do
+ self[k] = v
+ end
+end
+
+-- This function is not easy to understand. It works as follows:
+--
+-- - gather all parameter tensors for this module (and children);
+-- count all parameter values (floats)
+-- - create one ginormous memory area (Storage object) with room for all
+-- parameters
+-- - remap each parameter tensor to point to an area within the ginormous
+-- Storage, and copy it there
+--
+-- It has the effect of making all parameters point to the same memory area,
+-- which is then returned.
+--
+-- The purpose is to allow operations over all parameters (such as momentum
+-- updates and serialization), but it assumes that all parameters are of
+-- the same type (and, in the case of CUDA, on the same device), which
+-- is not always true. Use for_each() to iterate over this module and
+-- children instead.
+--
+-- Module._flattenTensorBuffer can be used by other packages (e.g. cunn)
+-- to specify the type of temporary buffers. For example, the temporary
+-- buffers for CudaTensor could be FloatTensor, to avoid GPU memory usage.
+--
+-- TODO: This logically belongs to torch.Tensor, not nn.
+Module._flattenTensorBuffer = {}
+function Module.flatten(parameters)
+
+ -- returns true if tensor occupies a contiguous region of memory (no holes)
+ local function isCompact(tensor)
+ local sortedStride, perm = torch.sort(
+ torch.LongTensor(tensor:nDimension()):set(tensor:stride()), 1, true)
+ local sortedSize = torch.LongTensor(tensor:nDimension()):set(
+ tensor:size()):index(1, perm)
+ local nRealDim = torch.clamp(sortedStride, 0, 1):sum()
+ sortedStride = sortedStride:narrow(1, 1, nRealDim):clone()
+ sortedSize = sortedSize:narrow(1, 1, nRealDim):clone()
+ local t = tensor.new():set(tensor:storage(), 1,
+ sortedSize:storage(),
+ sortedStride:storage())
+ return t:isContiguous()
+ end
+
+ if not parameters or #parameters == 0 then
+ return torch.Tensor()
+ end
+ local Tensor = parameters[1].new
+ local TmpTensor = Module._flattenTensorBuffer[torch.type(parameters[1])] or Tensor
+
+ -- 1. construct the set of all unique storages referenced by parameter tensors
+ local storages = {}
+ local nParameters = 0
+ local parameterMeta = {}
+ for k = 1,#parameters do
+ local param = parameters[k]
+ local storage = parameters[k]:storage()
+ local storageKey = torch.pointer(storage)
+
+ if not storages[storageKey] then
+ storages[storageKey] = {storage, nParameters}
+ nParameters = nParameters + storage:size()
+ end
+
+ parameterMeta[k] = {storageOffset = param:storageOffset() +
+ storages[storageKey][2],
+ size = param:size(),
+ stride = param:stride()}
+ end
+
+ -- 2. construct a single tensor that will hold all the parameters
+ local flatParameters = TmpTensor(nParameters):zero()
+
+ -- 3. determine if there are elements in the storage that none of the
+ -- parameter tensors reference ('holes')
+ local tensorsCompact = true
+ for k = 1,#parameters do
+ local meta = parameterMeta[k]
+ local tmp = TmpTensor():set(
+ flatParameters:storage(), meta.storageOffset, meta.size, meta.stride)
+ tmp:fill(1)
+ tensorsCompact = tensorsCompact and isCompact(tmp)
+ end
+
+ local maskParameters = flatParameters:byte():clone()
+ local compactOffsets = flatParameters:long():cumsum(1)
+ local nUsedParameters = compactOffsets[-1]
+
+ -- 4. copy storages into the flattened parameter tensor
+ for _, storageAndOffset in pairs(storages) do
+ local storage, offset = table.unpack(storageAndOffset)
+ flatParameters[{{offset+1,offset+storage:size()}}]:copy(Tensor():set(storage))
+ end
+
+ -- 5. allow garbage collection
+ storages = nil
+ for k = 1,#parameters do
+ parameters[k]:set(Tensor())
+ end
+
+ -- 6. compact the flattened parameters if there were holes
+ if nUsedParameters ~= nParameters then
+ assert(tensorsCompact,
+ "Cannot gather tensors that are not compact")
+
+ flatParameters = TmpTensor(nUsedParameters):copy(
+ flatParameters:maskedSelect(maskParameters))
+ for k = 1,#parameters do
+ parameterMeta[k].storageOffset =
+ compactOffsets[parameterMeta[k].storageOffset]
+ end
+ end
+
+ if TmpTensor ~= Tensor then
+ flatParameters = Tensor(flatParameters:nElement()):copy(flatParameters)
+ end
+
+ -- 7. fix up the parameter tensors to point at the flattened parameters
+ for k = 1,#parameters do
+ parameters[k]:set(flatParameters:storage(),
+ parameterMeta[k].storageOffset,
+ parameterMeta[k].size,
+ parameterMeta[k].stride)
+ end
+
+ return flatParameters
+end
+
+function Module:getParameters()
+ -- get parameters
+ local parameters,gradParameters = self:parameters()
+ local p, g = Module.flatten(parameters), Module.flatten(gradParameters)
+ assert(p:nElement() == g:nElement(),
+ 'check that you are sharing parameters and gradParameters')
+ if parameters then
+ for i=1,#parameters do
+ assert(parameters[i]:storageOffset() == gradParameters[i]:storageOffset(),
+ 'misaligned parameter at ' .. tostring(i))
+ end
+ end
+ return p, g
+end
+
+function Module:__call__(input, gradOutput)
+ self:forward(input)
+ if gradOutput then
+ self:backward(input, gradOutput)
+ return self.output, self.gradInput
+ else
+ return self.output
+ end
+end
+
+-- Run a callback (called with the module as an argument) in preorder over this
+-- module and its children.
+--
+function Module:apply(callback)
+ callback(self)
+
+ if self.modules then
+ for _, module in ipairs(self.modules) do
+ module:apply(callback)
+ end
+ end
+end
+
+function Module:findModules(typename, container)
+ container = container or self
+ local nodes = {}
+ local containers = {}
+ local mod_type = torch.typename(self)
+ if mod_type == typename then
+ nodes[#nodes+1] = self
+ containers[#containers+1] = container
+ end
+ -- Recurse on nodes with 'modules'
+ if (self.modules ~= nil) then
+ if (torch.type(self.modules) == 'table') then
+ for i = 1, #self.modules do
+ local child = self.modules[i]
+ local cur_nodes, cur_containers =
+ child:findModules(typename, self)
+ assert(#cur_nodes == #cur_containers,
+ 'Internal error: incorrect return length') -- This shouldn't happen
+            -- add the list items from our child to our list (i.e. return a
+            -- flattened table of the returned nodes).
+ for j = 1, #cur_nodes do
+ nodes[#nodes+1] = cur_nodes[j]
+ containers[#containers+1] = cur_containers[j]
+ end
+ end
+ end
+ end
+ return nodes, containers
+end
+
+-- returns a list of modules
+function Module:listModules()
+ local function tinsert(to, from)
+ if torch.type(from) == 'table' then
+ for i=1,#from do
+ tinsert(to,from[i])
+ end
+ else
+ table.insert(to,from)
+ end
+ end
+ -- include self first
+ local modules = {self}
+ if self.modules then
+ for i=1,#self.modules do
+         local childModules = self.modules[i]:listModules()
+         if childModules then
+            tinsert(modules, childModules)
+ end
+ end
+ end
+ return modules
+end
+
+function Module:clearState()
+ return nn.utils.clear(self, 'output', 'gradInput')
+end
+
+-- similar to apply, recursively goes over network and calls
+-- a callback function which returns a new module replacing the old one
+function nn.Module:replace(callback)
+ local out = callback(self)
+ if self.modules then
+ for i, module in ipairs(self.modules) do
+ self.modules[i] = module:replace(callback)
+ end
+ end
+ return out
+end
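
Module is the abstract base class of the package; the flatten/getParameters machinery above is what lets an optimizer treat all parameters and all gradients as two contiguous vectors. A minimal sketch of a manual SGD step through the flattened views (not from the upstream file):

    require 'nn'
    local net = nn.Linear(10, 2)
    local params, gradParams = net:getParameters()   -- flat 22-element vectors (10*2 + 2)
    local x = torch.randn(10)
    net:zeroGradParameters()
    net:forward(x)
    net:backward(x, torch.randn(2))
    params:add(-0.01, gradParams)                    -- in-place update through the flat view
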
diff --git a/contrib/lua-torch/nn/ModuleCriterion.lua b/contrib/lua-torch/nn/ModuleCriterion.lua
new file mode 100644
index 000000000..bfc79ef55
--- /dev/null
+++ b/contrib/lua-torch/nn/ModuleCriterion.lua
@@ -0,0 +1,44 @@
+local ModuleCriterion, parent = torch.class("nn.ModuleCriterion", "nn.Criterion")
+
+function ModuleCriterion:__init(criterion, inputModule, targetModule, castTarget)
+ self.inputModule = inputModule
+ self.targetModule = targetModule
+ self.castTarget = (castTarget == nil) and true or castTarget
+ if self.inputModule then
+ local params = self.inputModule:parameters()
+ if params and #params > 0 then
+ print"Warning: nn.ModuleCriterion doesn't support parameter updates"
+ end
+ end
+ self.criterion = criterion
+end
+
+function ModuleCriterion:updateOutput(input, target)
+ if self.inputModule then
+ self.input = self.inputModule:forward(input)
+ end
+ if self.targetModule then
+ self.target = self.targetModule:forward(target)
+ end
+ self.output = self.criterion:forward(self.input or input, self.target or target)
+ return self.output
+end
+
+function ModuleCriterion:updateGradInput(input, target)
+ self.gradInput = self.criterion:backward(self.input or input, self.target or target)
+ if self.inputModule then
+ self.gradInput = self.inputModule:backward(input, self.gradInput)
+ end
+ return self.gradInput
+end
+
+function ModuleCriterion:type(type, typecache)
+ if self.inputModule then
+ self.inputModule:type(type, typecache)
+ end
+ if self.castTarget and self.targetModule then
+ self.targetModule:type(type, typecache)
+ end
+ self.criterion:type(type, typecache)
+ return parent.type(self, type, typecache)
+end
diff --git a/contrib/lua-torch/nn/Mul.lua b/contrib/lua-torch/nn/Mul.lua
new file mode 100644
index 000000000..efa1db656
--- /dev/null
+++ b/contrib/lua-torch/nn/Mul.lua
@@ -0,0 +1,38 @@
+local Mul, parent = torch.class('nn.Mul', 'nn.Module')
+
+function Mul:__init()
+ parent.__init(self)
+
+ self.weight = torch.Tensor(1)
+ self.gradWeight = torch.Tensor(1)
+
+ self:reset()
+end
+
+
+function Mul:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(1))
+ end
+
+ self.weight:uniform(-stdv, stdv);
+end
+
+function Mul:updateOutput(input)
+ self.output:resizeAs(input):copy(input);
+ self.output:mul(self.weight[1]);
+ return self.output
+end
+
+function Mul:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input):zero()
+ self.gradInput:add(self.weight[1], gradOutput)
+ return self.gradInput
+end
+
+function Mul:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ self.gradWeight[1] = self.gradWeight[1] + scale*input:dot(gradOutput);
+end
diff --git a/contrib/lua-torch/nn/MulConstant.lua b/contrib/lua-torch/nn/MulConstant.lua
new file mode 100644
index 000000000..e8c473bee
--- /dev/null
+++ b/contrib/lua-torch/nn/MulConstant.lua
@@ -0,0 +1,41 @@
+local MulConstant, parent = torch.class('nn.MulConstant', 'nn.Module')
+
+function MulConstant:__init(constant_scalar,ip)
+ parent.__init(self)
+ assert(type(constant_scalar) == 'number', 'input is not scalar!')
+ self.constant_scalar = constant_scalar
+
+ -- default for inplace is false
+ self.inplace = ip or false
+ if (ip and type(ip) ~= 'boolean') then
+ error('in-place flag must be boolean')
+ end
+end
+
+function MulConstant:updateOutput(input)
+ if self.inplace then
+ input:mul(self.constant_scalar)
+ self.output:set(input)
+ else
+ self.output:resizeAs(input)
+ self.output:copy(input)
+ self.output:mul(self.constant_scalar)
+ end
+ return self.output
+end
+
+function MulConstant:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ if self.inplace then
+ gradOutput:mul(self.constant_scalar)
+ self.gradInput:set(gradOutput)
+ -- restore previous input value
+ input:div(self.constant_scalar)
+ else
+ self.gradInput:resizeAs(gradOutput)
+ self.gradInput:copy(gradOutput)
+ self.gradInput:mul(self.constant_scalar)
+ end
+ return self.gradInput
+ end
+end
diff --git a/contrib/lua-torch/nn/MultiCriterion.lua b/contrib/lua-torch/nn/MultiCriterion.lua
new file mode 100644
index 000000000..959317711
--- /dev/null
+++ b/contrib/lua-torch/nn/MultiCriterion.lua
@@ -0,0 +1,40 @@
+local MultiCriterion, parent = torch.class('nn.MultiCriterion', 'nn.Criterion')
+
+function MultiCriterion:__init()
+ parent.__init(self)
+ self.criterions = {}
+ self.weights = torch.DoubleStorage()
+end
+
+function MultiCriterion:add(criterion, weight)
+ assert(criterion, 'no criterion provided')
+ weight = weight or 1
+ table.insert(self.criterions, criterion)
+ self.weights:resize(#self.criterions, true)
+ self.weights[#self.criterions] = weight
+ return self
+end
+
+function MultiCriterion:updateOutput(input, target)
+ self.output = 0
+ for i=1,#self.criterions do
+ self.output = self.output + self.weights[i]*self.criterions[i]:updateOutput(input, target)
+ end
+ return self.output
+end
+
+function MultiCriterion:updateGradInput(input, target)
+ self.gradInput = nn.utils.recursiveResizeAs(self.gradInput, input)
+ nn.utils.recursiveFill(self.gradInput, 0)
+ for i=1,#self.criterions do
+ nn.utils.recursiveAdd(self.gradInput, self.weights[i], self.criterions[i]:updateGradInput(input, target))
+ end
+ return self.gradInput
+end
+
+function MultiCriterion:type(type)
+ for i,criterion in ipairs(self.criterions) do
+ criterion:type(type)
+ end
+ return parent.type(self, type)
+end
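
MultiCriterion evaluates several criteria on the same (input, target) pair and returns their weighted sum; since add() returns self, calls chain. A minimal sketch:

    require 'nn'
    local crit = nn.MultiCriterion()
       :add(nn.MSECriterion(), 0.5)
       :add(nn.AbsCriterion())                       -- default weight 1
    local input, target = torch.randn(4), torch.randn(4)
    local loss = crit:forward(input, target)         -- 0.5*MSE + 1*Abs
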
diff --git a/contrib/lua-torch/nn/MultiLabelMarginCriterion.lua b/contrib/lua-torch/nn/MultiLabelMarginCriterion.lua
new file mode 100644
index 000000000..908b6133c
--- /dev/null
+++ b/contrib/lua-torch/nn/MultiLabelMarginCriterion.lua
@@ -0,0 +1,41 @@
+local MultiLabelMarginCriterion, parent = torch.class('nn.MultiLabelMarginCriterion', 'nn.Criterion')
+
+function MultiLabelMarginCriterion:__init()
+ parent.__init(self)
+ self.sizeAverage = true
+ self.isTarget = torch.Tensor()
+end
+
+function MultiLabelMarginCriterion:updateOutput(input, target)
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ target = torch.CudaLongTensor and target:cudaLong() or target
+ else
+ target = target:long()
+ end
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.MultiLabelMarginCriterion_updateOutput(
+ input:cdata(),
+ target:cdata(),
+ self.output_tensor:cdata(),
+ self.isTarget:cdata(),
+ self.sizeAverage
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function MultiLabelMarginCriterion:updateGradInput(input, target)
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ target = torch.CudaLongTensor and target:cudaLong() or target
+ else
+ target = target:long()
+ end
+ input.THNN.MultiLabelMarginCriterion_updateGradInput(
+ input:cdata(),
+ target:cdata(),
+ self.gradInput:cdata(),
+ self.isTarget:cdata(),
+ self.sizeAverage
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/MultiLabelSoftMarginCriterion.lua b/contrib/lua-torch/nn/MultiLabelSoftMarginCriterion.lua
new file mode 100644
index 000000000..9d471d449
--- /dev/null
+++ b/contrib/lua-torch/nn/MultiLabelSoftMarginCriterion.lua
@@ -0,0 +1,86 @@
+--[[
+-- A multi-label multiclass criterion based on the sigmoid:
+--
+-- the loss is:
+-- l(x,y) = - sum_i (y[i] * log(p[i]) + (1 - y[i]) * log(1 - p[i]))
+-- where p[i] = exp(x[i]) / (1 + exp(x[i]))
+--
+-- and with weights:
+-- l(x,y) = - sum_i weights[i] * (y[i] * log(p[i]) + (1 - y[i]) * log(1 - p[i]))
+--
+-- This uses the stable form of the loss and gradients.
+--]]
+
+
+local MultiLabelSoftMarginCriterion, parent = torch.class('nn.MultiLabelSoftMarginCriterion', 'nn.Criterion')
+
+
+function MultiLabelSoftMarginCriterion:__init(weights, sizeAverage)
+ parent.__init(self)
+ if sizeAverage ~= nil then
+ self.sizeAverage = sizeAverage
+ else
+ self.sizeAverage = true
+ end
+ if weights ~= nil then
+ assert(weights:dim() == 1, "weights input should be 1-D Tensor")
+ self.weights = weights
+ end
+ self.sigmoid = nn.Sigmoid()
+end
+
+function MultiLabelSoftMarginCriterion:updateOutput(input, target)
+ local weights = self.weights
+ if weights ~= nil and target:dim() ~= 1 then
+ weights = self.weights:view(1, target:size(2)):expandAs(target)
+ end
+
+ local x = input:view(input:nElement())
+ local t = target:view(target:nElement())
+
+ self.sigmoid:updateOutput(x)
+
+ self._buffer1 = self._buffer1 or input.new()
+ self._buffer2 = self._buffer2 or input.new()
+
+ self._buffer1:ge(x, 0) -- indicator
+
+ -- log(1 + exp(x - cmul(x, indicator):mul(2)))
+ self._buffer2:cmul(x, self._buffer1):mul(-2):add(x):exp():add(1):log()
+ -- cmul(x, t - indicator)
+ self._buffer1:mul(-1):add(t):cmul(x)
+ -- log(1 + exp(x - cmul(x, indicator):mul(2))) - cmul(x, t - indicator)
+ self._buffer2:add(-1, self._buffer1)
+
+ if weights ~= nil then
+ self._buffer2:cmul(weights)
+ end
+
+ self.output = self._buffer2:sum()
+
+ if self.sizeAverage then
+ self.output = self.output / input:nElement()
+ end
+
+ return self.output
+end
+
+function MultiLabelSoftMarginCriterion:updateGradInput(input, target)
+ local weights = self.weights
+ if weights ~= nil and target:dim() ~= 1 then
+ weights = self.weights:view(1, target:size(2)):expandAs(target)
+ end
+
+ self.gradInput:resizeAs(input):copy(self.sigmoid.output)
+ self.gradInput:add(-1, target)
+
+ if weights ~= nil then
+ self.gradInput:cmul(weights)
+ end
+
+ if self.sizeAverage then
+ self.gradInput:div(target:nElement())
+ end
+
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/MultiMarginCriterion.lua b/contrib/lua-torch/nn/MultiMarginCriterion.lua
new file mode 100644
index 000000000..e3122386a
--- /dev/null
+++ b/contrib/lua-torch/nn/MultiMarginCriterion.lua
@@ -0,0 +1,64 @@
+local THNN = require 'nn.THNN'
+local MultiMarginCriterion, parent = torch.class('nn.MultiMarginCriterion', 'nn.Criterion')
+
+function MultiMarginCriterion:__init(p, weights, margin)
+ assert(p == nil or p == 1 or p == 2, 'only p=1 and p=2 supported')
+ self.p = p or 1
+ self.margin = margin or 1.0
+ parent.__init(self)
+ self.sizeAverage = true
+ if weights then
+ assert(weights:dim() == 1, "weights input should be 1-D Tensor")
+ self.weights = weights
+ end
+end
+
+function MultiMarginCriterion:updateOutput(input, target)
+ -- backward compatibility
+ if not torch.isTensor(target) then
+ self.target_tensor = self.target_tensor or torch.LongTensor(1)
+ self.target_tensor[1] = target
+ target = self.target_tensor
+ end
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ target = torch.CudaLongTensor and target:cudaLong() or target
+ else
+ target = target:long()
+ end
+ self.p = self.p or 1
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.MultiMarginCriterion_updateOutput(
+ input:cdata(),
+ target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage,
+ self.p,
+ THNN.optionalTensor(self.weights),
+ self.margin
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function MultiMarginCriterion:updateGradInput(input, target)
+ if not torch.isTensor(target) then
+ self.target_tensor = self.target_tensor or torch.LongTensor(1)
+ self.target_tensor[1] = target
+ target = self.target_tensor
+ end
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ target = torch.CudaLongTensor and target:cudaLong() or target
+ else
+ target = target:long()
+ end
+ input.THNN.MultiMarginCriterion_updateGradInput(
+ input:cdata(),
+ target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage,
+ self.p,
+ THNN.optionalTensor(self.weights),
+ self.margin
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/NaN.lua b/contrib/lua-torch/nn/NaN.lua
new file mode 100644
index 000000000..b80f6a04d
--- /dev/null
+++ b/contrib/lua-torch/nn/NaN.lua
@@ -0,0 +1,72 @@
+------------------------------------------------------------------------
+--[[ NaN ]]--
+-- Asserts that outputs and gradInputs do not contain NaNs.
+-- Useful for locating the source of NaN errors.
+------------------------------------------------------------------------
+local NaN, parent = torch.class("nn.NaN", "nn.Decorator")
+
+local idseq = 0
+function NaN.newId()
+ idseq = idseq + 1
+ return idseq
+end
+
+function NaN:__init(module, id)
+ parent.__init(self, module)
+ self.id = id or NaN.newId()
+end
+
+function NaN:recursiveIsNaN(tensor)
+ local isNaN = false
+ if torch.type(tensor) == 'table' then
+ for k,v in pairs(tensor) do
+ isNaN = self:recursiveIsNaN(v)
+ if isNaN then break end
+ end
+ else
+ local _ = require 'moses'
+ isNaN = _.isNaN(tensor:sum())
+ end
+ return isNaN
+end
+
+function NaN:updateOutput(input)
+ self.output = self.modules[1]:updateOutput(input)
+ if self:recursiveIsNaN(self.output) then
+ if self:recursiveIsNaN(input) then
+ error(string.format("NaN found in input of module :\n%s", self:__tostring__()))
+ elseif self:recursiveIsNaN(self:parameters()) then
+ error(string.format("NaN found in parameters of module :\n%s", self:__tostring__()))
+ end
+ error(string.format("NaN found in output of module :\n%s", self:__tostring__()))
+ end
+ return self.output
+end
+
+function NaN:updateGradInput(input, gradOutput)
+ self.gradInput = self.modules[1]:updateGradInput(input, gradOutput)
+ if self:recursiveIsNaN(self.gradInput) then
+ if self:recursiveIsNaN(gradOutput) then
+ error(string.format("NaN found in gradOutput of module :\n%s", self:__tostring__()))
+ end
+ error(string.format("NaN found in gradInput of module :\n%s", self:__tostring__()))
+ end
+ return self.gradInput
+end
+
+function NaN:accGradParameters(input, gradOutput, scale)
+ self.modules[1]:accGradParameters(input, gradOutput, scale)
+ local params, gradParams = self:parameters()
+ if self:recursiveIsNaN(gradParams) then
+ error(string.format("NaN found in gradParameters of module :\n%s", self:__tostring__()))
+ end
+end
+
+function NaN:__tostring__()
+ local selfstring = torch.type(self) .. '(' .. self.id .. ')'
+ if self.modules[1].__tostring__ then
+ return selfstring .. ' @ ' .. self.modules[1]:__tostring__()
+ else
+ return selfstring .. ' @ ' .. torch.type(self.modules[1])
+ end
+end
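
NaN decorates a module and raises as soon as a NaN appears, blaming the first culprit it can find (input, parameters, output, or gradients). A minimal sketch of how it trips (illustrative only):

    require 'nn'
    local layer = nn.NaN(nn.Linear(10, 10))          -- decorated module gets a unique id
    local ok, err = pcall(function()
       layer:forward(torch.Tensor(10):fill(0/0))     -- 0/0 produces NaN
    end)
    -- ok is false; err names the module whose input contained the NaN
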
diff --git a/contrib/lua-torch/nn/Narrow.lua b/contrib/lua-torch/nn/Narrow.lua
new file mode 100644
index 000000000..a6ebaa321
--- /dev/null
+++ b/contrib/lua-torch/nn/Narrow.lua
@@ -0,0 +1,45 @@
+local Narrow, parent = torch.class('nn.Narrow', 'nn.Module')
+
+function Narrow:__init(dimension,offset,length)
+ parent.__init(self)
+ self.dimension=dimension
+ self.index=offset
+ self.length=length or 1
+ if not dimension or not offset then
+ error('nn.Narrow(dimension, offset, length)')
+ end
+end
+
+function Narrow:updateOutput(input)
+ local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension
+ local length = self.length
+ if length < 0 then
+ length = input:size(dim) - self.index + self.length + 2
+ end
+ local index = self.index
+ if self.index < 0 then
+ index = 1
+ length = input:size(dim) - length
+ end
+ local output=input:narrow(dim, index, length)
+ self.output = self.output:typeAs(output)
+ self.output:resizeAs(output):copy(output)
+ return self.output
+end
+
+function Narrow:updateGradInput(input, gradOutput)
+ local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension
+ local length = self.length
+ if length < 0 then
+ length = input:size(dim) - self.index + self.length + 2
+ end
+ local index = self.index
+ if self.index < 0 then
+ index = 1
+ length = input:size(dim) - length
+ end
+ self.gradInput = self.gradInput:typeAs(input)
+ self.gradInput:resizeAs(input):zero()
+ self.gradInput:narrow(dim,index,length):copy(gradOutput)
+ return self.gradInput
+end
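
Narrow copies out a length-sized slice starting at offset along one dimension; the backward pass writes the gradient back into that slice of a zero tensor. A small worked sketch:

    require 'nn'
    local n = nn.Narrow(1, 2, 3)                     -- dimension 1, offset 2, length 3
    local x = torch.range(1, 5)
    local y = n:forward(x)                           -- {2, 3, 4}
    local gx = n:backward(x, torch.Tensor{1, 1, 1})  -- {0, 1, 1, 1, 0}
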
diff --git a/contrib/lua-torch/nn/NarrowTable.lua b/contrib/lua-torch/nn/NarrowTable.lua
new file mode 100644
index 000000000..17429f3b1
--- /dev/null
+++ b/contrib/lua-torch/nn/NarrowTable.lua
@@ -0,0 +1,43 @@
+local NarrowTable, parent = torch.class('nn.NarrowTable', 'nn.Module')
+
+function NarrowTable:__init(offset, length)
+ parent.__init(self)
+ self.offset = offset
+ self.length = length or 1
+ if not offset then
+ error('nn.NarrowTable(offset, length)')
+ end
+
+ self.output = {}
+ self.gradInput = {}
+end
+
+function NarrowTable:updateOutput(input)
+ for k,v in ipairs(self.output) do self.output[k] = nil end
+ for i=1,self.length do
+ self.output[i] = input[self.offset+i-1]
+ end
+ return self.output
+end
+
+function NarrowTable:updateGradInput(input, gradOutput)
+ for i=1,#gradOutput do
+ self.gradInput[self.offset+i-1] = gradOutput[i]
+ end
+ for i=1,#input do
+ if (i < self.offset) or (i >= self.offset + self.length) then
+ self.gradInput[i] = nn.utils.recursiveResizeAs(self.gradInput[i], input[i])
+ nn.utils.recursiveFill(self.gradInput[i], 0)
+ end
+ end
+ for i=#input+1,#self.gradInput do self.gradInput[i] = nil end
+ return self.gradInput
+end
+
+function NarrowTable:type(type, tensorCache)
+ self.output = {}
+ self.gradInput = {}
+ return parent.type(self, type, tensorCache)
+end
+
+NarrowTable.clearState = nn.Identity.clearState
diff --git a/contrib/lua-torch/nn/Normalize.lua b/contrib/lua-torch/nn/Normalize.lua
new file mode 100644
index 000000000..0937ebba9
--- /dev/null
+++ b/contrib/lua-torch/nn/Normalize.lua
@@ -0,0 +1,150 @@
+local Normalize, parent = torch.class('nn.Normalize', 'nn.Module')
+
+function Normalize:__init(p,eps)
+ parent.__init(self)
+ assert(p,'p-norm not provided')
+ assert(p > 0, p..'-norm not supported')
+ self.p = p
+ self.eps = eps or 1e-10
+end
+
+function Normalize:updateOutput(input)
+ assert(input:dim() <= 2, 'only 1d layer supported')
+ local input_size = input:size()
+ if input:dim() == 1 then
+ input = input:view(1,-1)
+ end
+
+ self._output = self._output or input.new()
+ self.norm = self.norm or input.new()
+ self.buffer = self.buffer or input.new()
+
+ self._output:resizeAs(input)
+
+ if self.p == math.huge then
+ -- specialization for the infinity norm
+ if not self._indices then
+ if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
+ self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
+ else
+ self._indices = torch.LongTensor()
+ end
+ end
+
+ self.buffer:abs(input)
+ torch.max(self.norm, self._indices, self.buffer, 2)
+ self.norm:add(self.eps)
+ else
+ self.normp = self.normp or input.new()
+ if self.p % 2 ~= 0 then
+ self.buffer:abs(input):pow(self.p)
+ else
+ self.buffer:pow(input,self.p)
+ end
+ self.normp:sum(self.buffer,2):add(self.eps)
+ self.norm:pow(self.normp,1/self.p)
+ end
+ self._output:cdiv(input, self.norm:view(-1,1):expandAs(input))
+
+ self.output:view(self._output, input_size)
+ return self.output
+end
+
+function Normalize:updateGradInput(input, gradOutput)
+   assert(input:dim() <= 2, 'only 1D or 2D (batched) input supported')
+   assert(gradOutput:dim() <= 2, 'only 1D or 2D (batched) gradOutput supported')
+
+ local input_size = input:size()
+ if input:dim() == 1 then
+ input = input:view(1,-1)
+ end
+
+ local n = input:size(1) -- batch size
+ local d = input:size(2) -- dimensionality of vectors
+
+ self._gradInput = self._gradInput or input.new()
+ self.cross = self.cross or input.new()
+ -- compute diagonal term with gradOutput
+ self._gradInput:resize(n,d)
+ if self.p == math.huge then
+ -- specialization for the inf case
+ self._gradInput:cmul(self.norm:view(n,1,1):expand(n,d,1),gradOutput)
+ self.buffer:resizeAs(input):zero()
+ self.cross:resize(n,1)
+ self.cross:gather(input,2,self._indices)
+ self.cross:cdiv(self.norm)
+ self.buffer:scatter(2,self._indices,self.cross)
+ else
+ self._gradInput:cmul(self.normp:view(n,1):expand(n,d), gradOutput)
+ -- small optimizations for different p
+ -- buffer = input*|input|^(p-2)
+ if self.p % 2 ~= 0 then
+ -- for non-even p, need to add absolute value
+ if self.p < 2 then
+ -- add eps to avoid possible division by 0
+ self.buffer:abs(input):add(self.eps):pow(self.p-2):cmul(input)
+ else
+ self.buffer:abs(input):pow(self.p-2):cmul(input)
+ end
+ elseif self.p == 2 then
+ -- special case for p == 2, pow(x,0) = 1
+ self.buffer:copy(input)
+ else
+ -- p is even and > 2, pow(x,p) is always positive
+ self.buffer:pow(input,self.p-2):cmul(input)
+ end
+ end
+ -- compute cross term in two steps
+ self.cross:resize(n,1)
+
+ -- instead of having a huge temporary matrix (b1*b2),
+ -- do the computations as b1*(b2*gradOutput). This avoids redundant
+ -- computation and also a huge buffer of size n*d^2
+ self.buffer2 = self.buffer2 or input.new() -- nxd
+ self.buffer2:cmul(input, gradOutput)
+ self.cross:sum(self.buffer2, 2)
+
+ self.buffer:cmul(self.cross:expandAs(self.buffer))
+ self._gradInput:add(-1, self.buffer)
+
+ -- reuse cross buffer for normalization
+ if self.p == math.huge then
+ self.cross:cmul(self.norm,self.norm)
+ else
+ self.cross:cmul(self.normp,self.norm)
+ end
+ self._gradInput:cdiv(self.cross:expand(n,d))
+
+ self.gradInput:view(self._gradInput, input_size)
+ return self.gradInput
+end
+
+function Normalize:__tostring__()
+ local s
+ -- different prints if the norm is integer
+ if self.p % 1 == 0 then
+ s = '%s(%d)'
+ else
+ s = '%s(%f)'
+ end
+ return string.format(s,torch.type(self),self.p)
+end
+
+function Normalize:type(type, tensorCache)
+ self._indices = nil
+ parent.type(self, type, tensorCache)
+ return self
+end
+
+function Normalize:clearState()
+ nn.utils.clear(self, {
+ '_output',
+ '_indices',
+ '_gradInput',
+ 'buffer',
+ 'norm',
+ 'normp',
+ 'cross',
+ })
+ return parent.clearState(self)
+end
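
Normalize divides each input row by its Lp norm (stabilized by eps), with a dedicated code path for the infinity norm. A small worked sketch, not part of the upstream file:

    require 'nn'
    local l2 = nn.Normalize(2)
    local y = l2:forward(torch.Tensor{3, 4})
    -- {0.6, 0.8}: the input divided by its L2 norm of 5 (up to the 1e-10 eps)
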
diff --git a/contrib/lua-torch/nn/OneHot.lua b/contrib/lua-torch/nn/OneHot.lua
new file mode 100644
index 000000000..d1dc1b52d
--- /dev/null
+++ b/contrib/lua-torch/nn/OneHot.lua
@@ -0,0 +1,69 @@
+local OneHot, parent = torch.class('nn.OneHot', 'nn.Module')
+
+-- adapted from https://github.com/karpathy/char-rnn
+-- and https://github.com/hughperkins/char-lstm
+
+function OneHot:__init(outputSize)
+ parent.__init(self)
+ self.outputSize = outputSize
+end
+
+function OneHot:updateOutput(input)
+ local size
+ if type(input) == 'number' then
+ if self:type() == 'torch.CudaTensor' then
+ self._single = self._single or torch.CudaTensor():resize(1);
+ else
+ self._single = self._single or torch.LongTensor():resize(1);
+ end
+ self._single[1] = input
+ input = self._single;
+ size = {}
+ else
+ size = input:size():totable()
+ end
+ table.insert(size, self.outputSize)
+
+ self.output:resize(table.unpack(size)):zero()
+
+ size[#size] = 1
+ local input_ = input:view(table.unpack(size))
+
+ if torch.type(input) == 'torch.CudaTensor' or torch.type(input) == 'torch.ClTensor' then
+ self.output:scatter(self.output:dim(), input_, 1)
+ else
+ if torch.type(self.output) == 'torch.CudaTensor' then
+ -- input is not cuda, module is, cast input to cuda
+ self._input = self._input or torch.CudaTensor()
+ self._input:resize(input_:size()):copy(input_)
+ input_ = self._input
+ elseif torch.type(input) ~= 'torch.LongTensor' then
+         -- input is not long, module is not cuda, cast input to long
+ self._input = self._input or torch.LongTensor()
+ self._input:resize(input_:size()):copy(input_)
+ input_ = self._input
+ end
+ self.output:scatter(self.output:dim(), input_, 1)
+ end
+
+ return self.output
+end
+
+function OneHot:updateGradInput(input, gradOutput)
+ if type(input) == 'number' then
+ return 0
+ else
+ self.gradInput:resize(input:size()):zero()
+ return self.gradInput
+ end
+end
+
+function OneHot:clearState()
+ self._single = nil
+ self._input = nil
+end
+
+function OneHot:type(type, typecache)
+ self:clearState()
+ return parent.type(self, type, typecache)
+end
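
OneHot scatters integer class indices into one-hot rows of width outputSize, casting the index tensor as needed for CPU or CUDA. A minimal sketch:

    require 'nn'
    local oh = nn.OneHot(4)
    local y = oh:forward(torch.LongTensor{2, 4})
    -- y = {{0, 1, 0, 0},
    --      {0, 0, 0, 1}}
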
diff --git a/contrib/lua-torch/nn/PReLU.lua b/contrib/lua-torch/nn/PReLU.lua
new file mode 100644
index 000000000..2e58fba4e
--- /dev/null
+++ b/contrib/lua-torch/nn/PReLU.lua
@@ -0,0 +1,52 @@
+local PReLU, parent = torch.class('nn.PReLU','nn.Module')
+
+function PReLU:__init(nOutputPlane)
+ parent.__init(self)
+ -- if no argument provided, use shared model (weight is scalar)
+ self.nOutputPlane = nOutputPlane or 0
+ self.weight = torch.Tensor(nOutputPlane or 1):fill(0.25)
+ self.gradWeight = torch.Tensor(nOutputPlane or 1)
+end
+
+function PReLU:updateOutput(input)
+ input.THNN.PReLU_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.nOutputPlane
+ )
+ return self.output
+end
+
+function PReLU:updateGradInput(input, gradOutput)
+ input.THNN.PReLU_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.nOutputPlane
+ )
+ return self.gradInput
+end
+
+function PReLU:accGradParameters(input, gradOutput, scale)
+ self.gradWeightBuf = self.gradWeightBuf or input.new()
+ self.gradWeightBuf2 = self.gradWeightBuf2 or input.new()
+ input.THNN.PReLU_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.gradWeight:cdata(),
+ self.gradWeightBuf:cdata(),
+ self.gradWeightBuf2:cdata(),
+ self.nOutputPlane,
+ scale or 1
+ )
+ return self.gradWeight
+end
+
+function PReLU:clearState()
+ nn.utils.clear(self, 'gradWeightBuf', 'gradWeightBuf2')
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/Padding.lua b/contrib/lua-torch/nn/Padding.lua
new file mode 100644
index 000000000..d5f7771d0
--- /dev/null
+++ b/contrib/lua-torch/nn/Padding.lua
@@ -0,0 +1,65 @@
+local Padding, parent = torch.class('nn.Padding', 'nn.Module')
+
+-- Padding adds [pad] elements of [value] along dimension [dim], starting at index [index] in that dimension; if pad < 0, index counts from the left, and if pad > 0, index counts from the right.
+-- index = 1 pads before index 1; index = 2 pads starting before index 2 and after index 1 in dimension [dim].
+function Padding:__init(dim, pad, nInputDim, value, index)
+ self.value = value or 0
+ self.index = index or 1
+ self.dim = dim
+ self.pad = pad
+ self.nInputDim = nInputDim
+ self.outputSize = torch.LongStorage()
+ parent.__init(self)
+end
+
+function Padding:updateOutput(input)
+ self.outputSize:resize(input:dim())
+ self.outputSize:copy(input:size())
+ local dim = self.dim
+ if self.nInputDim and input:dim() ~= self.nInputDim then
+ dim = dim + 1
+ end
+ self.outputSize[dim] = self.outputSize[dim] + math.abs(self.pad)
+ self.output:resize(self.outputSize)
+ self.output:fill(self.value)
+ local index = self.index
+ local pad = self.pad
+ if pad > 0 then
+ index = input:size(dim) - index + 2
+ else
+ pad = -pad
+ end
+ if index == 1 then
+ self.output:narrow(dim, 1 + pad, input:size(dim)):copy(input)
+ elseif index == input:size(dim) + 1 then
+ self.output:narrow(dim, 1, input:size(dim)):copy(input)
+ else
+ self.output:narrow(dim, 1, index - 1):copy(input:narrow(dim, 1, index - 1))
+ self.output:narrow(dim, index + pad, input:size(dim) - (index - 1)):copy(input:narrow(dim, index, input:size(dim) - (index - 1)))
+ end
+ return self.output
+end
+
+function Padding:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input)
+ local dim = self.dim
+ if self.nInputDim and input:dim() ~= self.nInputDim then
+ dim = dim + 1
+ end
+ local index = self.index
+ local pad = self.pad
+ if pad > 0 then
+ index = input:size(dim) - index + 2
+ else
+ pad = -pad
+ end
+ if index == 1 then
+ self.gradInput:copy(gradOutput:narrow(dim, 1 + pad, input:size(dim)))
+ elseif index == input:size(dim) + 1 then
+ self.gradInput:copy(gradOutput:narrow(dim, 1, input:size(dim)))
+ else
+ self.gradInput:narrow(dim, 1, index - 1):copy(gradOutput:narrow(dim, 1, index - 1))
+ self.gradInput:narrow(dim, index, input:size(dim) - (index - 1)):copy(gradOutput:narrow(dim, index + pad, input:size(dim) - (index - 1)))
+ end
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/PairwiseDistance.lua b/contrib/lua-torch/nn/PairwiseDistance.lua
new file mode 100644
index 000000000..99a502c16
--- /dev/null
+++ b/contrib/lua-torch/nn/PairwiseDistance.lua
@@ -0,0 +1,91 @@
+local PairwiseDistance, parent = torch.class('nn.PairwiseDistance', 'nn.Module')
+
+function PairwiseDistance:__init(p)
+ parent.__init(self)
+
+ -- state
+ self.gradInput = {}
+ self.diff = torch.Tensor()
+   self.norm = p or 2 -- default to Euclidean (L2) distance
+end
+
+function PairwiseDistance:updateOutput(input)
+ self.output:resize(1)
+ if input[1]:dim() == 1 then
+ self.output:resize(1)
+ self.output[1]=input[1]:dist(input[2],self.norm)
+ elseif input[1]:dim() == 2 then
+ self.diff = self.diff or input[1].new()
+ self.diff:resizeAs(input[1])
+
+ local diff = self.diff:zero()
+ diff:add(input[1], -1, input[2])
+ diff:abs()
+
+ self.output:resize(input[1]:size(1))
+ self.output:zero()
+ self.output:add(diff:pow(self.norm):sum(2))
+ self.output:pow(1./self.norm)
+ else
+ error('input must be vector or matrix')
+ end
+
+ return self.output
+end
+
+local function mathsign(x)
+ if x==0 then return 2*torch.random(2)-3; end
+ if x>0 then return 1; else return -1; end
+end
+
+function PairwiseDistance:updateGradInput(input, gradOutput)
+ if input[1]:dim() > 2 then
+ error('input must be vector or matrix')
+ end
+
+ self.gradInput[1] = (self.gradInput[1] or input[1].new()):resize(input[1]:size())
+ self.gradInput[2] = (self.gradInput[2] or input[2].new()):resize(input[2]:size())
+ self.gradInput[1]:copy(input[1])
+ self.gradInput[1]:add(-1, input[2])
+
+ if self.norm==1 then
+ self.gradInput[1]:apply(mathsign)
+ else
+ -- Note: derivative of p-norm:
+ -- d/dx_k(||x||_p) = (x_k * abs(x_k)^(p-2)) / (||x||_p)^(p-1)
+ if (self.norm > 2) then
+ self.gradInput[1]:cmul(self.gradInput[1]:clone():abs():pow(self.norm-2))
+ end
+
+ if (input[1]:dim() > 1) then
+ self.outExpand = self.outExpand or self.output.new()
+ self.outExpand:resize(self.output:size(1), 1)
+ self.outExpand:copy(self.output)
+ self.outExpand:add(1.0e-6) -- Prevent divide by zero errors
+ self.outExpand:pow(-(self.norm-1))
+ self.gradInput[1]:cmul(self.outExpand:expand(self.gradInput[1]:size(1),
+ self.gradInput[1]:size(2)))
+ else
+ self.gradInput[1]:mul(math.pow(self.output[1] + 1e-6, -(self.norm-1)))
+ end
+ end
+ if input[1]:dim() == 1 then
+ self.gradInput[1]:mul(gradOutput[1])
+ else
+ self.grad = self.grad or gradOutput.new()
+ self.ones = self.ones or gradOutput.new()
+
+ self.grad:resizeAs(input[1]):zero()
+ self.ones:resize(input[1]:size(2)):fill(1)
+
+ self.grad:addr(gradOutput, self.ones)
+ self.gradInput[1]:cmul(self.grad)
+ end
+ self.gradInput[2]:zero():add(-1, self.gradInput[1])
+ return self.gradInput
+end
+
+function PairwiseDistance:clearState()
+ nn.utils.clear(self, 'diff', 'outExpand', 'grad', 'ones')
+ return parent.clearState(self)
+end
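+
+--[[ Usage sketch (illustrative): the module takes a table of two tensors and
+returns the pairwise p-norm distance between them.
+
+   local d = nn.PairwiseDistance(2)                       -- Euclidean distance
+   local out = d:forward{torch.randn(5), torch.randn(5)}  -- 1-element tensor
+]]--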
diff --git a/contrib/lua-torch/nn/Parallel.lua b/contrib/lua-torch/nn/Parallel.lua
new file mode 100644
index 000000000..58cb9748e
--- /dev/null
+++ b/contrib/lua-torch/nn/Parallel.lua
@@ -0,0 +1,116 @@
+local Parallel, parent = torch.class('nn.Parallel', 'nn.Container')
+
+function Parallel:__init(inputDimension,outputDimension)
+ parent.__init(self)
+ self.modules = {}
+ self.inputDimension = inputDimension
+ self.outputDimension = outputDimension
+end
+
+function Parallel:updateOutput(input)
+ local nModule=input:size(self.inputDimension)
+ local outputs = {}
+ self.totalOutputSize = self.totalOutputSize or torch.LongStorage()
+ local totalOutputSize = self.totalOutputSize
+
+ for i=1,nModule do
+ local currentInput = input:select(self.inputDimension,i)
+ local currentOutput = self:rethrowErrors(self.modules[i], i, 'updateOutput', currentInput)
+ table.insert(outputs, currentOutput)
+ local outputSize = currentOutput:size(self.outputDimension)
+
+ if i == 1 then
+ totalOutputSize:resize(currentOutput:dim()):copy(currentOutput:size())
+ else
+ totalOutputSize[self.outputDimension] = totalOutputSize[self.outputDimension] + outputSize
+ end
+
+ end
+ self.output:resize(totalOutputSize)
+
+ local offset = 1
+ for i=1,nModule do
+ local currentOutput = outputs[i]
+ local outputSize = currentOutput:size(self.outputDimension)
+ self.output:narrow(self.outputDimension, offset, outputSize):copy(currentOutput)
+ offset = offset + currentOutput:size(self.outputDimension)
+ end
+ return self.output
+end
+
+function Parallel:updateGradInput(input, gradOutput)
+ local nModule=input:size(self.inputDimension)
+ self.gradInput:resizeAs(input)
+
+ local offset = 1
+ for i=1,nModule do
+ local module=self.modules[i]
+ local currentInput = input:select(self.inputDimension,i)
+ local currentOutput = module.output
+ local outputSize = currentOutput:size(self.outputDimension)
+ local currentGradOutput = gradOutput:narrow(self.outputDimension, offset, outputSize)
+
+ local currentGradInput = self:rethrowErrors(module, i, 'updateGradInput', currentInput, currentGradOutput)
+
+ self.gradInput:select(self.inputDimension,i):copy(currentGradInput)
+ offset = offset + outputSize
+ end
+ return self.gradInput
+end
+
+function Parallel:accGradParameters(input, gradOutput, scale)
+ local nModule=input:size(self.inputDimension)
+
+ local offset = 1
+ for i=1,nModule do
+ local module = self.modules[i]
+ local currentOutput = module.output
+ local outputSize = currentOutput:size(self.outputDimension)
+
+ self:rethrowErrors(module, i, 'accGradParameters',
+ input:select(self.inputDimension,i),
+ gradOutput:narrow(self.outputDimension, offset,outputSize),
+ scale)
+
+ offset = offset + outputSize
+ end
+end
+
+function Parallel:accUpdateGradParameters(input, gradOutput, lr)
+ local nModule=input:size(self.inputDimension)
+
+ local offset = 1
+ for i=1,nModule do
+      local module = self.modules[i]
+ local currentOutput = module.output
+ self:rethrowErrors(module, i, 'accUpdateGradParameters',
+ input:select(self.inputDimension,i),
+ gradOutput:narrow(self.outputDimension, offset,
+ currentOutput:size(self.outputDimension)),
+ lr)
+
+ offset = offset + currentOutput:size(self.outputDimension)
+ end
+end
+
+function Parallel:__tostring__()
+ local tab = ' '
+ local line = '\n'
+ local next = ' |`-> '
+ local lastNext = ' `-> '
+ local ext = ' | '
+ local extlast = ' '
+ local last = ' ... -> '
+ local str = torch.type(self)
+ str = str .. ' {' .. line .. tab .. 'input'
+ for i=1,#self.modules do
+ if i == #self.modules then
+ str = str .. line .. tab .. lastNext .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. extlast)
+ else
+ str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. ext)
+ end
+ end
+ str = str .. line .. tab .. last .. 'output'
+ str = str .. line .. '}'
+ return str
+end
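+
+--[[ Usage sketch (illustrative): each slice of the input along inputDimension is
+fed to one child module, and the child outputs are concatenated along
+outputDimension.
+
+   local p = nn.Parallel(1, 1)    -- split input on dim 1, join outputs on dim 1
+   p:add(nn.Linear(10, 3))
+   p:add(nn.Linear(10, 2))
+   p:forward(torch.randn(2, 10))  -- returns a 5-element tensor
+]]--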
diff --git a/contrib/lua-torch/nn/ParallelCriterion.lua b/contrib/lua-torch/nn/ParallelCriterion.lua
new file mode 100644
index 000000000..45607d5c3
--- /dev/null
+++ b/contrib/lua-torch/nn/ParallelCriterion.lua
@@ -0,0 +1,41 @@
+local ParallelCriterion, parent = torch.class('nn.ParallelCriterion', 'nn.Criterion')
+
+function ParallelCriterion:__init(repeatTarget)
+ parent.__init(self)
+ self.criterions = {}
+ self.weights = {}
+ self.gradInput = {}
+ self.repeatTarget = repeatTarget
+end
+
+function ParallelCriterion:add(criterion, weight)
+ assert(criterion, 'no criterion provided')
+ weight = weight or 1
+ table.insert(self.criterions, criterion)
+ table.insert(self.weights, weight)
+ return self
+end
+
+function ParallelCriterion:updateOutput(input, target)
+ self.output = 0
+ for i,criterion in ipairs(self.criterions) do
+ local target = self.repeatTarget and target or target[i]
+ self.output = self.output + self.weights[i]*criterion:updateOutput(input[i],target)
+ end
+ return self.output
+end
+
+function ParallelCriterion:updateGradInput(input, target)
+ self.gradInput = nn.utils.recursiveResizeAs(self.gradInput, input)
+ nn.utils.recursiveFill(self.gradInput, 0)
+ for i,criterion in ipairs(self.criterions) do
+ local target = self.repeatTarget and target or target[i]
+ nn.utils.recursiveAdd(self.gradInput[i], self.weights[i], criterion:updateGradInput(input[i], target))
+ end
+ return self.gradInput
+end
+
+function ParallelCriterion:type(type, tensorCache)
+ self.gradInput = {}
+ return parent.type(self, type, tensorCache)
+end
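+
+--[[ Usage sketch (illustrative; the x/t pairs are assumed defined elsewhere):
+each criterion is applied to the corresponding input and target (unless
+repeatTarget is set), and the weighted losses are summed.
+
+   local pc = nn.ParallelCriterion()
+   pc:add(nn.MSECriterion()):add(nn.ClassNLLCriterion(), 0.5)
+   local loss = pc:forward({x1, x2}, {t1, t2}) -- mse(x1,t1) + 0.5*nll(x2,t2)
+]]--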
diff --git a/contrib/lua-torch/nn/ParallelTable.lua b/contrib/lua-torch/nn/ParallelTable.lua
new file mode 100644
index 000000000..2fe0899dd
--- /dev/null
+++ b/contrib/lua-torch/nn/ParallelTable.lua
@@ -0,0 +1,58 @@
+local ParallelTable, parent = torch.class('nn.ParallelTable', 'nn.Container')
+
+function ParallelTable:__init()
+ parent.__init(self)
+ self.modules = {}
+ self.output = {}
+ self.gradInput = {}
+end
+
+function ParallelTable:updateOutput(input)
+ for i=1,#self.modules do
+ self.output[i] = self:rethrowErrors(self.modules[i], i, 'updateOutput', input[i])
+ end
+ return self.output
+end
+
+function ParallelTable:updateGradInput(input, gradOutput)
+ for i,module in ipairs(self.modules) do
+ self.gradInput[i] = self:rethrowErrors(module, i, 'updateGradInput', input[i], gradOutput[i])
+ end
+ return self.gradInput
+end
+
+function ParallelTable:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ for i,module in ipairs(self.modules) do
+ self:rethrowErrors(module, i, 'accGradParameters', input[i], gradOutput[i], scale)
+ end
+end
+
+function ParallelTable:accUpdateGradParameters(input, gradOutput, lr)
+ lr = lr or 1
+ for i,module in ipairs(self.modules) do
+ self:rethrowErrors(module, i, 'accUpdateGradParameters', input[i], gradOutput[i], lr)
+ end
+end
+
+function ParallelTable:__tostring__()
+ local tab = ' '
+ local line = '\n'
+ local next = ' |`-> '
+ local lastNext = ' `-> '
+ local ext = ' | '
+ local extlast = ' '
+ local last = ' ... -> '
+ local str = torch.type(self)
+ str = str .. ' {' .. line .. tab .. 'input'
+ for i=1,#self.modules do
+ if i == #self.modules then
+ str = str .. line .. tab .. lastNext .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. extlast)
+ else
+ str = str .. line .. tab .. next .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab .. ext)
+ end
+ end
+ str = str .. line .. tab .. last .. 'output'
+ str = str .. line .. '}'
+ return str
+end
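+
+--[[ Usage sketch (illustrative): the i-th module is applied to the i-th table
+entry, producing a table of outputs.
+
+   local pt = nn.ParallelTable()
+   pt:add(nn.Linear(10, 2)):add(nn.Linear(5, 3))
+   local out = pt:forward{torch.randn(10), torch.randn(5)} -- {2-vector, 3-vector}
+]]--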
diff --git a/contrib/lua-torch/nn/PartialLinear.lua b/contrib/lua-torch/nn/PartialLinear.lua
new file mode 100644
index 000000000..6e92cfc08
--- /dev/null
+++ b/contrib/lua-torch/nn/PartialLinear.lua
@@ -0,0 +1,114 @@
+local PartialLinear, Module = torch.class('nn.PartialLinear', 'nn.Module')
+
+--[[
+
+PartialLinear is a Linear layer that allows the user to set a collection of
+column indices. When the column indices are set, the layer will behave like a
+Linear layer that only has those columns. Meanwhile, all parameters are
+preserved, so resetting the PartialLinear layer will result in a module that
+behaves just like a regular Linear layer.
+
+This module is useful, for instance, when you want to do forward-backward on
+only a subset of a Linear layer during training but use the full Linear layer
+at test time.
+
+]]--
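+
+--[[ Usage sketch (illustrative):
+
+   local m = nn.PartialLinear(5, 100)
+   m:setPartition(torch.Tensor{2, 5, 7})     -- act as output columns {2, 5, 7} only
+   local out = m:forward(torch.randn(4, 5))  -- 4 x 3 output
+   m:resetPartition()                        -- back to the full 4 x 100 output
+]]--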
+
+function PartialLinear:__init(inputsize, outputsize, bias)
+ local bias = ((bias == nil) and true) or bias
+ Module.__init(self)
+
+ -- define the layer as a small network:
+ local pt = nn.ParallelTable()
+ pt:add(nn.Identity()):add(nn.LookupTable(outputsize, inputsize))
+ self.network = nn.Sequential():add(pt):add(nn.MM(false, true))
+ if bias then
+ self.bias = torch.Tensor(1, outputsize):zero()
+ self.gradBias = torch.Tensor(1, outputsize):zero()
+ end
+
+ -- set partition:
+ self.inputsize = inputsize
+ self.outputsize = outputsize
+ self.allcolumns = torch.range(1, self.outputsize)
+ self:resetPartition()
+end
+
+function PartialLinear:setPartition(indices)
+ self.partition = indices:type(self.allcolumns:type())
+end
+
+function PartialLinear:resetPartition()
+ self.partition = self.allcolumns
+end
+
+function PartialLinear:parameters()
+ return {self.network:get(1):get(2).weight, self.bias},
+ {self.network:get(1):get(2).gradWeight, self.gradBias}
+end -- should return only the relevant partition?
+
+function PartialLinear:updateOutput(input)
+ self.output:set(self.network:forward{input, self.partition})
+ if self.bias then
+ self.output:add(
+ self.bias:index(2, self.partition:long()):expandAs(self.output)
+ )
+ self.addBuffer = self.addBuffer or input.new()
+ if self.addBuffer:nElement() ~= input:size(1) then
+ self.addBuffer:resize(input:size(1)):fill(1)
+ end
+ end
+ return self.output
+end
+
+function PartialLinear:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ self.network:updateGradInput({input, self.partition}, gradOutput)
+ self.gradInput:set(self.network.gradInput[1])
+ end
+ return self.gradInput
+end
+
+function PartialLinear:accGradParameters(input, gradOutput, scale)
+ local scale = scale or 1
+ self.network:accGradParameters({input, self.partition}, gradOutput, scale)
+ if self.bias then
+ self.buffer = self.buffer or input.new()
+ self.buffer:resize(gradOutput:size(2))
+ self.buffer:mv(gradOutput:t(), self.addBuffer):mul(scale)
+ self.gradBias:indexAdd(
+ 2, self.partition:long(), self.buffer:view(1, self.buffer:nElement())
+ )
+ end
+end
+
+function PartialLinear:accUpdateGradParameters(input, gradOutput, lr)
+ local gradWeight = self.network:get(1):get(2).gradWeight
+ local gradBias = self.gradBias
+ self.network:get(1):get(2).gradWeight = self.network:get(1):get(2).weight
+ self.gradBias = self.bias
+ self:accGradParameters(input, gradOutput, -lr)
+ self.network:get(1):get(2).gradWeight = gradWeight
+ self.gradBias = gradBias
+end
+
+function PartialLinear:zeroGradParameters()
+ self.network:zeroGradParameters()
+ self.gradBias:zero()
+end
+
+function PartialLinear:updateParameters(learningRate)
+ self.network:updateParameters(learningRate)
+ self.bias:add(-learningRate, self.gradBias)
+end
+
+function PartialLinear:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ -- we do not need to accumulate parameters when sharing:
+ self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+end
+
+function PartialLinear:__tostring__()
+ return torch.type(self) ..
+ string.format('(%d -> %d)', self.inputsize, self.outputsize) ..
+ (self.bias == nil and ' without bias' or '')
+end
diff --git a/contrib/lua-torch/nn/PixelShuffle.lua b/contrib/lua-torch/nn/PixelShuffle.lua
new file mode 100644
index 000000000..dd58ed948
--- /dev/null
+++ b/contrib/lua-torch/nn/PixelShuffle.lua
@@ -0,0 +1,111 @@
+local PixelShuffle, parent = torch.class("nn.PixelShuffle", "nn.Module")
+
+-- Shuffles pixels after upscaling with an ESPCN model
+-- Converts a [batch x channel*r^2 x m x p] tensor to [batch x channel x r*m x r*p]
+-- tensor, where r is the upscaling factor.
+-- @param upscaleFactor - the upscaling factor to use
+function PixelShuffle:__init(upscaleFactor)
+ parent.__init(self)
+ self.upscaleFactor = upscaleFactor
+ self.upscaleFactorSquared = self.upscaleFactor * self.upscaleFactor
+end
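+
+--[[ Usage sketch (illustrative):
+
+   local ps = nn.PixelShuffle(2)
+   ps:forward(torch.randn(1, 12, 4, 4)) -- returns a 1 x 3 x 8 x 8 tensor
+]]--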
+
+-- Computes the forward pass of the layer i.e. Converts a
+-- [batch x channel*r^2 x m x p] tensor to [batch x channel x r*m x r*p] tensor.
+-- @param input - the input tensor to be shuffled of size [b x c*r^2 x m x p]
+-- @return output - the shuffled tensor of size [b x c x r*m x r*p]
+function PixelShuffle:updateOutput(input)
+ self._intermediateShape = self._intermediateShape or torch.LongStorage(6)
+   self._outShape = self._outShape or torch.LongStorage()
+ self._shuffleOut = self._shuffleOut or input.new()
+
+ local batched = false
+ local batchSize = 1
+ local inputStartIdx = 1
+ local outShapeIdx = 1
+ if input:nDimension() == 4 then
+ batched = true
+ batchSize = input:size(1)
+ inputStartIdx = 2
+ outShapeIdx = 2
+ self._outShape:resize(4)
+ self._outShape[1] = batchSize
+ else
+ self._outShape:resize(3)
+ end
+
+   -- input has c*r^2 channels over an (h/r) x (w/r) grid; view it as [batch x c x r x r x h/r x w/r] before permuting
+ local channels = input:size(inputStartIdx) / self.upscaleFactorSquared
+ local inHeight = input:size(inputStartIdx + 1)
+ local inWidth = input:size(inputStartIdx + 2)
+
+ self._intermediateShape[1] = batchSize
+ self._intermediateShape[2] = channels
+ self._intermediateShape[3] = self.upscaleFactor
+ self._intermediateShape[4] = self.upscaleFactor
+ self._intermediateShape[5] = inHeight
+ self._intermediateShape[6] = inWidth
+
+ self._outShape[outShapeIdx] = channels
+ self._outShape[outShapeIdx + 1] = inHeight * self.upscaleFactor
+ self._outShape[outShapeIdx + 2] = inWidth * self.upscaleFactor
+
+ local inputView = torch.view(input, self._intermediateShape)
+
+ self._shuffleOut:resize(inputView:size(1), inputView:size(2), inputView:size(5),
+ inputView:size(3), inputView:size(6), inputView:size(4))
+ self._shuffleOut:copy(inputView:permute(1, 2, 5, 3, 6, 4))
+
+ self.output = torch.view(self._shuffleOut, self._outShape)
+
+ return self.output
+end
+
+-- Computes the backward pass of the layer, given the gradient w.r.t. the output
+-- this function computes the gradient w.r.t. the input.
+-- @param input - the input tensor of shape [b x c*r^2 x m x p]
+-- @param gradOutput - the tensor with the gradients w.r.t. output of shape [b x c x r*m x r*p]
+-- @return gradInput - a tensor of the same shape as input, representing the gradient w.r.t. input.
+function PixelShuffle:updateGradInput(input, gradOutput)
+ self._intermediateShape = self._intermediateShape or torch.LongStorage(6)
+ self._shuffleIn = self._shuffleIn or input.new()
+
+ local batchSize = 1
+ local inputStartIdx = 1
+ if input:nDimension() == 4 then
+ batchSize = input:size(1)
+ inputStartIdx = 2
+ end
+
+ local channels = input:size(inputStartIdx) / self.upscaleFactorSquared
+ local height = input:size(inputStartIdx + 1)
+ local width = input:size(inputStartIdx + 2)
+
+ self._intermediateShape[1] = batchSize
+ self._intermediateShape[2] = channels
+ self._intermediateShape[3] = height
+ self._intermediateShape[4] = self.upscaleFactor
+ self._intermediateShape[5] = width
+ self._intermediateShape[6] = self.upscaleFactor
+
+ local gradOutputView = torch.view(gradOutput, self._intermediateShape)
+
+ self._shuffleIn:resize(gradOutputView:size(1), gradOutputView:size(2), gradOutputView:size(4),
+ gradOutputView:size(6), gradOutputView:size(3), gradOutputView:size(5))
+ self._shuffleIn:copy(gradOutputView:permute(1, 2, 4, 6, 3, 5))
+
+ self.gradInput = torch.view(self._shuffleIn, input:size())
+
+ return self.gradInput
+end
+
+
+function PixelShuffle:clearState()
+ nn.utils.clear(self, {
+ "_intermediateShape",
+ "_outShape",
+ "_shuffleIn",
+ "_shuffleOut",
+ })
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/Power.lua b/contrib/lua-torch/nn/Power.lua
new file mode 100644
index 000000000..771183c48
--- /dev/null
+++ b/contrib/lua-torch/nn/Power.lua
@@ -0,0 +1,22 @@
+local Power, parent = torch.class('nn.Power','nn.Module')
+
+function Power:__init(p)
+ parent.__init(self)
+ self.pow = p
+ if not p then
+ error('nn.Power(power)')
+ end
+end
+
+function Power:updateOutput(input)
+ self.output:resizeAs(input):copy(input)
+ self.output:pow(self.pow)
+ return self.output
+end
+
+function Power:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input):copy(input)
+ self.gradInput:pow(self.pow - 1)
+ self.gradInput:cmul(gradOutput):mul(self.pow)
+ return self.gradInput
+end
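+
+--[[ Usage sketch (illustrative): the gradient above implements
+d/dx x^p = p * x^(p-1), chained with gradOutput.
+
+   local sq = nn.Power(2)
+   sq:forward(torch.Tensor{1, 2, 3}) -- {1, 4, 9}
+]]--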
diff --git a/contrib/lua-torch/nn/PrintSize.lua b/contrib/lua-torch/nn/PrintSize.lua
new file mode 100644
index 000000000..d8dc91bff
--- /dev/null
+++ b/contrib/lua-torch/nn/PrintSize.lua
@@ -0,0 +1,36 @@
+local PrintSize, parent = torch.class('nn.PrintSize', 'nn.Module')
+
+function PrintSize:__init(prefix)
+ parent.__init(self)
+ self.prefix = prefix or "PrintSize"
+end
+
+function PrintSize:updateOutput(input)
+ self.output = input
+ local size
+ if torch.type(input) == 'table' then
+ size = input
+ elseif torch.type(input) == 'nil' then
+ size = 'missing size'
+ else
+ size = input:size()
+ end
+ print(self.prefix..":input\n", size)
+ return self.output
+end
+
+
+function PrintSize:updateGradInput(input, gradOutput)
+ local size
+ if torch.type(gradOutput) == 'table' then
+ size = gradOutput
+ elseif torch.type(gradOutput) == 'nil' then
+ size = 'missing size'
+ else
+ size = gradOutput:size()
+ end
+ print(self.prefix..":gradOutput\n", size)
+ self.gradInput = gradOutput
+ return self.gradInput
+end
+
diff --git a/contrib/lua-torch/nn/Profile.lua b/contrib/lua-torch/nn/Profile.lua
new file mode 100644
index 000000000..36cd909cd
--- /dev/null
+++ b/contrib/lua-torch/nn/Profile.lua
@@ -0,0 +1,55 @@
+local ProfileModule, parent = torch.class("nn.Profile", "nn.Decorator")
+
+function ProfileModule:__init(module, print_interval, name)
+ parent.__init(self, module)
+ self.print_interval = print_interval or 100
+ self.name = name or torch.type(module)
+ self.module = module
+ self.numFwds = 0
+ self.numBwds = 0
+ self.summedFwdTime = 0
+ self.summedBwdTime = 0
+ self.timer = torch.Timer()
+end
+
+function ProfileModule:updateOutput(input)
+ self.timer:reset()
+ self.output = self.module:updateOutput(input)
+ self.summedFwdTime = self.summedFwdTime + self.timer:time().real
+ self.numFwds = self.numFwds + 1
+ if self.numFwds % self.print_interval == 0 then
+ print (string.format('%s took %.3f seconds for %d forward passes',
+ self.name, self.summedFwdTime, self.print_interval))
+ self.numFwds = 0
+ self.summedFwdTime = 0
+ end
+ return self.output
+end
+
+function ProfileModule:updateGradInput(input, gradOutput)
+ self.timer:reset()
+ self.gradInput = self.module:updateGradInput(input, gradOutput)
+ self.summedBwdTime = self.summedBwdTime + self.timer:time().real
+ self.numBwds = self.numBwds + 1
+ if self.numBwds % self.print_interval == 0 then
+ print (string.format('%s took %.3f seconds for %d backward passes',
+ self.name, self.summedBwdTime, self.print_interval))
+ self.numBwds = 0
+ self.summedBwdTime = 0
+ end
+ return self.gradInput
+end
+
+local function makeTorchTimerSerializable()
+ -- The Timer object part of this class needs to be serializable
+ -- so that the layer can be saved, cloned, etc. We add a dummy
+ -- serialization of torch.Timer that just creates a new instance at read
+ local timerMetatable = getmetatable(torch.Timer())
+ timerMetatable['__factory'] = torch.Timer
+ timerMetatable['write'] = function(object, file) end
+ timerMetatable['read'] = function(object, file, versionNumber)
+ return object
+ end
+end
+
+makeTorchTimerSerializable()
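+
+--[[ Usage sketch (illustrative): wrap any module to print its cumulative
+forward/backward time every print_interval passes.
+
+   local m = nn.Profile(nn.Linear(100, 100), 1000, 'fc')
+]]--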
diff --git a/contrib/lua-torch/nn/README.md b/contrib/lua-torch/nn/README.md
new file mode 100644
index 000000000..6efd60962
--- /dev/null
+++ b/contrib/lua-torch/nn/README.md
@@ -0,0 +1,21 @@
+[![Build Status](https://travis-ci.org/torch/nn.svg?branch=master)](https://travis-ci.org/torch/nn)
+<a name="nn.dok"></a>
+# Neural Network Package #
+
+This package provides an easy and modular way to build and train simple or complex neural networks using [Torch](https://github.com/torch/torch7/blob/master/README.md):
+ * Modules are the bricks used to build neural networks. Each is itself a neural network, but they can be combined with other networks using containers to create complex neural networks:
+ * [Module](doc/module.md#nn.Module): abstract class inherited by all modules;
+ * [Containers](doc/containers.md#nn.Containers): composite and decorator classes like [`Sequential`](doc/containers.md#nn.Sequential), [`Parallel`](doc/containers.md#nn.Parallel), [`Concat`](doc/containers.md#nn.Concat) and [`NaN`](doc/containers.md#nn.NaN);
+ * [Transfer functions](doc/transfer.md#nn.transfer.dok): non-linear functions like [`Tanh`](doc/transfer.md#nn.Tanh) and [`Sigmoid`](doc/transfer.md#nn.Sigmoid);
+ * [Simple layers](doc/simple.md#nn.simplelayers.dok): like [`Linear`](doc/simple.md#nn.Linear), [`Mean`](doc/simple.md#nn.Mean), [`Max`](doc/simple.md#nn.Max) and [`Reshape`](doc/simple.md#nn.Reshape);
+ * [Table layers](doc/table.md#nn.TableLayers): layers for manipulating `table`s like [`SplitTable`](doc/table.md#nn.SplitTable), [`ConcatTable`](doc/table.md#nn.ConcatTable) and [`JoinTable`](doc/table.md#nn.JoinTable);
+ * [Convolution layers](doc/convolution.md#nn.convlayers.dok): [`Temporal`](doc/convolution.md#nn.TemporalModules), [`Spatial`](doc/convolution.md#nn.SpatialModules) and [`Volumetric`](doc/convolution.md#nn.VolumetricModules) convolutions;
+ * Criterions compute a gradient according to a given loss function, given an input and a target:
+ * [Criterions](doc/criterion.md#nn.Criterions): a list of all criterions, including [`Criterion`](doc/criterion.md#nn.Criterion), the abstract class;
+ * [`MSECriterion`](doc/criterion.md#nn.MSECriterion): the Mean Squared Error criterion used for regression;
+ * [`ClassNLLCriterion`](doc/criterion.md#nn.ClassNLLCriterion): the Negative Log Likelihood criterion used for classification;
+ * Additional documentation:
+ * [Overview](doc/overview.md#nn.overview.dok) of the package essentials including modules, containers and training;
+ * [Training](doc/training.md#nn.traningneuralnet.dok): how to train a neural network using [`StochasticGradient`](doc/training.md#nn.StochasticGradient);
+ * [Testing](doc/testing.md): how to test your modules.
+ * [Experimental Modules](https://github.com/clementfarabet/lua---nnx/blob/master/README.md): a package containing experimental modules and criteria.
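+
+A minimal end-to-end sketch (illustrative; names and sizes are arbitrary):
+
+```lua
+require 'nn'
+local net = nn.Sequential()
+net:add(nn.Linear(10, 25)):add(nn.Tanh()):add(nn.Linear(25, 2))
+local criterion = nn.MSECriterion()
+local input, target = torch.rand(10), torch.rand(2)
+local output = net:forward(input)
+local loss = criterion:forward(output, target)
+net:zeroGradParameters()
+net:backward(input, criterion:backward(output, target))
+net:updateParameters(0.01) -- one SGD step
+```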
diff --git a/contrib/lua-torch/nn/RReLU.lua b/contrib/lua-torch/nn/RReLU.lua
new file mode 100644
index 000000000..843415f7e
--- /dev/null
+++ b/contrib/lua-torch/nn/RReLU.lua
@@ -0,0 +1,50 @@
+local ffi = require 'ffi'
+local RReLU, parent = torch.class('nn.RReLU', 'nn.Module')
+
+function RReLU:__init(l, u, ip)
+ parent.__init(self)
+ self.lower = l or 1/8
+ self.upper = u or 1/3
+ assert(self.lower <= self.upper and self.lower >= 0 and self.upper >= 0)
+ self.noise = torch.Tensor()
+ self.train = true
+ self.inplace = ip or false
+end
+
+function RReLU:updateOutput(input)
+ local gen = ffi.typeof('THGenerator**')(torch._gen)[0]
+ input.THNN.RReLU_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.noise:cdata(),
+ self.lower,
+ self.upper,
+ self.train,
+ self.inplace,
+ gen
+ )
+ return self.output
+end
+
+function RReLU:updateGradInput(input, gradOutput)
+ input.THNN.RReLU_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.noise:cdata(),
+ self.lower,
+ self.upper,
+ self.train,
+ self.inplace
+ )
+ return self.gradInput
+end
+
+function RReLU:__tostring__()
+ return string.format('%s (l:%f, u:%f)', torch.type(self), self.lower, self.upper)
+end
+
+function RReLU:clearState()
+ if self.noise then self.noise:set() end
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/ReLU.lua b/contrib/lua-torch/nn/ReLU.lua
new file mode 100644
index 000000000..a6eb271ee
--- /dev/null
+++ b/contrib/lua-torch/nn/ReLU.lua
@@ -0,0 +1,5 @@
+local ReLU, Parent = torch.class('nn.ReLU', 'nn.Threshold')
+
+function ReLU:__init(p)
+ Parent.__init(self,0,0,p)
+end
diff --git a/contrib/lua-torch/nn/ReLU6.lua b/contrib/lua-torch/nn/ReLU6.lua
new file mode 100644
index 000000000..1cde00b46
--- /dev/null
+++ b/contrib/lua-torch/nn/ReLU6.lua
@@ -0,0 +1,32 @@
+local ReLU6, parent = torch.class('nn.ReLU6', 'nn.Module')
+
+function ReLU6:__init(inplace)
+   parent.__init(self)
+
+   if inplace ~= nil and type(inplace) ~= 'boolean' then
+      error('in-place flag must be boolean')
+   end
+   self.inplace = inplace or false
+end
+
+function ReLU6:updateOutput(input)
+ input.THNN.HardTanh_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ 0, 6, self.inplace)
+ return self.output
+end
+
+function ReLU6:updateGradInput(input, gradOutput)
+ input.THNN.HardTanh_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ 0, 6, self.inplace)
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Replicate.lua b/contrib/lua-torch/nn/Replicate.lua
new file mode 100644
index 000000000..c7dedd767
--- /dev/null
+++ b/contrib/lua-torch/nn/Replicate.lua
@@ -0,0 +1,57 @@
+local Replicate, parent = torch.class('nn.Replicate','nn.Module')
+
+function Replicate:__init(nf, dim, ndim)
+ parent.__init(self)
+ self.nfeatures = nf
+ self.dim = dim or 1
+ self.ndim = ndim
+ assert(self.dim > 0, "Can only replicate across positive integer dimensions.")
+end
+
+function Replicate:updateOutput(input)
+ self.dim = self.dim or 1 --backwards compatible
+ assert(
+ self.dim <= input:dim()+1,
+ "Not enough input dimensions to replicate along dimension " ..
+ tostring(self.dim) .. ".")
+ local batchOffset = self.ndim and input:dim() > self.ndim and 1 or 0
+ local rdim = self.dim + batchOffset
+ local sz = torch.LongStorage(input:dim()+1)
+ sz[rdim] = self.nfeatures
+ for i = 1,input:dim() do
+ local offset = 0
+ if i >= rdim then
+ offset = 1
+ end
+ sz[i+offset] = input:size(i)
+ end
+ local st = torch.LongStorage(input:dim()+1)
+ st[rdim] = 0
+ for i = 1,input:dim() do
+ local offset = 0
+ if i >= rdim then
+ offset = 1
+ end
+ st[i+offset] = input:stride(i)
+ end
+ self.output:set(input:storage(),input:storageOffset(),sz,st)
+ return self.output
+end
+
+function Replicate:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input):zero()
+ local batchOffset = self.ndim and input:dim() > self.ndim and 1 or 0
+ local rdim = self.dim + batchOffset
+ local sz = torch.LongStorage(input:dim()+1)
+ sz[rdim] = 1
+ for i = 1,input:dim() do
+ local offset = 0
+ if i >= rdim then
+ offset = 1
+ end
+ sz[i+offset] = input:size(i)
+ end
+ local gradInput = self.gradInput:view(sz)
+ gradInput:sum(gradOutput, rdim)
+ return self.gradInput
+end
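+
+--[[ Usage sketch (illustrative): replication is done with a zero stride, so the
+forward pass copies no memory.
+
+   local r = nn.Replicate(3, 1)
+   r:forward(torch.Tensor{1, 2}) -- 3 x 2 tensor, each row {1, 2}
+]]--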
diff --git a/contrib/lua-torch/nn/Reshape.lua b/contrib/lua-torch/nn/Reshape.lua
new file mode 100644
index 000000000..d508369fa
--- /dev/null
+++ b/contrib/lua-torch/nn/Reshape.lua
@@ -0,0 +1,72 @@
+local Reshape, parent = torch.class('nn.Reshape', 'nn.Module')
+
+function Reshape:__init(...)
+ parent.__init(self)
+ local arg = {...}
+
+ self.size = torch.LongStorage()
+ self.batchsize = torch.LongStorage()
+ if torch.type(arg[#arg]) == 'boolean' then
+ self.batchMode = arg[#arg]
+ table.remove(arg, #arg)
+ end
+ local n = #arg
+ if n == 1 and torch.typename(arg[1]) == 'torch.LongStorage' then
+ self.size:resize(#arg[1]):copy(arg[1])
+ else
+ self.size:resize(n)
+ for i=1,n do
+ self.size[i] = arg[i]
+ end
+ end
+
+ self.nelement = 1
+ self.batchsize:resize(#self.size+1)
+ for i=1,#self.size do
+ self.nelement = self.nelement * self.size[i]
+ self.batchsize[i+1] = self.size[i]
+ end
+end
+
+function Reshape:updateOutput(input)
+ if not input:isContiguous() then
+ self._input = self._input or input.new()
+ self._input:resizeAs(input)
+ self._input:copy(input)
+ input = self._input
+ end
+
+ if (self.batchMode == false) or (
+ (self.batchMode == nil) and
+ (input:nElement() == self.nelement and input:size(1) ~= 1)
+ ) then
+ self.output:view(input, self.size)
+ else
+ self.batchsize[1] = input:size(1)
+ self.output:view(input, self.batchsize)
+ end
+ return self.output
+end
+
+function Reshape:updateGradInput(input, gradOutput)
+ if not gradOutput:isContiguous() then
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput)
+ self._gradOutput:copy(gradOutput)
+ gradOutput = self._gradOutput
+ end
+
+ self.gradInput:viewAs(gradOutput, input)
+ return self.gradInput
+end
+
+
+function Reshape:__tostring__()
+ return torch.type(self) .. '(' ..
+ table.concat(self.size:totable(), 'x') .. ')'
+end
+
+function Reshape:clearState()
+ nn.utils.clear(self, '_input', '_gradOutput')
+ return parent.clearState(self)
+end
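+
+--[[ Usage sketch (illustrative):
+
+   local m = nn.Reshape(2, 8)
+   m:forward(torch.randn(4, 4))    -- viewed as 2 x 8
+   m:forward(torch.randn(5, 4, 4)) -- batch of 5, viewed as 5 x 2 x 8
+]]--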
diff --git a/contrib/lua-torch/nn/Select.lua b/contrib/lua-torch/nn/Select.lua
new file mode 100644
index 000000000..be87c6465
--- /dev/null
+++ b/contrib/lua-torch/nn/Select.lua
@@ -0,0 +1,24 @@
+local Select, parent = torch.class('nn.Select', 'nn.Module')
+
+function Select:__init(dimension,index)
+ parent.__init(self)
+ self.dimension = dimension
+ self.index = index
+end
+
+function Select:updateOutput(input)
+ local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension
+ local index = self.index < 0 and input:size(dim) + self.index + 1 or self.index
+ local output = input:select(dim, index);
+ self.output:resizeAs(output)
+ return self.output:copy(output)
+end
+
+function Select:updateGradInput(input, gradOutput)
+ local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension
+ local index = self.index < 0 and input:size(dim) + self.index + 1 or self.index
+ self.gradInput:resizeAs(input)
+ self.gradInput:zero()
+ self.gradInput:select(dim,index):copy(gradOutput)
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SelectTable.lua b/contrib/lua-torch/nn/SelectTable.lua
new file mode 100644
index 000000000..ef26f3507
--- /dev/null
+++ b/contrib/lua-torch/nn/SelectTable.lua
@@ -0,0 +1,71 @@
+local SelectTable, parent = torch.class('nn.SelectTable', 'nn.Module')
+
+function SelectTable:__init(index)
+ parent.__init(self)
+ self.index = index
+ self.gradInput = {}
+end
+
+function SelectTable:updateOutput(input)
+
+ -- handle negative indices
+ local index = self.index
+ if type(index) == "number" then
+ index = index < 0 and #input + index + 1 or index
+ end
+
+ assert(input[index], "index does not exist in the input table")
+ self.output = input[index]
+
+ return self.output
+end
+
+local function zeroTableCopy(t1, t2)
+ for k, v in pairs(t2) do
+ if (torch.type(v) == "table") then
+ t1[k] = zeroTableCopy(t1[k] or {}, t2[k])
+ elseif torch.isTensor(v) then
+ if not t1[k] then
+ t1[k] = v:clone():zero()
+ else
+ t1[k]:resizeAs(v)
+ t1[k]:zero()
+ end
+ else
+ t1[k] = nil
+ end
+ end
+ for k, v in pairs(t1) do
+ if not t2[k] then
+ t1[k] = nil
+ end
+ end
+ return t1
+end
+
+function SelectTable:updateGradInput(input, gradOutput)
+ -- make gradInput a zeroed copy of input
+ zeroTableCopy(self.gradInput, input)
+ -- handle negative indices
+ local index = self.index
+ if type(index) == "number" then
+ index = index < 0 and #input + index + 1 or index
+ end
+ -- copy into gradInput[index] (necessary for variable sized inputs)
+ assert(self.gradInput[index])
+ nn.utils.recursiveCopy(self.gradInput[index], gradOutput)
+
+ return self.gradInput
+end
+
+function SelectTable:type(type, tensorCache)
+ self.gradInput = {}
+ self.output = {}
+ return parent.type(self, type, tensorCache)
+end
+
+function SelectTable:__tostring__()
+ return torch.type(self) .. '(' .. self.index .. ')'
+end
+
+SelectTable.clearState = nn.Identity.clearState
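+
+--[[ Usage sketch (illustrative): negative indices select from the end of the
+table, as handled above.
+
+   local s = nn.SelectTable(-1)
+   s:forward{torch.randn(2), torch.randn(3)} -- returns the 3-element tensor
+]]--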
diff --git a/contrib/lua-torch/nn/Sequential.lua b/contrib/lua-torch/nn/Sequential.lua
new file mode 100644
index 000000000..22b0886b8
--- /dev/null
+++ b/contrib/lua-torch/nn/Sequential.lua
@@ -0,0 +1,122 @@
+local Sequential, _ = torch.class('nn.Sequential', 'nn.Container')
+
+function Sequential:__len()
+ return #self.modules
+end
+
+function Sequential:add(module)
+ if #self.modules == 0 then
+ self.gradInput = module.gradInput
+ end
+ table.insert(self.modules, module)
+ self.output = module.output
+ return self
+end
+
+function Sequential:insert(module, index)
+ index = index or (#self.modules + 1)
+ if index > (#self.modules + 1) or index < 1 then
+ error"index should be contiguous to existing modules"
+ end
+ table.insert(self.modules, index, module)
+ self.output = self.modules[#self.modules].output
+ self.gradInput = self.modules[1].gradInput
+end
+
+function Sequential:remove(index)
+ index = index or #self.modules
+ if index > #self.modules or index < 1 then
+ error"index out of range"
+ end
+ table.remove(self.modules, index)
+ if #self.modules > 0 then
+ self.output = self.modules[#self.modules].output
+ self.gradInput = self.modules[1].gradInput
+ else
+ self.output = torch.Tensor()
+ self.gradInput = torch.Tensor()
+ end
+end
+
+function Sequential:updateOutput(input)
+ local currentOutput = input
+ for i=1,#self.modules do
+ currentOutput = self:rethrowErrors(self.modules[i], i, 'updateOutput', currentOutput)
+ end
+ self.output = currentOutput
+ return currentOutput
+end
+
+function Sequential:updateGradInput(input, gradOutput)
+ local currentGradOutput = gradOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ currentGradOutput = self:rethrowErrors(currentModule, i+1, 'updateGradInput', previousModule.output, currentGradOutput)
+ currentModule = previousModule
+ end
+ currentGradOutput = self:rethrowErrors(currentModule, 1, 'updateGradInput', input, currentGradOutput)
+ self.gradInput = currentGradOutput
+ return currentGradOutput
+end
+
+function Sequential:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+
+ local currentGradOutput = gradOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ self:rethrowErrors(currentModule, i+1, 'accGradParameters', previousModule.output, currentGradOutput, scale)
+ currentGradOutput = currentModule.gradInput
+ currentModule = previousModule
+ end
+
+ self:rethrowErrors(currentModule, 1, 'accGradParameters', input, currentGradOutput, scale)
+end
+
+function Sequential:backward(input, gradOutput, scale)
+ scale = scale or 1
+ local currentGradOutput = gradOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ currentGradOutput = self:rethrowErrors(currentModule, i+1, 'backward', previousModule.output, currentGradOutput, scale)
+ currentModule.gradInput = currentGradOutput
+ currentModule = previousModule
+ end
+ currentGradOutput = self:rethrowErrors(currentModule, 1, 'backward', input, currentGradOutput, scale)
+ self.gradInput = currentGradOutput
+ return currentGradOutput
+end
+
+function Sequential:accUpdateGradParameters(input, gradOutput, lr)
+ local currentGradOutput = gradOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ self:rethrowErrors(currentModule, i+1, 'accUpdateGradParameters', previousModule.output, currentGradOutput, lr)
+ currentGradOutput = currentModule.gradInput
+ currentModule = previousModule
+ end
+
+ self:rethrowErrors(currentModule, 1, 'accUpdateGradParameters', input, currentGradOutput, lr)
+end
+
+
+function Sequential:__tostring__()
+ local tab = ' '
+ local line = '\n'
+ local next = ' -> '
+ local str = 'nn.Sequential'
+ str = str .. ' {' .. line .. tab .. '[input'
+ for i=1,#self.modules do
+ str = str .. next .. '(' .. i .. ')'
+ end
+ str = str .. next .. 'output]'
+ for i=1,#self.modules do
+ str = str .. line .. tab .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab)
+ end
+ str = str .. line .. '}'
+ return str
+end
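+
+--[[ Usage sketch (illustrative): modules are chained so that each output feeds
+the next input.
+
+   local mlp = nn.Sequential()
+   mlp:add(nn.Linear(10, 20)):add(nn.Tanh()):add(nn.Linear(20, 1))
+   mlp:forward(torch.randn(10)) -- 1-element tensor
+]]--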
diff --git a/contrib/lua-torch/nn/Sigmoid.lua b/contrib/lua-torch/nn/Sigmoid.lua
new file mode 100644
index 000000000..0126f6f8f
--- /dev/null
+++ b/contrib/lua-torch/nn/Sigmoid.lua
@@ -0,0 +1,19 @@
+local Sigmoid = torch.class('nn.Sigmoid', 'nn.Module')
+
+function Sigmoid:updateOutput(input)
+ input.THNN.Sigmoid_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function Sigmoid:updateGradInput(input, gradOutput)
+ input.THNN.Sigmoid_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata()
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SmoothL1Criterion.lua b/contrib/lua-torch/nn/SmoothL1Criterion.lua
new file mode 100644
index 000000000..be636a94c
--- /dev/null
+++ b/contrib/lua-torch/nn/SmoothL1Criterion.lua
@@ -0,0 +1,32 @@
+local SmoothL1Criterion, parent = torch.class('nn.SmoothL1Criterion', 'nn.Criterion')
+
+function SmoothL1Criterion:__init(sizeAverage)
+ parent.__init(self)
+ if sizeAverage ~= nil then
+ self.sizeAverage = sizeAverage
+ else
+ self.sizeAverage = true
+ end
+end
+
+function SmoothL1Criterion:updateOutput(input, target)
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.SmoothL1Criterion_updateOutput(
+ input:cdata(),
+ target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function SmoothL1Criterion:updateGradInput(input, target)
+ input.THNN.SmoothL1Criterion_updateGradInput(
+ input:cdata(),
+ target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SoftMarginCriterion.lua b/contrib/lua-torch/nn/SoftMarginCriterion.lua
new file mode 100644
index 000000000..96ccda8a4
--- /dev/null
+++ b/contrib/lua-torch/nn/SoftMarginCriterion.lua
@@ -0,0 +1,24 @@
+local SoftMarginCriterion, parent = torch.class('nn.SoftMarginCriterion', 'nn.Criterion')
+
+function SoftMarginCriterion:__init()
+ parent.__init(self)
+ self.sizeAverage = true
+end
+
+function SoftMarginCriterion:updateOutput(input, target)
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.SoftMarginCriterion_updateOutput(
+ input:cdata(), target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage)
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function SoftMarginCriterion:updateGradInput(input, target)
+ input.THNN.SoftMarginCriterion_updateGradInput(
+ input:cdata(), target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage)
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SoftMax.lua b/contrib/lua-torch/nn/SoftMax.lua
new file mode 100644
index 000000000..23a444cf6
--- /dev/null
+++ b/contrib/lua-torch/nn/SoftMax.lua
@@ -0,0 +1,19 @@
+local SoftMax, _ = torch.class('nn.SoftMax', 'nn.Module')
+
+function SoftMax:updateOutput(input)
+ input.THNN.SoftMax_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function SoftMax:updateGradInput(input, gradOutput)
+ input.THNN.SoftMax_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata()
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SoftMin.lua b/contrib/lua-torch/nn/SoftMin.lua
new file mode 100644
index 000000000..7da2a6589
--- /dev/null
+++ b/contrib/lua-torch/nn/SoftMin.lua
@@ -0,0 +1,31 @@
+local SoftMin, parent = torch.class('nn.SoftMin', 'nn.Module')
+
+function SoftMin:updateOutput(input)
+ self.mininput = self.mininput or input.new()
+ self.mininput:resizeAs(input):copy(input):mul(-1)
+ input.THNN.SoftMax_updateOutput(
+ self.mininput:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function SoftMin:updateGradInput(input, gradOutput)
+ self.mininput = self.mininput or input.new()
+ self.mininput:resizeAs(input):copy(input):mul(-1)
+
+ input.THNN.SoftMax_updateGradInput(
+ self.mininput:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata()
+ )
+
+ self.gradInput:mul(-1)
+ return self.gradInput
+end
+
+function SoftMin:clearState()
+ if self.mininput then self.mininput:set() end
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SoftPlus.lua b/contrib/lua-torch/nn/SoftPlus.lua
new file mode 100644
index 000000000..f77b25380
--- /dev/null
+++ b/contrib/lua-torch/nn/SoftPlus.lua
@@ -0,0 +1,35 @@
+local SoftPlus, parent = torch.class('nn.SoftPlus', 'nn.Module')
+
+function SoftPlus:__init(beta)
+ parent.__init(self)
+ self.beta = beta or 1 -- Beta controls sharpness of transfer function
+ self.threshold = 20 -- Avoid floating point issues with exp(x), x>20
+end
+
+function SoftPlus:updateOutput(input)
+ -- f(x) = 1/beta * log(1 + exp(beta * x))
+ input.THNN.SoftPlus_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.beta,
+ self.threshold
+ )
+ return self.output
+end
+
+function SoftPlus:updateGradInput(input, gradOutput)
+ -- d/dx[log(1+exp(k*x))/k] = exp(kx) / (exp(kx) + 1)
+ -- SINCE
+ -- y = (1/k)*log(1+exp(k*x)) --> x = (1/k)*log(exp(k*y)-1)
+ -- THEREFORE:
+ -- d/dx(f(x)) = (exp(k*y) - 1) / exp(k*y)
+ input.THNN.SoftPlus_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata(),
+ self.beta,
+ self.threshold
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SoftShrink.lua b/contrib/lua-torch/nn/SoftShrink.lua
new file mode 100644
index 000000000..67af15a98
--- /dev/null
+++ b/contrib/lua-torch/nn/SoftShrink.lua
@@ -0,0 +1,25 @@
+local SoftShrink, parent = torch.class('nn.SoftShrink', 'nn.Module')
+
+function SoftShrink:__init(lam)
+ parent.__init(self)
+ self.lambda = lam or 0.5
+end
+
+function SoftShrink:updateOutput(input)
+ input.THNN.SoftShrink_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.lambda
+ )
+ return self.output
+end
+
+function SoftShrink:updateGradInput(input, gradOutput)
+ input.THNN.SoftShrink_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.lambda
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SoftSign.lua b/contrib/lua-torch/nn/SoftSign.lua
new file mode 100644
index 000000000..ee72011f1
--- /dev/null
+++ b/contrib/lua-torch/nn/SoftSign.lua
@@ -0,0 +1,20 @@
+local SoftSign, parent = torch.class('nn.SoftSign', 'nn.Module')
+
+function SoftSign:updateOutput(input)
+ self.temp = self.temp or input.new()
+ self.temp:resizeAs(input):copy(input):abs():add(1)
+ self.output:resizeAs(input):copy(input):cdiv(self.temp)
+ return self.output
+end
+
+function SoftSign:updateGradInput(input, gradOutput)
+ self.tempgrad = self.tempgrad or input.new()
+ self.tempgrad:resizeAs(self.output):copy(input):abs():add(1):cmul(self.tempgrad)
+ self.gradInput:resizeAs(input):copy(gradOutput):cdiv(self.tempgrad)
+ return self.gradInput
+end
+
+function SoftSign:clearState()
+ nn.utils.clear(self, 'temp', 'tempgrad')
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SparseJacobian.lua b/contrib/lua-torch/nn/SparseJacobian.lua
new file mode 100644
index 000000000..7f4c02444
--- /dev/null
+++ b/contrib/lua-torch/nn/SparseJacobian.lua
@@ -0,0 +1,277 @@
+nn.SparseJacobian = {}
+
+function nn.SparseJacobian.backward (module, input, param, dparam)
+ local doparam = 0
+ if param then
+ doparam = 1
+ end
+
+ -- output deriv
+ module:forward(input)
+ local dout = module.output.new():resizeAs(module.output)
+ -- 1D view
+ local sdout = module.output.new(dout:storage(), 1, dout:nElement())
+ -- jacobian matrix to calculate
+ local jacobian
+ if doparam == 1 then
+ jacobian = torch.Tensor(param:nElement(), dout:nElement()):zero()
+ else
+ jacobian = torch.Tensor(input:size(1), dout:nElement()):zero()
+ end
+
+ for i=1,sdout:nElement() do
+ dout:zero()
+ sdout[i] = 1
+ module:zeroGradParameters()
+ local din = module:updateGradInput(input, dout)
+ module:accGradParameters(input, dout)
+ if doparam == 1 then
+ jacobian:select(2,i):copy(dparam)
+ else
+ jacobian:select(2,i):copy(din:select(2,2))
+ end
+ end
+
+ return jacobian
+end
+
+
+function nn.SparseJacobian.backwardUpdate (module, input, param)
+
+ -- output deriv
+ module:forward(input)
+ local dout = module.output.new():resizeAs(module.output)
+ -- 1D view
+ local sdout = module.output.new(dout:storage(),1,dout:nElement())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero()
+
+ -- original param
+ local params = module:parameters()
+ local origparams = {}
+ for j=1,#params do
+ table.insert(origparams, params[j]:clone())
+ end
+
+ for i=1,sdout:nElement() do
+ -- Reset parameters
+ for j=1,#params do
+ params[j]:copy(origparams[j])
+ end
+ dout:zero()
+ sdout[i] = 1
+ module:zeroGradParameters()
+ module:updateGradInput(input, dout)
+ module:accUpdateGradParameters(input, dout, 1)
+ jacobian:select(2,i):copy(param)
+ end
+
+ for j=1,#params do
+ params[j]:copy(origparams[j])
+ end
+
+ return jacobian
+end
+
+function nn.SparseJacobian.forward(module, input, param)
+ local doparam = 0
+ if param then
+ doparam = 1
+ end
+ param = param or input
+
+ -- perturbation amount
+ local small = 1e-6
+ -- 1D view of input
+ --local tst = param:storage()
+ local sin
+ if doparam == 1 then
+ sin = param.new(param):resize(param:nElement())
+ else
+ sin = input.new(input):select(2,2)
+ end
+
+ local out = module:forward(input)
+ -- jacobian matrix to calculate
+ local jacobian
+ if doparam == 1 then
+ jacobian = torch.Tensor():resize(param:nElement(),
+ out:nElement())
+ else
+ jacobian = torch.Tensor():resize(input:size(1),
+ out:nElement())
+ end
+
+ local outa = torch.Tensor(jacobian:size(2))
+ local outb = torch.Tensor(jacobian:size(2))
+
+ for i=1,sin:nElement() do
+ sin[i] = sin[i] - small
+ outa:copy(module:forward(input))
+ sin[i] = sin[i] + 2*small
+ outb:copy(module:forward(input))
+ sin[i] = sin[i] - small
+
+ outb:add(-1,outa):div(2*small)
+ jacobian:select(1,i):copy(outb)
+ end
+
+ return jacobian
+end
+
+function nn.SparseJacobian.forwardUpdate(module, input, param)
+ -- perturbation amount
+ local small = 1e-6
+ -- 1D view of input
+ --local tst = param:storage()
+ local sin = param.new(param):resize(param:nElement())--param.new(tst,1,tst:size())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement())
+
+ local outa = torch.Tensor(jacobian:size(2))
+ local outb = torch.Tensor(jacobian:size(2))
+
+ for i=1,sin:nElement() do
+ sin[i] = sin[i] - small
+ outa:copy(module:forward(input))
+ sin[i] = sin[i] + 2*small
+ outb:copy(module:forward(input))
+ sin[i] = sin[i] - small
+
+ outb:add(-1,outa):div(2*small)
+ jacobian:select(1,i):copy(outb)
+ jacobian:select(1,i):mul(-1)
+ jacobian:select(1,i):add(sin[i])
+ end
+ return jacobian
+end
+
+function nn.SparseJacobian.testJacobian (module, input, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:select(2,2):copy(torch.rand(input:size(1)):mul(inrange):add(minval))
+ local jac_fprop = nn.SparseJacobian.forward(module,input)
+ local jac_bprop = nn.SparseJacobian.backward(module,input)
+ local error = jac_fprop-jac_bprop
+ return error:abs():max()
+end
+
+function nn.SparseJacobian.testJacobianParameters (module, input, param, dparam, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:select(2,2):copy(torch.rand(input:size(1)):mul(inrange):add(minval))
+ param:copy(torch.rand(param:nElement()):mul(inrange):add(minval))
+ local jac_bprop = nn.SparseJacobian.backward(module, input, param, dparam)
+ local jac_fprop = nn.SparseJacobian.forward(module, input, param)
+ local error = jac_fprop - jac_bprop
+ return error:abs():max()
+end
+
+function nn.SparseJacobian.testJacobianUpdateParameters (module, input, param, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:select(2,2):copy(torch.rand(input:size(1)):mul(inrange):add(minval))
+ param:copy(torch.rand(param:nElement()):mul(inrange):add(minval))
+ local params_bprop = nn.SparseJacobian.backwardUpdate(module, input, param)
+ local params_fprop = nn.SparseJacobian.forwardUpdate(module, input, param)
+
+ local error = params_fprop - params_bprop
+ return error:abs():max()
+end
+
+function nn.SparseJacobian.testIO(module,input, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+
+ -- run module
+ module:forward(input)
+ local go = module.output:clone():copy(torch.rand(module.output:nElement()):mul(inrange):add(minval))
+ module:zeroGradParameters()
+ module:updateGradInput(input,go)
+ module:accGradParameters(input,go)
+
+ local fo = module.output:clone()
+ local bo = module.gradInput:clone()
+
+ -- write module
+ local f = torch.DiskFile('tmp.bin','w'):binary()
+ f:writeObject(module)
+ f:close()
+ -- read module
+ local m = torch.DiskFile('tmp.bin'):binary():readObject()
+ m:forward(input)
+ m:zeroGradParameters()
+ m:updateGradInput(input,go)
+ m:accGradParameters(input,go)
+ -- cleanup
+ os.remove('tmp.bin')
+
+ local fo2 = m.output:clone()
+ local bo2 = m.gradInput:clone()
+
+ local errf = fo - fo2
+ local errb = bo - bo2
+ return errf:abs():max(), errb:abs():max()
+end
+
+function nn.SparseJacobian.testAllUpdate(module, input, weight, gradWeight)
+ local gradOutput
+ local lr = torch.uniform(0.1, 1)
+ local errors = {}
+
+ -- accGradParameters
+ local maccgp = module:clone()
+ local weightc = maccgp[weight]:clone()
+ maccgp:forward(input)
+ gradOutput = torch.rand(maccgp.output:size())
+ maccgp:zeroGradParameters()
+ maccgp:updateGradInput(input, gradOutput)
+ maccgp:accGradParameters(input, gradOutput)
+ maccgp:updateParameters(lr)
+ errors["accGradParameters"] = (weightc-maccgp[gradWeight]*lr-maccgp[weight]):norm()
+
+ -- accUpdateGradParameters
+ local maccugp = module:clone()
+ maccugp:forward(input)
+ maccugp:updateGradInput(input, gradOutput)
+ maccugp:accUpdateGradParameters(input, gradOutput, lr)
+ errors["accUpdateGradParameters"] = (maccugp[weight]-maccgp[weight]):norm()
+
+ -- shared, accGradParameters
+ local macsh1 = module:clone()
+ local macsh2 = module:clone()
+ macsh2:share(macsh1, weight)
+ macsh1:forward(input)
+ macsh2:forward(input)
+ macsh1:zeroGradParameters()
+ macsh2:zeroGradParameters()
+ macsh1:updateGradInput(input, gradOutput)
+ macsh2:updateGradInput(input, gradOutput)
+ macsh1:accGradParameters(input, gradOutput)
+ macsh2:accGradParameters(input, gradOutput)
+ macsh1:updateParameters(lr)
+ macsh2:updateParameters(lr)
+ local err = (weightc-maccgp[gradWeight]*(lr*2)-macsh1[weight]):norm()
+ err = err + (weightc-maccgp[gradWeight]*(lr*2)-macsh2[weight]):norm()
+ errors["accGradParameters [shared]"] = err
+
+ -- shared, accUpdateGradParameters
+ local macshu1 = module:clone()
+ local macshu2 = module:clone()
+ macshu2:share(macshu1, weight)
+ macshu1:forward(input)
+ macshu2:forward(input)
+ macshu1:updateGradInput(input, gradOutput)
+ macshu2:updateGradInput(input, gradOutput)
+ macshu1:accUpdateGradParameters(input, gradOutput, lr)
+ macshu2:accUpdateGradParameters(input, gradOutput, lr)
+ err = (weightc-maccgp[gradWeight]*(lr*2)-macshu1[weight]):norm()
+ err = err + (weightc-maccgp[gradWeight]*(lr*2)-macshu2[weight]):norm()
+ errors["accUpdateGradParameters [shared]"] = err
+
+ return errors
+end
diff --git a/contrib/lua-torch/nn/SparseLinear.lua b/contrib/lua-torch/nn/SparseLinear.lua
new file mode 100644
index 000000000..9a50c6912
--- /dev/null
+++ b/contrib/lua-torch/nn/SparseLinear.lua
@@ -0,0 +1,242 @@
+local THNN = require 'nn.THNN'
+local SparseLinear, parent = torch.class('nn.SparseLinear', 'nn.Module')
+
+local NO_LAST_INPUT = 0
+local ONE_LAST_INPUT = 1
+local ACC_MULTIPLE_TIMES = 2
+
+function SparseLinear:__init(inputSize, outputSize, doGradInput)
+ parent.__init(self)
+
+ self.weightDecay = 0
+ self.doGradInput = doGradInput or false
+ self.weight = torch.Tensor(outputSize, inputSize):zero()
+ self.bias = torch.Tensor(outputSize):zero()
+ self.gradWeight = torch.Tensor(outputSize, inputSize):zero()
+ self.gradBias = torch.Tensor(outputSize):zero()
+
+   assert(type(self.doGradInput) == 'boolean')
+
+ self.lastInput = nil
+ self.sparseUpdate = NO_LAST_INPUT
+ self.formatted_input = nil
+
+ -- state
+ self.gradInput = {}
+ self.output:resize(outputSize)
+
+ self:reset()
+end
+
+function SparseLinear:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(2))
+ end
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv):mul(0.000001)
+end
+
+function SparseLinear:reshapeInput(input)
+ if type(input) == 'table' then
+ return input, true, false
+ else
+ if input:dim() == 2 then
+ return {input}, false, false
+ else
+ return input, true, true
+ end
+ end
+end
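+
+-- Input-format sketch (illustrative; 'sl' is assumed constructed above): in the
+-- non-legacy path each batch element is an n x 2 tensor of {index, value} pairs,
+-- e.g. for a single example with features 4 and 10 set:
+--
+--    local x = { torch.Tensor{{4, 0.5}, {10, 1.0}} }
+--    local out = sl:forward(x)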
+
+function SparseLinear:updateOutput(input)
+ if self.sparseUpdate == ONE_LAST_INPUT then
+ self.sparseUpdate = ACC_MULTIPLE_TIMES
+ end
+ local input, batchMode, legacyMode = self:reshapeInput(input)
+ self.legacyMode = legacyMode
+
+ if legacyMode then
+ input.THNN.SparseLinear_legacyUpdateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata()
+ )
+ else
+ local nbatches = #input
+ if nbatches == 0 then
+ self.output:copy(self.bias)
+ return self.output
+ end
+
+ local size = 0
+ local marker = 1
+ self.formatted_input = self.formatted_input or input[1].new()
+
+ for i,v in ipairs(input) do size = size + input[i]:size(1) end
+ self.formatted_input:resize(size, 3)
+ for i,v in ipairs(input) do
+ local buf = self.formatted_input:narrow(1, marker, input[i]:size(1))
+ buf:narrow(2,2,2):copy(input[i])
+ buf:select(2,1):fill(i)
+ marker = marker + input[i]:size(1)
+ end
+
+ self.output:resize(nbatches, self.weight:size(1))
+ input[1].THNN.SparseLinear_updateOutput(
+ self.formatted_input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata()
+ )
+
+ -- fix output size for batchSize = 1
+ if not batchMode then
+ self.output = self.output[1]
+ end
+ end
+
+ return self.output
+end
+
+function SparseLinear:accGradParameters(input, gradOutput, scale)
+ local input, batchMode, legacyMode = self:reshapeInput(input)
+ self.legacyMode = legacyMode
+ self.lastInput = self.lastInput or gradOutput.new()
+ if self.sparseUpdate == NO_LAST_INPUT then
+ local v = self.formatted_input
+ if self.legacyMode then v = input end
+ self.lastInput:resizeAs(v):copy(v)
+ self.sparseUpdate = ONE_LAST_INPUT
+ elseif self.sparseUpdate == ONE_LAST_INPUT then
+ self.sparseUpdate = ACC_MULTIPLE_TIMES
+ end
+
+ if legacyMode then
+ input.THNN.SparseLinear_legacyAccGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.weightDecay or 0,
+ scale or 1
+ )
+ else
+ if not batchMode then
+ gradOutput:resize(1, gradOutput:size(1))
+ end
+
+ local rows = self.formatted_input:select(2, 1)
+ local cols = self.formatted_input:select(2, 2)
+ local sortinds = cols * gradOutput:size(1) + rows
+ local _, inds = sortinds:sort(1, false)
+ local newinput = self.formatted_input:index(1, inds)
+ input[1].THNN.SparseLinear_accGradParameters(
+ newinput:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.weightDecay or 0,
+ scale or 1
+ )
+ end
+end
+
+function SparseLinear:updateGradInput(input, gradOutput)
+ if self.legacyMode then
+ if type(self.gradInput) ~= type(gradOutput) then self.gradInput = gradOutput.new() end
+ self.gradInput:resizeAs(input)
+ else
+ self.gradInput = {}
+ end
+ if self.doGradInput then
+ -- GradInput should be dense anyway
+ local gi
+ local batchMode = true
+ if gradOutput:dim() == 1 then
+ gi = self.weight:t()*gradOutput
+ batchMode = false
+ elseif gradOutput:dim() == 2 then
+ gi = gradOutput*self.weight
+ end
+ local ini = self.weight:size(2)
+
+ if self.legacyMode then
+ local batches = self.gradInput:size(1)
+ self.gradInput:resize(batches, ini, 2)
+ self.gradInput:select(3,1):copy(torch.repeatTensor(torch.range(1, ini), batches, 1))
+ self.gradInput:select(3,2):copy(gi)
+ else
+ if not batchMode then gi:resize(1, ini) end
+ for i = 1,gi:size(1) do
+ self.gradInput[i] = gradOutput.new(ini, 2)
+ self.gradInput[i]:select(2, 2):copy(gi[i])
+ self.gradInput[i]:select(2, 1):range(1, ini)
+ end
+ end
+ end
+ return self.gradInput
+end
+
+-- These functions do sparse updates / zeros. However, if we accumulated
+-- gradients multiple times, we can't depend on the last input to do sparse
+-- updates.
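+-- A typical cycle that keeps the sparse path alive (illustrative sketch):
+--   module:zeroGradParameters()                -- state: NO_LAST_INPUT
+--   module:forward(x); module:backward(x, dy)  -- records x: ONE_LAST_INPUT
+--   module:updateParameters(lr)                -- sparse update path is taken
+-- A second forward/backward before zeroing moves the state to
+-- ACC_MULTIPLE_TIMES, and the dense parent implementations are used instead.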
+function SparseLinear:updateParameters(learningRate)
+ if self.lastInput and self.sparseUpdate == ONE_LAST_INPUT then
+ if self.legacyMode then
+ self.lastInput.THNN.SparseLinear_legacyUpdateParameters(
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.lastInput:cdata(),
+ learningRate
+ )
+ else
+ self.lastInput.THNN.SparseLinear_updateParameters(
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.lastInput:cdata(),
+ learningRate
+ )
+ end
+ else
+ parent.updateParameters(self, learningRate)
+ end
+end
+
+function SparseLinear:zeroGradParameters()
+ if self.lastInput and self.sparseUpdate == ONE_LAST_INPUT then
+ if self.legacyMode then
+ self.lastInput.THNN.SparseLinear_legacyZeroGradParameters(
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.lastInput:cdata()
+ )
+ else
+ self.lastInput.THNN.SparseLinear_zeroGradParameters(
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.lastInput:cdata()
+ )
+ end
+ else
+ parent.zeroGradParameters(self)
+ end
+ self.sparseUpdate = NO_LAST_INPUT
+end
+
+function SparseLinear:clearState()
+ if self.lastInput then self.lastInput:set() end
+ -- SparseLinear_cudaClearState is only provided by the CUDA backend
+ if self.weight.THNN.SparseLinear_cudaClearState then
+ self.weight.THNN.SparseLinear_cudaClearState()
+ end
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialAdaptiveAveragePooling.lua b/contrib/lua-torch/nn/SpatialAdaptiveAveragePooling.lua
new file mode 100644
index 000000000..2e223580a
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialAdaptiveAveragePooling.lua
@@ -0,0 +1,35 @@
+local SpatialAdaptiveAveragePooling, parent = torch.class('nn.SpatialAdaptiveAveragePooling', 'nn.Module')
+
+function SpatialAdaptiveAveragePooling:__init(W, H)
+ parent.__init(self)
+
+ self.W = W
+ self.H = H
+end
+
+function SpatialAdaptiveAveragePooling:updateOutput(input)
+ input.THNN.SpatialAdaptiveAveragePooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.W, self.H
+ )
+ return self.output
+end
+
+function SpatialAdaptiveAveragePooling:updateGradInput(input, gradOutput)
+ input.THNN.SpatialAdaptiveAveragePooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata()
+ )
+ return self.gradInput
+end
+
+-- for backward compat
+function SpatialAdaptiveAveragePooling:empty()
+ self:clearState()
+end
+
+function SpatialAdaptiveAveragePooling:clearState()
+ return parent.clearState(self)
+end
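+
+-- Illustrative usage (sketch): pools any input down to a fixed W x H grid,
+-- independent of the input's spatial size:
+--   local pool = nn.SpatialAdaptiveAveragePooling(4, 4)
+--   local y = pool:forward(torch.rand(3, 17, 23))  -- y has size 3x4x4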
diff --git a/contrib/lua-torch/nn/SpatialAdaptiveMaxPooling.lua b/contrib/lua-torch/nn/SpatialAdaptiveMaxPooling.lua
new file mode 100644
index 000000000..b78261c3d
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialAdaptiveMaxPooling.lua
@@ -0,0 +1,46 @@
+local SpatialAdaptiveMaxPooling, parent = torch.class('nn.SpatialAdaptiveMaxPooling', 'nn.Module')
+
+function SpatialAdaptiveMaxPooling:__init(W, H)
+ parent.__init(self)
+
+ self.W = W
+ self.H = H
+end
+
+function SpatialAdaptiveMaxPooling:updateOutput(input)
+ self.indices = self.indices or torch.LongTensor()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+ else
+ self.indices = self.indices:long()
+ end
+ input.THNN.SpatialAdaptiveMaxPooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.indices:cdata(),
+ self.W, self.H
+ )
+ return self.output
+end
+
+function SpatialAdaptiveMaxPooling:updateGradInput(input, gradOutput)
+ input.THNN.SpatialAdaptiveMaxPooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.indices:cdata()
+ )
+ return self.gradInput
+end
+
+-- for backward compat
+function SpatialAdaptiveMaxPooling:empty()
+ self:clearState()
+end
+
+function SpatialAdaptiveMaxPooling:clearState()
+ if self.indices then
+ self.indices:set()
+ end
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialAutoCropMSECriterion.lua b/contrib/lua-torch/nn/SpatialAutoCropMSECriterion.lua
new file mode 100644
index 000000000..97206a062
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialAutoCropMSECriterion.lua
@@ -0,0 +1,74 @@
+--[[
+ SpatialAutoCropMSECriterion.
+ Implements the MSECriterion for the case where the spatial resolution of the
+ input is less than or equal to the spatial resolution of the target. It
+ achieves this by center-cropping the target to the same spatial resolution
+ as the input; the MSE is then calculated between the input and the cropped
+ target.
+]]
+local SpatialAutoCropMSECriterion, parent = torch.class('nn.SpatialAutoCropMSECriterion', 'nn.MSECriterion')
+
+function SpatialAutoCropMSECriterion:__init(sizeAverage)
+ parent.__init(self, sizeAverage)
+end
+
+local function centerCrop(input, cropSize)
+ assert(input:dim() == 3 or input:dim() == 4, "input should be a 3D or 4D tensor")
+ assert(#cropSize == 2, "cropSize should have two elements only")
+ local _input = input
+ if input:dim() == 3 then
+ _input = input:view(1, input:size(1), input:size(2), input:size(3))
+ end
+ assert(cropSize[1] > 0 and cropSize[1] <= _input:size(3),
+ "0 < cropSize[1] <= input:size(3) not satisfied")
+ assert(cropSize[2] > 0 and cropSize[2] <= _input:size(4),
+ "0 < cropSize[1] <= input:size(3) not satisfied")
+
+ local inputHeight = _input:size(3)
+ local inputWidth = _input:size(4)
+
+ local rowStart = 1 + math.floor((inputHeight - cropSize[1])/2.0)
+ local rowEnd = rowStart + cropSize[1] - 1
+ local colStart = 1 + math.floor((inputWidth - cropSize[2])/2.0)
+ local colEnd = colStart + cropSize[2] - 1
+ if input:dim() == 3 then
+ return input[{{}, {rowStart, rowEnd}, {colStart, colEnd}}]
+ else
+ return input[{{}, {}, {rowStart, rowEnd}, {colStart, colEnd}}]
+ end
+end
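+
+-- Worked example of the indexing above: cropping an 8x8 map to 5x5 gives
+-- rowStart = 1 + floor((8-5)/2) = 2 and rowEnd = 6, i.e. rows 2..6 (centered).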
+
+local function getTensorHeightAndWidth(tensor)
+ local heightIdx = 2
+ local widthIdx = 3
+ if tensor:dim() == 4 then
+ heightIdx = 3
+ widthIdx = 4
+ end
+ return tensor:size(heightIdx), tensor:size(widthIdx)
+end
+
+local function inputResolutionIsAtMostTargetResolution(input, target)
+ local inputHeight, inputWidth = getTensorHeightAndWidth(input)
+ local targetHeight, targetWidth = getTensorHeightAndWidth(target)
+ return inputHeight <= targetHeight and inputWidth <= targetWidth
+end
+
+function SpatialAutoCropMSECriterion:updateOutput(input, target)
+ assert(input:dim() == target:dim(), "input and target should have the same number of dimensions")
+ assert(input:dim() == 4 or input:dim() == 3, "input and target must have 3 or 4 dimensions")
+ assert(inputResolutionIsAtMostTargetResolution(input, target),
+ "Spatial resolution of input should be less than or equal to the spatial resolution of the target")
+
+ local inputHeight, inputWidth = getTensorHeightAndWidth(input)
+ local targetCropped = centerCrop(target, {inputHeight, inputWidth})
+ return parent.updateOutput(self, input, targetCropped)
+end
+
+
+function SpatialAutoCropMSECriterion:updateGradInput(input, gradOutput)
+ assert(input:dim() == gradOutput:dim(), "input and gradOutput should have the same number of dimensions")
+ assert(input:dim() == 4 or input:dim() == 3, "input and gradOutput must have 3 or 4 dimensions")
+ assert(input:isSameSizeAs(gradOutput), "gradOutput and input must have the same size")
+
+ return parent.updateGradInput(self, input, gradOutput)
+end
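+
+-- Illustrative usage (sketch; input spatially smaller than the target):
+--   local crit = nn.SpatialAutoCropMSECriterion()
+--   local input  = torch.rand(4, 3, 6, 6)
+--   local target = torch.rand(4, 3, 8, 8)  -- center-cropped to 6x6 internally
+--   local loss = crit:forward(input, target)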
diff --git a/contrib/lua-torch/nn/SpatialAveragePooling.lua b/contrib/lua-torch/nn/SpatialAveragePooling.lua
new file mode 100644
index 000000000..1e7666827
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialAveragePooling.lua
@@ -0,0 +1,93 @@
+local SpatialAveragePooling, parent = torch.class('nn.SpatialAveragePooling', 'nn.Module')
+
+function SpatialAveragePooling:__init(kW, kH, dW, dH, padW, padH)
+ parent.__init(self)
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW or 1
+ self.dH = dH or 1
+ self.padW = padW or 0
+ self.padH = padH or 0
+ self.ceil_mode = false
+ self.count_include_pad = true
+ self.divide = true
+end
+
+function SpatialAveragePooling:ceil()
+ self.ceil_mode = true
+ return self
+end
+
+function SpatialAveragePooling:floor()
+ self.ceil_mode = false
+ return self
+end
+
+function SpatialAveragePooling:setCountIncludePad()
+ self.count_include_pad = true
+ return self
+end
+
+function SpatialAveragePooling:setCountExcludePad()
+ self.count_include_pad = false
+ return self
+end
+
+local function backwardCompatible(self)
+ if self.ceil_mode == nil then
+ self.ceil_mode = false
+ self.count_include_pad = true
+ self.padH = 0
+ self.padW = 0
+ end
+end
+
+function SpatialAveragePooling:updateOutput(input)
+ backwardCompatible(self)
+ input.THNN.SpatialAveragePooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.ceil_mode,
+ self.count_include_pad
+ )
+ -- for backward compatibility with saved models
+ -- which are not supposed to have "divide" field
+ if not self.divide then
+ self.output:mul(self.kW*self.kH)
+ end
+ return self.output
+end
+
+function SpatialAveragePooling:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ input.THNN.SpatialAveragePooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.ceil_mode,
+ self.count_include_pad
+ )
+ -- for backward compatibility
+ if not self.divide then
+ self.gradInput:mul(self.kW*self.kH)
+ end
+ return self.gradInput
+ end
+end
+
+function SpatialAveragePooling:__tostring__()
+ local s = string.format('%s(%dx%d, %d,%d', torch.type(self),
+ self.kW, self.kH, self.dW, self.dH)
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ','.. self.padH
+ end
+ s = s .. ')'
+ return s
+end
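+
+-- Illustrative configuration (the setters above return self, so they chain):
+--   local pool = nn.SpatialAveragePooling(2, 2, 2, 2):ceil():setCountExcludePad()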
diff --git a/contrib/lua-torch/nn/SpatialBatchNormalization.lua b/contrib/lua-torch/nn/SpatialBatchNormalization.lua
new file mode 100644
index 000000000..c5004ce3a
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialBatchNormalization.lua
@@ -0,0 +1,35 @@
+--[[
+ This file implements Batch Normalization as described in the paper:
+ "Batch Normalization: Accelerating Deep Network Training
+ by Reducing Internal Covariate Shift"
+ by Sergey Ioffe, Christian Szegedy
+
+ This implementation is useful for inputs coming from convolution layers.
+ For non-convolutional layers, see BatchNormalization.lua
+
+ The operation implemented is:
+ y = ( x - mean(x) )
+ -------------------- * gamma + beta
+ standard-deviation(x)
+ where gamma and beta are learnable parameters.
+
+ The learning of gamma and beta is optional.
+
+ Usage:
+ with learnable parameters: nn.SpatialBatchNormalization(N [,eps] [,momentum])
+ where N = dimensionality of input
+ without learnable parameters: nn.SpatialBatchNormalization(N [,eps] [,momentum], false)
+
+ eps is a small value added to the variance to avoid divide-by-zero.
+ Defaults to 1e-5
+
+ During training, this layer keeps a running estimate of its computed mean and std.
+ The running sum is kept with a default momentum of 0.1 (unless overridden).
+ At test time, this running mean/std is used to normalize.
+]]--
+local BN, parent = torch.class('nn.SpatialBatchNormalization', 'nn.BatchNormalization')
+
+BN.__version = 2
+
+-- expected dimension of input
+BN.nDim = 4
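+
+-- Illustrative usage following the conventions documented above (sketch):
+--   local bn = nn.SpatialBatchNormalization(16)       -- 16 input feature planes
+--   local y = bn:forward(torch.randn(8, 16, 32, 32))  -- normalized per plane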
diff --git a/contrib/lua-torch/nn/SpatialClassNLLCriterion.lua b/contrib/lua-torch/nn/SpatialClassNLLCriterion.lua
new file mode 100644
index 000000000..fbd367410
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialClassNLLCriterion.lua
@@ -0,0 +1,81 @@
+local THNN = require 'nn.THNN'
+local SpatialClassNLLCriterion, parent = torch.class('nn.SpatialClassNLLCriterion', 'nn.Criterion')
+
+function SpatialClassNLLCriterion:__init(weights, sizeAverage)
+ parent.__init(self)
+ if sizeAverage ~= nil then
+ self.sizeAverage = sizeAverage
+ else
+ self.sizeAverage = true
+ end
+ if weights then
+ assert(weights:dim() == 1, "weights input should be 1-D Tensor")
+ self.weights = weights
+ end
+
+ self.output_tensor = torch.zeros(1)
+ self.total_weight_tensor = torch.ones(1)
+ self.target = torch.zeros(1):long()
+end
+
+function SpatialClassNLLCriterion:__len()
+ if (self.weights) then
+ return #self.weights
+ else
+ return 0
+ end
+end
+
+function SpatialClassNLLCriterion:updateOutput(input, target)
+ if type(target) == 'number' then
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
+ else
+ self.target = self.target:long()
+ end
+ self.target[1] = target
+ elseif torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.target = torch.CudaLongTensor and target:cudaLong() or target
+ else
+ self.target = target:long()
+ end
+
+ input.THNN.SpatialClassNLLCriterion_updateOutput(
+ input:cdata(),
+ self.target:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage,
+ THNN.optionalTensor(self.weights),
+ self.total_weight_tensor:cdata()
+ )
+ self.output = self.output_tensor[1]
+ return self.output, self.total_weight_tensor[1]
+end
+
+function SpatialClassNLLCriterion:updateGradInput(input, target)
+ if type(target) == 'number' then
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
+ else
+ self.target = self.target:long()
+ end
+ self.target[1] = target
+ elseif torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.target = torch.CudaLongTensor and target:cudaLong() or target
+ else
+ self.target = target:long()
+ end
+
+ self.gradInput:resizeAs(input):zero()
+
+ input.THNN.SpatialClassNLLCriterion_updateGradInput(
+ input:cdata(),
+ self.target:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage,
+ THNN.optionalTensor(self.weights),
+ self.total_weight_tensor:cdata()
+ )
+
+ return self.gradInput
+end
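+
+-- Illustrative usage (sketch): per-pixel NLL over log-probabilities, with
+-- LongTensor class-index targets of matching spatial size:
+--   local crit = nn.SpatialClassNLLCriterion()
+--   local input = torch.randn(2, 5, 4, 4)             -- log-probabilities, 5 classes
+--   local target = torch.LongTensor(2, 4, 4):random(1, 5)
+--   local loss = crit:forward(input, target)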
diff --git a/contrib/lua-torch/nn/SpatialContrastiveNormalization.lua b/contrib/lua-torch/nn/SpatialContrastiveNormalization.lua
new file mode 100644
index 000000000..0ad251ae4
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialContrastiveNormalization.lua
@@ -0,0 +1,36 @@
+local SpatialContrastiveNormalization, parent = torch.class('nn.SpatialContrastiveNormalization','nn.Module')
+
+function SpatialContrastiveNormalization:__init(nInputPlane, kernel, threshold, thresval)
+ parent.__init(self)
+
+ -- get args
+ self.nInputPlane = nInputPlane or 1
+ self.kernel = kernel or torch.Tensor(9,9):fill(1)
+ self.threshold = threshold or 1e-4
+ self.thresval = thresval or threshold or 1e-4
+ local kdim = self.kernel:nDimension()
+
+ -- check args
+ if kdim ~= 2 and kdim ~= 1 then
+ error('<SpatialContrastiveNormalization> averaging kernel must be 2D or 1D')
+ end
+ if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then
+ error('<SpatialContrastiveNormalization> averaging kernel must have ODD dimensions')
+ end
+
+ -- instantiate sub+div normalization
+ self.normalizer = nn.Sequential()
+ self.normalizer:add(nn.SpatialSubtractiveNormalization(self.nInputPlane, self.kernel))
+ self.normalizer:add(nn.SpatialDivisiveNormalization(self.nInputPlane, self.kernel,
+ self.threshold, self.thresval))
+end
+
+function SpatialContrastiveNormalization:updateOutput(input)
+ self.output = self.normalizer:forward(input)
+ return self.output
+end
+
+function SpatialContrastiveNormalization:updateGradInput(input, gradOutput)
+ self.gradInput = self.normalizer:backward(input, gradOutput)
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SpatialConvolution.lua b/contrib/lua-torch/nn/SpatialConvolution.lua
new file mode 100644
index 000000000..15a2b4b62
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialConvolution.lua
@@ -0,0 +1,155 @@
+local THNN = require 'nn.THNN'
+local SpatialConvolution, parent = torch.class('nn.SpatialConvolution', 'nn.Module')
+
+function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kW = kW
+ self.kH = kH
+
+ self.dW = dW
+ self.dH = dH
+ self.padW = padW or 0
+ self.padH = padH or self.padW
+
+ self.weight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW)
+ self.bias = torch.Tensor(nOutputPlane)
+ self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW)
+ self.gradBias = torch.Tensor(nOutputPlane)
+
+ self:reset()
+end
+
+function SpatialConvolution:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function SpatialConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
+ end
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ if self.bias then
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ end
+ else
+ self.weight:uniform(-stdv, stdv)
+ if self.bias then
+ self.bias:uniform(-stdv, stdv)
+ end
+ end
+end
+
+local function backCompatibility(self)
+ self.finput = self.finput or self.weight.new()
+ self.fgradInput = self.fgradInput or self.weight.new()
+ if self.padding then
+ self.padW = self.padding
+ self.padH = self.padding
+ self.padding = nil
+ else
+ self.padW = self.padW or 0
+ self.padH = self.padH or 0
+ end
+ if self.weight:dim() == 2 then
+ self.weight = self.weight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
+ end
+ if self.gradWeight and self.gradWeight:dim() == 2 then
+ self.gradWeight = self.gradWeight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
+ end
+end
+
+function SpatialConvolution:updateOutput(input)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ backCompatibility(self)
+ input.THNN.SpatialConvolutionMM_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH
+ )
+ return self.output
+end
+
+function SpatialConvolution:updateGradInput(input, gradOutput)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ if self.gradInput then
+ backCompatibility(self)
+ input.THNN.SpatialConvolutionMM_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH
+ )
+ return self.gradInput
+ end
+end
+
+function SpatialConvolution:accGradParameters(input, gradOutput, scale)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ scale = scale or 1
+ backCompatibility(self)
+ input.THNN.SpatialConvolutionMM_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ scale
+ )
+end
+
+function SpatialConvolution:type(type,tensorCache)
+ self.finput = self.finput and torch.Tensor()
+ self.fgradInput = self.fgradInput and torch.Tensor()
+ return parent.type(self,type,tensorCache)
+end
+
+function SpatialConvolution:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kW, self.kH)
+ if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d', self.dW, self.dH)
+ end
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ',' .. self.padH
+ end
+ if self.bias then
+ return s .. ')'
+ else
+ return s .. ') without bias'
+ end
+end
+
+function SpatialConvolution:clearState()
+ nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
+ return parent.clearState(self)
+end
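+
+-- Illustrative usage (sketch): 3 -> 16 planes, 5x5 kernel, stride 1, pad 2
+-- keeps the spatial size unchanged:
+--   local conv = nn.SpatialConvolution(3, 16, 5, 5, 1, 1, 2, 2)
+--   local y = conv:forward(torch.rand(3, 32, 32))  -- y has size 16x32x32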
diff --git a/contrib/lua-torch/nn/SpatialConvolutionLocal.lua b/contrib/lua-torch/nn/SpatialConvolutionLocal.lua
new file mode 100644
index 000000000..9494c2ffe
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialConvolutionLocal.lua
@@ -0,0 +1,188 @@
+local SpatialConvolutionLocal, parent = torch.class('nn.SpatialConvolutionLocal', 'nn.Module')
+
+function SpatialConvolutionLocal:__init(nInputPlane, nOutputPlane, iW, iH, kW, kH, dW, dH, padW, padH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kW = kW
+ self.kH = kH
+ self.iW = iW
+ self.iH = iH
+
+ self.dW = dW
+ self.dH = dH
+ self.padW = padW or 0
+ self.padH = padH or self.padW
+ self.oW = math.floor((self.padW * 2 + iW - self.kW) / self.dW) + 1
+ self.oH = math.floor((self.padH * 2 + iH - self.kH) / self.dH) + 1
+ assert(1 <= self.oW and 1 <= self.oH, 'illegal configuration: output width or height less than 1')
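+ -- e.g. iW = 10, kW = 3, dW = 1, padW = 0  =>  oW = floor((0+10-3)/1)+1 = 8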
+
+ self.weight = torch.Tensor(self.oH, self.oW, nOutputPlane, nInputPlane, kH, kW)
+ self.bias = torch.Tensor(nOutputPlane, self.oH, self.oW)
+ self.gradWeight = torch.Tensor():resizeAs(self.weight)
+ self.gradBias = torch.Tensor():resizeAs(self.bias)
+
+ self:reset()
+end
+
+function SpatialConvolutionLocal:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
+ end
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+ end
+end
+
+local function viewWeight(self)
+ self.weight = self.weight:view(self.oH * self.oW, self.nOutputPlane, self.nInputPlane * self.kH * self.kW)
+ if self.gradWeight and self.gradWeight:dim() > 0 then
+ self.gradWeight = self.gradWeight:view(self.oH * self.oW, self.nOutputPlane, self.nInputPlane * self.kH * self.kW)
+ end
+end
+
+local function unviewWeight(self)
+ self.weight = self.weight:view(self.oH, self.oW, self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
+ if self.gradWeight and self.gradWeight:dim() > 0 then
+ self.gradWeight = self.gradWeight:view(self.oH, self.oW, self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
+ end
+end
+
+local function checkInputSize(self, input)
+ if input:nDimension() == 3 then
+ if input:size(1) ~= self.nInputPlane or input:size(2) ~= self.iH or input:size(3) ~= self.iW then
+ error(string.format('Given input size: (%dx%dx%d) inconsistent with expected input size: (%dx%dx%d).',
+ input:size(1), input:size(2), input:size(3), self.nInputPlane, self.iH, self.iW))
+ end
+ elseif input:nDimension() == 4 then
+ if input:size(2) ~= self.nInputPlane or input:size(3) ~= self.iH or input:size(4) ~= self.iW then
+ error(string.format('Given input size: (%dx%dx%dx%d) inconsistent with expected input size: (batchsize x%dx%dx%d).',
+ input:size(1), input:size(2), input:size(3), input:size(4), self.nInputPlane, self.iH, self.iW))
+ end
+ else
+ error('3D or 4D(batch mode) tensor expected')
+ end
+end
+
+local function checkOutputSize(self, input, output)
+ if output:nDimension() ~= input:nDimension() then
+ error('inconsistent dimension between output and input.')
+ end
+ if output:nDimension() == 3 then
+ if output:size(1) ~= self.nOutputPlane or output:size(2) ~= self.oH or output:size(3) ~= self.oW then
+ error(string.format('Given output size: (%dx%dx%d) inconsistent with expected output size: (%dx%dx%d).',
+ output:size(1), output:size(2), output:size(3), self.nOutputPlane, self.oH, self.oW))
+ end
+ elseif output:nDimension() == 4 then
+ if output:size(2) ~= self.nOutputPlane or output:size(3) ~= self.oH or output:size(4) ~= self.oW then
+ error(string.format('Given output size: (%dx%dx%dx%d) inconsistent with expected output size: (batchsize x%dx%dx%d).',
+ output:size(1), output:size(2), output:size(3), output:size(4), self.nOutputPlane, self.oH, self.oW))
+ end
+ else
+ error('3D or 4D(batch mode) tensor expected')
+ end
+end
+
+function SpatialConvolutionLocal:updateOutput(input)
+ self.finput = self.finput or input.new()
+ self.fgradInput = self.fgradInput or input.new()
+ checkInputSize(self, input)
+ viewWeight(self)
+ input.THNN.SpatialConvolutionLocal_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.iW, self.iH,
+ self.oW, self.oH
+ )
+ unviewWeight(self)
+ return self.output
+end
+
+function SpatialConvolutionLocal:updateGradInput(input, gradOutput)
+ checkInputSize(self, input)
+ checkOutputSize(self, input, gradOutput)
+ if self.gradInput then
+ viewWeight(self)
+ input.THNN.SpatialConvolutionLocal_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.iW, self.iH,
+ self.oW, self.oH
+ )
+ unviewWeight(self)
+ return self.gradInput
+ end
+end
+
+function SpatialConvolutionLocal:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ checkInputSize(self, input)
+ checkOutputSize(self, input, gradOutput)
+ viewWeight(self)
+ input.THNN.SpatialConvolutionLocal_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.iW, self.iH,
+ self.oW, self.oH,
+ scale
+ )
+ unviewWeight(self)
+end
+
+function SpatialConvolutionLocal:type(type,tensorCache)
+ self.finput = self.finput and torch.Tensor()
+ self.fgradInput = self.fgradInput and torch.Tensor()
+ return parent.type(self,type,tensorCache)
+end
+
+function SpatialConvolutionLocal:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%d, %dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.iW, self.iH, self.kW, self.kH)
+ if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d', self.dW, self.dH)
+ end
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ',' .. self.padH
+ end
+ return s .. ')'
+end
+
+function SpatialConvolutionLocal:clearState()
+ nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialConvolutionMM.lua b/contrib/lua-torch/nn/SpatialConvolutionMM.lua
new file mode 100644
index 000000000..f20734f9b
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialConvolutionMM.lua
@@ -0,0 +1,139 @@
+local THNN = require 'nn.THNN'
+local SpatialConvolutionMM, parent = torch.class('nn.SpatialConvolutionMM', 'nn.Module')
+
+function SpatialConvolutionMM:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kW = kW
+ self.kH = kH
+
+ self.dW = dW
+ self.dH = dH
+ self.padW = padW or 0
+ self.padH = padH or self.padW
+
+ self.weight = torch.Tensor(nOutputPlane, nInputPlane*kH*kW)
+ self.bias = torch.Tensor(nOutputPlane)
+ self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane*kH*kW)
+ self.gradBias = torch.Tensor(nOutputPlane)
+
+ self:reset()
+end
+
+function SpatialConvolutionMM:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function SpatialConvolutionMM:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
+ end
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+ end
+end
+
+function SpatialConvolutionMM:updateOutput(input)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ self.finput = self.finput or input.new()
+ self.fgradInput = self.fgradInput or input.new()
+ -- backward compatibility
+ if self.padding then
+ self.padW = self.padding
+ self.padH = self.padding
+ self.padding = nil
+ end
+ input.THNN.SpatialConvolutionMM_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH
+ )
+ return self.output
+end
+
+function SpatialConvolutionMM:updateGradInput(input, gradOutput)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ if self.gradInput then
+ input.THNN.SpatialConvolutionMM_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH
+ )
+ return self.gradInput
+ end
+end
+
+function SpatialConvolutionMM:accGradParameters(input, gradOutput, scale)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ scale = scale or 1
+ assert((self.bias and self.gradBias) or (self.bias == nil and self.gradBias == nil))
+ input.THNN.SpatialConvolutionMM_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ scale
+ )
+end
+
+function SpatialConvolutionMM:type(type,tensorCache)
+ self.finput = self.finput and torch.Tensor()
+ self.fgradInput = self.fgradInput and torch.Tensor()
+ return parent.type(self,type,tensorCache)
+end
+
+function SpatialConvolutionMM:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kW, self.kH)
+ if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d', self.dW, self.dH)
+ end
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ',' .. self.padH
+ end
+ if self.bias then
+ return s .. ')'
+ else
+ return s .. ') without bias'
+ end
+end
+
+function SpatialConvolutionMM:clearState()
+ nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
+ return parent.clearState(self)
+end
+
diff --git a/contrib/lua-torch/nn/SpatialConvolutionMap.lua b/contrib/lua-torch/nn/SpatialConvolutionMap.lua
new file mode 100644
index 000000000..9051c119e
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialConvolutionMap.lua
@@ -0,0 +1,154 @@
+local SpatialConvolutionMap, parent = torch.class('nn.SpatialConvolutionMap', 'nn.Module')
+
+nn.tables = nn.tables or {}
+
+function nn.tables.full(nin, nout)
+ local ft = torch.Tensor(nin*nout,2)
+ local p = 1
+ for j=1,nout do
+ for i=1,nin do
+ ft[p][1] = i
+ ft[p][2] = j
+ p = p + 1
+ end
+ end
+ return ft
+end
+
+function nn.tables.oneToOne(nfeat)
+ local ft = torch.Tensor(nfeat,2)
+ for i=1,nfeat do
+ ft[i][1] = i
+ ft[i][2] = i
+ end
+ return ft
+end
+
+function nn.tables.random(nin, nout, nto)
+ local nker = nto * nout
+ local tbl = torch.Tensor(nker, 2)
+ local fi = torch.randperm(nin)
+ local frcntr = 1
+ local nfi = math.floor(nin/nto) -- number of distinct nto chunks
+ local totbl = tbl:select(2,2)
+ local frtbl = tbl:select(2,1)
+ local fitbl = fi:narrow(1, 1, (nfi * nto)) -- part of fi that covers distinct chunks
+ local ufrtbl= frtbl:unfold(1, nto, nto)
+ local utotbl= totbl:unfold(1, nto, nto)
+ local ufitbl= fitbl:unfold(1, nto, nto)
+
+ -- start filling frtbl
+ for i=1,nout do -- for each unit in the target map
+ ufrtbl:select(1,i):copy(ufitbl:select(1,frcntr))
+ frcntr = frcntr + 1
+ if frcntr-1 == nfi then -- reset fi
+ fi:copy(torch.randperm(nin))
+ frcntr = 1
+ end
+ end
+ for tocntr=1,utotbl:size(1) do
+ utotbl:select(1,tocntr):fill(tocntr)
+ end
+ return tbl
+end
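+
+-- Illustrative use of these connection tables with the module below (sketch):
+--   nn.SpatialConvolutionMap(nn.tables.full(3, 16), 5, 5)       -- dense 3 -> 16
+--   nn.SpatialConvolutionMap(nn.tables.random(16, 32, 4), 3, 3) -- 4 random inputs per output map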
+
+function SpatialConvolutionMap:__init(conMatrix, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+ self.connTable = conMatrix
+ self.nInputPlane = self.connTable:select(2,1):max()
+ self.nOutputPlane = self.connTable:select(2,2):max()
+ self.weight = torch.Tensor(self.connTable:size(1), kH, kW)
+ self.bias = torch.Tensor(self.nOutputPlane)
+ self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW)
+ self.gradBias = torch.Tensor(self.nOutputPlane)
+
+ self:reset()
+end
+
+function SpatialConvolutionMap:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+ end
+ else
+ local ninp = torch.Tensor(self.nOutputPlane):zero()
+ for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end
+ for k=1,self.connTable:size(1) do
+ stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]])
+ if nn.oldSeed then
+ self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end)
+ else
+ self.weight:select(1,k):uniform(-stdv,stdv)
+ end
+ end
+ for k=1,self.bias:size(1) do
+ stdv = 1/math.sqrt(self.kW*self.kH*ninp[k])
+ self.bias[k] = torch.uniform(-stdv,stdv)
+ end
+ end
+end
+
+function SpatialConvolutionMap:updateOutput(input)
+ input.THNN.SpatialConvolutionMap_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.connTable:cdata(),
+ self.nInputPlane,
+ self.nOutputPlane,
+ self.dW, self.dH
+ )
+ return self.output
+end
+
+function SpatialConvolutionMap:updateGradInput(input, gradOutput)
+ input.THNN.SpatialConvolutionMap_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.connTable:cdata(),
+ self.nInputPlane,
+ self.nOutputPlane,
+ self.dW, self.dH
+ )
+ return self.gradInput
+end
+
+function SpatialConvolutionMap:accGradParameters(input, gradOutput, scale)
+ input.THNN.SpatialConvolutionMap_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.connTable:cdata(),
+ self.nInputPlane,
+ self.nOutputPlane,
+ self.dW, self.dH,
+ scale or 1
+ )
+end
+
+function SpatialConvolutionMap:decayParameters(decay)
+ self.weight:add(-decay, self.weight)
+ self.bias:add(-decay, self.bias)
+end
diff --git a/contrib/lua-torch/nn/SpatialCrossMapLRN.lua b/contrib/lua-torch/nn/SpatialCrossMapLRN.lua
new file mode 100644
index 000000000..088eb07f0
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialCrossMapLRN.lua
@@ -0,0 +1,153 @@
+local SpatialCrossMapLRN, parent = torch.class('nn.SpatialCrossMapLRN', 'nn.Module')
+
+function SpatialCrossMapLRN:__init(size, alpha, beta, k)
+ parent.__init(self)
+
+ self.size = size
+ self.alpha = alpha or 0.0001
+ self.beta = beta or 0.75
+ self.k = k or 1
+end
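+
+-- The normalization implemented below (a summary of the CPU branch; the sum
+-- runs over a size-wide channel window centered on c):
+--   y[c] = x[c] * (k + alpha/size * sum_{c' in window(c)} x[c']^2)^(-beta)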
+
+function SpatialCrossMapLRN:updateOutput(input)
+ assert(input:dim() == 3 or input:dim() == 4,
+ 'Input must be 3D or 4D')
+
+ self.scale = self.scale or input.new()
+
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ input.THNN.SpatialCrossMapLRN_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.scale:cdata(),
+ self.size,
+ self.alpha,
+ self.beta,
+ self.k
+ )
+ else
+ local isBatch = true
+ if input:dim() == 3 then
+ input = nn.utils.addSingletonDimension(input)
+ isBatch = false
+ end
+
+ local batchSize = input:size(1)
+ local channels = input:size(2)
+ local inputHeight = input:size(3)
+ local inputWidth = input:size(4)
+
+ self.output:resizeAs(input)
+ self.scale:resizeAs(input)
+
+ -- use output storage as temporary buffer
+ local inputSquare = self.output
+ inputSquare:pow(input, 2)
+
+ local prePad = (self.size - 1)/2 + 1
+ local prePadCrop = prePad > channels and channels or prePad
+
+ local scaleFirst = self.scale:select(2,1)
+ scaleFirst:zero()
+ -- compute first feature map normalization
+ for c = 1, prePadCrop do
+ scaleFirst:add(inputSquare:select(2, c))
+ end
+
+ -- reuse computations for next feature maps normalization
+ -- by adding the next feature map and removing the previous
+ for c = 2, channels do
+ local scalePrevious = self.scale:select(2, c -1)
+ local scaleCurrent = self.scale:select(2, c)
+ scaleCurrent:copy(scalePrevious)
+ if c < channels - prePad + 2 then
+ local squareNext = inputSquare:select(2, c + prePad - 1)
+ scaleCurrent:add(1, squareNext)
+ end
+ if c > prePad then
+ local squarePrevious = inputSquare:select(2, c - prePad )
+ scaleCurrent:add(-1, squarePrevious)
+ end
+ end
+
+ self.scale:mul(self.alpha/self.size):add(self.k)
+
+ self.output:pow(self.scale,-self.beta)
+ self.output:cmul(input)
+
+ if not isBatch then
+ self.output = self.output[1]
+ end
+ end
+
+ return self.output
+end
+
+function SpatialCrossMapLRN:updateGradInput(input, gradOutput)
+ assert(input:dim() == 3 or input:dim() == 4,
+ 'Input must be 3D or 4D')
+
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ input.THNN.SpatialCrossMapLRN_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.scale:cdata(),
+ self.output:cdata(),
+ self.size,
+ self.alpha,
+ self.beta,
+ self.k
+ )
+ else
+ local isBatch = true
+ if input:dim() == 3 then
+ input = nn.utils.addSingletonDimension(input)
+ gradOutput = nn.utils.addSingletonDimension(gradOutput)
+ self.output = nn.utils.addSingletonDimension(self.output)
+ isBatch = false
+ end
+
+ local batchSize = input:size(1)
+ local channels = input:size(2)
+ local inputHeight = input:size(3)
+ local inputWidth = input:size(4)
+
+ self.paddedRatio = self.paddedRatio or input.new()
+ self.accumRatio = self.accumRatio or input.new()
+ self.paddedRatio:resize(channels + self.size - 1, inputHeight, inputWidth)
+ self.accumRatio:resize(inputHeight,inputWidth)
+
+ local cacheRatioValue = 2*self.alpha*self.beta/self.size
+ local inversePrePad = self.size - (self.size - 1) / 2
+
+ self.gradInput:resizeAs(input)
+ self.gradInput:pow(self.scale,-self.beta):cmul(gradOutput)
+
+ self.paddedRatio:zero()
+ local paddedRatioCenter = self.paddedRatio:narrow(1, inversePrePad, channels)
+ for n = 1, batchSize do
+ paddedRatioCenter:cmul(gradOutput[n],self.output[n])
+ paddedRatioCenter:cdiv(self.scale[n])
+ self.accumRatio:sum(self.paddedRatio:narrow(1,1,self.size-1), 1)
+ for c = 1, channels do
+ self.accumRatio:add(self.paddedRatio[c+self.size-1])
+ self.gradInput[n][c]:addcmul(-cacheRatioValue, input[n][c], self.accumRatio)
+ self.accumRatio:add(-1, self.paddedRatio[c])
+ end
+ end
+
+ if not isBatch then
+ self.gradInput = self.gradInput[1]
+ self.output = self.output[1]
+ end
+ end
+
+ return self.gradInput
+end
+
+
+function SpatialCrossMapLRN:clearState()
+ nn.utils.clear(self, 'scale', 'paddedRatio', 'accumRatio')
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialDepthWiseConvolution.lua b/contrib/lua-torch/nn/SpatialDepthWiseConvolution.lua
new file mode 100644
index 000000000..1132f04cb
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialDepthWiseConvolution.lua
@@ -0,0 +1,139 @@
+local THNN = require 'nn.THNN'
+local SpatialDepthWiseConvolution, parent = torch.class('nn.SpatialDepthWiseConvolution', 'nn.Module')
+
+function SpatialDepthWiseConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kW = kW
+ self.kH = kH
+
+ self.dW = dW
+ self.dH = dH
+ self.padW = padW or 0
+ self.padH = padH or self.padW
+
+ self.weight = torch.Tensor(nOutputPlane, nInputPlane*kH*kW)
+ self.bias = torch.Tensor(nOutputPlane, nInputPlane)
+ self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane*kH*kW)
+ self.gradBias = torch.Tensor(nOutputPlane, nInputPlane)
+
+ self:reset()
+end
+
+function SpatialDepthWiseConvolution:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function SpatialDepthWiseConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
+ end
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+ end
+end
+
+function SpatialDepthWiseConvolution:updateOutput(input)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ self.finput = self.finput or input.new()
+ self.fgradInput = self.fgradInput or input.new()
+ -- backward compatibility
+ if self.padding then
+ self.padW = self.padding
+ self.padH = self.padding
+ self.padding = nil
+ end
+ input.THNN.SpatialDepthWiseConvolution_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH
+ )
+ return self.output
+end
+
+function SpatialDepthWiseConvolution:updateGradInput(input, gradOutput)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ if self.gradInput then
+ input.THNN.SpatialDepthWiseConvolution_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH
+ )
+ return self.gradInput
+ end
+end
+
+function SpatialDepthWiseConvolution:accGradParameters(input, gradOutput, scale)
+ assert(input.THNN, torch.type(input)..'.THNN backend not imported')
+ scale = scale or 1
+ assert((self.bias and self.gradBias) or (self.bias == nil and self.gradBias == nil))
+ input.THNN.SpatialDepthWiseConvolution_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ scale
+ )
+end
+
+function SpatialDepthWiseConvolution:type(type,tensorCache)
+ self.finput = self.finput and torch.Tensor()
+ self.fgradInput = self.fgradInput and torch.Tensor()
+ return parent.type(self,type,tensorCache)
+end
+
+function SpatialDepthWiseConvolution:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kW, self.kH)
+ if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d', self.dW, self.dH)
+ end
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ',' .. self.padH
+ end
+ if self.bias then
+ return s .. ')'
+ else
+ return s .. ') without bias'
+ end
+end
+
+function SpatialDepthWiseConvolution:clearState()
+ nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
+ return parent.clearState(self)
+end
+
diff --git a/contrib/lua-torch/nn/SpatialDilatedConvolution.lua b/contrib/lua-torch/nn/SpatialDilatedConvolution.lua
new file mode 100644
index 000000000..a0590c7e9
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialDilatedConvolution.lua
@@ -0,0 +1,80 @@
+local THNN = require 'nn.THNN'
+local SpatialDilatedConvolution, parent = torch.class('nn.SpatialDilatedConvolution', 'nn.SpatialConvolution')
+
+function SpatialDilatedConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH, dilationW, dilationH)
+ parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
+
+ self.dilationW = dilationW or 1
+ self.dilationH = dilationH or 1
+end
+
+function SpatialDilatedConvolution:updateOutput(input)
+ self.finput = self.finput or self.weight.new()
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input.THNN.SpatialDilatedConvolution_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.dilationW, self.dilationH
+ )
+ return self.output
+end
+
+function SpatialDilatedConvolution:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input.THNN.SpatialDilatedConvolution_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.dilationW, self.dilationH
+ )
+ return self.gradInput
+ end
+end
+
+function SpatialDilatedConvolution:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input.THNN.SpatialDilatedConvolution_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.dilationW, self.dilationH,
+ scale
+ )
+end
+
+function SpatialDilatedConvolution:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kW, self.kH)
+ if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d', self.dW, self.dH)
+ end
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ',' .. self.padH
+ end
+ s = s .. ', ' .. self.dilationW .. ',' .. self.dilationH
+ if self.bias then
+ return s .. ')'
+ else
+ return s .. ') without bias'
+ end
+end
diff --git a/contrib/lua-torch/nn/SpatialDilatedMaxPooling.lua b/contrib/lua-torch/nn/SpatialDilatedMaxPooling.lua
new file mode 100644
index 000000000..34525a4ad
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialDilatedMaxPooling.lua
@@ -0,0 +1,67 @@
+local THNN = require 'nn.THNN'
+local SpatialDilatedMaxPooling, parent = torch.class('nn.SpatialDilatedMaxPooling', 'nn.SpatialMaxPooling')
+
+function SpatialDilatedMaxPooling:__init(kW, kH, dW, dH, padW, padH, dilationW, dilationH)
+ parent.__init(self, kW, kH, dW, dH, padW, padH)
+
+ self.dilationW = dilationW or 1
+ self.dilationH = dilationH or 1
+end
+
+function SpatialDilatedMaxPooling:updateOutput(input)
+ self.indices = self.indices or torch.LongTensor()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+ else
+ self.indices = self.indices:long()
+ end
+
+ local dims = input:dim()
+ self.iheight = input:size(dims-1)
+ self.iwidth = input:size(dims)
+
+ input.THNN.SpatialDilatedMaxPooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.indices:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.dilationW, self.dilationH,
+ self.ceil_mode
+ )
+ return self.output
+end
+
+function SpatialDilatedMaxPooling:updateGradInput(input, gradOutput)
+ input.THNN.SpatialDilatedMaxPooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.indices:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.dilationW, self.dilationH,
+ self.ceil_mode
+ )
+ return self.gradInput
+end
+
+function SpatialDilatedMaxPooling:__tostring__()
+ local s = string.format('%s(%dx%d, %d,%d', torch.type(self),
+ self.kW, self.kH, self.dW, self.dH)
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ','.. self.padH
+ end
+ s = s .. ', ' .. self.dilationW .. ',' .. self.dilationH
+ s = s .. ')'
+ return s
+end
+
+function SpatialDilatedMaxPooling:clearState()
+ if self.indices then
+ self.indices:set()
+ end
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialDivisiveNormalization.lua b/contrib/lua-torch/nn/SpatialDivisiveNormalization.lua
new file mode 100644
index 000000000..dc2b8c530
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialDivisiveNormalization.lua
@@ -0,0 +1,136 @@
+local SpatialDivisiveNormalization, parent = torch.class('nn.SpatialDivisiveNormalization','nn.Module')
+
+function SpatialDivisiveNormalization:__init(nInputPlane, kernel, threshold, thresval)
+ parent.__init(self)
+
+ -- get args
+ self.nInputPlane = nInputPlane or 1
+ self.kernel = kernel or torch.Tensor(9,9):fill(1)
+ self.threshold = threshold or 1e-4
+ self.thresval = thresval or threshold or 1e-4
+ local kdim = self.kernel:nDimension()
+
+ -- check args
+ if kdim ~= 2 and kdim ~= 1 then
+ error('<SpatialDivisiveNormalization> averaging kernel must be 2D or 1D')
+ end
+ if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then
+ error('<SpatialDivisiveNormalization> averaging kernel must have ODD dimensions')
+ end
+
+ -- padding values
+ local padH = math.floor(self.kernel:size(1)/2)
+ local padW = padH
+ if kdim == 2 then
+ padW = math.floor(self.kernel:size(2)/2)
+ end
+
+ -- create convolutional mean estimator
+ self.meanestimator = nn.Sequential()
+ self.meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH))
+ if kdim == 2 then
+ self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, self.kernel:size(2), self.kernel:size(1)))
+ else
+ self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1))
+ self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1)))
+ end
+ self.meanestimator:add(nn.Replicate(self.nInputPlane,1,3))
+
+ -- create convolutional std estimator
+ self.stdestimator = nn.Sequential()
+ self.stdestimator:add(nn.Square())
+ self.stdestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH))
+ if kdim == 2 then
+ self.stdestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, self.kernel:size(2), self.kernel:size(1)))
+ else
+ self.stdestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1))
+ self.stdestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1)))
+ end
+ self.stdestimator:add(nn.Replicate(self.nInputPlane,1,3))
+ self.stdestimator:add(nn.Sqrt())
+
+ -- set kernel and bias
+ if kdim == 2 then
+ self.kernel:div(self.kernel:sum() * self.nInputPlane)
+ for i = 1,self.nInputPlane do
+ self.meanestimator.modules[2].weight[1][i] = self.kernel
+ self.stdestimator.modules[3].weight[1][i] = self.kernel
+ end
+ self.meanestimator.modules[2].bias:zero()
+ self.stdestimator.modules[3].bias:zero()
+ else
+ self.kernel:div(self.kernel:sum() * math.sqrt(self.nInputPlane))
+ for i = 1,self.nInputPlane do
+ self.meanestimator.modules[2].weight[i]:copy(self.kernel)
+ self.meanestimator.modules[3].weight[1][i]:copy(self.kernel)
+ self.stdestimator.modules[3].weight[i]:copy(self.kernel)
+ self.stdestimator.modules[4].weight[1][i]:copy(self.kernel)
+ end
+ self.meanestimator.modules[2].bias:zero()
+ self.meanestimator.modules[3].bias:zero()
+ self.stdestimator.modules[3].bias:zero()
+ self.stdestimator.modules[4].bias:zero()
+ end
+
+ -- other operation
+ self.normalizer = nn.CDivTable()
+ self.divider = nn.CDivTable()
+ self.thresholder = nn.Threshold(self.threshold, self.thresval)
+
+ -- coefficient array, to adjust side effects
+ self.coef = torch.Tensor(1,1,1)
+end
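+
+-- Effective computation (a summary of updateOutput below): the local std is
+-- divided by a border-correction coefficient, floored via the thresholder,
+-- and the input is divided by the result:
+--   y = x / Threshold(threshold, thresval)( localstd(x) / coef )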
+
+function SpatialDivisiveNormalization:updateOutput(input)
+
+ self.localstds = self.stdestimator:updateOutput(input)
+
+ -- compute side coefficients
+ local dim = input:dim()
+ if self.localstds:dim() ~= self.coef:dim() or (input:size(dim) ~= self.coef:size(dim)) or (input:size(dim-1) ~= self.coef:size(dim-1)) then
+ self.ones = self.ones or input.new()
+ if dim == 4 then
+ -- batch mode
+ self.ones:resizeAs(input[1]):fill(1)
+ local coef = self.meanestimator:updateOutput(self.ones)
+ self._coef = self._coef or input.new()
+ self._coef:resizeAs(coef):copy(coef) -- make contiguous for view
+ self.coef = self._coef:view(1,table.unpack(self._coef:size():totable())):expandAs(self.localstds)
+ else
+ self.ones:resizeAs(input):fill(1)
+ self.coef = self.meanestimator:updateOutput(self.ones)
+ end
+
+ end
+
+ -- normalize std dev
+ self.adjustedstds = self.divider:updateOutput{self.localstds, self.coef}
+ self.thresholdedstds = self.thresholder:updateOutput(self.adjustedstds)
+ self.output = self.normalizer:updateOutput{input, self.thresholdedstds}
+
+ -- done
+ return self.output
+end
+
+function SpatialDivisiveNormalization:updateGradInput(input, gradOutput)
+ -- resize grad
+ self.gradInput:resizeAs(input):zero()
+
+ -- backprop through all modules
+ local gradnorm = self.normalizer:updateGradInput({input, self.thresholdedstds}, gradOutput)
+ local gradadj = self.thresholder:updateGradInput(self.adjustedstds, gradnorm[2])
+ local graddiv = self.divider:updateGradInput({self.localstds, self.coef}, gradadj)
+ self.gradInput:add(self.stdestimator:updateGradInput(input, graddiv[1]))
+ self.gradInput:add(gradnorm[1])
+
+ -- done
+ return self.gradInput
+end
+
+function SpatialDivisiveNormalization:clearState()
+ if self.ones then self.ones:set() end
+ if self._coef then self._coef:set() end
+ self.meanestimator:clearState()
+ self.stdestimator:clearState()
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialDropout.lua b/contrib/lua-torch/nn/SpatialDropout.lua
new file mode 100644
index 000000000..4320061b7
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialDropout.lua
@@ -0,0 +1,55 @@
+local SpatialDropout, Parent = torch.class('nn.SpatialDropout', 'nn.Module')
+
+function SpatialDropout:__init(p,stochasticInference)
+ Parent.__init(self)
+ self.p = p or 0.5
+ self.train = true
+ self.stochastic_inference = stochasticInference or false
+ self.noise = torch.Tensor()
+end
+
+function SpatialDropout:updateOutput(input)
+ self.output:resizeAs(input):copy(input)
+ if self.train or self.stochastic_inference then
+ if input:dim() == 4 then
+ self.noise:resize(input:size(1), input:size(2), 1, 1)
+ elseif input:dim() == 3 then
+ self.noise:resize(input:size(1), 1, 1)
+ else
+ error('Input must be 4D (nbatch, nfeat, h, w) or 3D (nfeat, h, w)')
+ end
+ self.noise:bernoulli(1-self.p)
+ -- We expand the random dropouts to the entire feature map because the
+ -- features are likely correlated across the map and so the dropout
+ -- should also be correlated.
+ self.output:cmul(torch.expandAs(self.noise, input))
+ else
+ self.output:mul(1-self.p)
+ end
+ return self.output
+end
+
+function SpatialDropout:updateGradInput(input, gradOutput)
+ if self.train then
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+ self.gradInput:cmul(torch.expandAs(self.noise, input)) -- simply mask the gradients with the noise vector
+ else
+ error('backprop only defined while training')
+ end
+ return self.gradInput
+end
+
+function SpatialDropout:setp(p)
+ self.p = p
+end
+
+function SpatialDropout:__tostring__()
+ return string.format('%s(%f)', torch.type(self), self.p)
+end
+
+function SpatialDropout:clearState()
+ if self.noise then
+ self.noise:set()
+ end
+ return Parent.clearState(self)
+end
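+
+-- Illustrative usage (sketch): whole feature maps are dropped during
+-- training; at evaluation the activations are scaled by (1-p) instead:
+--   local drop = nn.SpatialDropout(0.3)
+--   drop:training();  local y = drop:forward(torch.rand(2, 8, 5, 5))
+--   drop:evaluate();  y = drop:forward(torch.rand(2, 8, 5, 5))  -- scaled by 0.7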
diff --git a/contrib/lua-torch/nn/SpatialFractionalMaxPooling.lua b/contrib/lua-torch/nn/SpatialFractionalMaxPooling.lua
new file mode 100644
index 000000000..884751d41
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialFractionalMaxPooling.lua
@@ -0,0 +1,165 @@
+local SpatialFractionalMaxPooling, parent =
+ torch.class('nn.SpatialFractionalMaxPooling', 'nn.Module')
+
+-- Usage:
+-- nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH, outW, outH)
+-- the output should be the exact size (outH x outW)
+-- nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH, ratioW, ratioH)
+-- the output should be the size (floor(inH x ratioH) x floor(inW x ratioW))
+-- ratios are numbers between (0, 1) exclusive
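+-- e.g. nn.SpatialFractionalMaxPooling(2, 2, 0.5, 0.5) halves each spatial
+-- dimension, while nn.SpatialFractionalMaxPooling(2, 2, 7, 7) pools to 7x7.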
+function SpatialFractionalMaxPooling:__init(poolSizeW, poolSizeH, arg1, arg2)
+ parent.__init(self)
+ assert(poolSizeW >= 2)
+ assert(poolSizeH >= 2)
+
+ -- Pool size (how wide the pooling for each output unit is)
+ self.poolSizeW = poolSizeW
+ self.poolSizeH = poolSizeH
+
+ -- Random samples are drawn for all
+ -- batch * plane * (height, width; i.e., 2) points. This determines
+ -- the 2d "pseudorandom" overlapping pooling regions for each
+ -- (batch element x input plane). A new set of random samples is
+ -- drawn every updateOutput call, unless we disable it via
+ -- :fixPoolingRegions().
+ self.randomSamples = nil
+
+  -- Flag to disable re-generation of random samples on every
+  -- updateOutput call (useful for testing)
+ self.newRandomPool = false
+
+ if arg1 >= 1 and arg2 >= 1 then
+ -- Desired output size: the input tensor will determine the reduction
+ -- ratio
+ self.outW = arg1
+ self.outH = arg2
+ else
+    -- A reduction ratio was given instead; the actual output size
+    -- is derived from the size of each input
+ self.ratioW = arg1
+ self.ratioH = arg2
+
+ -- The reduction ratio must be between 0 and 1
+ assert(self.ratioW > 0 and self.ratioW < 1)
+ assert(self.ratioH > 0 and self.ratioH < 1)
+ end
+end
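+
+-- Construction sketches (values are illustrative):
+--   nn.SpatialFractionalMaxPooling(2, 2, 16, 16)   -- exact 16x16 output
+--   nn.SpatialFractionalMaxPooling(2, 2, 0.5, 0.5) -- halve each spatial dim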
+
+function SpatialFractionalMaxPooling:getBufferSize_(input)
+ local batchSize = 0
+ local planeSize = 0
+
+ if input:nDimension() == 3 then
+ batchSize = 1
+ planeSize = input:size(1)
+ elseif input:nDimension() == 4 then
+ batchSize = input:size(1)
+ planeSize = input:size(2)
+ else
+ error('input must be dim 3 or 4')
+ end
+
+ return torch.LongStorage({batchSize, planeSize, 2})
+end
+
+function SpatialFractionalMaxPooling:initSampleBuffer_(input)
+ local sampleBufferSize = self:getBufferSize_(input)
+
+ if self.randomSamples == nil then
+ self.randomSamples = input.new():resize(sampleBufferSize):uniform()
+ elseif (self.randomSamples:size(1) ~= sampleBufferSize[1] or
+ self.randomSamples:size(2) ~= sampleBufferSize[2]) then
+ self.randomSamples:resize(sampleBufferSize):uniform()
+ else
+ if not self.newRandomPool then
+ -- Create new pooling windows, since this is a subsequent call
+ self.randomSamples:uniform()
+ end
+ end
+end
+
+function SpatialFractionalMaxPooling:getOutputSizes_(input)
+ local outW = self.outW
+ local outH = self.outH
+ if self.ratioW ~= nil and self.ratioH ~= nil then
+ if input:nDimension() == 4 then
+ outW = math.floor(input:size(4) * self.ratioW)
+ outH = math.floor(input:size(3) * self.ratioH)
+ elseif input:nDimension() == 3 then
+ outW = math.floor(input:size(3) * self.ratioW)
+ outH = math.floor(input:size(2) * self.ratioH)
+ else
+ error('input must be dim 3 or 4')
+ end
+
+ -- Neither can be smaller than 1
+ assert(outW > 0, 'reduction ratio or input width too small')
+ assert(outH > 0, 'reduction ratio or input height too small')
+ else
+ assert(outW ~= nil and outH ~= nil)
+ end
+
+ return outW, outH
+end
+
+-- Call this to turn off regeneration of random pooling regions each
+-- updateOutput call.
+function SpatialFractionalMaxPooling:fixPoolingRegions(val)
+ if val == nil then
+ val = true
+ end
+
+ self.newRandomPool = val
+ return self
+end
+
+function SpatialFractionalMaxPooling:updateOutput(input)
+ self.indices = self.indices or torch.LongTensor()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+ else
+ self.indices = self.indices:long()
+ end
+ self:initSampleBuffer_(input)
+ local outW, outH = self:getOutputSizes_(input)
+
+ input.THNN.SpatialFractionalMaxPooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ outW, outH, self.poolSizeW, self.poolSizeH,
+ self.indices:cdata(), self.randomSamples:cdata())
+ return self.output
+end
+
+function SpatialFractionalMaxPooling:updateGradInput(input, gradOutput)
+ assert(self.randomSamples ~= nil,
+ 'must call updateOutput/forward first')
+
+ local outW, outH = self:getOutputSizes_(input)
+
+ input.THNN.SpatialFractionalMaxPooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ outW, outH, self.poolSizeW, self.poolSizeH,
+ self.indices:cdata())
+ return self.gradInput
+end
+
+-- backward compat
+function SpatialFractionalMaxPooling:empty()
+ self:clearState()
+end
+
+function SpatialFractionalMaxPooling:clearState()
+ self.indices = nil
+ self.randomSamples = nil
+ return parent.clearState(self)
+end
+
+function SpatialFractionalMaxPooling:__tostring__()
+ return string.format('%s(%dx%d, %d,%d)', torch.type(self),
+ self.outW and self.outW or self.ratioW,
+ self.outH and self.outH or self.ratioH,
+ self.poolSizeW, self.poolSizeH)
+end
diff --git a/contrib/lua-torch/nn/SpatialFullConvolution.lua b/contrib/lua-torch/nn/SpatialFullConvolution.lua
new file mode 100644
index 000000000..e6019bc18
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialFullConvolution.lua
@@ -0,0 +1,219 @@
+local THNN = require 'nn.THNN'
+local SpatialFullConvolution, parent = torch.class('nn.SpatialFullConvolution','nn.Module')
+
+function SpatialFullConvolution:__init(nInputPlane, nOutputPlane,
+ kW, kH, dW, dH, padW, padH, adjW, adjH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+ self.padW = padW or 0
+ self.padH = padH or 0
+ self.adjW = adjW or 0
+ self.adjH = adjH or 0
+
+ if self.adjW > self.dW - 1 or self.adjH > self.dH - 1 then
+      error('adjW and adjH must not be greater than self.dW - 1' ..
+            ' and self.dH - 1 respectively')
+ end
+
+ self.weight = torch.Tensor(nInputPlane, nOutputPlane, kH, kW)
+ self.gradWeight = torch.Tensor(nInputPlane, nOutputPlane, kH, kW)
+ self.bias = torch.Tensor(self.nOutputPlane)
+ self.gradBias = torch.Tensor(self.nOutputPlane)
+
+ self.ones = torch.Tensor()
+
+ self:reset()
+end
+
+function SpatialFullConvolution:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function SpatialFullConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ local nInputPlane = self.nInputPlane
+ local kH = self.kH
+ local kW = self.kW
+ stdv = 1/math.sqrt(kW*kH*nInputPlane)
+ end
+ self.weight:uniform(-stdv, stdv)
+ if self.bias then
+ self.bias:uniform(-stdv, stdv)
+ end
+end
+
+local function calculateAdj(targetSize, ker, pad, stride)
+ return (targetSize + 2 * pad - ker) % stride
+end
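+
+-- For reference, the produced output size is
+--   owidth  = (width  - 1) * dW - 2*padW + kW + adjW
+--   oheight = (height - 1) * dH - 2*padH + kH + adjH
+-- so calculateAdj picks the adj that makes a desired target size reachable.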
+
+function SpatialFullConvolution:backCompatibility()
+ self.adjW = self.adjW or 0
+ self.adjH = self.adjH or 0
+end
+
+function SpatialFullConvolution:updateOutput(input)
+ self:backCompatibility()
+
+ local inputTensor = input
+ local adjW, adjH = self.adjW, self.adjH
+
+ -- The input can be a table where the second element indicates the target
+ -- output size, in which case the adj factors are computed automatically
+ if type(inputTensor) == 'table' then
+ inputTensor = input[1]
+ local targetTensor = input[2]
+ local tDims = targetTensor:dim()
+ local tH = targetTensor:size(tDims-1)
+ local tW = targetTensor:size(tDims)
+ adjW = calculateAdj(tW, self.kW, self.padW, self.dW)
+ adjH = calculateAdj(tH, self.kH, self.padH, self.dH)
+ self.finput = self.finput or input[1].new()
+ self.fgradInput = self.fgradInput or input[1].new()
+ else
+ self.finput = self.finput or input.new()
+ self.fgradInput = self.fgradInput or input.new()
+ end
+
+ inputTensor.THNN.SpatialFullConvolution_updateOutput(
+ inputTensor:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ adjW, adjH
+ )
+
+ return self.output
+end
+
+function SpatialFullConvolution:updateGradInput(input, gradOutput)
+ self:backCompatibility()
+
+ if self.gradInput then
+
+ local inputTensor = input
+ local adjW, adjH = self.adjW, self.adjH
+
+ -- The input can be a table where the second element indicates the target
+ -- output size, in which case the adj factors are computed automatically
+ if type(inputTensor) == 'table' then
+ inputTensor = input[1]
+ local targetTensor = input[2]
+ local tDims = targetTensor:dim()
+ local tH = targetTensor:size(tDims-1)
+ local tW = targetTensor:size(tDims)
+ adjW = calculateAdj(tW, self.kW, self.padW, self.dW)
+ adjH = calculateAdj(tH, self.kH, self.padH, self.dH)
+ -- Momentarily extract the gradInput tensor
+ if type(self.gradInput) == 'table' then
+ self.gradInput = self.gradInput[1] or inputTensor.new()
+ end
+ end
+
+ inputTensor.THNN.SpatialFullConvolution_updateGradInput(
+ inputTensor:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ adjW, adjH
+ )
+
+ if type(input) == 'table' then
+ -- Create a zero tensor to be expanded and used as gradInput[2].
+ self.zeroScalar = self.zeroScalar or input[2].new(1):zero()
+ self.ones:resize(input[2]:dim()):fill(1)
+ local zeroTensor = self.zeroScalar
+ :view(table.unpack(self.ones:totable()))
+ :expandAs(input[2])
+ self.gradInput = {self.gradInput, zeroTensor}
+ end
+
+ return self.gradInput
+ end
+end
+
+function SpatialFullConvolution:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ self:backCompatibility()
+
+ local inputTensor = input
+ local adjW, adjH = self.adjW, self.adjH
+
+ -- The input can be a table where the second element indicates the target
+ -- output size, in which case the adj factors are computed automatically
+ if type(inputTensor) == 'table' then
+ inputTensor = input[1]
+ local targetTensor = input[2]
+ local tDims = targetTensor:dim()
+ local tH = targetTensor:size(tDims-1)
+ local tW = targetTensor:size(tDims)
+ adjW = calculateAdj(tW, self.kW, self.padW, self.dW)
+ adjH = calculateAdj(tH, self.kH, self.padH, self.dH)
+ end
+
+ inputTensor.THNN.SpatialFullConvolution_accGradParameters(
+ inputTensor:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ adjW, adjH,
+ scale
+ )
+end
+
+function SpatialFullConvolution:type(type, tensorCache)
+ self.finput = self.finput and torch.Tensor()
+ self.fgradInput = self.fgradInput and torch.Tensor()
+ return parent.type(self, type, tensorCache)
+end
+
+function SpatialFullConvolution:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kW, self.kH)
+ if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d', self.dW, self.dH)
+ end
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ',' .. self.padH
+ end
+ if (self.adjW or self.adjH) and (self.adjW ~= 0 or self.adjH ~= 0) then
+ s = s .. ', ' .. self.adjW .. ',' .. self.adjH
+ end
+ if self.bias then
+ return s .. ')'
+ else
+ return s .. ') without bias'
+ end
+end
+
+function SpatialFullConvolution:clearState()
+ nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
+ return parent.clearState(self)
+end
+
diff --git a/contrib/lua-torch/nn/SpatialFullConvolutionMap.lua b/contrib/lua-torch/nn/SpatialFullConvolutionMap.lua
new file mode 100644
index 000000000..008f5e7cf
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialFullConvolutionMap.lua
@@ -0,0 +1,91 @@
+local SpatialFullConvolutionMap, parent = torch.class('nn.SpatialFullConvolutionMap', 'nn.Module')
+
+function SpatialFullConvolutionMap:__init(conMatrix, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+ self.connTable = conMatrix
+ self.nInputPlane = self.connTable:select(2,1):max()
+ self.nOutputPlane = self.connTable:select(2,2):max()
+
+ self.weight = torch.Tensor(self.connTable:size(1), kH, kW)
+ self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW)
+
+ self.bias = torch.Tensor(self.nOutputPlane)
+ self.gradBias = torch.Tensor(self.nOutputPlane)
+
+ self:reset()
+end
+
+function SpatialFullConvolutionMap:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ local ninp = torch.Tensor(self.nOutputPlane):zero()
+ for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end
+ for k=1,self.connTable:size(1) do
+ stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]])
+ self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end)
+ end
+ for k=1,self.bias:size(1) do
+ stdv = 1/math.sqrt(self.kW*self.kH*ninp[k])
+ self.bias[k] = torch.uniform(-stdv,stdv)
+ end
+
+ end
+end
+
+function SpatialFullConvolutionMap:updateOutput(input)
+ input.THNN.SpatialFullConvolutionMap_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.connTable:cdata(),
+ self.nInputPlane,
+ self.nOutputPlane,
+ self.dW, self.dH
+ )
+ return self.output
+end
+
+function SpatialFullConvolutionMap:updateGradInput(input, gradOutput)
+ input.THNN.SpatialFullConvolutionMap_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.connTable:cdata(),
+ self.nInputPlane,
+ self.nOutputPlane,
+ self.dW, self.dH
+ )
+ return self.gradInput
+end
+
+function SpatialFullConvolutionMap:accGradParameters(input, gradOutput, scale)
+ input.THNN.SpatialFullConvolutionMap_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.connTable:cdata(),
+ self.nInputPlane,
+ self.nOutputPlane,
+ self.dW, self.dH,
+ scale or 1
+ )
+end
diff --git a/contrib/lua-torch/nn/SpatialLPPooling.lua b/contrib/lua-torch/nn/SpatialLPPooling.lua
new file mode 100644
index 000000000..49a8493cf
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialLPPooling.lua
@@ -0,0 +1,43 @@
+local SpatialLPPooling, parent = torch.class('nn.SpatialLPPooling', 'nn.Sequential')
+
+function SpatialLPPooling:__init(nInputPlane, pnorm, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or kW
+ dH = dH or kH
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+
+ if pnorm == 2 then
+ self:add(nn.Square())
+ else
+ self:add(nn.Power(pnorm))
+ end
+ self:add(nn.SpatialAveragePooling(kW, kH, dW, dH))
+ self:add(nn.MulConstant(kW*kH))
+ if pnorm == 2 then
+ self:add(nn.Sqrt())
+ else
+ self:add(nn.Power(1/pnorm))
+ end
+end
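+
+-- Per pooling window W the pipeline above computes (the MulConstant
+-- rescales the average back into a plain sum):
+--   output = (sum over x in W of x^pnorm)^(1/pnorm)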
+
+-- The module is a Sequential; by default it would try to learn the
+-- parameters of its sub-modules, so we disable that by overriding the
+-- parameter-update methods below.
+function SpatialLPPooling:reset()
+end
+
+function SpatialLPPooling:accGradParameters()
+end
+
+function SpatialLPPooling:accUpdateGradParameters()
+end
+
+function SpatialLPPooling:zeroGradParameters()
+end
+
+function SpatialLPPooling:updateParameters()
+end
diff --git a/contrib/lua-torch/nn/SpatialLogSoftMax.lua b/contrib/lua-torch/nn/SpatialLogSoftMax.lua
new file mode 100644
index 000000000..9c81d49e1
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialLogSoftMax.lua
@@ -0,0 +1,19 @@
+local SpatialLogSoftMax = torch.class('nn.SpatialLogSoftMax', 'nn.Module')
+
+function SpatialLogSoftMax:updateOutput(input)
+ input.THNN.LogSoftMax_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function SpatialLogSoftMax:updateGradInput(input, gradOutput)
+ input.THNN.LogSoftMax_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata()
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SpatialMaxPooling.lua b/contrib/lua-torch/nn/SpatialMaxPooling.lua
new file mode 100644
index 000000000..5c865c631
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialMaxPooling.lua
@@ -0,0 +1,94 @@
+local SpatialMaxPooling, parent = torch.class('nn.SpatialMaxPooling', 'nn.Module')
+
+function SpatialMaxPooling:__init(kW, kH, dW, dH, padW, padH)
+ parent.__init(self)
+
+ dW = dW or kW
+ dH = dH or kH
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+
+ self.padW = padW or 0
+ self.padH = padH or 0
+
+ self.ceil_mode = false
+ self.indices = torch.LongTensor()
+end
+
+function SpatialMaxPooling:ceil()
+ self.ceil_mode = true
+ return self
+end
+
+function SpatialMaxPooling:floor()
+ self.ceil_mode = false
+ return self
+end
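+
+-- Output size (op is floor by default, ceil after calling :ceil()):
+--   owidth  = op((width  + 2*padW - kW) / dW + 1)
+--   oheight = op((height + 2*padH - kH) / dH + 1)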
+
+function SpatialMaxPooling:updateOutput(input)
+ self.indices = self.indices or torch.LongTensor()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+ else
+ self.indices = self.indices:long()
+ end
+
+ local dims = input:dim()
+ self.iheight = input:size(dims-1)
+ self.iwidth = input:size(dims)
+
+ -- backward compatibility
+ self.ceil_mode = self.ceil_mode or false
+ self.padW = self.padW or 0
+ self.padH = self.padH or 0
+ input.THNN.SpatialMaxPooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.indices:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.ceil_mode
+ )
+ return self.output
+end
+
+function SpatialMaxPooling:updateGradInput(input, gradOutput)
+ input.THNN.SpatialMaxPooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.indices:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ self.padW, self.padH,
+ self.ceil_mode
+ )
+ return self.gradInput
+end
+
+-- for backward compat
+function SpatialMaxPooling:empty()
+ self:clearState()
+end
+
+function SpatialMaxPooling:__tostring__()
+ local s = string.format('%s(%dx%d, %d,%d', torch.type(self),
+ self.kW, self.kH, self.dW, self.dH)
+ if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padW .. ','.. self.padH
+ end
+ s = s .. ')'
+
+ return s
+end
+
+function SpatialMaxPooling:clearState()
+ if self.indices then
+ self.indices:set()
+ end
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialMaxUnpooling.lua b/contrib/lua-torch/nn/SpatialMaxUnpooling.lua
new file mode 100644
index 000000000..408bcc052
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialMaxUnpooling.lua
@@ -0,0 +1,45 @@
+local SpatialMaxUnpooling, parent = torch.class('nn.SpatialMaxUnpooling', 'nn.Module')
+
+function SpatialMaxUnpooling:__init(poolingModule)
+ parent.__init(self)
+ assert(torch.type(poolingModule)=='nn.SpatialMaxPooling', 'Argument must be a nn.SpatialMaxPooling module')
+ assert(poolingModule.kH==poolingModule.dH and poolingModule.kW==poolingModule.dW, "The size of pooling module's kernel must be equal to its stride")
+ self.pooling = poolingModule
+end
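+
+-- A minimal usage sketch (sizes are illustrative); the pooling module must
+-- run forward first so that its indices are populated:
+-- local pool   = nn.SpatialMaxPooling(2, 2, 2, 2)
+-- local unpool = nn.SpatialMaxUnpooling(pool)
+-- local z = unpool:forward(pool:forward(x)) -- z has the spatial size of x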
+
+function SpatialMaxUnpooling:setParams()
+ self.indices = self.pooling.indices
+ self.oheight = self.pooling.iheight
+ self.owidth = self.pooling.iwidth
+end
+
+function SpatialMaxUnpooling:updateOutput(input)
+ self:setParams()
+ input.THNN.SpatialMaxUnpooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.indices:cdata(),
+ self.owidth, self.oheight
+ )
+ return self.output
+end
+
+function SpatialMaxUnpooling:updateGradInput(input, gradOutput)
+ self:setParams()
+ input.THNN.SpatialMaxUnpooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.indices:cdata(),
+ self.owidth, self.oheight
+ )
+ return self.gradInput
+end
+
+function SpatialMaxUnpooling:empty()
+ self:clearState()
+end
+
+function SpatialMaxUnpooling:__tostring__()
+   return 'nn.SpatialMaxUnpooling associated with '..tostring(self.pooling)
+end
diff --git a/contrib/lua-torch/nn/SpatialReflectionPadding.lua b/contrib/lua-torch/nn/SpatialReflectionPadding.lua
new file mode 100644
index 000000000..9ce4612ad
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialReflectionPadding.lua
@@ -0,0 +1,51 @@
+local SpatialReflectionPadding, parent =
+ torch.class('nn.SpatialReflectionPadding', 'nn.Module')
+
+function SpatialReflectionPadding:__init(pad_l, pad_r, pad_t, pad_b)
+ parent.__init(self)
+ self.pad_l = pad_l
+ self.pad_r = pad_r or self.pad_l
+ self.pad_t = pad_t or self.pad_l
+ self.pad_b = pad_b or self.pad_l
+end
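+
+-- The output is (height + pad_t + pad_b) x (width + pad_l + pad_r), with the
+-- borders filled by reflecting the input.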
+
+function SpatialReflectionPadding:updateOutput(input)
+ if input:dim() == 3 or input:dim() == 4 then
+ input.THNN.SpatialReflectionPadding_updateOutput(
+ input:cdata(), self.output:cdata(),
+ self.pad_l, self.pad_r, self.pad_t, self.pad_b)
+ else
+ error('input must be 3 or 4-dimensional')
+ end
+ return self.output
+end
+
+function SpatialReflectionPadding:updateGradInput(input, gradOutput)
+ if input:dim() == 3 and gradOutput:dim() == 3 then
+ assert(input:size(1) == gradOutput:size(1)
+ and input:size(2) + self.pad_t + self.pad_b == gradOutput:size(2)
+ and input:size(3) + self.pad_l + self.pad_r == gradOutput:size(3),
+ 'input and gradOutput must be compatible in size')
+ elseif input:dim() == 4 and gradOutput:dim() == 4 then
+ assert(input:size(1) == gradOutput:size(1)
+ and input:size(2) == gradOutput:size(2)
+ and input:size(3) + self.pad_t + self.pad_b == gradOutput:size(3)
+ and input:size(4) + self.pad_l + self.pad_r == gradOutput:size(4),
+ 'input and gradOutput must be compatible in size')
+ else
+ error(
+ [[input and gradOutput must be 3 or 4-dimensional
+ and have equal number of dimensions]]
+ )
+ end
+ input.THNN.SpatialReflectionPadding_updateGradInput(
+ input:cdata(), gradOutput:cdata(), self.gradInput:cdata(),
+ self.pad_l, self.pad_r, self.pad_t, self.pad_b)
+ return self.gradInput
+end
+
+function SpatialReflectionPadding:__tostring__()
+ return torch.type(self) ..
+ string.format('(l=%d, r=%d, t=%d, b=%d)', self.pad_l, self.pad_r,
+ self.pad_t, self.pad_b)
+end
diff --git a/contrib/lua-torch/nn/SpatialReplicationPadding.lua b/contrib/lua-torch/nn/SpatialReplicationPadding.lua
new file mode 100644
index 000000000..429763f9b
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialReplicationPadding.lua
@@ -0,0 +1,51 @@
+local SpatialReplicationPadding, parent =
+ torch.class('nn.SpatialReplicationPadding', 'nn.Module')
+
+function SpatialReplicationPadding:__init(pad_l, pad_r, pad_t, pad_b)
+ parent.__init(self)
+ self.pad_l = pad_l
+ self.pad_r = pad_r or self.pad_l
+ self.pad_t = pad_t or self.pad_l
+ self.pad_b = pad_b or self.pad_l
+end
+
+function SpatialReplicationPadding:updateOutput(input)
+ if input:dim() == 3 or input:dim() == 4 then
+ input.THNN.SpatialReplicationPadding_updateOutput(
+ input:cdata(), self.output:cdata(),
+ self.pad_l, self.pad_r, self.pad_t, self.pad_b)
+ else
+ error('input must be 3 or 4-dimensional')
+ end
+ return self.output
+end
+
+function SpatialReplicationPadding:updateGradInput(input, gradOutput)
+ if input:dim() == 3 and gradOutput:dim() == 3 then
+ assert(input:size(1) == gradOutput:size(1)
+ and input:size(2) + self.pad_t + self.pad_b == gradOutput:size(2)
+ and input:size(3) + self.pad_l + self.pad_r == gradOutput:size(3),
+ 'input and gradOutput must be compatible in size')
+ elseif input:dim() == 4 and gradOutput:dim() == 4 then
+ assert(input:size(1) == gradOutput:size(1)
+ and input:size(2) == gradOutput:size(2)
+ and input:size(3) + self.pad_t + self.pad_b == gradOutput:size(3)
+ and input:size(4) + self.pad_l + self.pad_r == gradOutput:size(4),
+ 'input and gradOutput must be compatible in size')
+ else
+ error(
+ [[input and gradOutput must be 3 or 4-dimensional
+ and have equal number of dimensions]]
+ )
+ end
+ input.THNN.SpatialReplicationPadding_updateGradInput(
+ input:cdata(), gradOutput:cdata(), self.gradInput:cdata(),
+ self.pad_l, self.pad_r, self.pad_t, self.pad_b)
+ return self.gradInput
+end
+
+function SpatialReplicationPadding:__tostring__()
+ return torch.type(self) ..
+ string.format('(l=%d, r=%d, t=%d, b=%d)', self.pad_l, self.pad_r,
+ self.pad_t, self.pad_b)
+end
diff --git a/contrib/lua-torch/nn/SpatialSoftMax.lua b/contrib/lua-torch/nn/SpatialSoftMax.lua
new file mode 100644
index 000000000..56f0b40e2
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialSoftMax.lua
@@ -0,0 +1,19 @@
+local SpatialSoftMax, _ = torch.class('nn.SpatialSoftMax', 'nn.Module')
+
+function SpatialSoftMax:updateOutput(input)
+ input.THNN.SoftMax_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function SpatialSoftMax:updateGradInput(input, gradOutput)
+ input.THNN.SoftMax_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata()
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SpatialSubSampling.lua b/contrib/lua-torch/nn/SpatialSubSampling.lua
new file mode 100644
index 000000000..4e3fb8881
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialSubSampling.lua
@@ -0,0 +1,79 @@
+local SpatialSubSampling, parent = torch.class('nn.SpatialSubSampling', 'nn.Module')
+
+function SpatialSubSampling:__init(nInputPlane, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+
+ self.weight = torch.Tensor(nInputPlane)
+ self.bias = torch.Tensor(nInputPlane)
+ self.gradWeight = torch.Tensor(nInputPlane)
+ self.gradBias = torch.Tensor(nInputPlane)
+
+ self:reset()
+end
+
+function SpatialSubSampling:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.kH)
+ end
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+ end
+end
+
+function SpatialSubSampling:updateOutput(input)
+ input.THNN.SpatialSubSampling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH
+ )
+ return self.output
+end
+
+function SpatialSubSampling:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ input.THNN.SpatialSubSampling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH
+ )
+ return self.gradInput
+ end
+end
+
+function SpatialSubSampling:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ input.THNN.SpatialSubSampling_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.kW, self.kH,
+ self.dW, self.dH,
+ scale
+ )
+end
diff --git a/contrib/lua-torch/nn/SpatialSubtractiveNormalization.lua b/contrib/lua-torch/nn/SpatialSubtractiveNormalization.lua
new file mode 100644
index 000000000..d430083e9
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialSubtractiveNormalization.lua
@@ -0,0 +1,115 @@
+local SpatialSubtractiveNormalization, parent = torch.class('nn.SpatialSubtractiveNormalization','nn.Module')
+
+function SpatialSubtractiveNormalization:__init(nInputPlane, kernel)
+ parent.__init(self)
+
+ -- get args
+ self.nInputPlane = nInputPlane or 1
+ self.kernel = kernel or torch.Tensor(9,9):fill(1)
+ local kdim = self.kernel:nDimension()
+
+ -- check args
+ if kdim ~= 2 and kdim ~= 1 then
+ error('<SpatialSubtractiveNormalization> averaging kernel must be 2D or 1D')
+ end
+ if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then
+ error('<SpatialSubtractiveNormalization> averaging kernel must have ODD dimensions')
+ end
+
+ -- normalize kernel
+ self.kernel:div(self.kernel:sum() * self.nInputPlane)
+
+ -- padding values
+ local padH = math.floor(self.kernel:size(1)/2)
+ local padW = padH
+ if kdim == 2 then
+ padW = math.floor(self.kernel:size(2)/2)
+ end
+
+ -- create convolutional mean extractor
+ self.meanestimator = nn.Sequential()
+ self.meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH))
+ if kdim == 2 then
+ self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, self.kernel:size(2), self.kernel:size(1)))
+ else
+ self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), self.kernel:size(1), 1))
+ self.meanestimator:add(nn.SpatialConvolution(self.nInputPlane, 1, 1, self.kernel:size(1)))
+ end
+ self.meanestimator:add(nn.Replicate(self.nInputPlane,1,3))
+
+ -- set kernel and bias
+ if kdim == 2 then
+ for i = 1,self.nInputPlane do
+ self.meanestimator.modules[2].weight[1][i] = self.kernel
+ end
+ self.meanestimator.modules[2].bias:zero()
+ else
+ for i = 1,self.nInputPlane do
+ self.meanestimator.modules[2].weight[i]:copy(self.kernel)
+ self.meanestimator.modules[3].weight[1][i]:copy(self.kernel)
+ end
+ self.meanestimator.modules[2].bias:zero()
+ self.meanestimator.modules[3].bias:zero()
+ end
+
+ -- other operation
+ self.subtractor = nn.CSubTable()
+ self.divider = nn.CDivTable()
+
+ -- coefficient array, to adjust side effects
+ self.coef = torch.Tensor(1,1,1)
+end
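+
+-- In short, updateOutput computes output = input - (kernel * input) / coef,
+-- where * is a same-size convolution and coef compensates for the smaller
+-- effective kernel support near the borders.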
+
+function SpatialSubtractiveNormalization:updateOutput(input)
+ -- compute side coefficients
+ local dim = input:dim()
+ if input:dim()+1 ~= self.coef:dim() or (input:size(dim) ~= self.coef:size(dim)) or (input:size(dim-1) ~= self.coef:size(dim-1)) then
+ self.ones = self.ones or input.new()
+ self._coef = self._coef or self.coef.new()
+ if dim == 4 then
+ -- batch mode
+ self.ones:resizeAs(input[1]):fill(1)
+ local coef = self.meanestimator:updateOutput(self.ones)
+ self._coef:resizeAs(coef):copy(coef) -- make contiguous for view
+ local size = coef:size():totable()
+ table.insert(size,1,input:size(1))
+ self.coef = self._coef:view(1,table.unpack(self._coef:size():totable())):expand(table.unpack(size))
+ else
+ self.ones:resizeAs(input):fill(1)
+ local coef = self.meanestimator:updateOutput(self.ones)
+ self._coef:resizeAs(coef):copy(coef) -- copy meanestimator.output as it will be used below
+ self.coef = self._coef
+ end
+
+ end
+
+ -- compute mean
+ self.localsums = self.meanestimator:updateOutput(input)
+ self.adjustedsums = self.divider:updateOutput{self.localsums, self.coef}
+ self.output = self.subtractor:updateOutput{input, self.adjustedsums}
+
+ -- done
+ return self.output
+end
+
+function SpatialSubtractiveNormalization:updateGradInput(input, gradOutput)
+ -- resize grad
+ self.gradInput:resizeAs(input):zero()
+
+ -- backprop through all modules
+ local gradsub = self.subtractor:updateGradInput({input, self.adjustedsums}, gradOutput)
+ local graddiv = self.divider:updateGradInput({self.localsums, self.coef}, gradsub[2])
+   local gradmean = self.meanestimator:updateGradInput(input, graddiv[1])
+   self.gradInput:add(gradmean)
+ self.gradInput:add(gradsub[1])
+
+ -- done
+ return self.gradInput
+end
+
+function SpatialSubtractiveNormalization:clearState()
+ if self.ones then self.ones:set() end
+ if self._coef then self._coef:set() end
+ self.meanestimator:clearState()
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/SpatialUpSamplingBilinear.lua b/contrib/lua-torch/nn/SpatialUpSamplingBilinear.lua
new file mode 100644
index 000000000..12e1ce8f2
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialUpSamplingBilinear.lua
@@ -0,0 +1,139 @@
+require 'nn.THNN'
+local SpatialUpSamplingBilinear, parent =
+ torch.class('nn.SpatialUpSamplingBilinear', 'nn.Module')
+
+--[[
+Applies a 2D bilinear up-sampling over an input image composed of several
+input planes.
+
+The Y and X dimensions are assumed to be the last 2 tensor dimensions. For
+instance, if the tensor is 4D, then dim 3 is the y dimension and dim 4 is the x.
+
+scale_factor is assumed to be a positive integer.
+owidth  = width*scale_factor
+oheight = height*scale_factor
+
+Alternatively, owidth and oheight can be directly provided as input.
+--]]
+
+function SpatialUpSamplingBilinear:__init(params)
+ parent.__init(self)
+
+ self.owidth, self.oheight, self.scale_factor = nil, nil, nil
+ if torch.type(params) == 'table' then
+ self.owidth, self.oheight = params.owidth, params.oheight
+ else
+ self.scale_factor = params
+ if self.scale_factor < 1 then
+         error('scale_factor must be at least 1')
+ end
+ if math.floor(self.scale_factor) ~= self.scale_factor then
+ error('scale_factor must be integer')
+ end
+ end
+ self.inputSize = torch.LongStorage(4)
+ self.outputSize = torch.LongStorage(4)
+end
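+
+-- Construction sketches (values are illustrative):
+--   nn.SpatialUpSamplingBilinear(2)                      -- scale by 2
+--   nn.SpatialUpSamplingBilinear{owidth=64, oheight=48}  -- explicit size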
+
+local function makeContiguous(self, input, gradOutput)
+ if not input:isContiguous() then
+ self._input = self._input or input.new()
+ self._input:resizeAs(input):copy(input)
+ input = self._input
+ end
+ if gradOutput then
+ if not gradOutput:isContiguous() then
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ gradOutput = self._gradOutput
+ end
+ end
+ return input, gradOutput
+end
+
+function SpatialUpSamplingBilinear:setSize(input)
+ local xdim = input:dim()
+ local ydim = xdim - 1
+ for i = 1, input:dim() do
+ self.inputSize[i] = input:size(i)
+ self.outputSize[i] = input:size(i)
+ end
+ if self.scale_factor ~= nil then
+ self.outputSize[ydim] = self.outputSize[ydim] * self.scale_factor
+ self.outputSize[xdim] = self.outputSize[xdim] * self.scale_factor
+ else
+ self.outputSize[ydim] = self.oheight
+ self.outputSize[xdim] = self.owidth
+ end
+end
+
+function SpatialUpSamplingBilinear:updateOutput(input)
+ assert(input:dim() == 4 or input:dim()==3,
+ 'SpatialUpSamplingBilinear only supports 3D or 4D tensors' )
+ input = makeContiguous(self, input)
+ local inputwas3D = false
+ if input:dim() == 3 then
+ input=input:view(-1, input:size(1), input:size(2), input:size(3))
+ inputwas3D = true
+ end
+ local xdim = input:dim()
+ local ydim = xdim - 1
+ self:setSize(input)
+ input.THNN.SpatialUpSamplingBilinear_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.outputSize[ydim],
+ self.outputSize[xdim]
+ )
+ if inputwas3D then
+ input = input:squeeze(1)
+ self.output = self.output:squeeze(1)
+ end
+ return self.output
+end
+
+function SpatialUpSamplingBilinear:updateGradInput(input, gradOutput)
+ assert(input:dim() == 4 or input:dim()==3,
+      'SpatialUpSamplingBilinear only supports 3D or 4D tensors' )
+ assert(input:dim() == gradOutput:dim(),
+      'Input and gradOutput should have the same number of dimensions' )
+ input, gradOutput = makeContiguous(self, input, gradOutput)
+ local inputwas3D = false
+ if input:dim() == 3 then
+ input = input:view(-1, input:size(1), input:size(2), input:size(3))
+ gradOutput = gradOutput:view(-1, gradOutput:size(1), gradOutput:size(2),
+ gradOutput:size(3))
+ inputwas3D = true
+ end
+ local xdim = input:dim()
+ local ydim = xdim - 1
+ self.gradInput:resizeAs(input)
+ input.THNN.SpatialUpSamplingBilinear_updateGradInput(
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ input:size(1),
+ input:size(2),
+ input:size(3),
+ input:size(4),
+ self.outputSize[ydim],
+ self.outputSize[xdim]
+ )
+ if inputwas3D then
+ input = input:squeeze(1)
+ gradOutput = gradOutput:squeeze(1)
+ self.gradInput = self.gradInput:squeeze(1)
+ end
+ return self.gradInput
+end
+
+
+function SpatialUpSamplingBilinear:__tostring__()
+ local s
+ if self.scale_factor ~= nil then
+ s = string.format('%s(%d)', torch.type(self), self.scale_factor)
+ else
+ s = string.format('%s(%d, %d)',
+ torch.type(self), self.oheight, self.owidth)
+ end
+ return s
+end
diff --git a/contrib/lua-torch/nn/SpatialUpSamplingNearest.lua b/contrib/lua-torch/nn/SpatialUpSamplingNearest.lua
new file mode 100644
index 000000000..362ae73a3
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialUpSamplingNearest.lua
@@ -0,0 +1,59 @@
+local SpatialUpSamplingNearest, parent = torch.class('nn.SpatialUpSamplingNearest', 'nn.Module')
+
+--[[
+Applies a 2D up-sampling over an input image composed of several input planes.
+
+The upsampling is done using the simple nearest neighbor technique.
+
+The Y and X dimensions are assumed to be the last 2 tensor dimensions. For
+instance, if the tensor is 4D, then dim 3 is the y dimension and dim 4 is the x.
+
+owidth = width*scale_factor
+oheight = height*scale_factor
+--]]
+
+function SpatialUpSamplingNearest:__init(scale)
+ parent.__init(self)
+
+ self.scale_factor = scale
+ if self.scale_factor < 1 then
+      error('scale_factor must be at least 1')
+ end
+ if math.floor(self.scale_factor) ~= self.scale_factor then
+ error('scale_factor must be integer')
+ end
+ self.inputSize = torch.LongStorage(4)
+ self.outputSize = torch.LongStorage(4)
+end
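+
+-- e.g. (illustrative sizes) nn.SpatialUpSamplingNearest(2) maps a 3x8x8
+-- input to a 3x16x16 output.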
+
+function SpatialUpSamplingNearest:updateOutput(input)
+ if input:dim() ~= 4 and input:dim() ~= 3 then
+      error('SpatialUpSamplingNearest only supports 3D or 4D tensors')
+ end
+ -- Copy the input size
+ local xdim = input:dim()
+ local ydim = input:dim() - 1
+ for i = 1, input:dim() do
+ self.inputSize[i] = input:size(i)
+ self.outputSize[i] = input:size(i)
+ end
+ self.outputSize[ydim] = self.outputSize[ydim] * self.scale_factor
+ self.outputSize[xdim] = self.outputSize[xdim] * self.scale_factor
+ input.THNN.SpatialUpSamplingNearest_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.scale_factor
+ )
+ return self.output
+end
+
+function SpatialUpSamplingNearest:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input)
+ input.THNN.SpatialUpSamplingNearest_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.scale_factor
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/SpatialZeroPadding.lua b/contrib/lua-torch/nn/SpatialZeroPadding.lua
new file mode 100644
index 000000000..f19925841
--- /dev/null
+++ b/contrib/lua-torch/nn/SpatialZeroPadding.lua
@@ -0,0 +1,104 @@
+local SpatialZeroPadding, parent = torch.class('nn.SpatialZeroPadding', 'nn.Module')
+
+function SpatialZeroPadding:__init(pad_l, pad_r, pad_t, pad_b)
+ parent.__init(self)
+ self.pad_l = pad_l
+ self.pad_r = pad_r or self.pad_l
+ self.pad_t = pad_t or self.pad_l
+ self.pad_b = pad_b or self.pad_l
+end
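+
+-- Negative pad values crop the corresponding border instead of padding it.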
+
+function SpatialZeroPadding:updateOutput(input)
+ if input:dim() == 3 then
+ -- sizes
+ local h = input:size(2) + self.pad_t + self.pad_b
+ local w = input:size(3) + self.pad_l + self.pad_r
+ if w < 1 or h < 1 then error('input is too small') end
+ self.output:resize(input:size(1), h, w)
+ self.output:zero()
+ -- crop input if necessary
+ local c_input = input
+ if self.pad_t < 0 then c_input = c_input:narrow(2, 1 - self.pad_t, c_input:size(2) + self.pad_t) end
+ if self.pad_b < 0 then c_input = c_input:narrow(2, 1, c_input:size(2) + self.pad_b) end
+ if self.pad_l < 0 then c_input = c_input:narrow(3, 1 - self.pad_l, c_input:size(3) + self.pad_l) end
+ if self.pad_r < 0 then c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_r) end
+      -- crop output if necessary
+ local c_output = self.output
+ if self.pad_t > 0 then c_output = c_output:narrow(2, 1 + self.pad_t, c_output:size(2) - self.pad_t) end
+ if self.pad_b > 0 then c_output = c_output:narrow(2, 1, c_output:size(2) - self.pad_b) end
+ if self.pad_l > 0 then c_output = c_output:narrow(3, 1 + self.pad_l, c_output:size(3) - self.pad_l) end
+ if self.pad_r > 0 then c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_r) end
+ -- copy input to output
+ c_output:copy(c_input)
+ elseif input:dim() == 4 then
+ -- sizes
+ local h = input:size(3) + self.pad_t + self.pad_b
+ local w = input:size(4) + self.pad_l + self.pad_r
+ if w < 1 or h < 1 then error('input is too small') end
+ self.output:resize(input:size(1), input:size(2), h, w)
+ self.output:zero()
+ -- crop input if necessary
+ local c_input = input
+ if self.pad_t < 0 then c_input = c_input:narrow(3, 1 - self.pad_t, c_input:size(3) + self.pad_t) end
+ if self.pad_b < 0 then c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_b) end
+ if self.pad_l < 0 then c_input = c_input:narrow(4, 1 - self.pad_l, c_input:size(4) + self.pad_l) end
+ if self.pad_r < 0 then c_input = c_input:narrow(4, 1, c_input:size(4) + self.pad_r) end
+      -- crop output if necessary
+ local c_output = self.output
+ if self.pad_t > 0 then c_output = c_output:narrow(3, 1 + self.pad_t, c_output:size(3) - self.pad_t) end
+ if self.pad_b > 0 then c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_b) end
+ if self.pad_l > 0 then c_output = c_output:narrow(4, 1 + self.pad_l, c_output:size(4) - self.pad_l) end
+ if self.pad_r > 0 then c_output = c_output:narrow(4, 1, c_output:size(4) - self.pad_r) end
+ -- copy input to output
+ c_output:copy(c_input)
+ else
+ error('input must be 3 or 4-dimensional')
+ end
+ return self.output
+end
+
+function SpatialZeroPadding:updateGradInput(input, gradOutput)
+ if input:dim() == 3 then
+ self.gradInput:resizeAs(input):zero()
+ -- crop gradInput if necessary
+ local cg_input = self.gradInput
+ if self.pad_t < 0 then cg_input = cg_input:narrow(2, 1 - self.pad_t, cg_input:size(2) + self.pad_t) end
+ if self.pad_b < 0 then cg_input = cg_input:narrow(2, 1, cg_input:size(2) + self.pad_b) end
+ if self.pad_l < 0 then cg_input = cg_input:narrow(3, 1 - self.pad_l, cg_input:size(3) + self.pad_l) end
+ if self.pad_r < 0 then cg_input = cg_input:narrow(3, 1, cg_input:size(3) + self.pad_r) end
+      -- crop gradOutput if necessary
+ local cg_output = gradOutput
+ if self.pad_t > 0 then cg_output = cg_output:narrow(2, 1 + self.pad_t, cg_output:size(2) - self.pad_t) end
+ if self.pad_b > 0 then cg_output = cg_output:narrow(2, 1, cg_output:size(2) - self.pad_b) end
+ if self.pad_l > 0 then cg_output = cg_output:narrow(3, 1 + self.pad_l, cg_output:size(3) - self.pad_l) end
+ if self.pad_r > 0 then cg_output = cg_output:narrow(3, 1, cg_output:size(3) - self.pad_r) end
+      -- copy gradOutput to gradInput
+ cg_input:copy(cg_output)
+ elseif input:dim() == 4 then
+ self.gradInput:resizeAs(input):zero()
+ -- crop gradInput if necessary
+ local cg_input = self.gradInput
+ if self.pad_t < 0 then cg_input = cg_input:narrow(3, 1 - self.pad_t, cg_input:size(3) + self.pad_t) end
+ if self.pad_b < 0 then cg_input = cg_input:narrow(3, 1, cg_input:size(3) + self.pad_b) end
+ if self.pad_l < 0 then cg_input = cg_input:narrow(4, 1 - self.pad_l, cg_input:size(4) + self.pad_l) end
+ if self.pad_r < 0 then cg_input = cg_input:narrow(4, 1, cg_input:size(4) + self.pad_r) end
+      -- crop gradOutput if necessary
+ local cg_output = gradOutput
+ if self.pad_t > 0 then cg_output = cg_output:narrow(3, 1 + self.pad_t, cg_output:size(3) - self.pad_t) end
+ if self.pad_b > 0 then cg_output = cg_output:narrow(3, 1, cg_output:size(3) - self.pad_b) end
+ if self.pad_l > 0 then cg_output = cg_output:narrow(4, 1 + self.pad_l, cg_output:size(4) - self.pad_l) end
+ if self.pad_r > 0 then cg_output = cg_output:narrow(4, 1, cg_output:size(4) - self.pad_r) end
+      -- copy gradOutput to gradInput
+ cg_input:copy(cg_output)
+ else
+ error('input must be 3 or 4-dimensional')
+ end
+ return self.gradInput
+end
+
+
+function SpatialZeroPadding:__tostring__()
+ return torch.type(self) ..
+ string.format('(l=%d, r=%d, t=%d, b=%d)', self.pad_l, self.pad_r,
+ self.pad_t, self.pad_b)
+end
diff --git a/contrib/lua-torch/nn/SplitTable.lua b/contrib/lua-torch/nn/SplitTable.lua
new file mode 100644
index 000000000..7c4f968e6
--- /dev/null
+++ b/contrib/lua-torch/nn/SplitTable.lua
@@ -0,0 +1,43 @@
+local SplitTable, parent = torch.class('nn.SplitTable', 'nn.Module')
+
+function SplitTable:__init(dimension, nInputDims)
+ parent.__init(self)
+ self.dimension = dimension
+ self.nInputDims = nInputDims
+end
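+
+-- A minimal usage sketch (sizes are illustrative):
+-- local t = nn.SplitTable(1):forward(torch.rand(3, 4))
+-- -- t is a Lua table holding three tensors of size 4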
+
+function SplitTable:_getPositiveDimension(input)
+ local dimension = self.dimension
+ if dimension < 0 then
+ dimension = input:dim() + dimension + 1
+ elseif self.nInputDims and input:dim()==(self.nInputDims+1) then
+ dimension = dimension + 1
+ end
+ return dimension
+end
+
+function SplitTable:updateOutput(input)
+ local dimension = self:_getPositiveDimension(input)
+ local slices = input:size(dimension)
+
+ local currentOutput= {}
+ for i=1,slices do
+ currentOutput[#currentOutput+1] = input:select(dimension,i)
+ end
+ self.output = currentOutput
+ return self.output
+end
+
+function SplitTable:updateGradInput(input, gradOutput)
+ local dimension = self:_getPositiveDimension(input)
+ local slices = input:size(dimension)
+ if self.gradInput then
+ self.gradInput:resizeAs(input)
+
+ for i=1,slices do
+ local currentGradInput = gradOutput[i];
+ self.gradInput:select(dimension,i):copy(currentGradInput)
+ end
+ end
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Sqrt.lua b/contrib/lua-torch/nn/Sqrt.lua
new file mode 100644
index 000000000..df354a175
--- /dev/null
+++ b/contrib/lua-torch/nn/Sqrt.lua
@@ -0,0 +1,26 @@
+local Sqrt, parent = torch.class('nn.Sqrt','nn.Module')
+
+function Sqrt:__init(eps)
+   parent.__init(self)
+   self.eps = eps or 0
+end
+
+function Sqrt:updateOutput(input)
+ self.eps = self.eps or 0
+ input.THNN.Sqrt_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.eps
+ )
+ return self.output
+end
+
+function Sqrt:updateGradInput(input, gradOutput)
+ input.THNN.Sqrt_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata()
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Square.lua b/contrib/lua-torch/nn/Square.lua
new file mode 100644
index 000000000..a6292afb9
--- /dev/null
+++ b/contrib/lua-torch/nn/Square.lua
@@ -0,0 +1,22 @@
+local Square, parent = torch.class('nn.Square', 'nn.Module')
+
+function Square:__init(args)
+ parent.__init(self)
+end
+
+function Square:updateOutput(input)
+ input.THNN.Square_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function Square:updateGradInput(input, gradOutput)
+ input.THNN.Square_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata()
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Squeeze.lua b/contrib/lua-torch/nn/Squeeze.lua
new file mode 100644
index 000000000..7d204a19d
--- /dev/null
+++ b/contrib/lua-torch/nn/Squeeze.lua
@@ -0,0 +1,40 @@
+local Squeeze, parent = torch.class('nn.Squeeze', 'nn.Module')
+
+function Squeeze:__init(dim, numInputDims)
+ parent.__init(self)
+ self.dim = dim
+ self:setNumInputDims(numInputDims)
+end
+
+function Squeeze:setNumInputDims(numInputDims)
+ self.numInputDims = numInputDims
+ return self
+end
+
+function Squeeze:updateOutput(input)
+ assert(input and torch.isTensor(input), 'Squeeze only works on tensors')
+ local dim = self.dim
+ local addone = false
+ if self.numInputDims and input:dim()==(self.numInputDims+1) then
+ if dim then
+ dim = dim + 1
+ elseif input:size(1) == 1 then
+ addone = true -- in case of minibatch of size 1.
+ end
+ end
+ self.output:set(dim and input:squeeze(dim) or input:squeeze())
+ if addone then
+      local s = self.output:size():totable()
+ table.insert(s, 1, 1)
+ self.output:set(self.output:view(torch.LongStorage(s)))
+ end
+ return self.output
+end
+
+function Squeeze:updateGradInput(input, gradOutput)
+ assert(input and torch.isTensor(input), 'Squeeze only works on tensors')
+ assert(gradOutput and torch.isTensor(gradOutput), 'Squeeze only works on tensors')
+ assert(input:nElement() == gradOutput:nElement())
+ self.gradInput:set(gradOutput:view(input:size()))
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/StochasticGradient.lua b/contrib/lua-torch/nn/StochasticGradient.lua
new file mode 100644
index 000000000..a060371e8
--- /dev/null
+++ b/contrib/lua-torch/nn/StochasticGradient.lua
@@ -0,0 +1,62 @@
+local StochasticGradient = torch.class('nn.StochasticGradient')
+
+function StochasticGradient:__init(module, criterion)
+ self.learningRate = 0.01
+ self.learningRateDecay = 0
+ self.maxIteration = 25
+ self.shuffleIndices = true
+ self.module = module
+ self.criterion = criterion
+ self.verbose = true
+end
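+
+-- A minimal training sketch (module and criterion are illustrative);
+-- dataset must implement dataset:size() and dataset[i] = {input, target}:
+-- local trainer = nn.StochasticGradient(mlp, nn.MSECriterion())
+-- trainer.learningRate = 0.01
+-- trainer.maxIteration = 10
+-- trainer:train(dataset)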
+
+function StochasticGradient:train(dataset)
+ local iteration = 1
+ local currentLearningRate = self.learningRate
+ local module = self.module
+ local criterion = self.criterion
+
+ local shuffledIndices = torch.randperm(dataset:size(), 'torch.LongTensor')
+ if not self.shuffleIndices then
+ for t = 1,dataset:size() do
+ shuffledIndices[t] = t
+ end
+ end
+
+ print("# StochasticGradient: training")
+
+ while true do
+ local currentError = 0
+ for t = 1,dataset:size() do
+ local example = dataset[shuffledIndices[t]]
+ local input = example[1]
+ local target = example[2]
+
+ currentError = currentError + criterion:forward(module:forward(input), target)
+
+ module:updateGradInput(input, criterion:updateGradInput(module.output, target))
+ module:accUpdateGradParameters(input, criterion.gradInput, currentLearningRate)
+
+ if self.hookExample then
+ self.hookExample(self, example)
+ end
+ end
+
+ currentError = currentError / dataset:size()
+
+ if self.hookIteration then
+ self.hookIteration(self, iteration, currentError)
+ end
+
+ if self.verbose then
+ print("# current error = " .. currentError)
+ end
+ iteration = iteration + 1
+ currentLearningRate = self.learningRate/(1+iteration*self.learningRateDecay)
+ if self.maxIteration > 0 and iteration > self.maxIteration then
+ print("# StochasticGradient: you have reached the maximum number of iterations")
+ print("# training error = " .. currentError)
+ break
+ end
+ end
+end
diff --git a/contrib/lua-torch/nn/Sum.lua b/contrib/lua-torch/nn/Sum.lua
new file mode 100644
index 000000000..7fe8a1ab8
--- /dev/null
+++ b/contrib/lua-torch/nn/Sum.lua
@@ -0,0 +1,67 @@
+local Sum, parent = torch.class('nn.Sum', 'nn.Module')
+
+function Sum:__init(dimension, nInputDims, sizeAverage, squeeze)
+ parent.__init(self)
+ self.dimension = dimension or 1
+ -- do not assign default value to nInputDims or it will break backward compatibility
+ self.nInputDims = nInputDims
+ self.sizeAverage = sizeAverage or false
+ if squeeze ~= nil then
+ assert(type(squeeze) == 'boolean', 'squeeze has to be true/false')
+ self.squeeze = squeeze
+ else
+ self.squeeze = true
+ end
+end
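+
+-- e.g. nn.Sum(1):forward(torch.rand(3, 4)) returns a size-4 tensor; the
+-- summed dimension is squeezed away by default.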
+
+function Sum:_getPositiveDimension(input)
+ local dimension = self.dimension
+ if dimension < 0 then
+ dimension = input:dim() + dimension + 1
+ elseif self.nInputDims and input:dim()==(self.nInputDims+1) then
+ dimension = dimension + 1
+ end
+ assert(input:dim() >= dimension, "dimension exceeds input dimensions")
+ return dimension
+end
+
+function Sum:updateOutput(input)
+ local dimension = self:_getPositiveDimension(input)
+ if type(self.output) == 'number' then
+ self.output = input.new()
+ end
+ self.output:sum(input, dimension)
+ if self.sizeAverage then
+ self.output:div(input:size(dimension))
+ end
+ if (self.squeeze == nil or self.squeeze) and self.output:nDimension() > 1 then
+ self.output:set(self.output:select(dimension, 1))
+ end
+ return self.output
+end
+
+function Sum:updateGradInput(input, gradOutput)
+ local dimension = self:_getPositiveDimension(input)
+ -- zero-strides don't work with MKL/BLAS, so
+ -- don't set self.gradInput to zero-stride tensor.
+ -- Instead, do a deepcopy
+ local size = input:size()
+ size[dimension] = 1
+ if not gradOutput:isContiguous() then
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ gradOutput = self._gradOutput
+ end
+ gradOutput = gradOutput:view(size)
+ self.gradInput:resizeAs(input)
+ self.gradInput:copy(gradOutput:expandAs(input))
+ if self.sizeAverage then
+ self.gradInput:div(input:size(dimension))
+ end
+ return self.gradInput
+end
+
+function Sum:clearState()
+ nn.utils.clear(self, '_gradOutput')
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/THNN.lua b/contrib/lua-torch/nn/THNN.lua
new file mode 100644
index 000000000..0848e9ed2
--- /dev/null
+++ b/contrib/lua-torch/nn/THNN.lua
@@ -0,0 +1,140 @@
+local ffi = require 'ffi'
+
+local THNN = {}
+
+
+local generic_THNN_h = require 'nn.THNN_h'
+-- strip all lines starting with #
+-- to remove preprocessor directives originally present
+-- in THNN.h
+generic_THNN_h = generic_THNN_h:gsub("\n#[^\n]*", "")
+generic_THNN_h = generic_THNN_h:gsub("^#[^\n]*\n", "")
+
+-- THGenerator struct declaration copied from torch7/lib/TH/THRandom.h
+local base_declarations = [[
+typedef void THNNState;
+
+typedef struct {
+ unsigned long the_initial_seed;
+ int left;
+ int seeded;
+ unsigned long next;
+ unsigned long state[624]; /* the array for the state vector 624 = _MERSENNE_STATE_N */
+ double normal_x;
+ double normal_y;
+ double normal_rho;
+ int normal_is_valid;
+} THGenerator;
+]]
+
+-- polyfill for Lua 5.1
+if not package.searchpath then
+ local sep = package.config:sub(1,1)
+ function package.searchpath(mod, path)
+ mod = mod:gsub('%.', sep)
+ for m in path:gmatch('[^;]+') do
+ local nm = m:gsub('?', mod)
+ local f = io.open(nm, 'r')
+ if f then
+ f:close()
+ return nm
+ end
+ end
+ end
+end
+
+-- load libTHNN
+THNN.C = ffi.load(package.searchpath('libTHNN', package.cpath))
+
+ffi.cdef(base_declarations)
+
+-- expand macros so the original lines from lib/THNN/generic/THNN.h can be used
+local preprocessed = string.gsub(generic_THNN_h, 'TH_API void THNN_%(([%a%d_]+)%)', 'void THNN_TYPE%1')
+
+local replacements =
+{
+ {
+ ['TYPE'] = 'Double',
+ ['accreal'] = 'double',
+ ['THTensor'] = 'THDoubleTensor',
+ ['THIndexTensor'] = 'THLongTensor',
+ ['THIntegerTensor'] = 'THIntTensor',
+ ['THIndex_t'] = 'long',
+ ['THInteger_t'] = 'int'
+ },
+ {
+ ['TYPE'] = 'Float',
+ ['accreal'] = 'double',
+ ['THTensor'] = 'THFloatTensor',
+ ['THIndexTensor'] = 'THLongTensor',
+ ['THIntegerTensor'] = 'THIntTensor',
+ ['THIndex_t'] = 'long',
+ ['THInteger_t'] = 'int'
+ }
+}
+
+for i=1,#replacements do
+ local r = replacements[i]
+ local s = preprocessed
+ for k,v in pairs(r) do
+ s = string.gsub(s, k, v)
+ end
+ ffi.cdef(s)
+end
+
+THNN.NULL = ffi.NULL or nil
+
+function THNN.getState()
+ return ffi.NULL or nil
+end
+
+function THNN.optionalTensor(t)
+ return t and t:cdata() or THNN.NULL
+end
+
+local function extract_function_names(s)
+ local t = {}
+ for n in string.gmatch(s, 'TH_API void THNN_%(([%a%d_]+)%)') do
+ t[#t+1] = n
+ end
+ return t
+end
+
+function THNN.bind(lib, base_names, type_name, state_getter)
+ local ftable = {}
+ local prefix = 'THNN_' .. type_name
+ for i,n in ipairs(base_names) do
+ -- use pcall since some libs might not support all functions (e.g. cunn)
+ local ok,v = pcall(function() return lib[prefix .. n] end)
+ if ok then
+         ftable[n] = function(...) v(state_getter(), ...) end -- implicitly add state
+      else
+         print('not found: ' .. prefix .. n .. ': ' .. v)
+ end
+ end
+ return ftable
+end
+
+-- build function table
+local function_names = extract_function_names(generic_THNN_h)
+
+THNN.kernels = {}
+THNN.kernels['torch.FloatTensor'] = THNN.bind(THNN.C, function_names, 'Float', THNN.getState)
+THNN.kernels['torch.DoubleTensor'] = THNN.bind(THNN.C, function_names, 'Double', THNN.getState)
+
+torch.getmetatable('torch.FloatTensor').THNN = THNN.kernels['torch.FloatTensor']
+torch.getmetatable('torch.DoubleTensor').THNN = THNN.kernels['torch.DoubleTensor']
+
+function THNN.runKernel(f, type, ...)
+ local ftable = THNN.kernels[type]
+ if not ftable then
+ error('Unsupported tensor type: '..type)
+ end
+   local kernel = ftable[f]
+   if not kernel then
+      error(string.format("Function '%s' not found for tensor type '%s'.", f, type))
+   end
+   kernel(...)
+end
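+
+-- Dispatch sketch (assuming the Abs kernels are present in libTHNN):
+-- THNN.runKernel('Abs_updateOutput', 'torch.FloatTensor',
+--                input:cdata(), output:cdata())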
+
+return THNN
diff --git a/contrib/lua-torch/nn/Tanh.lua b/contrib/lua-torch/nn/Tanh.lua
new file mode 100644
index 000000000..fc42cbbfd
--- /dev/null
+++ b/contrib/lua-torch/nn/Tanh.lua
@@ -0,0 +1,19 @@
+local Tanh = torch.class('nn.Tanh', 'nn.Module')
+
+function Tanh:updateOutput(input)
+ input.THNN.Tanh_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ return self.output
+end
+
+function Tanh:updateGradInput(input, gradOutput)
+ input.THNN.Tanh_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.output:cdata()
+ )
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/TanhShrink.lua b/contrib/lua-torch/nn/TanhShrink.lua
new file mode 100644
index 000000000..96df6c5b7
--- /dev/null
+++ b/contrib/lua-torch/nn/TanhShrink.lua
@@ -0,0 +1,20 @@
+local TanhShrink, parent = torch.class('nn.TanhShrink','nn.Module')
+
+function TanhShrink:__init()
+ parent.__init(self)
+ self.tanh = nn.Tanh()
+end
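+
+-- TanhShrink computes output = input - tanh(input).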
+
+function TanhShrink:updateOutput(input)
+ local th = self.tanh:updateOutput(input)
+ self.output:resizeAs(input):copy(input)
+ self.output:add(-1,th)
+ return self.output
+end
+
+function TanhShrink:updateGradInput(input, gradOutput)
+ local dth = self.tanh:updateGradInput(input,gradOutput)
+ self.gradInput:resizeAs(input):copy(gradOutput)
+ self.gradInput:add(-1,dth)
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/TemporalConvolution.lua b/contrib/lua-torch/nn/TemporalConvolution.lua
new file mode 100644
index 000000000..4b3a89eb6
--- /dev/null
+++ b/contrib/lua-torch/nn/TemporalConvolution.lua
@@ -0,0 +1,73 @@
+local TemporalConvolution, parent = torch.class('nn.TemporalConvolution', 'nn.Module')
+
+function TemporalConvolution:__init(inputFrameSize, outputFrameSize, kW, dW)
+ parent.__init(self)
+
+ dW = dW or 1
+
+ self.inputFrameSize = inputFrameSize
+ self.outputFrameSize = outputFrameSize
+ self.kW = kW
+ self.dW = dW
+
+ self.weight = torch.Tensor(outputFrameSize, inputFrameSize*kW)
+ self.bias = torch.Tensor(outputFrameSize)
+ self.gradWeight = torch.Tensor(outputFrameSize, inputFrameSize*kW)
+ self.gradBias = torch.Tensor(outputFrameSize)
+
+ self:reset()
+end
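+
+-- For an input with nInputFrame frames the output has
+--   nOutputFrame = (nInputFrame - kW) / dW + 1
+-- frames, each of size outputFrameSize.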
+
+function TemporalConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.inputFrameSize)
+ end
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+ end
+end
+
+function TemporalConvolution:updateOutput(input)
+ input.THNN.TemporalConvolution_updateOutput(
+ input:cdata(), self.output:cdata(),
+ self.weight:cdata(), self.bias:cdata(),
+ self.kW, self.dW,
+ self.inputFrameSize, self.outputFrameSize
+ )
+ return self.output
+end
+
+function TemporalConvolution:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ input.THNN.TemporalConvolution_updateGradInput(
+ input:cdata(), gradOutput:cdata(),
+ self.gradInput:cdata(), self.weight:cdata(),
+ self.kW, self.dW
+ )
+ return self.gradInput
+ end
+end
+
+function TemporalConvolution:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ input.THNN.TemporalConvolution_accGradParameters(
+ input:cdata(), gradOutput:cdata(),
+ self.gradWeight:cdata(), self.gradBias:cdata(),
+ self.kW, self.dW, scale
+ )
+end
+
+function TemporalConvolution:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ -- we do not need to accumulate parameters when sharing:
+ self:defaultAccUpdateGradParameters(input, gradOutput, lr)
+end
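+
+-- Usage sketch (illustrative sizes): an nInputFrame x inputFrameSize sequence
+-- maps to (nInputFrame - kW)/dW + 1 frames of size outputFrameSize.
+--   local conv = nn.TemporalConvolution(16, 32, 3)   -- kW = 3, dW = 1
+--   local out = conv:forward(torch.randn(100, 16))   -- out is 98 x 32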
diff --git a/contrib/lua-torch/nn/TemporalDynamicKMaxPooling.lua b/contrib/lua-torch/nn/TemporalDynamicKMaxPooling.lua
new file mode 100644
index 000000000..644a0fa9c
--- /dev/null
+++ b/contrib/lua-torch/nn/TemporalDynamicKMaxPooling.lua
@@ -0,0 +1,65 @@
+--[[
+ This file implements Dynamic K Max Pooling as described in the paper:
+ "A Convolutional Neural Network for Modelling Sentences"
+ by Nal Kalchbrenner, Edward Grefenstette, Phil Blunsom
+
+ The operation is simply selecting the k highest values out of a sequence.
+ k can be a calculated value or pre-defined
+
+ The value of k can be calculated as in the paper by using:
+ k_top as minK
+ (L-l)/L as factor
+
+ Where:
+ k_top is the desired sequence length at the end of the convolutional part,
+ L is the total number of layers,
+ l is this layer's number
+]]
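+
+-- For example (illustrative numbers): with minK = 4 and factor = 0.5, a
+-- 30-frame input keeps k = max(4, ceil(0.5 * 30)) = 15 frames, while a
+-- 6-frame input keeps k = max(4, ceil(0.5 * 6)) = 4 frames.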
+
+local TemporalDynamicKMaxPooling, parent = torch.class('nn.TemporalDynamicKMaxPooling', 'nn.Module')
+
+function TemporalDynamicKMaxPooling:__init(minK, factor)
+ parent.__init(self)
+
+ self.minK = minK
+ self.factor = factor or 0
+end
+
+function TemporalDynamicKMaxPooling:updateOutput(input)
+ assert(input:dim() == 2 or input:dim() == 3, 'Only 2D or 3D (batch mode) input accepted')
+
+ local seqDim = input:dim()-1
+ local k = math.max(self.minK, math.ceil(self.factor*input:size(seqDim)))
+ assert(input:size(seqDim) >= self.minK, 'Input sequence length (' .. input:size(seqDim) .. ') too small for desired k value (' .. k .. ')')
+
+ -- Sort input in descending order
+ local sorted, allIndices = input:sort(seqDim,true)
+ -- Reduce the indices to only include the top-k and return to original order by sorting
+ self.indices = allIndices:narrow(seqDim, 1, k):sort(seqDim)
+
+ self.output = input:gather(seqDim, self.indices)
+
+ return self.output
+end
+
+function TemporalDynamicKMaxPooling:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ local seqDim = input:dim()-1
+
+ self.gradInput:resizeAs(input)
+ self.gradInput:zero()
+
+ -- Using the previously stored indices, add the gradOutputs to their respective
+ -- input indices in the self.gradInput buffer
+ local updateValues = self.gradInput:gather(seqDim, self.indices)
+ updateValues:add(gradOutput)
+ self.gradInput:scatter(seqDim, self.indices, updateValues)
+
+ return self.gradInput
+ end
+end
+
+function TemporalDynamicKMaxPooling:clearState()
+ nn.utils.clear(self, 'indices')
+ return parent.clearState(self)
+end
diff --git a/contrib/lua-torch/nn/TemporalMaxPooling.lua b/contrib/lua-torch/nn/TemporalMaxPooling.lua
new file mode 100644
index 000000000..894f4a99f
--- /dev/null
+++ b/contrib/lua-torch/nn/TemporalMaxPooling.lua
@@ -0,0 +1,44 @@
+local TemporalMaxPooling, parent = torch.class('nn.TemporalMaxPooling', 'nn.Module')
+
+function TemporalMaxPooling:__init(kW, dW)
+ parent.__init(self)
+
+ dW = dW or kW
+
+ self.kW = kW
+ self.dW = dW
+end
+
+function TemporalMaxPooling:updateOutput(input)
+ self.indices = self.indices or torch.LongTensor()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+ else
+ self.indices = self.indices:long()
+ end
+ input.THNN.TemporalMaxPooling_updateOutput(
+ input:cdata(), self.output:cdata(),
+ self.indices:cdata(), self.kW, self.dW
+ )
+ return self.output
+end
+
+function TemporalMaxPooling:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ input.THNN.TemporalMaxPooling_updateGradInput(
+ input:cdata(), gradOutput:cdata(),
+ self.gradInput:cdata(), self.indices:cdata(),
+ self.kW, self.dW
+ )
+ return self.gradInput
+ end
+end
+
+function TemporalMaxPooling:empty()
+ self:clearState()
+end
+
+function TemporalMaxPooling:clearState()
+ if self.indices then self.indices:set() end
+ return parent.clearState(self)
+end
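+
+-- Usage sketch (illustrative): max-pools kW frames at stride dW along time,
+-- yielding (nInputFrame - kW)/dW + 1 output frames.
+--   local mp = nn.TemporalMaxPooling(2, 2)
+--   local out = mp:forward(torch.randn(10, 8))   -- out is 5 x 8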
diff --git a/contrib/lua-torch/nn/TemporalRowConvolution.lua b/contrib/lua-torch/nn/TemporalRowConvolution.lua
new file mode 100644
index 000000000..7c9d6a269
--- /dev/null
+++ b/contrib/lua-torch/nn/TemporalRowConvolution.lua
@@ -0,0 +1,120 @@
+local THNN = require "nn.THNN"
+
+local TemporalRowConvolution, parent = torch.class("nn.TemporalRowConvolution", "nn.Module")
+
+function TemporalRowConvolution:__init(inputFrameSize, kW, dW, featFirst)
+ parent.__init(self)
+
+ self.inputFrameSize = inputFrameSize
+ self.kW = kW
+ self.dW = dW or 1
+
+ self.weight = torch.Tensor(inputFrameSize, 1, kW)
+ self.bias = torch.Tensor(inputFrameSize)
+ self.gradWeight = torch.Tensor(inputFrameSize, 1, kW)
+ self.gradBias = torch.Tensor(inputFrameSize)
+
+ -- Set to true for batch x inputFrameSize x nInputFrame
+ self.featFirst = featFirst and true or false
+ self:reset()
+end
+
+function TemporalRowConvolution:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function TemporalRowConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1 / math.sqrt(self.kW * self.inputFrameSize)
+ end
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+end
+
+function TemporalRowConvolution:updateOutput(input)
+ assert(input.THNN, torch.type(input)..".THNN backend not imported")
+ self.finput = self.finput or input.new()
+ self.fgradInput = self.fgradInput or input.new()
+
+ input.THNN.TemporalRowConvolution_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW,
+ self.dW,
+ 0, -- would be self.padW
+ self.featFirst
+ )
+
+ return self.output
+end
+
+function TemporalRowConvolution:updateGradInput(input, gradOutput)
+ assert(input.THNN, torch.type(input)..".THNN backend not imported")
+
+ if self.gradInput then
+ input.THNN.TemporalRowConvolution_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW,
+ self.dW,
+ 0, -- would be self.padW
+ self.featFirst
+ )
+ return self.gradInput
+ end
+end
+
+function TemporalRowConvolution:accGradParameters(input, gradOutput, scale)
+ assert(input.THNN, torch.type(input)..".THNN backend not imported")
+
+ input.THNN.TemporalRowConvolution_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW,
+ self.dW,
+ 0, -- would be self.padW
+ self.featFirst,
+ scale or 1)
+end
+
+function TemporalRowConvolution:type(type, tensorCache)
+ if self.finput then self.finput:set() end
+ if self.fgradInput then self.fgradInput:set() end
+ return parent.type(self, type, tensorCache)
+end
+
+function TemporalRowConvolution:__tostring__()
+ local s = string.format("%s(%d, %d", torch.type(self), self.inputFrameSize, self.kW)
+ if self.dW ~= 1 then
+ s = s .. string.format(", %d", self.dW)
+ end
+ if self.padW and self.padW ~= 0 then -- currently padding is not supported
+ s = s .. ", " .. self.padW
+ end
+ if self.bias then
+ return s .. ")"
+ else
+ return s .. ") without bias"
+ end
+end
+
+function TemporalRowConvolution:clearState()
+ nn.utils.clear(self, "finput", "fgradInput", "_input", "_gradOutput")
+ return parent.clearState(self)
+end
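+
+-- Usage sketch (illustrative; output length stated under the zero padding
+-- hard-coded above): each input feature row gets its own width-kW 1-D filter.
+--   local rc = nn.TemporalRowConvolution(8, 3)   -- inputFrameSize = 8, kW = 3
+--   local out = rc:forward(torch.randn(20, 8))   -- 18 x 8 with dW = 1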
diff --git a/contrib/lua-torch/nn/TemporalSubSampling.lua b/contrib/lua-torch/nn/TemporalSubSampling.lua
new file mode 100644
index 000000000..e9287d63d
--- /dev/null
+++ b/contrib/lua-torch/nn/TemporalSubSampling.lua
@@ -0,0 +1,64 @@
+local TemporalSubSampling, parent = torch.class('nn.TemporalSubSampling', 'nn.Module')
+
+function TemporalSubSampling:__init(inputFrameSize, kW, dW)
+ parent.__init(self)
+
+ dW = dW or 1
+
+ self.inputFrameSize = inputFrameSize
+ self.kW = kW
+ self.dW = dW
+
+ self.weight = torch.Tensor(inputFrameSize)
+ self.bias = torch.Tensor(inputFrameSize)
+ self.gradWeight = torch.Tensor(inputFrameSize)
+ self.gradBias = torch.Tensor(inputFrameSize)
+
+ self:reset()
+end
+
+function TemporalSubSampling:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW)
+ end
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+ end
+end
+
+function TemporalSubSampling:updateOutput(input)
+ input.THNN.TemporalSubSampling_updateOutput(
+ input:cdata(), self.output:cdata(),
+ self.weight:cdata(), self.bias:cdata(),
+ self.kW, self.dW, self.inputFrameSize
+ )
+ return self.output
+end
+
+function TemporalSubSampling:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ input.THNN.TemporalSubSampling_updateGradInput(
+ input:cdata(), gradOutput:cdata(), self.gradInput:cdata(),
+ self.weight:cdata(), self.kW, self.dW
+ )
+ return self.gradInput
+ end
+end
+
+function TemporalSubSampling:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ input.THNN.TemporalSubSampling_accGradParameters(
+ input:cdata(), gradOutput:cdata(), self.gradWeight:cdata(),
+ self.gradBias:cdata(), self.kW, self.dW, scale
+ )
+end
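+
+-- Usage sketch (illustrative): each output frame is weight * (sum of kW input
+-- frames) + bias per feature, giving (nInputFrame - kW)/dW + 1 output frames.
+--   local ss = nn.TemporalSubSampling(4, 2)    -- inputFrameSize = 4, kW = 2
+--   local out = ss:forward(torch.randn(9, 4))  -- out is 8 x 4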
diff --git a/contrib/lua-torch/nn/Threshold.lua b/contrib/lua-torch/nn/Threshold.lua
new file mode 100644
index 000000000..6fdd26408
--- /dev/null
+++ b/contrib/lua-torch/nn/Threshold.lua
@@ -0,0 +1,51 @@
+local Threshold, parent = torch.class('nn.Threshold','nn.Module')
+
+function Threshold:__init(th,v,ip)
+ parent.__init(self)
+ self.threshold = th or 1e-6
+ self.val = v or 0
+ if (th and type(th) ~= 'number') or (v and type(v) ~= 'number') then
+ error('nn.Threshold(threshold, value)')
+ end
+ -- default for inplace is false
+ self.inplace = ip or false
+ if (ip and type(ip) ~= 'boolean') then
+ error('in-place flag must be boolean')
+ end
+ self:validateParameters()
+end
+
+function Threshold:updateOutput(input)
+ self:validateParameters()
+ input.THNN.Threshold_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.threshold,
+ self.val,
+ self.inplace
+ )
+ return self.output
+end
+
+function Threshold:updateGradInput(input, gradOutput)
+ self:validateParameters()
+ input.THNN.Threshold_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.threshold,
+ self.val,
+ self.inplace
+ )
+ return self.gradInput
+end
+
+function Threshold:validateParameters()
+ self.inplace = self.inplace or false -- backwards compatibility: models saved before the in-place flag existed
+ if self.inplace then
+ if self.val > self.threshold then
+ error('in-place processing requires value (' .. self.val ..
+ ') to not exceed threshold (' .. self.threshold .. ')')
+ end
+ end
+end
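+
+-- Usage sketch (illustrative): values not exceeding the threshold are replaced
+-- by val; nn.ReLU is the special case nn.Threshold(0, 0).
+--   local t = nn.Threshold(0.5, 0)
+--   local y = t:forward(torch.Tensor{0.2, 0.7})   -- {0, 0.7}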
diff --git a/contrib/lua-torch/nn/Transpose.lua b/contrib/lua-torch/nn/Transpose.lua
new file mode 100644
index 000000000..cceb2b643
--- /dev/null
+++ b/contrib/lua-torch/nn/Transpose.lua
@@ -0,0 +1,35 @@
+local Transpose, parent = torch.class('nn.Transpose', 'nn.Module')
+
+-- transpose dimensions:
+-- n = nn.Transpose({1,4},{1,3})
+-- will transpose dims 1 and 4, then 1 and 3...
+
+function Transpose:__init(...)
+ parent.__init(self)
+ self.permutations = {...}
+ self.numInputDims = nil
+end
+
+function Transpose:setNumInputDims(numInputDims)
+ self.numInputDims = numInputDims
+ return self
+end
+
+function Transpose:updateOutput(input)
+ local offset = self.numInputDims and input:nDimension()-self.numInputDims or 0
+ for _,perm in ipairs(self.permutations) do
+ input = input:transpose(perm[1]+offset,perm[2]+offset)
+ end
+ self.output:resizeAs(input):copy(input)
+ return self.output
+end
+
+function Transpose:updateGradInput(input, gradOutput)
+ for i = #self.permutations,1,-1 do
+ local perm = self.permutations[i]
+ local offset = self.numInputDims and input:nDimension()-self.numInputDims or 0
+ gradOutput = gradOutput:transpose(perm[1]+offset,perm[2]+offset)
+ end
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+ return self.gradInput
+end
diff --git a/contrib/lua-torch/nn/Unsqueeze.lua b/contrib/lua-torch/nn/Unsqueeze.lua
new file mode 100644
index 000000000..2e82a25a0
--- /dev/null
+++ b/contrib/lua-torch/nn/Unsqueeze.lua
@@ -0,0 +1,52 @@
+local Unsqueeze, parent = torch.class('nn.Unsqueeze', 'nn.Module')
+
+local function _assertTensor(t)
+ assert(torch.isTensor(t), "This module only works on tensors")
+end
+
+function Unsqueeze:__init(pos, numInputDims)
+ parent.__init(self)
+ self.pos = pos or error('the position at which to insert the singleton dim must be specified')
+ self:setNumInputDims(numInputDims)
+end
+
+function Unsqueeze:setNumInputDims(numInputDims)
+ self.numInputDims = numInputDims
+ return self
+end
+
+function Unsqueeze:updateOutput(input)
+ _assertTensor(input)
+ local actualPos = self:_getActualPosition(input)
+ nn.utils.addSingletonDimension(self.output, input, actualPos)
+ return self.output
+end
+
+function Unsqueeze:updateGradInput(input, gradOutput)
+ _assertTensor(input)
+ _assertTensor(gradOutput)
+ assert(input:nElement() == gradOutput:nElement())
+
+ self.gradInput:view(gradOutput, input:size())
+ return self.gradInput
+end
+
+function Unsqueeze:__tostring__()
+ return torch.type(self)..'(dim ' .. self.pos .. ')'
+end
+
+function Unsqueeze:_getActualPosition(input)
+ -- get the valid dimension offset for batch mode (if any)
+ local inputDim = input:dim() -- data batch dim
+ self.numInputDims = self.numInputDims or inputDim -- feature map dim
+ local offsetDim = inputDim - self.numInputDims
+ assert(offsetDim >= 0, "input feature map dim (numInputDims) must be <= input:dim()")
+
+ -- the actual position; clearer error message for batchMode (if any)
+ local actualPos = self.pos + offsetDim
+ assert(actualPos >= 1 and actualPos <= (inputDim + 1),
+ ("Invalid position: %d. input:dim() is %d, input feature map dim (numInputDims) is %d.")
+ :format(self.pos, inputDim, self.numInputDims)
+ )
+ return actualPos
+end
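+
+-- Usage sketch (illustrative): inserts a singleton dimension at position pos.
+--   local u = nn.Unsqueeze(1)
+--   u:forward(torch.randn(4, 5)):size()   -- 1 x 4 x 5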
diff --git a/contrib/lua-torch/nn/View.lua b/contrib/lua-torch/nn/View.lua
new file mode 100644
index 000000000..542e57e16
--- /dev/null
+++ b/contrib/lua-torch/nn/View.lua
@@ -0,0 +1,96 @@
+local View, parent = torch.class('nn.View', 'nn.Module')
+
+function View:resetSize(...)
+ if select('#', ...) == 1 and torch.typename(select(1, ...)) == 'torch.LongStorage' then
+ self.size = select(1, ...)
+ else
+ self.size = torch.LongStorage({...})
+ end
+
+ self.numElements = 1
+ local inferdim = false
+ for i = 1,#self.size do
+ local szi = self.size[i]
+ if szi >= 0 then
+ self.numElements = self.numElements * self.size[i]
+ else
+ assert(szi == -1, 'size should be positive or -1')
+ assert(not inferdim, 'only one dimension can be at -1')
+ inferdim = true
+ end
+ end
+
+ return self
+end
+
+function View:__init(...)
+ parent.__init(self)
+ self:resetSize(...)
+ self.numInputDims = nil
+end
+
+function View:setNumInputDims(numInputDims)
+ self.numInputDims = numInputDims
+ return self
+end
+
+local function batchsize(input, size, numInputDims, numElements)
+ local ind = input:nDimension()
+ local isz = input:size()
+ local maxdim = numInputDims and numInputDims or ind
+ local ine = 1
+ for i=ind,ind-maxdim+1,-1 do
+ ine = ine * isz[i]
+ end
+
+ if ine % numElements ~= 0 then
+ error(string.format(
+ 'input view (%s) and desired view (%s) do not match',
+ table.concat(input:size():totable(), 'x'),
+ table.concat(size:totable(), 'x')))
+ end
+
+ -- the remainder is either the batch...
+ local bsz = ine / numElements
+
+ -- ... or the missing size dim
+ for i=1,size:size() do
+ if size[i] == -1 then
+ bsz = 1
+ break
+ end
+ end
+
+ -- dims beyond maxdim are definitely batch dims
+ for i=ind-maxdim,1,-1 do
+ bsz = bsz * isz[i]
+ end
+
+ -- special case: no batch dimension is needed
+ if bsz == 1 and (not numInputDims or input:nDimension() <= numInputDims) then
+ return
+ end
+
+ return bsz
+end
+
+function View:updateOutput(input)
+ self.output = self.output or input.new()
+ local bsz = batchsize(input, self.size, self.numInputDims, self.numElements)
+ if bsz then
+ self.output:view(input, bsz, table.unpack(self.size:totable()))
+ else
+ self.output:view(input, self.size)
+ end
+ return self.output
+end
+
+function View:updateGradInput(input, gradOutput)
+ self.gradInput = self.gradInput or gradOutput.new()
+ self.gradInput:view(gradOutput, input:size())
+ return self.gradInput
+end
+
+function View:__tostring__()
+ return torch.type(self)..'('..table.concat(self.size:totable(), ', ')..')'
+end
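+
+-- Usage sketch (illustrative): -1 lets one dimension be inferred, and
+-- setNumInputDims() tells the module how many trailing dims are non-batch.
+--   local v = nn.View(-1):setNumInputDims(2)
+--   v:forward(torch.randn(8, 4, 5)):size()   -- 8 x 20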
diff --git a/contrib/lua-torch/nn/VolumetricAveragePooling.lua b/contrib/lua-torch/nn/VolumetricAveragePooling.lua
new file mode 100644
index 000000000..df6d2c405
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricAveragePooling.lua
@@ -0,0 +1,54 @@
+local VolumetricAveragePooling, parent = torch.class(
+ 'nn.VolumetricAveragePooling', 'nn.Module')
+
+function VolumetricAveragePooling:__init(kT, kW, kH, dT, dW, dH)
+ parent.__init(self)
+
+ dT = dT or kT
+ dW = dW or kW
+ dH = dH or kH
+
+ self.kT = kT
+ self.kH = kH
+ self.kW = kW
+ self.dT = dT
+ self.dW = dW
+ self.dH = dH
+end
+
+function VolumetricAveragePooling:updateOutput(input)
+ input.THNN.VolumetricAveragePooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH
+ )
+ return self.output
+end
+
+function VolumetricAveragePooling:updateGradInput(input, gradOutput)
+ input.THNN.VolumetricAveragePooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH
+ )
+ return self.gradInput
+end
+
+function VolumetricAveragePooling:empty()
+ return parent.clearState(self)
+end
+
+function VolumetricAveragePooling:__tostring__()
+ local s = string.format('%s(%dx%dx%d, %d,%d,%d', torch.type(self),
+ self.kT, self.kW, self.kH, self.dT, self.dW, self.dH)
+ if (self.padT or self.padW or self.padH) and
+ (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padT.. ',' .. self.padW .. ','.. self.padH
+ end
+ s = s .. ')'
+
+ return s
+end
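+
+-- Usage sketch (illustrative): averages over kT x kW x kH volumes, with the
+-- stride defaulting to the kernel size.
+--   local ap = nn.VolumetricAveragePooling(2, 2, 2)
+--   ap:forward(torch.randn(1, 4, 4, 4)):size()   -- 1 x 2 x 2 x 2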
diff --git a/contrib/lua-torch/nn/VolumetricBatchNormalization.lua b/contrib/lua-torch/nn/VolumetricBatchNormalization.lua
new file mode 100644
index 000000000..6168a9245
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricBatchNormalization.lua
@@ -0,0 +1,4 @@
+local BN, parent = torch.class('nn.VolumetricBatchNormalization', 'nn.BatchNormalization')
+
+-- expected dimension of input
+BN.nDim = 5
diff --git a/contrib/lua-torch/nn/VolumetricConvolution.lua b/contrib/lua-torch/nn/VolumetricConvolution.lua
new file mode 100644
index 000000000..329609aff
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricConvolution.lua
@@ -0,0 +1,169 @@
+local THNN = require 'nn.THNN'
+local VolumetricConvolution, parent = torch.class('nn.VolumetricConvolution', 'nn.Module')
+
+function VolumetricConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
+ parent.__init(self)
+
+ dT = dT or 1
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kT = kT
+ self.kW = kW
+ self.kH = kH
+ self.dT = dT
+ self.dW = dW
+ self.dH = dH
+ self.padT = padT or 0
+ self.padW = padW or self.padT
+ self.padH = padH or self.padW
+
+ self.weight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW)
+ self.bias = torch.Tensor(nOutputPlane)
+ self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW)
+ self.gradBias = torch.Tensor(nOutputPlane)
+ self:reset()
+end
+
+function VolumetricConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kT*self.kW*self.kH*self.nInputPlane)
+ end
+ if nn.oldSeed then
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ if self.bias then
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ end
+ else
+ self.weight:uniform(-stdv, stdv)
+ if self.bias then
+ self.bias:uniform(-stdv, stdv)
+ end
+ end
+end
+
+function VolumetricConvolution:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function VolumetricConvolution:updateOutput(input)
+ self.finput = self.finput or input.new()
+ self.fgradInput = self.fgradInput or input.new()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ input.THNN.VolumetricConvolution_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH
+ )
+ else
+ input.THNN.VolumetricConvolutionMM_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH
+ )
+ end
+ return self.output
+end
+
+function VolumetricConvolution:updateGradInput(input, gradOutput)
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ input.THNN.VolumetricConvolution_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH
+ )
+ return self.gradInput
+ else
+ if self.gradInput then
+ input.THNN.VolumetricConvolutionMM_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH
+ )
+ return self.gradInput
+ end
+ end
+end
+
+function VolumetricConvolution:accGradParameters(input, gradOutput, scale)
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ input.THNN.VolumetricConvolution_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ scale or 1
+ )
+ else
+ input.THNN.VolumetricConvolutionMM_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ scale or 1
+ )
+ end
+end
+
+function VolumetricConvolution:type(type, tensorCache)
+ if self.finput then self.finput:set() end
+ if self.fgradInput then self.fgradInput:set() end
+ return parent.type(self, type, tensorCache)
+end
+
+function VolumetricConvolution:clearState()
+ nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
+ return parent.clearState(self)
+end
+
+function VolumetricConvolution:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH)
+ if self.dT ~= 1 or self.dW ~= 1 or self.dH ~= 1 or
+ self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d,%d', self.dT, self.dW, self.dH)
+ end
+ if (self.padT or self.padW or self.padH) and
+ (self.padT ~=0 or self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH
+ end
+ return s .. ')'
+end
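+
+-- Usage sketch (illustrative): each output spatial extent is
+-- floor((i + 2*pad - k)/d) + 1.
+--   local c = nn.VolumetricConvolution(1, 8, 3, 3, 3)   -- k = 3, d = 1, pad = 0
+--   c:forward(torch.randn(1, 16, 16, 16)):size()        -- 8 x 14 x 14 x 14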
diff --git a/contrib/lua-torch/nn/VolumetricDilatedConvolution.lua b/contrib/lua-torch/nn/VolumetricDilatedConvolution.lua
new file mode 100644
index 000000000..f1337ebaa
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricDilatedConvolution.lua
@@ -0,0 +1,84 @@
+local THNN = require 'nn.THNN'
+local VolumetricDilatedConvolution, parent = torch.class('nn.VolumetricDilatedConvolution', 'nn.VolumetricConvolution')
+
+function VolumetricDilatedConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH)
+ parent.__init(self, nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
+
+ self.dilationT = dilationT or 1
+ self.dilationW = dilationW or 1
+ self.dilationH = dilationH or 1
+end
+
+function VolumetricDilatedConvolution:updateOutput(input)
+ self.finput = self.finput or self.weight.new()
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input.THNN.VolumetricDilatedConvolution_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.dilationT, self.dilationW, self.dilationH
+ )
+ return self.output
+end
+
+function VolumetricDilatedConvolution:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input.THNN.VolumetricDilatedConvolution_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.dilationT, self.dilationW, self.dilationH
+ )
+ return self.gradInput
+ end
+end
+
+function VolumetricDilatedConvolution:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input.THNN.VolumetricDilatedConvolution_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.dilationT, self.dilationW, self.dilationH,
+ scale
+ )
+end
+
+function VolumetricDilatedConvolution:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH)
+ if self.dT ~= 1 or self.dW ~= 1 or self.dH ~= 1
+ or self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d,%d', self.dT, self.dW, self.dH)
+ end
+ if (self.padT or self.padW or self.padH)
+ and (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH
+ end
+ s = s .. ', ' .. self.dilationT .. ','
+ .. self.dilationW .. ',' .. self.dilationH
+ if self.bias then
+ return s .. ')'
+ else
+ return s .. ') without bias'
+ end
+end
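+
+-- Usage sketch (illustrative): dilation enlarges the receptive field to
+-- dilation*(k-1) + 1 without extra parameters, so each output extent is
+-- floor((i + 2*pad - dilation*(k-1) - 1)/d) + 1.
+--   local dc = nn.VolumetricDilatedConvolution(1, 8, 3,3,3, 1,1,1, 0,0,0, 2,2,2)
+--   dc:forward(torch.randn(1, 16, 16, 16)):size()   -- 8 x 12 x 12 x 12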
diff --git a/contrib/lua-torch/nn/VolumetricDilatedMaxPooling.lua b/contrib/lua-torch/nn/VolumetricDilatedMaxPooling.lua
new file mode 100644
index 000000000..249b2b58e
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricDilatedMaxPooling.lua
@@ -0,0 +1,71 @@
+local THNN = require 'nn.THNN'
+local VolumetricDilatedMaxPooling, parent = torch.class('nn.VolumetricDilatedMaxPooling', 'nn.VolumetricMaxPooling')
+
+function VolumetricDilatedMaxPooling:__init(kT, kW, kH, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH)
+ parent.__init(self, kT, kW, kH, dT, dW, dH, padT, padW, padH)
+
+ self.dilationT = dilationT or 1
+ self.dilationW = dilationW or 1
+ self.dilationH = dilationH or 1
+
+end
+
+function VolumetricDilatedMaxPooling:updateOutput(input)
+ local dims = input:dim()
+ self.itime = input:size(dims-2)
+ self.iheight = input:size(dims-1)
+ self.iwidth = input:size(dims)
+
+ self.indices = self.indices or torch.LongTensor()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+ else
+ self.indices = self.indices:long()
+ end
+ input.THNN.VolumetricDilatedMaxPooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.indices:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.dilationT, self.dilationW, self.dilationH,
+ self.ceil_mode
+ )
+ return self.output
+end
+
+function VolumetricDilatedMaxPooling:updateGradInput(input, gradOutput)
+ input.THNN.VolumetricDilatedMaxPooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.indices:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.dilationT, self.dilationW, self.dilationH,
+ self.ceil_mode
+ )
+ return self.gradInput
+end
+
+function VolumetricDilatedMaxPooling:clearState()
+ if self.indices then
+ self.indices:set()
+ end
+ return parent.clearState(self)
+end
+
+function VolumetricDilatedMaxPooling:__tostring__()
+ local s = string.format('%s(%dx%dx%d, %d,%d,%d', torch.type(self),
+ self.kT, self.kW, self.kH, self.dT, self.dW, self.dH)
+ if (self.padT or self.padW or self.padH) and
+ (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padT.. ',' .. self.padW .. ','.. self.padH
+ end
+ s = s .. ', ' .. self.dilationT .. ',' .. self.dilationW .. ',' .. self.dilationH
+ s = s .. ')'
+
+ return s
+end
diff --git a/contrib/lua-torch/nn/VolumetricDropout.lua b/contrib/lua-torch/nn/VolumetricDropout.lua
new file mode 100644
index 000000000..809e28afe
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricDropout.lua
@@ -0,0 +1,55 @@
+local VolumetricDropout, Parent = torch.class('nn.VolumetricDropout', 'nn.Module')
+
+function VolumetricDropout:__init(p,stochasticInference)
+ Parent.__init(self)
+ self.p = p or 0.5
+ self.train = true
+ self.stochastic_inference = stochasticInference or false
+ self.noise = torch.Tensor()
+end
+
+function VolumetricDropout:updateOutput(input)
+ self.output:resizeAs(input):copy(input)
+ if self.train or self.stochastic_inference then
+ if input:dim() == 5 then
+ self.noise:resize(input:size(1), input:size(2), 1, 1, 1)
+ elseif input:dim() == 4 then
+ self.noise:resize(input:size(1), 1, 1, 1)
+ else
+ error('Input must be 5D (nbatch, nfeat, t, h, w) or 4D (nfeat, t, h, w)')
+ end
+ self.noise:bernoulli(1-self.p)
+ -- We expand the random dropouts to the entire feature map because the
+ -- features are likely correlated across the map and so the dropout
+ -- should also be correlated.
+ self.output:cmul(torch.expandAs(self.noise, input))
+ else
+ self.output:mul(1-self.p)
+ end
+ return self.output
+end
+
+function VolumetricDropout:updateGradInput(input, gradOutput)
+ if self.train then
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+ self.gradInput:cmul(torch.expandAs(self.noise, input)) -- simply mask the gradients with the noise vector
+ else
+ error('backprop only defined while training')
+ end
+ return self.gradInput
+end
+
+function VolumetricDropout:setp(p)
+ self.p = p
+end
+
+function VolumetricDropout:__tostring__()
+ return string.format('%s(%f)', torch.type(self), self.p)
+end
+
+function VolumetricDropout:clearState()
+ if self.noise then
+ self.noise:set()
+ end
+ return Parent.clearState(self)
+end
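+
+-- Usage sketch (illustrative): drops whole feature maps with probability p
+-- while training; in evaluation mode the output is scaled by (1 - p) instead.
+--   local d = nn.VolumetricDropout(0.3)
+--   local y = d:forward(torch.randn(2, 8, 4, 4, 4))   -- entire 4x4x4 maps zeroed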
diff --git a/contrib/lua-torch/nn/VolumetricFractionalMaxPooling.lua b/contrib/lua-torch/nn/VolumetricFractionalMaxPooling.lua
new file mode 100644
index 000000000..f5ff58cf0
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricFractionalMaxPooling.lua
@@ -0,0 +1,175 @@
+local VolumetricFractionalMaxPooling, parent =
+ torch.class('nn.VolumetricFractionalMaxPooling', 'nn.Module')
+
+-- Usage:
+-- nn.VolumetricFractionalMaxPooling(poolSizeT, poolSizeW, poolSizeH, outT, outW, outH)
+-- the output should be the exact size (outT x outH x outW)
+-- nn.VolumetricFractionalMaxPooling(poolSizeT, poolSizeW, poolSizeH, ratioT, ratioW, ratioH)
+-- the output should be the size (floor(inT x ratioT) x floor(inH x ratioH) x floor(inW x ratioW))
+-- ratios are numbers strictly between 0 and 1
+function VolumetricFractionalMaxPooling:__init(poolSizeT, poolSizeW, poolSizeH, arg1, arg2, arg3)
+ parent.__init(self)
+ assert(poolSizeT >= 2)
+ assert(poolSizeW >= 2)
+ assert(poolSizeH >= 2)
+
+ -- Pool size (how wide the pooling for each output unit is)
+ self.poolSizeT = poolSizeT
+ self.poolSizeW = poolSizeW
+ self.poolSizeH = poolSizeH
+
+ -- Random samples are drawn for all
+ -- batch * plane * (time, height, width; i.e., 3) points. This determines
+ -- the 3d "pseudorandom" overlapping pooling regions for each
+ -- (batch element x input plane). A new set of random samples is
+ -- drawn every updateOutput call, unless we disable it via
+ -- :fixPoolingRegions().
+ self.randomSamples = nil
+
+ -- Flag to disable re-generation of random samples between updateOutput
+ -- calls; intended for testing purposes
+ self.newRandomPool = false
+
+ if arg1 >= 1 and arg2 >= 1 and arg3 >= 1 then
+ -- Desired output size: the input tensor will determine the reduction
+ -- ratio
+ self.outT = arg1
+ self.outW = arg2
+ self.outH = arg3
+ else
+ -- Reduction ratio specified per each input
+ -- This is the reduction ratio that we use
+ self.ratioT = arg1
+ self.ratioW = arg2
+ self.ratioH = arg3
+
+ -- The reduction ratio must be between 0 and 1
+ assert(self.ratioT > 0 and self.ratioT < 1)
+ assert(self.ratioW > 0 and self.ratioW < 1)
+ assert(self.ratioH > 0 and self.ratioH < 1)
+ end
+end
+
+function VolumetricFractionalMaxPooling:getBufferSize_(input)
+ local batchSize = 0
+ local planeSize = 0
+
+ if input:nDimension() == 4 then
+ batchSize = 1
+ planeSize = input:size(1)
+ elseif input:nDimension() == 5 then
+ batchSize = input:size(1)
+ planeSize = input:size(2)
+ else
+ error('input must be dim 4 or 5')
+ end
+
+ return torch.LongStorage({batchSize, planeSize, 3})
+end
+
+function VolumetricFractionalMaxPooling:initSampleBuffer_(input)
+ local sampleBufferSize = self:getBufferSize_(input)
+
+ if self.randomSamples == nil then
+ self.randomSamples = input.new():resize(sampleBufferSize):uniform()
+ elseif (self.randomSamples:size(1) ~= sampleBufferSize[1] or
+ self.randomSamples:size(2) ~= sampleBufferSize[2]) then
+ self.randomSamples:resize(sampleBufferSize):uniform()
+ else
+ if not self.newRandomPool then
+ -- Create new pooling windows, since this is a subsequent call
+ self.randomSamples:uniform()
+ end
+ end
+end
+
+function VolumetricFractionalMaxPooling:getOutputSizes_(input)
+ local outT = self.outT
+ local outW = self.outW
+ local outH = self.outH
+ if self.ratioT ~= nil and self.ratioW ~= nil and self.ratioH ~= nil then
+ if input:nDimension() == 5 then
+ outT = math.floor(input:size(5) * self.ratioT)
+ outW = math.floor(input:size(4) * self.ratioW)
+ outH = math.floor(input:size(3) * self.ratioH)
+ elseif input:nDimension() == 4 then
+ outT = math.floor(input:size(4) * self.ratioT)
+ outW = math.floor(input:size(3) * self.ratioW)
+ outH = math.floor(input:size(2) * self.ratioH)
+ else
+ error('input must be dim 4 or 5')
+ end
+
+ -- Neither can be smaller than 1
+ assert(outT > 0, 'reduction ratio or input time too small')
+ assert(outW > 0, 'reduction ratio or input width too small')
+ assert(outH > 0, 'reduction ratio or input height too small')
+ else
+ assert(outT ~= nil and outW ~= nil and outH ~= nil)
+ end
+
+ return outT, outW, outH
+end
+
+-- Call this to turn off regeneration of random pooling regions each
+-- updateOutput call.
+function VolumetricFractionalMaxPooling:fixPoolingRegions(val)
+ if val == nil then
+ val = true
+ end
+
+ self.newRandomPool = val
+ return self
+end
+
+function VolumetricFractionalMaxPooling:updateOutput(input)
+ self.indices = self.indices or torch.LongTensor()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+ else
+ self.indices = self.indices:long()
+ end
+ self:initSampleBuffer_(input)
+ local outT, outW, outH = self:getOutputSizes_(input)
+
+ input.THNN.VolumetricFractionalMaxPooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ outT, outW, outH, self.poolSizeT, self.poolSizeW, self.poolSizeH,
+ self.indices:cdata(), self.randomSamples:cdata())
+ return self.output
+end
+
+function VolumetricFractionalMaxPooling:updateGradInput(input, gradOutput)
+ assert(self.randomSamples ~= nil,
+ 'must call updateOutput/forward first')
+
+ local outT, outW, outH = self:getOutputSizes_(input)
+
+ input.THNN.VolumetricFractionalMaxPooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ outT, outW, outH, self.poolSizeT, self.poolSizeW, self.poolSizeH,
+ self.indices:cdata())
+ return self.gradInput
+end
+
+-- backward compat
+function VolumetricFractionalMaxPooling:empty()
+ self:clearState()
+end
+
+function VolumetricFractionalMaxPooling:clearState()
+ self.indices = nil
+ self.randomSamples = nil
+ return parent.clearState(self)
+end
+
+function VolumetricFractionalMaxPooling:__tostring__()
+ return string.format('%s(%dx%dx%d, %d,%d,%d)', torch.type(self),
+ self.outT and self.outT or self.ratioT,
+ self.outW and self.outW or self.ratioW,
+ self.outH and self.outH or self.ratioH,
+ self.poolSizeT, self.poolSizeW, self.poolSizeH)
+end
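+
+-- Usage sketch (illustrative): the output size may be given exactly or as
+-- per-dimension ratios applied to the input size.
+--   local fp = nn.VolumetricFractionalMaxPooling(2, 2, 2, 0.5, 0.5, 0.5)
+--   fp:forward(torch.randn(4, 16, 16, 16)):size()   -- 4 x 8 x 8 x 8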
diff --git a/contrib/lua-torch/nn/VolumetricFullConvolution.lua b/contrib/lua-torch/nn/VolumetricFullConvolution.lua
new file mode 100644
index 000000000..0ce23401e
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricFullConvolution.lua
@@ -0,0 +1,225 @@
+local THNN = require 'nn.THNN'
+local VolumetricFullConvolution, parent = torch.class('nn.VolumetricFullConvolution','nn.Module')
+
+function VolumetricFullConvolution:__init(nInputPlane, nOutputPlane,
+ kT, kW, kH, -- kernel size
+ dT, dW, dH, -- stride
+ padT, padW, padH, -- padding
+ adjT, adjW, adjH) -- extra output adjustment
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+ dT = dT or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kW = kW
+ self.kH = kH
+ self.kT = kT
+ self.dW = dW
+ self.dH = dH
+ self.dT = dT
+ self.padW = padW or 0
+ self.padH = padH or 0
+ self.padT = padT or 0
+ self.adjW = adjW or 0
+ self.adjH = adjH or 0
+ self.adjT = adjT or 0
+
+ if self.adjW > self.dW - 1 or self.adjH > self.dH - 1 or self.adjT > self.dT - 1 then
+ error('adjW, adjH and adjT may not be greater than self.dW - 1,' ..
+ ' self.dH - 1 and self.dT - 1 respectively')
+ end
+
+ self.weight = torch.Tensor(nInputPlane, nOutputPlane, kT, kH, kW)
+ self.gradWeight = torch.Tensor(nInputPlane, nOutputPlane, kT, kH, kW)
+ self.bias = torch.Tensor(self.nOutputPlane)
+ self.gradBias = torch.Tensor(self.nOutputPlane)
+
+ self.ones = torch.Tensor()
+ self.finput = torch.Tensor()
+ self.fgradInput = torch.Tensor()
+
+ self:reset()
+end
+
+function VolumetricFullConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ local nInputPlane = self.nInputPlane
+ local kT = self.kT
+ local kH = self.kH
+ local kW = self.kW
+ stdv = 1/math.sqrt(kW*kH*kT*nInputPlane)
+ end
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+end
+
+local function calculateAdj(targetSize, ker, pad, stride)
+ return (targetSize + 2 * pad - ker) % stride
+end
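+
+-- For a transposed convolution the output extent per dimension is
+-- (i - 1)*d - 2*pad + k + adj; calculateAdj picks the adj that hits a
+-- requested target size exactly, e.g. targetSize = 15, ker = 4, pad = 1,
+-- stride = 2 gives adj = (15 + 2 - 4) % 2 = 1.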
+
+function VolumetricFullConvolution:backCompatibility()
+ -- Transpose the weight when loading from an old version
+ if not self.adjW then
+ self.weight = self.weight:transpose(1, 2):contiguous()
+ end
+
+ -- Rename the padding when loading from an old version
+ self.padW = self.padW or self.pW
+ self.padH = self.padH or self.pH
+ self.padT = self.padT or self.pT
+
+ self.adjW = self.adjW or 0
+ self.adjH = self.adjH or 0
+ self.adjT = self.adjT or 0
+end
+
+
+function VolumetricFullConvolution:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function VolumetricFullConvolution:updateOutput(input)
+ self:backCompatibility()
+
+ local inputTensor = input
+ local adjT, adjW, adjH = self.adjT, self.adjW, self.adjH
+
+ -- The input can be a table where the second element indicates the target
+ -- output size, in which case the adj factors are computed automatically
+ if type(inputTensor) == 'table' then
+ inputTensor = input[1]
+ local targetTensor = input[2]
+ local tDims = targetTensor:dim()
+ local tT = targetTensor:size(tDims-2)
+ local tH = targetTensor:size(tDims-1)
+ local tW = targetTensor:size(tDims)
+ adjT = calculateAdj(tT, self.kT, self.padT, self.dT)
+ adjW = calculateAdj(tW, self.kW, self.padW, self.dW)
+ adjH = calculateAdj(tH, self.kH, self.padH, self.dH)
+ end
+
+ inputTensor.THNN.VolumetricFullConvolution_updateOutput(
+ inputTensor:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ adjT, adjW, adjH
+ )
+
+ return self.output
+end
+
+function VolumetricFullConvolution:updateGradInput(input, gradOutput)
+ self:backCompatibility()
+
+ local inputTensor = input
+ local adjT, adjW, adjH = self.adjT, self.adjW, self.adjH
+
+ -- The input can be a table where the second element indicates the target
+ -- output size, in which case the adj factors are computed automatically
+ if type(inputTensor) == 'table' then
+ inputTensor = input[1]
+ local targetTensor = input[2]
+ local tDims = targetTensor:dim()
+ local tT = targetTensor:size(tDims-2)
+ local tH = targetTensor:size(tDims-1)
+ local tW = targetTensor:size(tDims)
+ adjT = calculateAdj(tT, self.kT, self.padT, self.dT)
+ adjW = calculateAdj(tW, self.kW, self.padW, self.dW)
+ adjH = calculateAdj(tH, self.kH, self.padH, self.dH)
+ -- Temporarily extract the gradInput tensor
+ if type(self.gradInput) == 'table' then
+ self.gradInput = self.gradInput[1]
+ end
+ end
+
+ inputTensor.THNN.VolumetricFullConvolution_updateGradInput(
+ inputTensor:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ adjT, adjW, adjH
+ )
+
+ if type(input) == 'table' then
+ -- Create a zero tensor to be expanded and used as gradInput[2].
+ self.zeroScalar = self.zeroScalar or input[2].new(1):zero()
+ self.ones:resize(input[2]:dim()):fill(1)
+ local zeroTensor = self.zeroScalar
+ :view(table.unpack(self.ones:totable()))
+ :expandAs(input[2])
+ self.gradInput = {self.gradInput, zeroTensor}
+ end
+
+ return self.gradInput
+end
+
+function VolumetricFullConvolution:accGradParameters(input, gradOutput, scale)
+ self:backCompatibility()
+
+ local inputTensor = input
+ local adjT, adjW, adjH = self.adjT, self.adjW, self.adjH
+
+ -- The input can be a table where the second element indicates the target
+ -- output size, in which case the adj factors are computed automatically
+ if type(inputTensor) == 'table' then
+ inputTensor = input[1]
+ local targetTensor = input[2]
+ local tDims = targetTensor:dim()
+ local tT = targetTensor:size(tDims-2)
+ local tH = targetTensor:size(tDims-1)
+ local tW = targetTensor:size(tDims)
+ adjT = calculateAdj(tT, self.kT, self.padT, self.dT)
+ adjW = calculateAdj(tW, self.kW, self.padW, self.dW)
+ adjH = calculateAdj(tH, self.kH, self.padH, self.dH)
+ end
+
+ inputTensor.THNN.VolumetricFullConvolution_accGradParameters(
+ inputTensor:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ adjT, adjW, adjH,
+ scale or 1
+ )
+end
+
+function VolumetricFullConvolution:type(type, tensorCache)
+ self.finput = torch.Tensor()
+ self.fgradInput = torch.Tensor()
+ return parent.type(self, type, tensorCache)
+end
+
+function VolumetricFullConvolution:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH)
+ if self.dT ~= 1 or self.dW ~= 1 or self.dH ~= 1 or self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d,%d', self.dT, self.dW, self.dH)
+ end
+ if (self.padT or self.padW or self.padH) and (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH
+ end
+ if (self.adjT or self.adjW or self.adjH) and (self.adjT ~= 0 or self.adjW ~= 0 or self.adjH ~= 0) then
+ s = s .. ', ' .. self.adjT .. ',' .. self.adjW .. ',' .. self.adjH
+ end
+ return s .. ')'
+end
diff --git a/contrib/lua-torch/nn/VolumetricMaxPooling.lua b/contrib/lua-torch/nn/VolumetricMaxPooling.lua
new file mode 100644
index 000000000..e25c5b31c
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricMaxPooling.lua
@@ -0,0 +1,102 @@
+local VolumetricMaxPooling, parent = torch.class('nn.VolumetricMaxPooling', 'nn.Module')
+
+VolumetricMaxPooling.__version = 2
+
+function VolumetricMaxPooling:__init(kT, kW, kH, dT, dW, dH, padT, padW, padH)
+ parent.__init(self)
+
+ dT = dT or kT
+ dW = dW or kW
+ dH = dH or kH
+
+ self.kT = kT
+ self.kH = kH
+ self.kW = kW
+ self.dT = dT
+ self.dW = dW
+ self.dH = dH
+
+ self.padT = padT or 0
+ self.padW = padW or 0
+ self.padH = padH or 0
+
+
+ self.ceil_mode = false
+ self.indices = torch.LongTensor()
+end
+
+function VolumetricMaxPooling:ceil()
+ self.ceil_mode = true
+ return self
+end
+
+function VolumetricMaxPooling:floor()
+ self.ceil_mode = false
+ return self
+end
+
+function VolumetricMaxPooling:updateOutput(input)
+ local dims = input:dim()
+ self.itime = input:size(dims-2)
+ self.iheight = input:size(dims-1)
+ self.iwidth = input:size(dims)
+
+ self.indices = self.indices or torch.LongTensor()
+ if torch.typename(input):find('torch%.Cuda.*Tensor') then
+ self.indices = torch.CudaLongTensor and self.indices:cudaLong() or self.indices
+ else
+ self.indices = self.indices:long()
+ end
+ input.THNN.VolumetricMaxPooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.indices:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.ceil_mode
+ )
+ return self.output
+end
+
+function VolumetricMaxPooling:updateGradInput(input, gradOutput)
+ input.THNN.VolumetricMaxPooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.indices:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.ceil_mode
+ )
+ return self.gradInput
+end
+
+function VolumetricMaxPooling:empty()
+ self:clearState()
+end
+
+function VolumetricMaxPooling:clearState()
+ if self.indices then self.indices:set() end
+ return parent.clearState(self)
+end
+
+function VolumetricMaxPooling:read(file, version)
+ parent.read(self, file)
+ if version < 2 then
+ self.ceil_mode = false
+ end
+end
+
+function VolumetricMaxPooling:__tostring__()
+ local s = string.format('%s(%dx%dx%d, %d,%d,%d', torch.type(self),
+ self.kT, self.kW, self.kH, self.dT, self.dW, self.dH)
+ if (self.padT or self.padW or self.padH) and
+ (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padT.. ',' .. self.padW .. ','.. self.padH
+ end
+ s = s .. ')'
+
+ return s
+end
diff --git a/contrib/lua-torch/nn/VolumetricMaxUnpooling.lua b/contrib/lua-torch/nn/VolumetricMaxUnpooling.lua
new file mode 100644
index 000000000..6291f5b85
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricMaxUnpooling.lua
@@ -0,0 +1,56 @@
+local VolumetricMaxUnpooling, parent = torch.class('nn.VolumetricMaxUnpooling', 'nn.Module')
+
+function VolumetricMaxUnpooling:__init(poolingModule)
+ parent.__init(self)
+ assert(torch.type(poolingModule)=='nn.VolumetricMaxPooling', 'Argument must be a nn.VolumetricMaxPooling module')
+ assert(poolingModule.kT==poolingModule.dT and poolingModule.kH==poolingModule.dH and poolingModule.kW==poolingModule.dW, "The size of pooling module's kernel must be equal to its stride")
+ self.pooling = poolingModule
+end
+
+function VolumetricMaxUnpooling:setParams()
+ self.indices = self.pooling.indices
+ self.otime = self.pooling.itime
+ self.oheight = self.pooling.iheight
+ self.owidth = self.pooling.iwidth
+ self.dT = self.pooling.dT
+ self.dH = self.pooling.dH
+ self.dW = self.pooling.dW
+ self.padT = self.pooling.padT
+ self.padH = self.pooling.padH
+ self.padW = self.pooling.padW
+end
+
+function VolumetricMaxUnpooling:updateOutput(input)
+ self:setParams()
+ input.THNN.VolumetricMaxUnpooling_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.indices:cdata(),
+ self.otime, self.owidth, self.oheight,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH
+ )
+ return self.output
+end
+
+function VolumetricMaxUnpooling:updateGradInput(input, gradOutput)
+ self:setParams()
+ input.THNN.VolumetricMaxUnpooling_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.indices:cdata(),
+ self.otime, self.owidth, self.oheight,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH
+ )
+ return self.gradInput
+end
+
+function VolumetricMaxUnpooling:empty()
+ self:clearState()
+end
+
+function VolumetricMaxUnpooling:__tostring__()
+ return 'nn.VolumetricMaxUnpooling associated to '..tostring(self.pooling)
+end
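+
+-- Usage sketch (illustrative): restores the spatial size recorded by the
+-- paired pooling module, routing gradients through the stored indices.
+--   local mp = nn.VolumetricMaxPooling(2, 2, 2)
+--   local up = nn.VolumetricMaxUnpooling(mp)
+--   up:forward(mp:forward(torch.randn(1, 4, 4, 4))):size()   -- 1 x 4 x 4 x 4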
diff --git a/contrib/lua-torch/nn/VolumetricReplicationPadding.lua b/contrib/lua-torch/nn/VolumetricReplicationPadding.lua
new file mode 100644
index 000000000..31a9503fd
--- /dev/null
+++ b/contrib/lua-torch/nn/VolumetricReplicationPadding.lua
@@ -0,0 +1,58 @@
+local VolumetricReplicationPadding, parent =
+ torch.class('nn.VolumetricReplicationPadding', 'nn.Module')
+
+function VolumetricReplicationPadding:__init(pleft, pright, ptop, pbottom,
+ pfront, pback)
+ parent.__init(self)
+ self.pleft = pleft
+ self.pright = pright or self.pleft
+ self.ptop = ptop or self.pleft
+ self.pbottom = pbottom or self.pleft
+ self.pfront = pfront or self.pleft
+ self.pback = pback or self.pleft
+end
+
+function VolumetricReplicationPadding:updateOutput(input)
+ if input:dim() == 4 or input:dim() == 5 then
+ input.THNN.VolumetricReplicationPadding_updateOutput(
+ input:cdata(), self.output:cdata(),
+ self.pleft, self.pright, self.ptop, self.pbottom, self.pfront,
+ self.pback)
+ else
+ error('input must be 4 or 5-dimensional')
+ end
+ return self.output
+end
+
+function VolumetricReplicationPadding:updateGradInput(input, gradOutput)
+ if input:dim() == 4 and gradOutput:dim() == 4 then
+ assert(input:size(1) == gradOutput:size(1)
+ and input:size(2) + self.pfront + self.pback == gradOutput:size(2)
+ and input:size(3) + self.ptop + self.pbottom == gradOutput:size(3)
+ and input:size(4) + self.pleft + self.pright == gradOutput:size(4),
+ 'input and gradOutput must be compatible in size')
+ elseif input:dim() == 5 and gradOutput:dim() == 5 then
+ assert(input:size(1) == gradOutput:size(1)
+ and input:size(2) == gradOutput:size(2)
+ and input:size(3) + self.pfront + self.pback == gradOutput:size(3)
+ and input:size(4) + self.ptop + self.pbottom == gradOutput:size(4)
+ and input:size(5) + self.pleft + self.pright == gradOutput:size(5),
+ 'input and gradOutput must be compatible in size')
+ else
+ error(
+ [[input and gradOutput must be 4 or 5-dimensional
+ and have equal number of dimensions]]
+ )
+ end
+ input.THNN.VolumetricReplicationPadding_updateGradInput(
+ input:cdata(), gradOutput:cdata(), self.gradInput:cdata(),
+ self.pleft, self.pright, self.ptop, self.pbottom, self.pfront, self.pback)
+ return self.gradInput
+end
+
+function VolumetricReplicationPadding:__tostring__()
+ return torch.type(self) ..
+ string.format('(left=%d, right=%d, top=%d, bottom=%d, front=%d, back=%d)',
+ self.pleft, self.pright, self.ptop, self.pbottom,
+ self.pfront, self.pback)
+end
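+
+-- Usage sketch (illustrative): a single argument replicates the border by the
+-- same amount on all six sides.
+--   local pad = nn.VolumetricReplicationPadding(1)
+--   pad:forward(torch.randn(2, 4, 4, 4)):size()   -- 2 x 6 x 6 x 6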
diff --git a/contrib/lua-torch/nn/WeightNorm.lua b/contrib/lua-torch/nn/WeightNorm.lua
new file mode 100644
index 000000000..3ffcd90aa
--- /dev/null
+++ b/contrib/lua-torch/nn/WeightNorm.lua
@@ -0,0 +1,208 @@
+-- Weight Normalization
+-- https://arxiv.org/pdf/1602.07868v3.pdf
+local WeightNorm, parent = torch.class("nn.WeightNorm", "nn.Decorator")
+
+function WeightNorm:__init(module, outputDim)
+ -- this container applies Weight Normalization to any module it wraps
+ -- it accepts a parameter ``outputDim`` giving which weight dimension indexes the outputs
+ -- if outputDim is not 1, the container transposes the weight
+ -- if the weight is not 2D, the container views it in a 2D shape,
+ -- namely nOut x (nIn x kw x dw x ...)
+
+ parent.__init(self, module)
+ assert(module.weight)
+
+ if module.bias then
+ self.bias = module.bias
+ self.gradBias = module.gradBias
+ end
+ self.gradWeight = module.gradWeight
+ self.weight = module.weight
+
+ self.outputDim = outputDim or 1
+
+ -- track the non-output weight dimensions
+ self.otherDims = 1
+ for i = 1, self.weight:dim() do
+ if i ~= self.outputDim then
+ self.otherDims = self.otherDims * self.weight:size(i)
+ end
+ end
+
+ -- view size for weight norm 2D calculations
+ self.viewIn = torch.LongStorage({self.weight:size(self.outputDim), self.otherDims})
+
+ -- view size back to original weight
+ self.viewOut = self.weight:size()
+ self.weightSize = self.weight:size()
+
+ -- bubble outputDim size up to the front
+ for i = self.outputDim - 1, 1, -1 do
+ self.viewOut[i], self.viewOut[i + 1] = self.viewOut[i + 1], self.viewOut[i]
+ end
+
+ -- weight is reparametrized to decouple the length from the direction
+ -- such that w = g * ( v / ||v|| )
+ self.v = torch.Tensor(self.viewIn[1], self.viewIn[2])
+ self.g = torch.Tensor(self.viewIn[1])
+
+ self._norm = torch.Tensor(self.viewIn[1])
+ self._scale = torch.Tensor(self.viewIn[1])
+
+ -- gradient of g
+ self.gradG = torch.Tensor(self.viewIn[1]):zero()
+ -- gradient of v
+ self.gradV = torch.Tensor(self.viewIn)
+
+ self:resetInit()
+end
+
+function WeightNorm:permuteIn(inpt)
+ local ans = inpt
+ for i = self.outputDim - 1, 1, -1 do
+ ans = ans:transpose(i, i+1)
+ end
+ return ans
+end
+
+function WeightNorm:permuteOut(inpt)
+ local ans = inpt
+ for i = 1, self.outputDim - 1 do
+ ans = ans:transpose(i, i+1)
+ end
+ return ans
+end
+
+function WeightNorm:resetInit(inputSize, outputSize)
+ self.v:normal(0, math.sqrt(2/self.viewIn[2]))
+ self.g:norm(self.v, 2, 2)
+ if self.bias then
+ self.bias:zero()
+ end
+end
+
+function WeightNorm:evaluate()
+ if not(self.train == false) then
+ self:updateWeight()
+ parent.evaluate(self)
+ end
+end
+
+function WeightNorm:updateWeight()
+ -- view to 2D when weight norm container operates
+ self.gradV:copy(self:permuteIn(self.weight))
+ self.gradV = self.gradV:view(self.viewIn)
+
+ -- ||v|| (a small epsilon keeps the norm away from zero)
+ self._norm:norm(self.v, 2, 2):pow(2):add(10e-5):sqrt()
+ -- w = g * v / ||v||
+ self.gradV:copy(self.v)
+ self._scale:copy(self.g):cdiv(self._norm)
+ self.gradV:cmul(self._scale:view(self.viewIn[1], 1)
+ :expand(self.viewIn[1], self.viewIn[2]))
+
+ -- view back to the original weight shape
+ self.gradV = self.gradV:view(self.viewOut)
+
+ self.weight:copy(self:permuteOut(self.gradV))
+end
+
+function WeightNorm:updateOutput(input)
+ if not(self.train == false) then
+ self:updateWeight()
+ end
+ self.output:set(self.modules[1]:updateOutput(input))
+ return self.output
+end
+
+function WeightNorm:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ self.modules[1]:accGradParameters(input, gradOutput, scale)
+
+ self.weight:copy(self:permuteIn(self.weight))
+ self.gradV:copy(self:permuteIn(self.gradWeight))
+ self.weight = self.weight:view(self.viewIn)
+
+ local norm = self._norm:view(self.viewIn[1], 1):expand(self.viewIn[1], self.viewIn[2])
+ -- use a distinct name so the expanded scale does not shadow the 'scale' argument
+ local scaleExp = self._scale:view(self.viewIn[1], 1):expand(self.viewIn[1], self.viewIn[2])
+
+ -- dL/dg = dL/dw . (v / ||v||), summed per output row
+ self.weight:copy(self.gradV)
+ self.weight:cmul(self.v):cdiv(norm)
+ self.gradG:sum(self.weight, 2)
+
+ -- dL/dw * g / ||v||
+ self.gradV:cmul(scaleExp)
+
+ -- dL/dg * (v * g / ||v||^2)
+ self.weight:copy(self.v):cmul(scaleExp):cdiv(norm)
+ self.weight:cmul(self.gradG:view(self.viewIn[1], 1)
+ :expand(self.viewIn[1], self.viewIn[2]))
+
+ -- dL / dv update
+ self.gradV:add(-1, self.weight)
+
+ self.gradV = self.gradV:view(self.viewOut)
+ self.weight = self.weight:view(self.viewOut)
+ self.gradWeight:copy(self:permuteOut(self.gradV))
+end
+
+function WeightNorm:updateGradInput(input, gradOutput)
+ self.gradInput:set(self.modules[1]:updateGradInput(input, gradOutput))
+ return self.gradInput
+end
+
+function WeightNorm:zeroGradParameters()
+ self.modules[1]:zeroGradParameters()
+ self.gradV:zero()
+ self.gradG:zero()
+end
+
+function WeightNorm:updateParameters(lr)
+ self.modules[1]:updateParameters(lr)
+ self.g:add(-lr, self.gradG)
+ self.v:add(-lr, self.gradV)
+end
+
+function WeightNorm:parameters()
+ if self.bias then
+ return {self.v, self.g, self.bias}, {self.gradV, self.gradG, self.gradBias}
+ else
+ return {self.v, self.g}, {self.gradV, self.gradG}
+ end
+end
+
+function WeightNorm:write(file)
+ -- Don't save weight and gradWeight since we can easily re-compute it from v
+ -- and g.
+ local weight = self.modules[1].weight
+ local gradWeight = self.modules[1].gradWeight
+ self.weight = nil
+ self.gradWeight = nil
+ self.modules[1].weight = nil
+ self.modules[1].gradWeight = nil
+ if not self.weightSize then
+ self.weightSize = weight:size()
+ end
+
+ parent.write(self, file)
+
+ self.modules[1].weight = weight
+ self.modules[1].gradWeight = gradWeight
+ self.weight = weight
+ self.gradWeight = gradWeight
+end
+
+function WeightNorm:read(file)
+ parent.read(self, file)
+
+ -- Re-compute weight and gradWeight
+ if not self.weight then
+ self.modules[1].weight = self.v.new(self.weightSize)
+ self.modules[1].gradWeight = self.v.new(self.weightSize)
+ self.weight = self.modules[1].weight
+ self.gradWeight = self.modules[1].gradWeight
+ self:updateWeight()
+ self.gradWeight:copy(self:permuteOut(self.gradV))
+ end
+end
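+
+-- Usage sketch (illustrative; the wrapping follows the upstream torch/nn
+-- API, nn.WeightNorm(module [, outputDim])):
+--
+--   local wn = nn.WeightNorm(nn.Linear(5, 3))
+--   local y = wn:forward(torch.randn(2, 5))
+--
+-- During training the wrapped weight is rebuilt on every forward as
+-- w = g * v / ||v|| (see updateWeight above), so the length g and the
+-- direction v are optimized independently.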
diff --git a/contrib/lua-torch/nn/WeightedEuclidean.lua b/contrib/lua-torch/nn/WeightedEuclidean.lua
new file mode 100644
index 000000000..dbf4158a9
--- /dev/null
+++ b/contrib/lua-torch/nn/WeightedEuclidean.lua
@@ -0,0 +1,244 @@
+local WeightedEuclidean, parent = torch.class('nn.WeightedEuclidean', 'nn.Module')
+
+function WeightedEuclidean:__init(inputSize,outputSize)
+ parent.__init(self)
+
+ self.weight = torch.Tensor(inputSize,outputSize)
+ self.gradWeight = torch.Tensor(inputSize,outputSize)
+
+ -- each template (output dim) has its own diagonal covariance matrix
+ self.diagCov = torch.Tensor(inputSize,outputSize)
+ self.gradDiagCov = torch.Tensor(inputSize,outputSize)
+
+ self:reset()
+end
+
+function WeightedEuclidean:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(1))
+ end
+ self.weight:uniform(-stdv, stdv)
+ self.diagCov:fill(1)
+end
+
+local function view(res, src, ...)
+ local args = {...}
+ if src:isContiguous() then
+ res:view(src, table.unpack(args))
+ else
+ res:reshape(src, table.unpack(args))
+ end
+end
+
+function WeightedEuclidean:updateOutput(input)
+ -- lazy-initialize
+ self._diagCov = self._diagCov or self.output.new()
+
+ self._input = self._input or input.new()
+ self._weight = self._weight or self.weight.new()
+ self._expand = self._expand or self.output.new()
+   self._expand2 = self._expand2 or self.output.new()
+ self._expand3 = self._expand3 or self.output.new()
+ self._repeat = self._repeat or self.output.new()
+ self._repeat2 = self._repeat2 or self.output.new()
+ self._repeat3 = self._repeat3 or self.output.new()
+
+ local inputSize, outputSize = self.weight:size(1), self.weight:size(2)
+
+ -- y_j = || c_j * (w_j - x) ||
+ if input:dim() == 1 then
+ view(self._input, input, inputSize, 1)
+ self._expand:expandAs(self._input, self.weight)
+ self._repeat:resizeAs(self._expand):copy(self._expand)
+ self._repeat:add(-1, self.weight)
+ self._repeat:cmul(self.diagCov)
+ self.output:norm(self._repeat, 2, 1)
+ self.output:resize(outputSize)
+ elseif input:dim() == 2 then
+ local batchSize = input:size(1)
+
+ view(self._input, input, batchSize, inputSize, 1)
+ self._expand:expand(self._input, batchSize, inputSize, outputSize)
+ -- make the expanded tensor contiguous (requires lots of memory)
+ self._repeat:resizeAs(self._expand):copy(self._expand)
+
+ self._weight:view(self.weight, 1, inputSize, outputSize)
+ self._expand2:expandAs(self._weight, self._repeat)
+
+ self._diagCov:view(self.diagCov, 1, inputSize, outputSize)
+ self._expand3:expandAs(self._diagCov, self._repeat)
+ if torch.type(input) == 'torch.CudaTensor' then
+ -- requires lots of memory, but minimizes cudaMallocs and loops
+ self._repeat2:resizeAs(self._expand2):copy(self._expand2)
+ self._repeat:add(-1, self._repeat2)
+ self._repeat3:resizeAs(self._expand3):copy(self._expand3)
+ self._repeat:cmul(self._repeat3)
+ else
+ self._repeat:add(-1, self._expand2)
+ self._repeat:cmul(self._expand3)
+ end
+
+ self.output:norm(self._repeat, 2, 2)
+ self.output:resize(batchSize, outputSize)
+ else
+ error"1D or 2D input expected"
+ end
+ return self.output
+end
+
+function WeightedEuclidean:updateGradInput(input, gradOutput)
+ if not self.gradInput then
+ return
+ end
+
+ self._div = self._div or input.new()
+ self._output = self._output or self.output.new()
+ self._expand4 = self._expand4 or input.new()
+ self._gradOutput = self._gradOutput or input.new()
+
+ if not self.fastBackward then
+ self:updateOutput(input)
+ end
+
+ local inputSize, outputSize = self.weight:size(1), self.weight:size(2)
+
+ --[[
+ dy_j -2 * c_j * c_j * (w_j - x) c_j * c_j * (x - w_j)
+ ---- = -------------------------- = ---------------------
+ dx 2 || c_j * (w_j - x) || y_j
+ --]]
+
+ -- to prevent div by zero (NaN) bugs
+ self._output:resizeAs(self.output):copy(self.output):add(0.0000001)
+ view(self._gradOutput, gradOutput, gradOutput:size())
+ self._div:cdiv(gradOutput, self._output)
+ if input:dim() == 1 then
+ self._div:resize(1, outputSize)
+ self._expand4:expandAs(self._div, self.weight)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ self._repeat2:resizeAs(self._expand4):copy(self._expand4)
+ self._repeat2:cmul(self._repeat)
+ else
+ self._repeat2:cmul(self._repeat, self._expand4)
+ end
+
+ self._repeat2:cmul(self.diagCov)
+ self.gradInput:sum(self._repeat2, 2)
+ self.gradInput:resizeAs(input)
+ elseif input:dim() == 2 then
+ local batchSize = input:size(1)
+
+ self._div:resize(batchSize, 1, outputSize)
+ self._expand4:expand(self._div, batchSize, inputSize, outputSize)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ self._repeat2:resizeAs(self._expand4):copy(self._expand4)
+ self._repeat2:cmul(self._repeat)
+ self._repeat2:cmul(self._repeat3)
+ else
+ self._repeat2:cmul(self._repeat, self._expand4)
+ self._repeat2:cmul(self._expand3)
+ end
+
+ self.gradInput:sum(self._repeat2, 3)
+ self.gradInput:resizeAs(input)
+ else
+ error"1D or 2D input expected"
+ end
+
+ return self.gradInput
+end
+
+function WeightedEuclidean:accGradParameters(input, gradOutput, scale)
+ local inputSize, outputSize = self.weight:size(1), self.weight:size(2)
+ scale = scale or 1
+
+ --[[
+ dy_j 2 * c_j * c_j * (w_j - x) c_j * c_j * (w_j - x)
+ ---- = ------------------------- = ---------------------
+ dw_j 2 || c_j * (w_j - x) || y_j
+
+ dy_j 2 * c_j * (w_j - x)^2 c_j * (w_j - x)^2
+ ---- = ----------------------- = -----------------
+ dc_j 2 || c_j * (w_j - x) || y_j
+ --]]
+ -- assumes a preceding call to updateGradInput
+ if input:dim() == 1 then
+ self.gradWeight:add(-scale, self._repeat2)
+
+ self._repeat:cdiv(self.diagCov)
+ self._repeat:cmul(self._repeat)
+ self._repeat:cmul(self.diagCov)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ self._repeat2:resizeAs(self._expand4):copy(self._expand4)
+ self._repeat2:cmul(self._repeat)
+ else
+ self._repeat2:cmul(self._repeat, self._expand4)
+ end
+
+ self.gradDiagCov:add(self._repeat2)
+ elseif input:dim() == 2 then
+ self._sum = self._sum or input.new()
+ self._sum:sum(self._repeat2, 1)
+ self._sum:resize(inputSize, outputSize)
+ self.gradWeight:add(-scale, self._sum)
+
+ if torch.type(input) == 'torch.CudaTensor' then
+ -- requires lots of memory, but minimizes cudaMallocs and loops
+ self._repeat:cdiv(self._repeat3)
+ self._repeat:cmul(self._repeat)
+ self._repeat:cmul(self._repeat3)
+ self._repeat2:resizeAs(self._expand4):copy(self._expand4)
+ self._repeat:cmul(self._repeat2)
+ else
+ self._repeat:cdiv(self._expand3)
+ self._repeat:cmul(self._repeat)
+ self._repeat:cmul(self._expand3)
+ self._repeat:cmul(self._expand4)
+ end
+
+ self._sum:sum(self._repeat, 1)
+ self._sum:resize(inputSize, outputSize)
+ self.gradDiagCov:add(scale, self._sum)
+ else
+ error"1D or 2D input expected"
+ end
+end
+
+function WeightedEuclidean:type(type, tensorCache)
+ if type then
+ -- prevent premature memory allocations
+ self._input = nil
+ self._output = nil
+ self._gradOutput = nil
+ self._weight = nil
+ self._div = nil
+ self._sum = nil
+ self._expand = nil
+ self._expand2 = nil
+ self._expand3 = nil
+ self._expand4 = nil
+ self._repeat = nil
+ self._repeat2 = nil
+ self._repeat3 = nil
+ end
+ return parent.type(self, type, tensorCache)
+end
+
+function WeightedEuclidean:parameters()
+ return {self.weight, self.diagCov}, {self.gradWeight, self.gradDiagCov}
+end
+
+function WeightedEuclidean:accUpdateGradParameters(input, gradOutput, lr)
+ local gradWeight = self.gradWeight
+ local gradDiagCov = self.gradDiagCov
+ self.gradWeight = self.weight
+ self.gradDiagCov = self.diagCov
+ self:accGradParameters(input, gradOutput, -lr)
+ self.gradWeight = gradWeight
+ self.gradDiagCov = gradDiagCov
+end
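+
+-- Usage sketch (shapes are illustrative):
+--
+--   local m = nn.WeightedEuclidean(4, 2) -- two templates in R^4
+--   local y = m:forward(torch.randn(4))  -- y[j] is the norm of diagCov[:,j] .* (weight[:,j] - x)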
diff --git a/contrib/lua-torch/nn/WeightedMSECriterion.lua b/contrib/lua-torch/nn/WeightedMSECriterion.lua
new file mode 100644
index 000000000..933472937
--- /dev/null
+++ b/contrib/lua-torch/nn/WeightedMSECriterion.lua
@@ -0,0 +1,45 @@
+local WeightedMSECriterion, parent = torch.class('nn.WeightedMSECriterion','nn.MSECriterion')
+
+function WeightedMSECriterion:__init(w)
+ parent.__init(self)
+ self.weight = w:clone()
+end
+
+function WeightedMSECriterion:updateOutput(input,target)
+ self.buffer = self.buffer or input.new()
+ self.buffer:resizeAs(input):copy(target)
+ if input:dim() - 1 == self.weight:dim() then
+ for i=1,input:size(1) do
+ self.buffer[i]:cmul(self.weight)
+ end
+ else
+ self.buffer:cmul(self.weight)
+ end
+ self.output_tensor = self.output_tensor or input.new(1)
+ input.THNN.MSECriterion_updateOutput(
+ input:cdata(),
+ self.buffer:cdata(),
+ self.output_tensor:cdata(),
+ self.sizeAverage
+ )
+ self.output = self.output_tensor[1]
+ return self.output
+end
+
+function WeightedMSECriterion:updateGradInput(input, target)
+ self.buffer:resizeAs(input):copy(target)
+ if input:dim() - 1 == self.weight:dim() then
+ for i=1,input:size(1) do
+ self.buffer[i]:cmul(self.weight)
+ end
+ else
+ self.buffer:cmul(self.weight)
+ end
+ input.THNN.MSECriterion_updateGradInput(
+ input:cdata(),
+ self.buffer:cdata(),
+ self.gradInput:cdata(),
+ self.sizeAverage
+ )
+ return self.gradInput
+end
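+
+-- Usage sketch: as in the code above, the per-element weights multiply the
+-- target, so with sizeAverage the loss is mean_i (input_i - w_i*target_i)^2.
+--
+--   local crit = nn.WeightedMSECriterion(torch.Tensor{1, 1, 0.5})
+--   local loss = crit:forward(torch.randn(3), torch.randn(3))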
diff --git a/contrib/lua-torch/nn/WhiteNoise.lua b/contrib/lua-torch/nn/WhiteNoise.lua
new file mode 100644
index 000000000..f1defb646
--- /dev/null
+++ b/contrib/lua-torch/nn/WhiteNoise.lua
@@ -0,0 +1,40 @@
+local WhiteNoise, parent = torch.class('nn.WhiteNoise', 'nn.Module')
+
+function WhiteNoise:__init(mean, std)
+ parent.__init(self)
+ self.mean = mean or 0
+ self.std = std or 0.1
+ self.noise = torch.Tensor()
+end
+
+function WhiteNoise:updateOutput(input)
+ self.output:resizeAs(input):copy(input)
+ if self.train ~= false then
+ self.noise:resizeAs(input)
+ self.noise:normal(self.mean, self.std)
+ self.output:add(self.noise)
+ else
+ if self.mean ~= 0 then
+ self.output:add(self.mean)
+ end
+ end
+ return self.output
+end
+
+function WhiteNoise:updateGradInput(input, gradOutput)
+ if self.train ~= false then
+ -- Simply return the gradients.
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+ else
+ error('backprop only defined while training')
+ end
+ return self.gradInput
+end
+
+function WhiteNoise:clearState()
+ self.noise:set()
+end
+
+function WhiteNoise:__tostring__()
+ return string.format('%s mean: %f, std: %f', torch.type(self), self.mean, self.std)
+end
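+
+-- Usage sketch (x is any tensor): noise is injected only in training mode.
+--
+--   local noise = nn.WhiteNoise(0, 0.1)
+--   noise:training()
+--   local y = noise:forward(x) -- y = x + samples from N(0, 0.1)
+--   noise:evaluate()
+--   local z = noise:forward(x) -- z = x (mean is 0, so nothing is added)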
diff --git a/contrib/lua-torch/nn/ZeroGrad.lua b/contrib/lua-torch/nn/ZeroGrad.lua
new file mode 100644
index 000000000..7c941ce1c
--- /dev/null
+++ b/contrib/lua-torch/nn/ZeroGrad.lua
@@ -0,0 +1,14 @@
+local ZeroGrad, parent = torch.class('nn.ZeroGrad', 'nn.Module')
+
+function ZeroGrad:updateOutput(input)
+ self.output:set(input)
+ return self.output
+end
+
+-- the gradient is simply zeroed.
+-- useful when you don't want to backpropagate through certain paths.
+function ZeroGrad:updateGradInput(input, gradOutput)
+ self.gradInput = nn.utils.recursiveResizeAs(self.gradInput, input)
+ self.gradInput = nn.utils.recursiveFill(self.gradInput, 0)
+ return self.gradInput
+end
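+
+-- Usage sketch: forward is the identity and backward returns zeros, so
+-- modules feeding into ZeroGrad receive zero gradient contributions.
+--
+--   local block = nn.Sequential():add(nn.Linear(3, 3)):add(nn.ZeroGrad())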
diff --git a/contrib/lua-torch/nn/ZipTable.lua b/contrib/lua-torch/nn/ZipTable.lua
new file mode 100644
index 000000000..7b18619eb
--- /dev/null
+++ b/contrib/lua-torch/nn/ZipTable.lua
@@ -0,0 +1,34 @@
+local ZipTable, parent = torch.class('nn.ZipTable', 'nn.Module')
+
+-- input : { {a1,a2}, {b1,b2}, {c1,c2} }
+-- output : { {a1,b1,c1}, {a2,b2,c2} }
+function ZipTable:__init()
+ parent.__init(self)
+ self.output = {}
+ self.gradInput = {}
+end
+
+function ZipTable:updateOutput(inputTable)
+ self.output = {}
+ for i,inTable in ipairs(inputTable) do
+ for j,input in ipairs(inTable) do
+ local output = self.output[j] or {}
+ output[i] = input
+ self.output[j] = output
+ end
+ end
+ return self.output
+end
+
+function ZipTable:updateGradInput(inputTable, gradOutputTable)
+ self.gradInput = {}
+ for i,gradOutTable in ipairs(gradOutputTable) do
+ for j,gradOutput in ipairs(gradOutTable) do
+ local gradInput = self.gradInput[j] or {}
+ gradInput[i] = gradOutput
+ self.gradInput[j] = gradInput
+ end
+ end
+ return self.gradInput
+end
+
diff --git a/contrib/lua-torch/nn/ZipTableOneToMany.lua b/contrib/lua-torch/nn/ZipTableOneToMany.lua
new file mode 100644
index 000000000..d4a80fe0d
--- /dev/null
+++ b/contrib/lua-torch/nn/ZipTableOneToMany.lua
@@ -0,0 +1,37 @@
+local ZipTableOneToMany, parent = torch.class('nn.ZipTableOneToMany', 'nn.Module')
+
+-- based on ZipTable in dpnn
+
+-- input : { v, {a, b, c} }
+-- output : { {v,a}, {v,b}, {v,c} }
+function ZipTableOneToMany:__init()
+ parent.__init(self)
+ self.output = {}
+ self.gradInput = {}
+ -- make buffer to update during forward/backward
+ self.gradInputEl = torch.Tensor()
+end
+
+function ZipTableOneToMany:updateOutput(input)
+ assert(#input == 2, "input must be table of element and table")
+ local inputEl, inputTable = input[1], input[2]
+ self.output = {}
+ for i,v in ipairs(inputTable) do
+ self.output[i] = {inputEl, v}
+ end
+ return self.output
+end
+
+function ZipTableOneToMany:updateGradInput(input, gradOutput)
+ assert(#input == 2, "input must be table of element and table")
+ local inputEl, inputTable = input[1], input[2]
+ self.gradInputEl:resizeAs(inputEl):zero()
+ local gradInputTable = {}
+ for i,gradV in ipairs(gradOutput) do
+ self.gradInputEl:add(gradV[1])
+ gradInputTable[i] = gradV[2]
+ end
+ self.gradInput = {self.gradInputEl, gradInputTable}
+ return self.gradInput
+end
+
diff --git a/contrib/lua-torch/nn/hessian.lua b/contrib/lua-torch/nn/hessian.lua
new file mode 100644
index 000000000..b841d8c59
--- /dev/null
+++ b/contrib/lua-torch/nn/hessian.lua
@@ -0,0 +1,391 @@
+----------------------------------------------------------------------
+-- hessian.lua: this file appends extra methods to modules in nn,
+-- to estimate diagonal elements of the Hessian. This is useful
+-- to condition learning rates individually.
+----------------------------------------------------------------------
+nn.hessian = {}
+
+----------------------------------------------------------------------
+-- Hessian code is still experimental,
+-- and deactivated by default
+----------------------------------------------------------------------
+function nn.hessian.enable()
+
+ local function accDiagHessianParameters(module, input, diagHessianOutput, gw, hw)
+      if #gw ~= #hw then
+         error('Number of gradients is not equal to number of hessians')
+      end
+ module.inputSq = module.inputSq or input.new()
+ module.inputSq:resizeAs(input)
+ torch.cmul(module.inputSq, input, input)
+ -- replace gradients with hessian
+ for i=1,#gw do
+ local gwname = gw[i]
+ local hwname = hw[i]
+ local gwval = module[gwname]
+ local hwval = module[hwname]
+ if hwval == nil then
+ module[hwname] = gwval.new():resizeAs(gwval)
+ hwval = module[hwname]
+ end
+ module[gwname] = hwval
+ module[hwname] = gwval
+ end
+ local oldOutput = module.output
+ module.output = module.output.new():resizeAs(oldOutput)
+ module.forward(module, module.inputSq)
+ module.accGradParameters(module, module.inputSq, diagHessianOutput, 1)
+ -- put back gradients
+ for i=1,#gw do
+ local gwname = gw[i]
+ local hwname = hw[i]
+ local gwval = module[gwname]
+ local hwval = module[hwname]
+ module[gwname] = hwval
+ module[hwname] = gwval
+ end
+ module.output = oldOutput
+ end
+ nn.hessian.accDiagHessianParameters = accDiagHessianParameters
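+   -- The buffer swap above makes accGradParameters accumulate x^2 * H_y
+   -- into the diagHessian buffers: the diagonal Gauss-Newton
+   -- (Levenberg-Marquardt) approximation of the parameter Hessian,
+   -- cf. LeCun et al., "Efficient BackProp".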
+
+ local function updateDiagHessianInput(module, input, diagHessianOutput, w, wsq)
+ if #w ~= #wsq then
+ error('Number of weights is not equal to number of weights squares')
+ end
+ module.diagHessianInput = module.diagHessianInput or input.new()
+ module.diagHessianInput:resizeAs(input):zero()
+
+ local gi = module.gradInput
+ module.gradInput = module.diagHessianInput
+ for i=1,#w do
+ local wname = w[i]
+ local wsqname = wsq[i]
+ local wval = module[wname]
+ local wsqval = module[wsqname]
+ if wsqval == nil then
+ module[wsqname] = wval.new()
+ wsqval = module[wsqname]
+ end
+ wsqval:resizeAs(wval)
+ torch.cmul(wsqval, wval, wval)
+ module[wsqname] = wval
+ module[wname] = wsqval
+ end
+ module.updateGradInput(module,input,diagHessianOutput)
+ for i=1,#w do
+ local wname = w[i]
+ local wsqname = wsq[i]
+ local wval = module[wname]
+ local wsqval = module[wsqname]
+ module[wname] = wsqval
+ module[wsqname] = wval
+ end
+ module.gradInput = gi
+ end
+ nn.hessian.updateDiagHessianInput = updateDiagHessianInput
+
+ local function updateDiagHessianInputPointWise(module, input, diagHessianOutput)
+ local tdh = diagHessianOutput.new():resizeAs(diagHessianOutput):fill(1)
+ updateDiagHessianInput(module,input,tdh,{},{})
+ module.diagHessianInput:cmul(module.diagHessianInput)
+ module.diagHessianInput:cmul(diagHessianOutput)
+ end
+ nn.hessian.updateDiagHessianInputPointWise = updateDiagHessianInputPointWise
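+   -- For a pointwise y = f(x), backpropagating a gradient of ones yields
+   -- f'(x); squaring it and scaling by the output Hessian approximates the
+   -- input Hessian as f'(x)^2 * H_y (the f'' term is dropped, as in
+   -- Gauss-Newton).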
+
+ local function initDiagHessianParameters(module,gw,hw)
+ module.diagHessianInput = module.diagHessianInput or module.gradInput.new();
+ for i=1,#gw do
+ module[hw[i]] = module[hw[i]] or module[gw[i]].new():resizeAs(module[gw[i]])
+ end
+ end
+ nn.hessian.initDiagHessianParameters = initDiagHessianParameters
+
+ ----------------------------------------------------------------------
+ -- Module
+ ----------------------------------------------------------------------
+ function nn.Module.updateDiagHessianInput(self, input, diagHessianOutput)
+ error(torch.typename(self) .. ':updateDiagHessianInput() is undefined')
+ end
+
+ function nn.Module.accDiagHessianParameters(self, input, diagHessianOutput)
+ end
+
+ function nn.Module.initDiagHessianParameters()
+ end
+
+ ----------------------------------------------------------------------
+ -- Sequential
+ ----------------------------------------------------------------------
+ function nn.Sequential.initDiagHessianParameters(self)
+ for i=1,#self.modules do
+ self.modules[i]:initDiagHessianParameters()
+ end
+ end
+
+ function nn.Sequential.updateDiagHessianInput(self, input, diagHessianOutput)
+ local currentDiagHessianOutput = diagHessianOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ currentDiagHessianOutput = currentModule:updateDiagHessianInput(previousModule.output, currentDiagHessianOutput)
+ currentModule = previousModule
+ end
+ currentDiagHessianOutput = currentModule:updateDiagHessianInput(input, currentDiagHessianOutput)
+ self.diagHessianInput = currentDiagHessianOutput
+ return currentDiagHessianOutput
+ end
+
+ function nn.Sequential.accDiagHessianParameters(self, input, diagHessianOutput)
+ local currentDiagHessianOutput = diagHessianOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ currentModule:accDiagHessianParameters(previousModule.output, currentDiagHessianOutput)
+ currentDiagHessianOutput = currentModule.diagHessianInput
+ currentModule = previousModule
+ end
+ currentModule:accDiagHessianParameters(input, currentDiagHessianOutput)
+ end
+
+ ----------------------------------------------------------------------
+ -- Criterion
+ ----------------------------------------------------------------------
+ function nn.Criterion.updateDiagHessianInput(self, input, diagHessianOutput)
+ error(torch.typename(self) .. ':updateDiagHessianInput() is undefined')
+ end
+
+ ----------------------------------------------------------------------
+ -- MSECriterion
+ ----------------------------------------------------------------------
+ function nn.MSECriterion.updateDiagHessianInput(self, input, target)
+ self.diagHessianInput = self.diagHessianInput or input.new()
+ local val = 2
+ if self.sizeAverage then
+ val = val / input:nElement()
+ end
+ self.diagHessianInput:resizeAs(input):fill(val)
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
+ -- WeightedMSECriterion
+ ----------------------------------------------------------------------
+ function nn.WeightedMSECriterion.updateDiagHessianInput(self,input,target)
+ return nn.MSECriterion.updateDiagHessianInput(self,input,target)
+ end
+
+ ----------------------------------------------------------------------
+ -- L1Cost
+ ----------------------------------------------------------------------
+ function nn.L1Cost.updateDiagHessianInput(self,input)
+ self.diagHessianInput = self.diagHessianInput or input.new()
+ self.diagHessianInput:resizeAs(input)
+ self.diagHessianInput:fill(1)
+ self.diagHessianInput[torch.eq(input,0)] = 0
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
+ -- Linear
+ ----------------------------------------------------------------------
+ function nn.Linear.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'})
+ return self.diagHessianInput
+ end
+
+ function nn.Linear.accDiagHessianParameters(self, input, diagHessianOutput)
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'})
+ end
+
+ function nn.Linear.initDiagHessianParameters(self)
+ initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'})
+ end
+
+ ----------------------------------------------------------------------
+ -- SpatialConvolution
+ ----------------------------------------------------------------------
+ function nn.SpatialConvolution.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'})
+ return self.diagHessianInput
+ end
+
+ function nn.SpatialConvolution.accDiagHessianParameters(self, input, diagHessianOutput)
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'})
+ end
+
+ function nn.SpatialConvolution.initDiagHessianParameters(self)
+ initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'})
+ end
+
+ ----------------------------------------------------------------------
+ -- SpatialConvolutionLocal
+ ----------------------------------------------------------------------
+ function nn.SpatialConvolutionLocal.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'})
+ return self.diagHessianInput
+ end
+
+ function nn.SpatialConvolutionLocal.accDiagHessianParameters(self, input, diagHessianOutput)
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'})
+ end
+
+ function nn.SpatialConvolutionLocal.initDiagHessianParameters(self)
+ initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'})
+ end
+
+ ----------------------------------------------------------------------
+ -- SpatialFullConvolution
+ ----------------------------------------------------------------------
+ function nn.SpatialFullConvolution.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'})
+ return self.diagHessianInput
+ end
+
+ function nn.SpatialFullConvolution.accDiagHessianParameters(self, input, diagHessianOutput)
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'})
+ end
+
+ function nn.SpatialFullConvolution.initDiagHessianParameters(self)
+ initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'})
+ end
+
+ ----------------------------------------------------------------------
+ -- SpatialConvolutionMap
+ ----------------------------------------------------------------------
+ function nn.SpatialConvolutionMap.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInput(self, input, diagHessianOutput, {'weight','bias'}, {'weightSq','biasSq'})
+ return self.diagHessianInput
+ end
+
+ function nn.SpatialConvolutionMap.accDiagHessianParameters(self, input, diagHessianOutput)
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'})
+ end
+
+ function nn.SpatialConvolutionMap.initDiagHessianParameters(self)
+ initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'})
+ end
+
+ ----------------------------------------------------------------------
+ -- SpatialFullConvolutionMap
+ ----------------------------------------------------------------------
+ function nn.SpatialFullConvolutionMap.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'})
+ return self.diagHessianInput
+ end
+
+ function nn.SpatialFullConvolutionMap.accDiagHessianParameters(self, input, diagHessianOutput)
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'})
+ end
+
+ function nn.SpatialFullConvolutionMap.initDiagHessianParameters(self)
+ initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'})
+ end
+
+   ----------------------------------------------------------------------
+ -- Tanh
+ ----------------------------------------------------------------------
+ function nn.Tanh.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInputPointWise(self, input, diagHessianOutput)
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
+ -- TanhShrink
+ ----------------------------------------------------------------------
+ function nn.TanhShrink.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInputPointWise(self.tanh, input, diagHessianOutput)
+ self.diagHessianInput = self.diagHessianInput or input.new():resizeAs(input)
+ torch.add(self.diagHessianInput, self.tanh.diagHessianInput, diagHessianOutput)
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
+ -- Square
+ ----------------------------------------------------------------------
+ function nn.Square.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInputPointWise(self, input, diagHessianOutput)
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
+ -- Sqrt
+ ----------------------------------------------------------------------
+ function nn.Sqrt.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInputPointWise(self, input, diagHessianOutput)
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
+ -- Reshape
+ ----------------------------------------------------------------------
+ function nn.Reshape.updateDiagHessianInput(self, input, diagHessianOutput)
+ self.diagHessianInput = self.diagHessianInput or input.new()
+ diagHessianOutput = diagHessianOutput:contiguous()
+ self.diagHessianInput:set(diagHessianOutput):resizeAs(input)
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
+ -- Parameters manipulation:
+ -- we modify these functions such that they return hessian coefficients
+ ----------------------------------------------------------------------
+ function nn.Module.parameters(self)
+ if self.weight and self.bias then
+ return {self.weight, self.bias}, {self.gradWeight, self.gradBias}, {self.diagHessianWeight, self.diagHessianBias}
+ elseif self.weight then
+ return {self.weight}, {self.gradWeight}, {self.diagHessianWeight}
+ elseif self.bias then
+ return {self.bias}, {self.gradBias}, {self.diagHessianBias}
+ else
+ return
+ end
+ end
+
+ function nn.Module.getParameters(self)
+ -- get parameters
+ local parameters,gradParameters,hessianParameters = self:parameters()
+ -- flatten parameters and gradients
+ local flatParameters = nn.Module.flatten(parameters)
+ collectgarbage()
+ local flatGradParameters = nn.Module.flatten(gradParameters)
+ collectgarbage()
+ local flatHessianParameters
+ if hessianParameters and hessianParameters[1] then
+ flatHessianParameters = nn.Module.flatten(hessianParameters)
+ collectgarbage()
+ end
+
+ -- return new flat vector that contains all discrete parameters
+ return flatParameters, flatGradParameters, flatHessianParameters
+ end
+
+ function nn.Sequential.parameters(self)
+ local function tinsert(to, from)
+ if type(from) == 'table' then
+ for i=1,#from do
+ tinsert(to,from[i])
+ end
+ else
+ table.insert(to,from)
+ end
+ end
+ local w = {}
+ local gw = {}
+ local ggw = {}
+ for i=1,#self.modules do
+ local mw,mgw,mggw = self.modules[i]:parameters()
+ if mw then
+ tinsert(w,mw)
+ tinsert(gw,mgw)
+ tinsert(ggw,mggw)
+ end
+ end
+ return w,gw,ggw
+ end
+
+ ----------------------------------------------------------------------
+ -- Avoid multiple calls to enable()
+ ----------------------------------------------------------------------
+ function nn.hessian.enable()
+ end
+end
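+
+-- Usage sketch (Hessian support is experimental, per the note above):
+--
+--   nn.hessian.enable()
+--   local model = nn.Sequential():add(nn.Linear(10, 2))
+--   model:initDiagHessianParameters()
+--   -- after the forward/backward and DiagHessian passes, getParameters()
+--   -- also returns the flattened diagonal Hessian estimates:
+--   local p, gp, hp = model:getParameters()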
diff --git a/contrib/lua-torch/nn/init.lua b/contrib/lua-torch/nn/init.lua
new file mode 100755
index 000000000..4319a8868
--- /dev/null
+++ b/contrib/lua-torch/nn/init.lua
@@ -0,0 +1,221 @@
+require('torch')
+
+nn = {} -- define the global nn table
+
+require('nn.THNN')
+
+require('nn.utils')
+
+
+require('nn.ErrorMessages')
+require('nn.Module')
+
+require('nn.Container')
+require('nn.Concat')
+require('nn.Parallel')
+require('nn.Sequential')
+require('nn.DepthConcat')
+
+require('nn.Decorator')
+require('nn.Bottle')
+require('nn.WeightNorm')
+require('nn.DontCast')
+require('nn.NaN')
+require('nn.Profile')
+
+require('nn.Linear')
+require('nn.LinearWeightNorm')
+require('nn.Bilinear')
+require('nn.PartialLinear')
+require('nn.SparseLinear')
+require('nn.IndexLinear')
+require('nn.Reshape')
+require('nn.View')
+require('nn.Contiguous')
+require('nn.Select')
+require('nn.Narrow')
+require('nn.Index')
+require('nn.Squeeze')
+require('nn.Unsqueeze')
+require('nn.Replicate')
+require('nn.Transpose')
+require('nn.BatchNormalization')
+require('nn.LayerNormalization')
+require('nn.Padding')
+require('nn.GradientReversal')
+require('nn.MaskedSelect')
+
+require('nn.Copy')
+require('nn.Min')
+require('nn.Max')
+require('nn.Sum')
+require('nn.Mean')
+require('nn.CMul')
+require('nn.Mul')
+require('nn.MulConstant')
+require('nn.CAdd')
+require('nn.Add')
+require('nn.AddConstant')
+require('nn.Constant')
+require('nn.Dropout')
+require('nn.SpatialDropout')
+require('nn.VolumetricDropout')
+require('nn.WhiteNoise')
+require('nn.OneHot')
+require('nn.PrintSize')
+require('nn.ZeroGrad')
+
+require('nn.CAddTable')
+require('nn.CDivTable')
+require('nn.CMulTable')
+require('nn.CSubTable')
+require('nn.CMaxTable')
+require('nn.CMinTable')
+require('nn.CAddTensorTable')
+
+require('nn.Euclidean')
+require('nn.WeightedEuclidean')
+require('nn.PairwiseDistance')
+require('nn.CosineDistance')
+require('nn.DotProduct')
+require('nn.Normalize')
+require('nn.Cosine')
+require('nn.Kmeans')
+
+require('nn.Exp')
+require('nn.Log')
+require('nn.HardTanh')
+require('nn.Clamp')
+require('nn.LogSigmoid')
+require('nn.LogSoftMax')
+require('nn.Sigmoid')
+require('nn.SoftMax')
+require('nn.SoftMin')
+require('nn.SoftPlus')
+require('nn.SoftSign')
+require('nn.Tanh')
+require('nn.TanhShrink')
+require('nn.Abs')
+require('nn.Power')
+require('nn.Square')
+require('nn.Sqrt')
+require('nn.HardShrink')
+require('nn.SoftShrink')
+require('nn.Threshold')
+require('nn.Maxout')
+require('nn.ReLU')
+require('nn.ReLU6')
+require('nn.PReLU')
+require('nn.CReLU')
+require('nn.LeakyReLU')
+require('nn.SpatialSoftMax')
+require('nn.SpatialLogSoftMax')
+require('nn.RReLU')
+require('nn.ELU')
+require('nn.GatedLinearUnit')
+
+require('nn.LookupTable')
+require('nn.SpatialConvolution')
+require('nn.SpatialConvolutionLocal')
+require('nn.SpatialFullConvolution')
+require('nn.SpatialFullConvolutionMap')
+require('nn.SpatialConvolutionMM')
+require('nn.SpatialDepthWiseConvolution')
+require('nn.SpatialConvolutionMap')
+require('nn.SpatialDilatedConvolution')
+require('nn.SpatialSubSampling')
+require('nn.SpatialMaxPooling')
+require('nn.SpatialDilatedMaxPooling')
+require('nn.SpatialMaxUnpooling')
+require('nn.SpatialFractionalMaxPooling')
+require('nn.SpatialLPPooling')
+require('nn.SpatialAveragePooling')
+require('nn.SpatialAdaptiveMaxPooling')
+require('nn.SpatialAdaptiveAveragePooling')
+require('nn.TemporalConvolution')
+require('nn.TemporalSubSampling')
+require('nn.TemporalMaxPooling')
+require('nn.TemporalDynamicKMaxPooling')
+require('nn.TemporalRowConvolution')
+require('nn.SpatialSubtractiveNormalization')
+require('nn.SpatialDivisiveNormalization')
+require('nn.SpatialContrastiveNormalization')
+require('nn.SpatialCrossMapLRN')
+require('nn.SpatialZeroPadding')
+require('nn.SpatialReflectionPadding')
+require('nn.SpatialReplicationPadding')
+require('nn.SpatialUpSamplingNearest')
+require('nn.SpatialUpSamplingBilinear')
+require('nn.SpatialBatchNormalization')
+
+require('nn.VolumetricConvolution')
+require('nn.VolumetricFullConvolution')
+require('nn.VolumetricDilatedConvolution')
+require('nn.VolumetricMaxPooling')
+require('nn.VolumetricDilatedMaxPooling')
+require('nn.VolumetricFractionalMaxPooling')
+require('nn.VolumetricMaxUnpooling')
+require('nn.VolumetricAveragePooling')
+require('nn.VolumetricBatchNormalization')
+require('nn.VolumetricReplicationPadding')
+
+require('nn.GPU')
+
+require('nn.ParallelTable')
+require('nn.Identity')
+require('nn.ConcatTable')
+require('nn.SplitTable')
+require('nn.JoinTable')
+require('nn.SelectTable')
+require('nn.MixtureTable')
+require('nn.CriterionTable')
+require('nn.FlattenTable')
+require('nn.NarrowTable')
+require('nn.MapTable')
+require('nn.ZipTable')
+require('nn.ZipTableOneToMany')
+require('nn.Collapse')
+require('nn.Convert')
+
+require('nn.Criterion')
+require('nn.MSECriterion')
+require('nn.SpatialAutoCropMSECriterion')
+require('nn.SmoothL1Criterion')
+require('nn.MarginCriterion')
+require('nn.SoftMarginCriterion')
+require('nn.AbsCriterion')
+require('nn.ClassNLLCriterion')
+require('nn.SpatialClassNLLCriterion')
+require('nn.ClassSimplexCriterion')
+require('nn.DistKLDivCriterion')
+require('nn.MultiCriterion')
+require('nn.L1HingeEmbeddingCriterion')
+require('nn.HingeEmbeddingCriterion')
+require('nn.CosineEmbeddingCriterion')
+require('nn.MarginRankingCriterion')
+require('nn.MultiMarginCriterion')
+require('nn.MultiLabelMarginCriterion')
+require('nn.MultiLabelSoftMarginCriterion')
+require('nn.L1Cost')
+require('nn.L1Penalty')
+require('nn.WeightedMSECriterion')
+require('nn.BCECriterion')
+require('nn.CrossEntropyCriterion')
+require('nn.ParallelCriterion')
+require('nn.DistanceRatioCriterion')
+require('nn.ModuleCriterion')
+
+require('nn.PixelShuffle')
+
+require('nn.StochasticGradient')
+
+require('nn.MM')
+require('nn.MV')
+
+require('nn.Jacobian')
+require('nn.SparseJacobian')
+require('nn.hessian')
+require('nn.test')
+
+
+return nn
diff --git a/contrib/lua-torch/nn/lib/CMakeLists.txt b/contrib/lua-torch/nn/lib/CMakeLists.txt
new file mode 100644
index 000000000..de04595f6
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/CMakeLists.txt
@@ -0,0 +1,5 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
+CMAKE_POLICY(VERSION 2.6)
+SET(THNN_INSTALL_LIB_SUBDIR "${RSPAMD_LIBDIR}")
+SET(THNN_INSTALL_INCLUDE_SUBDIR "${Torch_INSTALL_INCLUDE_SUBDIR}")
+ADD_SUBDIRECTORY(THNN) \ No newline at end of file
diff --git a/contrib/lua-torch/nn/lib/THNN/CMakeLists.txt b/contrib/lua-torch/nn/lib/THNN/CMakeLists.txt
new file mode 100644
index 000000000..00908a5b1
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/CMakeLists.txt
@@ -0,0 +1,47 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
+CMAKE_POLICY(VERSION 2.6)
+
+IF(NOT TH_LIBRARIES)
+ SET(TH_LIBRARIES "TH")
+ENDIF(NOT TH_LIBRARIES)
+MESSAGE(STATUS "TH_LIBRARIES: ${TH_LIBRARIES}")
+
+IF(NOT THNN_INSTALL_LIB_SUBDIR)
+ SET(THNN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "THNN install library directory")
+ SET(THNN_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "THNN install include subdirectory")
+ENDIF()
+
+# Flags
+# When using MSVC
+IF(MSVC)
+  # we want to respect the standard and silence MSVC's deprecation warnings.
+ ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)
+ ADD_DEFINITIONS(-DTH_EXPORTS)
+ENDIF(MSVC)
+
+IF (CMAKE_VERSION VERSION_LESS "3.1")
+ SET(CMAKE_C_FLAGS "-std=c99 ${CMAKE_C_FLAGS}")
+ELSE ()
+ SET(CMAKE_C_STANDARD 99)
+ENDIF ()
+
+IF (WITH_OPENMP)
+ FIND_PACKAGE(OpenMP)
+ IF(OPENMP_FOUND)
+ MESSAGE(STATUS "Compiling with OpenMP support")
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+ SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+ ENDIF(OPENMP_FOUND)
+ENDIF (WITH_OPENMP)
+
+SET(src init.c)
+ADD_LIBRARY(THNN SHARED ${src})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+### Torch packages assume the library prefix is "lib"
+SET_TARGET_PROPERTIES(THNN PROPERTIES
+ PREFIX "lib"
+ IMPORT_PREFIX "lib")
+
+TARGET_LINK_LIBRARIES(THNN ${TH_LIBRARIES})
+INSTALL(TARGETS THNN DESTINATION ${RSPAMD_LIBDIR})
diff --git a/contrib/lua-torch/nn/lib/THNN/README.md b/contrib/lua-torch/nn/lib/THNN/README.md
new file mode 100644
index 000000000..e6c61601d
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/README.md
@@ -0,0 +1,32 @@
+# THNN
+
+THNN is a library that gathers nn's C implementations of neural network modules. It is entirely free of Lua dependencies and can therefore be used in any application that has a C FFI. Note that it only contains quite low-level functions; an object-oriented C/C++ wrapper will be created soon as a separate library.
+
+There is also a CUDA counterpart of THNN (THCUNN) in the [cunn repository](https://github.com/torch/cunn/tree/master/lib/THCUNN).
+
+## Links
+
+* [API reference](doc/api_reference.md)
+* [Style guidelines](doc/style_guidelines.md)
+
+## Motivation
+
+Torch's neural network package (nn) provided many optimized C implementations of modules, but the source files contained Lua-specific code and headers, so they couldn't easily be compiled and included anywhere else.
+
+THNN is based on the same code, but is written in pure C, so it can be easily included in other code. **Future C implementations should be committed to THNN.**
+
+## API
+
+THNN is a purely functional library. It provides 2-3 functions for each module that perform the most important operations:
+
+* **updateOutput** - applies the module to an input
+* **updateGradInput** - accepts gradient w.r.t. output and previous module input, and computes a gradient w.r.t. that input
+* **accGradParameters** - *(optional, only modules with parameters)* accepts gradient w.r.t. output and previous module input, and computes gradient w.r.t. the parameters
+
+For information on argument types please check the [API reference](doc/api_reference.md).
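+
+As a sketch of the calling convention (the concrete symbol is generated by the `THNN_(...)` macro in `THNN.h`; the tiny program below is illustrative, not part of the library):
+
+```c
+#include "THNN.h"
+
+int main(void) {
+  THFloatTensor *in  = THFloatTensor_newWithSize1d(4);
+  THFloatTensor *out = THFloatTensor_new();
+  THFloatTensor_fill(in, -2.0f);
+  /* state is unused by the CPU implementations, so NULL is passed */
+  THNN_FloatAbs_updateOutput(NULL, in, out); /* out[i] = |in[i]| = 2 */
+  THFloatTensor_free(in);
+  THFloatTensor_free(out);
+  return 0;
+}
+```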
+
+## Developer docs
+
+* [Style guidelines](doc/style_guidelines.md)
+
+This section will be expanded when the FFI refactoring is finished.
diff --git a/contrib/lua-torch/nn/lib/THNN/THNN.h b/contrib/lua-torch/nn/lib/THNN/THNN.h
new file mode 100644
index 000000000..0019b7976
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/THNN.h
@@ -0,0 +1,33 @@
+#ifndef THNN_H
+#define THNN_H
+
+#include <stdbool.h>
+#include <TH.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#define THNN_(NAME) TH_CONCAT_3(THNN_, Real, NAME)
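+/* e.g. with Real defined as Float (via THGenerateFloatTypes.h below),
+   THNN_(Abs_updateOutput) expands to THNN_FloatAbs_updateOutput */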
+
+#define THIndexTensor THLongTensor
+#define THIndexTensor_(NAME) THLongTensor_ ## NAME
+
+#define THIntegerTensor THIntTensor
+#define THIntegerTensor_(NAME) THIntTensor_ ## NAME
+
+typedef long THIndex_t;
+typedef int THInteger_t;
+typedef void THNNState;
+
+#define THNN_resizeAs_indices(I1, I2) \
+ THLongStorage *size2 = THIndexTensor_(newSizeOf)(I2); \
+ if (!THTensor_(isSize)(I1, size2)) \
+ { \
+ THTensor_(resize)(I1, size2, NULL); \
+ } \
+ THLongStorage_free(size2);
+
+#include "generic/THNN.h"
+#include <THGenerateFloatTypes.h>
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Abs.c b/contrib/lua-torch/nn/lib/THNN/generic/Abs.c
new file mode 100644
index 000000000..28721ec8e
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/Abs.c
@@ -0,0 +1,28 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Abs.c"
+#else
+
+void THNN_(Abs_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output)
+{
+ THTensor_(resizeAs)(output, input);
+ THTensor_(abs)(output, input);
+}
+
+void THNN_(Abs_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput)
+{
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ real z = *input_data;
+ *gradInput_data = *gradOutput_data * (z >= 0 ? 1 : -1);
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/AbsCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/AbsCriterion.c
new file mode 100644
index 000000000..9bee5de9e
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/AbsCriterion.c
@@ -0,0 +1,40 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/AbsCriterion.c"
+#else
+
+void THNN_(AbsCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage)
+{
+ real sum = 0;
+ THNN_CHECK_NELEMENT(input, target);
+ TH_TENSOR_APPLY2(real, input, real, target,
+ sum += fabs(*input_data - *target_data);
+ );
+
+ if (sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(AbsCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ *gradInput_data = (*input_data - *target_data) >= 0 ? norm : -norm;
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/BCECriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/BCECriterion.c
new file mode 100644
index 000000000..637a4067e
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/BCECriterion.c
@@ -0,0 +1,66 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/BCECriterion.c"
+#else
+
+#define EPS 1e-12
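+/* EPS keeps the logarithms finite when the input saturates at exactly 0 or
+   1; the loss below is -[y*log(x + EPS) + (1 - y)*log(1 - x + EPS)] */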
+
+void THNN_(BCECriterion_updateOutput)(THNNState *state, THTensor *input,
+ THTensor *target, THTensor *output,
+ bool sizeAverage, THTensor *weights)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ THNN_CHECK_NELEMENT(input, weights);
+ THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+ real sum = 0;
+
+ if(weights)
+ TH_TENSOR_APPLY3(real, input, real, target, real, weights,
+ real x = *input_data;
+ real y = *target_data;
+ real w = *weights_data;
+      THAssertMsg(x >= 0. && x <= 1.,
+                  "input value should be between 0 and 1, but got %f",
+                  (double) x);
+ sum -= (log(x + EPS) * y + log(1. - x + EPS) * (1. - y)) * w;
+ )
+ else
+ TH_TENSOR_APPLY2(real, input, real, target,
+ real x = *input_data;
+ real y = *target_data;
+      THAssertMsg(x >= 0. && x <= 1.,
+                  "input value should be between 0 and 1, but got %f",
+                  (double) x);
+ sum -= log(x + EPS) * y + log(1. - x + EPS) * (1. - y);
+ );
+
+
+ if (sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(BCECriterion_updateGradInput)(THNNState *state, THTensor *input,
+ THTensor *target, THTensor *gradInput,
+ bool sizeAverage, THTensor *weights)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ THNN_CHECK_NELEMENT(input, weights);
+
+ real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
+
+ THTensor_(resizeAs)(gradInput, input);
+
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ real x = *input_data;
+ real y = *target_data;
+ *gradInput_data = - norm * (y - x) / ((1. - x + EPS) * (x + EPS));
+ );
+
+ if(weights)
+ THTensor_(cmul)(gradInput, gradInput, weights);
+}
+
+#undef EPS
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/BatchNormalization.c b/contrib/lua-torch/nn/lib/THNN/generic/BatchNormalization.c
new file mode 100644
index 000000000..b8f462790
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/BatchNormalization.c
@@ -0,0 +1,149 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/BatchNormalization.c"
+#else
+
+void THNN_(BatchNormalization_updateOutput)(
+ THNNState *state, THTensor *input, THTensor *output,
+ THTensor *weight, THTensor *bias,
+ THTensor *running_mean, THTensor *running_var,
+ THTensor *save_mean, THTensor *save_std,
+ bool train, double momentum, double eps)
+{
+ THTensor_(resizeAs)(output, input);
+ long nInput = THTensor_(size)(input, 1);
+ long f;
+ ptrdiff_t n = THTensor_(nElement)(input) / nInput;
+
+ #pragma omp parallel for
+ for (f = 0; f < nInput; ++f) {
+ THTensor *in = THTensor_(newSelect)(input, 1, f);
+ THTensor *out = THTensor_(newSelect)(output, 1, f);
+
+ real mean, invstd;
+
+ if (train) {
+ // compute mean per input
+ accreal sum = 0;
+ TH_TENSOR_APPLY(real, in, sum += *in_data;);
+
+ mean = (real) sum / n;
+ THTensor_(set1d)(save_mean, f, (real) mean);
+
+ // compute variance per input
+ sum = 0;
+ TH_TENSOR_APPLY(real, in,
+ sum += (*in_data - mean) * (*in_data - mean););
+
+ if (sum == 0 && eps == 0.0) {
+ invstd = 0;
+ } else {
+ invstd = (real) (1 / sqrt(sum/n + eps));
+ }
+ THTensor_(set1d)(save_std, f, (real) invstd);
+
+ // update running averages
+ THTensor_(set1d)(running_mean, f,
+ (real) (momentum * mean + (1 - momentum) * THTensor_(get1d)(running_mean, f)));
+
+ accreal unbiased_var = sum / (n - 1);
+ THTensor_(set1d)(running_var, f,
+ (real) (momentum * unbiased_var + (1 - momentum) * THTensor_(get1d)(running_var, f)));
+ } else {
+ mean = THTensor_(get1d)(running_mean, f);
+ invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps);
+ }
+
+ // compute output
+ real w = weight ? THTensor_(get1d)(weight, f) : 1;
+ real b = bias ? THTensor_(get1d)(bias, f) : 0;
+
+ TH_TENSOR_APPLY2(real, in, real, out,
+ *out_data = (real) (((*in_data - mean) * invstd) * w + b););
+
+ THTensor_(free)(out);
+ THTensor_(free)(in);
+ }
+}
+
+void THNN_(BatchNormalization_backward)(
+ THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput,
+ THTensor *gradWeight, THTensor *gradBias, THTensor *weight,
+ THTensor *running_mean, THTensor *running_var,
+ THTensor *save_mean, THTensor *save_std,
+ bool train, double scale, double eps)
+{
+ THNN_CHECK_SHAPE(input, gradOutput);
+ long nInput = THTensor_(size)(input, 1);
+ long f;
+ ptrdiff_t n = THTensor_(nElement)(input) / nInput;
+
+ #pragma omp parallel for
+ for (f = 0; f < nInput; ++f) {
+ THTensor *in = THTensor_(newSelect)(input, 1, f);
+ THTensor *gradOut = THTensor_(newSelect)(gradOutput, 1, f);
+ real w = weight ? THTensor_(get1d)(weight, f) : 1;
+ real mean, invstd;
+ if (train) {
+ mean = THTensor_(get1d)(save_mean, f);
+ invstd = THTensor_(get1d)(save_std, f);
+ } else {
+ mean = THTensor_(get1d)(running_mean, f);
+ invstd = 1 / sqrt(THTensor_(get1d)(running_var, f) + eps);
+ }
+
+ // sum over all gradOutput in feature plane
+ accreal sum = 0;
+ TH_TENSOR_APPLY(real, gradOut, sum += *gradOut_data;);
+
+    // dot product of Q(X) and gradOutput
+ accreal dotp = 0;
+ TH_TENSOR_APPLY2(real, in, real, gradOut,
+ dotp += (*in_data - mean) * (*gradOut_data););
+
+ if (gradInput) {
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor *gradIn = THTensor_(newSelect)(gradInput, 1, f);
+
+ if (train) {
+ // when in training mode
+ // Q(X) = X - E[x] ; i.e. input centered to zero mean
+ // Y = Q(X) / σ ; i.e. BN output before weight and bias
+ // dL/dX = (Q(dL/dY) - dot(Y, dL/dY) * Y) / σ * w
+
+ // projection of gradOutput on to output scaled by std
+ real k = (real) dotp * invstd * invstd / n;
+ TH_TENSOR_APPLY2(real, gradIn, real, in,
+ *gradIn_data = (*in_data - mean) * k;);
+
+ accreal gradMean = sum / n;
+ TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
+ *gradIn_data = (*gradOut_data - gradMean - *gradIn_data) * invstd * w;);
+
+ } else {
+ // when in evaluation mode
+ // Q(X) = X - running_mean ; i.e. input centered to zero mean
+ // Y = Q(X) / running_std ; i.e. BN output before weight and bias
+        // dL/dX = dL/dY * w / running_std
+ TH_TENSOR_APPLY2(real, gradIn, real, gradOut,
+ *gradIn_data = *gradOut_data * invstd * w;);
+ }
+
+ THTensor_(free)(gradIn);
+ }
+
+ if (gradWeight) {
+ real val = THTensor_(get1d)(gradWeight, f);
+ THTensor_(set1d)(gradWeight, f, val + scale * dotp * invstd);
+ }
+
+ if (gradBias) {
+ real val = THTensor_(get1d)(gradBias, f);
+ THTensor_(set1d)(gradBias, f, val + scale * sum);
+ }
+
+ THTensor_(free)(gradOut);
+ THTensor_(free)(in);
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/ClassNLLCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/ClassNLLCriterion.c
new file mode 100644
index 000000000..4cf37aeaf
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/ClassNLLCriterion.c
@@ -0,0 +1,163 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/ClassNLLCriterion.c"
+#else
+
+void THNN_(ClassNLLCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *output,
+ bool sizeAverage,
+ THTensor *weights,
+ THTensor *total_weight,
+ long ignore_index)
+{
+ THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+ THNN_CHECK_DIM_SIZE(total_weight, 1, 0, 1);
+ int n_dims = THTensor_(nDimension)(input);
+ int n_classes = THTensor_(size)(input, n_dims - 1);
+ ignore_index -= TH_INDEX_BASE;
+
+ if (THIndexTensor_(nDimension)(target) > 1) {
+ THError("multi-target not supported");
+ }
+ if (THTensor_(nDimension)(input) > 2) {
+ THError("input tensor should be 1D or 2D");
+ }
+ if (weights && THTensor_(nElement)(weights) != n_classes) {
+ THDescBuff s1 = THTensor_(sizeDesc)(weights);
+ THError("weight tensor should be defined either for all %d classes or no classes"
+ " but got weight tensor of shape: %s", n_classes, s1.str);
+ }
+
+ input = THTensor_(newContiguous)(input);
+ target = THIndexTensor_(newContiguous)(target);
+ weights = weights ? THTensor_(newContiguous)(weights) : NULL;
+
+ real *input_data = THTensor_(data)(input);
+ THIndex_t *target_data = THIndexTensor_(data)(target);
+ real *weights_data = weights ? THTensor_(data)(weights) : NULL;
+ real *output_data = THTensor_(data)(output);
+ real *total_weight_data = THTensor_(data)(total_weight);
+
+ output_data[0] = total_weight_data[0] = 0.0;
+
+ if (THTensor_(nDimension)(input) == 1) {
+ int cur_target = target_data[0] - TH_INDEX_BASE;
+ if (cur_target != ignore_index) {
+ THAssert(cur_target >= 0 && cur_target < n_classes);
+ total_weight_data[0] = weights ? weights_data[cur_target] : 1.0f;
+ output_data[0] = -input_data[cur_target] * total_weight_data[0];
+ }
+ } else if (THTensor_(nDimension)(input) == 2) {
+ int batch_size = THTensor_(size)(input, 0);
+ THAssert(THIndexTensor_(size)(target, 0) == batch_size);
+
+ int n_target = THTensor_(size)(input, 1);
+
+ int i;
+ for (i = 0; i < batch_size; i++) {
+ int cur_target = target_data[i] - TH_INDEX_BASE;
+ if (cur_target != ignore_index) {
+ THAssert(cur_target >= 0 && cur_target < n_classes);
+
+ real cur_weight = weights ? weights_data[cur_target] : 1.0f;
+ total_weight_data[0] += cur_weight;
+ output_data[0] -= input_data[i * n_target + cur_target] * cur_weight;
+ }
+ }
+ }
+
+ if (sizeAverage && total_weight_data[0]) {
+ output_data[0] /= total_weight_data[0];
+ }
+
+ if (weights) {
+ THTensor_(free)(weights);
+ }
+ THTensor_(free)(input);
+ THIndexTensor_(free)(target);
+}
+
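+/* Gradient note: loss = -sum_i w_{t_i} * input_i[t_i] (divided by the total
+   weight when sizeAverage is set), so only the selected log-probability
+   entries receive a nonzero gradient, -w_{t_i}; the function below fills
+   exactly those entries. */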
+void THNN_(ClassNLLCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage,
+ THTensor *weights,
+ THTensor *total_weight,
+ long ignore_index)
+{
+ int n_dims = THTensor_(nDimension)(input);
+ int n_classes = THTensor_(size)(input, n_dims - 1);
+ ignore_index -= TH_INDEX_BASE;
+
+ if (!THTensor_(isContiguous)(gradInput)) {
+ THError("gradInput must be contiguous");
+ }
+
+ real *total_weight_data = THTensor_(data)(total_weight);
+
+ if (!(*total_weight_data > 0)) {
+ return;
+ }
+
+ if (THIndexTensor_(nDimension)(target) > 1) {
+ THError("multi-target not supported");
+ }
+
+ if (THTensor_(nDimension)(input) > 2) {
+ THError("input tensor should be 1D or 2D");
+ }
+
+ if (weights && THTensor_(nElement)(weights) != n_classes) {
+ THError("weight tensor should be defined either for all or no classes");
+ }
+
+ target = THIndexTensor_(newContiguous)(target);
+ weights = weights ? THTensor_(newContiguous)(weights) : NULL;
+
+ THIndex_t *target_data = THIndexTensor_(data)(target);
+ real *weights_data = weights ? THTensor_(data)(weights) : NULL;
+ real *gradInput_data = THTensor_(data)(gradInput);
+
+ if (THTensor_(nDimension)(input) == 1) {
+ int cur_target = target_data[0] - TH_INDEX_BASE;
+ if (cur_target != ignore_index) {
+ THAssert(cur_target >= 0 && cur_target < n_classes);
+
+ gradInput_data[cur_target] =
+ (!sizeAverage && weights) ? -weights_data[cur_target] : -1;
+ }
+
+ } else if (THTensor_(nDimension)(input) == 2) {
+ int batch_size = THTensor_(size)(input, 0);
+ THAssert(THIndexTensor_(size)(target, 0) == batch_size);
+
+ int n_target = THTensor_(size)(input, 1);
+
+ int i;
+ for (i = 0; i < batch_size; i++){
+ int cur_target = target_data[i] - TH_INDEX_BASE;
+
+ if (cur_target != ignore_index) {
+ THAssert(cur_target >= 0 && cur_target < n_classes);
+
+ gradInput_data[i * n_target + cur_target] =
+ -(weights ? weights_data[cur_target] : 1.0f);
+
+ if (sizeAverage && *total_weight_data) {
+ gradInput_data[i * n_target + cur_target] /= *total_weight_data;
+ }
+ }
+ }
+ }
+
+ THIndexTensor_(free)(target);
+ if (weights) {
+ THTensor_(free)(weights);
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/DistKLDivCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/DistKLDivCriterion.c
new file mode 100644
index 000000000..6bd6aa067
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/DistKLDivCriterion.c
@@ -0,0 +1,44 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/DistKLDivCriterion.c"
+#else
+
+void THNN_(DistKLDivCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+
+ real sum = 0;
+
+ TH_TENSOR_APPLY2(real, input, real, target,
+ sum += *target_data > 0 ? *target_data * (log(*target_data) - *input_data) : 0;
+ );
+
+ if (sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(DistKLDivCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+
+ real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ *gradInput_data = *target_data > 0 ? norm * (-*target_data) : 0;
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/ELU.c b/contrib/lua-torch/nn/lib/THNN/generic/ELU.c
new file mode 100644
index 000000000..ddcfb9705
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/ELU.c
@@ -0,0 +1,54 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/ELU.c"
+#else
+
+void THNN_(ELU_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal alpha_,
+ bool inplace)
+{
+ real alpha = TH_CONVERT_ACCREAL_TO_REAL(alpha_);
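+ /* ELU: f(x) = x for x > 0 and alpha * (exp(x) - 1) for x <= 0. */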
+ if(inplace) {
+ TH_TENSOR_APPLY(real, input,
+ if(*input_data <= 0) {
+ *input_data = (exp(*input_data) - 1) * alpha;
+ }
+ );
+ THTensor_(set)(output, input);
+ } else {
+ THTensor_(resizeAs)(output, input);
+ TH_TENSOR_APPLY2(real, input, real, output,
+ *output_data = *input_data <= 0 ? (exp(*input_data)-1)*alpha : *input_data;
+ );
+ }
+}
+
+void THNN_(ELU_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output,
+ accreal alpha_,
+ bool inplace)
+{
+ real alpha = TH_CONVERT_ACCREAL_TO_REAL(alpha_);
+ THNN_CHECK_NELEMENT(input, gradOutput);
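+ /* For x <= 0, f(x) = alpha * (exp(x) - 1), so f'(x) = alpha * exp(x)
+ * = f(x) + alpha. Reusing the saved output avoids recomputing exp(). */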
+ if(inplace) {
+ TH_TENSOR_APPLY2(real, gradOutput, real, output,
+ if(*output_data <= 0) {
+ *gradOutput_data *= *output_data + alpha;
+ }
+ );
+ THTensor_(set)(gradInput, gradOutput);
+ } else {
+ THTensor_(resizeAs)(gradInput, output);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
+ *gradInput_data = *output_data <= 0 ? *gradOutput_data * (*output_data + alpha) : *gradOutput_data;
+ );
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/FusedRNNKernel.c b/contrib/lua-torch/nn/lib/THNN/generic/FusedRNNKernel.c
new file mode 100644
index 000000000..30788b0a2
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/FusedRNNKernel.c
@@ -0,0 +1,55 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/FusedRNNKernel.c"
+#else
+
+void THNN_(GRUFused_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *hidden,
+ THTensor *bias1,
+ THTensor *bias2,
+ THTensor *hx,
+ THTensor *hy,
+ THTensor *storage)
+{
+ THAssertMsg(false, "Not implemented for CPU");
+}
+
+void THNN_(GRUFused_updateGradInput)(
+ THNNState *state,
+ THTensor *gradInInput,
+ THTensor *gradInHidden,
+ THTensor *gradOutput,
+ THTensor *gradInputHx,
+ THTensor *storage)
+{
+ THAssertMsg(false, "Not implemented for CPU");
+}
+
+void THNN_(LSTMFused_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *hidden,
+ THTensor *bias1,
+ THTensor *bias2,
+ THTensor *cx,
+ THTensor *hy,
+ THTensor *cy)
+{
+ THAssertMsg(false, "Not implemented for CPU");
+}
+
+void THNN_(LSTMFused_updateGradInput)(
+ THNNState *state,
+ THTensor *storage,
+ THTensor *gradInGates,
+ THTensor *prevC,
+ THTensor *cy,
+ THTensor *gradOutput,
+ THTensor *gradOutputCell,
+ THTensor *gradInputCx)
+{
+ THAssertMsg(false, "Not implemented for CPU");
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/GatedLinearUnit.c b/contrib/lua-torch/nn/lib/THNN/generic/GatedLinearUnit.c
new file mode 100644
index 000000000..274a27e3b
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/GatedLinearUnit.c
@@ -0,0 +1,73 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/GatedLinearUnit.c"
+#else
+
+void THNN_(GatedLinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int dim)
+{
+ // size output to half of input
+ dim = dim - TH_INDEX_BASE;
+ const long nIn = THTensor_(size)(input, dim);
+ THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld",
+ dim + TH_INDEX_BASE, nIn);
+
+ const long inputSize = THTensor_(size)(input, dim) / 2;
+ THLongStorage *newSizes = THTensor_(newSizeOf)(input);
+ THLongStorage_set(newSizes, dim, inputSize);
+ THTensor_(resize)(output, newSizes, NULL);
+
+ // halve tensor
+ THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize);
+ THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize);
+
+ // x = x1:cmul( sigmoid(x2) )
+ THTensor_(sigmoid)(output, secondHalf);
+ THTensor_(cmul)(output, output, firstHalf);
+
+ THLongStorage_free(newSizes);
+ THTensor_(free)(firstHalf);
+ THTensor_(free)(secondHalf);
+}
+
+void THNN_(GatedLinear_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int dim)
+{
+ // set up tensors
+ dim = dim - TH_INDEX_BASE;
+ const long nIn = THTensor_(size)(input, dim);
+ THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld",
+ dim + TH_INDEX_BASE, nIn);
+
+ THTensor_(resizeAs)(gradInput, input);
+ const long inputSize = THTensor_(size)(input, dim) / 2;
+ THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize);
+ THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize);
+ THTensor *gradInputfirstHalf = THTensor_(newNarrow)(gradInput, dim, 0, inputSize);
+ THTensor *gradInputsecondHalf = THTensor_(newNarrow)(gradInput, dim, inputSize, inputSize);
+
+ THTensor_(sigmoid)(gradInputfirstHalf, secondHalf);
+
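+ /* With o = x1 * s and s = sigmoid(x2): dL/dx1 = gradOutput * s and
+ * dL/dx2 = gradOutput * x1 * s * (1 - s). gradInputfirstHalf now holds s,
+ * so the loop below turns gradInputsecondHalf into s * (1 - s). */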
+ TH_TENSOR_APPLY2(real, gradInputsecondHalf, real, gradInputfirstHalf,
+ real z = *gradInputfirstHalf_data;
+ *gradInputsecondHalf_data = (1. - z) * z;
+ );
+
+ THTensor_(cmul)(gradInputfirstHalf, gradInputfirstHalf, gradOutput);
+
+ THTensor_(cmul)(gradInputsecondHalf, gradInputsecondHalf, gradOutput);
+ THTensor_(cmul)(gradInputsecondHalf, gradInputsecondHalf, firstHalf);
+
+ THTensor_(free)(firstHalf);
+ THTensor_(free)(secondHalf);
+ THTensor_(free)(gradInputfirstHalf);
+ THTensor_(free)(gradInputsecondHalf);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/HardShrink.c b/contrib/lua-torch/nn/lib/THNN/generic/HardShrink.c
new file mode 100644
index 000000000..aaae85bac
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/HardShrink.c
@@ -0,0 +1,42 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/HardShrink.c"
+#else
+
+void THNN_(HardShrink_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal lambda_)
+{
+ real lambda = TH_CONVERT_ACCREAL_TO_REAL(lambda_);
+ THTensor_(resizeAs)(output, input);
+
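+ /* HardShrink: f(x) = x when |x| > lambda, 0 otherwise. */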
+ TH_TENSOR_APPLY2(real, output, real, input,
+ if (*input_data > lambda)
+ *output_data = *input_data;
+ else if (*input_data < -lambda)
+ *output_data = *input_data;
+ else
+ *output_data = 0;
+ );
+}
+
+void THNN_(HardShrink_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ accreal lambda_)
+{
+ real lambda = TH_CONVERT_ACCREAL_TO_REAL(lambda_);
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ if (*input_data > lambda || *input_data < -lambda)
+ *gradInput_data = *gradOutput_data;
+ else
+ *gradInput_data = 0;
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/HardTanh.c b/contrib/lua-torch/nn/lib/THNN/generic/HardTanh.c
new file mode 100644
index 000000000..589a66e15
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/HardTanh.c
@@ -0,0 +1,133 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/HardTanh.c"
+#else
+
+void THNN_(HardTanh_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal min_val_,
+ accreal max_val_,
+ bool inplace)
+{
+ real min_val = TH_CONVERT_ACCREAL_TO_REAL(min_val_);
+ real max_val = TH_CONVERT_ACCREAL_TO_REAL(max_val_);
+ if (inplace)
+ THTensor_(set)(output, input);
+ else
+ THTensor_(resizeAs)(output, input);
+
+ if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
+ {
+ if (inplace)
+ TH_TENSOR_APPLY(real, input,
+ if (*input_data < min_val)
+ *input_data = min_val;
+ else if (*input_data > max_val)
+ *input_data = max_val;
+ );
+ else
+ TH_TENSOR_APPLY2(real, output, real, input,
+ if (*input_data < min_val)
+ *output_data = min_val;
+ else if (*input_data <= max_val)
+ *output_data = *input_data;
+ else
+ *output_data = max_val;
+ );
+ }
+ else
+ {
+ real* ptr_input = THTensor_(data)(input);
+ real* ptr_output = THTensor_(data)(output);
+ ptrdiff_t i;
+ ptrdiff_t n = THTensor_(nElement)(input);
+
+ if (inplace)
+#pragma omp parallel for private(i)
+ for (i = 0; i < n; i++)
+ {
+ if (ptr_input[i] < min_val)
+ ptr_input[i] = min_val;
+ else if (ptr_input[i] > max_val)
+ ptr_input[i] = max_val;
+ }
+ else
+#pragma omp parallel for private(i)
+ for (i = 0; i < n; i++)
+ {
+ if (ptr_input[i] < min_val)
+ ptr_output[i] = min_val;
+ else if (ptr_input[i] <= max_val)
+ ptr_output[i] = ptr_input[i];
+ else
+ ptr_output[i] = max_val;
+ }
+ }
+}
+
+void THNN_(HardTanh_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ accreal min_val_,
+ accreal max_val_,
+ bool inplace)
+{
+ real min_val = TH_CONVERT_ACCREAL_TO_REAL(min_val_);
+ real max_val = TH_CONVERT_ACCREAL_TO_REAL(max_val_);
+
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ if (inplace)
+ THTensor_(set)(gradInput, gradOutput);
+ else
+ THTensor_(resizeAs)(gradInput, input);
+
+ if (input->nDimension == 1 ||
+ !THTensor_(isContiguous)(input) ||
+ !THTensor_(isContiguous)(gradOutput) ||
+ !THTensor_(isContiguous)(gradInput))
+ {
+ if (inplace)
+ {
+ TH_TENSOR_APPLY2(real, gradOutput, real, input,
+ if (*input_data <= min_val || *input_data >= max_val)
+ *gradOutput_data = 0;
+ );
+ }
+ else
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ if (*input_data <= min_val || *input_data >= max_val)
+ *gradInput_data = 0;
+ else
+ *gradInput_data = *gradOutput_data;
+ );
+ }
+ else
+ {
+ real* ptr_gradOutput = THTensor_(data)(gradOutput);
+ real* ptr_gradInput = THTensor_(data)(gradInput);
+ real* ptr_input = THTensor_(data)(input);
+ ptrdiff_t i;
+ ptrdiff_t n = THTensor_(nElement)(input);
+
+ if (inplace)
+#pragma omp parallel for private(i)
+ for (i = 0; i < n; i++)
+ {
+ if (ptr_input[i] <= min_val || ptr_input[i] >= max_val)
+ ptr_gradInput[i] = 0;
+ }
+ else
+#pragma omp parallel for private(i)
+ for (i = 0; i < n; i++)
+ {
+ if (ptr_input[i] <= min_val || ptr_input[i] >= max_val)
+ ptr_gradInput[i] = 0;
+ else
+ ptr_gradInput[i] = ptr_gradOutput[i];
+ }
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/IndexLinear.c b/contrib/lua-torch/nn/lib/THNN/generic/IndexLinear.c
new file mode 100644
index 000000000..42d8368ba
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/IndexLinear.c
@@ -0,0 +1,742 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/IndexLinear.c"
+#else
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+/* Threshold used to trigger multithreading */
+#ifndef THNN_SPARSE_OMP_THRESHOLD
+#define THNN_SPARSE_OMP_THRESHOLD 100000
+#endif
+
+/* Threshold used to trigger BLAS axpy call */
+#ifndef THNN_SPARSE_OUTDIM_THRESHOLD
+#define THNN_SPARSE_OUTDIM_THRESHOLD 49
+#endif
+
+/* sign MACRO */
+#ifndef THNN_INDEXLINEAR_SIGN
+#define THNN_INDEXLINEAR_SIGN(a) ( ( (a) < 0 ) ? -1 : ( (a) > 0 ) )
+#endif
+
+static bool THNN_(checkKeysValues)(THLongTensor* keys, THTensor* values)
+{
+ return THLongTensor_size(keys, 0) == THTensor_(nElement)(values)
+ && THTensor_(nDimension)(values) == 1
+ && THLongTensor_nDimension(keys) == 1;
+}
+
+void THNN_(IndexLinear_updateOutput)(
+ THNNState *state,
+ THLongTensor *keys,
+ long keysOffset,
+ THTensor *values,
+ THLongTensor *sizes,
+ THLongTensor *cumSumSizes,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *normalizedValues,
+ int train)
+{
+ /* Retrieve all the dimensions of the problem */
+ long batchSize = THLongTensor_size(sizes, 0);
+ long keysSize = THLongTensor_size(keys, 0);
+ long outDim = THTensor_(size)(bias, 0);
+ long woutDim = THTensor_(size)(weight, 1);
+ int maxNormalize = woutDim - outDim;
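+ /* When woutDim > outDim, the first maxNormalize columns of each weight
+ * row hold per-key normalization state; from the offsets used below:
+ * [0] running max |value|, [1] its reciprocal, [2] an update-scale slot,
+ * [3] an additive offset. The outDim real weights start at
+ * woffset + maxNormalize. */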
+ long* sizesData = THLongTensor_data(sizes);
+ long* cumSumSizesData = THLongTensor_data(cumSumSizes);
+
+ /* Define/resize the normalized values tensor if maxNormalize is > 0 */
+ real* normalizedValuesData = NULL;
+ if (maxNormalize)
+ {
+ THTensor_(resize1d)(normalizedValues, keysSize);
+ normalizedValuesData = THTensor_(data)(normalizedValues);
+ }
+
+ /* Resize the output */
+ THTensor_(resize2d)(output, batchSize, outDim);
+
+ /* Access the storage data/strides */
+ real* outputData = THTensor_(data)(output);
+ real* valuesData = THTensor_(data)(values);
+ real* weightData = THTensor_(data)(weight);
+ long weightStride0 = weight->stride[0];
+ real* biasData = THTensor_(data)(bias);
+ long* keysData = THLongTensor_data(keys);
+
+ /* Make sure these inputs are contiguous to accelerate computations */
+ THArgCheck(THLongTensor_isContiguous(keys), 1, "keys vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(values), 3, "values vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(output), 6, "output vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(weight), 7, "weight matrix must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(bias), 8, "bias vector must be contiguous");
+ THArgCheck(THNN_(checkKeysValues)(keys, values), 1, "Keys and values should have the same number of elements");
+ THArgCheck(THTensor_(isContiguous)(normalizedValues), 9, "normalizedValues vector must be contiguous");
+ long i,j,k;
+
+ /* Separate cases: output dimension is == 1, or > 1
+ * This allows for some optimizations. */
+ if (outDim == 1)
+ {
+ THVector_(fill)(outputData, *biasData, batchSize);
+ if (maxNormalize)
+ {
+ /* Parallelize on the batch itself */
+#pragma omp parallel \
+ for private(i,j) \
+ firstprivate(outDim, keysOffset, \
+ weightData, keysData, \
+ valuesData, outputData, \
+ cumSumSizesData, sizesData) \
+ schedule(static) \
+ if(keysSize*outDim > THNN_SPARSE_OMP_THRESHOLD && batchSize > 1)
+ for (j = 0; j < batchSize; j++)
+ {
+ real* loutputData = outputData + j;
+ real val = 0;
+ real absVal = 0;
+ long offset = j == 0 ? 0 : cumSumSizesData[j - 1];
+
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ long woffset = weightStride0*(keysData[offset] + keysOffset);
+ absVal = fabs(valuesData[offset]);
+ if (train)
+ {
+ if (absVal > weightData[woffset])
+ {
+ weightData[woffset] = absVal;
+ weightData[woffset+1] = 1/absVal;
+ }
+
+ /*
+ * The following can be used to scale the size of the updates
+ * depending on some rule, e.g. the frequency of a feature, ...
+ * This is used at update time.
+ * TODO: implement a smarter update scale.
+ */
+ weightData[woffset+2] = 1;
+ }
+ normalizedValuesData[offset] = (absVal > weightData[woffset] ? THNN_INDEXLINEAR_SIGN(valuesData[offset]):valuesData[offset]*weightData[woffset+1]) + weightData[woffset+3];
+ val += normalizedValuesData[offset] * weightData[woffset+maxNormalize];
+ offset++;
+ }
+ *loutputData += val;
+ }
+ }
+ else
+ {
+ /* Parallelize on the batch itself */
+#pragma omp parallel \
+ for private(i,j) \
+ firstprivate(outDim, weightData, \
+ keysData, valuesData, \
+ outputData, cumSumSizesData, \
+ sizesData) \
+ schedule(static) \
+ if(keysSize*outDim > THNN_SPARSE_OMP_THRESHOLD && batchSize > 1)
+ for (j = 0; j < batchSize; j++)
+ {
+ long offset = j == 0 ? 0 : cumSumSizesData[j - 1];
+ real* loutputData = outputData + j;
+ real val = 0;
+
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ val += weightData[weightStride0*(keysData[offset] + keysOffset)] * valuesData[offset];
+ offset++;
+ }
+ *loutputData += val;
+ }
+ }
+ }
+ else {
+#pragma omp parallel \
+ for private(i,j,k) \
+ firstprivate(outDim, weightData, \
+ keysData, valuesData, \
+ biasData, outputData, \
+ cumSumSizesData, sizesData) \
+ schedule(static) \
+ if(keysSize*outDim > THNN_SPARSE_OMP_THRESHOLD && batchSize > 1)
+ for (j = 0; j < batchSize; j++)
+ {
+ long offset = j == 0 ? 0 : cumSumSizesData[j - 1];
+ real val = 0;
+ real* loutputData = outputData + j*outDim;
+ real* lweightData = weightData;
+ memcpy(loutputData, biasData, outDim*sizeof(real));
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ real val;
+ long woffset = weightStride0*(keysData[offset] + keysOffset);
+ if (maxNormalize)
+ {
+ val = valuesData[offset];
+ real absVal = fabs(val);
+ if (train)
+ {
+ if (absVal > weightData[woffset])
+ {
+ weightData[woffset] = absVal;
+ weightData[woffset+1] = 1/absVal;
+ }
+
+ /*
+ * The following can be used to scale the size of the updates
+ * depending on some rule, e.g. the frequency of a feature, ...
+ * The commented section thereafter is just an example of what can be done:
+ *
+ *```
+ * weightData[woffset+2] = weightData[woffset+2]==0?1:(weightData[woffset+2] / (weightData[woffset+2] + 1));
+ * real alpha = 1;
+ * real beta = 0.01;
+ * real gamma = 1 - 0.000001;
+ * real l = weightData[woffset+2]==0?1/gamma:(weightData[woffset+2] - beta) / (alpha - beta);
+ * l = gamma*l;
+ * weightData[woffset+2] = (alpha-beta)*l + beta;
+ * ```
+ *
+ * TODO: implement a smarter update scale.
+ */
+ weightData[woffset+2] = 1;
+ }
+
+ /* Normalize + Clamp */
+ val = (absVal > weightData[woffset] ? THNN_INDEXLINEAR_SIGN(val):val*weightData[woffset+1]) + weightData[woffset+3];
+ normalizedValuesData[offset] = val;
+
+ lweightData = weightData + woffset + maxNormalize;
+ }
+ else
+ {
+ val = valuesData[offset];
+ lweightData = weightData + woffset;
+ }
+ if (outDim > THNN_SPARSE_OUTDIM_THRESHOLD)
+ {
+ THBlas_(axpy)(outDim, val, lweightData, 1, loutputData, 1);
+ }
+ else
+ {
+ for (k=0; k < outDim; k++)
+ {
+ loutputData[k] += lweightData[k] * val;
+ }
+ }
+ offset++;
+ }
+ }
+ }
+ return;
+}
+
+void THNN_(IndexLinear_updateParameters)(
+ THNNState *state,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *weight,
+ THTensor *bias,
+ THLongTensor *runningKeys,
+ THLongTensor *cumSumSizes,
+ long keysOffset,
+ accreal weightDecay_,
+ accreal learningRate_)
+{
+ real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_);
+ real learningRate = TH_CONVERT_ACCREAL_TO_REAL(learningRate_);
+ /* Retrieve all the dimensions of the problem */
+ long outDim = THTensor_(size)(bias, 0);
+ long woutDim = THTensor_(size)(weight, 1);
+ int maxNormalize = woutDim - outDim;
+ long keysSize = THLongTensor_size(runningKeys, 0);
+
+ /* Access the storage data/strides */
+ real* gradWeightData = THTensor_(data)(gradWeight);
+ real* weightData = THTensor_(data)(weight);
+ long weightStride0 = weight->stride[0];
+ real* gradBiasData = THTensor_(data)(gradBias);
+ real* biasData = THTensor_(data)(bias);
+ long* keysData = THLongTensor_data(runningKeys);
+
+ /* Make sure these inputs are contiguous to accelerate computations */
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 1, "gradWeight must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(gradBias), 2, "gradBias vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(weight), 3, "weight matrix must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(bias), 4, "bias vector must be contiguous");
+ THArgCheck(THLongTensor_isContiguous(runningKeys), 5, "keys vector must be contiguous");
+
+ int j,k;
+ long offset = 0;
+
+ /* Update the bias first */
+ THVector_(cadd)(biasData, biasData, gradBiasData, -learningRate, outDim);
+
+ /* Separate cases: output dimension is == 1, or > 1
+ * This allows for some optimizations.
+ * No multithreading here as this could
+ * corrupt the results (hogwild style) */
+ if (outDim == 1)
+ {
+ if (maxNormalize)
+ {
+ if (weightDecay)
+ {
+ for (j = 0; j < keysSize; j++)
+ {
+ long woffset = weightStride0*(keysData[j] + keysOffset) + maxNormalize;
+ real lr = learningRate*weightData[woffset-2];
+ weightData[woffset-1] -= weightData[woffset]*gradWeightData[2*j]*lr;
+ weightData[woffset] -= gradWeightData[2*j+1]*lr - weightDecay * weightData[woffset-2] * weightData[woffset];
+ }
+ }
+ else
+ {
+ for (j = 0; j < keysSize; j++)
+ {
+ long woffset = weightStride0*(keysData[j] + keysOffset) + maxNormalize;
+ real lr = learningRate*weightData[woffset-2];
+ weightData[woffset-1] -= weightData[woffset]*gradWeightData[2*j]*lr;
+ weightData[woffset] -= gradWeightData[2*j+1]*lr;
+ }
+ }
+ }
+ else
+ {
+ if (weightDecay)
+ {
+ for (j = 0; j < keysSize; j++)
+ {
+ long woffset = weightStride0*(keysData[j] + keysOffset);
+ weightData[woffset] -= gradWeightData[j]*learningRate + weightDecay * weightData[woffset];
+ }
+ }
+ else
+ {
+ for (j = 0; j < keysSize; j++)
+ {
+ weightData[weightStride0*(keysData[j] + keysOffset)] -= gradWeightData[j]*learningRate;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (j = 0; j < keysSize; j++)
+ {
+ real lr = learningRate;
+ real wd = weightDecay;
+ real* lweightData;
+ long woffset = weightStride0*(keysData[j] + keysOffset);
+ real* lgradWeightData = gradWeightData + j*outDim;
+ if (maxNormalize)
+ {
+ lgradWeightData += j*outDim;
+ /* weightData[woffset + 2] */
+ lweightData = weightData + woffset + maxNormalize - 2;
+ lr = lr*lweightData[0];
+ wd = weightDecay*lweightData[0];
+ /* weightData[woffset + 3] */
+ lweightData++;
+ for (k=0; k < outDim; k++)
+ {
+ lweightData[0] -= lgradWeightData[k]*lweightData[k+1]*lr;
+ }
+ lweightData++;
+ lgradWeightData += outDim;
+ }
+ else
+ {
+ lweightData = weightData + woffset;
+ }
+
+ /* We do sparse weight decay.
+ * We think it makes more sense. */
+ if (weightDecay)
+ {
+ for (k=0; k < outDim; k++)
+ {
+ lweightData[k] -= lweightData[k]*wd;
+ }
+ }
+
+ if (outDim > THNN_SPARSE_OUTDIM_THRESHOLD)
+ {
+ THBlas_(axpy)(outDim, -lr, lgradWeightData, 1, lweightData, 1);
+ }
+ else
+ {
+ for (k=0; k < outDim; k++)
+ {
+ lweightData[k] -= lgradWeightData[k]*lr;
+ }
+ }
+ }
+ }
+}
+
+
+void THNN_(IndexLinear_accUpdateGradParameters)(
+ THNNState *state,
+ THLongTensor *keys,
+ long keysOffset,
+ THTensor *values,
+ THLongTensor *sizes,
+ THLongTensor *cumSumSizes,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *bias,
+ accreal weightDecay_,
+ accreal scale_)
+{
+ real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_);
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ /* Retrieve all the dimensions of the problem */
+ long batchSize = THLongTensor_size(sizes, 0);
+ long keysSize = THLongTensor_size(keys, 0);
+ long outDim = THTensor_(size)(bias, 0);
+ long woutDim = THTensor_(size)(weight, 1);
+ int maxNormalize = woutDim - outDim;
+ THArgCheck(THNN_(checkKeysValues)(keys, values), 1, "Keys and values should have the same number of elements");
+
+ /* Access the storage data/strides */
+ real* gradOutputData = THTensor_(data)(gradOutput);
+ real* valuesData = THTensor_(data)(values);
+ real* weightData = THTensor_(data)(weight);
+ real* biasData = THTensor_(data)(bias);
+ long weightStride0 = weight->stride[0];
+ long biasStride = bias->stride[0];
+ long* keysData = THLongTensor_data(keys);
+ long* sizesData = THLongTensor_data(sizes);
+
+ /* Make sure these inputs are contiguous to accelerate computations */
+ THArgCheck(THLongTensor_isContiguous(keys), 1, "keys vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(values), 3, "values vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(gradOutput), 6, "gradOutput vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(weight), 7, "weight matrix must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(bias), 8, "bias vector must be contiguous");
+
+ int i,j,k;
+
+ /* Separate cases: output dimension is == 1, or > 1
+ * This allows for some optimizations.
+ * No multithreading here as this could
+ * corrupt the results (hogwild style) */
+ if (outDim == 1)
+ {
+ if (maxNormalize)
+ {
+ long offset = 0;
+ for (j = 0; j < batchSize; j++)
+ {
+ real* lgradOutputData = gradOutputData + j;
+ *biasData -= *lgradOutputData * scale;
+ real val = *lgradOutputData * scale;
+ real* lweightData = weightData;
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ long idx = weightStride0*(keysData[offset] + keysOffset) + maxNormalize;
+ weightData[idx-1] -= weightData[idx]*val*weightData[idx-2];
+ weightData[idx] -= (val*valuesData[offset] - weightDecay * weightData[idx])*weightData[idx-2];
+ offset++;
+ }
+ }
+
+ offset = 0;
+ for (j = 0; j < batchSize; j++)
+ {
+ real* lweightData = weightData;
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ long idx = weightStride0*(keysData[offset] + keysOffset) + maxNormalize;
+ weightData[idx-2] = 0;
+ offset++;
+ }
+ }
+ }
+ else
+ {
+ if (weightDecay)
+ {
+ long offset = 0;
+ for (j = 0; j < batchSize; j++)
+ {
+ real* lgradOutputData = gradOutputData + j;
+ *biasData -= *lgradOutputData * scale;
+ real val = *lgradOutputData * scale;
+ real* lweightData = weightData;
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ long idx = weightStride0*(keysData[offset] + keysOffset);
+ weightData[idx] -= val * valuesData[offset] + weightData[idx] * weightDecay;
+ offset++;
+ }
+ }
+ }
+ else
+ {
+ long offset = 0;
+ for (j = 0; j < batchSize; j++)
+ {
+ real val = gradOutputData[j] * scale;
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ weightData[(keysData[offset] + keysOffset)*weightStride0] -= val * valuesData[offset];
+ offset++;
+ }
+ *biasData -= val;
+ }
+ }
+ }
+ }
+ else {
+ long offset = 0;
+ for (j = 0; j < batchSize; j++)
+ {
+ real val = 0;
+ real* lgradOutputData = gradOutputData + j*outDim;
+ real* lweightData = weightData;
+ THVector_(cadd)(biasData, biasData, lgradOutputData, -scale, outDim);
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ real val = valuesData[offset] * scale;
+ real wd = weightDecay;
+
+ // Max normalize case
+ if (maxNormalize)
+ {
+ lweightData = weightData + weightStride0*(keysData[offset] + keysOffset) + (maxNormalize-2);
+ val *= lweightData[0];
+ wd *= lweightData[0];
+ for (k=0; k < outDim; k++)
+ {
+ lweightData[1] -= lweightData[k+2]*scale*lgradOutputData[k]*lweightData[0];
+ }
+ lweightData += 2;
+ }
+ else
+ {
+ lweightData = weightData + weightStride0*(keysData[offset] + keysOffset);
+ }
+
+ /* We do sparse weight decay.
+ * We think it makes more sense. */
+ if (weightDecay)
+ {
+ if (outDim > THNN_SPARSE_OUTDIM_THRESHOLD)
+ {
+ THBlas_(axpy)(outDim, -wd, lweightData, 1, lweightData, 1);
+ }
+ else
+ {
+ for (k=0; k < outDim; k++)
+ {
+ lweightData[k] -= wd * lweightData[k];
+ }
+ }
+ }
+
+ if (outDim > THNN_SPARSE_OUTDIM_THRESHOLD)
+ {
+ THBlas_(axpy)(outDim, -val, lgradOutputData, 1, lweightData, 1);
+ }
+ else
+ {
+ for (k=0; k < outDim; k++)
+ {
+ lweightData[k] -= val * lgradOutputData[k];
+ }
+ }
+ offset++;
+ }
+ }
+
+ /* Max Normalize case:
+ * Reset the smart update scaling if
+ * one does it batch-wise.
+ * TODO: Decide what to do with this piece of code.
+ * NB: If the code below is uncommented, the commented code
+ * in IndexLinear:zeroGradParameters() should be too. */
+
+ /*
+ if (maxNormalize)
+ {
+ offset = 0;
+ for (j = 0; j < batchSize; j++)
+ {
+ real* lweightData = weightData;
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ real val = valuesData[offset] * scale;
+ real wd = weightDecay;
+
+ lweightData = weightData + weightStride0*(keysData[offset] + keysOffset) + (maxNormalize-2);
+ lweightData[0] = 0;
+ offset++;
+ }
+ }
+ }
+ */
+ }
+ return;
+}
+
+void THNN_(IndexLinear_accGradParameters)(
+ THNNState *state,
+ THLongTensor *keys,
+ long keysOffset,
+ THTensor *values,
+ THLongTensor *sizes,
+ THLongTensor *cumSumSizes,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *valuesBuffer,
+ accreal weightDecay_,
+ accreal scale_)
+{
+ real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_);
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ /* Retrieve all the dimensions of the problem */
+ long batchSize = THLongTensor_size(sizes, 0);
+ long keysSize = THLongTensor_size(keys, 0);
+ long outDim = THTensor_(size)(bias, 0);
+ long woutDim = THTensor_(size)(weight, 1);
+ long maxNormalize = (woutDim - outDim) > 0 ? 1 : 0;
+ THArgCheck(THNN_(checkKeysValues)(keys, values), 1, "Keys and values should have the same number of elements");
+ long* sizesData = THLongTensor_data(sizes);
+
+ /* Compute the cumulative sizes */
+ THLongTensor* cumSizes = THLongTensor_new();
+ THLongTensor_cumsum(cumSizes, sizes, 0);
+ long* cumSizesData = THLongTensor_data(cumSizes);
+
+ /* Resize the gradWeight buffer to keep it dense.
+ * That speeds up updates A LOT assuming random mem access. */
+ THTensor_(resize2d)(gradWeight, keysSize, outDim * (maxNormalize>0?2:1));
+
+ /* Access the storage data/strides */
+ real* gradOutputData = THTensor_(data)(gradOutput);
+ real* valuesData = THTensor_(data)(values);
+ real* gradWeightData = THTensor_(data)(gradWeight);
+ real* weightData = THTensor_(data)(weight);
+ real* gradBiasData = THTensor_(data)(gradBias);
+ long gradWeightStride0 = gradWeight->stride[0];
+ long weightStride0 = weight->stride[0];
+ long* keysData = THLongTensor_data(keys);
+
+ /* Make sure these inputs are contiguous to accelerate computations */
+ THArgCheck(THLongTensor_isContiguous(keys), 1, "keys vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(values), 3, "values vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(gradOutput), 6, "gradOutput vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 7, "gradWeight must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(gradBias), 8, "gradBias vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(weight), 9, "weight must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(bias), 10, "bias vector must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(valuesBuffer), 11, "valuesBuffer must be contiguous");
+
+ int i,j,k;
+
+ /* Separate cases: output dimension is == 1, or > 1
+ * This allows for some optimizations.
+ * No multithreading here as this could
+ * corrupt the results (hogwild style) */
+ if (outDim == 1)
+ {
+ for (j = 0; j < batchSize; j++)
+ {
+ long offset = j==0?0:cumSizesData[j-1];
+ real val = gradOutputData[j] * scale;
+ real* lgradWeightData = gradWeightData + offset;
+ real* lvaluesData = valuesData + offset;
+ long end = sizesData[j];
+
+ if (maxNormalize)
+ {
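+ /* gradWeight was resized to keysSize x 2 in the maxNormalize case
+ * (scale and value gradients per key), so step past `offset` keys
+ * a second time to reach this batch's rows. */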
+ lgradWeightData += offset;
+ i = 0;
+ for(;i < end; i++)
+ {
+ lgradWeightData[2*i] = val;
+ lgradWeightData[2*i+1] = val * lvaluesData[i];
+ }
+ }
+ else
+ {
+ i = 0;
+ for(;i < end-4; i += 4)
+ {
+ lgradWeightData[i] = val * lvaluesData[i];
+ lgradWeightData[i+1] = val * lvaluesData[i+1];
+ lgradWeightData[i+2] = val * lvaluesData[i+2];
+ lgradWeightData[i+3] = val * lvaluesData[i+3];
+ }
+
+ for(; i < end; i++)
+ {
+ lgradWeightData[i] = val * lvaluesData[i];
+ }
+ }
+ *gradBiasData += val;
+ offset += end;
+ }
+ }
+ else {
+ for (j = 0; j < batchSize; j++)
+ {
+ long offset = j==0?0:cumSizesData[j-1];
+ real val = 0;
+ real* lgradOutputData = gradOutputData + j*outDim;
+ real* lgradWeightData = gradWeightData;
+ real* lweightData = weightData;
+ THVector_(cadd)(gradBiasData, gradBiasData, lgradOutputData, scale, outDim);
+ for (i = 0; i < sizesData[j]; i++)
+ {
+ real val = valuesData[offset] * scale;
+ lgradWeightData = gradWeightData + offset*outDim;
+ if (maxNormalize)
+ {
+ lgradWeightData += offset*outDim;
+ k = 0;
+ for(;k < outDim-4; k += 4)
+ {
+ lgradWeightData[k] = lgradOutputData[k]*scale;
+ lgradWeightData[k+1] = lgradOutputData[k+1]*scale;
+ lgradWeightData[k+2] = lgradOutputData[k+2]*scale;
+ lgradWeightData[k+3] = lgradOutputData[k+3]*scale;
+ }
+
+ for(; k < outDim; k++)
+ {
+ lgradWeightData[k] = lgradOutputData[k]*scale;
+ }
+ lgradWeightData += outDim;
+ }
+ k = 0;
+ for(;k < outDim-4; k += 4)
+ {
+ lgradWeightData[k] = val * lgradOutputData[k];
+ lgradWeightData[k+1] = val * lgradOutputData[k+1];
+ lgradWeightData[k+2] = val * lgradOutputData[k+2];
+ lgradWeightData[k+3] = val * lgradOutputData[k+3];
+ }
+
+ for(; k < outDim; k++)
+ {
+ lgradWeightData[k] = val * lgradOutputData[k];
+ }
+ offset++;
+ }
+ }
+ }
+ THLongTensor_free(cumSizes);
+ return;
+}
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/L1Cost.c b/contrib/lua-torch/nn/lib/THNN/generic/L1Cost.c
new file mode 100644
index 000000000..53940e894
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/L1Cost.c
@@ -0,0 +1,38 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/L1Cost.c"
+#else
+
+void THNN_(L1Cost_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output)
+{
+ THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+ accreal sum = 0;
+
+ TH_TENSOR_APPLY(real, input,
+ sum += fabs(*input_data);
+ );
+
+ THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(L1Cost_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput)
+{
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY2(real, gradInput, real, input,
+ if (*input_data > 0)
+ *gradInput_data = 1;
+ else if (*input_data < 0)
+ *gradInput_data = -1;
+ else
+ *gradInput_data = 0;
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LeakyReLU.c b/contrib/lua-torch/nn/lib/THNN/generic/LeakyReLU.c
new file mode 100644
index 000000000..074047d83
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/LeakyReLU.c
@@ -0,0 +1,57 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/LeakyReLU.c"
+#else
+
+void THNN_(LeakyReLU_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal negval_,
+ bool inplace)
+{
+ real negval = TH_CONVERT_ACCREAL_TO_REAL(negval_);
+ if (inplace)
+ {
+ TH_TENSOR_APPLY(real, input,
+ if (*input_data <= 0)
+ *input_data *= negval;
+ );
+ THTensor_(set)(output, input);
+ }
+ else
+ {
+ THTensor_(resizeAs)(output, input);
+ TH_TENSOR_APPLY2(real, output, real, input,
+ *output_data = *input_data > 0 ? *input_data : *input_data * negval;
+ );
+ }
+}
+
+void THNN_(LeakyReLU_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ accreal negval_,
+ bool inplace)
+{
+ real negval = TH_CONVERT_ACCREAL_TO_REAL(negval_);
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ if (inplace)
+ {
+ TH_TENSOR_APPLY2(real, gradOutput, real, input,
+ if (*input_data <= 0)
+ *gradOutput_data *= negval;
+ );
+ THTensor_(set)(gradInput, gradOutput);
+ }
+ else
+ {
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ *gradInput_data = *input_data > 0 ? *gradOutput_data : *gradOutput_data * negval;
+ );
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Linear.c b/contrib/lua-torch/nn/lib/THNN/generic/Linear.c
new file mode 100644
index 000000000..8c5cd115e
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/Linear.c
@@ -0,0 +1,114 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Linear.c"
+#else
+
+void THNN_(Linear_updateAddBuffer)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *addBuffer)
+{
+ long nframe = THTensor_(size)(input,0);
+ long nElement = THTensor_(nElement)(addBuffer);
+ if (nElement != nframe) {
+ THTensor_(resize1d)(addBuffer,nframe);
+ THTensor_(fill)(addBuffer,1.0);
+ }
+}
+
+void THNN_(Linear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *addBuffer)
+{
+ long dim = THTensor_(nDimension)(input);
+ if (dim == 1) {
+ THTensor_(resize1d)(output,THTensor_(size)(weight,0));
+ if (bias) {
+ THTensor_(copy)(output,bias);
+ }
+ else {
+ THTensor_(zero)(output);
+ }
+ THTensor_(addmv)(output,1,output,1,weight,input);
+ }
+ else if (dim == 2) {
+ long nframe = THTensor_(size)(input,0);
+ long nElement = THTensor_(nElement)(output);
+ THTensor_(resize2d)(output,nframe,THTensor_(size)(weight,0));
+ if (THTensor_(nElement)(output) != nElement) {
+ THTensor_(zero)(output);
+ }
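+ /* addBuffer is a length-nframe vector of ones; the addr() outer-product
+ * update below uses it to add the bias to every row of the batch. */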
+ THNN_(Linear_updateAddBuffer)(state,input,addBuffer);
+ THTensor *tweight = THTensor_(new)();
+ THTensor_(transpose)(tweight,weight,0,1);
+ THTensor_(addmm)(output,0,output,1,input,tweight);
+ THTensor_(free)(tweight);
+ if (bias) {
+ THTensor_(addr)(output,1,output,1,addBuffer,bias);
+ }
+ }
+}
+
+void THNN_(Linear_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight)
+{
+ if (gradInput) {
+ long nElement = THTensor_(nElement)(gradInput);
+ THTensor_(resizeAs)(gradInput,input);
+ if (THTensor_(nElement)(gradInput) != nElement) {
+ THTensor_(zero)(gradInput);
+ }
+
+ long dim = THTensor_(nDimension)(input);
+ if (dim == 1) {
+ THTensor *tweight = THTensor_(new)();
+ THTensor_(transpose)(tweight,weight,0,1);
+ THTensor_(addmv)(gradInput,0,gradInput,1,tweight,gradOutput);
+ THTensor_(free)(tweight);
+ }
+ else if (dim == 2) {
+ THTensor_(addmm)(gradInput,0,gradInput,1,gradOutput,weight);
+ }
+ }
+}
+
+void THNN_(Linear_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *addBuffer,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ long dim = THTensor_(nDimension)(input);
+ if (dim == 1) {
+ THTensor_(addr)(gradWeight,1,gradWeight,scale,gradOutput,input);
+ if (bias) {
+ THTensor_(cadd)(gradBias,gradBias,scale,gradOutput);
+ }
+ }
+ else if (dim == 2) {
+ THTensor *tgradOutput = THTensor_(new)();
+ THTensor_(transpose)(tgradOutput,gradOutput,0,1);
+ THTensor_(addmm)(gradWeight,1,gradWeight,scale,tgradOutput,input);
+ if (bias) {
+ THNN_(Linear_updateAddBuffer)(state,input,addBuffer);
+ THTensor_(addmv)(gradBias,1,gradBias,scale,tgradOutput,addBuffer);
+ }
+ THTensor_(free)(tgradOutput);
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LogSigmoid.c b/contrib/lua-torch/nn/lib/THNN/generic/LogSigmoid.c
new file mode 100644
index 000000000..651d56002
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/LogSigmoid.c
@@ -0,0 +1,36 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/LogSigmoid.c"
+#else
+
+void THNN_(LogSigmoid_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *buffer)
+{
+ THTensor_(resizeAs)(output, input);
+ THTensor_(resizeAs)(buffer, input);
+
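+ /* log(sigmoid(x)) = -log(1 + exp(-x)). The buffer keeps z = exp(-x)
+ * so the backward pass can form the derivative z / (1 + z) directly. */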
+ TH_TENSOR_APPLY3(real, output, real, input, real, buffer,
+ real z = exp(-*input_data);
+ *buffer_data = z;
+ *output_data = -log(1. + z);
+ );
+}
+
+void THNN_(LogSigmoid_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *buffer)
+{
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ THTensor_(resizeAs)(gradInput, buffer);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer,
+ real z = *buffer_data;
+ *gradInput_data = *gradOutput_data * z / (1. + z);
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c b/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c
new file mode 100644
index 000000000..a7280422b
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/LogSoftMax.c
@@ -0,0 +1,137 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/LogSoftMax.c"
+#else
+
+void THNN_(LogSoftMax_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output)
+{
+ real *input_data, *output_data;
+ ptrdiff_t nframe = 0, dim = 0, stride = 0;
+ ptrdiff_t t, d;
+
+ if (input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ stride = 1;
+ }
+ else if (input->nDimension == 2)
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ stride = 1;
+ }
+ else if (input->nDimension == 3)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ stride = input->size[1]*input->size[2];
+ }
+ else if (input->nDimension == 4)
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ stride = input->size[2]*input->size[3];
+ }
+ else
+ THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected");
+
+ input = THTensor_(newContiguous)(input);
+ THTensor_(resizeAs)(output, input);
+
+ real *input_data0 = THTensor_(data)(input);
+ real *output_data0 = THTensor_(data)(output);
+
+ accreal logsum;
+ real maxInput;
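+ /* Each of the stride*nframe slices along the class dimension is handled
+ * independently: logsum = max + log(sum_d exp(x_d - max)), and
+ * output_d = x_d - logsum. Subtracting the row max keeps exp() from
+ * overflowing without changing the result. */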
+ #pragma omp parallel for private(t, d, maxInput, logsum, input_data, output_data)
+ for (t = 0; t < stride*nframe; t++)
+ {
+ logsum = 0;
+ maxInput = -THInf;
+ input_data = input_data0 + (t/stride)*dim*stride + t % stride;
+ output_data = output_data0 + (t/stride)*dim*stride + t % stride;
+
+ for (d = 0; d < dim; d++)
+ maxInput = THMax(maxInput, input_data[d*stride]);
+
+ for (d = 0; d < dim; d++)
+ logsum += exp(input_data[d*stride] - maxInput);
+ logsum = maxInput + log(logsum);
+
+ for (d = 0; d < dim; d++)
+ output_data[d*stride] = input_data[d*stride] - logsum;
+ }
+
+ THTensor_(free)(input);
+}
+
+void THNN_(LogSoftMax_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output)
+{
+ THNN_CHECK_SHAPE(input, gradOutput);
+ real *gradInput_data, *gradOutput_data, *output_data;
+ ptrdiff_t nframe = 0, dim = 0, stride = 0;
+ ptrdiff_t t, d;
+
+ if (output->nDimension == 1)
+ {
+ nframe = 1;
+ dim = output->size[0];
+ stride = 1;
+ }
+ else if (output->nDimension == 2)
+ {
+ nframe = output->size[0];
+ dim = output->size[1];
+ stride = 1;
+ }
+ else if (output->nDimension == 3)
+ {
+ nframe = 1;
+ dim = output->size[0];
+ stride = output->size[1]*output->size[2];
+ }
+ else if (output->nDimension == 4)
+ {
+ nframe = output->size[0];
+ dim = output->size[1];
+ stride = output->size[2]*output->size[3];
+ }
+ else
+ THError("1D, 2D, 3D or 4D tensor expected");
+
+ output = THTensor_(newContiguous)(output);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ THTensor_(resizeAs)(gradInput, output);
+ real *gradInput_data0 = THTensor_(data)(gradInput);
+ real *output_data0 = THTensor_(data)(output);
+ real *gradOutput_data0 = THTensor_(data)(gradOutput);
+ accreal sum;
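+ /* Since output holds log-softmax values, exp(output) is the softmax, and
+ * gradInput_d = gradOutput_d - exp(output_d) * sum_j gradOutput_j. */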
+ #pragma omp parallel for private(t, sum, d, gradInput_data, output_data, gradOutput_data)
+ for (t = 0; t < stride*nframe; t++)
+ {
+ sum = 0;
+ gradInput_data = gradInput_data0 + (t/stride)*dim*stride + t % stride;
+ output_data = output_data0 + (t/stride)*dim*stride + t % stride;
+ gradOutput_data = gradOutput_data0 + (t/stride)*dim*stride + t % stride;
+
+ for (d = 0; d < dim; d++)
+ sum += gradOutput_data[d*stride];
+
+ for (d = 0; d < dim; d++)
+ gradInput_data[d*stride] = gradOutput_data[d*stride] - exp(output_data[d*stride])*sum;
+ }
+
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(output);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/LookupTable.c b/contrib/lua-torch/nn/lib/THNN/generic/LookupTable.c
new file mode 100644
index 000000000..46bc2c3c1
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/LookupTable.c
@@ -0,0 +1,225 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/LookupTable.c"
+#else
+
+static void THNN_(LookupTable_resetCount)(
+ THInteger_t *count_data,
+ THIndexTensor *input)
+{
+ ptrdiff_t i;
+ THIndex_t *input_data = THIndexTensor_(data)(input);
+ ptrdiff_t numel = THIndexTensor_(nElement)(input);
+
+ for (i = 0; i<numel; i++)
+ {
+ long k = input_data[i] - TH_INDEX_BASE;
+ count_data[k] = 0;
+ }
+ for (i = 0; i<numel; i++)
+ {
+ long k = input_data[i] - TH_INDEX_BASE;
+ count_data[k]++;
+ }
+}
+
+void THNN_(LookupTable_accGradParameters)(
+ THNNState *state,
+ THIndexTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THIntegerTensor *count,
+ THTensor *sorted,
+ THIndexTensor *indices,
+ bool scaleGradByFreq,
+ int paddingValue,
+ accreal ascale)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(ascale);
+ ptrdiff_t i;
+ THInteger_t *count_data = NULL;
+
+ if (scaleGradByFreq)
+ {
+ THIntegerTensor_(resize1d)(count, gradWeight->size[0]);
+ count_data = THIntegerTensor_(data)(count);
+ }
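+ /* With scaleGradByFreq, count_data[k] will hold the number of times index
+ * k occurs in this batch, and each accumulated row is divided by it. */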
+
+ if (!THTensor_(isContiguous)(gradWeight))
+ THError("gradWeight must be contiguous");
+ if (!THIndexTensor_(isContiguous)(input))
+ THError("input must be contiguous");
+ if (THIndexTensor_(nDimension)(input) != 1 && THIndexTensor_(nDimension)(input) != 2) {
+ THDescBuff s1 = THIndexTensor_(sizeDesc)(input);
+ THError("input must be a vector or matrix, but is of shape: %s", s1.str);
+ }
+
+ THIndex_t *input_data = THIndexTensor_(data)(input);
+ ptrdiff_t numel = THIndexTensor_(nElement)(input);
+ long numw = THTensor_(size)(gradWeight, 0);
+
+ // check that inputs are all within range
+ for (i=0; i<numel; i++)
+ if (input_data[i] < TH_INDEX_BASE || input_data[i] >= numw + TH_INDEX_BASE) {
+ THError("inputs need to be in the range %ld <= input < %ld, "
+ "but got input of value: %ld", TH_INDEX_BASE, (numw + TH_INDEX_BASE),
+ input_data[i]);
+ }
+
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ real *gw = THTensor_(data)(gradWeight);
+ real *go = THTensor_(data)(gradOutput);
+ long stride = THTensor_(stride)(gradWeight, 0);
+
+ if (count_data)
+ THNN_(LookupTable_resetCount)(count_data, input);
+
+#ifdef _OPENMP
+ if (numel > 1000)
+ {
+ // The strategy is to parallelize over sections of the vocabulary, so that
+ // thread 1 handles updates to gradWeight[0..nVocab/nThreads]. Every thread
+ // has to traverse the entire input, but the dominating factor is the axpy
+ // BLAS call.
+ #pragma omp parallel private(i)
+ {
+ int tid = omp_get_thread_num();
+ int nthreads = omp_get_num_threads();
+
+ long start = tid * (numw/nthreads + 1);
+ long end = start + (numw/nthreads + 1);
+ for (i=0; i<numel; i++)
+ {
+ if (input_data[i] != paddingValue)
+ {
+ long k = input_data[i] - TH_INDEX_BASE;
+ if (k >= start && k < end)
+ {
+ real scale_ = scale;
+ if (count_data) scale_ /= count_data[k];
+ THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
+ }
+ }
+ }
+ }
+
+ THTensor_(free)(gradOutput);
+ return;
+ }
+#endif
+
+ for (i=0; i<numel; i++)
+ {
+ if (input_data[i] != paddingValue)
+ {
+ long k = input_data[i] - TH_INDEX_BASE;
+ real scale_ = scale;
+ if (count_data) scale_ /= count_data[k];
+ THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
+ }
+ }
+
+ THTensor_(free)(gradOutput);
+}
+
+/*
+ * Keep the norm of weight smaller than maxNorm
+ */
+
+static void THNN_(LookupTable_renormRow)(
+ real *row_data,
+ long stride,
+ real maxNorm,
+ real normType)
+{
+ real norm = 0;
+ real new_norm;
+ long j;
+ for (j=0; j<stride; j++)
+ {
+ if (normType == 1) {
+ norm += fabs(row_data[j]);
+ } else if (normType == 2) {
+ norm += row_data[j] * row_data[j];
+ } else {
+ norm += pow(fabs(row_data[j]), normType);
+ }
+ }
+ norm = pow(norm, 1.0 / normType);
+ if (norm > maxNorm)
+ {
+ new_norm = maxNorm / (norm + 1e-7);
+ for (j=0; j<stride; j++) {
+ row_data[j] *= new_norm;
+ }
+ }
+}
+
+static int THNN_(compare_THIndex)(const void* a, const void* b)
+{
+ return *(const THIndex_t*)a < *(const THIndex_t*)b ? -1 : 1;
+}
+
+void THNN_(LookupTable_renorm)(
+ THNNState *state,
+ THIndexTensor *idx,
+ THTensor *weight,
+ accreal maxNorm_,
+ accreal normType_)
+{
+ real maxNorm = TH_CONVERT_ACCREAL_TO_REAL(maxNorm_);
+ real normType = TH_CONVERT_ACCREAL_TO_REAL(normType_);
+ if (!THTensor_(isContiguous)(weight))
+ THError("weight must be contiguous");
+ if (!THIndexTensor_(isContiguous)(idx))
+ THError("input must be contiguous");
+ if (THIndexTensor_(nDimension)(idx) != 1)
+ THError("idx must be a vector");
+ if (normType <= 0)
+ THError("non-positive-norm not supported");
+
+ ptrdiff_t i;
+ THIndex_t *row_idx = THIndexTensor_(data)(idx);
+ ptrdiff_t numel = THIndexTensor_(nElement)(idx);
+
+ long numw = THTensor_(size)(weight, 0);
+ long stride = THTensor_(stride)(weight, 0);
+ real *gw = THTensor_(data)(weight);
+ for (i=0; i<numel; i++) {
+ if (row_idx[i] < TH_INDEX_BASE || row_idx[i] >= numw + TH_INDEX_BASE) {
+ THError("input need to be in the range %ld <= input < %ld, "
+ "but got input of value: %ld", TH_INDEX_BASE, (numw + TH_INDEX_BASE),
+ row_idx[i]);
+ }
+ }
+ // get unique indices
+ qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
+ ptrdiff_t ptr = 0;
+ for (i=0; i<numel; i++)
+ if (i == 0 || row_idx[i] != row_idx[i-1])
+ row_idx[ptr++] = row_idx[i];
+ numel = ptr;
+
+#ifdef _OPENMP
+ if (numel > 1000)
+ {
+ // The strategy is to parallelize over the rows that appear in
+ // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
+ // This distributes the work evenly to each thread.
+ #pragma omp parallel for private(i)
+ for (i=0; i<numel; i++)
+ {
+ long k = row_idx[i] - TH_INDEX_BASE;
+ THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
+ }
+ return;
+ }
+#endif
+ for (i=0; i<numel; i++)
+ {
+ long k = row_idx[i] - TH_INDEX_BASE;
+ THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/MSECriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/MSECriterion.c
new file mode 100644
index 000000000..58911f6f0
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/MSECriterion.c
@@ -0,0 +1,45 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/MSECriterion.c"
+#else
+
+void THNN_(MSECriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+
+ real sum = 0;
+
+ TH_TENSOR_APPLY2(real, input, real, target,
+ real z = (*input_data - *target_data);
+ sum += z*z;
+ );
+
+ if (sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(MSECriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+
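+ /* d/dx (x - t)^2 = 2 * (x - t), so the factor is 2/nElement when
+ * averaging and 2 otherwise. */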
+ real norm = (sizeAverage ? 2./((real)THTensor_(nElement)(input)) : 2.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ *gradInput_data = norm * (*input_data - *target_data);
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/MarginCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/MarginCriterion.c
new file mode 100644
index 000000000..d6d9b60b9
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/MarginCriterion.c
@@ -0,0 +1,47 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/MarginCriterion.c"
+#else
+
+void THNN_(MarginCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage,
+ accreal margin_)
+{
+ real margin = TH_CONVERT_ACCREAL_TO_REAL(margin_);
+ THNN_CHECK_NELEMENT(input, target);
+ THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+ real sum = 0;
+
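+ /* Hinge loss with targets in {-1, +1}: sum of max(0, margin - x * y). */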
+ TH_TENSOR_APPLY2(real, input, real, target,
+ real z = (margin - *input_data * *target_data);
+ sum += z>0 ? z : 0;
+ );
+
+ if (sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(MarginCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage,
+ accreal margin_)
+{
+ real margin = TH_CONVERT_ACCREAL_TO_REAL(margin_);
+ THNN_CHECK_NELEMENT(input, target);
+ real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ *gradInput_data = (*input_data * *target_data) < margin ? -norm * *target_data : 0;
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/MultiLabelMarginCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/MultiLabelMarginCriterion.c
new file mode 100644
index 000000000..16398c13c
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/MultiLabelMarginCriterion.c
@@ -0,0 +1,184 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/MultiLabelMarginCriterion.c"
+#else
+
+// TODO: improve error messages
+void THNN_(MultiLabelMarginCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *output,
+ THTensor *isTarget,
+ bool sizeAverage)
+{
+ real *input_data, *isTarget_data;
+ THIndex_t *target_data;
+ long nframe, dim;
+ long t, d, dt, ddt;
+ real sum;
+
+ THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2,
+ "vector or matrix expected");
+
+ if (input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3,
+ "inconsistent target size");
+ }
+ else
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ THArgCheck((target->nDimension == 2) && (target->size[0] == nframe)
+ && (target->size[1] == dim), 3, "inconsistent target size");
+ }
+
+ THArgCheck(THIndexTensor_(minall)(target) >= -1+TH_INDEX_BASE, 3, "target out of range");
+ THArgCheck(THIndexTensor_(maxall)(target) < dim+TH_INDEX_BASE, 3, "target out of range");
+
+ target = THIndexTensor_(newContiguous)(target);
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+ target_data = THIndexTensor_(data)(target);
+
+ THNN_resizeAs_indices(isTarget, target);
+ THTensor_(zero)(isTarget);
+ isTarget_data = THTensor_(data)(isTarget);
+
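+ /* Per sample: targets are a front-packed list terminated by an index
+ * below TH_INDEX_BASE. isTarget marks the target classes, and the loss is
+ * (1/dim) * sum over target y and non-target d of max(0, 1 - x[y] + x[d]). */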
+ sum = 0;
+ for (t = 0; t < nframe; t++)
+ {
+ for (ddt = 0; ddt < dim; ddt++)
+ {
+ THIndex_t target_idx = target_data[ddt] - TH_INDEX_BASE;
+ if (target_idx < 0)
+ break;
+ isTarget_data[target_idx] = 1;
+ }
+ for (dt = 0; dt < dim; dt++)
+ {
+ THIndex_t target_idx = target_data[dt] - TH_INDEX_BASE;
+ real input_target;
+ if (target_idx < 0)
+ break;
+
+ input_target = input_data[target_idx];
+ for (d = 0; d < dim; d++)
+ {
+ if (!isTarget_data[d])
+ {
+ real z = 1 - input_target + input_data[d];
+ if (z > 0)
+ sum += z;
+ }
+ }
+ }
+ input_data += dim;
+ target_data += dim;
+ isTarget_data += dim;
+ }
+
+ sum /= dim;
+ if (sizeAverage)
+ sum /= nframe;
+
+ THTensor_(set1d)(output, 0, sum);
+
+ THTensor_(free)(input);
+ THIndexTensor_(free)(target);
+}
+
+void THNN_(MultiLabelMarginCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *gradInput,
+ THTensor *isTarget,
+ bool sizeAverage)
+{
+ real *input_data;
+ real *gradInput_data;
+ THIndex_t *target_data;
+ real *isTarget_data;
+ long nframe, dim;
+ long t, d, dt;
+ real g;
+
+ THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2,
+ "vector or matrix expected");
+
+ if (input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3,
+ "inconsistent target size");
+ THArgCheck((isTarget->nDimension == 1) && (isTarget->size[0] == dim), 3,
+ "inconsistent isTarget size");
+ }
+ else
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ THArgCheck((target->nDimension == 2) && (target->size[0] == nframe)
+ && (target->size[1] == dim), 3, "inconsistent target size");
+ THArgCheck((isTarget->nDimension == 2) && (isTarget->size[0] == nframe)
+ && (isTarget->size[1] == dim), 3, "inconsistent isTarget size");
+ }
+
+ THArgCheck(THIndexTensor_(minall)(target) >= -1+TH_INDEX_BASE, 3, "target out of range");
+ THArgCheck(THIndexTensor_(maxall)(target) < dim+TH_INDEX_BASE, 3, "target out of range");
+
+ THArgCheck(THTensor_(minall)(isTarget) >= 0, 3, "isTarget out of range");
+ THArgCheck(THTensor_(maxall)(isTarget) <= 1, 3, "isTarget out of range");
+
+ target = THIndexTensor_(newContiguous)(target);
+ input = THTensor_(newContiguous)(input);
+ isTarget = THTensor_(newContiguous)(isTarget);
+ input_data = THTensor_(data)(input);
+ target_data = THIndexTensor_(data)(target);
+ isTarget_data = THTensor_(data)(isTarget);
+
+ g = sizeAverage ? ( 1./((real)(nframe*dim)) ) : ( 1./((real)dim) );
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+ gradInput_data = THTensor_(data)(gradInput);
+
+ for (t = 0; t < nframe; t++)
+ {
+ for (dt = 0; dt < dim; dt++)
+ {
+ THIndex_t target_idx = target_data[dt] - TH_INDEX_BASE;
+ real input_target;
+ if (target_idx < 0)
+ break;
+
+ input_target = input_data[target_idx];
+ for (d = 0; d < dim; d++)
+ {
+ if (!isTarget_data[d])
+ {
+ real z = 1 - input_target + input_data[d];
+ if (z > 0)
+ {
+ gradInput_data[target_idx] -= g;
+ gradInput_data[d] += g;
+ }
+ }
+ }
+ }
+ input_data += dim;
+ target_data += dim;
+ isTarget_data += dim;
+ gradInput_data += dim;
+ }
+
+ THTensor_(free)(input);
+ THIndexTensor_(free)(target);
+ THTensor_(free)(isTarget);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/MultiMarginCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/MultiMarginCriterion.c
new file mode 100644
index 000000000..2f8f8ff58
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/MultiMarginCriterion.c
@@ -0,0 +1,168 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/MultiMarginCriterion.c"
+#else
+
+// TODO: improve error messages
+void THNN_(MultiMarginCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *output,
+ bool sizeAverage,
+ int p,
+ THTensor *weights,
+ accreal margin_)
+{
+ real margin = TH_CONVERT_ACCREAL_TO_REAL(margin_);
+ real *input_data, *weights_data;
+ THIndex_t *target_data;
+ long nframe, dim;
+ long t, d;
+ real sum;
+
+ THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2,
+ "vector or matrix expected");
+
+ if (input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ }
+ else
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3,
+ "inconsistent target size");
+ }
+
+ for (t = 0; t < nframe; t++)
+ {
+ THIndex_t idx = THIndexTensor_(get1d)(target, t);
+ THArgCheck((idx >= TH_INDEX_BASE) && (idx < dim + TH_INDEX_BASE), 3,
+ "target out of range");
+ }
+
+ input = THTensor_(newContiguous)(input);
+ target = THIndexTensor_(newContiguous)(target);
+ weights = weights ? THTensor_(newContiguous)(weights) : NULL;
+ input_data = THTensor_(data)(input);
+ target_data = THIndexTensor_(data)(target);
+ weights_data = weights ? THTensor_(data)(weights) : NULL;
+
+ sum = 0;
+ for (t = 0; t < nframe; t++)
+ {
+ THIndex_t target_idx = target_data[t] - TH_INDEX_BASE;
+ real input_target = input_data[target_idx];
+ for (d = 0; d < dim; d++)
+ {
+ real z = margin - input_target + input_data[d];
+ if (d == target_idx)
+ continue;
+
+ if (z > 0) {
+ real h = (p==1) ? z : z*z;
+ if(weights_data)
+ h *= weights_data[target_idx];
+ sum += h;
+ }
+ }
+ input_data += dim;
+ }
+
+ sum /= dim;
+ if(sizeAverage)
+ sum /= nframe;
+
+ THTensor_(set1d)(output, 0, sum);
+
+ THTensor_(free)(input);
+ THIndexTensor_(free)(target);
+ if(weights)
+ THTensor_(free)(weights);
+}
+
+void THNN_(MultiMarginCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage,
+ int p,
+ THTensor *weights,
+ accreal margin_)
+{
+ real margin = TH_CONVERT_ACCREAL_TO_REAL(margin_);
+ real *input_data;
+ real *gradInput_data;
+ THIndex_t *target_data;
+ real *weights_data;
+ long nframe, dim;
+ long t, d;
+ real g;
+
+ THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2,
+ "vector or matrix expected");
+
+ if (input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ }
+ else
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3,
+ "inconsistent target size");
+ }
+
+ g = (sizeAverage ? 1./((real)(nframe*dim)) : 1./((real)dim));
+
+ input = THTensor_(newContiguous)(input);
+ target = THIndexTensor_(newContiguous)(target);
+ input_data = THTensor_(data)(input);
+
+ THTensor_(resizeAs)(gradInput, input);
+ gradInput_data = THTensor_(data)(gradInput);
+
+ target_data = THIndexTensor_(data)(target);
+ weights = weights ? THTensor_(newContiguous)(weights) : NULL;
+ weights_data = weights ? THTensor_(data)(weights) : NULL;
+
+ for (t = 0; t < nframe; t++)
+ {
+ THIndex_t target_idx = target_data[t] - TH_INDEX_BASE;
+ real input_target = input_data[target_idx];
+ real gradInput_target = 0;
+ for (d = 0; d < dim; d++)
+ {
+ real z = margin - input_target + input_data[d];
+ if (d == target_idx)
+ continue;
+
+ if (z > 0)
+ {
+ real h = (p == 1) ? g : 2*g*z;
+ if(weights_data)
+ h *= weights_data[target_idx];
+ gradInput_target -= h;
+ gradInput_data[d] = h;
+ }
+ else
+ gradInput_data[d] = 0;
+ }
+ gradInput_data[target_idx] = gradInput_target;
+
+ input_data += dim;
+ gradInput_data += dim;
+ }
+
+ THTensor_(free)(input);
+ THIndexTensor_(free)(target);
+ if(weights)
+ THTensor_(free)(weights);
+}
+
+#endif
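
The forward pass above implements the multi-class margin loss L(x, y) = (1/dim) * sum over d != y of w[y] * max(0, margin - x[y] + x[d])^p, with p restricted to 1 or 2 and optional per-class weights. A minimal standalone sketch (not part of the patch; 0-based target index, hypothetical names):

#include <stdio.h>

/* Multi-class margin loss for one sample; p is 1 or 2, weights may be
 * NULL. Mirrors the inner loop of updateOutput above. */
static double multi_margin(const double *x, int y, int dim,
                           double margin, int p, const double *weights)
{
    double sum = 0.0;
    for (int d = 0; d < dim; d++) {
        if (d == y)
            continue;
        double z = margin - x[y] + x[d];
        if (z > 0.0) {
            double h = (p == 1) ? z : z * z;
            if (weights)
                h *= weights[y];
            sum += h;
        }
    }
    return sum / dim;
}

int main(void)
{
    double x[3] = {1.0, 0.2, 0.9};
    printf("loss = %f\n", multi_margin(x, 0, 3, 1.0, 1, NULL));  /* 0.366667 */
    return 0;
}
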
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/PReLU.c b/contrib/lua-torch/nn/lib/THNN/generic/PReLU.c
new file mode 100644
index 000000000..488322fde
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/PReLU.c
@@ -0,0 +1,207 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/PReLU.c"
+#else
+
+void THNN_(PReLU_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THIndex_t nOutputPlane)
+{
+ THTensor_(resizeAs)(output, input);
+
+ if (nOutputPlane == 0)
+ {
+ // handle shared parameter case
+ real w = *THTensor_(data)(weight);
+ TH_TENSOR_APPLY2(real, output, real, input,
+ *output_data = (*input_data > 0) ? *input_data : w*(*input_data);
+ );
+ }
+ else
+ {
+ input = THTensor_(newContiguous)(input);
+ long bs = 1, ks = 1;
+ {
+ long input_ndim = THTensor_(nDimension)(input);
+ if (input->size[input_ndim > 1] != nOutputPlane)
+ THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]);
+
+ if (input_ndim > 1) {
+ bs = input->size[0];
+ for (int d = 2; d < input_ndim; d++) {
+ ks *= input->size[d];
+ }
+ }
+ }
+
+ real *output_data = THTensor_(data)(output);
+ real *input_data = THTensor_(data)(input);
+ real *weight_data = THTensor_(data)(weight);
+ THIndex_t i, j, k;
+#pragma omp parallel for private(j,k)
+ for (i = 0; i < bs; ++i)
+ {
+ real* n_input_data = input_data + i*nOutputPlane*ks;
+ real* n_output_data = output_data + i*nOutputPlane*ks;
+ for (j = 0; j < nOutputPlane; ++j)
+ {
+ for (k = 0; k < ks; ++k)
+ n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : weight_data[j] * n_input_data[k];
+ n_input_data += ks;
+ n_output_data += ks;
+ }
+ }
+ THTensor_(free)(input);
+ }
+}
+
+void THNN_(PReLU_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THIndex_t nOutputPlane)
+{
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ THTensor_(resizeAs)(gradInput, input);
+
+ if (nOutputPlane == 0)
+ {
+ real w = THTensor_(data)(weight)[0];
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ if ((*input_data) > 0)
+ *gradInput_data = *gradOutput_data;
+ else
+ *gradInput_data = w * (*gradOutput_data);
+ );
+ }
+ else
+ {
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ weight = THTensor_(newContiguous)(weight);
+ const real *input_data = THTensor_(data)(input);
+ const real *gradOutput_data = THTensor_(data)(gradOutput);
+ const real *weight_data = THTensor_(data)(weight);
+ real *gradInput_data = THTensor_(data)(gradInput);
+
+ long bs = 1, ks = 1;
+ {
+ long input_ndim = THTensor_(nDimension)(input);
+ if (input->size[input_ndim > 1] != nOutputPlane)
+ THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]);
+
+ if (input_ndim > 1) {
+ bs = input->size[0];
+ for (int d = 2; d < input_ndim; d++) {
+ ks *= input->size[d];
+ }
+ }
+ }
+
+ THIndex_t i, j, k;
+#pragma omp parallel for private(j,k)
+ for (i = 0; i < bs; ++i)
+ {
+ const real *n_input_data = input_data + i*nOutputPlane*ks;
+ const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
+ real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks;
+
+ for (j = 0; j < nOutputPlane; ++j)
+ {
+ real w = weight_data[j];
+ for (k = 0; k < ks; ++k)
+ {
+ if (n_input_data[k] > 0)
+ n_gradInput_data[k] = n_gradOutput_data[k];
+ else
+ n_gradInput_data[k] = n_gradOutput_data[k] * w;
+ }
+ n_input_data += ks;
+ n_gradInput_data += ks;
+ n_gradOutput_data += ks;
+ }
+ }
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+ }
+}
+
+void THNN_(PReLU_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradWeight,
+ THTensor *gradWeightBuf,
+ THTensor *gradWeightBuf2,
+ THIndex_t nOutputPlane,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THNN_CHECK_NELEMENT(input, gradOutput);
+
+ if (nOutputPlane == 0)
+ {
+ real *gradWeight_data = THTensor_(data)(gradWeight);
+ real sum = 0;
+ TH_TENSOR_APPLY2(real, input, real, gradOutput,
+ if ((*input_data) <= 0)
+ sum += (*input_data) * (*gradOutput_data);
+ );
+ gradWeight_data[0] += scale * sum;
+ }
+ else
+ {
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 6, "gradWeight needs to be contiguous");
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ weight = THTensor_(newContiguous)(weight);
+ long bs = 1, ks = 1;
+ {
+ long input_ndim = THTensor_(nDimension)(input);
+ if (input->size[input_ndim > 1] != nOutputPlane)
+ THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]);
+
+ if (input_ndim > 1) {
+ bs = input->size[0];
+ for (int d = 2; d < input_ndim; d++) {
+ ks *= input->size[d];
+ }
+ }
+ }
+
+ const real *input_data = THTensor_(data)(input);
+ const real *gradOutput_data = THTensor_(data)(gradOutput);
+ const real *weight_data = THTensor_(data)(weight);
+ real *gradWeight_data = THTensor_(data)(gradWeight);
+
+ THIndex_t i, j, k;
+ for (i = 0; i < bs; ++i)
+ {
+ const real *n_input_data = input_data + i*nOutputPlane*ks;
+ const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
+
+ for (j = 0; j < nOutputPlane; ++j)
+ {
+ real sum = 0;
+ for (k = 0; k < ks; ++k)
+ if (n_input_data[k] <= 0)
+ sum += n_gradOutput_data[k] * n_input_data[k];
+ gradWeight_data[j] += scale * sum;
+ n_input_data += ks;
+ n_gradOutput_data += ks;
+ }
+ }
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+ }
+}
+
+#endif
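
PReLU computes f(x) = x for x > 0 and f(x) = a*x otherwise, with one learned slope shared across all elements (nOutputPlane == 0) or one slope per channel; accGradParameters above accumulates dL/da as the sum of x * dL/dy over the elements where x <= 0. A minimal per-channel forward sketch (not part of the patch; hypothetical names, channel-major layout as in the kernels above):

#include <stdio.h>

/* PReLU forward for a (channels x k) block with one slope per channel,
 * matching the per-plane loop above. */
static void prelu_forward(const double *x, double *y,
                          const double *slope, int channels, int k)
{
    for (int j = 0; j < channels; j++)
        for (int i = 0; i < k; i++) {
            double v = x[j * k + i];
            y[j * k + i] = (v > 0.0) ? v : slope[j] * v;
        }
}

int main(void)
{
    double x[4] = {1.0, -2.0, -3.0, 4.0};   /* 2 channels, 2 values each */
    double slope[2] = {0.25, 0.1};
    double y[4];
    prelu_forward(x, y, slope, 2, 2);
    for (int i = 0; i < 4; i++)
        printf("%f\n", y[i]);               /* 1.0 -0.5 -0.3 4.0 */
    return 0;
}
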
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/RReLU.c b/contrib/lua-torch/nn/lib/THNN/generic/RReLU.c
new file mode 100644
index 000000000..8fd46d3c2
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/RReLU.c
@@ -0,0 +1,132 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/RReLU.c"
+#else
+
+void THNN_(RReLU_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *noise,
+ accreal lower_,
+ accreal upper_,
+ bool train,
+ bool inplace,
+ THGenerator *generator)
+{
+ real lower = TH_CONVERT_ACCREAL_TO_REAL(lower_);
+ real upper = TH_CONVERT_ACCREAL_TO_REAL(upper_);
+ if (train)
+ {
+    // the sampled slope for each element is recorded in the noise tensor (the generator is passed in)
+ THTensor_(resizeAs)(noise, input);
+ if (inplace)
+ {
+ TH_TENSOR_APPLY2(real, input, real, noise,
+ if (*input_data <= 0)
+ {
+ const real r = (real)THRandom_uniform(generator, lower, upper);
+ *input_data = (*input_data) * r;
+ *noise_data = r;
+ }
+ else
+ {
+ *noise_data = 1;
+ }
+ );
+ THTensor_(set)(output, input);
+ }
+ else
+ {
+ THTensor_(resizeAs)(output, input);
+ TH_TENSOR_APPLY3(real, input, real, output, real, noise,
+ if (*input_data <= 0)
+ {
+ const real r = (real)THRandom_uniform(generator, lower, upper);
+ *output_data = (*input_data) * r;
+ *noise_data = r;
+ }
+ else
+ {
+ *output_data = *input_data;
+ *noise_data = 1;
+ }
+ );
+ }
+ }
+ else
+ {
+ const real negSlope = (lower + upper) / 2;
+ if (inplace)
+ {
+ TH_TENSOR_APPLY(real, input,
+ if (*input_data <= 0)
+ {
+ *input_data = *input_data * negSlope;
+ }
+ );
+ THTensor_(set)(output, input);
+ }
+ else
+ {
+ THTensor_(resizeAs)(output, input);
+ TH_TENSOR_APPLY2(real, input, real, output,
+ const real r = (*input_data) <= 0 ? negSlope : 1;
+ *output_data = *input_data * r;
+ );
+ }
+ }
+}
+
+void THNN_(RReLU_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *noise,
+ accreal lower_,
+ accreal upper_,
+ bool train,
+ bool inplace)
+{
+ real lower = TH_CONVERT_ACCREAL_TO_REAL(lower_);
+ real upper = TH_CONVERT_ACCREAL_TO_REAL(upper_);
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ if (train && upper - lower > 1E-6) // e.g. if upper == lower, RReLU behaves like LeakyReLU
+ {
+ // multiply the gradient by the noise tensor
+ if (inplace)
+ {
+ THTensor_(cmul)(gradOutput, gradOutput, noise);
+ THTensor_(set)(gradInput, gradOutput);
+ }
+ else
+ {
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(cmul)(gradInput, gradOutput, noise);
+ }
+ }
+ else
+ {
+ // use constant factor for negative input values
+ const real negSlope = (lower + upper) / 2;
+ if (inplace)
+ {
+ TH_TENSOR_APPLY2(real, gradOutput, real, input,
+ if (*input_data <= 0)
+ {
+ *gradOutput_data = (*gradOutput_data) * negSlope;
+ }
+ );
+ THTensor_(set)(gradInput, gradOutput);
+ }
+ else
+ {
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ *gradInput_data = (*input_data) <= 0 ? (*gradOutput_data) * negSlope : (*gradOutput_data);
+ );
+ }
+ }
+}
+
+#endif
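
In training mode the code above scales each non-positive input by a slope drawn uniformly from [lower, upper] and records it in noise for the backward pass; in evaluation mode it applies the fixed slope (lower + upper) / 2, i.e. it degenerates to LeakyReLU. A minimal sketch of the training-mode rule (not part of the patch; rand() stands in for THRandom_uniform, names hypothetical):

#include <stdio.h>
#include <stdlib.h>

/* RReLU in training mode: non-positive inputs are scaled by a slope
 * drawn uniformly from [lower, upper]; the slope is kept for backprop. */
static double rrelu_train(double x, double lower, double upper, double *noise)
{
    if (x <= 0.0) {
        double r = lower + (upper - lower) * ((double)rand() / RAND_MAX);
        *noise = r;
        return x * r;
    }
    *noise = 1.0;
    return x;
}

int main(void)
{
    srand(42);
    double noise;
    double y = rrelu_train(-1.5, 1.0 / 8.0, 1.0 / 3.0, &noise);
    printf("y = %f, slope kept for backward = %f\n", y, noise);
    return 0;
}
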
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Sigmoid.c b/contrib/lua-torch/nn/lib/THNN/generic/Sigmoid.c
new file mode 100644
index 000000000..17fb2cb4d
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/Sigmoid.c
@@ -0,0 +1,28 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Sigmoid.c"
+#else
+
+void THNN_(Sigmoid_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output)
+{
+ THTensor_(sigmoid)(output, input);
+}
+
+void THNN_(Sigmoid_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output)
+{
+ THNN_CHECK_NELEMENT(output, gradOutput);
+ THTensor_(resizeAs)(gradInput, output);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
+ real z = *output_data;
+ *gradInput_data = *gradOutput_data * (1. - z) * z;
+ );
+}
+
+#endif
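
The backward pass above exploits that for y = sigmoid(x) the derivative can be written purely in terms of the saved output, dy/dx = y * (1 - y), so no exponential is re-evaluated. A worked example (not part of the patch):

#include <stdio.h>
#include <math.h>

int main(void)
{
    /* Backward needs only the saved forward output:
     * d(sigmoid)/dx = y * (1 - y). */
    double x = 0.7;
    double y = 1.0 / (1.0 + exp(-x));
    double grad_out = 1.0;                      /* upstream gradient */
    double grad_in = grad_out * (1.0 - y) * y;
    printf("y = %f, dL/dx = %f\n", y, grad_in); /* ~0.668188, ~0.221713 */
    return 0;
}
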
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SmoothL1Criterion.c b/contrib/lua-torch/nn/lib/THNN/generic/SmoothL1Criterion.c
new file mode 100644
index 000000000..d1928d11c
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SmoothL1Criterion.c
@@ -0,0 +1,49 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SmoothL1Criterion.c"
+#else
+
+void THNN_(SmoothL1Criterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+
+ real sum = 0;
+ TH_TENSOR_APPLY2(real, input, real, target,
+ real z = fabs(*input_data - *target_data);
+ sum += z < 1 ? 0.5*z*z : z - 0.5;
+ );
+
+ if (sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(SmoothL1Criterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ real x = *input_data - *target_data;
+ if (x < -1.)
+ *gradInput_data = - norm;
+ else if (x > 1.)
+ *gradInput_data = norm;
+ else
+ *gradInput_data = norm * x;
+ );
+}
+
+#endif
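
This is the Huber loss with delta = 1: quadratic (0.5 z^2) for z = |x - t| < 1 and linear (z - 0.5) beyond, so the two branches and their derivatives meet at z = 1; the gradient is accordingly clamped to [-norm, norm]. A minimal sketch of the element-wise loss (not part of the patch):

#include <stdio.h>
#include <math.h>

/* Smooth L1 for one element: quadratic inside |x - t| < 1, linear
 * outside, continuous at the joint (0.5 on both sides). */
static double smooth_l1(double x, double t)
{
    double z = fabs(x - t);
    return (z < 1.0) ? 0.5 * z * z : z - 0.5;
}

int main(void)
{
    printf("%f\n", smooth_l1(0.3, 0.0));   /* 0.045, quadratic branch */
    printf("%f\n", smooth_l1(2.0, 0.0));   /* 1.5,   linear branch    */
    return 0;
}
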
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SoftMarginCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/SoftMarginCriterion.c
new file mode 100644
index 000000000..bac0a3b53
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SoftMarginCriterion.c
@@ -0,0 +1,44 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SoftMarginCriterion.c"
+#else
+
+void THNN_(SoftMarginCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
+
+ real sum;
+
+ sum = 0;
+ TH_TENSOR_APPLY2(real, input, real, target,
+ real z = log(1. + exp(-*input_data* *target_data));
+ sum += z;)
+
+ if(sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ THTensor_(set1d)(output, 0, sum);
+}
+
+void THNN_(SoftMarginCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage)
+{
+ THNN_CHECK_NELEMENT(input, target);
+ real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ real z = exp(-*target_data * *input_data);
+ *gradInput_data = -norm*(*target_data)*z/(1. + z);)
+}
+
+#endif
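
The loss above is L(x, y) = log(1 + exp(-x*y)) for targets y in {-1, 1}, with gradient -y*z / (1 + z) where z = exp(-x*y), exactly the form used in updateGradInput. A worked example (not part of the patch; log1p is used here for accuracy where the code above uses log(1 + ...)):

#include <stdio.h>
#include <math.h>

int main(void)
{
    /* Soft margin loss for one (input, target) pair, target in {-1, 1}. */
    double x = 2.0, y = 1.0;
    double z = exp(-x * y);
    double loss = log1p(z);
    double grad = -y * z / (1.0 + z);   /* same form as the backward above */
    printf("loss = %f, grad = %f\n", loss, grad);  /* 0.126928, -0.119203 */
    return 0;
}
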
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SoftMax.c b/contrib/lua-torch/nn/lib/THNN/generic/SoftMax.c
new file mode 100644
index 000000000..7b60d64c2
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SoftMax.c
@@ -0,0 +1,150 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SoftMax.c"
+#else
+
+void THNN_(SoftMax_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output)
+{
+ real *input_data, *output_data;
+ ptrdiff_t nframe = 0, dim = 0, stride = 0;
+ ptrdiff_t t;
+
+ if (input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ stride = 1;
+ }
+ else if (input->nDimension == 2)
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ stride = 1;
+ }
+ else if (input->nDimension == 3)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ stride = input->size[1]*input->size[2];
+ }
+ else if (input->nDimension == 4)
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ stride = input->size[2]*input->size[3];
+ }
+ else
+ {
+ THArgCheck(0, 2, "1D, 2D, 3D or 4D tensor expected");
+ }
+
+ input = THTensor_(newContiguous)(input);
+ THTensor_(resizeAs)(output, input);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(t)
+ for (t = 0; t < stride*nframe; t++)
+ {
+ real *input_ptr = input_data + (t/stride)*dim*stride + t % stride;
+ real *output_ptr = output_data + (t/stride)*dim*stride + t % stride;
+
+ real inputMax = -THInf;
+ accreal sum;
+
+ ptrdiff_t d;
+ for (d = 0; d < dim; d++)
+ {
+ if (input_ptr[d*stride] >= inputMax) inputMax = input_ptr[d*stride];
+ }
+
+ sum = 0;
+ for (d = 0; d < dim; d++)
+ {
+ real z = exp(input_ptr[d*stride] - inputMax);
+ output_ptr[d*stride] = z;
+ sum += z;
+ }
+
+ for (d = 0; d < dim; d++)
+ {
+ output_ptr[d*stride] *= 1/sum;
+ }
+ }
+
+ THTensor_(free)(input);
+}
+
+void THNN_(SoftMax_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output)
+{
+ THNN_CHECK_SHAPE(input, gradOutput);
+ real *gradInput_data, *gradOutput_data, *output_data;
+ ptrdiff_t nframe = 0, dim = 0, stride = 0;
+ ptrdiff_t t;
+
+ if (output->nDimension == 1)
+ {
+ nframe = 1;
+ dim = output->size[0];
+ stride = 1;
+ }
+ else if (output->nDimension == 2)
+ {
+ nframe = output->size[0];
+ dim = output->size[1];
+ stride = 1;
+ }
+ else if (output->nDimension == 3)
+ {
+ nframe = 1;
+ dim = output->size[0];
+ stride = output->size[1]*output->size[2];
+ }
+ else if (output->nDimension == 4)
+ {
+ nframe = output->size[0];
+ dim = output->size[1];
+ stride = output->size[2]*output->size[3];
+ }
+ else
+ {
+ THError("1D, 2D, 3D or 4D tensor expected");
+ }
+
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ output = THTensor_(newContiguous)(output);
+
+ THTensor_(resizeAs)(gradInput, output);
+ gradInput_data = THTensor_(data)(gradInput);
+ output_data = THTensor_(data)(output);
+ gradOutput_data = THTensor_(data)(gradOutput);
+
+#pragma omp parallel for private(t)
+ for (t = 0; t < stride*nframe; t++)
+ {
+ real *gradInput_ptr = gradInput_data + (t/stride)*dim*stride + t % stride;
+ real *output_ptr = output_data + (t/stride)*dim*stride + t % stride;
+ real *gradOutput_ptr = gradOutput_data + (t/stride)*dim*stride + t % stride;
+
+ ptrdiff_t d;
+ accreal sum = 0;
+ for (d = 0; d < dim; d++)
+ sum += (accreal)gradOutput_ptr[d*stride] * output_ptr[d*stride];
+
+ for (d = 0; d < dim; d++)
+ gradInput_ptr[d*stride] = output_ptr[d*stride] * (gradOutput_ptr[d*stride] - sum);
+ }
+
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(output);
+}
+
+#endif
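
The kernel above subtracts the per-slice maximum before exponentiating, which keeps every exp() argument non-positive and avoids overflow without changing the result, since softmax is invariant to a constant shift of its inputs. A minimal sketch of the same trick (not part of the patch; hypothetical names):

#include <stdio.h>
#include <math.h>

/* Numerically stable softmax over a vector: subtracting the maximum
 * first keeps every exp() argument <= 0, as in the kernel above. */
static void softmax(const double *x, double *y, int n)
{
    double m = x[0], sum = 0.0;
    for (int i = 1; i < n; i++)
        if (x[i] > m) m = x[i];
    for (int i = 0; i < n; i++) {
        y[i] = exp(x[i] - m);
        sum += y[i];
    }
    for (int i = 0; i < n; i++)
        y[i] /= sum;
}

int main(void)
{
    double x[3] = {1000.0, 1001.0, 1002.0};  /* would overflow naively */
    double y[3];
    softmax(x, y, 3);
    printf("%f %f %f\n", y[0], y[1], y[2]);
    return 0;
}
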
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SoftPlus.c b/contrib/lua-torch/nn/lib/THNN/generic/SoftPlus.c
new file mode 100644
index 000000000..6491e66d6
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SoftPlus.c
@@ -0,0 +1,47 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SoftPlus.c"
+#else
+
+void THNN_(SoftPlus_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal beta_,
+ accreal threshold_)
+{
+ real beta = TH_CONVERT_ACCREAL_TO_REAL(beta_);
+ real threshold = TH_CONVERT_ACCREAL_TO_REAL(threshold_);
+ THTensor_(resizeAs)(output, input);
+
+ // f(x) = 1/beta * log(1 + exp(beta * x))
+  TH_TENSOR_APPLY2(real, output, real, input,
+ *output_data = (*input_data * beta) > threshold ? *input_data : THLog1p(exp(*input_data * beta)) / beta;
+ );
+}
+
+void THNN_(SoftPlus_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output,
+ accreal beta_,
+ accreal threshold_)
+{
+ real beta = TH_CONVERT_ACCREAL_TO_REAL(beta_);
+ real threshold = TH_CONVERT_ACCREAL_TO_REAL(threshold_);
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ THTensor_(resizeAs)(gradInput, output);
+
+ // d/dx[log(1+exp(k*x))/k] = exp(kx) / (exp(kx) + 1)
+ // SINCE
+ // y = (1/k)*log(1+exp(k*x)) --> x = (1/k)*log(exp(k*y)-1)
+ // THEREFORE:
+ // d/dx(f(x)) = (exp(k*y) - 1) / exp(k*y)
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
+ real z = exp(*output_data * beta);
+ *gradInput_data = (*output_data * beta) > threshold ? *gradOutput_data : *gradOutput_data * (z - 1.)/z;
+ );
+}
+
+#endif
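
For beta*x above the threshold, exp(beta*x) would overflow while log(1 + exp(beta*x))/beta is numerically indistinguishable from x, so both kernels above fall back to the identity; the derivative is likewise rewritten in terms of the saved output as (exp(beta*y) - 1) / exp(beta*y). A minimal forward sketch (not part of the patch):

#include <stdio.h>
#include <math.h>

/* SoftPlus f(x) = log(1 + exp(beta*x)) / beta; above the threshold the
 * identity is returned directly, exactly as in the kernel above. */
static double softplus(double x, double beta, double threshold)
{
    return (x * beta > threshold) ? x : log1p(exp(x * beta)) / beta;
}

int main(void)
{
    printf("%f\n", softplus(0.0, 1.0, 20.0));   /* log(2) ~ 0.693147 */
    printf("%f\n", softplus(50.0, 1.0, 20.0));  /* linear regime: 50 */
    return 0;
}
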
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SoftShrink.c b/contrib/lua-torch/nn/lib/THNN/generic/SoftShrink.c
new file mode 100644
index 000000000..e77950868
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SoftShrink.c
@@ -0,0 +1,42 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SoftShrink.c"
+#else
+
+void THNN_(SoftShrink_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal lambda_)
+{
+ real lambda = TH_CONVERT_ACCREAL_TO_REAL(lambda_);
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input,
+ if ((*input_data) > lambda)
+ *output_data = *input_data - lambda;
+ else if ((*input_data) < -lambda)
+ *output_data = *input_data + lambda;
+ else
+ *output_data = 0;
+ );
+}
+
+void THNN_(SoftShrink_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ accreal lambda_)
+{
+ real lambda = TH_CONVERT_ACCREAL_TO_REAL(lambda_);
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ if ((*input_data) > lambda || (*input_data) < -lambda)
+ *gradInput_data = (*gradOutput_data);
+ else
+ *gradInput_data = 0;
+ );
+}
+
+#endif
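
Soft shrinkage moves each value toward zero by lambda and zeroes the band [-lambda, lambda]; the gradient above passes through unchanged outside the band and is zero inside it. A minimal sketch (not part of the patch):

#include <stdio.h>

/* Soft shrinkage: shift values toward zero by lambda and zero out
 * the [-lambda, lambda] band, as in the kernel above. */
static double soft_shrink(double x, double lambda)
{
    if (x > lambda)  return x - lambda;
    if (x < -lambda) return x + lambda;
    return 0.0;
}

int main(void)
{
    double xs[3] = {1.5, 0.2, -0.9};
    for (int i = 0; i < 3; i++)
        printf("%f -> %f\n", xs[i], soft_shrink(xs[i], 0.5));
    return 0;
}
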
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SparseLinear.c b/contrib/lua-torch/nn/lib/THNN/generic/SparseLinear.c
new file mode 100644
index 000000000..1cf712212
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SparseLinear.c
@@ -0,0 +1,564 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SparseLinear.c"
+#else
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#define ROW_PTR2(t, r) (THTensor_(data)(t) + (r) * (t)->stride[0])
+#define COL_PTR2(t, c) (THTensor_(data)(t) + (c) * (t)->stride[1])
+
+static bool THNN_(checkLegacyInput)(THTensor* t)
+{
+ return t->nDimension == 3 && t->size[2] == 2;
+}
+
+static bool THNN_(checkInput)(THTensor* t)
+{
+ return t->nDimension == 2 && t->size[1] == 3;
+}
+
+static bool THNN_(checkSize2D)(THTensor* t, long size0, long size1)
+{
+ return t->nDimension == 2 && t->size[0] == size0 && t->size[1] == size1;
+}
+
+static bool THNN_(checkSize1D)(THTensor* t, long size0)
+{
+ return t->nDimension == 1 && t->size[0] == size0;
+}
+
+static void THNN_(set1d)(THTensor *t, long x0, real value) {
+ THStorage_(set)(t->storage, t->storageOffset + x0*t->stride[0], value);
+}
+static real THNN_(get3d)(const THTensor *t, long x0, long x1, long x2) {
+ return THStorage_(get)(t->storage, t->storageOffset +
+ x0*t->stride[0] + x1*t->stride[1] + x2*t->stride[2]);
+}
+static real THNN_(get2d)(const THTensor *t, long x0, long x1) {
+ return THStorage_(get)(t->storage, t->storageOffset +
+ x0*t->stride[0] + x1*t->stride[1]);
+}
+
+void THNN_(SparseLinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias)
+{
+ long h, i, j, hp0, hp1;
+ long outDim = THTensor_(size)(weight, 0);
+ long inDim = THTensor_(size)(weight, 1);
+ long batchSize = THTensor_(size)(output, 0);
+
+ THArgCheck(THNN_(checkInput)(input), 2, "input must be in coo format, nnz x 3");
+ THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
+ THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");
+
+ long nnz = THTensor_(size)(input, 0);
+
+ THLongTensor * csr = THLongTensor_newWithSize1d(batchSize+1);
+ THLongTensor_zero(csr);
+
+ weight = THTensor_(newContiguous)(weight);
+
+//#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000)
+ for (i=0; i<nnz; i++) {
+ hp0 = (long)(THNN_(get2d)(input, i, 0)) - 1;
+ hp1 = (i+1 == nnz) ?
+ batchSize :
+ (long)(THNN_(get2d)(input, i+1, 0)) - 1;
+ if (hp0 != hp1) for (h = hp0; h < hp1; h++) {
+ THLongTensor_set1d(csr, h+1, i+1);
+ }
+ }
+
+
+ // output = weight * input + bias
+ THTensor_(zero)(output);
+#pragma omp parallel for private(h, i) schedule(static) if (nnz > 10000)
+ for (h = 0; h < batchSize; h++) {
+ long i_start = THLongTensor_get1d(csr, h);
+ long i_end = THLongTensor_get1d(csr, h+1);
+ for (i = i_start; i < i_end; i++) {
+ real val = THNN_(get2d)(input, i, 2);
+ if (val == 0) {
+ continue;
+ }
+
+ long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ THBlas_(axpy)(outDim,
+ val,
+ COL_PTR2(weight, offset), weight->stride[0],
+ ROW_PTR2(output, h), output->stride[1]);
+ } else {
+        THError("index out of bounds. updateOutput: %d not between 1 and %d",
+ offset + 1, inDim);
+ }
+ }
+ }
+
+ THTensor* output_row = THTensor_(new)();
+ for (h = 0; h < batchSize; h++) {
+ THTensor_(select)(output_row, output, 0, h);
+ THTensor_(cadd)(output_row, bias, 1.0, output_row);
+ }
+ THTensor_(free)(output_row);
+ THLongTensor_free(csr);
+ THTensor_(free)(weight);
+}
+
+void THNN_(SparseLinear_legacyUpdateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias)
+{
+ long h, i;
+ long outDim = THTensor_(size)(weight, 0);
+ long inDim = THTensor_(size)(weight, 1);
+
+ THArgCheck(THNN_(checkLegacyInput)(input), 2, "input size must be batchsize x nnz x 2");
+ THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
+ THArgCheck(THNN_(checkSize1D)(bias, outDim), 5, "bias size wrong");
+
+ weight = THTensor_(newContiguous)(weight);
+
+ long batchSize = THTensor_(size)(input, 0);
+ long nnz = THTensor_(size)(input, 1);
+ THTensor_(resize2d)(output, batchSize, outDim);
+
+ // output = weight * input + bias
+ THTensor_(zero)(output);
+#pragma omp parallel for private(h, i) schedule(static) if ( \
+ batchSize > 1 && batchSize * nnz * outDim > 10000)
+ for (h = 0; h < batchSize; h++) {
+ for (i = 0; i < nnz; i++) {
+ real val = THNN_(get3d)(input, h, i, 1);
+ if (val == 0) {
+ continue;
+ }
+
+ long offset = (long)(THNN_(get3d)(input, h, i, 0)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ THBlas_(axpy)(outDim,
+ val,
+ COL_PTR2(weight, offset), weight->stride[0],
+ ROW_PTR2(output, h), output->stride[1]);
+ } else {
+        THError("index out of bounds. updateOutput: %d not between 1 and %d",
+ offset + 1, inDim);
+ }
+ }
+ }
+
+ THTensor* output_row = THTensor_(new)();
+ for (h = 0; h < batchSize; h++) {
+ THTensor_(select)(output_row, output, 0, h);
+ THTensor_(cadd)(output_row, bias, 1.0, output_row);
+ }
+ THTensor_(free)(output_row);
+ THTensor_(free)(weight);
+}
+
+void THNN_(SparseLinear_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *weight,
+ THTensor *bias,
+ accreal weightDecay_,
+ accreal scale_)
+{
+ real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_);
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ long h, i, col, hp0, hp1;
+ long outDim = THTensor_(size)(weight, 0);
+ long inDim = THTensor_(size)(weight, 1);
+
+ THArgCheck(THNN_(checkInput)(input), 2,
+ "input must be in coo format, nnz x 3");
+ THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
+ "gradWeight size wrong");
+ THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5,
+ "gradBias size wrong");
+ THArgCheck(THTensor_(isContiguous)(gradOutput), 1,
+ "gradOutput must be contiguous");
+
+ long nnz = THTensor_(size)(input, 0);
+
+ THLongTensor* csc = THLongTensor_newWithSize1d(inDim+1);
+ THLongTensor_zero(csc);
+ weight = THTensor_(newContiguous)(weight);
+
+#pragma omp parallel for private(i, h, hp0, hp1) schedule(static) if (nnz > 10000)
+ for (i = 0; i < nnz; i++) {
+ hp0 = (long)(THNN_(get2d)(input, i, 1)) - 1;
+ hp1 = (i+1 == nnz) ?
+ inDim :
+ (long)(THNN_(get2d)(input, i+1, 1)) - 1;
+ if (hp0 != hp1) for (h = hp0; h < hp1; h++) {
+ THLongTensor_set1d(csc, h+1, i+1);
+ }
+ }
+
+ // gradWeight += gradOutput * input
+#pragma omp parallel for private(h, i, col) schedule(static) if (nnz > 10000)
+ for (col = 0; col < inDim; col++) {
+ long i_start = THLongTensor_get1d(csc, col);
+ long i_end = THLongTensor_get1d(csc, col+1);
+ for (i = i_start; i < i_end; i++) {
+ real val = scale * THNN_(get2d)(input, i, 2);
+
+ h = (long)(THNN_(get2d)(input, i, 0)) - 1;
+ long offset = (long)(THNN_(get2d)(input, i, 1)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ THBlas_(axpy)(outDim,
+ val,
+ ROW_PTR2(gradOutput, h), gradOutput->stride[1],
+ COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
+ } else {
+ THError(
+          "index out of bounds. accGradParameters: %d not between 1 and %d",
+ offset + 1,
+ inDim);
+ }
+ }
+ }
+
+ // gradBias += gradOutput
+ THTensor* buf = THTensor_(new)();
+ THTensor_(sum)(buf, gradOutput, 0, 1);
+ THTensor_(cadd)(gradBias, gradBias, scale, buf);
+ THTensor_(free)(buf);
+ THLongTensor_free(csc);
+
+ if (weightDecay != 0) {
+ THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
+ }
+ THTensor_(free)(weight);
+}
+
+void THNN_(SparseLinear_legacyAccGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *weight,
+ THTensor *bias,
+ accreal weightDecay_,
+ accreal scale_)
+{
+ real weightDecay = TH_CONVERT_ACCREAL_TO_REAL(weightDecay_);
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ long h, i;
+ long outDim = THTensor_(size)(weight, 0);
+ long inDim = THTensor_(size)(weight, 1);
+
+ THArgCheck(THNN_(checkLegacyInput)(input), 2,
+ "input size must be batchsize x nnz x 2");
+ THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
+ "gradWeight size wrong");
+ THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5,
+ "gradBias size wrong");
+ THArgCheck(THTensor_(isContiguous)(gradOutput), 1,
+ "gradOutput must be contiguous");
+
+ long batchSize = THTensor_(size)(input, 0);
+ long nnz = THTensor_(size)(input, 1);
+ THTensor_(resize2d)(gradOutput, batchSize, outDim);
+
+ // gradWeight += gradOutput * input
+#pragma omp parallel for private(h, i) schedule(static) if (\
+ batchSize * nnz * outDim > 10000)
+ for (i = 0; i < nnz; i++) {
+ for (h = 0; h < batchSize; h++) {
+ real val = scale * THNN_(get3d)(input, h, i, 1);
+ if (val == 0) {
+ continue;
+ }
+
+ long offset = (long)(THNN_(get3d)(input, h, i, 0)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ THBlas_(axpy)(outDim,
+ val,
+ ROW_PTR2(gradOutput, h), gradOutput->stride[1],
+ COL_PTR2(gradWeight, offset), gradWeight->stride[0]);
+ } else {
+ THError(
+          "index out of bounds. accGradParameters: %d not between 1 and %d",
+ offset + 1,
+ inDim);
+ }
+ }
+ }
+
+ // gradBias += gradOutput
+ THTensor* gradOutput_row = THTensor_(new)();
+ for (h = 0; h < batchSize; h++) {
+ THTensor_(select)(gradOutput_row, gradOutput, 0, h);
+ THTensor_(cadd)(gradBias, gradBias, scale, gradOutput_row);
+ }
+ THTensor_(free)(gradOutput_row);
+
+ if (weightDecay != 0) {
+ THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
+ }
+}
+
+void THNN_(SparseLinear_updateParameters)(
+ THNNState *state,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *lastInput,
+ accreal learningRate_)
+{
+ real learningRate = TH_CONVERT_ACCREAL_TO_REAL(learningRate_);
+ long h, i;
+ long outDim = weight->size[0];
+ long inDim = weight->size[1];
+
+ THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
+ "gradWeight size wrong");
+ THArgCheck(THNN_(checkSize1D)(bias, outDim), 3, "bias size wrong");
+ THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong");
+ THArgCheck(THNN_(checkInput)(lastInput), 6,
+ "input must be in coo format, nnz x 3");
+
+
+ long nnz = THTensor_(size)(lastInput, 0);
+
+ // collect unique offsets of non-0 val in input
+ THTensor* offsets = THTensor_(newWithSize1d)(nnz);
+ long cnt = 0;
+ for (i = 0; i < nnz; i++) {
+ real val = THNN_(get2d)(lastInput, i, 2);
+ if (val == 0) {
+ continue;
+ }
+ long offset = (long)(THNN_(get2d)(lastInput, i, 1)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ THNN_(set1d)(offsets, cnt++, offset);
+ } else {
+ THError(
+        "index out of bounds. updateParameters: %d not between 1 and %d",
+ offset + 1,
+ inDim);
+ }
+ }
+ if (cnt == 0) return;
+ THTensor_(resize1d)(offsets, cnt);
+
+ THTensor* uniqueOffsets = THTensor_(new)();
+ THLongTensor* ri = THLongTensor_new();
+ THTensor_(sort)(uniqueOffsets, ri, offsets, 0, 0);
+ THLongTensor_free(ri);
+ THTensor_(free)(offsets);
+
+ cnt = 1;
+ real* uniqueOffsets_p = THTensor_(data)(uniqueOffsets);
+ for (i = 1; i < THTensor_(size)(uniqueOffsets, 0); i++) {
+ if (uniqueOffsets_p[i] != uniqueOffsets_p[i - 1]) {
+ uniqueOffsets_p[cnt++] = uniqueOffsets_p[i];
+ }
+ }
+ THTensor_(resize1d)(uniqueOffsets, cnt);
+
+ // weight += -learningRate * gradWeight
+ THTensor_(cadd)(bias, bias, -learningRate, gradBias);
+#pragma omp parallel for private(i) schedule(static) if (cnt * outDim > 10000)
+ for (i = 0; i < cnt; i++) {
+ long offset = (long)uniqueOffsets_p[i];
+ THBlas_(axpy)(outDim,
+ -learningRate,
+ COL_PTR2(gradWeight, offset), gradWeight->stride[0],
+ COL_PTR2(weight, offset), weight->stride[0]);
+ }
+
+ THTensor_(free)(uniqueOffsets);
+}
+
+void THNN_(SparseLinear_legacyUpdateParameters)(
+ THNNState *state,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *lastInput,
+ accreal learningRate_)
+{
+ real learningRate = TH_CONVERT_ACCREAL_TO_REAL(learningRate_);
+ long h, i;
+ long outDim = weight->size[0];
+ long inDim = weight->size[1];
+
+ THArgCheck(THNN_(checkSize2D)(gradWeight, outDim, inDim), 4,
+ "gradWeight size wrong");
+ THArgCheck(THNN_(checkSize1D)(bias, outDim), 3, "bias size wrong");
+ THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 5, "gradBias size wrong");
+ THArgCheck(THNN_(checkLegacyInput)(lastInput), 6,
+ "input size must be batchsize x nnz x 2");
+
+
+ long batchSize = THTensor_(size)(lastInput, 0);
+ long nnz = THTensor_(size)(lastInput, 1);
+
+ // collect unique offsets of non-0 val in input
+ THTensor* offsets = THTensor_(newWithSize1d)(batchSize * nnz);
+ long cnt = 0;
+ for (h = 0; h < batchSize; h++) {
+ for (i = 0; i < nnz; i++) {
+ real val = THNN_(get3d)(lastInput, h, i, 1);
+ if (val == 0 ) {
+ continue;
+ }
+ long offset = (long)(THNN_(get3d)(lastInput, h, i, 0)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ THNN_(set1d)(offsets, cnt++, offset);
+ } else {
+ THError(
+          "index out of bounds. updateParameters: %d not between 1 and %d",
+ offset + 1,
+ inDim);
+ }
+ }
+ }
+ THTensor_(resize1d)(offsets, cnt);
+
+ THTensor* uniqueOffsets = THTensor_(new)();
+ THLongTensor* ri = THLongTensor_new();
+ THTensor_(sort)(uniqueOffsets, ri, offsets, 0, 0);
+ THLongTensor_free(ri);
+ THTensor_(free)(offsets);
+
+ cnt = 1;
+ real* uniqueOffsets_p = THTensor_(data)(uniqueOffsets);
+ for (i = 1; i < THTensor_(size)(uniqueOffsets, 0); i++) {
+ if (uniqueOffsets_p[i] != uniqueOffsets_p[i - 1]) {
+ uniqueOffsets_p[cnt++] = uniqueOffsets_p[i];
+ }
+ }
+ THTensor_(resize1d)(uniqueOffsets, cnt);
+
+ // weight += -learningRate * gradWeight
+ THTensor_(cadd)(bias, bias, -learningRate, gradBias);
+#pragma omp parallel for private(i) schedule(static) if (cnt * outDim > 10000)
+ for (i = 0; i < cnt; i++) {
+ long offset = (long)uniqueOffsets_p[i];
+ THBlas_(axpy)(outDim,
+ -learningRate,
+ COL_PTR2(gradWeight, offset), gradWeight->stride[0],
+ COL_PTR2(weight, offset), weight->stride[0]);
+ }
+
+ THTensor_(free)(uniqueOffsets);
+}
+
+void THNN_(SparseLinear_zeroGradParameters)(
+ THNNState *state,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *lastInput)
+{
+ long h, i, j;
+
+ long outDim = gradWeight->size[0];
+ long inDim = gradWeight->size[1];
+
+ THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong");
+ THArgCheck(THNN_(checkInput)(lastInput), 4,
+ "input must be in coo format, nnz x 3");
+
+ THTensor_(zero)(gradBias);
+
+ long nnz = THTensor_(size)(lastInput, 0);
+
+#pragma omp parallel for private(i, j) schedule(static) if ( \
+ nnz * outDim > 10000)
+ for (i = 0; i < nnz; i++) {
+ if (THNN_(get2d)(lastInput, i, 2) == 0 ) {
+ continue;
+ }
+
+ long offset = (long)(THNN_(get2d)(lastInput, i, 1)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ real* pGradWeight = COL_PTR2(gradWeight, offset);
+ if (gradWeight->stride[0] == 1) {
+ THVector_(fill)(pGradWeight, 0, outDim);
+ } else {
+ long stride = gradWeight->stride[0];
+ for (j = 0; j < outDim; ++j) {
+ pGradWeight[j * stride] = 0;
+ }
+ }
+ } else {
+ THError(
+        "index out of bounds. zeroGradParameters: %d not between 1 and %d",
+ offset + 1,
+ inDim);
+ }
+ }
+}
+
+void THNN_(SparseLinear_legacyZeroGradParameters)(
+ THNNState *state,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *lastInput)
+{
+ long h, i, j;
+
+ long outDim = gradWeight->size[0];
+ long inDim = gradWeight->size[1];
+
+ THArgCheck(THNN_(checkSize1D)(gradBias, outDim), 3, "gradBias size wrong");
+ THArgCheck(THNN_(checkLegacyInput)(lastInput), 4,
+ "input size must be batchsize x nnz x 2");
+
+ THTensor_(zero)(gradBias);
+
+ long batchSize = THTensor_(size)(lastInput, 0);
+ long nnz = THTensor_(size)(lastInput, 1);
+
+#pragma omp parallel for private(h, i, j) schedule(static) if ( \
+ batchSize > 1 && batchSize * nnz * outDim > 10000)
+ for (h = 0; h < batchSize; h++) {
+ for (i = 0; i < nnz; i++) {
+ if (THNN_(get3d)(lastInput, h, i, 1) == 0 ) {
+ continue;
+ }
+
+ long offset = (long)(THNN_(get3d)(lastInput, h, i, 0)) - 1;
+ if (offset >= 0 && offset < inDim) {
+ real* pGradWeight = COL_PTR2(gradWeight, offset);
+ if (gradWeight->stride[0] == 1) {
+ THVector_(fill)(pGradWeight, 0, outDim);
+ } else {
+ long stride = gradWeight->stride[0];
+ for (j = 0; j < outDim; ++j) {
+ pGradWeight[j * stride] = 0;
+ }
+ }
+ } else {
+ THError(
+          "index out of bounds. zeroGradParameters: %d not between 1 and %d",
+ offset + 1,
+ inDim);
+ }
+ }
+ }
+}
+
+#undef ROW_PTR2
+#undef COL_PTR2
+
+#endif
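
The non-legacy entry points above take input in COO form, an nnz x 3 tensor of (sample, feature, value) rows with 1-based indices ordered by sample; the csr tensor built at the top of updateOutput converts that into per-sample index ranges so each sample can be processed independently. A minimal sketch of that row-pointer construction (not part of the patch; 0-based ids, hypothetical names):

#include <stdio.h>

/* Build CSR-style row pointers from COO rows sorted by sample id,
 * mirroring the csr construction above (0-based for brevity).
 * row_ptr[h]..row_ptr[h+1] then delimits the entries of sample h. */
static void build_row_ptr(const long *sample_ids, long nnz,
                          long batch, long *row_ptr)
{
    for (long h = 0; h <= batch; h++)
        row_ptr[h] = 0;
    for (long i = 0; i < nnz; i++) {
        long hp0 = sample_ids[i];
        long hp1 = (i + 1 == nnz) ? batch : sample_ids[i + 1];
        for (long h = hp0; h < hp1; h++)
            row_ptr[h + 1] = i + 1;
    }
}

int main(void)
{
    long ids[5] = {0, 0, 1, 2, 2};   /* sample id of each nonzero */
    long row_ptr[4];
    build_row_ptr(ids, 5, 3, row_ptr);
    for (int h = 0; h < 3; h++)
        printf("sample %d: entries [%ld, %ld)\n", h, row_ptr[h], row_ptr[h + 1]);
    return 0;
}
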
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveAveragePooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveAveragePooling.c
new file mode 100644
index 000000000..3675b42d7
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveAveragePooling.c
@@ -0,0 +1,258 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialAdaptiveAveragePooling.c"
+#else
+
+#define START_IND(a,b,c) (int)floor((float)(a * c) / b)
+#define END_IND(a,b,c) (int)ceil((float)((a + 1) * c) / b)
+// #define START_IND(a,b,c) a * c / b
+// #define END_IND(a,b,c) (a + 1) * c / b + ((a + 1) * c % b > 0)?1:0
+
+static void THNN_(SpatialAdaptiveAveragePooling_updateOutput_frame)(
+ real *input_p,
+ real *output_p,
+ long nslices,
+ long iwidth,
+ long iheight,
+ long owidth,
+ long oheight,
+ long stridew,
+ long strideh,
+ long strided)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ /* loop over output */
+ long i, j;
+ for(i = 0; i < oheight; i++)
+ {
+ int y_start = START_IND(i, oheight, iheight);
+ int y_end = END_IND(i, oheight, iheight);
+ int kH = y_end-y_start;
+
+ for(j = 0; j < owidth; j++)
+ {
+
+ int x_start = START_IND(j, owidth, iwidth);
+ int x_end = END_IND(j, owidth, iwidth);
+ int kW = x_end-x_start;
+
+ /* local pointers */
+ real *ip = input_p + k*strided + y_start*strideh + x_start*stridew;
+ real *op = output_p + k*owidth*oheight + i*owidth + j;
+
+ /* compute local average: */
+ real sum = 0;
+ int x,y;
+ for(y = 0; y < kH; y++)
+ {
+ for(x = 0; x < kW; x++)
+ {
+ real val = *(ip + y*strideh + x*stridew);
+ sum += val;
+ }
+ }
+
+ /* set output to local average */
+ *op = sum / kW / kH;
+ }
+ }
+ }
+}
+
+void THNN_(SpatialAdaptiveAveragePooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int owidth,
+ int oheight)
+{
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ long nslices;
+ long iheight;
+ long iwidth;
+
+ long istride_d;
+ long istride_h;
+ long istride_w;
+ long istride_b;
+
+ real *input_data;
+ real *output_data;
+
+
+ THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+ "3D or 4D (batch mode) tensor expected for input, but got: %s");
+
+ if (input->nDimension == 4)
+ {
+ istride_b = input->stride[0];
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimh-1];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ /* strides */
+ istride_d = input->stride[dimh-1];
+ istride_h = input->stride[dimh];
+ istride_w = input->stride[dimw];
+
+ /* resize output */
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nslices, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+ THNN_(SpatialAdaptiveAveragePooling_updateOutput_frame)(input_data, output_data,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ istride_w,istride_h,
+ istride_d);
+ }
+ else
+ {
+ long p;
+
+ THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialAdaptiveAveragePooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ istride_w,istride_h,
+ istride_d);
+ }
+ }
+}
+
+static void THNN_(SpatialAdaptiveAveragePooling_updateGradInput_frame)(
+ real *gradInput_p,
+ real *gradOutput_p,
+ long nslices,
+ long iwidth,
+ long iheight,
+ long owidth,
+ long oheight)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
+ real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
+
+ /* calculate average */
+ long i, j;
+ for(i = 0; i < oheight; i++)
+ {
+ int y_start = START_IND(i, oheight, iheight);
+ int y_end = END_IND(i, oheight, iheight);
+ int kH = y_end-y_start;
+
+ for(j = 0; j < owidth; j++)
+ {
+
+ int x_start = START_IND(j, owidth, iwidth);
+ int x_end = END_IND(j, owidth, iwidth);
+ int kW = x_end-x_start;
+
+ int x,y;
+ for(y = y_start; y < y_end; y++)
+ {
+ for(x = x_start; x < x_end; x++)
+ {
+ /* update gradient */
+ gradInput_p_k[y*iwidth + x] += gradOutput_p_k[i*owidth + j] / kW / kH;
+ }
+ }
+ }
+ }
+ }
+}
+
+void THNN_(SpatialAdaptiveAveragePooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput)
+{
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ int nslices;
+ int iheight;
+ int iwidth;
+ int oheight;
+ int owidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (input->nDimension == 4) {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimh-1];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ oheight = gradOutput->size[dimh];
+ owidth = gradOutput->size[dimw];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+
+ /* backprop */
+ if (input->nDimension == 3)
+ {
+ THNN_(SpatialAdaptiveAveragePooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight);
+ }
+ else
+ {
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialAdaptiveAveragePooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
+
+#undef START_IND
+#undef END_IND
\ No newline at end of file
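
START_IND/END_IND above partition an input extent c into b output cells, cell a covering [floor(a*c/b), ceil((a+1)*c/b)); when c is not a multiple of b, neighbouring cells overlap by design. A small demonstration (not part of the patch):

#include <stdio.h>
#include <math.h>

/* Bin boundaries used by adaptive pooling: output cell a of b cells
 * over an input extent c covers [start, end). */
static int start_ind(int a, int b, int c) { return (int)floor((double)(a * c) / b); }
static int end_ind(int a, int b, int c)   { return (int)ceil((double)((a + 1) * c) / b); }

int main(void)
{
    int b = 3, c = 7;   /* pool 7 input columns into 3 output columns */
    for (int a = 0; a < b; a++)
        printf("cell %d: [%d, %d)\n", a, start_ind(a, b, c), end_ind(a, b, c));
    /* prints [0, 3), [2, 5), [4, 7): cells overlap where 7 % 3 != 0 */
    return 0;
}
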
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveMaxPooling.c
new file mode 100644
index 000000000..fff716e67
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAdaptiveMaxPooling.c
@@ -0,0 +1,274 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialAdaptiveMaxPooling.c"
+#else
+
+static void THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
+ real *input_p,
+ real *output_p,
+ THIndex_t *indx_p,
+ THIndex_t *indy_p,
+ long nslices,
+ long iwidth,
+ long iheight,
+ long owidth,
+ long oheight,
+ long stridew,
+ long strideh,
+ long strided)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ /* loop over output */
+ long i, j;
+ for(i = 0; i < oheight; i++)
+ {
+ int y_start = (int)floor((float)i / oheight * iheight);
+ int y_end = (int)ceil((float)(i + 1) / oheight * iheight);
+ int kH = y_end-y_start;
+
+ for(j = 0; j < owidth; j++)
+ {
+
+ int x_start = (int)floor((float)j / owidth * iwidth);
+ int x_end = (int)ceil((float)(j + 1) / owidth * iwidth);
+ int kW = x_end-x_start;
+
+ /* local pointers */
+ real *ip = input_p + k*strided + y_start*strideh + x_start*stridew;
+ real *op = output_p + k*owidth*oheight + i*owidth + j;
+ THIndex_t *indyp = indy_p + k*owidth*oheight + i*owidth + j;
+ THIndex_t *indxp = indx_p + k*owidth*oheight + i*owidth + j;
+
+ /* compute local max: */
+ long maxindex = -1;
+ real maxval = -FLT_MAX;
+ long tcntr = 0;
+ int x,y;
+ for(y = 0; y < kH; y++)
+ {
+ for(x = 0; x < kW; x++)
+ {
+ real val = *(ip + y*strideh + x*stridew);
+ if (val > maxval)
+ {
+ maxval = val;
+ maxindex = tcntr;
+ }
+ tcntr++;
+ }
+ }
+
+ /* set output to local max */
+ *op = maxval;
+
+ /* store location of max (x,y) */
+ *indyp = (maxindex / kW) + TH_INDEX_BASE;
+ *indxp = (maxindex % kW) + TH_INDEX_BASE;
+ }
+ }
+ }
+}
+
+void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int owidth,
+ int oheight)
+{
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ long nslices;
+ long iheight;
+ long iwidth;
+
+ long istride_d;
+ long istride_h;
+ long istride_w;
+ long istride_b;
+
+ real *input_data;
+ real *output_data;
+ THIndex_t *indices_data;
+
+
+ THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+ "3D or 4D (batch mode) tensor expected for input, but got: %s");
+
+ if (input->nDimension == 4)
+ {
+ istride_b = input->stride[0];
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimh-1];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ /* strides */
+ istride_d = input->stride[dimh-1];
+ istride_h = input->stride[dimh];
+ istride_w = input->stride[dimw];
+
+ /* resize output */
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nslices, oheight, owidth);
+ /* indices will contain i,j locations for each output point */
+ THIndexTensor_(resize4d)(indices, 2, nslices, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
+ indices_data+nslices*owidth*oheight, indices_data,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ istride_w,istride_h,
+ istride_d);
+ }
+ else
+ {
+ long p;
+
+ THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
+ /* indices will contain i,j locations for each output point */
+ THIndexTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight,
+ indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ istride_w,istride_h,
+ istride_d);
+ }
+ }
+}
+
+static void THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(
+ real *gradInput_p,
+ real *gradOutput_p,
+ THIndex_t *indx_p,
+ THIndex_t *indy_p,
+ long nslices,
+ long iwidth,
+ long iheight,
+ long owidth,
+ long oheight)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
+ real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
+ THIndex_t *indx_p_k = indx_p + k*owidth*oheight;
+ THIndex_t *indy_p_k = indy_p + k*owidth*oheight;
+
+ /* calculate max points */
+ long i, j;
+ for(i = 0; i < oheight; i++)
+ {
+ int y_start = (int)floor((float) i / oheight * iheight);
+ for(j = 0; j < owidth; j++)
+ {
+ int x_start = (int)floor((float) j / owidth * iwidth);
+ /* retrieve position of max */
+ long maxi = indy_p_k[i*owidth + j] - TH_INDEX_BASE + y_start;
+ long maxj = indx_p_k[i*owidth + j] - TH_INDEX_BASE + x_start;
+
+ /* update gradient */
+ gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j];
+ }
+ }
+ }
+}
+
+void THNN_(SpatialAdaptiveMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices)
+{
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ int nslices;
+ int iheight;
+ int iwidth;
+ int oheight;
+ int owidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+ THIndex_t *indices_data;
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (input->nDimension == 4) {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimh-1];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ oheight = gradOutput->size[dimh];
+ owidth = gradOutput->size[dimw];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THIndexTensor_(data)(indices);
+
+ /* backprop */
+ if (input->nDimension == 3)
+ {
+ THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
+ indices_data+nslices*owidth*oheight, indices_data,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight);
+ }
+ else
+ {
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
+ indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
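
The indices tensor above carries a leading dimension of size 2: the first plane stores the row (y) of each maximum and the second plane the column (x), both offset by TH_INDEX_BASE, and updateGradInput uses them to route each output gradient back to the single winning input position. A toy sketch of that routing (not part of the patch; 0-based coordinates):

#include <stdio.h>

/* Gradient routing in max-pool backward: each output cell adds its
 * gradient to the one input position recorded during the forward pass. */
int main(void)
{
    double grad_in[2][3] = {{0}};
    double grad_out[2] = {1.0, 2.0};
    int max_y[2] = {0, 1};   /* row of the max for each output cell */
    int max_x[2] = {2, 0};   /* column of the max */
    for (int j = 0; j < 2; j++)
        grad_in[max_y[j]][max_x[j]] += grad_out[j];
    for (int y = 0; y < 2; y++)
        for (int x = 0; x < 3; x++)
            printf("%f%c", grad_in[y][x], x == 2 ? '\n' : ' ');
    return 0;
}
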
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialAveragePooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAveragePooling.c
new file mode 100644
index 000000000..c063502e7
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialAveragePooling.c
@@ -0,0 +1,329 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialAveragePooling.c"
+#else
+
+static inline void THNN_(SpatialAveragePooling_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ int kH, int kW, int dH, int dW, int padH, int padW,
+ bool ceil_mode) {
+
+ THArgCheck(kW > 0 && kH > 0, 5,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+ THArgCheck(dW > 0 && dH > 0, 8,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+
+ THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
+ "pad should be smaller than half of kernel size, but got "
+ "padW = %d, padH = %d, kW = %d, kH = %d",
+ padW, padH, kW, kH);
+
+ long nInputPlane = input->size[dimh-1];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long outputHeight, outputWidth;
+ long nOutputPlane = nInputPlane;
+
+ if(ceil_mode)
+ {
+ outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
+ outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1;
+ }
+ else
+ {
+ outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
+ outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1;
+ }
+
+ if (padW || padH)
+ {
+ // ensure that the last pooling starts inside the image
+ // needed to avoid problems in ceil mode
+ if ((outputHeight - 1)*dH >= inputHeight + padH)
+ --outputHeight;
+ if ((outputWidth - 1)*dW >= inputWidth + padW)
+ --outputWidth;
+ }
+
+ if (outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%dx%dx%d). "
+ "Calculated output size: (%dx%dx%d). Output size is too small",
+ nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+void THNN_(SpatialAveragePooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ bool ceil_mode,
+ bool count_include_pad)
+{
+ real *output_data;
+ real *input_data;
+
+ int dimw = 2;
+ int dimh = 1;
+ int dimc = 0;
+ long nbatch = 1;
+
+ long inputWidth;
+ long inputHeight;
+ long outputWidth;
+ long outputHeight;
+ long nInputPlane; // number of channels (or colors)
+
+ long k;
+
+ THNN_(SpatialAveragePooling_shapeCheck)
+ (input, NULL, kH, kW, dH, dW, padH, padW, ceil_mode);
+
+ if (input->nDimension == 4) {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimc++;
+ }
+
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ nInputPlane = input->size[dimc];
+
+ if(ceil_mode)
+ {
+ outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1;
+ outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
+ }
+ else
+ {
+ outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1;
+ outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
+ }
+ if (padW || padH)
+ {
+ // ensure that the last pooling starts inside the image
+ // needed to avoid problems in ceil mode
+ if ((outputHeight - 1)*dH >= inputHeight + padH)
+ --outputHeight;
+ if ((outputWidth - 1)*dW >= inputWidth + padW)
+ --outputWidth;
+ }
+
+ if (input->nDimension == 3)
+ THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
+ else
+ THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth);
+
+ input = THTensor_(newContiguous)(input);
+ THArgCheck(THTensor_(isContiguous)(output), 3, "output must be contiguous");
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(k)
+ for(k = 0; k < nInputPlane; k++)
+ {
+ long p;
+ for(p = 0; p < nbatch; p++)
+ {
+ long xx, yy;
+ /* For all output pixels... */
+ real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
+ real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
+ long i;
+ for(i = 0; i < outputWidth*outputHeight; i++)
+ ptr_output[i] = 0;
+
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+ /* Compute the mean of the input image... */
+ long hstart = yy * dH - padH;
+ long wstart = xx * dW - padW;
+ long hend = fminf(hstart + kH, inputHeight + padH);
+ long wend = fminf(wstart + kW, inputWidth + padW);
+ int pool_size = (hend - hstart) * (wend - wstart);
+ hstart = fmaxf(hstart, 0);
+ wstart = fmaxf(wstart, 0);
+ hend = fminf(hend, inputHeight);
+ wend = fminf(wend, inputWidth);
+
+ real sum = 0;
+
+ int divide_factor;
+ if(count_include_pad)
+ divide_factor = pool_size;
+ else
+ divide_factor = (hend - hstart) * (wend - wstart);
+
+ long kx, ky;
+
+ for(ky = hstart; ky < hend; ky++)
+ {
+ for(kx = wstart; kx < wend; kx++)
+ sum += ptr_input[ky*inputWidth + kx];
+ }
+ /* Update output */
+ *ptr_output++ += sum/divide_factor;
+ }
+ }
+ }
+ }
+ THTensor_(free)(input);
+}
+
+void THNN_(SpatialAveragePooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ bool ceil_mode,
+ bool count_include_pad)
+{
+ int dimw = 2;
+ int dimh = 1;
+ int dimc = 0;
+ long nbatch = 1;
+ long ndim = 3;
+
+ long inputWidth;
+ long inputHeight;
+ long outputWidth;
+ long outputHeight;
+ long nInputPlane; // number of channels (or colors)
+
+ real *gradOutput_data;
+ real *input_data, *gradInput_data;
+
+ long k;
+
+ THNN_(SpatialAveragePooling_shapeCheck)
+ (input, gradOutput, kH, kW, dH, dW, padH, padW, ceil_mode);
+
+
+ if (input->nDimension == 4) {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimc++;
+ ndim = 4;
+ }
+
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ nInputPlane = input->size[dimc];
+
+ if(ceil_mode)
+ {
+ outputWidth = (long)(ceil((float)(inputWidth - kW + 2*padW) / dW)) + 1;
+ outputHeight = (long)(ceil((float)(inputHeight - kH + 2*padH) / dH)) + 1;
+ }
+ else
+ {
+ outputWidth = (long)(floor((float)(inputWidth - kW + 2*padW) / dW)) + 1;
+ outputHeight = (long)(floor((float)(inputHeight - kH + 2*padH) / dH)) + 1;
+ }
+ if (padW || padH)
+ {
+ // ensure that the last pooling starts inside the image
+ // needed to avoid problems in ceil mode
+ if ((outputHeight - 1)*dH >= inputHeight + padH)
+ --outputHeight;
+ if ((outputWidth - 1)*dW >= inputWidth + padW)
+ --outputWidth;
+ }
+
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+
+ THTensor_(resizeAs)(gradInput, input);
+
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ THArgCheck(THTensor_(isContiguous)(gradInput), 4, "gradInput must be contiguous");
+
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+
+#pragma omp parallel for private(k)
+ for(k = 0; k < nInputPlane; k++)
+ {
+ long p;
+ for(p = 0; p < nbatch; p++)
+ {
+ real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
+ long xx, yy;
+
+ real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
+
+ long i;
+ for(i = 0; i < inputWidth*inputHeight; i++)
+ ptr_gradInput[i] = 0.0;
+
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+ long hstart = yy * dH - padH;
+ long wstart = xx * dW - padW;
+ long hend = fminf(hstart + kH, inputHeight + padH);
+ long wend = fminf(wstart + kW, inputWidth + padW);
+ int pool_size = (hend - hstart) * (wend - wstart);
+ hstart = fmaxf(hstart, 0);
+ wstart = fmaxf(wstart, 0);
+ hend = fminf(hend, inputHeight);
+ wend = fminf(wend, inputWidth);
+
+ real z = *ptr_gradOutput++;
+
+ int divide_factor;
+ if(count_include_pad)
+ divide_factor = pool_size;
+ else
+ divide_factor = (hend - hstart) * (wend - wstart);
+
+ long kx, ky;
+ for(ky = hstart ; ky < hend; ky++)
+ {
+ for(kx = wstart; kx < wend; kx++)
+ ptr_gradInput[ky*inputWidth + kx] += z/divide_factor;
+ }
+ }
+ }
+ }
+ }
+
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialClassNLLCriterion.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialClassNLLCriterion.c
new file mode 100644
index 000000000..d711c8590
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialClassNLLCriterion.c
@@ -0,0 +1,131 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialClassNLLCriterion.c"
+#else
+
+#define INITIAL_CHECK \
+ THArgCheck(THIndexTensor_(nDimension)(target) == 3, 3, \
+ "only batches of spatial targets supported (3D tensors)" \
+ " but got targets of dimension: %d", \
+ THIndexTensor_(nDimension)(target)); \
+ THArgCheck(THTensor_(nDimension)(input) == 4, 2, \
+ "only batches of spatial inputs supported (4D tensors), " \
+ "but got input of dimension: %d", THTensor_(nDimension)(input)); \
+ if (weights && THTensor_(nElement)(weights) != THTensor_(size)(input, 1)) { \
+ THError("weight tensor should be defined either for all or no classes"); \
+ } \
+ \
+ { \
+ long input0 = THTensor_(size)(input, 0); \
+ long input1 = THTensor_(size)(input, 1); \
+ long input2 = THTensor_(size)(input, 2); \
+ long input3 = THTensor_(size)(input, 3); \
+ long target0 = THIndexTensor_(size)(target, 0); \
+ long target1 = THIndexTensor_(size)(target, 1); \
+ long target2 = THIndexTensor_(size)(target, 2); \
+ THAssertMsg(input0 == target0 && input2 == target1 && input3 == target2, \
+ "size mismatch (got input: %ldx%ldx%ldx%ld, target: %ldx%ldx%ld)", \
+ input0, input1, input2, input3, target0, target1, target2); \
+ }
+
+void THNN_(SpatialClassNLLCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *output,
+ bool sizeAverage,
+ THTensor *weights,
+ THTensor *total_weight)
+{
+ INITIAL_CHECK;
+
+ input = THTensor_(newContiguous)(input);
+ target = THIndexTensor_(newContiguous)(target);
+ weights = weights ? THTensor_(newContiguous)(weights) : NULL;
+
+ real *input_data = THTensor_(data)(input);
+ THIndex_t *target_data = THIndexTensor_(data)(target);
+ real *weights_data = weights ? THTensor_(data)(weights) : NULL;
+ real *output_data = THTensor_(data)(output);
+ real *total_weight_data = THTensor_(data)(total_weight);
+
+ long batch_size = THTensor_(size)(input, 0);
+ long n_classes = THTensor_(size)(input, 1);
+ long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
+ long sample_size = map_size * n_classes;
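+ // Layout note: input is contiguous NCHW, so the log-probability of class c at
+ // spatial position elem of sample b sits at input_data[b*sample_size + c*map_size + elem];
+ // the loop below accumulates output = -sum over (b, elem) of w[target] * input[b, target, elem],
+ // optionally normalized by the total weight when sizeAverage is set.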
+
+ real total_weight_acc = 0;
+ real output_acc = 0;
+ for (int b = 0; b < batch_size; b++) {
+ for (int elem = 0; elem < map_size; elem++) {
+ int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
+ THAssert(cur_target >= 0 && cur_target < n_classes);
+
+ real cur_weight = weights ? weights_data[cur_target] : 1.0f;
+ total_weight_acc += cur_weight;
+ output_acc -= input_data[b * sample_size + cur_target * map_size + elem] * cur_weight;
+ }
+ }
+ *total_weight_data = total_weight_acc;
+ *output_data = output_acc;
+
+ if (sizeAverage && *total_weight_data)
+ *output_data /= *total_weight_data;
+
+ THTensor_(free)(input);
+ THIndexTensor_(free)(target);
+ if (weights)
+ THTensor_(free)(weights);
+}
+
+void THNN_(SpatialClassNLLCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage,
+ THTensor *weights,
+ THTensor *total_weight)
+{
+ INITIAL_CHECK;
+ THArgCheck(THTensor_(isContiguous)(gradInput), 4,
+ "gradInput must be contiguous");
+
+ real *total_weight_data = THTensor_(data)(total_weight);
+ if (*total_weight_data <= 0)
+ return;
+
+ target = THIndexTensor_(newContiguous)(target);
+ weights = weights ? THTensor_(newContiguous)(weights) : NULL;
+
+ THIndex_t *target_data = THIndexTensor_(data)(target);
+ real *weights_data = weights ? THTensor_(data)(weights) : NULL;
+ real *gradInput_data = THTensor_(data)(gradInput);
+
+ long batch_size = THTensor_(size)(input, 0);
+ long n_classes = THTensor_(size)(input, 1);
+ long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
+ long sample_size = map_size * n_classes;
+
+ real normalize = sizeAverage ? *total_weight_data : 1.0f;
+
+ int b;
+ #pragma omp parallel for
+ for (b = 0; b < batch_size; b++) {
+ int elem;
+ for (elem = 0; elem < map_size; elem++) {
+ int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
+ THAssert(cur_target >= 0 && cur_target < n_classes);
+
+ gradInput_data[b * sample_size + cur_target * map_size + elem] =
+ -(weights ? weights_data[cur_target] : 1.0f) / normalize;
+ }
+ }
+
+ THIndexTensor_(free)(target);
+ if (weights)
+ THTensor_(free)(weights);
+}
+
+#undef INITIAL_CHECK
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionLocal.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionLocal.c
new file mode 100644
index 000000000..6db5a5db9
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionLocal.c
@@ -0,0 +1,367 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialConvolutionLocal.c"
+#else
+
+static inline void THNN_(SpatialConvolutionLocal_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ THTensor *weight, THTensor *bias,
+ int kH, int kW, int dH,
+ int dW, int padH, int padW,
+ long inputHeight, long inputWidth,
+ long outputHeight, long outputWidth) {
+
+ THArgCheck(kW > 0 && kH > 0, 9,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+ THArgCheck(dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+
+ long nInputPlane = weight->size[2] / (kH * kW);
+ long nOutputPlane = weight->size[1];
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 3, 0, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(bias, 3, 1, outputHeight);
+ THNN_CHECK_DIM_SIZE(bias, 3, 2, outputWidth);
+ }
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+static THTensor* THNN_(view_weight_local)(THTensor *_weight)
+{
+ THTensor *weight = THTensor_(newContiguous)(_weight);
+ THArgCheck(weight->nDimension == 3 || weight->nDimension == 6, 4,
+ "weight tensor should be 3D or 6D - got %dD", weight->nDimension);
+ if (weight->nDimension == 6) {
+ long s1 = weight->size[0] * weight->size[1];
+ long s2 = weight->size[2];
+ long s3 = weight->size[3] * weight->size[4] * weight->size[5];
+ THTensor *old_weight = weight;
+ weight = THTensor_(newWithStorage3d)(weight->storage,
+ weight->storageOffset,
+ s1, -1, s2, -1, s3, -1);
+ THTensor_(free)(old_weight);
+ }
+ return weight;
+}
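+
+// Shape note: a 6D local weight, presumably (oH, oW, nOutputPlane, nInputPlane, kH, kW),
+// is viewed above as 3D (oH*oW, nOutputPlane, nInputPlane*kH*kW) so that the
+// per-location filters can be applied with a single batched GEMM (baddbmm) below.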
+
+static void THNN_(SpatialConvolutionLocal_updateOutput_frame)
+ (
+ THTensor *input, THTensor *output,
+ THTensor *weight, THTensor *bias, THTensor *finput,
+ int kW, int kH, int dW, int dH, int padW, int padH,
+ long nInputPlane, long inputWidth, long inputHeight,
+ long nOutputPlane, long outputWidth, long outputHeight)
+{
+ long i;
+ THTensor *output3d, *finput3d;
+
+ THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ outputWidth, outputHeight);
+
+ THTensor_(copy)(output, bias);
+
+ output3d = THTensor_(newWithStorage3d)
+ (output->storage, output->storageOffset,
+ outputHeight * outputWidth, 1,
+ nOutputPlane, outputHeight * outputWidth,
+ 1, nOutputPlane * outputHeight * outputWidth);
+
+ finput3d = THTensor_(newWithStorage3d)
+ (finput->storage, finput->storageOffset,
+ outputHeight * outputWidth, 1,
+ kW * kH * nInputPlane, outputHeight * outputWidth,
+ 1, kW * kH * nInputPlane * outputHeight * outputWidth);
+
+ // weight: oH*oW x nOutputPlane x nInputPlane*kH*kW
+ // finput3d: oH*oW x nInputPlane*kH*kW x 1
+ THTensor_(baddbmm)(output3d, 1.0, output3d, 1.0, weight, finput3d);
+ // output3d: oH*oW x nOutputPlane x 1
+
+ THTensor_(free)(output3d);
+ THTensor_(free)(finput3d);
+}
+
+void THNN_(SpatialConvolutionLocal_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ long inputWidth, long inputHeight,
+ long outputWidth, long outputHeight)
+{
+ weight = THNN_(view_weight_local)(weight);
+
+ THNN_(SpatialConvolutionLocal_shapeCheck)
+ (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW,
+ inputHeight, inputWidth, outputHeight, outputWidth);
+
+ input = THTensor_(newContiguous)(input);
+
+ long nInputPlane = THTensor_(size)(weight, 2) / (kW * kH);
+ long nOutputPlane = THTensor_(size)(weight, 1);
+
+ if(input->nDimension == 3)
+ {
+ THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth);
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+
+ THNN_(SpatialConvolutionLocal_updateOutput_frame)
+ (input, output, weight, bias, finput,
+ kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+ }
+ else
+ {
+ long T = input->size[0];
+ long t;
+
+ THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth);
+ THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth);
+
+#pragma omp parallel for private(t)
+ for(t = 0; t < T; t++)
+ {
+ THTensor *input_t = THTensor_(newSelect)(input, 0, t);
+ THTensor *output_t = THTensor_(newSelect)(output, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(SpatialConvolutionLocal_updateOutput_frame)
+ (input_t, output_t, weight, bias, finput_t,
+ kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+
+ THTensor_(free)(input_t);
+ THTensor_(free)(output_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(weight);
+}
+
+
+static void THNN_(SpatialConvolutionLocal_updateGradInput_frame)
+ (THTensor *gradInput, THTensor *gradOutput,
+ THTensor *weight, THTensor *fgradInput,
+ int kW, int kH, int dW, int dH, int padW, int padH,
+ long nInputPlane, long inputWidth, long inputHeight,
+ long nOutputPlane, long outputWidth, long outputHeight)
+{
+ THTensor *gradOutput3d, *fgradInput3d;
+ gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
+ outputHeight*outputWidth, 1,
+ nOutputPlane, outputHeight*outputWidth,
+ 1, nOutputPlane*outputHeight*outputWidth);
+ fgradInput3d = THTensor_(newWithStorage3d)(fgradInput->storage, fgradInput->storageOffset,
+ outputHeight*outputWidth, 1,
+ kW*kH*nInputPlane, outputHeight*outputWidth,
+ 1, kW*kH*nInputPlane*outputHeight*outputWidth);
+ // weight: oH*oW x nInputPlane*kH*kW x nOutputPlane
+ // gradOutput3d: oH*oW x nOutputPlane x 1
+ THTensor_(baddbmm)(fgradInput3d, 0.0, fgradInput3d, 1.0, weight, gradOutput3d);
+ // fgradInput3d: oH*oW x nInputPlane*kH*kW x 1
+
+ THTensor_(free)(gradOutput3d);
+ THTensor_(free)(fgradInput3d);
+
+ THTensor_(zero)(gradInput);
+
+ THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ outputWidth, outputHeight);
+
+}
+
+void THNN_(SpatialConvolutionLocal_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ long inputWidth, long inputHeight,
+ long outputWidth, long outputHeight)
+{
+ weight = THNN_(view_weight_local)(weight);
+
+ THNN_(SpatialConvolutionLocal_shapeCheck)
+ (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW,
+ inputHeight, inputWidth, outputHeight, outputWidth);
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ long nInputPlane = THTensor_(size)(weight,2)/(kW*kH);
+ long nOutputPlane = THTensor_(size)(weight,1);
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(resizeAs)(fgradInput, finput);
+
+ THTensor *tweight = THTensor_(new)();
+ THTensor_(transpose)(tweight, weight, 1, 2);
+
+ if(input->nDimension == 3)
+ {
+ THNN_(SpatialConvolutionLocal_updateGradInput_frame)
+ (gradInput, gradOutput, tweight,
+ fgradInput, kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+ }
+ else
+ {
+ long T = input->size[0];
+ long t;
+
+#pragma omp parallel for private(t)
+ for(t = 0; t < T; t++)
+ {
+ THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
+
+ THNN_(SpatialConvolutionLocal_updateGradInput_frame)
+ (gradInput_t, gradOutput_t, tweight, fgradInput_t,
+ kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+
+ THTensor_(free)(gradInput_t);
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(fgradInput_t);
+ }
+ }
+
+ THTensor_(free)(tweight);
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+}
+
+static void THNN_(SpatialConvolutionLocal_accGradParameters_frame)
+ (THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
+ THTensor *finput, real scale,
+ int kW, int kH, int dW, int dH, int padW, int padH,
+ long nInputPlane, long inputWidth, long inputHeight,
+ long nOutputPlane, long outputWidth, long outputHeight)
+{
+
+ THTensor *gradOutput3d, *finput3d;
+ gradOutput3d = THTensor_(newWithStorage3d)(gradOutput->storage, gradOutput->storageOffset,
+ outputHeight*outputWidth, 1,
+ nOutputPlane, outputHeight*outputWidth,
+ 1, nOutputPlane*outputHeight*outputWidth);
+ finput3d = THTensor_(newWithStorage3d)(finput->storage, finput->storageOffset,
+ outputHeight*outputWidth, 1,
+ 1, kW*kH*nInputPlane*outputHeight*outputWidth,
+ kW*kH*nInputPlane, outputHeight*outputWidth);
+ // gradOutput3d: oH*oW x nOutputPlane x 1
+ // finput3d: oH*oW x 1 x kW*kH*nInputPlane
+ THTensor_(baddbmm)(gradWeight, 1.0, gradWeight, scale, gradOutput3d, finput3d);
+ // gradWeight: oH*oW x nOutputPlane x kW*kH*nInputPlane
+
+ THTensor_(cadd)(gradBias, gradBias, scale, gradOutput);
+
+ THTensor_(free)(gradOutput3d);
+ THTensor_(free)(finput3d);
+}
+
+void THNN_(SpatialConvolutionLocal_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ long inputWidth, long inputHeight,
+ long outputWidth, long outputHeight,
+ accreal scale_)
+{
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
+ THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ gradWeight = THNN_(view_weight_local)(gradWeight);
+
+ THNN_(SpatialConvolutionLocal_shapeCheck)
+ (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW,
+ inputHeight, inputWidth, outputHeight, outputWidth);
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ long nInputPlane = THTensor_(size)(gradWeight,2)/(kW*kH);
+ long nOutputPlane = THTensor_(size)(gradWeight,1);
+
+ if(input->nDimension == 3)
+ {
+ THNN_(SpatialConvolutionLocal_accGradParameters_frame)
+ (gradOutput, gradWeight, gradBias, finput, scale,
+ kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+ }
+ else
+ {
+ long T = input->size[0];
+ long t;
+
+ for(t = 0; t < T; t++)
+ {
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(SpatialConvolutionLocal_accGradParameters_frame)
+ (gradOutput_t, gradWeight, gradBias, finput_t, scale,
+ kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(gradWeight);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMM.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMM.c
new file mode 100644
index 000000000..28fea517c
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMM.c
@@ -0,0 +1,377 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialConvolutionMM.c"
+#else
+
+static inline void THNN_(SpatialConvolutionMM_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ THTensor *weight, THTensor *bias,
+ int kH, int kW, int dH, int dW, int padH, int padW) {
+
+ THArgCheck(kW > 0 && kH > 0, 9,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+ THArgCheck(dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+ THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 4, 5, weight,
+ "2D or 4D weight tensor expected, but got: %s");
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
+ }
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+
+ long nInputPlane = weight->size[1] / (kH * kW);
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long nOutputPlane = weight->size[0];
+ long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
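+ // Illustrative check (assumed values): inputHeight = 7, kH = 3, padH = 1,
+ // dH = 2 gives outputHeight = (7 + 2 - 3)/2 + 1 = 4.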
+
+ if (outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%d x %d x %d). "
+ "Calculated output size: (%d x %d x %d). Output size is too small",
+ nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+static THTensor* THNN_(view_weight_MM2d)(THTensor *weight) {
+ weight = THTensor_(newContiguous)(weight);
+ if (weight->nDimension == 4) {
+ long s1 = weight->size[0];
+ long s2 = weight->size[1] * weight->size[2] * weight->size[3];
+ THTensor *old_weight = weight;
+ weight = THTensor_(newWithStorage2d)(weight->storage, weight->storageOffset,
+ s1, -1, s2, -1);
+ THTensor_(free)(old_weight);
+ }
+ return weight;
+}
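+
+// Shape note: a 4D weight (nOutputPlane, nInputPlane, kH, kW) is viewed above as
+// 2D (nOutputPlane, nInputPlane*kH*kW), which reduces the whole convolution to
+// one GEMM against the unfolded input (see updateOutput_frame below).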
+
+static void THNN_(SpatialConvolutionMM_updateOutput_frame)(
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ long nInputPlane,
+ long inputWidth,
+ long inputHeight,
+ long nOutputPlane,
+ long outputWidth,
+ long outputHeight)
+{
+ long i;
+ THTensor *output2d;
+
+ THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ outputWidth, outputHeight);
+
+ output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
+ nOutputPlane, -1,
+ outputHeight*outputWidth, -1);
+ if (bias) {
+ for(i = 0; i < nOutputPlane; i++)
+ THVector_(fill)
+ (output->storage->data + output->storageOffset + output->stride[0] * i,
+ THTensor_(get1d)(bias, i), outputHeight*outputWidth);
+ } else {
+ THTensor_(zero)(output);
+ }
+
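+ // Shape note: after unfolded_copy, finput is (kW*kH*nInputPlane) x (oH*oW),
+ // so the addmm below is one GEMM:
+ //   output2d (nOutputPlane x oH*oW) += weight (nOutputPlane x kW*kH*nInputPlane) * finput
+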
+ THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);
+
+ THTensor_(free)(output2d);
+}
+
+void THNN_(SpatialConvolutionMM_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH)
+{
+ weight = THNN_(view_weight_MM2d)(weight);
+
+ THNN_(SpatialConvolutionMM_shapeCheck)
+ (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW);
+
+ input = THTensor_(newContiguous)(input);
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ long nInputPlane = input->size[dimf];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long nOutputPlane = weight->size[0];
+ long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
+
+ if(input->nDimension == 3)
+ {
+ THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth);
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+
+ THNN_(SpatialConvolutionMM_updateOutput_frame)
+ (input, output, weight, bias, finput,
+ kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+ }
+ else
+ {
+ long T = input->size[0];
+ long t;
+
+ THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth);
+ THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth);
+
+#pragma omp parallel for private(t)
+ for(t = 0; t < T; t++)
+ {
+ THTensor *input_t = THTensor_(newSelect)(input, 0, t);
+ THTensor *output_t = THTensor_(newSelect)(output, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(SpatialConvolutionMM_updateOutput_frame)
+ (input_t, output_t, weight, bias, finput_t,
+ kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+
+ THTensor_(free)(input_t);
+ THTensor_(free)(output_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(weight);
+}
+
+static void THNN_(SpatialConvolutionMM_updateGradInput_frame)(
+ THTensor *gradInput,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *fgradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH)
+{
+ THTensor *gradOutput2d = THTensor_(newWithStorage2d)
+ (gradOutput->storage, gradOutput->storageOffset,
+ gradOutput->size[0], -1,
+ gradOutput->size[1]*gradOutput->size[2], -1);
+ THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
+ THTensor_(free)(gradOutput2d);
+
+ THTensor_(zero)(gradInput);
+
+ THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH,
+ padW, padH,
+ gradInput->size[0], gradInput->size[2], gradInput->size[1],
+ gradOutput->size[2], gradOutput->size[1]);
+}
+
+void THNN_(SpatialConvolutionMM_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH)
+{
+ weight = THNN_(view_weight_MM2d)(weight);
+
+ THNN_(SpatialConvolutionMM_shapeCheck)
+ (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW);
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(resizeAs)(fgradInput, finput);
+
+ // depending on the BLAS library, fgradInput (result tensor) might
+ // be left uninitialized on zero alpha, which might lead to weird behavior
+ // hence, to be safe, zero it
+ THTensor_(zero)(fgradInput);
+ THTensor *tweight = THTensor_(new)();
+ THTensor_(transpose)(tweight, weight, 0, 1);
+
+ if(input->nDimension == 3)
+ {
+ THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput,
+ tweight, fgradInput,
+ kW, kH, dW, dH, padW, padH);
+ }
+ else
+ {
+ long T = input->size[0];
+ long t;
+
+#pragma omp parallel for private(t)
+ for(t = 0; t < T; t++)
+ {
+ THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
+
+ THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t,
+ tweight, fgradInput_t,
+ kW, kH, dW, dH, padW, padH);
+
+ THTensor_(free)(gradInput_t);
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(fgradInput_t);
+ }
+ }
+
+ THTensor_(free)(tweight);
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+}
+
+static void THNN_(SpatialConvolutionMM_accGradParameters_frame)(
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ real scale)
+{
+ long i;
+ THTensor *gradOutput2d = THTensor_(newWithStorage2d)
+ (gradOutput->storage, gradOutput->storageOffset,
+ gradOutput->size[0], -1,
+ gradOutput->size[1]*gradOutput->size[2], -1);
+
+ THTensor *tfinput = THTensor_(new)();
+ THTensor_(transpose)(tfinput, finput, 0, 1);
+ THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, tfinput);
+ THTensor_(free)(tfinput);
+
+ if (gradBias) {
+ for(i = 0; i < gradBias->size[0]; i++)
+ {
+ long k;
+ real sum = 0;
+ real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0];
+ for(k = 0; k < gradOutput2d->size[1]; k++)
+ sum += data[k];
+ (gradBias->storage->data + gradBias->storageOffset)[i] += scale*sum;
+ }
+ }
+
+ THTensor_(free)(gradOutput2d);
+}
+
+void THNN_(SpatialConvolutionMM_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ accreal scale_)
+{
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
+ if (gradBias)
+ THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ gradWeight = THNN_(view_weight_MM2d)(gradWeight);
+
+ THNN_(SpatialConvolutionMM_shapeCheck)
+ (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW);
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ if(input->nDimension == 3)
+ {
+ THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight,
+ gradBias, finput, scale);
+ }
+ else
+ {
+ long T = input->size[0];
+ long t;
+
+ for(t = 0; t < T; t++)
+ {
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight,
+ gradBias, finput_t, scale);
+
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(gradWeight);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMap.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMap.c
new file mode 100644
index 000000000..142a03551
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialConvolutionMap.c
@@ -0,0 +1,277 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialConvolutionMap.c"
+#else
+
+void THNN_(SpatialConvolutionMap_updateOutput)(
+ THNNState *state, THTensor *input, THTensor *output, THTensor *weight, THTensor *bias,
+ THTensor *connTable, int nInputPlane, int nOutputPlane,
+ int dW, int dH)
+{
+ THArgCheck(
+ weight != NULL && weight->nDimension == 3
+ && connTable != NULL && connTable->size[0] == weight->size[0], 4,
+ "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
+ );
+
+ int dimw = 2;
+ int dimh = 1;
+ int dimc = 0;
+ long nbatch = 1;
+
+ THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
+
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimc++;
+ dimw++;
+ dimh++;
+ }
+
+ const long kH = weight->size[1];
+ const long kW = weight->size[2];
+
+ THArgCheck(input->size[dimc] >= nInputPlane, 2, "invalid number of input planes");
+ THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH, 2, "input image smaller than kernel size");
+
+ const long input_w = input->size[dimw];
+ const long input_h = input->size[dimh];
+ const long output_w = (input_w - kW) / dW + 1;
+ const long output_h = (input_h - kH) / dH + 1;
+
+ if (input->nDimension == 3)
+ THTensor_(resize3d)(output, nOutputPlane, output_h, output_w);
+ else
+ THTensor_(resize4d)(output, input->size[0], nOutputPlane, output_h, output_w);
+
+ /* contiguous */
+ input = THTensor_(newContiguous)(input);
+ output = THTensor_(newContiguous)(output);
+ weight = THTensor_(newContiguous)(weight);
+ bias = bias ? THTensor_(newContiguous)(bias) : bias;
+ connTable = THTensor_(newContiguous)(connTable);
+
+ /* get raw pointers */
+ real *input_data = THTensor_(data)(input);
+ real *output_data = THTensor_(data)(output);
+ real *weight_data = THTensor_(data)(weight);
+ real *bias_data = bias ? THTensor_(data)(bias) : NULL;
+ real *connTable_data = THTensor_(data)(connTable);
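+
+ /* connTable layout: row k holds the pair {inputPlane, outputPlane}
+ (TH_INDEX_BASE-based), i.e. kernel k connects input map i to output map o;
+ the loops below scan the table once per output plane. */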
+
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nOutputPlane; p++)
+ {
+ long m;
+ for (m = 0; m < nbatch; m++)
+ {
+ /* add bias */
+ real *ptr_output = output_data + p*output_w*output_h + m*nOutputPlane*output_w*output_h;
+ long j, k;
+ real z = bias_data ? bias_data[p] : 0;
+ for (j = 0; j < output_h*output_w; j++)
+ ptr_output[j] = z;
+
+ /* convolve all maps */
+ int nweight = connTable->size[0];
+ for (k = 0; k < nweight; k++)
+ {
+ /* get offsets for input/output */
+ int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
+ int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
+
+ if (o == p)
+ {
+ THTensor_(validXCorr2Dptr)(
+ output_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h,
+ 1.0,
+ input_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, input_h, input_w,
+ weight_data + k*kW*kH,
+ kH, kW,
+ dH, dW
+ );
+ }
+ }
+ }
+ }
+
+ /* clean up */
+ THTensor_(free)(input);
+ THTensor_(free)(output);
+ THTensor_(free)(weight);
+ if (bias) THTensor_(free)(bias);
+ THTensor_(free)(connTable);
+}
+
+void THNN_(SpatialConvolutionMap_updateGradInput)(
+ THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THTensor *weight, THTensor *bias,
+ THTensor *connTable, int nInputPlane, int nOutputPlane,
+ int dW, int dH)
+{
+ THArgCheck(
+ weight != NULL && weight->nDimension == 3
+ && connTable != NULL && connTable->size[0] == weight->size[0], 5,
+ "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
+ );
+
+ /* and dims */
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ const long input_h = input->size[dimh];
+ const long input_w = input->size[dimw];
+ const long output_h = gradOutput->size[dimh];
+ const long output_w = gradOutput->size[dimw];
+ const long kH = weight->size[1];
+ const long kW = weight->size[2];
+
+ /* contiguous */
+ gradInput = THTensor_(newContiguous)(gradInput);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ weight = THTensor_(newContiguous)(weight);
+ connTable = THTensor_(newContiguous)(connTable);
+
+ /* Resize/Zero */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* get raw pointers */
+ real *gradInput_data = THTensor_(data)(gradInput);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *weight_data = THTensor_(data)(weight);
+ real *connTable_data = THTensor_(data)(connTable);
+
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nInputPlane; p++)
+ {
+ long m;
+ for (m = 0; m < nbatch; m++)
+ {
+ long k;
+ /* backward all */
+ int nkernel = connTable->size[0];
+ for (k = 0; k < nkernel; k++)
+ {
+ int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
+ int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
+ if (i == p)
+ {
+ /* gradient to input */
+ THTensor_(fullConv2Dptr)(
+ gradInput_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, 1.0,
+ gradOutput_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h, output_h, output_w,
+ weight_data + k*kW*kH, kH, kW, dH, dW
+ );
+ }
+ }
+ }
+ }
+
+ /* clean up */
+ THTensor_(free)(gradInput);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+ THTensor_(free)(connTable);
+}
+
+void THNN_(SpatialConvolutionMap_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *connTable,
+ int nInputPlane,
+ int nOutputPlane,
+ int dW, int dH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THArgCheck(
+ gradWeight != NULL && gradWeight->nDimension == 3
+ && connTable != NULL && connTable->size[0] == gradWeight->size[0], 5,
+ "3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
+ );
+
+ /* and dims */
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ const long input_h = input->size[dimh];
+ const long input_w = input->size[dimw];
+ const long output_h = gradOutput->size[dimh];
+ const long output_w = gradOutput->size[dimw];
+ const long kH = gradWeight->size[1];
+ const long kW = gradWeight->size[2];
+
+ /* contiguous */
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
+ THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+
+ /* get raw pointers */
+ real *input_data = THTensor_(data)(input);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *gradWeight_data = THTensor_(data)(gradWeight);
+ real *gradBias_data = THTensor_(data)(gradBias);
+
+
+ long k;
+ /* gradients wrt bias */
+#pragma omp parallel for private(k)
+ for (k = 0; k < nOutputPlane; k++)
+ {
+ long m;
+ for (m = 0; m < nbatch; m++)
+ {
+ real *ptr_gradOutput = gradOutput_data + k*output_w*output_h + m*nOutputPlane*output_w*output_h;
+ long l;
+ for (l = 0; l < output_h*output_w; l++)
+ gradBias_data[k] += scale*ptr_gradOutput[l];
+ }
+ }
+
+ /* gradients wrt weight */
+ const int nkernel = connTable->size[0];
+#pragma omp parallel for private(k)
+ for (k = 0; k < nkernel; k++)
+ {
+ long m;
+ for (m = 0; m < nbatch; m++)
+ {
+ int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE;
+ int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE;
+
+ /* gradient to kernel */
+ THTensor_(validXCorr2DRevptr)(
+ gradWeight_data + k*kW*kH,
+ scale,
+ input_data + i*input_w*input_h + m*nInputPlane*input_w*input_h, input_h, input_w,
+ gradOutput_data + o*output_w*output_h + m*nOutputPlane*output_w*output_h , output_h, output_w,
+ dH, dW
+ );
+ }
+ }
+
+ /* clean up */
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialDepthWiseConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDepthWiseConvolution.c
new file mode 100644
index 000000000..efb66a3e3
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDepthWiseConvolution.c
@@ -0,0 +1,528 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialDepthWiseConvolution.c"
+#else
+
+static inline void THNN_(SpatialDepthWiseConvolution_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ THTensor *weight, THTensor *bias,
+ int kH, int kW, int dH, int dW, int padH, int padW) {
+
+ THArgCheck(kW > 0 && kH > 0, 9,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+ THArgCheck(dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+ THNN_ARGCHECK(weight->nDimension == 4, 5, weight,
+ "4D weight tensor expected, but got: %s");
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 2, 0, weight->size[0]);
+ THNN_CHECK_DIM_SIZE(bias, 2, 1, weight->size[1]);
+ }
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+
+ long nInputPlane = weight->size[1];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long nOutputPlane = weight->size[0];
+ long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
+
+ if (outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%d x %d x %d). "
+ "Calculated output size: (%d x %d x %d). Output size is too small",
+ nInputPlane,inputHeight,inputWidth,nOutputPlane*nInputPlane,outputHeight,outputWidth);
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimf, nInputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimh, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimw, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim + 1, dimw + 1, outputWidth);
+ }
+}
+
+static void THNN_(SpatialDepthWiseConvolution_updateOutput_frame)(
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ long nInputPlane,
+ long inputWidth,
+ long inputHeight,
+ long nOutputPlane,
+ long outputWidth,
+ long outputHeight)
+{
+ long i;
+ THTensor *output2d;
+
+ THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH,
+ nInputPlane, inputWidth, inputHeight,
+ outputWidth, outputHeight);
+
+ output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
+ nOutputPlane, -1,
+ outputHeight*outputWidth, -1);
+ if (bias) {
+ for(i = 0; i < nOutputPlane; i++)
+ THVector_(fill)
+ (output->storage->data + output->storageOffset + output->stride[0] * i,
+ THTensor_(get1d)(bias, i), outputHeight*outputWidth);
+ } else {
+ THTensor_(zero)(output);
+ }
+
+ THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);
+
+ THTensor_(free)(output2d);
+}
+
+void THNN_(SpatialDepthWiseConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH)
+{
+ long nInputPlane = weight->nDimension == 2 ? weight->size[1]/(kH*kW) : weight->size[1];
+ long nOutputPlane = weight->size[0];
+ if (weight->nDimension == 2) {
+ THTensor_(resize4d)(weight, nOutputPlane, nInputPlane, kH, kW);
+ }
+
+ THNN_(SpatialDepthWiseConvolution_shapeCheck)
+ (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW);
+
+ THTensor *_weight = THTensor_(newTranspose)(weight, 0, 1);
+ weight = THTensor_(newContiguous)(_weight);
+
+ THTensor *_bias = NULL;
+ if(bias) {
+ _bias = THTensor_(newTranspose)(bias, 0, 1);
+ bias = THTensor_(newContiguous)(_bias);
+ }
+
+ // resize weight
+ long s1 = weight->size[0];
+ long s2 = weight->size[1];
+ long s3 = weight->size[2] * weight->size[3];
+ weight = THTensor_(newWithStorage3d)(weight->storage, weight->storageOffset,
+ s1, -1, s2, -1, s3, -1);
+
+ input = THTensor_(newContiguous)(input);
+
+ int ndim = input->nDimension;
+
+ int batch = 1;
+ if (ndim == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ }
+
+ long inputHeight = input->size[2];
+ long inputWidth = input->size[3];
+ long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
+
+ long T = input->size[0];
+ long t;
+
+ THTensor_(resize5d)(output, T, nInputPlane, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize4d)(finput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth);
+
+#pragma omp parallel for private(t)
+ for(t = 0; t < T; t++)
+ {
+ THTensor *input_t = THTensor_(newSelect)(input, 0, t);
+ THTensor *output_t = THTensor_(newSelect)(output, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ long i;
+#pragma omp parallel for private(i)
+ for(i = 0; i < nInputPlane; i++)
+ {
+ THTensor *weight_i = THTensor_(newSelect)(weight, 0, i);
+ THTensor *input_i = THTensor_(newNarrow)(input_t, 0, i, 1);
+ THTensor *output_i = THTensor_(newSelect)(output_t, 0, i);
+ THTensor *finput_i = THTensor_(newSelect)(finput_t, 0, i);
+ THTensor *bias_i = NULL;
+ if(bias) {
+ bias_i = THTensor_(newSelect)(bias, 0, i);
+ }
+ THNN_(SpatialDepthWiseConvolution_updateOutput_frame)
+ (input_i, output_i, weight_i, bias_i, finput_i,
+ kW, kH, dW, dH, padW, padH,
+ 1, inputWidth, inputHeight,
+ nOutputPlane, outputWidth, outputHeight);
+
+ THTensor_(free)(input_i);
+ THTensor_(free)(weight_i);
+ THTensor_(free)(bias_i);
+ THTensor_(free)(output_i);
+ THTensor_(free)(finput_i);
+ }
+ THTensor_(free)(input_t);
+ THTensor_(free)(output_t);
+ THTensor_(free)(finput_t);
+ }
+
+ THTensor_(free)(weight);
+ THTensor_(free)(_weight);
+ THTensor_(free)(bias);
+ THTensor_(free)(_bias);
+ THTensor_(resize4d)(output, T, nInputPlane * nOutputPlane, outputHeight, outputWidth);
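+ // Layout note: every input plane is convolved with its own set of nOutputPlane
+ // filters, so the flattened output carries nInputPlane*nOutputPlane channels
+ // (nOutputPlane acts as a depth multiplier).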
+
+ if (batch == 0) {
+ THTensor_(select)(output, NULL, 0, 0);
+ THTensor_(select)(input, NULL, 0, 0);
+ THTensor_(select)(finput, NULL, 0, 0);
+ }
+ THTensor_(free)(input);
+}
+
+static void THNN_(SpatialDepthWiseConvolution_updateGradInput_frame)(
+ THTensor *gradInput,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *fgradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH)
+{
+ THTensor *gradOutput2d = THTensor_(newWithStorage2d)
+ (gradOutput->storage, gradOutput->storageOffset,
+ gradOutput->size[0], -1,
+ gradOutput->size[1]*gradOutput->size[2], -1);
+ THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
+ THTensor_(free)(gradOutput2d);
+
+ THTensor_(zero)(gradInput);
+
+ THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH,
+ padW, padH,
+ gradInput->size[0], gradInput->size[2], gradInput->size[1],
+ gradOutput->size[2], gradOutput->size[1]);
+}
+
+void THNN_(SpatialDepthWiseConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH)
+{
+ long nInputPlane = weight->nDimension == 2 ? weight->size[1]/(kH*kW) : weight->size[1];
+ long nOutputPlane = weight->size[0];
+ if (weight->nDimension == 2) {
+ THTensor_(resize4d)(weight, nOutputPlane, nInputPlane, kH, kW);
+ }
+ gradOutput = THTensor_(newWithTensor)(gradOutput);
+
+ if (input->nDimension == 3) {
+ if (gradOutput->nDimension == 3) {
+ THTensor_(resize4d)(gradOutput, nInputPlane, nOutputPlane, gradOutput->size[1], gradOutput->size[2]);
+ }
+ }
+ else
+ {
+ if (gradOutput->nDimension == 4) {
+ THTensor_(resize5d)(gradOutput, gradOutput->size[0], nInputPlane, nOutputPlane, gradOutput->size[2], gradOutput->size[3]);
+ }
+ }
+
+
+ THNN_(SpatialDepthWiseConvolution_shapeCheck)
+ (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW);
+
+ THTensor *_weight = THTensor_(newTranspose)(weight, 0, 1);
+ weight = THTensor_(newContiguous)(_weight);
+
+
+ // resize weight
+ long s1 = weight->size[0];
+ long s2 = weight->size[1];
+ long s3 = weight->size[2] * weight->size[3];
+ weight = THTensor_(newWithStorage3d)(weight->storage, weight->storageOffset,
+ s1, -1, s2, -1, s3, -1);
+
+ input = THTensor_(newContiguous)(input);
+
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+ }
+
+ long inputHeight = input->size[2];
+ long inputWidth = input->size[3];
+ long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
+
+ long T = input->size[0];
+ long t;
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(resize4d)(fgradInput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth);
+
+ // depending on the BLAS library, fgradInput (result tensor) might
+ // be left uninitialized on zero alpha, which might lead to weird behavior
+ // hence, to be safe, zero it
+ THTensor_(zero)(fgradInput);
+
+#pragma omp parallel for private(t)
+ for(t = 0; t < T; t++)
+ {
+ THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
+
+
+ long i;
+#pragma omp parallel for private(i)
+ for(i = 0; i < nInputPlane; i++)
+ {
+ THTensor *weight_i = THTensor_(newSelect)(weight, 0, i);
+ THTensor *gradInput_i = THTensor_(newNarrow)(gradInput_t, 0, i, 1);
+ THTensor *gradOutput_i = THTensor_(newSelect)(gradOutput_t, 0, i);
+ THTensor *fgradInput_i = THTensor_(newSelect)(fgradInput_t, 0, i);
+
+ THTensor_(transpose)(weight_i, weight_i, 0, 1);
+
+ THNN_(SpatialDepthWiseConvolution_updateGradInput_frame)(gradInput_i, gradOutput_i,
+ weight_i, fgradInput_i,
+ kW, kH, dW, dH, padW, padH);
+
+ THTensor_(free)(gradInput_i);
+ THTensor_(free)(weight_i);
+ THTensor_(free)(gradOutput_i);
+ THTensor_(free)(fgradInput_i);
+ }
+
+ THTensor_(free)(gradInput_t);
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(fgradInput_t);
+ }
+
+ if (batch == 0) {
+ THTensor_(select)(gradOutput, NULL, 0, 0);
+ THTensor_(select)(input, NULL, 0, 0);
+ THTensor_(select)(gradInput, NULL, 0, 0);
+ THTensor_(select)(fgradInput, NULL, 0, 0);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+ THTensor_(free)(_weight);
+}
+
+static void THNN_(SpatialDepthWiseConvolution_accGradParameters_frame)(
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ accreal scale)
+{
+ long i;
+ THTensor *gradOutput2d = THTensor_(newWithStorage2d)
+ (gradOutput->storage, gradOutput->storageOffset,
+ gradOutput->size[0], -1,
+ gradOutput->size[1]*gradOutput->size[2], -1);
+
+ THTensor_(transpose)(finput, finput, 0, 1);
+ THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput);
+ THTensor_(transpose)(finput, finput, 0, 1);
+
+ if (gradBias) {
+ for(i = 0; i < gradBias->size[0]; i++)
+ {
+ long k;
+ real sum = 0;
+ real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0];
+ for(k = 0; k < gradOutput2d->size[1]; k++)
+ sum += data[k];
+ (gradBias->storage->data + gradBias->storageOffset)[i] += scale*sum;
+ }
+ }
+
+ THTensor_(free)(gradOutput2d);
+}
+
+void THNN_(SpatialDepthWiseConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ accreal scale)
+{
+ long nInputPlane = gradWeight->nDimension == 2 ? gradWeight->size[1]/(kH*kW) : gradWeight->size[1];
+ long nOutputPlane = gradWeight->size[0];
+ if (gradWeight->nDimension == 2) {
+ THTensor_(resize4d)(gradWeight, nOutputPlane, nInputPlane, kH, kW);
+ }
+
+ gradOutput = THTensor_(newWithTensor)(gradOutput);
+ if (input->nDimension == 3) {
+ if (gradOutput->nDimension == 3) {
+ THTensor_(resize4d)(gradOutput, nInputPlane, nOutputPlane, gradOutput->size[1], gradOutput->size[2]);
+ }
+ }
+ else
+ {
+ if (gradOutput->nDimension == 4) {
+ THTensor_(resize5d)(gradOutput, gradOutput->size[0], nInputPlane, nOutputPlane, gradOutput->size[2], gradOutput->size[3]);
+ }
+ }
+
+
+ THNN_(SpatialDepthWiseConvolution_shapeCheck)
+ (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW);
+
+ // Transpose gradWeight & gradBias
+ THTensor_(transpose)(gradWeight, NULL, 0, 1);
+ THTensor *_gradWeight;
+ _gradWeight = gradWeight;
+ gradWeight = THTensor_(newContiguous)(gradWeight);
+
+ THTensor *_gradBias = NULL;
+ if(gradBias) {
+ THTensor_(transpose)(gradBias, NULL, 0, 1);
+ _gradBias = gradBias;
+ gradBias = THTensor_(newContiguous)(gradBias);
+ }
+
+ // resize gradWeight
+ long s1 = gradWeight->size[0];
+ long s2 = gradWeight->size[1];
+ long s3 = gradWeight->size[2] * gradWeight->size[3];
+ gradWeight = THTensor_(newWithStorage3d)(gradWeight->storage, gradWeight->storageOffset,
+ s1, -1, s2, -1, s3, -1);
+
+ input = THTensor_(newContiguous)(input);
+
+
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+ }
+
+ long inputHeight = input->size[2];
+ long inputWidth = input->size[3];
+ long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
+
+ long T = input->size[0];
+ long t;
+ THTensor_(resize4d)(finput, T, nInputPlane, kW*kH*1, outputHeight*outputWidth);
+
+ for(t = 0; t < T; t++)
+ {
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+ long i;
+#pragma omp parallel for private(i)
+ for(i = 0; i < nInputPlane; i++)
+ {
+ THTensor *finput_i = THTensor_(newSelect)(finput_t, 0, i);
+ THTensor *gradOutput_i = THTensor_(newSelect)(gradOutput_t, 0, i);
+ THTensor *gradWeight_i = THTensor_(newSelect)(gradWeight, 0, i);
+ THTensor *gradBias_i = NULL;
+ if(gradBias) {
+ gradBias_i = THTensor_(newSelect)(gradBias, 0, i);
+ }
+ THNN_(SpatialDepthWiseConvolution_accGradParameters_frame)(gradOutput_i, gradWeight_i,
+ gradBias_i, finput_i, scale);
+
+ THTensor_(free)(finput_i);
+ THTensor_(free)(gradOutput_i);
+ THTensor_(free)(gradWeight_i);
+ THTensor_(free)(gradBias_i);
+ }
+
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(finput_t);
+ }
+
+ // Copy back and transpose back
+ THTensor_(transpose)(_gradWeight, NULL, 0, 1);
+ THTensor_(resize4d)(_gradWeight, nInputPlane, nOutputPlane, kH, kW);
+ THTensor_(copy)(_gradWeight, gradWeight);
+ THTensor_(transpose)(_gradWeight, NULL, 0, 1);
+
+ if(gradBias) {
+ THTensor_(transpose)(_gradBias, NULL, 0, 1);
+ THTensor_(resize2d)(_gradBias, nInputPlane, nOutputPlane);
+ THTensor_(copy)(_gradBias, gradBias);
+ THTensor_(transpose)(_gradBias, NULL, 0, 1);
+ }
+
+ if (batch == 0) {
+ THTensor_(select)(gradOutput, NULL, 0, 0);
+ THTensor_(select)(input, NULL, 0, 0);
+ THTensor_(select)(finput, NULL, 0, 0);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(gradWeight);
+ THTensor_(free)(gradBias);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedConvolution.c
new file mode 100644
index 000000000..897cc0da4
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedConvolution.c
@@ -0,0 +1,408 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialDilatedConvolution.c"
+#else
+
+static inline void THNN_(SpatialDilatedConvolution_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ THTensor *weight, THTensor *bias,
+ int kH, int kW, int dH, int dW, int padH, int padW,
+ int dilationH, int dilationW) {
+
+ THNN_ARGCHECK(weight->nDimension == 4, 4, weight,
+ "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
+ "but got: %s");
+ THArgCheck(kW > 0 && kH > 0, 9,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+ THArgCheck(dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+ THArgCheck(dilationW > 0 && dilationH > 0, 15,
+ "dilation should be greater than zero, but got dilationH: %d, dilationW: %d",
+ dilationH, dilationW);
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
+ }
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+
+ long nInputPlane = weight->size[1];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long nOutputPlane = weight->size[0];
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
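+ // Illustrative (assumed values): kW = 3 with dilationW = 2 covers an effective
+ // width of dilationW*(kW - 1) + 1 = 5, so a 9-wide input with padW = 0, dW = 1
+ // yields outputWidth = (9 - 5)/1 + 1 = 5.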
+
+ if (outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%ld x %ld x %ld). "
+ "Calculated output size: (%ld x %ld x %ld). Output size is too small",
+ nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+void THNN_(SpatialDilatedConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH)
+{
+
+ THNN_(SpatialDilatedConvolution_shapeCheck)
+ (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW,
+ dilationH, dilationW);
+
+ // Params:
+ int nInputPlane = weight->size[1];
+ int nOutputPlane = weight->size[0];
+
+ input = THTensor_(newContiguous)(input);
+ weight = THTensor_(newContiguous)(weight);
+ bias = bias ? THTensor_(newContiguous)(bias) : bias;
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ }
+ long inputWidth = input->size[3];
+ long inputHeight = input->size[2];
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(zero)(output);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth);
+
+ // Define a buffer of ones, for bias accumulation
+ // Note: this buffer can be shared with other modules, it only ever gets increased,
+ // and always contains ones.
+ if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize2d)(ones, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *output_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+  for (int elt = 0; elt < batchSize; elt++) {
+    // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(output_n, output, 0, elt);
+
+ // Do Bias first:
+ // M,N,K are dims of matrix A and B
+ long m_ = nOutputPlane;
+ long n_ = outputHeight * outputWidth;
+ long k_ = 1;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
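+    // With k_ = 1 this is the rank-1 product bias * ones^T, broadcasting
+    // bias[p] to every one of the n_ spatial positions of output plane p.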
+ if (bias) {
+ THBlas_(gemm)(
+ 't', 'n',
+ n_, m_, k_,
+ 1,
+ THTensor_(data)(ones), k_,
+ THTensor_(data)(bias), k_,
+ 0,
+ THTensor_(data)(output_n), n_
+ );
+ } else {
+ THTensor_(zero)(output_n);
+ }
+
+ // Extract columns:
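+    // im2col unrolls each dilated kH x kW receptive field into one column of
+    // `columns` ((nInputPlane*kW*kH) x (outputHeight*outputWidth)), so the
+    // whole convolution reduces to the single GEMM below.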
+ THNN_(im2col)(
+ THTensor_(data)(input_n),
+ nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
+ dilationH, dilationW,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ long m = nOutputPlane;
+ long n = columns->size[1];
+ long k = nInputPlane*kH*kW;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
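+    // A row-major matrix read as column-major is its transpose, so swapping
+    // the operands computes output_n^T = columns^T * weight^T, i.e.
+    // output_n += weight * columns in the row-major view.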
+ THBlas_(gemm)(
+ 'n', 'n',
+ n, m, k,
+ 1,
+ THTensor_(data)(columns), n,
+ THTensor_(data)(weight), k,
+ 1,
+ THTensor_(data)(output_n), n
+ );
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(output_n);
+
+ // Resize output
+ if (batch == 0) {
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(weight);
+ if (bias) THTensor_(free)(bias);
+}
+
+void THNN_(SpatialDilatedConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH)
+{
+ THNN_(SpatialDilatedConvolution_shapeCheck)
+ (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW,
+ dilationH, dilationW);
+
+ // Params
+ int nInputPlane = weight->size[1];
+ int nOutputPlane = weight->size[0];
+
+ input = THTensor_(newContiguous)(input);
+ weight = THTensor_(newContiguous)(weight);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1],
+ gradOutput->size[2]);
+ }
+
+ long inputWidth = input->size[3];
+ long inputHeight = input->size[2];
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(gradColumns, nInputPlane*kW*kH, outputHeight*outputWidth);
+ THTensor_(zero)(gradColumns);
+
+ // Helpers
+ THTensor *gradInput_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+  for (int elt = 0; elt < batchSize; elt++) {
+    // Matrix multiply per sample:
+ THTensor_(select)(gradInput_n, gradInput, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // M,N,K are dims of matrix A and B
+ long m = nInputPlane*kW*kH;
+ long n = gradColumns->size[1];
+ long k = nOutputPlane;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
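+    // Row-major view: gradColumns = weight^T * gradOutput_n.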
+ THBlas_(gemm)(
+ 'n', 't',
+ n, m, k,
+ 1,
+ THTensor_(data)(gradOutput_n), n,
+ THTensor_(data)(weight), m,
+ 0,
+ THTensor_(data)(gradColumns), n
+ );
+
+ // Unpack columns back into input:
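+    // col2im is the adjoint of im2col: entries from overlapping windows are
+    // accumulated back into their source pixels of gradInput_n.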
+ THNN_(col2im)(
+ THTensor_(data)(gradColumns),
+ nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
+ dilationH, dilationW,
+ THTensor_(data)(gradInput_n)
+ );
+ }
+
+ // Free
+ THTensor_(free)(gradInput_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize output
+ if (batch == 0) {
+ THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+}
+
+
+void THNN_(SpatialDilatedConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THNN_(SpatialDilatedConvolution_shapeCheck)
+ (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW,
+ dilationH, dilationW);
+
+ // Params
+ int nInputPlane = gradWeight->size[1];
+ int nOutputPlane = gradWeight->size[0];
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
+ if (gradBias)
+ THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0],
+ gradOutput->size[1], gradOutput->size[2]);
+ }
+
+ long inputWidth = input->size[3];
+ long inputHeight = input->size[2];
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Define a buffer of ones, for bias accumulation
+ if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize2d)(ones, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth);
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+  for (int elt = 0; elt < batchSize; elt++) {
+    // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(im2col)(
+ THTensor_(data)(input_n),
+ nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,
+ dilationH, dilationW,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ long m = nOutputPlane;
+ long n = nInputPlane*kW*kH;
+ long k = columns->size[1];
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
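+    // Row-major view: gradWeight += scale * gradOutput_n * columns^T, an
+    // outer-product sum over all output positions of this sample.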
+ THBlas_(gemm)(
+ 't', 'n',
+ n, m, k,
+ scale,
+ THTensor_(data)(columns), k,
+ THTensor_(data)(gradOutput_n), k,
+ 1,
+ THTensor_(data)(gradWeight), n
+ );
+
+ // Do Bias:
+ // M,N,K are dims of matrix A and B
+ long m_ = nOutputPlane;
+ long k_ = outputHeight * outputWidth;
+
+ // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
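+    // Multiplying by the ones vector reduces each plane of gradOutput_n to
+    // its spatial sum: gradBias[p] += scale * sum_{h,w} gradOutput_n[p][h][w].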
+ if (gradBias) {
+ THBlas_(gemv)(
+ 't',
+ k_, m_,
+ scale,
+ THTensor_(data)(gradOutput_n), k_,
+ THTensor_(data)(ones), 1,
+ 1,
+ THTensor_(data)(gradBias), 1
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize
+ if (batch == 0) {
+ THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedMaxPooling.c
new file mode 100644
index 000000000..8f4ad13c3
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialDilatedMaxPooling.c
@@ -0,0 +1,401 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialDilatedMaxPooling.c"
+#else
+
+static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)(
+ THTensor *input, THTensor *gradOutput, THIndexTensor *indices,
+ int kH, int kW, int dH, int dW, int padH, int padW,
+ int dilationH, int dilationW, bool ceil_mode) {
+
+ THArgCheck(kW > 0 && kH > 0, 5,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+ THArgCheck(dW > 0 && dH > 0, 8,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+ THArgCheck(dilationH > 0 && dilationW > 0, 12,
+ "dilation should be greater than zero, but got dilationH: %d dilationW: %d",
+ dilationH, dilationW);
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+
+ THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
+ "pad should be smaller than half of kernel size, but got "
+ "padW = %d, padH = %d, kW = %d, kH = %d",
+ padW, padH, kW, kH);
+
+ long nInputPlane = input->size[dimh-1];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long outputHeight, outputWidth;
+ long nOutputPlane = nInputPlane;
+
+ if (ceil_mode)
+ {
+ outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
+ outputWidth = (long)(ceil((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
+ }
+ else
+ {
+ outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
+ outputWidth = (long)(floor((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
+ }
+
+ if (padW || padH)
+ {
+ // ensure that the last pooling starts inside the image
+ // needed to avoid problems in ceil mode
+ if ((outputHeight - 1)*dH >= inputHeight + padH)
+ --outputHeight;
+ if ((outputWidth - 1)*dW >= inputWidth + padW)
+ --outputWidth;
+ }
+
+ if (outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%dx%dx%d). "
+ "Calculated output size: (%dx%dx%d). Output size is too small",
+ nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+ if (indices != NULL) {
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth);
+ }
+}
+
+static void THNN_(SpatialDilatedMaxPooling_updateOutput_frame)(
+ real *input_p,
+ real *output_p,
+ THIndex_t *ind_p,
+ long nslices,
+ long iwidth,
+ long iheight,
+ long owidth,
+ long oheight,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ int dilationW,
+ int dilationH
+ )
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ /* loop over output */
+ long i, j;
+ real *ip = input_p + k*iwidth*iheight;
+ for(i = 0; i < oheight; i++)
+ {
+ for(j = 0; j < owidth; j++)
+ {
+ long hstart = i * dH - padH;
+ long wstart = j * dW - padW;
+ long hend = fminf(hstart + (kH - 1) * dilationH + 1, iheight);
+ long wend = fminf(wstart + (kW - 1) * dilationW + 1, iwidth);
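+        // Advance negative window starts by whole dilation steps so the
+        // sampled positions stay on the dilation grid once inside the image.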
+ while(hstart < 0)
+ hstart += dilationH;
+ while(wstart < 0)
+ wstart += dilationW;
+
+ /* local pointers */
+ real *op = output_p + k*owidth*oheight + i*owidth + j;
+ THIndex_t *indp = ind_p + k*owidth*oheight + i*owidth + j;
+
+ /* compute local max: */
+ long maxindex = -1;
+ real maxval = -THInf;
+ long tcntr = 0;
+ long x,y;
+ for(y = hstart; y < hend; y += dilationH)
+ {
+ for(x = wstart; x < wend; x += dilationW)
+ {
+ tcntr = y*iwidth + x;
+ real val = *(ip + tcntr);
+ if (val > maxval)
+ {
+ maxval = val;
+ maxindex = tcntr;
+ }
+ }
+ }
+
+ /* set output to local max */
+ *op = maxval;
+
+ /* store location of max */
+ *indp = maxindex + TH_INDEX_BASE;
+ }
+ }
+ }
+}
+
+void THNN_(SpatialDilatedMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ int dilationW,
+ int dilationH,
+ bool ceil_mode)
+{
+
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ long nInputPlane;
+ long inputHeight;
+ long inputWidth;
+ long outputHeight;
+ long outputWidth;
+ real *input_data;
+ real *output_data;
+ THIndex_t *indices_data;
+
+ THNN_(SpatialDilatedMaxPooling_shapeCheck)
+ (input, NULL, NULL, kH, kW, dH, dW,
+ padH, padW, dilationH, dilationW, ceil_mode);
+
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nInputPlane = input->size[dimh-1];
+ inputHeight = input->size[dimh];
+ inputWidth = input->size[dimw];
+ if (ceil_mode)
+ {
+ outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
+ outputWidth = (long)(ceil((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
+ }
+ else
+ {
+ outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
+ outputWidth = (long)(floor((float)(inputWidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
+ }
+
+ if (padW || padH)
+ {
+ // ensure that the last pooling starts inside the image
+ // needed to avoid problems in ceil mode
+ if ((outputHeight - 1)*dH >= inputHeight + padH)
+ --outputHeight;
+ if ((outputWidth - 1)*dW >= inputWidth + padW)
+ --outputWidth;
+ }
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ /* resize output */
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
+ /* indices will contain the locations for each output point */
+ THIndexTensor_(resize3d)(indices, nInputPlane, outputHeight, outputWidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
+ (input_data, output_data,
+ indices_data,
+ nInputPlane,
+ inputWidth, inputHeight,
+ outputWidth, outputHeight,
+ kW, kH, dW, dH,
+ padW, padH,
+ dilationW, dilationH
+ );
+ }
+ else
+ {
+ long p;
+
+ THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth);
+ /* indices will contain the locations for each output point */
+ THIndexTensor_(resize4d)(indices, nbatch, nInputPlane, outputHeight, outputWidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
+ (input_data+p*nInputPlane*inputWidth*inputHeight,
+ output_data+p*nInputPlane*outputWidth*outputHeight,
+ indices_data+p*nInputPlane*outputWidth*outputHeight,
+ nInputPlane,
+ inputWidth, inputHeight,
+ outputWidth, outputHeight,
+ kW, kH, dW, dH,
+ padW, padH,
+ dilationW, dilationH
+ );
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)(
+ real *gradInput_p,
+ real *gradOutput_p,
+ THIndex_t *ind_p,
+ long nInputPlane,
+ long inputWidth,
+ long inputHeight,
+ long outputWidth,
+ long outputHeight,
+ int dW,
+ int dH)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nInputPlane; k++)
+ {
+ real *gradInput_p_k = gradInput_p + k*inputWidth*inputHeight;
+ real *gradOutput_p_k = gradOutput_p + k*outputWidth*outputHeight;
+ THIndex_t *ind_p_k = ind_p + k*outputWidth*outputHeight;
+
+ /* calculate max points */
+ long i, j;
+ for(i = 0; i < outputHeight; i++)
+ {
+ for(j = 0; j < outputWidth; j++)
+ {
+ /* retrieve position of max */
+ long maxp = ind_p_k[i*outputWidth + j] - TH_INDEX_BASE;
+ if (maxp != -1) {
+ /* update gradient */
+ gradInput_p_k[maxp] += gradOutput_p_k[i*outputWidth + j];
+ }
+ }
+ }
+ }
+}
+
+void THNN_(SpatialDilatedMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ int dilationW,
+ int dilationH,
+ bool ceil_mode)
+{
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ int nInputPlane;
+ int inputHeight;
+ int inputWidth;
+ int outputHeight;
+ int outputWidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+ THIndex_t *indices_data;
+
+ THNN_(SpatialDilatedMaxPooling_shapeCheck)
+ (input, gradOutput, indices, kH, kW, dH, dW,
+ padH, padW, dilationH, dilationW, ceil_mode);
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (input->nDimension == 4) {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nInputPlane = input->size[dimh-1];
+ inputHeight = input->size[dimh];
+ inputWidth = input->size[dimw];
+ outputHeight = gradOutput->size[dimh];
+ outputWidth = gradOutput->size[dimw];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THIndexTensor_(data)(indices);
+
+ /* backprop */
+ if (input->nDimension == 3)
+ {
+ THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
+ (gradInput_data, gradOutput_data,
+ indices_data,
+ nInputPlane,
+ inputWidth, inputHeight,
+ outputWidth, outputHeight,
+ dW, dH);
+ }
+ else
+ {
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
+ (gradInput_data+p*nInputPlane*inputWidth*inputHeight,
+ gradOutput_data+p*nInputPlane*outputWidth*outputHeight,
+ indices_data+p*nInputPlane*outputWidth*outputHeight,
+ nInputPlane,
+ inputWidth, inputHeight,
+ outputWidth, outputHeight,
+ dW, dH);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialFractionalMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFractionalMaxPooling.c
new file mode 100644
index 000000000..a98954cc6
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFractionalMaxPooling.c
@@ -0,0 +1,253 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialFractionalMaxPooling.c"
+#else
+
+static long* THNN_(SpatialFractionalMaxPooling_generateIntervals)(
+ real sample,
+ long inputSize,
+ long outputSize,
+ int poolSize) {
+ real alpha = (real) (inputSize - poolSize) / (real) (outputSize - 1);
+ long* sequence = (long*) THAlloc(sizeof(long) * outputSize);
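+  /* sequence[i] = floor((i + sample) * alpha) - floor(sample * alpha), so
+     consecutive starts differ by floor(alpha) or ceil(alpha); the random
+     sample in [0, 1) only shifts where the long and short steps occur. */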
+
+ long i;
+ for (i = 0; i < outputSize - 1; ++i) {
+ sequence[i] =
+ (long) ((i + sample) * alpha) - (long) (sample * alpha);
+ }
+ sequence[outputSize - 1] = inputSize - poolSize;
+
+ return sequence;
+}
+
+static void THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
+ real* input,
+ real* output,
+ THIndex_t* indices,
+ real* randomSamples,
+ long numPlanes,
+ long inputW, long inputH,
+ long outputW, long outputH,
+ int poolSizeW, int poolSizeH) {
+ long plane;
+#pragma omp parallel for private(plane)
+ for (plane = 0; plane < numPlanes; ++plane) {
+ /* each plane contains 2 random samples, one for W and one for H */
+ real* randomSamplesForPlane = randomSamples + plane * 2;
+
+ /* Generate interval sequence */
+ long* sequenceW =
+ THNN_(SpatialFractionalMaxPooling_generateIntervals)(
+ randomSamplesForPlane[0], inputW, outputW, poolSizeW);
+ long* sequenceH =
+ THNN_(SpatialFractionalMaxPooling_generateIntervals)(
+ randomSamplesForPlane[1], inputH, outputH, poolSizeH);
+
+ /* loop over output */
+ long h, w;
+
+ real* inputForPlane = input + plane * inputW * inputH;
+ real* outputForPlane = output + plane * outputW * outputH;
+ THIndex_t* indicesForPlane = indices + plane * outputW * outputH;
+
+ for (h = 0; h < outputH; ++h) {
+ long inputHStart = sequenceH[h];
+
+ for (w = 0; w < outputW; ++w) {
+ long inputWStart = sequenceW[w];
+
+ real maxVal = -THInf;
+ long maxIndex = -1;
+
+ long h2, w2;
+ for (h2 = inputHStart; h2 < inputHStart + poolSizeH; ++h2) {
+ for (w2 = inputWStart; w2 < inputWStart + poolSizeW; ++w2) {
+ THAssert(h2 >= 0 && h2 < inputH);
+ THAssert(w2 >= 0 && w2 < inputW);
+
+ long planeIndex = h2 * inputW + w2;
+ real val = inputForPlane[planeIndex];
+ if (val > maxVal) {
+ maxVal = val;
+ maxIndex = planeIndex;
+ }
+ }
+ }
+
+ THAssert(maxVal != -THInf);
+ THAssert(maxIndex != -1);
+
+ outputForPlane[h * outputW + w] = maxVal;
+ /* +1 to lua index */
+ indicesForPlane[h * outputW + w] = maxIndex + TH_INDEX_BASE;
+ }
+ }
+
+ THFree(sequenceW);
+ THFree(sequenceH);
+ }
+}
+
+void THNN_(SpatialFractionalMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int outputW, int outputH,
+ int poolSizeW, int poolSizeH,
+ THIndexTensor *indices,
+ THTensor *randomSamples) {
+
+ long numBatch = 1;
+ int planeDim = 0;
+ int heightDim = 1;
+ int widthDim = 2;
+
+ long numInputDims = THTensor_(nDimension)(input);
+ THNN_ARGCHECK(numInputDims == 3 || numInputDims == 4, 2, input,
+ "3D or 4D (batch mode) tensor expected for input, but got: %s");
+
+ if (numInputDims == 4) {
+ numBatch = THTensor_(size)(input, 0);
+ planeDim++;
+ heightDim++;
+ widthDim++;
+ }
+
+ /* sizes */
+ long numPlanes = THTensor_(size)(input, planeDim);
+ long inputH = THTensor_(size)(input, heightDim);
+ long inputW = THTensor_(size)(input, widthDim);
+
+  THArgCheck(outputH + poolSizeH - 1 < inputH, 7,
+             "poolSizeH (%d) too large relative to input height (%ld)",
+             poolSizeH, inputH);
+  THArgCheck(outputW + poolSizeW - 1 < inputW, 6,
+             "poolSizeW (%d) too large relative to input width (%ld)",
+             poolSizeW, inputW);
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ if (numInputDims == 3) {
+ /* resize output */
+ THTensor_(resize3d)(output, numPlanes, outputH, outputW);
+ /* indices will contain the locations for each output point */
+ THIndexTensor_(resize3d)(indices, numPlanes, outputH, outputW);
+
+ THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
+ THTensor_(data)(input),
+ THTensor_(data)(output),
+ THIndexTensor_(data)(indices),
+ THTensor_(data)(randomSamples),
+ numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH);
+ } else {
+ THTensor_(resize4d)(output, numBatch, numPlanes, outputH, outputW);
+ /* indices will contain the locations for each output point */
+ THIndexTensor_(resize4d)(indices, numBatch, numPlanes, outputH, outputW);
+
+ long batch;
+#pragma omp parallel for private(batch)
+ for (batch = 0; batch < numBatch; ++batch) {
+ THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
+ THTensor_(data)(input) + batch * numPlanes * inputH * inputW,
+ THTensor_(data)(output) + batch * numPlanes * outputH * outputW,
+ THIndexTensor_(data)(indices) + batch * numPlanes * outputH * outputW,
+ THTensor_(data)(randomSamples) + batch * numPlanes * 2,
+ numPlanes, inputW, inputH, outputW, outputH, poolSizeW, poolSizeH);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
+ real* gradInput,
+ real* gradOutput,
+ THIndex_t* indices,
+ long numPlanes,
+ long inputW, long inputH,
+ long outputW, long outputH) {
+ long plane;
+#pragma omp parallel for private(plane)
+ for (plane = 0; plane < numPlanes; plane++) {
+ real* gradInputForPlane = gradInput + plane * inputW * inputH;
+ real* gradOutputForPlane = gradOutput + plane * outputW * outputH;
+ THIndex_t* indicesForPlane = indices + plane * outputW * outputH;
+
+ long h, w;
+ for (h = 0; h < outputH; ++h) {
+ for (w = 0; w < outputW; ++w) {
+ long outputIndex = h * outputW + w;
+ long index = indicesForPlane[outputIndex] - TH_INDEX_BASE;
+ THAssert(index >= 0 && index < inputW * inputH);
+
+ gradInputForPlane[index] += gradOutputForPlane[outputIndex];
+ }
+ }
+ }
+}
+
+void THNN_(SpatialFractionalMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int outputW, int outputH,
+ int poolSizeW, int poolSizeH,
+ THIndexTensor *indices) {
+
+ long numBatch = 1;
+ int planeDim = 0;
+ int heightDim = 1;
+ int widthDim = 2;
+
+ long numInputDims = THTensor_(nDimension)(input);
+ if (numInputDims == 4) {
+ numBatch = THTensor_(size)(input, 0);
+ planeDim = 1;
+ heightDim++;
+ widthDim++;
+ }
+
+ /* sizes */
+ long numPlanes = THTensor_(size)(input, planeDim);
+ long inputH = THTensor_(size)(input, heightDim);
+ long inputW = THTensor_(size)(input, widthDim);
+
+ THArgCheck(outputW == THTensor_(size)(gradOutput, widthDim), 3,
+ "gradOutput width unexpected");
+ THArgCheck(outputH == THTensor_(size)(gradOutput, heightDim), 3,
+ "gradOutput height unexpected");
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* backprop */
+ if (numInputDims == 3) {
+ THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
+ THTensor_(data)(gradInput),
+ THTensor_(data)(gradOutput),
+ THIndexTensor_(data)(indices),
+ numPlanes, inputW, inputH, outputW, outputH);
+ } else {
+ long batch;
+#pragma omp parallel for private(batch)
+ for (batch = 0; batch < numBatch; ++batch) {
+ THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
+ THTensor_(data)(gradInput) + batch * numPlanes * inputH * inputW,
+ THTensor_(data)(gradOutput) + batch * numPlanes * outputH * outputW,
+ THIndexTensor_(data)(indices) + batch * numPlanes * outputH * outputW,
+ numPlanes, inputW, inputH, outputW, outputH);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c
new file mode 100644
index 000000000..2edc53b5a
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolution.c
@@ -0,0 +1,462 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialFullConvolution.c"
+#else
+
+static void THNN_(im2col)(const real* data_im, const int channels,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ real* data_col) {
+ const int height_col = (height + 2 * pad_h -
+ (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ const int width_col = (width + 2 * pad_w -
+ (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+ const int channels_col = channels * kernel_h * kernel_w;
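+  // Row c_col of data_col holds the input value that channel c_im contributes
+  // at kernel offset (h_offset, w_offset) for every output location, with
+  // zeros where the window falls into the padding.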
+ for (int c_col = 0; c_col < channels_col; ++c_col) {
+ int w_offset = c_col % kernel_w;
+ int h_offset = (c_col / kernel_w) % kernel_h;
+ int c_im = c_col / kernel_h / kernel_w;
+ for (int h_col = 0; h_col < height_col; ++h_col) {
+ for (int w_col = 0; w_col < width_col; ++w_col) {
+ int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
+ int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
+ data_col[(c_col * height_col + h_col) * width_col + w_col] =
+ (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
+ data_im[(c_im * height + h_im) * width + w_im] : 0;
+ }
+ }
+ }
+}
+
+static void THNN_(col2im)(const real* data_col, const int channels,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ real* data_im) {
+ memset(data_im, 0, sizeof(real) * height * width * channels);
+ const int height_col = (height + 2 * pad_h -
+ (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ const int width_col = (width + 2 * pad_w -
+ (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+ const int channels_col = channels * kernel_h * kernel_w;
+ for (int c_col = 0; c_col < channels_col; ++c_col) {
+ int w_offset = c_col % kernel_w;
+ int h_offset = (c_col / kernel_w) % kernel_h;
+ int c_im = c_col / kernel_h / kernel_w;
+ for (int h_col = 0; h_col < height_col; ++h_col) {
+ for (int w_col = 0; w_col < width_col; ++w_col) {
+ int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
+ int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
+ if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
+ data_im[(c_im * height + h_im) * width + w_im] +=
+ data_col[(c_col * height_col + h_col) * width_col + w_col];
+ }
+ }
+ }
+}
+
+static inline void THNN_(SpatialFullConvolution_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ THTensor *weight, THTensor *bias,
+ int kH, int kW, int dH, int dW, int padH, int padW, int adjH, int adjW) {
+
+ THArgCheck(kW > 0 && kH > 0, 9,
+ "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
+ THArgCheck(dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+ THArgCheck(adjW < dW && adjH < dH, 15,
+ "output adjustment must be smaller than stride, but got adjH: %d adjW: %d dH: %d dW: %d",
+ adjH, adjW, dH, dW);
+ THNN_ARGCHECK(weight->nDimension == 2 || weight->nDimension == 4, 5, weight,
+ "2D or 4D weight tensor expected, but got: %s");
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]);
+ }
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimh = 1;
+ int dimw = 2;
+
+ if (ndim == 4) {
+ dimf++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+
+ long nInputPlane = weight->size[0];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long nOutputPlane = weight->size[1];
+ long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
+ long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
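+  // This inverts the convolution size formula o = (i + 2p - k)/d + 1, e.g.
+  // inputHeight = 4, dH = 2, padH = 1, kH = 3, adjH = 0 gives
+  // (4-1)*2 - 2 + 3 = 7; adjH/adjW (checked above to be smaller than the
+  // stride) disambiguate among the sizes a strided convolution would
+  // collapse to the same input size.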
+
+ if (outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%d x %d x %d). "
+ "Calculated output size: (%d x %d x %d). Output size is too small",
+ nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+void THNN_(SpatialFullConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH)
+{
+ THNN_(SpatialFullConvolution_shapeCheck)
+ (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW, adjH, adjW);
+
+ int nInputPlane = THTensor_(size)(weight,0);
+ int nOutputPlane = THTensor_(size)(weight,1);
+
+ input = THTensor_(newContiguous)(input);
+ weight = THTensor_(newContiguous)(weight);
+ bias = bias ? THTensor_(newContiguous)(bias) : bias;
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ }
+
+ long inputHeight = input->size[2];
+ long inputWidth = input->size[3];
+ long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
+ long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
+ THTensor_(zero)(columns);
+
+ // Define a buffer of ones, for bias accumulation
+  // Note: this buffer can be shared with other modules; it only ever grows
+  // and always contains ones.
+ if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize2d)(ones, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *output_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+  for (elt = 0; elt < batchSize; elt++) {
+    // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(output_n, output, 0, elt);
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long m = weight->size[1] * weight->size[2] * weight->size[3];
+ long n = columns->size[1];
+ long k = weight->size[0];
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
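+    // Row-major view: columns = weight^T * input_n; col2im below then
+    // scatter-adds these columns into the (larger) output image, the exact
+    // adjoint of the im2col + GEMM convolution forward.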
+ THBlas_(gemm)(
+ 'n', 't',
+ n, m, k,
+ 1,
+ THTensor_(data)(input_n), n,
+ THTensor_(data)(weight), m,
+ 0,
+ THTensor_(data)(columns), n
+ );
+
+    // Unpack columns back into the output:
+ THNN_(col2im)(
+ THTensor_(data)(columns),
+ nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
+ 1, 1,
+ THTensor_(data)(output_n)
+ );
+
+ // Do Bias after:
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long m_ = nOutputPlane;
+ long n_ = outputHeight * outputWidth;
+ long k_ = 1;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ if (bias) {
+ THBlas_(gemm)(
+ 't', 'n',
+ n_, m_, k_,
+ 1,
+ THTensor_(data)(ones), k_,
+ THTensor_(data)(bias), k_,
+ 1,
+ THTensor_(data)(output_n), n_
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(output_n);
+
+ // Resize output
+ if (batch == 0) {
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(weight);
+ if (bias) THTensor_(free)(bias);
+}
+
+void THNN_(SpatialFullConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH)
+{
+ THNN_(SpatialFullConvolution_shapeCheck)
+ (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, adjH, adjW);
+
+ int nInputPlane = THTensor_(size)(weight,0);
+ int nOutputPlane = THTensor_(size)(weight,1);
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ weight = THTensor_(newContiguous)(weight);
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
+ }
+
+ long inputWidth = input->size[3];
+ long inputHeight = input->size[2];
+ long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
+ long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
+ THTensor_(zero)(gradInput);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);
+
+ // Helpers
+ THTensor *gradInput_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+  for (elt = 0; elt < batchSize; elt++) {
+    // Matrix multiply per sample:
+ THTensor_(select)(gradInput_n, gradInput, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(im2col)(
+ THTensor_(data)(gradOutput_n),
+ nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
+ 1, 1,
+ THTensor_(data)(gradColumns)
+ );
+
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long m = weight->size[0];
+ long n = gradColumns->size[1];
+ long k = weight->size[1] * weight->size[2] * weight->size[3];
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 'n',
+ n, m, k,
+ 1,
+ THTensor_(data)(gradColumns), n,
+ THTensor_(data)(weight), k,
+ 0,
+ THTensor_(data)(gradInput_n), n
+ );
+ }
+
+
+ // Free
+ THTensor_(free)(gradInput_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize output
+ if (batch == 0) {
+ THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+}
+
+
+void THNN_(SpatialFullConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THNN_(SpatialFullConvolution_shapeCheck)
+ (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, adjH, adjW);
+
+ int nInputPlane = THTensor_(size)(gradWeight,0);
+ int nOutputPlane = THTensor_(size)(gradWeight,1);
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
+ if (gradBias)
+ THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+ int batch = 1;
+ if (input->nDimension == 3) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
+ THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
+ }
+
+ long inputWidth = input->size[3];
+ long inputHeight = input->size[2];
+ long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
+ long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Define a buffer of ones, for bias accumulation
+ if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize2d)(ones, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+  for (elt = 0; elt < batchSize; elt++) {
+    // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(im2col)(
+ THTensor_(data)(gradOutput_n),
+ nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
+ 1, 1,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long n = columns->size[0]; // nOutputPlane * kh * kw
+ long m = input_n->size[0]; // nInputPlane
+ long k = columns->size[1]; // inputHeight * inputWidth
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 't', 'n',
+ n, m, k,
+ scale,
+ THTensor_(data)(columns), k,
+ THTensor_(data)(input_n), k,
+ 1,
+ THTensor_(data)(gradWeight), n
+ );
+
+
+ // Do Bias:
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ long m_ = nOutputPlane;
+ long k_ = outputHeight * outputWidth;
+
+ // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
+ if (gradBias) {
+ THBlas_(gemv)(
+ 't',
+ k_, m_,
+ scale,
+ THTensor_(data)(gradOutput_n), k_,
+ THTensor_(data)(ones), 1,
+ 1,
+ THTensor_(data)(gradBias), 1
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize
+ if (batch == 0) {
+ THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolutionMap.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolutionMap.c
new file mode 100644
index 000000000..6952fbe25
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialFullConvolutionMap.c
@@ -0,0 +1,222 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialFullConvolutionMap.c"
+#else
+
+void THNN_(SpatialFullConvolutionMap_updateOutput)(
+ THNNState *state, THTensor *input, THTensor *output_, THTensor *weight, THTensor *bias,
+ THTensor *connTable, int nInputPlane, int nOutputPlane,
+ int dW, int dH)
+{
+ THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+ THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");
+ THArgCheck(
+ weight != NULL && weight->nDimension == 3
+ && connTable != NULL && connTable->size[0] == weight->size[0], 4,
+ "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
+ );
+
+ const int kH = (int)weight->size[1];
+ const int kW = (int)weight->size[2];
+
+ THArgCheck(input != NULL && input->nDimension == 3, 2, "3D tensor expected");
+ THArgCheck(input->size[0] >= nInputPlane, 2, "invalid number of input planes");
+
+ THTensor_(resize3d)(
+ output_, nOutputPlane,
+ (input->size[1] - 1) * dH + kH,
+ (input->size[2] - 1) * dW + kW
+ );
+
+ /* contiguous */
+ input = THTensor_(newContiguous)(input);
+ THTensor* output = THTensor_(newContiguous)(output_);
+
+ /* get raw pointers */
+ real *input_data = THTensor_(data)(input);
+ real *output_data = THTensor_(data)(output);
+ real *weight_data = THTensor_(data)(weight);
+ real *bias_data = THTensor_(data)(bias);
+ real *connTable_data = THTensor_(data)(connTable);
+
+ /* and dims */
+ const long input_h = input->size[1];
+ const long input_w = input->size[2];
+ const long output_h = output->size[1];
+ const long output_w = output->size[2];
+ const long weight_h = weight->size[1];
+ const long weight_w = weight->size[2];
+
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nOutputPlane; p++)
+ {
+ /* add bias */
+ real *ptr_output = output_data + p*output_w*output_h;
+ long j;
+ int nweight;
+ long k;
+
+ for (j = 0; j < output_h*output_w; j++)
+ ptr_output[j] = bias_data[p];
+
+ /* convolve all maps */
+ nweight = connTable->size[0];
+ for (k = 0; k < nweight; k++)
+ {
+ /* get offsets for input/output */
+ int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
+ int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
+
+ if (o == p)
+ {
+ THTensor_(fullConv2Dptr)(
+ output_data + o*output_w*output_h,
+ 1.0,
+ input_data + i*input_w*input_h, input_h, input_w,
+ weight_data + k*weight_w*weight_h, weight_h, weight_w,
+ dH, dW
+ );
+ }
+ }
+ }
+
+ /* clean up */
+ THTensor_(free)(input);
+ THTensor_(freeCopyTo)(output, output_);
+}
+
+void THNN_(SpatialFullConvolutionMap_updateGradInput)(
+ THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput_, THTensor *weight, THTensor *bias,
+ THTensor *connTable, int nInputPlane, int nOutputPlane,
+ int dW, int dH)
+{
+ THArgCheck(
+ weight != NULL && weight->nDimension == 3
+ && connTable != NULL && connTable->size[0] == weight->size[0], 5,
+ "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
+ );
+
+ /* contiguous */
+ THTensor* gradInput = THTensor_(newContiguous)(gradInput_);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* Resize/Zero */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* get raw pointers */
+ real *gradInput_data = THTensor_(data)(gradInput);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *weight_data = THTensor_(data)(weight);
+ real *connTable_data = THTensor_(data)(connTable);
+
+ /* and dims */
+ const long input_h = input->size[1];
+ const long input_w = input->size[2];
+ const long output_h = gradOutput->size[1];
+ const long output_w = gradOutput->size[2];
+ const long kH = weight->size[1];
+ const long kW = weight->size[2];
+
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nInputPlane; p++)
+ {
+ long k;
+ /* backward all */
+ int nkernel = connTable->size[0];
+ for (k = 0; k < nkernel; k++)
+ {
+ int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
+ int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
+ if (i == p)
+ {
+ /* gradient to input */
+ THTensor_(validXCorr2Dptr)(
+ gradInput_data + i*input_w*input_h,
+ 1.0,
+ gradOutput_data + o*output_w*output_h, output_h, output_w,
+ weight_data + k*kW*kH, kH, kW,
+ dH, dW
+ );
+ }
+ }
+ }
+
+ /* clean up */
+ THTensor_(freeCopyTo)(gradInput, gradInput_);
+ THTensor_(free)(gradOutput);
+}
+
+void THNN_(SpatialFullConvolutionMap_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *connTable,
+ int nInputPlane,
+ int nOutputPlane,
+ int dW, int dH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THArgCheck(
+ gradWeight != NULL && gradWeight->nDimension == 3
+ && connTable != NULL && connTable->size[0] == gradWeight->size[0], 5,
+ "3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
+ );
+
+ /* contiguous */
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* get raw pointers */
+ real *input_data = THTensor_(data)(input);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *gradWeight_data = THTensor_(data)(gradWeight);
+ real *gradBias_data = THTensor_(data)(gradBias);
+
+ /* and dims */
+ const long input_h = input->size[1];
+ const long input_w = input->size[2];
+ const long output_h = gradOutput->size[1];
+ const long output_w = gradOutput->size[2];
+ const long weight_h = gradWeight->size[1];
+ const long weight_w = gradWeight->size[2];
+
+ /* gradients wrt bias */
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nOutputPlane; k++)
+ {
+ real *ptr_gradOutput = gradOutput_data + k*output_w*output_h;
+ long l;
+ for (l = 0; l < output_h*output_w; l++)
+ gradBias_data[k] += scale*ptr_gradOutput[l];
+ }
+
+ /* gradients wrt weight */
+ int nkernel = connTable->size[0];
+#pragma omp parallel for private(k)
+ for (k = 0; k < nkernel; k++)
+ {
+ int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE;
+ int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE;
+
+ /* gradient to kernel */
+ THTensor_(validXCorr2DRevptr)(
+ gradWeight_data + k*weight_w*weight_h,
+ scale,
+ gradOutput_data + o*output_w*output_h, output_h, output_w,
+ input_data + i*input_w*input_h, input_h, input_w,
+ dH, dW
+ );
+ }
+
+ /* clean up */
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxPooling.c
new file mode 100644
index 000000000..88aaa40e1
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxPooling.c
@@ -0,0 +1,44 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c"
+#else
+
+void THNN_(SpatialMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ bool ceil_mode)
+{
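+  // Plain max pooling is dilated max pooling with dilationW = dilationH = 1.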
+ THNN_(SpatialDilatedMaxPooling_updateOutput)(
+ state, input, output, indices,
+ kW, kH, dW, dH, padW, padH, 1, 1, ceil_mode
+ );
+}
+
+void THNN_(SpatialMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ bool ceil_mode)
+{
+ THNN_(SpatialDilatedMaxPooling_updateGradInput)(
+ state, input, gradOutput, gradInput, indices,
+ kW, kH, dW, dH, padW, padH, 1, 1, ceil_mode
+ );
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxUnpooling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxUnpooling.c
new file mode 100644
index 000000000..320538686
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialMaxUnpooling.c
@@ -0,0 +1,234 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialMaxUnpooling.c"
+#else
+
+static void THNN_(SpatialMaxUnpooling_updateOutput_frame)(real *input_p, real *output_p,
+ THIndex_t *ind_p,
+ int nslices,
+ int iwidth, int iheight,
+ int owidth, int oheight)
+{
+ int k;
+ int has_error = 0;
+ THIndex_t error_index;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ real *output_p_k = output_p + k*owidth*oheight;
+ real *input_p_k = input_p + k*iwidth*iheight;
+ THIndex_t *ind_p_k = ind_p + k*iwidth*iheight;
+
+ int i, j;
+ THIndex_t maxp;
+ for(i = 0; i < iheight; i++)
+ {
+ for(j = 0; j < iwidth; j++)
+ {
+ maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
+ if(maxp<0 || maxp>=owidth*oheight){
+#pragma omp critical
+ {
+ has_error = 1;
+ error_index = maxp;
+ }
+ } else {
+ output_p_k[maxp] = input_p_k[i*iwidth + j]; /* update output */
+ }
+ }
+ }
+ }
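+  /* Raising THError inside the OpenMP parallel region is unsafe, so an
+     invalid index is only recorded (under a critical section) here and
+     reported once the loop has finished. */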
+ if (has_error) {
+ THError("found an invalid max index %ld (output volumes are of size %dx%d)",
+ error_index, oheight, owidth);
+ }
+}
+
+void THNN_(SpatialMaxUnpooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int owidth, int oheight)
+{
+ int dimw = 2;
+ int dimh = 1;
+ int nbatch = 1;
+ int nslices;
+ int iheight;
+ int iwidth;
+ real *input_data;
+ real *output_data;
+ THIndex_t *indices_data;
+
+
+ THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+ "3D or 4D (batch mode) tensor expected for input, but got: %s");
+ THNN_CHECK_SHAPE_INDICES(input, indices);
+
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimh-1];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+
+ /* get contiguous input and indices */
+ input = THTensor_(newContiguous)(input);
+ indices = THIndexTensor_(newContiguous)(indices);
+
+ /* resize output */
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nslices, oheight, owidth);
+ THTensor_(zero)(output);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data,
+ indices_data,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight);
+ }
+ else
+ {
+ int p;
+
+ THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
+ THTensor_(zero)(output);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialMaxUnpooling_updateOutput_frame)(
+ input_data+p*nslices*iwidth*iheight,
+ output_data+p*nslices*owidth*oheight,
+ indices_data+p*nslices*iwidth*iheight,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+ THIndexTensor_(free)(indices);
+}
+
+static void THNN_(SpatialMaxUnpooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
+ THIndex_t *ind_p,
+ int nslices,
+ int iwidth, int iheight,
+ int owidth, int oheight)
+{
+ int k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
+ real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
+ THIndex_t *ind_p_k = ind_p + k*iwidth*iheight;
+
+ int i, j;
+ THIndex_t maxp;
+ for(i = 0; i < iheight; i++)
+ {
+ for(j = 0; j < iwidth; j++)
+ {
+ maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
+ if(maxp < 0 || maxp >= owidth * oheight) {
+ THError("invalid max index %ld, owidth= %d, oheight= %d", maxp, owidth, oheight);
+ }
+ gradInput_p_k[i*iwidth + j] = gradOutput_p_k[maxp]; /* update gradient */
+ }
+ }
+ }
+}
+
+void THNN_(SpatialMaxUnpooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int owidth, int oheight)
+{
+ int dimw = 2;
+ int dimh = 1;
+ int nbatch = 1;
+ int nslices;
+ int iheight;
+ int iwidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+ THIndex_t *indices_data;
+
+ THNN_CHECK_SHAPE_INDICES(input, indices);
+
+ /* get contiguous gradOutput and indices */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ indices = THIndexTensor_(newContiguous)(indices);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (input->nDimension == 4) {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimh-1];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+
+ if(owidth!=gradOutput->size[dimw] || oheight!=gradOutput->size[dimh]){
+ THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d",
+ oheight, owidth, gradOutput->size[dimh], gradOutput->size[dimw]);
+ }
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THIndexTensor_(data)(indices);
+
+ /* backprop */
+ if (input->nDimension == 3)
+ {
+ THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
+ indices_data,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight);
+ }
+ else
+ {
+ int p;
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
+ indices_data+p*nslices*iwidth*iheight,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+ THIndexTensor_(free)(indices);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c
new file mode 100644
index 000000000..dcde660ea
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReflectionPadding.c
@@ -0,0 +1,260 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialReflectionPadding.c"
+#else
+
+static void THNN_(SpatialReflectionPadding_updateOutput_frame)(
+ real *input_p, real *output_p,
+ long nslices,
+ long iwidth, long iheight,
+ long owidth, long oheight,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b)
+{
+ int iStartX = fmax(0, -pad_l);
+ int iStartY = fmax(0, -pad_t);
+ int oStartX = fmax(0, pad_l);
+ int oStartY = fmax(0, pad_t);
+
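+  /* An output column j inside the left pad reads the input mirrored about
+     the left edge (2*pad_l - j); past the right edge it mirrors about the
+     last column ((iwidth + pad_l - 1)*2 - j). The iStart/oStart offsets
+     shift coordinates when a pad is negative (cropping). */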
+ long k, ip_x, ip_y;
+#pragma omp parallel for private(k, ip_x, ip_y)
+
+ for (k = 0; k < nslices; k++)
+ {
+ long i, j;
+ for (i = 0; i < oheight; i++) {
+ for (j = 0; j < owidth; j++) {
+ if (j < pad_l) {
+ ip_x = pad_l * 2 - j;
+ } else if (j >= pad_l && j < iwidth + pad_l) {
+ ip_x = j;
+ } else {
+ ip_x = (iwidth + pad_l - 1) * 2 - j;
+ }
+ ip_x = ip_x - oStartX + iStartX;
+
+ if (i < pad_t) {
+ ip_y = pad_t * 2 - i;
+ } else if (i >= pad_t && i < iheight + pad_t) {
+ ip_y = i;
+ } else {
+ ip_y = (iheight + pad_t - 1) * 2 - i;
+ }
+ ip_y = ip_y - oStartY + iStartY;
+
+ real *dest_p = output_p + k*owidth*oheight + i * owidth + j;
+ real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
+ *dest_p = *src_p;
+ }
+ }
+ }
+}
+
+void THNN_(SpatialReflectionPadding_updateOutput)(THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b)
+{
+ int dimw = 2;
+ int dimh = 1;
+ int dimslices = 0;
+ long nbatch = 1;
+ long nslices;
+ long iheight;
+ long iwidth;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+
+ THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+ "3D or 4D (batch mode) tensor expected for input, but got: %s");
+
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ oheight = iheight + pad_t + pad_b;
+ owidth = iwidth + pad_l + pad_r;
+
+  THArgCheck(owidth >= 1 && oheight >= 1, 2,
+             "input (H: %ld, W: %ld) is too small."
+             " Calculated output H: %ld W: %ld",
+             iheight, iwidth, oheight, owidth);
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ /* resize output */
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nslices, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+ THNN_(SpatialReflectionPadding_updateOutput_frame)(input_data, output_data,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ pad_l, pad_r,
+ pad_t, pad_b);
+ }
+ else
+ {
+ long p;
+
+ THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialReflectionPadding_updateOutput_frame)(
+ input_data+p*nslices*iwidth*iheight,
+ output_data+p*nslices*owidth*oheight,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ pad_l, pad_r,
+ pad_t, pad_b);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+ real *ginput_p, real *goutput_p,
+ long nslices,
+ long iwidth, long iheight,
+ long owidth, long oheight,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b)
+{
+ int iStartX = fmax(0, -pad_l);
+ int iStartY = fmax(0, -pad_t);
+ int oStartX = fmax(0, pad_l);
+ int oStartY = fmax(0, pad_t);
+
+ long k, ip_x, ip_y;
+#pragma omp parallel for private(k, ip_x, ip_y)
+ for (k = 0; k < nslices; k++)
+ {
+ long i, j;
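+    /* Same reflected index mapping as the forward pass; several output
+       pixels can reflect to one input pixel, so gradients accumulate. */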
+ for (i = 0; i < oheight; i++) {
+ for (j = 0; j < owidth; j++) {
+ if (j < pad_l) {
+ ip_x = pad_l * 2 - j;
+ } else if (j >= pad_l && j < iwidth + pad_l) {
+ ip_x = j;
+ } else {
+ ip_x = (iwidth + pad_l - 1) * 2 - j;
+ }
+ ip_x = ip_x - oStartX + iStartX;
+
+ if (i < pad_t) {
+ ip_y = pad_t * 2 - i;
+ } else if (i >= pad_t && i < iheight + pad_t) {
+ ip_y = i;
+ } else {
+ ip_y = (iheight + pad_t - 1) * 2 - i;
+ }
+ ip_y = ip_y - oStartY + iStartY;
+
+ real *src_p = goutput_p + k*owidth*oheight + i * owidth + j;
+ real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
+ *dest_p += *src_p;
+ }
+ }
+ }
+}
+
+void THNN_(SpatialReflectionPadding_updateGradInput)(THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b)
+{
+ int dimw = 2;
+ int dimh = 1;
+ int dimslices = 0;
+ long nbatch = 1;
+ long nslices;
+ long iheight;
+ long iwidth;
+ long oheight;
+ long owidth;
+
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ oheight = iheight + pad_t + pad_b;
+ owidth = iwidth + pad_l + pad_r;
+
+ THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
+ "gradOutput width unexpected. Expected: %d, Got: %d",
+ owidth, THTensor_(size)(gradOutput, dimw));
+ THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
+ "gradOutput height unexpected. Expected: %d, Got: %d",
+ oheight, THTensor_(size)(gradOutput, dimh));
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* backprop */
+ if (input->nDimension == 3) {
+ THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+ THTensor_(data)(gradInput),
+ THTensor_(data)(gradOutput),
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ pad_l, pad_r,
+ pad_t, pad_b);
+ } else {
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++) {
+ THNN_(SpatialReflectionPadding_updateGradInput_frame)(
+ THTensor_(data)(gradInput) + p * nslices * iheight * iwidth,
+ THTensor_(data)(gradOutput) + p * nslices * oheight * owidth,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ pad_l, pad_r,
+ pad_t, pad_b);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialReplicationPadding.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReplicationPadding.c
new file mode 100644
index 000000000..4e318aa70
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialReplicationPadding.c
@@ -0,0 +1,260 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialReplicationPadding.c"
+#else
+
+static void THNN_(SpatialReplicationPadding_updateOutput_frame)(
+ real *input_p, real *output_p,
+ long nslices,
+ long iwidth, long iheight,
+ long owidth, long oheight,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b)
+{
+ int iStartX = fmax(0, -pad_l);
+ int iStartY = fmax(0, -pad_t);
+ int oStartX = fmax(0, pad_l);
+ int oStartY = fmax(0, pad_t);
+
+ long k, ip_x, ip_y;
+#pragma omp parallel for private(k, ip_x, ip_y)
+ for (k = 0; k < nslices; k++)
+ {
+ long i, j;
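+    /* Map each output pixel (i, j) to the input pixel it replicates:
+       the pad regions clamp to the nearest edge row or column. */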
+ for (i = 0; i < oheight; i++) {
+ for (j = 0; j < owidth; j++) {
+ if (j < pad_l) {
+ ip_x = pad_l;
+ } else if (j >= pad_l && j < iwidth + pad_l) {
+ ip_x = j;
+ } else {
+ ip_x = iwidth + pad_l - 1;
+ }
+ ip_x = ip_x - oStartX + iStartX;
+
+ if (i < pad_t) {
+ ip_y = pad_t;
+ } else if (i >= pad_t && i < iheight + pad_t) {
+ ip_y = i;
+ } else {
+ ip_y = iheight + pad_t - 1;
+ }
+ ip_y = ip_y - oStartY + iStartY;
+
+ real *dest_p = output_p + k*owidth*oheight + i * owidth + j;
+ real *src_p = input_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
+ *dest_p = *src_p;
+ }
+ }
+ }
+}
+
+void THNN_(SpatialReplicationPadding_updateOutput)(THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b)
+{
+ int dimw = 2;
+ int dimh = 1;
+ int dimslices = 0;
+ long nbatch = 1;
+ long nslices;
+ long iheight;
+ long iwidth;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+
+ THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+ "3D or 4D (batch mode) tensor expected for input, but got: %s");
+
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ oheight = iheight + pad_t + pad_b;
+ owidth = iwidth + pad_l + pad_r;
+
+  THArgCheck(owidth >= 1 && oheight >= 1, 2,
+             "input (H: %d, W: %d) is too small."
+             " Calculated output H: %d W: %d",
+             iheight, iwidth, oheight, owidth);
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ /* resize output */
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nslices, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+ THNN_(SpatialReplicationPadding_updateOutput_frame)(input_data, output_data,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ pad_l, pad_r,
+ pad_t, pad_b);
+ }
+ else
+ {
+ long p;
+
+ THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(SpatialReplicationPadding_updateOutput_frame)(
+ input_data+p*nslices*iwidth*iheight,
+ output_data+p*nslices*owidth*oheight,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ pad_l, pad_r,
+ pad_t, pad_b);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(SpatialReplicationPadding_updateGradInput_frame)(
+ real *ginput_p, real *goutput_p,
+ long nslices,
+ long iwidth, long iheight,
+ long owidth, long oheight,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b)
+{
+ int iStartX = fmax(0, -pad_l);
+ int iStartY = fmax(0, -pad_t);
+ int oStartX = fmax(0, pad_l);
+ int oStartY = fmax(0, pad_t);
+
+ long k, ip_x, ip_y;
+#pragma omp parallel for private(k, ip_x, ip_y)
+ for (k = 0; k < nslices; k++)
+ {
+ long i, j;
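+    /* Same clamped index mapping as the forward pass; every output pixel
+       in a pad region maps to an edge pixel, so gradients accumulate. */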
+ for (i = 0; i < oheight; i++) {
+ for (j = 0; j < owidth; j++) {
+ if (j < pad_l) {
+ ip_x = pad_l;
+ } else if (j >= pad_l && j < iwidth + pad_l) {
+ ip_x = j;
+ } else {
+ ip_x = iwidth + pad_l - 1;
+ }
+ ip_x = ip_x - oStartX + iStartX;
+
+ if (i < pad_t) {
+ ip_y = pad_t;
+ } else if (i >= pad_t && i < iheight + pad_t) {
+ ip_y = i;
+ } else {
+ ip_y = iheight + pad_t - 1;
+ }
+ ip_y = ip_y - oStartY + iStartY;
+
+ real *src_p = goutput_p + k*owidth*oheight + i * owidth + j;
+ real *dest_p = ginput_p + k*iwidth*iheight + ip_y * iwidth + ip_x;
+ *dest_p += *src_p;
+ }
+ }
+ }
+}
+
+void THNN_(SpatialReplicationPadding_updateGradInput)(THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b)
+{
+ int dimw = 2;
+ int dimh = 1;
+ int dimslices = 0;
+ long nbatch = 1;
+ long nslices;
+ long iheight;
+ long iwidth;
+ long oheight;
+ long owidth;
+
+ if (input->nDimension == 4)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ oheight = iheight + pad_t + pad_b;
+ owidth = iwidth + pad_l + pad_r;
+
+ THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
+ "gradOutput width unexpected. Expected: %d, Got: %d",
+ owidth, THTensor_(size)(gradOutput, dimw));
+ THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
+ "gradOutput height unexpected. Expected: %d, Got: %d",
+ oheight, THTensor_(size)(gradOutput, dimh));
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* backprop */
+ if (input->nDimension == 3) {
+ THNN_(SpatialReplicationPadding_updateGradInput_frame)(
+ THTensor_(data)(gradInput),
+ THTensor_(data)(gradOutput),
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ pad_l, pad_r,
+ pad_t, pad_b);
+ } else {
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++) {
+ THNN_(SpatialReplicationPadding_updateGradInput_frame)(
+ THTensor_(data)(gradInput) + p * nslices * iheight * iwidth,
+ THTensor_(data)(gradOutput) + p * nslices * oheight * owidth,
+ nslices,
+ iwidth, iheight,
+ owidth, oheight,
+ pad_l, pad_r,
+ pad_t, pad_b);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialSubSampling.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialSubSampling.c
new file mode 100644
index 000000000..4c077bc64
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialSubSampling.c
@@ -0,0 +1,302 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialSubSampling.c"
+#else
+
+static inline void THNN_(SpatialSubSampling_shapeCheck)(
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *weight,
+ int kW, int kH) {
+ int ndims = input->nDimension;
+ THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+ THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+
+ int nInputPlane = THTensor_(size)(weight, 0);
+
+ int dimw = 2;
+ int dimh = 1;
+
+ long inputWidth;
+ long inputHeight;
+
+ if (input->nDimension == 4) {
+ dimw++;
+ dimh++;
+ }
+
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+
+ THArgCheck(input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes");
+ THArgCheck(inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size");
+}
+
+void THNN_(SpatialSubSampling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ int kW, int kH,
+ int dW, int dH)
+{
+ THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");
+
+ real *weight_data = THTensor_(data)(weight);
+ real *bias_data = THTensor_(data)(bias);
+ real *output_data;
+ real *input_data;
+
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+
+ long inputWidth;
+ long inputHeight;
+ long outputWidth;
+ long outputHeight;
+
+ int nInputPlane = THTensor_(size)(weight,0);
+
+ long k;
+
+ THNN_(SpatialSubSampling_shapeCheck)(input, NULL, weight, kW, kH);
+
+ if (input->nDimension == 4) {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ outputWidth = (inputWidth - kW) / dW + 1;
+ outputHeight = (inputHeight - kH) / dH + 1;
+
+ if (input->nDimension == 3)
+ THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
+ else
+ THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth);
+
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
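+  /* Sub-sampling forward: every output pixel is bias[k] + weight[k] *
+     (sum over its kW x kH input window); there is one scalar weight and
+     one scalar bias per input plane. */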
+#pragma omp parallel for private(k)
+ for(k = 0; k < nInputPlane; k++)
+ {
+ long p;
+ for(p = 0; p < nbatch; p++)
+ {
+ long xx, yy;
+ /* For all output pixels... */
+ real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
+      /* Get the scalar weight for plane k */
+ real the_weight = weight_data[k];
+ /* Initialize to the bias */
+ real z = bias_data[k];
+ long i;
+ for(i = 0; i < outputWidth*outputHeight; i++)
+ ptr_output[i] = z;
+
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+          /* Sum the kW x kH input window... */
+ real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
+ real sum = 0;
+ long kx, ky;
+
+ for(ky = 0; ky < kH; ky++)
+ {
+ for(kx = 0; kx < kW; kx++)
+ sum += ptr_input[kx];
+ ptr_input += inputWidth; /* next input line */
+ }
+ /* Update output */
+ *ptr_output++ += the_weight*sum;
+ }
+ }
+ }
+ }
+ THTensor_(free)(input);
+}
+
+void THNN_(SpatialSubSampling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ int kW, int kH,
+ int dW, int dH)
+{
+ THNN_(SpatialSubSampling_shapeCheck)(input, gradOutput, weight, kW, kH);
+
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+
+ long inputWidth;
+ long inputHeight;
+ long outputWidth;
+ long outputHeight;
+
+ int nInputPlane = THTensor_(size)(weight,0);
+
+ real *weight_data;
+ real *gradOutput_data;
+ real *input_data, *gradInput_data;
+
+ long k;
+
+ if (input->nDimension == 4) {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ }
+
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ outputWidth = (inputWidth - kW) / dW + 1;
+ outputHeight = (inputHeight - kH) / dH + 1;
+
+ weight_data = THTensor_(data)(weight);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+
+ input_data = THTensor_(data)(input);
+
+ THTensor_(resizeAs)(gradInput, input);
+ gradInput_data = THTensor_(data)(gradInput);
+
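+  /* Backward of the windowed sum: each gradOutput value, scaled by the
+     plane's scalar weight, is accumulated into every pixel of its
+     kW x kH input window. */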
+#pragma omp parallel for private(k)
+ for(k = 0; k < nInputPlane; k++)
+ {
+ long p;
+ for(p = 0; p < nbatch; p++)
+ {
+ real the_weight = weight_data[k];
+ real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
+ long xx, yy;
+
+ real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
+ long i;
+ for(i=0; i<inputWidth*inputHeight; i++)
+ ptr_gi[i] = 0.0;
+
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+ real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
+ real z = *ptr_gradOutput++ * the_weight;
+ long kx, ky;
+
+ for(ky = 0; ky < kH; ky++)
+ {
+ for(kx = 0; kx < kW; kx++)
+ ptr_gradInput[kx] += z;
+ ptr_gradInput += inputWidth;
+ }
+ }
+ }
+ }
+ }
+ THTensor_(free)(gradOutput);
+}
+
+void THNN_(SpatialSubSampling_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ int kW, int kH,
+ int dW, int dH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THNN_(SpatialSubSampling_shapeCheck)(input, gradOutput, gradWeight, kW, kH);
+
+ long nbatch = 1;
+ long dimw = 2;
+ long dimh = 1;
+
+ long inputWidth;
+ long inputHeight;
+ long outputWidth;
+ long outputHeight;
+
+ int nInputPlane = THTensor_(size)(gradWeight,0);
+
+ real *gradWeight_data;
+ real *gradBias_data;
+ real *gradOutput_data;
+ real *input_data;
+
+ long k;
+
+ if (input->nDimension == 4) {
+ dimw++;
+ dimh++;
+ nbatch = input->size[0];
+ }
+
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ outputWidth = (inputWidth - kW) / dW + 1;
+ outputHeight = (inputHeight - kH) / dH + 1;
+
+ gradWeight_data = THTensor_(data)(gradWeight);
+ gradBias_data = THTensor_(data)(gradBias);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+
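+  /* gradBias[k] accumulates the plain sum of gradOutput over plane k;
+     gradWeight[k] accumulates sum(gradOutput * input-window sum),
+     both scaled by `scale`. */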
+#pragma omp parallel for private(k)
+ for(k = 0; k < nInputPlane; k++)
+ {
+ long p;
+ for(p = 0; p < nbatch; p++)
+ {
+ real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
+ real sum;
+ long xx, yy;
+ long i;
+
+ sum = 0;
+ for(i = 0; i < outputWidth*outputHeight; i++)
+ sum += ptr_gradOutput[i];
+ gradBias_data[k] += scale*sum;
+
+ sum = 0;
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+ real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
+ real z = *ptr_gradOutput++;
+ long kx, ky;
+
+ for(ky = 0; ky < kH; ky++)
+ {
+ for(kx = 0; kx < kW; kx++)
+ sum += z * ptr_input[kx];
+ ptr_input += inputWidth;
+ }
+ }
+ }
+ gradWeight_data[k] += scale*sum;
+ }
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingBilinear.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingBilinear.c
new file mode 100644
index 000000000..8bc487ead
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingBilinear.c
@@ -0,0 +1,174 @@
+// Adapted from interp.cpp from Caffe util by Pauline Luc
+// Originally developed by George Papandreou
+
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialUpSamplingBilinear.c"
+#else
+
+static inline void THNN_(SpatialUpSamplingBilinear_shapeCheck)
+ (THTensor *input, THTensor *gradOutput,
+ int nBatch, int nChannels,
+ int inputHeight, int inputWidth,
+ int outputHeight, int outputWidth) {
+ THArgCheck(inputHeight > 0 && inputWidth > 0
+ && outputHeight > 0 && outputWidth > 0, 2,
+ "input and output sizes should be greater than 0,"
+ " but got input (H: %d, W: %d) output (H: %d, W: %d)",
+ inputHeight, inputWidth, outputHeight, outputWidth);
+ if (input != NULL) {
+ THNN_ARGCHECK(input->nDimension == 4, 2, input,
+ "4D input tensor expected but got: %s");
+ }
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nBatch);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, nChannels);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
+ }
+}
+
+void THNN_(SpatialUpSamplingBilinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int outputHeight,
+ int outputWidth){
+
+ int nbatch = THTensor_(size)(input, 0);
+ int channels = THTensor_(size)(input, 1);
+ int inputHeight = THTensor_(size)(input, 2);
+ int inputWidth = THTensor_(size)(input, 3);
+
+ THNN_(SpatialUpSamplingBilinear_shapeCheck)
+ (input, NULL,
+ nbatch, channels,
+ inputHeight, inputWidth,
+ outputHeight, outputWidth);
+
+ input = THTensor_(newContiguous)(input);
+ THTensor_(resize4d)(output,
+ THTensor_(size)(input, 0),
+ THTensor_(size)(input, 1),
+ outputHeight, outputWidth);
+ THTensor_(zero)(output);
+ real *idata = THTensor_(data)(input);
+ real *odata = THTensor_(data)(output);
+ channels = nbatch * channels;
+ THAssert(inputHeight > 0 && inputWidth > 0 && outputHeight > 0 && outputWidth > 0);
+ // special case: just copy
+ if (inputHeight == outputHeight && inputWidth == outputWidth) {
+ for (int h2 = 0; h2 < outputHeight; ++h2) {
+ const int h1 = h2;
+ for (int w2 = 0; w2 < outputWidth; ++w2) {
+ const int w1 = w2;
+ const real* pos1 = &idata[h1 * inputWidth + w1];
+ real* pos2 = &odata[h2 * outputWidth + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos2[0] = pos1[0];
+ pos1 += inputWidth * inputHeight;
+ pos2 += outputWidth * outputHeight;
+ }
+ }
+ }
+    THTensor_(free)(input);
+    return;
+ }
+  const float rheight = (outputHeight > 1) ? (float)(inputHeight - 1) / (outputHeight - 1) : 0.f;
+ const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1) / (outputWidth - 1) : 0.f;
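+  // Corner-aligned mapping: output row h2 samples the real-valued input
+  // row h1r = rheight * h2; h1 is its floor, h1p selects the next row when
+  // one exists, and h0lambda/h1lambda are the linear blend weights. Columns
+  // use the same scheme, giving a weighted blend of four input neighbours.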
+ for (int h2 = 0; h2 < outputHeight; ++h2) {
+ const float h1r = rheight * h2;
+ const int h1 = h1r;
+ const int h1p = (h1 < inputHeight - 1) ? 1 : 0;
+ const real h1lambda = h1r - h1;
+ const real h0lambda = (real)1. - h1lambda;
+ for (int w2 = 0; w2 < outputWidth; ++w2) {
+ const float w1r = rwidth * w2;
+ const int w1 = w1r;
+ const int w1p = (w1 < inputWidth - 1) ? 1 : 0;
+ const real w1lambda = w1r - w1;
+ const real w0lambda = (real)1. - w1lambda;
+ const real* pos1 = &idata[h1 * inputWidth + w1];
+ real* pos2 = &odata[h2 * outputWidth + w2];
+ for (int c = 0; c < channels; ++c) {
+        pos2[0] = h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p])
+ + h1lambda * (w0lambda * pos1[h1p * inputWidth]
+ + w1lambda * pos1[h1p * inputWidth + w1p]);
+ pos1 += inputWidth * inputHeight;
+ pos2 += outputWidth * outputHeight;
+ }
+ }
+ }
+ THTensor_(free)(input);
+}
+
+void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
+ THNNState *state,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int nbatch,
+ int channels,
+ int inputHeight,
+ int inputWidth,
+ int outputHeight,
+ int outputWidth){
+
+ THNN_(SpatialUpSamplingBilinear_shapeCheck)
+ (NULL, gradOutput,
+ nbatch, channels,
+ inputHeight, inputWidth,
+ outputHeight, outputWidth);
+
+ THTensor_(resize4d)(gradInput, nbatch, channels, inputHeight, inputWidth);
+ THTensor_(zero)(gradInput);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ real *data1 = THTensor_(data)(gradInput);
+ real *data2 = THTensor_(data)(gradOutput);
+ channels = nbatch * channels;
+
+ // special case: same-size matching grids
+ if (inputHeight == outputHeight && inputWidth == outputWidth) {
+ for (int h2 = 0; h2 < outputHeight; ++h2) {
+ const int h1 = h2;
+ for (int w2 = 0; w2 < outputWidth; ++w2) {
+ const int w1 = w2;
+ real* pos1 = &data1[h1 * inputWidth + w1];
+ const real* pos2 = &data2[h2 * outputWidth + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos1[0] += pos2[0];
+ pos1 += inputWidth * inputHeight;
+ pos2 += outputWidth * outputHeight;
+ }
+ }
+ }
+    THTensor_(free)(gradOutput);
+    return;
+ }
+  const float rheight = (outputHeight > 1) ? (float)(inputHeight - 1) / (outputHeight - 1) : 0.f;
+ const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1)/(outputWidth - 1) : 0.f;
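+  // The backward pass mirrors the forward interpolation: each gradOutput
+  // pixel is scattered to the same four input neighbours with the same
+  // h/w lambda weights.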
+ for (int h2 = 0; h2 < outputHeight; ++h2) {
+ const float h1r = rheight * h2;
+ const int h1 = h1r;
+ const int h1p = (h1 < inputHeight - 1) ? 1 : 0;
+ const real h1lambda = h1r - h1;
+ const real h0lambda = (real)1. - h1lambda;
+ for (int w2 = 0; w2 < outputWidth; ++w2) {
+ const float w1r = rwidth * w2;
+ const int w1 = w1r;
+ const int w1p = (w1 < inputWidth - 1) ? 1 : 0;
+ const real w1lambda = w1r - w1;
+ const real w0lambda = (real)1. - w1lambda;
+ real* pos1 = &data1[h1 * inputWidth + w1];
+ const real* pos2 = &data2[h2 * outputWidth + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos1[0] += h0lambda * w0lambda * pos2[0];
+ pos1[w1p] += h0lambda * w1lambda * pos2[0];
+ pos1[h1p * inputWidth] += h1lambda * w0lambda * pos2[0];
+ pos1[h1p * inputWidth + w1p] += h1lambda * w1lambda * pos2[0];
+ pos1 += inputWidth * inputHeight;
+ pos2 += outputWidth * outputHeight;
+ }
+ }
+ }
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingNearest.c b/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingNearest.c
new file mode 100644
index 000000000..b4699ff3e
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/SpatialUpSamplingNearest.c
@@ -0,0 +1,199 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialUpSamplingNearest.c"
+#else
+
+static inline void THNN_(SpatialUpSamplingNearest_shapeCheck)
+ (THTensor *input, THTensor *gradOutput,
+ int scale_factor) {
+  THArgCheck(input != NULL, 2, "3D or 4D input tensor expected but got NULL");
+ THArgCheck(scale_factor > 1, 4,
+ "scale_factor must be greater than 1, but got: %d", scale_factor);
+ THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
+ "3D or 4D input tensor expected but got: %s");
+ if (input->nDimension == 3) {
+ int nChannels = THTensor_(size)(input, 0);
+ int inputHeight = THTensor_(size)(input, 1);
+ int inputWidth = THTensor_(size)(input, 2);
+ int outputHeight = inputHeight * scale_factor;
+ int outputWidth = inputWidth * scale_factor;
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, 3, 0, nChannels);
+ THNN_CHECK_DIM_SIZE(gradOutput, 3, 1, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, 3, 2, outputWidth);
+ }
+ } else {
+ int nBatch = THTensor_(size)(input, 0);
+ int nChannels = THTensor_(size)(input, 1);
+ int inputHeight = THTensor_(size)(input, 2);
+ int inputWidth = THTensor_(size)(input, 3);
+ int outputHeight = inputHeight * scale_factor;
+ int outputWidth = inputWidth * scale_factor;
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nBatch);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, nChannels);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
+ }
+ }
+}
+
+void THNN_(SpatialUpSamplingNearest_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int scale_factor)
+{
+ THNN_(SpatialUpSamplingNearest_shapeCheck)(input, NULL, scale_factor);
+ int inputHeight = THTensor_(size)(input, input->nDimension-2);
+ int inputWidth = THTensor_(size)(input, input->nDimension-1);
+ int outputHeight = inputHeight * scale_factor;
+ int outputWidth = inputWidth * scale_factor;
+
+ if (input->nDimension == 3) {
+ THTensor_(resize3d)(output,
+ THTensor_(size)(input, 0),
+ outputHeight, outputWidth);
+ } else {
+ THTensor_(resize4d)(output,
+ THTensor_(size)(input, 0),
+ THTensor_(size)(input, 1),
+ outputHeight, outputWidth);
+ }
+
+ int dW = scale_factor;
+ int dH = scale_factor;
+ int xDim = input->nDimension-2;
+ int yDim = input->nDimension-1;
+
+ // dims
+ int idim = input->nDimension;
+ int osz0 = output->size[0];
+ int osz1 = output->size[1];
+ int osz2 = output->size[2];
+ int osz3 = 1;
+ if (idim > 3) {
+ osz3 = output->size[3];
+ }
+
+ // get strides
+ long *is = input->stride;
+ long *os = output->stride;
+
+ // get raw pointers
+ real *pin = THTensor_(data)(input);
+ real *pout = THTensor_(data)(output);
+
+ // perform the upsampling
+ int i0, i1, i2, i3, isrc, idst;
+ int iout[4]; // Output indices
+ int iin[4]; // Input indices
+
+ for (i0 = 0; i0 < osz0; i0++) {
+ iout[0] = i0;
+ iin[0] = i0;
+ for (i1 = 0; i1 < osz1; i1++) {
+ iout[1] = i1;
+ iin[1] = i1;
+ for (i2 = 0; i2 < osz2; i2++) {
+ iout[2] = i2;
+ iin[2] = i2;
+ for (i3 = 0; i3 < osz3; i3++) {
+ iout[3] = i3;
+ iin[3] = i3;
+
+ // set the indices for the upsampled dimensions
+ iin[xDim] = iout[xDim] / dW;
+ iin[yDim] = iout[yDim] / dH;
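+          // nearest neighbour: integer division floors each output
+          // coordinate back to the input pixel it was copied from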
+
+ idst = i0*os[0] + i1*os[1] + i2*os[2];
+ isrc = iin[0]*is[0] + iin[1]*is[1] + iin[2]*is[2];
+ if (idim > 3) {
+ idst += i3*os[3];
+ isrc += iin[3]*is[3];
+ }
+
+ pout[idst] = pin[isrc];
+ }
+ }
+ }
+ }
+}
+
+void THNN_(SpatialUpSamplingNearest_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int scale_factor)
+{
+ THNN_(SpatialUpSamplingNearest_shapeCheck)(input, gradOutput, scale_factor);
+ THTensor_(resizeAs)(gradInput, input);
+
+ int dW = scale_factor;
+ int dH = scale_factor;
+ int xDim = gradInput->nDimension-2;
+ int yDim = gradInput->nDimension-1;
+
+ // dims
+  int idim = gradInput->nDimension; // Guaranteed to be 3 or 4 by the shape check
+ int isz0 = gradInput->size[0];
+ int isz1 = gradInput->size[1];
+ int isz2 = gradInput->size[2];
+ int isz3 = 1;
+ if (idim > 3) {
+ isz3 = gradInput->size[3];
+ }
+
+ // get strides
+ long *is = gradInput->stride;
+ long *os = gradOutput->stride;
+
+ // get raw pointers
+ real *pin = THTensor_(data)(gradInput);
+ real *pout = THTensor_(data)(gradOutput);
+
+ // perform the upsampling
+ int i0, i1, i2, i3, isrc, idst, x, y;
+ int iin[4]; // Input indices
+ int iout[4]; // Output indices
+
+ THTensor_(zero)(gradInput);
+
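+  // Each input pixel accumulates the gradients of the dW x dH block of
+  // output pixels it produced in the forward pass.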
+ for (i0 = 0; i0 < isz0; i0++) {
+ iin[0] = i0;
+ iout[0] = i0;
+ for (i1 = 0; i1 < isz1; i1++) {
+ iin[1] = i1;
+ iout[1] = i1;
+ for (i2 = 0; i2 < isz2; i2++) {
+ iin[2] = i2;
+ iout[2] = i2;
+ for (i3 = 0; i3 < isz3; i3++) {
+ iin[3] = i3;
+ iout[3] = i3;
+
+ idst = i0*is[0] + i1*is[1] + i2*is[2];
+ if (idim > 3) {
+ idst += i3*is[3];
+ }
+
+ // Now accumulate the gradients from gradOutput
+ for (y = 0; y < dH; y++) {
+ for (x = 0; x < dW; x++) {
+ iout[xDim] = dW * iin[xDim] + x;
+ iout[yDim] = dH * iin[yDim] + y;
+ isrc = iout[0]*os[0] + iout[1]*os[1] + iout[2]*os[2];
+ if (idim > 3) {
+ isrc += iout[3]*os[3];
+ }
+ pin[idst] += pout[isrc];
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Sqrt.c b/contrib/lua-torch/nn/lib/THNN/generic/Sqrt.c
new file mode 100644
index 000000000..174884e34
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/Sqrt.c
@@ -0,0 +1,52 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Sqrt.c"
+#else
+
+void THNN_(Sqrt_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal eps_)
+{
+ real eps = TH_CONVERT_ACCREAL_TO_REAL(eps_);
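+  (void)eps; /* eps is part of the shared THNN signature but unused by this kernel */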
+ THTensor_(resizeAs)(output, input);
+ THTensor_(sqrt)(output, input);
+}
+
+void THNN_(Sqrt_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output)
+{
+ THNN_CHECK_SHAPE(output, gradOutput);
+ THTensor_(resizeAs)(gradInput, input);
+
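+  /* d/dx sqrt(x) = 1 / (2 sqrt(x)), so gradInput = 0.5 * gradOutput / output;
+     a zero output maps to a zero gradient to avoid division by zero. */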
+ if (output->nDimension == 1 ||
+ !THTensor_(isContiguous)(output) ||
+ !THTensor_(isContiguous)(gradOutput) ||
+ !THTensor_(isContiguous)(gradInput))
+ {
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
+ *gradInput_data = (*output_data == 0.0) ? 0.0 : (0.5 * (*gradOutput_data / *output_data));
+ );
+ }
+ else
+ {
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *gradInput_data = THTensor_(data)(gradInput);
+ real *output_data = THTensor_(data)(output);
+ long i;
+#pragma omp parallel for private(i)
+ for(i = 0; i < THTensor_(nElement)(output); i++)
+ {
+ if (output_data[i] == 0.0)
+ gradInput_data[i] = 0.0;
+ else
+ gradInput_data[i] = 0.5 * (gradOutput_data[i] / output_data[i]);
+ }
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Square.c b/contrib/lua-torch/nn/lib/THNN/generic/Square.c
new file mode 100644
index 000000000..aad0a911c
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/Square.c
@@ -0,0 +1,59 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Square.c"
+#else
+
+void THNN_(Square_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output)
+{
+ THTensor_(resizeAs)(output, input);
+
+ if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
+ {
+ TH_TENSOR_APPLY2(real, output, real, input,
+ *output_data = (*input_data) * (*input_data);
+ );
+ }
+ else
+ {
+ real *output_data = THTensor_(data)(output);
+ real *input_data = THTensor_(data)(input);
+ long i;
+#pragma omp parallel for private(i)
+ for (i = 0; i < THTensor_(nElement)(input); i++)
+ output_data[i] = input_data[i]*input_data[i];
+ }
+}
+
+void THNN_(Square_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput)
+{
+ THNN_CHECK_SHAPE(input, gradOutput);
+ THTensor_(resizeAs)(gradInput, input);
+
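+  /* d/dx x^2 = 2x, so gradInput = 2 * input * gradOutput. */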
+ if (input->nDimension == 1 ||
+ !THTensor_(isContiguous)(input) ||
+ !THTensor_(isContiguous)(gradOutput) ||
+ !THTensor_(isContiguous)(gradInput))
+ {
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data);
+ );
+ }
+ else
+ {
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *gradInput_data = THTensor_(data)(gradInput);
+ real *input_data = THTensor_(data)(input);
+ long i;
+#pragma omp parallel for private(i)
+ for (i = 0; i < THTensor_(nElement)(gradInput); i++)
+ gradInput_data[i] = 2.0 * gradOutput_data[i] * input_data[i];
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/THNN.h b/contrib/lua-torch/nn/lib/THNN/generic/THNN.h
new file mode 100644
index 000000000..76a28eb2d
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/THNN.h
@@ -0,0 +1,1501 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/THNN.h"
+#else
+
+TH_API void THNN_(Abs_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output); // [OUT] Abs output
+TH_API void THNN_(Abs_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. output
+ THTensor *gradInput); // [OUT] gradient w.r.t. input
+
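+// Typical call pattern for these kernels (illustrative sketch, not part of
+// the API): the forward fills `output`, the backward fills `gradInput`
+// given the upstream `gradOutput`:
+//   THNN_(Abs_updateOutput)(state, input, output);
+//   THNN_(Abs_updateGradInput)(state, input, gradOutput, gradInput);
+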
+TH_API void THNN_(AbsCriterion_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *target, // tensor with target values
+ THTensor *output, // [OUT] a one-element tensor with loss
+ bool sizeAverage); // if true, the loss will be divided by batch size
+TH_API void THNN_(AbsCriterion_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *target, // tensor with target values
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ bool sizeAverage); // if true, the gradient will be normalized by batch size
+
+TH_API void THNN_(BCECriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage,
+ THTensor *weights); // [OPTIONAL]
+TH_API void THNN_(BCECriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage,
+ THTensor *weights); // [OPTIONAL]
+
+TH_API void THNN_(ClassNLLCriterion_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor (1D/2D)
+ THIndexTensor *target, // tensor containing indexes of target classes
+ THTensor *output, // [OUT] a one-element tensor with loss
+ bool sizeAverage, // if true, the loss will be normalized by batch size and class weights
+ THTensor *weights, // [OPTIONAL] class weights
+ THTensor *total_weight, // [BUFFER]
+ long ignore_index); // target index to ignore (loss = 0, gradInput = 0)
+TH_API void THNN_(ClassNLLCriterion_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor (1D/2D)
+ THIndexTensor *target, // tensor containing indexes of target classes
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ bool sizeAverage, // if true, the loss will be normalized by batch size and class weights
+ THTensor *weights, // [OPTIONAL] class weights
+ THTensor *total_weight, // [BUFFER]
+ long ignore_index); // target index to ignore (loss = 0, gradInput = 0)
+
+TH_API void THNN_(SpatialClassNLLCriterion_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor (4D)
+ THIndexTensor *target, // tensor containing indexes of target classes (3D)
+ THTensor *output, // [OUT] a one-element tensor with loss
+ bool sizeAverage, // if true, the loss will be normalized by batch size and class weights
+ THTensor *weights, // [OPTIONAL] class weights
+ THTensor *total_weight); // [BUFFER]
+TH_API void THNN_(SpatialClassNLLCriterion_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor (4D)
+ THIndexTensor *target, // tensor containing indexes of target classes (3D)
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ bool sizeAverage, // if true, the loss will be normalized by batch size and class weights
+ THTensor *weights, // [OPTIONAL] class weights
+ THTensor *total_weight); // [BUFFER]
+
+TH_API void THNN_(ELU_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output, // [OUT] ELU output
+ accreal alpha, // an ELU parameter (as in paper)
+ bool inplace); // if true, modifies input and sets output onto it (no additional memory is allocated)
+TH_API void THNN_(ELU_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. output
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ THTensor *output, // output from a forward pass
+ accreal alpha, // an ELU parameter (as in paper)
+ bool inplace); // if true, modifies gradOutput and sets gradInput onto it (no additional memory is allocated)
+
+TH_API void THNN_(DistKLDivCriterion_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *target, // target tensor
+ THTensor *output, // [OUT] a one-element tensor containing the loss
+ bool sizeAverage); // if true, the loss will be normalized **by total number of elements**
+TH_API void THNN_(DistKLDivCriterion_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *target, // target tensor
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ bool sizeAverage); // if true, the loss will be normalized **by total number of elements**
+
+TH_API void THNN_(GatedLinear_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output, // [OUT] output tensor, half size of input along dimension dim
+ int dim); // dimension for halving operation
+TH_API void THNN_(GatedLinear_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t module's output
+ THTensor *gradInput, // [OUT] gradient w.r.t input
+ int dim); // dimension for halving operation
+
+// HardShrink outputs 0 on the interval (-lambda, lambda) and the original value otherwise.
+TH_API void THNN_(HardShrink_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output, // [OUT] output tensor
+ accreal lambda); // HardShrink parameter
+TH_API void THNN_(HardShrink_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. module's output
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ accreal lambda); // HardShrink parameter
+
+// HardTanh clamps the values to the interval [min_val, max_val].
+TH_API void THNN_(HardTanh_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output, // [OUT] output tensor
+ accreal min_val, // lower threshold
+ accreal max_val, // upper threshold
+ bool inplace);
+TH_API void THNN_(HardTanh_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. module's output
+ THTensor *gradInput, // [OUT] gradient w.r.t. the input
+ accreal min_val, // lower threshold
+ accreal max_val, // upper threshold
+ bool inplace);
+
+TH_API void THNN_(L1Cost_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output); // [OUT] output tensor
+TH_API void THNN_(L1Cost_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // [OPTIONAL] gradient w.r.t module's output
+ THTensor *gradInput); // [OUT] gradient w.r.t the input
+
+TH_API void THNN_(LeakyReLU_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // [MODIFIED] input tensor
+ THTensor *output, // [OUT] output tensor
+ accreal negval, // negative part slope
+ bool inplace); // if true, modifies the input tensor and sets the output tensor on it (no additional memory is allocated)
+TH_API void THNN_(LeakyReLU_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // [MODIFIED] gradient w.r.t. module's output
+ THTensor *gradInput, // [OUT] gradient w.r.t. the input
+ accreal negval, // negative part slope
+ bool inplace); // if true, modifies gradOutput and sets gradInput onto it (no additional memory is allocated)
+
+TH_API void THNN_(GRUFused_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *hidden,
+ THTensor *bias1, // [OPTIONAL]
+ THTensor *bias2, // [OPTIONAL]
+ THTensor *hx,
+ THTensor *output,
+ THTensor *storage);
+TH_API void THNN_(GRUFused_updateGradInput)(
+ THNNState *state,
+ THTensor *gradInInput,
+ THTensor *gradInHidden,
+ THTensor *gradOutput,
+ THTensor *gradInputHx,
+ THTensor *storage);
+
+TH_API void THNN_(LSTMFused_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *hidden,
+ THTensor *bias1, // [OPTIONAL]
+ THTensor *bias2, // [OPTIONAL]
+ THTensor *cell,
+ THTensor *output,
+ THTensor *outputCell);
+TH_API void THNN_(LSTMFused_updateGradInput)(
+ THNNState *state,
+ THTensor *storage,
+ THTensor *gradInGates,
+ THTensor *cx,
+ THTensor *cy,
+ THTensor *gradOutput,
+ THTensor *gradOutputCell,
+ THTensor *gradInputCx);
+
+TH_API void THNN_(LogSigmoid_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output, // output tensor
+ THTensor *buffer); // [BUFFER]
+TH_API void THNN_(LogSigmoid_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input
+ THTensor *gradOutput, // gradient w.r.t. module's output
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ THTensor *buffer); // [BUFFER]
+
+TH_API void THNN_(LogSoftMax_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output); // [OUT] output tensor
+TH_API void THNN_(LogSoftMax_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. module's output
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ THTensor *output); // module's output
+
+TH_API void THNN_(LookupTable_accGradParameters)(
+ THNNState *state,
+ THIndexTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THIntegerTensor *count,
+ THTensor *sorted, // [OPTIONAL]
+ THIndexTensor *indices, // [OPTIONAL]
+ bool scaleGradByFreq,
+ int paddingValue,
+ accreal scale);
+
+TH_API void THNN_(LookupTable_renorm)(
+ THNNState *state, // library's state
+ THIndexTensor *idx, // vector containing row indices (modified in function)
+ THTensor *weight, // 2D tensor whose rows will be renormalized
+ accreal maxNorm, // maximum norm
+ accreal normType); // the norm type (e.g. normType = 2 gives the 2-norm)
+
+TH_API void THNN_(MarginCriterion_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *target, // target tensor (should contain only 1s and -1s)
+ THTensor *output, // [OUT] a one-element tensor containing the loss
+ bool sizeAverage, // if true, the loss is normalized by **total number of elements**
+ accreal margin); // a margin that is required for the loss to be 0
+
+TH_API void THNN_(MarginCriterion_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *target, // target tensor (should contain only 1s and -1s)
+ THTensor *gradInput, // [OUT] gradient w.r.t. module's input
+ bool sizeAverage, // if true, the gradient is normalized by **total number of elements**
+ accreal margin); // a margin that is required for the loss to be 0
+
+TH_API void THNN_(SoftMarginCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage);
+
+TH_API void THNN_(SoftMarginCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage);
+
+TH_API void THNN_(MSECriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage);
+TH_API void THNN_(MSECriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage);
+
+TH_API void THNN_(MultiLabelMarginCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *output,
+ THTensor *isTarget,
+ bool sizeAverage);
+TH_API void THNN_(MultiLabelMarginCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *gradInput,
+ THTensor *isTarget,
+ bool sizeAverage);
+
+TH_API void THNN_(MultiMarginCriterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *output,
+ bool sizeAverage,
+ int p,
+ THTensor* weights, // [OPTIONAL]
+ accreal margin);
+TH_API void THNN_(MultiMarginCriterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THIndexTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage,
+ int p,
+ THTensor *weights, // [OPTIONAL]
+ accreal margin);
+
+TH_API void THNN_(PReLU_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THIndex_t nOutputPlane);
+TH_API void THNN_(PReLU_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THIndex_t nOutputPlane);
+TH_API void THNN_(PReLU_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradWeight,
+ THTensor *gradWeightBuf,
+ THTensor *gradWeightBuf2,
+ THIndex_t nOutputPlane,
+ accreal scale);
+
+TH_API void THNN_(Linear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *addBuffer);
+TH_API void THNN_(Linear_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight);
+TH_API void THNN_(Linear_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *addBuffer,
+ accreal scale);
+
+TH_API void THNN_(RReLU_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *noise,
+ accreal lower,
+ accreal upper,
+ bool train,
+ bool inplace,
+ THGenerator *generator);
+TH_API void THNN_(RReLU_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *noise,
+ accreal lower,
+ accreal upper,
+ bool train,
+ bool inplace);
+
+TH_API void THNN_(Sigmoid_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output);
+TH_API void THNN_(Sigmoid_updateGradInput)(
+ THNNState *state,
+ THTensor *input, // [OPTIONAL]
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output);
+
+TH_API void THNN_(SmoothL1Criterion_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *output,
+ bool sizeAverage);
+TH_API void THNN_(SmoothL1Criterion_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *target,
+ THTensor *gradInput,
+ bool sizeAverage);
+
+TH_API void THNN_(SoftMax_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output);
+TH_API void THNN_(SoftMax_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output);
+
+TH_API void THNN_(SoftPlus_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal beta,
+ accreal threshold);
+TH_API void THNN_(SoftPlus_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output,
+ accreal beta,
+ accreal threshold);
+
+TH_API void THNN_(SoftShrink_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal lambda);
+TH_API void THNN_(SoftShrink_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ accreal lambda);
+
+TH_API void THNN_(IndexLinear_updateOutput)(
+ THNNState *state,
+ THIndexTensor *keys,
+ long keysOffset,
+ THTensor *values,
+ THIndexTensor *sizes,
+ THIndexTensor *cumSumSizes,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *normalizedValues,
+ int train);
+TH_API void THNN_(IndexLinear_accGradParameters)(
+ THNNState *state,
+ THIndexTensor *keys,
+ long keysOffset,
+ THTensor *values,
+ THIndexTensor *sizes,
+ THIndexTensor *cumSumSizes,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor* valuesBuffer,
+ accreal weightDecay,
+ accreal scale);
+TH_API void THNN_(IndexLinear_accUpdateGradParameters)(
+ THNNState *state,
+ THIndexTensor *keys,
+ long keysOffset,
+ THTensor *values,
+ THIndexTensor *sizes,
+ THIndexTensor *cumSumSizes,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *bias,
+ accreal weightDecay,
+ accreal scale);
+TH_API void THNN_(IndexLinear_updateParameters)(
+ THNNState *state,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *weight,
+ THTensor *bias,
+ THIndexTensor *runningKeys,
+ THIndexTensor *cumSumSizes,
+ long keysOffset,
+ accreal weightDecay,
+ accreal learningRate);
+
+TH_API void THNN_(SparseLinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias);
+TH_API void THNN_(SparseLinear_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *weight,
+ THTensor *bias,
+ accreal weightDecay,
+ accreal scale);
+TH_API void THNN_(SparseLinear_zeroGradParameters)(
+ THNNState *state,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *lastInput);
+TH_API void THNN_(SparseLinear_updateParameters)(
+ THNNState *state,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *lastInput,
+ accreal learningRate);
+TH_API void THNN_(SparseLinear_legacyUpdateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias);
+TH_API void THNN_(SparseLinear_legacyAccGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *weight,
+ THTensor *bias,
+ accreal weightDecay,
+ accreal scale);
+TH_API void THNN_(SparseLinear_legacyZeroGradParameters)(
+ THNNState *state,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *lastInput);
+TH_API void THNN_(SparseLinear_legacyUpdateParameters)(
+ THNNState *state,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *lastInput,
+ accreal learningRate);
+
+TH_API void THNN_(Sqrt_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal eps);
+TH_API void THNN_(Sqrt_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output);
+
+TH_API void THNN_(Square_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output);
+TH_API void THNN_(Square_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput);
+
+TH_API void THNN_(Tanh_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output);
+TH_API void THNN_(Tanh_updateGradInput)(
+ THNNState *state,
+ THTensor *input, // [OPTIONAL]
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output);
+
+TH_API void THNN_(Threshold_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal threshold,
+ accreal val,
+ bool inplace);
+TH_API void THNN_(Threshold_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ accreal threshold,
+ accreal val,
+ bool inplace);
+
+TH_API void THNN_(TemporalConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ int kW, int dW,
+ int inputFrameSize,
+ int outputFrameSize);
+TH_API void THNN_(TemporalConvolution_updateGradInput)(
+ THNNState* state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ int kW, int dW);
+TH_API void THNN_(TemporalConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ int kW, int dW,
+ accreal scale);
+TH_API void THNN_(TemporalMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kW, int dW);
+TH_API void THNN_(TemporalMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kW, int dW);
+TH_API void THNN_(TemporalSubSampling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ int kW, int dW,
+ int inputFrameSize);
+TH_API void THNN_(TemporalSubSampling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ int kW, int dW);
+TH_API void THNN_(TemporalSubSampling_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ int kW, int dW,
+ accreal scale);
+
+TH_API void THNN_(TemporalRowConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst);
+TH_API void THNN_(TemporalRowConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst);
+TH_API void THNN_(TemporalRowConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst,
+ accreal scale);
+
+TH_API void THNN_(BatchNormalization_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight, // [OPTIONAL]
+ THTensor *bias, // [OPTIONAL]
+ THTensor *running_mean,
+ THTensor *running_var,
+ THTensor *save_mean,
+ THTensor *save_std,
+ bool train,
+ double momentum,
+ double eps);
+TH_API void THNN_(BatchNormalization_backward)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput, // [OPTIONAL]
+ THTensor *gradWeight, // [OPTIONAL]
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *weight, // [OPTIONAL]
+ THTensor *running_mean,
+ THTensor *running_var,
+ THTensor *save_mean,
+ THTensor *save_std,
+ bool train,
+ double scale,
+ double eps);
+
+TH_API void THNN_(SpatialConvolutionMap_updateOutput)(
+ THNNState *state, // library state
+ THTensor *input, // input tensor
+ THTensor *output, // [OUT] convolution output
+ THTensor *weight, // 3D weight tensor (connTable:size(1) x kH x kW)
+ THTensor *bias, // 1D bias tensor (nOutputPlane)
+ THTensor *connTable, // connection table
+ int nInputPlane, // number of input planes
+ int nOutputPlane, // number of output planes
+ int dW, int dH); // stride
+TH_API void THNN_(SpatialConvolutionMap_updateGradInput)(
+ THNNState *state, // library state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. output
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ THTensor *weight, // 3D weight tensor (connTable:size(1) x kH x kW)
+ THTensor *bias, // 1D bias tensor (nOutputPlane)
+ THTensor *connTable, // connection table
+ int nInputPlane, // number of input planes
+ int nOutputPlane, // number of output planes
+ int dW, int dH); // stride
+TH_API void THNN_(SpatialConvolutionMap_accGradParameters)(
+ THNNState *state, // library state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. output
+ THTensor *gradWeight, // 3D gradWeight tensor (connTable:size(1) x kH x kW)
+ THTensor *gradBias, // 1D gradBias tensor (nOutputPlane)
+ THTensor *connTable, // connection table
+ int nInputPlane, // number of input planes
+ int nOutputPlane, // number of output planes
+ int dW, int dH, // stride
+ accreal scale); // scaling factor
+
+TH_API void THNN_(SpatialConvolutionMM_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias, // [OPTIONAL]
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH);
+TH_API void THNN_(SpatialConvolutionMM_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH);
+TH_API void THNN_(SpatialConvolutionMM_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ accreal scale);
+
+TH_API void THNN_(SpatialDepthWiseConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias, // [OPTIONAL]
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH);
+TH_API void THNN_(SpatialDepthWiseConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH);
+TH_API void THNN_(SpatialDepthWiseConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ accreal scale);
+
+TH_API void THNN_(SpatialConvolutionLocal_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ long inputWidth, long inputHeight,
+ long outputWidth, long outputHeight);
+TH_API void THNN_(SpatialConvolutionLocal_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ long inputWidth, long inputHeight,
+ long outputWidth, long outputHeight);
+TH_API void THNN_(SpatialConvolutionLocal_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ long inputWidth, long inputHeight,
+ long outputWidth, long outputHeight,
+ accreal scale);
+
+TH_API void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int owidth, int oheight);
+TH_API void THNN_(SpatialAdaptiveMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices);
+
+TH_API void THNN_(SpatialAdaptiveAveragePooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int owidth, int oheight);
+TH_API void THNN_(SpatialAdaptiveAveragePooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput);
+
+TH_API void THNN_(SpatialAveragePooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ bool ceil_mode,
+ bool count_include_pad);
+TH_API void THNN_(SpatialAveragePooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ bool ceil_mode,
+ bool count_include_pad);
+
+TH_API void THNN_(SpatialFractionalMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int outputW, int outputH,
+ int poolSizeW, int poolSizeH,
+ THIndexTensor *indices,
+ THTensor *randomSamples);
+TH_API void THNN_(SpatialFractionalMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int outputW, int outputH,
+ int poolSizeW, int poolSizeH,
+ THIndexTensor *indices);
+
+TH_API void THNN_(SpatialFullConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias, // [OPTIONAL]
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH);
+TH_API void THNN_(SpatialFullConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH);
+TH_API void THNN_(SpatialFullConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int adjW, int adjH,
+ accreal scale);
+
+TH_API void THNN_(SpatialFullConvolutionMap_updateOutput)(
+ THNNState *state, // library state
+ THTensor *input, // input tensor
+ THTensor *output, // [OUT] convolution output
+ THTensor *weight, // 3D weight tensor (connTable:size(1) x kH x kW)
+ THTensor *bias, // 1D bias tensor (nOutputPlane)
+ THTensor *connTable, // connection table
+ int nInputPlane, // number of input planes
+ int nOutputPlane, // number of output planes
+ int dW, int dH); // stride
+TH_API void THNN_(SpatialFullConvolutionMap_updateGradInput)(
+ THNNState *state, // library state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. output
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ THTensor *weight, // 3D weight tensor (connTable:size(1) x kH x kW)
+ THTensor *bias, // 1D bias tensor (nOutputPlane)
+ THTensor *connTable, // connection table
+ int nInputPlane, // number of input planes
+ int nOutputPlane, // number of output planes
+ int dW, int dH); // stride
+TH_API void THNN_(SpatialFullConvolutionMap_accGradParameters)(
+ THNNState *state, // library state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t. output
+ THTensor *gradWeight, // 3D gradWeight tensor (connTable:size(1) x kH x kW)
+ THTensor *gradBias, // 1D gradBias tensor (nOutputPlane)
+ THTensor *connTable, // connection table
+ int nInputPlane, // number of input planes
+ int nOutputPlane, // number of output planes
+ int dW, int dH, // stride
+ accreal scale); // scaling factor
+
+TH_API void THNN_(SpatialDilatedConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias, // [OPTIONAL]
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH);
+
+TH_API void THNN_(SpatialDilatedConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH);
+
+TH_API void THNN_(SpatialDilatedConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH,
+ accreal scale);
+
+TH_API void THNN_(SpatialMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ bool ceil_mode);
+TH_API void THNN_(SpatialMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ bool ceil_mode);
+
+TH_API void THNN_(SpatialDilatedMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH,
+ bool ceil_mode);
+TH_API void THNN_(SpatialDilatedMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH,
+ bool ceil_mode);
+
+TH_API void THNN_(SpatialMaxUnpooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int owidth, int oheight);
+TH_API void THNN_(SpatialMaxUnpooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int owidth, int oheight);
+
+TH_API void THNN_(SpatialSubSampling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ int kW, int kH,
+ int dW, int dH);
+TH_API void THNN_(SpatialSubSampling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ int kW, int kH,
+ int dW, int dH);
+TH_API void THNN_(SpatialSubSampling_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ int kW, int kH,
+ int dW, int dH,
+ accreal scale);
+
+TH_API void THNN_(SpatialUpSamplingNearest_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int scale_factor);
+TH_API void THNN_(SpatialUpSamplingNearest_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int scale_factor);
+
+TH_API void THNN_(SpatialUpSamplingBilinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int outputHeight,
+ int outputWidth);
+TH_API void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
+ THNNState *state,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int nbatch,
+ int nchannels,
+ int inputHeight,
+ int inputWidth,
+ int outputHeight,
+ int outputWidth);
+
+TH_API void THNN_(unfolded_acc)(
+ THTensor *finput,
+ THTensor *input,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int nInputPlane,
+ int inputWidth, int inputHeight,
+ int outputWidth, int outputHeight);
+TH_API void THNN_(unfolded_copy)(
+ THTensor *finput,
+ THTensor *input,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int nInputPlane,
+ int inputWidth, int inputHeight,
+ int outputWidth, int outputHeight);
+
+TH_API void THNN_(VolumetricAveragePooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH);
+TH_API void THNN_(VolumetricAveragePooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH);
+
+TH_API void THNN_(VolumetricConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias, // [OPTIONAL]
+ THTensor *finput,
+ THTensor *fgradInput,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH);
+TH_API void THNN_(VolumetricConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH);
+TH_API void THNN_(VolumetricConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *finput,
+ THTensor *fgradInput,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH,
+ accreal scale);
+
+TH_API void THNN_(VolumetricConvolutionMM_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias, // [OPTIONAL]
+ THTensor *finput,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH);
+TH_API void THNN_(VolumetricConvolutionMM_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH);
+TH_API void THNN_(VolumetricConvolutionMM_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *finput,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH,
+ accreal scale);
+
+TH_API void THNN_(VolumetricFractionalMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int outputT, int outputW, int outputH,
+ int poolSizeT, int poolSizeW, int poolSizeH,
+ THIndexTensor *indices,
+ THTensor *randomSamples);
+TH_API void THNN_(VolumetricFractionalMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int outputT, int outputW, int outputH,
+ int poolSizeT, int poolSizeW, int poolSizeH,
+ THIndexTensor *indices);
+
+TH_API void THNN_(VolumetricFullConvolution_updateOutput)(
+ THNNState *state, // library state
+ THTensor *input, // 4D or 5D (batch) tensor
+ THTensor *output, // [OUT] volumetric convolution output
+ THTensor *weight, // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
+ THTensor *bias, // [OPTIONAL] gradBias tensor (nOutputPlane)
+ THTensor *finput, // [OUT] internal columns buffer
+ THTensor *fgradInput, // [OUT] internal ones buffer
+ int dT, int dW, int dH, // stride of the convolution
+ int pT, int pW, int pH, // padding
+ int aT, int aW, int aH); // extra output adjustment
+TH_API void THNN_(VolumetricFullConvolution_updateGradInput)(
+ THNNState *state, // library state
+ THTensor *input, // 4D or 5D (batch) tensor
+ THTensor *gradOutput, // gradient w.r.t. output
+ THTensor *gradInput, // [OUT] gradient w.r.t. input
+ THTensor *weight, // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
+ THTensor *finput, // internal columns buffer
+ THTensor *fgradInput, // internal ones buffer
+ int dT, int dW, int dH, // stride
+ int pT, int pW, int pH, // padding
+ int aT, int aW, int aH); // extra output adjustment
+TH_API void THNN_(VolumetricFullConvolution_accGradParameters)(
+ THNNState *state, // library state
+ THTensor *input, // 4D or 5D (batch) tensor
+ THTensor *gradOutput, // gradient w.r.t. output
+ THTensor *gradWeight, // gradWeight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
+ THTensor *gradBias, // [OPTIONAL] gradBias tensor (nOutputPlane)
+ THTensor *finput, // internal columns buffer
+ THTensor *fgradInput, // internal ones buffer
+ int dT, int dW, int dH, // stride
+ int pT, int pW, int pH, // padding
+ int aT, int aW, int aH, // extra output adjustment
+ accreal scale); // scaling factor
+
+TH_API void THNN_(VolumetricDilatedConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias, // [OPTIONAL]
+ THTensor *columns,
+ THTensor *ones,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH);
+
+TH_API void THNN_(VolumetricDilatedConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH);
+
+TH_API void THNN_(VolumetricDilatedConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *columns,
+ THTensor *ones,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH,
+ accreal scale);
+
+TH_API void THNN_(VolumetricMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH,
+ bool ceilMode);
+TH_API void THNN_(VolumetricMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH,
+ bool ceilMode);
+
+TH_API void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH,
+ int dilationT, int dilationW, int dilationH,
+ bool ceilMode);
+TH_API void THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH,
+ int dilationT, int dilationW, int dilationH,
+ bool ceilMode);
+
+TH_API void THNN_(VolumetricMaxUnpooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int oT, int oW, int oH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH);
+TH_API void THNN_(VolumetricMaxUnpooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int oT, int oW, int oH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH);
+
+TH_API void THNN_(SpatialReflectionPadding_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b);
+
+TH_API void THNN_(SpatialReflectionPadding_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b);
+
+TH_API void THNN_(SpatialReplicationPadding_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b);
+
+TH_API void THNN_(SpatialReplicationPadding_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b);
+
+TH_API void THNN_(VolumetricReplicationPadding_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback);
+
+TH_API void THNN_(VolumetricReplicationPadding_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback);
+
+TH_API void THNN_(VolumetricUpSamplingNearest_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int scale_factor);
+TH_API void THNN_(VolumetricUpSamplingNearest_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int scale_factor);
+
+TH_API void THNN_(VolumetricUpSamplingTrilinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int outputDepth,
+ int outputHeight,
+ int outputWidth);
+TH_API void THNN_(VolumetricUpSamplingTrilinear_updateGradInput)(
+ THNNState *state,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int nbatch,
+ int nchannels,
+ int inputDepth,
+ int inputHeight,
+ int inputWidth,
+ int outputDepth,
+ int outputHeight,
+ int outputWidth);
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Tanh.c b/contrib/lua-torch/nn/lib/THNN/generic/Tanh.c
new file mode 100644
index 000000000..ecf0708c2
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/Tanh.c
@@ -0,0 +1,49 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Tanh.c"
+#else
+
+void THNN_(Tanh_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output)
+{
+ THTensor_(tanh)(output, input);
+}
+
+void THNN_(Tanh_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *output)
+{
+ THNN_CHECK_SHAPE(output, gradOutput);
+ THTensor_(resizeAs)(gradInput, output);
+
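+ /* d/dx tanh(x) = 1 - tanh(x)^2, so gradInput = gradOutput * (1 - output^2).
+ 1D or non-contiguous tensors go through the generic apply macro; the
+ contiguous case uses a flat OpenMP loop over the raw data. */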
+ if (output->nDimension == 1 ||
+ !THTensor_(isContiguous)(output) ||
+ !THTensor_(isContiguous)(gradOutput) ||
+ !THTensor_(isContiguous)(gradInput))
+ {
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output,
+ real z = *output_data;
+ *gradInput_data = *gradOutput_data * (1. - z*z);
+ );
+ }
+ else
+ {
+ real* ptr_gradOutput = THTensor_(data)(gradOutput);
+ real* ptr_gradInput = THTensor_(data)(gradInput);
+ real* ptr_output = THTensor_(data)(output);
+ long i;
+
+#pragma omp parallel for private(i)
+ for (i = 0; i < THTensor_(nElement)(gradInput); i++)
+ {
+ real z = ptr_output[i];
+ ptr_gradInput[i] = ptr_gradOutput[i] * (1. - z*z);
+ }
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalConvolution.c
new file mode 100644
index 000000000..8cfd97d85
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalConvolution.c
@@ -0,0 +1,398 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalConvolution.c"
+#else
+
+static inline void THNN_(TemporalConvolution_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ int kW,
+ int dW,
+ int *inputFrameSize) {
+
+ THArgCheck(kW > 0, 9,
+ "kernel size should be greater than zero, but got kW: %d", kW);
+ THArgCheck(dW > 0, 11,
+ "stride should be greater than zero, but got dW: %d", dW);
+
+ int dimS = 0; // sequence dimension
+ int dimF = 1; // feature dimension
+
+ if (input->nDimension == 3)
+ {
+ dimS = 1;
+ dimF = 2;
+ }
+ THNN_ARGCHECK(input->nDimension == 2 || input->nDimension == 3, 2, input,
+ "2D or 3D (batch mode) tensor expected for input, but got: %s");
+ if (inputFrameSize != NULL) {
+ THArgCheck(input->size[dimF] == *inputFrameSize, 2,
+ "invalid input frame size. Got: %d, Expected: %d",
+ input->size[dimF], *inputFrameSize);
+ }
+ THArgCheck(input->size[dimS] >= kW, 2,
+ "input sequence smaller than kernel size. Got: %d, Expected: %d",
+ input->size[dimS], kW);
+}
+
+void THNN_(TemporalConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ int kW,
+ int dW,
+ int inputFrameSize,
+ int outputFrameSize)
+{
+ THTensor *outputWindow, *inputWindow;
+ int nInputFrame, nOutputFrame;
+ long k, i;
+
+ int dimS = 0; // sequence dimension
+ int dimF = 1; // feature dimension
+
+ if (input->nDimension == 3)
+ {
+ dimS = 1;
+ dimF = 2;
+ }
+
+ THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+ THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");
+ THNN_(TemporalConvolution_shapeCheck)
+ (state, input, kW, dW, &inputFrameSize);
+ input = THTensor_(newContiguous)(input);
+ outputWindow = THTensor_(new)();
+ inputWindow = THTensor_(new)();
+
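+ /* each output frame sees a window of kW input frames advanced by dW,
+ hence nOutputFrame = (nInputFrame - kW) / dW + 1 */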
+ nInputFrame = input->size[dimS];
+ nOutputFrame = (nInputFrame - kW) / dW + 1;
+
+ if (input->nDimension == 2)
+ {
+ THTensor_(resize2d)(output,
+ nOutputFrame,
+ outputFrameSize);
+
+ /* bias first */
+ for(k = 0; k < nOutputFrame; k++)
+ {
+ THTensor_(select)(outputWindow, output, 0, k);
+ THTensor_(copy)(outputWindow, bias);
+ }
+
+ /* handle output frames in groups spaced s = (kW-1)/dW + 1 apart;
+ their input windows do not overlap, so each group is one addmm */
+ for(k = 0; nOutputFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputFrame -= nFrame;
+
+ THTensor_(setStorage2d)(inputWindow, input->storage,
+ input->storageOffset+k*dW*input->size[1],
+ nFrame, inputFrameStride*input->size[1],
+ kW*input->size[1], 1);
+
+ THTensor_(setStorage2d)(outputWindow, output->storage,
+ output->storageOffset + k*output->size[1],
+ nFrame, outputFrameStride*output->size[1],
+ output->size[1], 1);
+
+ THTensor *tweight = THTensor_(new)();
+ THTensor_(transpose)(tweight, weight, 0, 1);
+ THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, tweight);
+ THTensor_(free)(tweight);
+ }
+ }
+ else
+ {
+ THTensor *outputSample = THTensor_(new)();
+ THTensor *inputSample = THTensor_(new)();
+ int nBatchFrame = input->size[0];
+
+ THTensor_(resize3d)(output,
+ nBatchFrame,
+ nOutputFrame,
+ outputFrameSize);
+
+ for(i = 0; i < nBatchFrame; i++)
+ {
+ THTensor_(select)(outputSample, output, 0, i);
+ THTensor_(select)(inputSample, input, 0, i);
+ long nOutputSampleFrame = nOutputFrame;
+
+ /* bias first */
+ for(k = 0; k < nOutputFrame; k++)
+ {
+ THTensor_(select)(outputWindow, outputSample, 0, k);
+ THTensor_(copy)(outputWindow, bias);
+ }
+
+ /* same strided grouping as above, per batch sample */
+ for(k = 0; nOutputSampleFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputSampleFrame -= nFrame;
+
+ THTensor_(setStorage2d)(inputWindow, inputSample->storage,
+ inputSample->storageOffset+k*dW*inputSample->size[1],
+ nFrame, inputFrameStride*inputSample->size[1],
+ kW*inputSample->size[1], 1);
+
+ THTensor_(setStorage2d)(outputWindow, outputSample->storage,
+ outputSample->storageOffset + k*outputSample->size[1],
+ nFrame, outputFrameStride*outputSample->size[1],
+ outputSample->size[1], 1);
+
+ THTensor *tweight = THTensor_(new)();
+ THTensor_(transpose)(tweight, weight, 0, 1);
+ THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, tweight);
+ THTensor_(free)(tweight);
+ }
+ }
+ THTensor_(free)(outputSample);
+ THTensor_(free)(inputSample);
+ }
+
+ THTensor_(free)(outputWindow);
+ THTensor_(free)(inputWindow);
+ THTensor_(free)(input);
+
+}
+
+void THNN_(TemporalConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ int kW,
+ int dW)
+{
+ long nInputFrame;
+ long nOutputFrame;
+
+ THTensor *gradOutputWindow;
+ THTensor *gradInputWindow;
+ long k, i;
+
+ int dimS = 0; // sequence dimension
+ int dimF = 1; // feature dimension
+
+ if (gradOutput->nDimension == 3)
+ {
+ dimS = 1;
+ dimF = 2;
+ }
+
+ THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+ THNN_(TemporalConvolution_shapeCheck)(
+ state, input, kW, dW, NULL);
+ nInputFrame = input->size[dimS];
+ nOutputFrame = gradOutput->size[dimS];
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ gradOutputWindow = THTensor_(new)();
+ gradInputWindow = THTensor_(new)();
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (gradOutput->nDimension == 2)
+ {
+ /* same frame grouping as in updateOutput: one addmm per group */
+ for(k = 0; nOutputFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputFrame -= nFrame;
+
+ THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage,
+ gradOutput->storageOffset + k*gradOutput->size[1],
+ nFrame, outputFrameStride*gradOutput->size[1],
+ gradOutput->size[1], 1);
+
+ THTensor_(setStorage2d)(gradInputWindow, gradInput->storage,
+ gradInput->storageOffset+k*dW*gradInput->size[1],
+ nFrame, inputFrameStride*gradInput->size[1],
+ kW*gradInput->size[1], 1);
+
+ THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight);
+ }
+ }
+ else
+ {
+ THTensor *gradOutputSample = THTensor_(new)();
+ THTensor *gradInputSample = THTensor_(new)();
+ int nBatchFrame = input->size[0];
+
+ for(i = 0; i < nBatchFrame; i++)
+ {
+ THTensor_(select)(gradOutputSample, gradOutput, 0, i);
+ THTensor_(select)(gradInputSample, gradInput, 0, i);
+ int nOutputSampleFrame = nOutputFrame;
+
+ /* same frame grouping, per batch sample */
+ for(k = 0; nOutputSampleFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputSampleFrame -= nFrame;
+
+ THTensor_(setStorage2d)(gradOutputWindow, gradOutputSample->storage,
+ gradOutputSample->storageOffset + k*gradOutputSample->size[1],
+ nFrame, outputFrameStride*gradOutputSample->size[1],
+ gradOutputSample->size[1], 1);
+
+ THTensor_(setStorage2d)(gradInputWindow, gradInputSample->storage,
+ gradInputSample->storageOffset+k*dW*gradInputSample->size[1],
+ nFrame, inputFrameStride*gradInputSample->size[1],
+ kW*gradInputSample->size[1], 1);
+
+ THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight);
+ }
+ }
+ THTensor_(free)(gradOutputSample);
+ THTensor_(free)(gradInputSample);
+ }
+
+ THTensor_(free)(gradOutputWindow);
+ THTensor_(free)(gradInputWindow);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(input);
+
+}
+
+void THNN_(TemporalConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ int kW,
+ int dW,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ long nInputFrame;
+ long nOutputFrame;
+
+ THTensor *gradOutputWindow;
+ THTensor *inputWindow;
+ long k, i;
+
+ int dimS = 0; // sequence dimension
+ int dimF = 1; // feature dimension
+
+ if (gradOutput->nDimension == 3)
+ {
+ dimS = 1;
+ dimF = 2;
+ }
+
+ THNN_(TemporalConvolution_shapeCheck)(
+ state, input, kW, dW, NULL);
+ nInputFrame = input->size[dimS];
+ nOutputFrame = gradOutput->size[dimS];
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ gradOutputWindow = THTensor_(new)();
+ inputWindow = THTensor_(new)();
+
+ if (input->nDimension == 2)
+ {
+ /* bias first */
+ for(k = 0; k < nOutputFrame; k++)
+ {
+ THTensor_(select)(gradOutputWindow, gradOutput, 0, k);
+ THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow);
+ }
+
+ /* same frame grouping as in updateOutput: one addmm per group */
+ for(k = 0; nOutputFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputFrame -= nFrame;
+
+ THTensor_(setStorage2d)(inputWindow, input->storage,
+ input->storageOffset+k*dW*input->size[1],
+ nFrame, inputFrameStride*input->size[1],
+ kW*input->size[1], 1);
+
+ THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage,
+ gradOutput->storageOffset + k*gradOutput->size[1],
+ nFrame, outputFrameStride*gradOutput->size[1],
+ gradOutput->size[1], 1);
+
+ THTensor *tgradOutputWindow = THTensor_(new)();
+ THTensor_(transpose)(tgradOutputWindow, gradOutputWindow, 0, 1);
+ THTensor_(addmm)(gradWeight, 1, gradWeight, scale, tgradOutputWindow, inputWindow);
+ THTensor_(free)(tgradOutputWindow);
+ }
+ }
+ else
+ {
+ THTensor *gradOutputSample = THTensor_(new)();
+ THTensor *inputSample = THTensor_(new)();
+ int nBatchFrame = input->size[0];
+
+ for(i = 0; i < nBatchFrame; i++)
+ {
+ THTensor_(select)(gradOutputSample, gradOutput, 0, i);
+ THTensor_(select)(inputSample, input, 0, i);
+ int nOutputSampleFrame = nOutputFrame;
+
+ /* bias first */
+ for(k = 0; k < nOutputFrame; k++)
+ {
+ THTensor_(select)(gradOutputWindow, gradOutputSample, 0, k);
+ THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow);
+ }
+
+ /* same frame grouping, per batch sample */
+ for(k = 0; nOutputSampleFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputSampleFrame -= nFrame;
+
+ THTensor_(setStorage2d)(inputWindow, inputSample->storage,
+ inputSample->storageOffset+k*dW*inputSample->size[1],
+ nFrame, inputFrameStride*inputSample->size[1],
+ kW*inputSample->size[1], 1);
+
+ THTensor_(setStorage2d)(gradOutputWindow, gradOutputSample->storage,
+ gradOutputSample->storageOffset + k*gradOutputSample->size[1],
+ nFrame, outputFrameStride*gradOutputSample->size[1],
+ gradOutputSample->size[1], 1);
+
+ THTensor *tgradOutputWindow = THTensor_(new)();
+ THTensor_(transpose)(tgradOutputWindow, gradOutputWindow, 0, 1);
+ THTensor_(addmm)(gradWeight, 1, gradWeight, scale, tgradOutputWindow, inputWindow);
+ THTensor_(free)(tgradOutputWindow);
+ }
+ }
+ THTensor_(free)(gradOutputSample);
+ THTensor_(free)(inputSample);
+ }
+
+ THTensor_(free)(gradOutputWindow);
+ THTensor_(free)(inputWindow);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(input);
+
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalMaxPooling.c
new file mode 100644
index 000000000..344c1b3fd
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalMaxPooling.c
@@ -0,0 +1,283 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalMaxPooling.c"
+#else
+
+static inline void THNN_(TemporalMaxPooling_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THIndexTensor *indices,
+ int kW,
+ int dW) {
+ long niframe;
+ long framesize;
+ long noframe;
+
+ int dimS = 0; // sequence dimension
+ int dimF = 1; // feature dimension
+ int ndims = input->nDimension;
+
+ if (input->nDimension == 3)
+ {
+ dimS = 1;
+ dimF = 2;
+ }
+
+ niframe = input->size[dimS];
+ framesize = input->size[dimF];
+ noframe = (niframe - kW) / dW + 1;
+
+ THArgCheck(kW > 0, 5,
+ "kernel size should be greater than zero, but got kW: %d", kW);
+ THArgCheck(dW > 0, 6,
+ "stride should be greater than zero, but got dW: %d", dW);
+
+ THNN_ARGCHECK(input->nDimension == 2 || input->nDimension == 3, 2, input,
+ "2D or 3D (batch mode) tensor expected for input, but got: %s");
+ THArgCheck(input->size[dimS] >= kW, 2,
+ "input sequence smaller than kernel size. Got: %d, Expected: %d",
+ input->size[dimS], kW);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndims, dimS, noframe);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndims, dimF, framesize);
+ }
+ if (indices != NULL) {
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndims, dimS, noframe);
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndims, dimF, framesize);
+ }
+}
+
+void THNN_(TemporalMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kW,
+ int dW)
+{
+ long niframe;
+ long framesize;
+ long noframe;
+
+ real *input_data;
+ real *output_data;
+ THIndex_t *indices_data;
+
+ long t, y;
+
+ int dimS = 0; // sequence dimension
+ int dimF = 1; // feature dimension
+
+ THNN_(TemporalMaxPooling_shapeCheck)(state, input, NULL, NULL, kW, dW);
+
+ if (input->nDimension == 3)
+ {
+ dimS = 1;
+ dimF = 2;
+ }
+
+ /* sizes */
+ niframe = input->size[dimS];
+ framesize = input->size[dimF];
+ noframe = (niframe - kW) / dW + 1;
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ if (input->nDimension == 2)
+ {
+ /* resize output */
+ THTensor_(resize2d)(output, noframe, framesize);
+
+ /* indices will contain index locations for each output point */
+ THIndexTensor_(resize2d)(indices, noframe, framesize);
+
+ /* get raw pointers */
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ for(t = 0; t < noframe; t++)
+ {
+ real *ip = input_data + t*framesize*dW;
+ real *op = output_data + t*framesize;
+ THIndex_t *xp = indices_data + t*framesize;
+#pragma omp parallel for private(y)
+ for(y = 0; y < framesize; y++)
+ {
+ /* compute local max: */
+ long maxindex = -1;
+ real maxval = -THInf;
+ long x;
+ for(x = 0; x < kW; x++)
+ {
+ real val = ip[x*framesize+y];
+ if (val > maxval)
+ {
+ maxval = val;
+ maxindex = x;
+ }
+ }
+
+ /* set output to local max */
+ op[y] = maxval;
+ xp[y] = (THIndex_t)maxindex;
+ }
+ }
+ }
+ else
+ {
+ /* number of batch frames */
+ long nbframe = input->size[0];
+ long i;
+
+ /* resize output */
+ THTensor_(resize3d)(output, nbframe, noframe, framesize);
+
+ /* indices will contain index locations for each output point */
+ THIndexTensor_(resize3d)(indices, nbframe, noframe, framesize);
+
+ /* get raw pointers */
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ for(i = 0; i < nbframe; i++)
+ {
+ real *inputSample_data = input_data + i*niframe*framesize;
+ real *outputSample_data = output_data + i*noframe*framesize;
+ THIndex_t *indicesSample_data = indices_data + i*noframe*framesize;
+
+ for(t = 0; t < noframe; t++)
+ {
+ real *ip = inputSample_data + t*framesize*dW;
+ real *op = outputSample_data + t*framesize;
+ THIndex_t *xp = indicesSample_data + t*framesize;
+
+#pragma omp parallel for private(y)
+ for(y = 0; y < framesize; y++)
+ {
+ /* compute local max: */
+ long maxindex = -1;
+ real maxval = -THInf;
+ long x;
+ for(x = 0; x < kW; x++)
+ {
+ real val = ip[x*framesize+y];
+ if (val > maxval)
+ {
+ maxval = val;
+ maxindex = x;
+ }
+ }
+
+ /* set output to local max */
+ op[y] = maxval;
+ xp[y] = (THIndex_t)maxindex;
+ }
+ }
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+
+}
+
+void THNN_(TemporalMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kW,
+ int dW)
+{
+ long niframe;
+ int noframe;
+ long framesize;
+
+ real *gradInput_data;
+ real *gradOutput_data;
+ THIndex_t *indices_data;
+
+ long t, y;
+
+ THNN_(TemporalMaxPooling_shapeCheck)(state, input, gradOutput, indices, kW, dW);
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize and zero */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ int dimS = 0; // sequence dimension
+ int dimF = 1; // feature dimension
+
+ if (input->nDimension == 3)
+ {
+ dimS = 1;
+ dimF = 2;
+ }
+ /* sizes */
+ niframe = input->size[dimS];
+ noframe = gradOutput->size[dimS];
+ framesize = gradOutput->size[dimF];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THIndexTensor_(data)(indices);
+
+ if (input->nDimension == 2)
+ {
+ for(t = 0; t < noframe; t++)
+ {
+ real *gip = gradInput_data + t*framesize*dW;
+ real *gop = gradOutput_data + t*framesize;
+ THIndex_t *xp = indices_data + t*framesize;
+#pragma omp parallel for private(y)
+ for(y = 0; y < framesize; y++)
+ {
+ /* route the gradient to the saved max location */
+ long maxindex = (long)xp[y];
+ if (maxindex != -1)
+ gip[maxindex*framesize+y] += gop[y];
+ }
+ }
+ }
+ else
+ {
+ /* number of batch frames */
+ long nbframe = input->size[0];
+ long i;
+
+ for(i = 0; i < nbframe; i++)
+ {
+ real *gradInputSample_data = gradInput_data + i*niframe*framesize;
+ real *gradOutputSample_data = gradOutput_data + i*noframe*framesize;
+ THIndex_t *indicesSample_data = indices_data + i*noframe*framesize;
+
+ for(t = 0; t < noframe; t++)
+ {
+ real *gip = gradInputSample_data + t*framesize*dW;
+ real *gop = gradOutputSample_data + t*framesize;
+ THIndex_t *xp = indicesSample_data + t*framesize;
+#pragma omp parallel for private(y)
+ for(y = 0; y < framesize; y++)
+ {
+ /* route the gradient to the saved max location */
+ long maxindex = (long)xp[y];
+ if (maxindex != -1)
+ gip[maxindex*framesize+y] += gop[y];
+ }
+ }
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c
new file mode 100644
index 000000000..e3ae41e22
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalRowConvolution.c
@@ -0,0 +1,472 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalRowConvolution.c"
+#else
+
+static inline void THNN_(TemporalRowConvolution_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *bias,
+ int kW,
+ int dW,
+ int padW) {
+
+ THArgCheck(kW > 0, 5,
+ "kernel size should be greater than zero, but got kW: %d", kW);
+ THArgCheck(dW > 0, 6,
+ "stride should be greater than zero, but got dW: %d", dW);
+ THNN_ARGCHECK(weight->nDimension == 3, 3, weight,
+ "3D weight tensor expected, but got: %s");
+ THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+ THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
+ }
+
+ // we're always looking at (possibly batch) x feats x seq
+ int ndim = input->nDimension;
+ int dimF = 0;
+ int dimS = 1;
+
+ if (ndim == 3) {
+ ++dimS;
+ ++dimF;
+ }
+
+ THNN_ARGCHECK(ndim == 2 || ndim == 3, 1, input,
+ "2D or 3D (batch mode) input tensor expected, but got: %s");
+
+ long inputFrameSize = weight->size[0];
+ long nInputFrame = input->size[dimS];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ if (nOutputFrame < 1) {
+ THError("Given input size: (%d x %d). "
+ "Calculated output size: (%d x %d). Output size is too small",
+ inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame);
+ }
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame);
+ }
+}
+
+static void THNN_(unfolded_acc_row)(
+ THTensor *finput,
+ THTensor *input,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ size_t c;
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+
+// #pragma omp parallel for private(c)
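+ /* reverse of unfolded_copy_row: scatter-add each of the kW unfolded rows
+ back onto its (possibly overlapping) positions in the input gradient */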
+ for (c = 0; c < inputFrameSize; c++) {
+ size_t kw, x;
+ long long ix = 0;
+
+ for (kw = 0; kw < kW; kw++) {
+ real *src = finput_data
+ + c * (kW * nOutputFrame)
+ + kw * (nOutputFrame);
+ real *dst = input_data + c * (nInputFrame);
+
+ ix = (long long)(kw);
+ if (dW == 1) {
+ real *dst_slice = dst + (size_t)(ix);
+ THVector_(cadd)(dst_slice, dst_slice, src, 1, nOutputFrame);
+ } else {
+ for (x = 0; x < nOutputFrame; x++) {
+ real *dst_slice = dst + (size_t)(ix + x * dW);
+ THVector_(cadd)(dst_slice, dst_slice,
+ src + (size_t)(x), 1, 1);
+ }
+ }
+ }
+ }
+}
+
+static void THNN_(unfolded_copy_row)(
+ THTensor *finput,
+ THTensor *input,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ long k;
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+
+// #pragma omp parallel for private(k)
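+ /* 1D im2col: copy every width-kW sliding window of each feature row into
+ finput so the row convolution reduces to a matrix multiply */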
+ for (k = 0; k < inputFrameSize * kW; k++) {
+ size_t c = k / kW;
+ size_t rest = k % kW;
+ size_t kw = rest % kW;
+ size_t x;
+ long long ix;
+ real *dst = finput_data + c * (kW * nOutputFrame) + kw * (nOutputFrame);
+ real *src = input_data + c * (nInputFrame);
+
+ ix = (long long)(kw);
+ if (dW == 1) {
+ memcpy(dst, src+(size_t)(ix), sizeof(real) * (nOutputFrame));
+ } else {
+ for (x = 0; x < nOutputFrame; x++) {
+ memcpy(dst + (size_t)(x), src + (size_t)(ix + x * dW),
+ sizeof(real) * 1);
+ }
+ }
+ }
+}
+
+static void THNN_(TemporalRowConvolution_updateOutput_frame)(
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ long i;
+
+ THTensor *output3d = THTensor_(newWithStorage3d)(
+ output->storage, output->storageOffset,
+ inputFrameSize, -1,
+ 1, -1,
+ nOutputFrame, -1);
+
+ THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+
+ THTensor_(zero)(output);
+
+ if (bias != NULL) {
+ for (i = 0; i < inputFrameSize; i++)
+ THVector_(fill)
+ (output->storage->data + output->storageOffset
+ + output->stride[0] * i,
+ THTensor_(get1d)(bias, i), nOutputFrame);
+ }
+
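+ /* batched matrix multiply, one GEMM per feature row:
+ weight: inputFrameSize x 1 x kW
+ finput: inputFrameSize x kW x nOutputFrame
+ output3d: inputFrameSize x 1 x nOutputFrame */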
+ THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput);
+
+ THTensor_(free)(output3d);
+}
+
+void THNN_(TemporalRowConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ THTensor *fgradInput, // unused here but needed for CUDA
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst) {
+
+ int ndim = input->nDimension;
+
+ THTensor *tinput;
+ if (!featFirst) {
+ tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+ input = THTensor_(newContiguous)(tinput);
+ } else {
+ input = THTensor_(newContiguous)(input);
+ }
+
+ THNN_(TemporalRowConvolution_shapeCheck)(
+ state, input, NULL, weight, bias, kW, dW, padW);
+
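+ /* row convolution is depthwise: one width-kW filter per input feature row */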
+ long inputFrameSize = weight->size[0];
+ long nInputFrame = input->size[ndim - 1];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ if (ndim == 2) { /* non-batch mode */
+
+ THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame);
+ THTensor_(resize2d)(output, inputFrameSize, nOutputFrame);
+
+ THTensor_(zero)(finput);
+ THTensor_(zero)(output);
+
+ THNN_(TemporalRowConvolution_updateOutput_frame)
+ (input, output, weight, bias, finput,
+ kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+
+ } else {
+ long T = input->size[0];
+ long t;
+
+ THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame);
+ THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame);
+
+ THTensor_(zero)(finput);
+ THTensor_(zero)(output);
+
+#pragma omp parallel for private(t)
+ for (t = 0; t < T; t++) {
+ THTensor *input_t = THTensor_(newSelect)(input, 0, t);
+ THTensor *output_t = THTensor_(newSelect)(output, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(TemporalRowConvolution_updateOutput_frame)
+ (input_t, output_t, weight, bias, finput_t,
+ kW, dW, padW, inputFrameSize, nInputFrame, nOutputFrame);
+
+ THTensor_(free)(input_t);
+ THTensor_(free)(output_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ if (!featFirst) { // NOTE: output will NOT be contiguous in this case
+ THTensor_(transpose)(output, output, ndim - 1, ndim - 2);
+ THTensor_(free)(tinput);
+ }
+
+ THTensor_(free)(input);
+}
+
+static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
+ THTensor *gradInput,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
+ gradOutput->storage, gradOutput->storageOffset,
+ inputFrameSize, -1,
+ 1, -1,
+ nOutputFrame, -1);
+
+ // weight: inputFrameSize x kW x 1
+ // gradOutput3d: inputFrameSize x 1 x nOutputFrame
+ THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d);
+ // fgradInput: inputFrameSize x kW x nOutputFrame
+ THTensor_(free)(gradOutput3d);
+
+ THTensor_(zero)(gradInput);
+
+ THNN_(unfolded_acc_row)(fgradInput, gradInput,
+ kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+}
+
+void THNN_(TemporalRowConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst) {
+
+ int ndim = input->nDimension;
+
+ THTensor *tinput, *tgradOutput;
+
+ if (!featFirst) {
+ tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+ tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
+
+ input = THTensor_(newContiguous)(tinput);
+ gradOutput = THTensor_(newContiguous)(tgradOutput);
+
+ } else {
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ }
+
+ THNN_(TemporalRowConvolution_shapeCheck)(state, input, gradOutput, weight,
+ NULL, kW, dW, padW);
+
+ long inputFrameSize = weight->size[0];
+ long nInputFrame = input->size[ndim - 1];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ THTensor_(resizeAs)(fgradInput, finput);
+ THTensor_(resizeAs)(gradInput, input);
+
+ THTensor_(zero)(fgradInput);
+ THTensor_(zero)(gradInput);
+
+ THTensor *tweight = THTensor_(new)();
+ THTensor_(transpose)(tweight, weight, 1, 2);
+
+ if (ndim == 2) {
+ THNN_(TemporalRowConvolution_updateGradInput_frame)
+ (gradInput, gradOutput, tweight, fgradInput,
+ kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+ } else {
+ long T = input->size[0];
+ long t;
+
+#pragma omp parallel for private(t)
+ for (t = 0; t < T; t++) {
+
+ THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
+
+ THNN_(TemporalRowConvolution_updateGradInput_frame)
+ (gradInput_t, gradOutput_t, tweight, fgradInput_t,
+ kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+
+ THTensor_(free)(gradInput_t);
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(fgradInput_t);
+ }
+ }
+
+ THTensor_(free)(tweight);
+
+ if (!featFirst) { // NOTE: gradInput will NOT be contiguous in this case
+
+ THTensor_(free)(tinput);
+ THTensor_(free)(tgradOutput);
+
+ THTensor_(transpose)(gradInput, gradInput, ndim - 1, ndim - 2);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+
+}
+
+static void THNN_(TemporalRowConvolution_accGradParameters_frame)(
+ THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
+ THTensor *finput, real scale) {
+
+ long i;
+ THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
+ gradOutput->storage, gradOutput->storageOffset,
+ gradOutput->size[0], -1,
+ 1, -1,
+ gradOutput->size[1], -1);
+
+ THTensor *tfinput = THTensor_(new)();
+ THTensor_(transpose)(tfinput, finput, 1, 2);
+ // gradOutput3d: inputFrameSize x 1 x nOutputFrame
+ // tfinput: inputFrameSize x nOutputFrame x kW
+ THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, tfinput);
+ // gradWeight: inputFrameSize x 1 x kW
+ THTensor_(free)(tfinput);
+
+ if (gradBias != NULL) {
+ for (i = 0; i < gradBias->size[0]; i++) {
+ long k;
+ real sum = 0;
+ real *data = gradOutput3d->storage->data
+ + gradOutput3d->storageOffset
+ + i * gradOutput3d->stride[0];
+ for (k = 0; k < gradOutput3d->size[2]; k++) {
+ sum += data[k];
+ }
+ (gradBias->storage->data + gradBias->storageOffset)[i]
+ += scale * sum;
+ }
+ }
+
+ THTensor_(free)(gradOutput3d);
+
+}
+
+void THNN_(TemporalRowConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst,
+ accreal scale_) {
+
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ int ndim = input->nDimension;
+
+ THTensor *tinput, *tgradOutput;
+
+ if (!featFirst) {
+ tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+ tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
+
+ input = THTensor_(newContiguous)(tinput);
+ gradOutput = THTensor_(newContiguous)(tgradOutput);
+ } else {
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ }
+
+ THNN_(TemporalRowConvolution_shapeCheck)
+ (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW);
+
+ long inputFrameSize = gradWeight->size[0];
+ long nInputFrame = input->size[ndim - 1];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ if (ndim == 2) {
+ THNN_(TemporalRowConvolution_accGradParameters_frame)(
+ gradOutput, gradWeight, gradBias, finput, scale);
+ } else {
+ long T = input->size[0];
+ long t;
+
+ for (t = 0; t < T; t++) {
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(TemporalRowConvolution_accGradParameters_frame)(
+ gradOutput_t, gradWeight, gradBias, finput_t, scale);
+
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ if (!featFirst) {
+ THTensor_(free)(tinput);
+ THTensor_(free)(tgradOutput);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/TemporalSubSampling.c b/contrib/lua-torch/nn/lib/THNN/generic/TemporalSubSampling.c
new file mode 100644
index 000000000..68f35e28a
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/TemporalSubSampling.c
@@ -0,0 +1,156 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalSubSampling.c"
+#else
+
+static inline void THNN_(TemporalSubSampling_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ int kW,
+ int dW,
+ int *inputFrameSize) {
+ int nInputFrame, nOutputFrame;
+
+ THArgCheck(kW > 0, 6,
+ "kernel size should be greater than zero, but got kW: %d", kW);
+ THArgCheck(dW > 0, 7,
+ "stride should be greater than zero, but got dW: %d", dW);
+
+ THNN_ARGCHECK(input->nDimension == 2, 2, input,
+ "2D tensor expected for input, but got: %s");
+ if (inputFrameSize != NULL) {
+ THArgCheck( input->size[1] == *inputFrameSize, 2,
+ "invalid input frame size. Got: %d, Expected: %d",
+ input->size[1], *inputFrameSize);
+ }
+ THArgCheck( input->size[0] >= kW, 2,
+ "input sequence smaller than kernel size. Got %d, Expected: %d",
+ input->size[0], kW);
+
+ nInputFrame = input->size[0];
+ nOutputFrame = (nInputFrame - kW) / dW + 1;
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, input->nDimension, 0, nOutputFrame);
+ if (inputFrameSize != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, input->nDimension, 1, *inputFrameSize);
+ }
+ }
+}
+
+void THNN_(TemporalSubSampling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ int kW,
+ int dW,
+ int inputFrameSize)
+{
+ THTensor *outputFrame, *inputWindow;
+ int nInputFrame, nOutputFrame;
+ long k;
+
+ THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+ THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");
+ THNN_(TemporalSubSampling_shapeCheck)(state, input, NULL, kW, dW, &inputFrameSize);
+
+ outputFrame = THTensor_(new)();
+ inputWindow = THTensor_(new)();
+
+ nInputFrame = input->size[0];
+ nOutputFrame = (nInputFrame - kW) / dW + 1;
+
+ THTensor_(resize2d)(output,
+ nOutputFrame,
+ inputFrameSize);
+
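+ /* each output frame is the per-feature sum over a kW-frame input window,
+ scaled elementwise by weight and shifted by bias */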
+ for(k = 0; k < nOutputFrame; k++)
+ {
+ THTensor_(narrow)(inputWindow, input, 0, k*dW, kW);
+ THTensor_(select)(outputFrame, output, 0, k);
+ THTensor_(sum)(outputFrame, inputWindow, 0, 1);
+ THTensor_(cmul)(outputFrame, outputFrame, weight);
+ THTensor_(cadd)(outputFrame, outputFrame, 1, bias);
+ }
+
+ THTensor_(free)(outputFrame);
+ THTensor_(free)(inputWindow);
+}
+
+void THNN_(TemporalSubSampling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ int kW,
+ int dW)
+{
+
+ THTensor *gradOutputFrame;
+ THTensor *gradInputWindow, *buffer, *kwunit;
+ long k;
+
+ THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
+ THNN_(TemporalSubSampling_shapeCheck)(state, input, gradOutput, kW, dW, NULL);
+
+ gradOutputFrame = THTensor_(new)();
+ gradInputWindow = THTensor_(new)();
+ buffer = THTensor_(new)();
+ kwunit = THTensor_(newWithSize1d)(kW);
+
+ THTensor_(fill)(kwunit, 1);
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
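+ /* the outer product with a length-kW ones vector broadcasts
+ weight * gradOutput[k] onto every frame of the input window */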
+ for(k = 0; k < gradOutput->size[0]; k++)
+ {
+ THTensor_(narrow)(gradInputWindow, gradInput, 0, k*dW, kW);
+ THTensor_(select)(gradOutputFrame, gradOutput, 0, k);
+ THTensor_(cmul)(buffer, weight, gradOutputFrame);
+ THTensor_(addr)(gradInputWindow, 1, gradInputWindow, 1, kwunit, buffer);
+ }
+
+ THTensor_(free)(gradOutputFrame);
+ THTensor_(free)(gradInputWindow);
+ THTensor_(free)(buffer);
+ THTensor_(free)(kwunit);
+}
+
+void THNN_(TemporalSubSampling_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ int kW,
+ int dW,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THTensor *gradOutputFrame;
+ THTensor *inputWindow, *buffer;
+ long k;
+
+ THNN_(TemporalSubSampling_shapeCheck)(state, input, gradOutput, kW, dW, NULL);
+ gradOutputFrame = THTensor_(new)();
+ inputWindow = THTensor_(new)();
+ buffer = THTensor_(new)();
+
+ for(k = 0; k < gradOutput->size[0]; k++)
+ {
+ THTensor_(narrow)(inputWindow, input, 0, k*dW, kW);
+ THTensor_(select)(gradOutputFrame, gradOutput, 0, k);
+ THTensor_(sum)(buffer, inputWindow, 0, 1);
+ THTensor_(addcmul)(gradWeight, gradWeight, scale, buffer, gradOutputFrame);
+ THTensor_(cadd)(gradBias, gradBias, scale, gradOutputFrame);
+ }
+
+ THTensor_(free)(gradOutputFrame);
+ THTensor_(free)(inputWindow);
+ THTensor_(free)(buffer);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/Threshold.c b/contrib/lua-torch/nn/lib/THNN/generic/Threshold.c
new file mode 100644
index 000000000..949c7a07c
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/Threshold.c
@@ -0,0 +1,64 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Threshold.c"
+#else
+
+void THNN_(Threshold_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ accreal threshold_,
+ accreal val_,
+ bool inplace)
+{
+ real threshold = TH_CONVERT_ACCREAL_TO_REAL(threshold_);
+ real val = TH_CONVERT_ACCREAL_TO_REAL(val_);
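+ /* y = x when x > threshold, otherwise y = val */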
+ if (inplace)
+ {
+ TH_TENSOR_APPLY(real, input,
+ if (*input_data <= threshold)
+ *input_data = val;
+ );
+ THTensor_(set)(output, input);
+ }
+ else
+ {
+ THTensor_(resizeAs)(output, input);
+ TH_TENSOR_APPLY2(real, output, real, input,
+ *output_data = (*input_data > threshold) ? *input_data : val;
+ );
+ }
+}
+
+void THNN_(Threshold_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ accreal threshold_,
+ accreal val_,
+ bool inplace)
+{
+ real threshold = TH_CONVERT_ACCREAL_TO_REAL(threshold_);
+ real val = TH_CONVERT_ACCREAL_TO_REAL(val_);
+ THNN_CHECK_NELEMENT(input, gradOutput);
+ if (inplace)
+ {
+ TH_TENSOR_APPLY2(real, gradOutput, real, input,
+ if ((*input_data) <= threshold)
+ *gradOutput_data = 0;
+ );
+ THTensor_(set)(gradInput, gradOutput);
+ }
+ else
+ {
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ if ((*input_data) > threshold)
+ *gradInput_data = *gradOutput_data;
+ else
+ *gradInput_data = 0;
+ );
+ }
+}
+
+#endif
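
The two Threshold functions above reduce to a single elementwise rule; the in-place branches only avoid allocating a second tensor. A hedged plain-array restatement (illustrative, not the THNN API):

    #include <stdio.h>

    /* y[i] = x[i] if x[i] > threshold, else val  (Threshold_updateOutput) */
    static void threshold_forward(const float *x, float *y, long n,
                                  float threshold, float val)
    {
      for (long i = 0; i < n; i++)
        y[i] = (x[i] > threshold) ? x[i] : val;
    }

    /* gradient passes through only where the input exceeded the threshold */
    static void threshold_backward(const float *x, const float *gy,
                                   float *gx, long n, float threshold)
    {
      for (long i = 0; i < n; i++)
        gx[i] = (x[i] > threshold) ? gy[i] : 0.f;
    }

    int main(void)
    {
      const float x[3] = {-1.f, 0.5f, 2.f}, gy[3] = {1.f, 1.f, 1.f};
      float y[3], gx[3];
      threshold_forward(x, y, 3, 0.f, 0.f);   /* threshold=0, val=0: ReLU */
      threshold_backward(x, gy, gx, 3, 0.f);
      printf("%g %g %g / %g %g %g\n", y[0], y[1], y[2], gx[0], gx[1], gx[2]);
      return 0;
    }

With threshold = 0 and val = 0 this is exactly ReLU, which is how nn.ReLU builds on this kernel.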
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricAveragePooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricAveragePooling.c
new file mode 100644
index 000000000..91c870e6f
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricAveragePooling.c
@@ -0,0 +1,373 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c"
+#else
+
+static inline void THNN_(VolumetricAveragePooling_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH) {
+ long nslices;
+ long itime;
+ long iheight;
+ long iwidth;
+ long otime;
+ long oheight;
+ long owidth;
+ int ndim = input->nDimension;
+ int dimN = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ if (input->nDimension == 5)
+ {
+ dimN++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ THArgCheck(kT > 0 && kW > 0 && kH > 0, 5,
+ "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d",
+ kT, kH, kW);
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 8,
+ "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
+ dT, dH, dW);
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+
+ THArgCheck(input->size[dimw] >= kW && input->size[dimh] >= kH
+ && input->size[dimt] >= kT, 2,
+ "input image (T: %d H: %d W: %d) smaller than "
+ "kernel size (kT: %d kH: %d kW: %d)",
+ input->size[dimt], input->size[dimh], input->size[dimw],
+ kT, kH, kW);
+
+ /* sizes */
+ nslices = input->size[dimN];
+ itime = input->size[dimt];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ otime = (itime - kT) / dT + 1;
+ oheight = (iheight - kH) / dH + 1;
+ owidth = (iwidth - kW) / dW + 1;
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimN, nslices);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, otime);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, oheight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, owidth);
+ }
+}
+
+static void THNN_(VolumetricAveragePooling_updateOutput_frame)(
+ real *input_p,
+ real *output_p,
+ long nslices,
+ long itime,
+ long iwidth,
+ long iheight,
+ long otime,
+ long owidth,
+ long oheight,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ /* loop over output */
+ long i, j, ti;
+ for (ti = 0; ti < otime; ti++)
+ {
+ for (i = 0; i < oheight; i++)
+ {
+ for (j = 0; j < owidth; j++)
+ {
+ /* local pointers */
+ real *ip = input_p + k * itime * iwidth * iheight
+ + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
+ real *op = output_p + k * otime * owidth * oheight
+ + ti * owidth * oheight + i * owidth + j;
+
+ /* compute local sum: */
+ real sum = 0.0;
+ int x, y, z;
+
+ for (z=0; z < kT; z++)
+ {
+ for (y = 0; y < kH; y++)
+ {
+ for (x = 0; x < kW; x++)
+ {
+ sum += *(ip + z * iwidth * iheight + y * iwidth + x);
+ }
+ }
+ }
+
+ /* set output to the local average */
+ *op = sum / (kT * kW * kH);
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricAveragePooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH)
+{
+ long nslices;
+ long itime;
+ long iheight;
+ long iwidth;
+ long otime;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+
+ THNN_(VolumetricAveragePooling_shapeCheck)(
+ state, input, NULL, kT, kW, kH,
+ dT, dW, dH);
+
+ int dimN = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ if (input->nDimension == 5)
+ {
+ dimN++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimN];
+ itime = input->size[dimt];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ otime = (itime - kT) / dT + 1;
+ oheight = (iheight - kH) / dH + 1;
+ owidth = (iwidth - kW) / dW + 1;
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ if (input->nDimension == 4) /* non-batch mode */
+ {
+ /* resize output */
+ THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+ THNN_(VolumetricAveragePooling_updateOutput_frame)(
+ input_data, output_data, nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH,
+ dT, dW, dH
+ );
+ }
+ else /* batch mode */
+ {
+ long p;
+ long nBatch = input->size[0];
+
+ long istride = nslices * itime * iwidth * iheight;
+ long ostride = nslices * otime * owidth * oheight;
+
+ /* resize output */
+ THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p=0; p < nBatch; p++)
+ {
+ THNN_(VolumetricAveragePooling_updateOutput_frame)(
+ input_data + p * istride, output_data + p * ostride, nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH,
+ dT, dW, dH
+ );
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(VolumetricAveragePooling_updateGradInput_frame)(
+ real *gradInput_p,
+ real *gradOutput_p,
+ long nslices,
+ long itime,
+ long iwidth,
+ long iheight,
+ long otime,
+ long owidth,
+ long oheight,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ /* loop over output */
+ long i, j, ti;
+ for (ti = 0; ti < otime; ti++)
+ {
+ for (i = 0; i < oheight; i++)
+ {
+ for (j = 0; j < owidth; j++)
+ {
+ /* local pointers */
+ real *ip = gradInput_p + k * itime * iwidth * iheight
+ + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
+ real *op = gradOutput_p + k * otime * owidth * oheight
+ + ti * owidth * oheight + i * owidth + j;
+
+ /* scatter gradients out to footprint: */
+ real val = *op / (kT * kW * kH);
+ int x,y,z;
+ for (z=0; z < kT; z++)
+ {
+ for (y = 0; y < kH; y++)
+ {
+ for (x = 0; x < kW; x++)
+ {
+ *(ip + z * iwidth * iheight + y * iwidth + x) += val;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricAveragePooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH)
+{
+ int nslices;
+ int itime;
+ int iheight;
+ int iwidth;
+ int otime;
+ int oheight;
+ int owidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+
+ int dimN = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ THNN_(VolumetricAveragePooling_shapeCheck)(
+ state, input, gradOutput, kT, kW, kH,
+ dT, dW, dH);
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (input->nDimension == 5)
+ {
+ dimN++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimN];
+ itime = input->size[dimt];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ otime = gradOutput->size[dimt];
+ oheight = gradOutput->size[dimh];
+ owidth = gradOutput->size[dimw];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+
+ /* backprop */
+ if (input->nDimension == 4) /* non-batch mode */
+ {
+ THNN_(VolumetricAveragePooling_updateGradInput_frame)(
+ gradInput_data, gradOutput_data, nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH,
+ dT, dW, dH
+ );
+ }
+ else /* batch mode */
+ {
+ long p;
+ long nBatch = input->size[0];
+
+ long istride = nslices * itime * iwidth * iheight;
+ long ostride = nslices * otime * owidth * oheight;
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nBatch; p++)
+ {
+ THNN_(VolumetricAveragePooling_updateGradInput_frame)(
+ gradInput_data + p * istride, gradOutput_data + p * ostride, nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH,
+ dT, dW, dH
+ );
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
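
Both directions above share the same valid (unpadded) output geometry, and the backward pass simply spreads each gradient value uniformly over its kT*kW*kH footprint. A small sketch of the size arithmetic used by updateOutput and updateGradInput (illustrative):

    #include <stdio.h>

    /* output extent of valid 3-D average pooling: (in - k) / d + 1 */
    static long pooled_size(long in, int k, int d)
    {
      return (in - k) / d + 1;
    }

    int main(void)
    {
      printf("%ld\n", pooled_size(7, 3, 2));   /* (7-3)/2+1 = 3 */
      printf("%ld\n", pooled_size(8, 2, 2));   /* (8-2)/2+1 = 4 */
      return 0;
    }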
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolution.c
new file mode 100644
index 000000000..be1aa82e6
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolution.c
@@ -0,0 +1,260 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricConvolution.c"
+#else
+
+void THNN_(VolumetricConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput, // only used by cuda impl
+ THTensor *fgradInput, // only used by cuda impl
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version
+
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ if (input->nDimension == 5)
+ {
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ long nOutputPlane = weight->size[0];
+ long kT = weight->size[2];
+ long kH = weight->size[3];
+ long kW = weight->size[4];
+ long inputDepth = input->size[dimt];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long outputDepth = (inputDepth - kT) / dT + 1;
+ long outputWidth = (inputWidth - kW) / dW + 1;
+ long outputHeight = (inputHeight - kH) / dH + 1;
+ THTensor *outn = THTensor_(new)();
+ long i, j;
+ if (input->nDimension == 4) /* non-batch mode */
+ {
+ THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
+
+ /* add bias */
+ if (bias) {
+ for (i = 0; i < bias->size[0]; i++)
+ {
+ THTensor_(select)(outn, output, 0, i);
+ THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+ }
+ } else {
+ THTensor_(zero)(output);
+ }
+
+ /* do convolutions */
+ THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X");
+ }
+ else /* batch mode */
+ {
+ long nBatch = input->size[0];
+ THTensor_(resize5d)(output, nBatch, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor *inb = THTensor_(new)();
+ THTensor *outb = THTensor_(new)();
+
+ /* loop over batches */
+ for (j = 0; j < nBatch; j++)
+ {
+ THTensor_(select)(inb, input, 0, j);
+ THTensor_(select)(outb, output, 0, j);
+
+ /* add bias */
+ if (bias) {
+ for (i = 0; i < bias->size[0]; i++)
+ {
+ THTensor_(select)(outn, outb, 0, i);
+ THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+ }
+ } else {
+ THTensor_(zero)(outb);
+ }
+
+ /* do convolutions */
+ THTensor_(conv3Dmv)(outb, 1.0, 1.0, inb, weight, dT, dH, dW, "V", "X");
+ }
+
+ THTensor_(free)(inb);
+ THTensor_(free)(outb);
+ }
+ THTensor_(free)(outn);
+}
+
+void THNN_(VolumetricConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput, // only used by cuda impl
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version
+
+ THNN_ARGCHECK(weight->nDimension == 5, 4, weight,
+ "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
+ "expected for weight, but got: %s");
+
+ int nOutputPlane = (int)weight->size[0];
+
+ THNN_ARGCHECK(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3,
+ gradOutput,
+ "4D or 5D (batch mode) tensor expected for gradOutput, but got: %s");
+
+ int dimPlane = 0;
+ if (gradOutput->nDimension == 5)
+ {
+ dimPlane++;
+ }
+
+ THArgCheck(nOutputPlane == gradOutput->size[dimPlane], 1,
+ "Number of output features is not equal to nOutputPlane"
+ );
+
+ /* gradient to input */
+ THTensor *tweight = THTensor_(newTranspose)(weight, 0, 1);
+ if (gradOutput->nDimension == 4) /* non-batch mode */
+ {
+ THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C");
+ }
+ else /* batch mode */
+ {
+ long nBatch = gradOutput->size[0];
+ THTensor *ginpb = THTensor_(new)();
+ THTensor *goutb = THTensor_(new)();
+ long j;
+
+ THTensor_(resize5d)(gradInput,
+ input->size[0], input->size[1], input->size[2], input->size[3], input->size[4]
+ );
+
+ /* loop over batches */
+ for (j = 0; j < nBatch; j++)
+ {
+ THTensor_(select)(ginpb, gradInput, 0, j);
+ THTensor_(select)(goutb, gradOutput, 0, j);
+ THTensor_(conv3Dmv)(ginpb, 0.0, 1.0, goutb, tweight, dT, dH, dW, "F", "C");
+ }
+ THTensor_(free)(ginpb);
+ THTensor_(free)(goutb);
+ }
+
+ THTensor_(free)(tweight);
+}
+
+void THNN_(VolumetricConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput, // only used by cuda impl
+ THTensor *fgradInput, // only used by cuda impl
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THArgCheck(pT == 0 && pW == 0 && pH == 0, 9, "padding not supported by CPU backend"); // sharing signature with CUDA version
+
+ THNN_ARGCHECK(gradWeight->nDimension == 5, 4, gradWeight,
+ "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
+ "expected for gradWeight, but got: %s");
+
+ int nOutputPlane = (int)gradWeight->size[0];
+ if (gradBias) {
+ THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5,
+ "gradBias tensor has wrong size"
+ );
+ }
+
+ long k;
+ real *gradBias_data;
+ THTensor *gradOutSlice;
+ int dimPlane = 0;
+ if (gradOutput->nDimension == 5)
+ {
+ dimPlane++;
+ }
+
+ THArgCheck(nOutputPlane == gradOutput->size[dimPlane], 1,
+ "Number of output features is not equal to nOutputPlane"
+ );
+
+ if (gradOutput->nDimension == 4) /* non-batch mode */
+ {
+ /* gradient to bias */
+ if (gradBias) {
+ gradBias_data = THTensor_(data)(gradBias);
+ gradOutSlice = THTensor_(new)();
+ for (k = 0; k < nOutputPlane; k++)
+ {
+ THTensor_(select)(gradOutSlice, gradOutput, 0, k);
+ gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
+ }
+ THTensor_(free)(gradOutSlice);
+ }
+
+ /* gradient to kernels */
+ THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW);
+ }
+ else /* batch mode */
+ {
+ long nBatch = gradOutput->size[0];
+ THTensor *inpb = THTensor_(new)();
+ THTensor *goutb = THTensor_(new)();
+ long j;
+
+ /* loop over batches */
+ for (j = 0; j < nBatch; j++)
+ {
+ THTensor_(select)(inpb, input, 0, j);
+ THTensor_(select)(goutb, gradOutput, 0, j);
+
+ /* gradient to bias */
+ if (gradBias) {
+ gradBias_data = THTensor_(data)(gradBias);
+ gradOutSlice = THTensor_(new)();
+ for (k = 0; k < nOutputPlane; k++)
+ {
+ THTensor_(select)(gradOutSlice, goutb, 0, k);
+ gradBias_data[k] += scale * THTensor_(sumall)(gradOutSlice);
+ }
+ THTensor_(free)(gradOutSlice);
+ }
+
+ /* gradient to kernels */
+ THTensor_(conv3DRevger)(gradWeight, 1.0, scale, inpb, goutb, dT, dH, dW);
+ }
+ THTensor_(free)(inpb);
+ THTensor_(free)(goutb);
+ }
+}
+
+#endif
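
The CPU path above delegates to THTensor_(conv3Dmv) in valid mode with cross-correlation ("V", "X"), which is why the padding arguments must be zero. A naive single-cell reference of what that computes for one input/output plane pair, as a sketch (plain arrays, illustrative names):

    #include <stdio.h>

    /* one output voxel of a valid-mode 3-D cross-correlation over a
       single input plane; the full op sums this over input planes and
       adds the bias that the code above fills in beforehand */
    static float conv3d_cell(const float *in, const float *w,
                             long iH, long iW,
                             int kT, int kH, int kW,
                             int dT, int dH, int dW,
                             long ot, long oi, long oj)
    {
      float sum = 0.f;
      for (int z = 0; z < kT; z++)
        for (int y = 0; y < kH; y++)
          for (int x = 0; x < kW; x++)
            sum += in[(ot*dT + z) * iH * iW + (oi*dH + y) * iW + (oj*dW + x)]
                 * w[z * kH * kW + y * kW + x];
      return sum;
    }

    int main(void)
    {
      /* 2x2x2 volume against a 2x2x2 kernel of ones -> 1+2+...+8 = 36 */
      const float in[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      const float w[8]  = {1, 1, 1, 1, 1, 1, 1, 1};
      printf("%g\n", conv3d_cell(in, w, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0));
      return 0;
    }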
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolutionMM.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolutionMM.c
new file mode 100644
index 000000000..00a121db6
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricConvolutionMM.c
@@ -0,0 +1,628 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricConvolutionMM.c"
+#else
+
+static inline void THNN_(VolumetricConvolutionMM_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *bias,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH) {
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+ THArgCheck(kT > 0 && kW > 0 && kH > 0, 8,
+ "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW);
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
+
+ int ndim = input->nDimension;
+ int dimf = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ if (ndim == 5)
+ {
+ dimf++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ long nInputPlane;
+ long inputDepth;
+ long inputHeight;
+ long inputWidth;
+ long nOutputPlane;
+ long outputDepth;
+ long outputHeight;
+ long outputWidth;
+
+ nInputPlane = input->size[dimf];
+ inputDepth = input->size[dimt];
+ inputHeight = input->size[dimh];
+ inputWidth = input->size[dimw];
+ nOutputPlane = weight->size[0];
+ outputDepth = (inputDepth + 2*pT - kT) / dT + 1;
+ outputHeight = (inputHeight + 2*pH - kH) / dH + 1;
+ outputWidth = (inputWidth + 2*pW - kW) / dW + 1;
+
+ if (outputWidth < 1 || outputHeight < 1 || outputDepth < 1)
+ {
+ THError(
+ "Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
+ nInputPlane, inputDepth, inputHeight, inputWidth,
+ nOutputPlane, outputDepth, outputHeight, outputWidth
+ );
+ }
+
+ THArgCheck(weight->nDimension == 2 || weight->nDimension == 5, 4,
+ "weight tensor should be 2D or 5D - got %d", weight->nDimension);
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
+ }
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, outputDepth);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+static int THNN_(view_weight)(THTensor **_weight)
+{
+ THTensor *weight = *_weight;
+ if (weight->nDimension == 5) {
+ long s1 = weight->size[0];
+ long s2 = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4];
+ *_weight = THTensor_(newWithStorage2d)(weight->storage, weight->storageOffset, s1, -1, s2, -1);
+ return 1;
+ }
+ return 0;
+}
+
+/* note: unlike unfolded_copy, adjacent kernel positions here accumulate into overlapping input locations, so this cannot be parallelized as effectively */
+static void THNN_(unfolded_acc_vol)(
+ THTensor *finput,
+ THTensor *input,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ int nInputPlane,
+ int inputDepth,
+ int inputWidth,
+ int inputHeight,
+ int outputDepth,
+ int outputWidth,
+ int outputHeight)
+{
+ int nip;
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+
+//#pragma omp parallel for private(nip)
+ for (nip = 0; nip < nInputPlane; nip++)
+ {
+ int kt, kw, kh, t, y, x, it, ix, iy;
+ for (kt = 0; kt < kT; kt++)
+ {
+ for (kh = 0; kh < kH; kh++)
+ {
+ for (kw = 0; kw < kW; kw++)
+ {
+ real *src = finput_data
+ + nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth)
+ + kt * (kH*kW*outputDepth*outputHeight*outputWidth)
+ + kh * (kW*outputDepth*outputHeight*outputWidth)
+ + kw * (outputDepth*outputHeight*outputWidth);
+
+ real *dst = input_data + nip*(inputDepth*inputHeight*inputWidth);
+ if (pT > 0 || pH > 0 || pW > 0)
+ {
+ for (t = 0; t < outputDepth; t++)
+ {
+ it = t*dT - pT + kt;
+ for (y = 0; y < outputHeight; y++)
+ {
+ iy = y*dH - pH + kh;
+ for (x = 0; x < outputWidth; x++)
+ {
+ ix = x*dW - pW + kw;
+ if (it >= 0 && it < inputDepth && iy >= 0 && iy < inputHeight && ix >= 0 && ix < inputWidth)
+ {
+ real *dst_slice = dst+it*inputHeight*inputWidth+iy*inputWidth+ix;
+ THVector_(cadd)(dst_slice, dst_slice, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1);
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ for (t = 0; t < outputDepth; t++)
+ {
+ it = t*dT + kt;
+ for (y = 0; y < outputHeight; y++)
+ {
+ iy = y*dH + kh;
+ for(x = 0; x < outputWidth; x++)
+ {
+ ix = x*dW + kw;
+ real *dst_slice = dst+it*inputHeight*inputWidth+iy*inputWidth+ix;
+ THVector_(cadd)(dst_slice, dst_slice, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static void THNN_(unfolded_copy_vol)(
+ THTensor *finput,
+ THTensor *input,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ int nInputPlane,
+ int inputDepth,
+ int inputWidth,
+ int inputHeight,
+ int outputDepth,
+ int outputWidth,
+ int outputHeight)
+{
+ long k;
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+// #pragma omp parallel for private(k)
+ for (k = 0; k < nInputPlane*kT*kH*kW; k++)
+ {
+ int nip = k / (kT*kH*kW);
+ int rest = k % (kT*kH*kW);
+ int kt = rest / (kH*kW);
+ rest = rest % (kH*kW);
+ int kh = rest / kW;
+ int kw = rest % kW;
+ int t,x,y,it,ix,iy;
+ real *dst = finput_data
+ + nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth)
+ + kt * (kH*kW*outputDepth*outputHeight*outputWidth)
+ + kh * (kW*outputDepth*outputHeight*outputWidth)
+ + kw * (outputDepth*outputHeight*outputWidth);
+ real *src = input_data + nip*(inputDepth*inputHeight*inputWidth);
+
+ if (pT > 0 || pH > 0 || pW > 0)
+ {
+ for (t = 0; t < outputDepth; t++)
+ {
+ it = t*dT - pT + kt;
+ for (y = 0; y < outputHeight; y++)
+ {
+ iy = y*dH - pH + kh;
+ for (x = 0; x < outputWidth; x++)
+ {
+ ix = x*dW - pW + kw;
+ if (it < 0 || it >= inputDepth || iy < 0 || iy >= inputHeight || ix < 0 || ix >= inputWidth)
+ memset(dst+t*outputHeight*outputWidth+y*outputWidth+x, 0, sizeof(real)*(1));
+ else
+ memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1));
+ }
+ }
+ }
+ }
+ else
+ {
+ for (t = 0; t < outputDepth; t++)
+ {
+ it = t*dT + kt;
+ for (y = 0; y < outputHeight; y++)
+ {
+ iy = y*dH + kh;
+ for(x = 0; x < outputWidth; x++)
+ {
+ ix = x*dW + kw;
+ memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1));
+ }
+ }
+ }
+ }
+ }
+}
+
+static void THNN_(VolumetricConvolutionMM_updateOutput_frame)(
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ long nInputPlane,
+ long inputDepth,
+ long inputWidth,
+ long inputHeight,
+ long nOutputPlane,
+ long outputDepth,
+ long outputWidth,
+ long outputHeight)
+{
+ long i;
+ THTensor *output2d;
+
+ THNN_(unfolded_copy_vol)(
+ finput, input,
+ kT, kW, kH,
+ dT, dW, dH,
+ pT, pW, pH,
+ nInputPlane,
+ inputDepth, inputWidth, inputHeight,
+ outputDepth, outputWidth, outputHeight
+ );
+
+ output2d = THTensor_(newWithStorage2d)(
+ output->storage, output->storageOffset, nOutputPlane, -1,
+ outputDepth*outputHeight*outputWidth, -1
+ );
+
+ if (bias) {
+ for (i = 0; i < nOutputPlane; i++)
+ {
+ THVector_(fill)(
+ output->storage->data+output->storageOffset+output->stride[0]*i,
+ THTensor_(get1d)(bias, i),
+ outputDepth*outputHeight*outputWidth
+ );
+ }
+ } else {
+ THTensor_(zero)(output);
+ }
+
+ THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput);
+
+ THTensor_(free)(output2d);
+}
+
+void THNN_(VolumetricConvolutionMM_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ int dimf = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+ int freeWeight = 0;
+
+ long nInputPlane;
+ long inputDepth;
+ long inputHeight;
+ long inputWidth;
+ long nOutputPlane;
+ long outputDepth;
+ long outputHeight;
+ long outputWidth;
+
+ THNN_(VolumetricConvolutionMM_shapeCheck)(
+ state, input, NULL, weight, bias,
+ kT, kW, kH, dT, dW, dH, pT, pW, pH);
+ input = THTensor_(newContiguous)(input);
+
+ if (input->nDimension == 5)
+ {
+ dimf++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ nInputPlane = input->size[dimf];
+ inputDepth = input->size[dimt];
+ inputHeight = input->size[dimh];
+ inputWidth = input->size[dimw];
+ nOutputPlane = weight->size[0];
+ outputDepth = (inputDepth + 2*pT - kT) / dT + 1;
+ outputHeight = (inputHeight + 2*pH - kH) / dH + 1;
+ outputWidth = (inputWidth + 2*pW - kW) / dW + 1;
+
+ freeWeight = THNN_(view_weight)(&weight);
+
+ if (input->nDimension == 4)
+ {
+ THTensor_(resize2d)(finput, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth);
+ THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
+
+ THNN_(VolumetricConvolutionMM_updateOutput_frame)(
+ input, output, weight, bias, finput,
+ kT, kW, kH,
+ dT, dW, dH,
+ pT, pW, pH,
+ nInputPlane, inputDepth, inputWidth, inputHeight,
+ nOutputPlane, outputDepth, outputWidth, outputHeight
+ );
+ }
+ else
+ {
+ long T = input->size[0];
+ long t;
+
+ THTensor_(resize3d)(finput, T, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth);
+ THTensor_(resize5d)(output, T, nOutputPlane, outputDepth, outputHeight, outputWidth);
+
+// #pragma omp parallel for private(t)
+ for (t = 0; t < T; t++)
+ {
+ THTensor *input_t = THTensor_(newSelect)(input, 0, t);
+ THTensor *output_t = THTensor_(newSelect)(output, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(VolumetricConvolutionMM_updateOutput_frame)(
+ input_t, output_t, weight, bias, finput_t,
+ kT, kW, kH,
+ dT, dW, dH,
+ pT, pW, pH,
+ nInputPlane, inputDepth, inputWidth, inputHeight,
+ nOutputPlane, outputDepth, outputWidth, outputHeight
+ );
+
+ THTensor_(free)(input_t);
+ THTensor_(free)(output_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ THTensor_(free)(input);
+ if (freeWeight)
+ THTensor_(free)(weight);
+}
+
+static void THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
+ THTensor *gradInput,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *fgradInput,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
+ gradOutput->storage, gradOutput->storageOffset,
+ gradOutput->size[0], -1,
+ gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
+ );
+
+ THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d);
+ THTensor_(free)(gradOutput2d);
+
+ THTensor_(zero)(gradInput);
+
+ THNN_(unfolded_acc_vol)(
+ fgradInput, gradInput,
+ kT, kW, kH,
+ dT, dW, dH,
+ pT, pW, pH,
+ gradInput->size[0], gradInput->size[1], gradInput->size[3], gradInput->size[2],
+ gradOutput->size[1], gradOutput->size[3], gradOutput->size[2]
+ );
+}
+
+void THNN_(VolumetricConvolutionMM_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ int nOutputPlane = (int)weight->size[0];
+
+ THNN_(VolumetricConvolutionMM_shapeCheck)(
+ state, input, gradOutput, weight, NULL,
+ kT, kW, kH, dT, dW, dH, pT, pW, pH);
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ int freeWeight = THNN_(view_weight)(&weight);
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(resizeAs)(fgradInput, finput);
+ // depending on the BLAS library, fgradInput (result tensor) might
+ // be left uninitialized on zero alpha, which might lead to weird behavior
+ // hence, to be safe, zero it
+ THTensor_(zero)(fgradInput);
+ THTensor *tweight = THTensor_(new)();
+ THTensor_(transpose)(tweight, weight, 0, 1);
+
+ if (input->nDimension == 4)
+ {
+ THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
+ gradInput, gradOutput, tweight, fgradInput,
+ kT, kW, kH,
+ dT, dW, dH,
+ pT, pW, pH
+ );
+ }
+ else
+ {
+ long T = input->size[0];
+ long t;
+
+//#pragma omp parallel for private(t)
+ for (t = 0; t < T; t++)
+ {
+ THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
+
+ THNN_(VolumetricConvolutionMM_updateGradInput_frame)(
+ gradInput_t, gradOutput_t, tweight, fgradInput_t,
+ kT, kW, kH,
+ dT, dW, dH,
+ pT, pW, pH
+ );
+
+ THTensor_(free)(gradInput_t);
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(fgradInput_t);
+ }
+ }
+
+ THTensor_(free)(tweight);
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ if (freeWeight)
+ THTensor_(free)(weight);
+}
+
+static void THNN_(VolumetricConvolutionMM_accGradParameters_frame)(
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ real scale)
+{
+ long i;
+ THTensor *gradOutput2d = THTensor_(newWithStorage2d)(
+ gradOutput->storage, gradOutput->storageOffset,
+ gradOutput->size[0], -1,
+ gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1
+ );
+
+ THTensor *tfinput = THTensor_(new)();
+ THTensor_(transpose)(tfinput, finput, 0, 1);
+ THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, tfinput);
+ THTensor_(free)(tfinput);
+
+ if (gradBias) {
+ for (i = 0; i < gradBias->size[0]; i++)
+ {
+ long k;
+ real sum = 0;
+ real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0];
+ for (k = 0; k < gradOutput2d->size[1]; k++)
+ sum += data[k];
+
+ (gradBias->storage->data + gradBias->storageOffset)[i] += scale * sum;
+ }
+ }
+
+ THTensor_(free)(gradOutput2d);
+}
+
+void THNN_(VolumetricConvolutionMM_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ int freeWeight;
+ int nOutputPlane = (int)gradWeight->size[0];
+
+ THNN_(VolumetricConvolutionMM_shapeCheck)(
+ state, input, gradOutput, gradWeight, gradBias,
+ kT, kW, kH, dT, dW, dH, pT, pW, pH);
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ freeWeight = THNN_(view_weight)(&gradWeight);
+
+ if (input->nDimension == 4) // non-batch mode
+ {
+ THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
+ }
+ else // batch mode
+ {
+ long T = input->size[0];
+ long t;
+
+ for (t = 0; t < T; t++)
+ {
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);
+
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ if (freeWeight)
+ THTensor_(free)(gradWeight);
+}
+
+#endif
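
The MM variant above trades memory for speed: unfolded_copy_vol lowers every receptive field into one column of finput, of shape (nInputPlane*kT*kH*kW) x (outputDepth*outputHeight*outputWidth); the 5-D weight is viewed as a 2-D matrix by view_weight; and updateOutput reduces to output2d += weight2d * finput (the addmm call). The row layout of finput follows the same plane-major decomposition as the loop index k in unfolded_copy_vol; a sketch of that decode (illustrative helper, not THNN API):

    #include <stdio.h>

    /* row r of the unfolded matrix corresponds to (plane, kt, kh, kw),
       mirroring the index arithmetic in unfolded_copy_vol above */
    static void unfold_row_decode(long r, int kT, int kH, int kW,
                                  int *plane, int *kt, int *kh, int *kw)
    {
      *plane = (int)(r / (kT * kH * kW));
      long rest = r % (kT * kH * kW);
      *kt = (int)(rest / (kH * kW));
      rest = rest % (kH * kW);
      *kh = (int)(rest / kW);
      *kw = (int)(rest % kW);
    }

    int main(void)
    {
      int plane, kt, kh, kw;
      unfold_row_decode(13, 2, 2, 2, &plane, &kt, &kh, &kw);
      printf("plane=%d kt=%d kh=%d kw=%d\n", plane, kt, kh, kw); /* 1 1 0 1 */
      return 0;
    }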
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedConvolution.c
new file mode 100644
index 000000000..ca740f78e
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedConvolution.c
@@ -0,0 +1,420 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricDilatedConvolution.c"
+#else
+
+static inline void THNN_(VolumetricDilatedConvolution_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ THTensor *weight, THTensor *bias,
+ int kT, int kH, int kW, int dT, int dH, int dW,
+ int padT, int padH, int padW,
+ int dilationT, int dilationH, int dilationW) {
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+ THNN_ARGCHECK(weight->nDimension == 5, 4, weight,
+ "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
+ "expected for weight, but got: %s");
+ THArgCheck(kT > 0 && kW > 0 && kH > 0, 8,
+ "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d", kT, kH, kW);
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
+ THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 15,
+ "dilation should be greater than zero, but got dilationT: %d, dilationH: %d, dilationW: %d",
+ dilationT, dilationH, dilationW);
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
+ }
+
+ // Params
+ int ndim = input->nDimension;
+ int nInputPlane = weight->size[1];
+ int nOutputPlane = weight->size[0];
+ int dimf = 0;
+ int dimd = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ if (ndim == 5) {
+ dimf++;
+ dimd++;
+ dimh++;
+ dimw++;
+ }
+
+ long inputDepth = input->size[dimd];
+ long inputHeight = input->size[dimh];
+ long inputWidth = input->size[dimw];
+ long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+
+ if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
+ nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth);
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+void THNN_(VolumetricDilatedConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *columns,
+ THTensor *ones,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH)
+{
+ THNN_(VolumetricDilatedConvolution_shapeCheck)(
+ input, NULL, weight, bias,
+ kT, kH, kW, dT, dH, dW, padT, padH, padW,
+ dilationT, dilationH, dilationW);
+
+ // Params:
+ int nInputPlane = weight->size[1];
+ int nOutputPlane = weight->size[0];
+
+ input = THTensor_(newContiguous)(input);
+ weight = THTensor_(newContiguous)(weight);
+ bias = bias ? THTensor_(newContiguous)(bias) : bias;
+ int batch = 1;
+ if (input->nDimension == 4) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ }
+
+ long inputDepth = input->size[2];
+ long inputHeight = input->size[3];
+ long inputWidth = input->size[4];
+ long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(zero)(output);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
+
+ // Define a buffer of ones, for bias accumulation
+ // Note: this buffer can be shared with other modules, it only ever gets increased,
+ // and always contains ones.
+ if (ones->nDimension != 3 ||
+ ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *output_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+ for (int elt = 0; elt < batchSize; elt ++) {
+ // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(output_n, output, 0, elt);
+
+ // Do Bias first:
+ // M,N,K are dims of matrix A and B
+ long m_ = nOutputPlane;
+ long n_ = outputDepth * outputHeight * outputWidth;
+ long k_ = 1;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ if (bias) {
+ THBlas_(gemm)(
+ 't', 'n',
+ n_, m_, k_,
+ 1,
+ THTensor_(data)(ones), k_,
+ THTensor_(data)(bias), k_,
+ 0,
+ THTensor_(data)(output_n), n_
+ );
+ } else {
+ THTensor_(zero)(output_n);
+ }
+
+ // Extract columns:
+ THNN_(vol2col)(
+ THTensor_(data)(input_n),
+ nInputPlane, inputDepth, inputHeight, inputWidth,
+ kT, kH, kW, padT, padH, padW, dT, dH, dW,
+ dilationT, dilationH, dilationW,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ long m = nOutputPlane;
+ long n = columns->size[1];
+ long k = nInputPlane*kT*kH*kW;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 'n',
+ n, m, k,
+ 1,
+ THTensor_(data)(columns), n,
+ THTensor_(data)(weight), k,
+ 1,
+ THTensor_(data)(output_n), n
+ );
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(output_n);
+
+ // Resize output
+ if (batch == 0) {
+ THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(weight);
+ if (bias) THTensor_(free)(bias);
+}
+
+void THNN_(VolumetricDilatedConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH)
+{
+ THNN_(VolumetricDilatedConvolution_shapeCheck)(
+ input, gradOutput, weight, NULL,
+ kT, kH, kW, dT, dH, dW, padT, padH, padW,
+ dilationT, dilationH, dilationW);
+
+ // Params
+ int nInputPlane = weight->size[1];
+ int nOutputPlane = weight->size[0];
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ weight = THTensor_(newContiguous)(weight);
+
+ int batch = 1;
+ if (input->nDimension == 4) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+ }
+
+ long inputDepth = input->size[2];
+ long inputWidth = input->size[4];
+ long inputHeight = input->size[3];
+ long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(gradColumns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
+ THTensor_(zero)(gradColumns);
+
+ // Helpers
+ THTensor *gradInput_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+ for (int elt = 0; elt < batchSize; elt ++) {
+ // Matrix multiply per sample:
+ THTensor_(select)(gradInput_n, gradInput, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // M,N,K are dims of matrix A and B
+ long m = nInputPlane*kT*kW*kH;
+ long n = gradColumns->size[1];
+ long k = nOutputPlane;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 't',
+ n, m, k,
+ 1,
+ THTensor_(data)(gradOutput_n), n,
+ THTensor_(data)(weight), m,
+ 0,
+ THTensor_(data)(gradColumns), n
+ );
+
+ // Unpack columns back into input:
+ THNN_(col2vol)(
+ THTensor_(data)(gradColumns),
+ nInputPlane, inputDepth, inputHeight, inputWidth,
+ kT, kH, kW, padT, padH, padW, dT, dH, dW,
+ dilationT, dilationH, dilationW,
+ THTensor_(data)(gradInput_n)
+ );
+ }
+
+ // Free
+ THTensor_(free)(gradInput_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize output
+ if (batch == 0) {
+ THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+}
+
+void THNN_(VolumetricDilatedConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *columns,
+ THTensor *ones,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH,
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+ THNN_(VolumetricDilatedConvolution_shapeCheck)(
+ input, gradOutput, gradWeight, gradBias,
+ kT, kH, kW, dT, dH, dW, padT, padH, padW,
+ dilationT, dilationH, dilationW);
+
+ // Params
+ int nInputPlane = gradWeight->size[1];
+ int nOutputPlane = gradWeight->size[0];
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ int batch = 1;
+ if (input->nDimension == 4) {
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+ }
+
+ long inputDepth = input->size[2];
+ long inputWidth = input->size[4];
+ long inputHeight = input->size[3];
+ long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Define a buffer of ones, for bias accumulation
+ if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+ for (int elt = 0; elt < batchSize; elt ++) {
+ // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(vol2col)(
+ THTensor_(data)(input_n),
+ nInputPlane, inputDepth, inputHeight, inputWidth,
+ kT, kH, kW, padT, padH, padW, dT, dH, dW,
+ dilationT, dilationH, dilationW,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ long m = nOutputPlane;
+ long n = nInputPlane*kT*kW*kH;
+ long k = columns->size[1];
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 't', 'n',
+ n, m, k,
+ scale,
+ THTensor_(data)(columns), k,
+ THTensor_(data)(gradOutput_n), k,
+ 1,
+ THTensor_(data)(gradWeight), n
+ );
+
+ // Do Bias:
+ // M,N,K are dims of matrix A and B
+ long m_ = nOutputPlane;
+ long k_ = outputDepth * outputHeight * outputWidth;
+
+ // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
+ if (gradBias) {
+ THBlas_(gemv)(
+ 't',
+ k_, m_,
+ scale,
+ THTensor_(data)(gradOutput_n), k_,
+ THTensor_(data)(ones), 1,
+ 1,
+ THTensor_(data)(gradBias), 1
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize
+ if (batch == 0) {
+ THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
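
All three entry points above use the same dilated output geometry: a kernel of size k with dilation d covers an effective span of d*(k-1)+1 inputs, and that span replaces k in the usual padded size formula. A sketch of the computation (illustrative):

    #include <stdio.h>

    /* matches outputDepth/Height/Width as computed in the code above */
    static long dilated_out(long in, int k, int stride, int pad, int dilation)
    {
      return (in + 2L * pad - ((long)dilation * (k - 1) + 1)) / stride + 1;
    }

    int main(void)
    {
      /* 16 frames, kT = 3, stride 1, pad 2, dilation 2 -> 16 (size-preserving) */
      printf("%ld\n", dilated_out(16, 3, 1, 2, 2));
      return 0;
    }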
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedMaxPooling.c
new file mode 100644
index 000000000..66c0f9531
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricDilatedMaxPooling.c
@@ -0,0 +1,515 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricDilatedMaxPooling.c"
+#else
+
+static inline void THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THIndexTensor *indices,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int pT, int pW, int pH,
+ int dilationT, int dilationW, int dilationH,
+ bool ceilMode) {
+ int ndim = input->nDimension;
+ int dimN = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+ long nslices;
+ long itime;
+ long iheight;
+ long iwidth;
+ long otime;
+ long oheight;
+ long owidth;
+
+ THArgCheck(kT > 0 && kW > 0 && kH > 0, 5,
+ "kernel size should be greater than zero, but got kT: %d kH: %d kW: %d",
+ kT, kH, kW);
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 8,
+ "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
+ dT, dH, dW);
+ THArgCheck(dilationT > 0 && dilationW > 0 && dilationH > 0, 14,
+ "dilation should be greater than 0, but got dilationT: %d dilationH: %d dilationW: %d",
+ dilationT, dilationH, dilationW);
+
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+
+ if (input->nDimension == 5)
+ {
+ dimN++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ THArgCheck(kT/2 >= pT && kW/2 >= pW && kH/2 >= pH, 2,
+ "pad should be smaller than half of kernel size, but got "
+ "kT: %d kW: %d, kH: %d, padT: %d, padW: %d, padH: %d",
+ kT, kW, kH, pT, pW, pH);
+
+ nslices = input->size[dimN];
+ itime = input->size[dimt];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ if (ceilMode)
+ {
+ otime = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
+ oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
+ owidth = (int)(ceil((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
+ }
+ else
+ {
+ otime = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
+ oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
+ owidth = (int)(floor((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
+ }
+
+ if (pT || pW || pH)
+ {
+ // ensure that the last pooling starts inside the image
+ if ((otime - 1)*dT >= itime + pT)
+ --otime;
+ if ((oheight - 1)*dH >= iheight + pH)
+ --oheight;
+ if ((owidth - 1)*dW >= iwidth + pW)
+ --owidth;
+ }
+
+ if (otime < 1 || owidth < 1 || oheight < 1)
+ THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
+ nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimN, nslices);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimt, otime);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, oheight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, owidth);
+ }
+ if (indices != NULL) {
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimN, nslices);
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimt, otime);
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, oheight);
+ THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, owidth);
+ }
+}
+
+static void THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+ real *input_p,
+ real *output_p,
+ THIndex_t *indz_p,
+ long nslices,
+ long itime,
+ long iwidth,
+ long iheight,
+ long otime,
+ long owidth,
+ long oheight,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ int dilationT,
+ int dilationW,
+ int dilationH)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ /* loop over output */
+ long i, j, ti;
+ for (ti = 0; ti < otime; ti++)
+ {
+ for (i = 0; i < oheight; i++)
+ {
+ for (j = 0; j < owidth; j++)
+ {
+ /* local pointers */
+
+ long start_t = ti * dT - pT;
+ long start_h = i * dH - pH;
+ long start_w = j * dW - pW;
+
+ long kernel_t = fminf(kT, kT + start_t);
+ long kernel_h = fminf(kH, kH + start_h);
+ long kernel_w = fminf(kW, kW + start_w);
+
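+ /* the kernel extent was clipped at the padded border above; now advance each start in whole dilation units until it lies inside the image, so the remaining taps stay on the dilated grid */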
+ while(start_t < 0)
+ start_t += dilationT;
+ while(start_h < 0)
+ start_h += dilationH;
+ while(start_w < 0)
+ start_w += dilationW;
+
+ real *ip = input_p + k * itime * iwidth * iheight
+ + start_t * iwidth * iheight + start_h * iwidth + start_w;
+ real *op = output_p + k * otime * owidth * oheight
+ + ti * owidth * oheight + i * owidth + j;
+ THIndex_t *indzp = indz_p + k * otime * owidth * oheight
+ + ti * owidth * oheight + i * owidth + j;
+
+ /* compute local max: */
+ real maxval = -THInf;
+ int x,y,z;
+ int mx, my, mz;
+ mx = my = mz = -1;
+
+ for (z = 0; z < kernel_t; z++)
+ {
+ for (y = 0; y < kernel_h; y++)
+ {
+ for (x = 0; x < kernel_w; x++)
+ {
+ if ((start_t + z * dilationT < itime) && (start_h + y * dilationH < iheight) && (start_w + x * dilationW < iwidth))
+ {
+ real val = *(ip + z * dilationT * iwidth * iheight + y * dilationH * iwidth + x * dilationW);
+ if (val > maxval)
+ {
+ maxval = val;
+ // Store indices w.r.t the kernel dimension
+ mz = z + (kT - kernel_t);
+ my = y + (kH - kernel_h);
+ mx = x + (kW - kernel_w);
+ }
+ }
+ }
+ }
+ }
+
+ // pack the kernel-relative argmax coordinates (mz, my, mx) into the index bytes
+ ((unsigned char*)(indzp))[0] = mz;
+ ((unsigned char*)(indzp))[1] = my;
+ ((unsigned char*)(indzp))[2] = mx;
+ ((unsigned char*)(indzp))[3] = 0;
+
+ /* set output to local max */
+ *op = maxval;
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ int dilationT,
+ int dilationW,
+ int dilationH,
+ bool ceilMode)
+{
+ long nslices;
+ long itime;
+ long iheight;
+ long iwidth;
+ long otime;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+ THIndex_t *indices_data;
+
+
+ int dimN = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ if (input->nDimension == 5)
+ {
+ dimN++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
+ state, input, NULL, NULL,
+ kT, kW, kH, dT, dW, dH,
+ pT, pW, pH, dilationT, dilationW, dilationH,
+ ceilMode);
+
+ /* sizes */
+ nslices = input->size[dimN];
+ itime = input->size[dimt];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ if (ceilMode)
+ {
+ otime = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
+ oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
+ owidth = (int)(ceil((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
+ }
+ else
+ {
+ otime = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
+ oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
+ owidth = (int)(floor((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
+ }
+
+ if (pT || pW || pH)
+ {
+ // ensure that the last pooling starts inside the image
+ if ((otime - 1)*dT >= itime + pT)
+ --otime;
+ if ((oheight - 1)*dH >= iheight + pH)
+ --oheight;
+ if ((owidth - 1)*dW >= iwidth + pW)
+ --owidth;
+ }
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ if (input->nDimension == 4) /* non-batch mode */
+ {
+ /* resize output */
+ THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
+ /* indices will contain ti,i,j uchar locations packed into each index word */
+ THIndexTensor_(resize4d)(indices, nslices, otime, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+ input_data, output_data,
+ indices_data,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH,
+ dT, dW, dH,
+ pT, pW, pH,
+ dilationT, dilationW, dilationH
+ );
+ }
+ else /* batch mode */
+ {
+ long p;
+ long nBatch = input->size[0];
+
+ long istride = nslices * itime * iwidth * iheight;
+ long ostride = nslices * otime * owidth * oheight;
+
+ /* resize output */
+ THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
+ /* indices will contain ti,i,j locations for each output point */
+ THIndexTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+#pragma omp parallel for private(p)
+ for (p=0; p < nBatch; p++)
+ {
+ THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
+ input_data + p * istride,
+ output_data + p * ostride,
+ indices_data + p * ostride,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH,
+ dT, dW, dH,
+ pT, pW, pH,
+ dilationT, dilationW, dilationH
+ );
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+ real *gradInput_p,
+ real *gradOutput_p,
+ THIndex_t *indz_p,
+ long nslices,
+ long itime,
+ long iwidth,
+ long iheight,
+ long otime,
+ long owidth,
+ long oheight,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ int dilationT,
+ int dilationW,
+ int dilationH)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ real *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight;
+ real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight;
+ THIndex_t *indz_p_k = indz_p + k * otime * owidth * oheight;
+
+ /* calculate max points */
+ long ti, i, j;
+ for (ti = 0; ti < otime; ti++)
+ {
+ for (i = 0; i < oheight; i++)
+ {
+ for (j = 0; j < owidth; j++)
+ {
+ /* retrieve position of max */
+ THIndex_t * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j];
+ long maxti = ((unsigned char*)(indzp))[0] * dilationT + ti * dT - pT;
+ long maxi = ((unsigned char*)(indzp))[1] * dilationH + i * dH - pH;
+ long maxj = ((unsigned char*)(indzp))[2] * dilationW + j * dW - pW;
+
+ if (maxti != -1) {
+ /* update gradient */
+ gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] +=
+ gradOutput_p_k[ti * oheight * owidth + i * owidth + j];
+ }
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ int dilationT,
+ int dilationW,
+ int dilationH,
+ bool ceilMode)
+{
+ int nslices;
+ int itime;
+ int iheight;
+ int iwidth;
+ int otime;
+ int oheight;
+ int owidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+ THIndex_t *indices_data;
+
+ int dimN = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
+ state, input, gradOutput, indices,
+ kT, kW, kH, dT, dW, dH,
+ pT, pW, pH, dilationT, dilationW, dilationH,
+ ceilMode);
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (input->nDimension == 5)
+ {
+ dimN++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimN];
+ itime = input->size[dimt];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ otime = gradOutput->size[dimt];
+ oheight = gradOutput->size[dimh];
+ owidth = gradOutput->size[dimw];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THIndexTensor_(data)(indices);
+
+ /* backprop */
+ if (input->nDimension == 4) /* non-batch mode */
+ {
+ THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+ gradInput_data, gradOutput_data,
+ indices_data,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ dT, dW, dH,
+ pT, pW, pH,
+ dilationT, dilationW, dilationH
+ );
+ }
+ else /* batch mode */
+ {
+ long p;
+ long nBatch = input->size[0];
+
+ long istride = nslices * itime * iwidth * iheight;
+ long ostride = nslices * otime * owidth * oheight;
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nBatch; p++)
+ {
+ THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
+ gradInput_data + p * istride,
+ gradOutput_data + p * ostride,
+ indices_data + p * ostride,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ dT, dW, dH,
+ pT, pW, pH,
+ dilationT, dilationW, dilationH
+ );
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFractionalMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFractionalMaxPooling.c
new file mode 100644
index 000000000..236986bb9
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFractionalMaxPooling.c
@@ -0,0 +1,279 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricFractionalMaxPooling.c"
+#else
+
+static long* THNN_(VolumetricFractionalMaxPooling_generateIntervals)(
+ real sample,
+ long inputSize,
+ long outputSize,
+ int poolSize) {
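+  /* Pseudo-random pooling intervals: alpha = (inputSize - poolSize) /
+     (outputSize - 1) is the average step between window starts, and the
+     random sample in [0, 1) shifts where the integer truncation falls.
+     Illustrative example: inputSize=7, poolSize=2, outputSize=3 gives
+     alpha=2.5; sample=0.2 yields starts {0, 3, 5} while sample=0.8 yields
+     {0, 2, 5}; the last window is always pinned to inputSize - poolSize. */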
+ real alpha = (real) (inputSize - poolSize) / (real) (outputSize - 1);
+ long* sequence = (long*) THAlloc(sizeof(long) * outputSize);
+
+ long i;
+ for (i = 0; i < outputSize - 1; ++i) {
+ sequence[i] =
+ (long) ((i + sample) * alpha) - (long) (sample * alpha);
+ }
+ sequence[outputSize - 1] = inputSize - poolSize;
+
+ return sequence;
+}
+
+static void THNN_(VolumetricFractionalMaxPooling_updateOutput_frame)(
+ real* input,
+ real* output,
+ THIndex_t* indices,
+ real* randomSamples,
+ long numPlanes,
+ long inputT, long inputW, long inputH,
+ long outputT, long outputW, long outputH,
+ int poolSizeT, int poolSizeW, int poolSizeH) {
+ long plane;
+#pragma omp parallel for private(plane)
+ for (plane = 0; plane < numPlanes; ++plane) {
+ /* each plane contains 3 random samples, one for T, one for W, and one for H */
+ real* randomSamplesForPlane = randomSamples + plane * 3;
+
+ /* Generate interval sequence */
+ long* sequenceT =
+ THNN_(VolumetricFractionalMaxPooling_generateIntervals)(
+ randomSamplesForPlane[0], inputT, outputT, poolSizeT);
+ long* sequenceW =
+ THNN_(VolumetricFractionalMaxPooling_generateIntervals)(
+ randomSamplesForPlane[1], inputW, outputW, poolSizeW);
+ long* sequenceH =
+ THNN_(VolumetricFractionalMaxPooling_generateIntervals)(
+ randomSamplesForPlane[2], inputH, outputH, poolSizeH);
+
+ /* loop over output */
+ long h, w, t;
+
+ real* inputForPlane = input + plane * inputT * inputW * inputH;
+ real* outputForPlane = output + plane * outputT * outputW * outputH;
+ THIndex_t* indicesForPlane = indices + plane * outputT * outputW * outputH;
+
+ for (h = 0; h < outputH; ++h) {
+ long inputHStart = sequenceH[h];
+
+ for (w = 0; w < outputW; ++w) {
+ long inputWStart = sequenceW[w];
+
+ for (t = 0; t < outputT; ++t) {
+ long inputTStart = sequenceT[t];
+
+ real maxVal = -THInf;
+ long maxIndex = -1;
+
+ long h2, w2, t2;
+ for (h2 = inputHStart; h2 < inputHStart + poolSizeH; ++h2) {
+ for (w2 = inputWStart; w2 < inputWStart + poolSizeW; ++w2) {
+ for (t2 = inputTStart; t2 < inputTStart + poolSizeT; ++t2) {
+ THAssert(h2 >= 0 && h2 < inputH);
+ THAssert(w2 >= 0 && w2 < inputW);
+ THAssert(t2 >= 0 && t2 < inputT);
+
+ long planeIndex = h2 * inputW * inputT + w2 * inputT + t2;
+ real val = inputForPlane[planeIndex];
+ if (val > maxVal) {
+ maxVal = val;
+ maxIndex = planeIndex;
+ }
+ }
+ }
+ }
+
+ THAssert(maxVal != -THInf);
+ THAssert(maxIndex != -1);
+
+ outputForPlane[h * outputW * outputT + w * outputT + t] = maxVal;
+ /* +1 to lua index */
+ indicesForPlane[h * outputW * outputT + w * outputT + t] = maxIndex + TH_INDEX_BASE;
+ }
+ }
+ }
+
+ THFree(sequenceT);
+ THFree(sequenceW);
+ THFree(sequenceH);
+ }
+}
+
+void THNN_(VolumetricFractionalMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int outputT, int outputW, int outputH,
+ int poolSizeT, int poolSizeW, int poolSizeH,
+ THIndexTensor *indices,
+ THTensor *randomSamples) {
+
+ long numBatch = 1;
+ int planeDim = 0;
+ int heightDim = 1;
+ int widthDim = 2;
+ int timeDim = 3;
+
+ long numInputDims = THTensor_(nDimension)(input);
+ THNN_ARGCHECK(numInputDims == 4 || numInputDims == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+
+ if (numInputDims == 5) {
+ numBatch = THTensor_(size)(input, 0);
+ planeDim++;
+ heightDim++;
+ widthDim++;
+ timeDim++;
+ }
+
+ /* sizes */
+ long numPlanes = THTensor_(size)(input, planeDim);
+ long inputH = THTensor_(size)(input, heightDim);
+ long inputW = THTensor_(size)(input, widthDim);
+ long inputT = THTensor_(size)(input, timeDim);
+
+  THArgCheck(outputH + poolSizeH - 1 < inputH, 9,
+             "poolSizeH (%d) too large relative to input height (%ld)",
+             poolSizeH, inputH);
+  THArgCheck(outputW + poolSizeW - 1 < inputW, 8,
+             "poolSizeW (%d) too large relative to input width (%ld)",
+             poolSizeW, inputW);
+  THArgCheck(outputT + poolSizeT - 1 < inputT, 7,
+             "poolSizeT (%d) too large relative to input time (%ld)",
+             poolSizeT, inputT);
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ if (numInputDims == 4) {
+ /* resize output */
+ THTensor_(resize4d)(output, numPlanes, outputH, outputW, outputT);
+ /* indices will contain the locations for each output point */
+ THIndexTensor_(resize4d)(indices, numPlanes, outputH, outputW, outputT);
+
+ THNN_(VolumetricFractionalMaxPooling_updateOutput_frame)(
+ THTensor_(data)(input),
+ THTensor_(data)(output),
+ THIndexTensor_(data)(indices),
+ THTensor_(data)(randomSamples),
+ numPlanes, inputT, inputW, inputH,
+ outputT, outputW, outputH, poolSizeT, poolSizeW, poolSizeH);
+ } else {
+ THTensor_(resize5d)(output, numBatch, numPlanes, outputH, outputW, outputT);
+ /* indices will contain the locations for each output point */
+ THIndexTensor_(resize5d)(indices, numBatch, numPlanes, outputH, outputW, outputT);
+
+ long batch;
+#pragma omp parallel for private(batch)
+ for (batch = 0; batch < numBatch; ++batch) {
+ THNN_(VolumetricFractionalMaxPooling_updateOutput_frame)(
+ THTensor_(data)(input) + batch * numPlanes * inputH * inputW * inputT,
+ THTensor_(data)(output) + batch * numPlanes * outputH * outputW * outputT,
+ THIndexTensor_(data)(indices) + batch * numPlanes * outputH * outputW * outputT,
+ THTensor_(data)(randomSamples) + batch * numPlanes * 3,
+ numPlanes, inputT, inputW, inputH,
+ outputT, outputW, outputH, poolSizeT, poolSizeW, poolSizeH);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(VolumetricFractionalMaxPooling_updateGradInput_frame)(
+ real* gradInput,
+ real* gradOutput,
+ THIndex_t* indices,
+ long numPlanes,
+ long inputT, long inputW, long inputH,
+ long outputT, long outputW, long outputH) {
+ long plane;
+#pragma omp parallel for private(plane)
+ for (plane = 0; plane < numPlanes; plane++) {
+ real* gradInputForPlane = gradInput + plane * inputT * inputW * inputH;
+ real* gradOutputForPlane = gradOutput + plane * outputT * outputW * outputH;
+ THIndex_t* indicesForPlane = indices + plane * outputT * outputW * outputH;
+
+ long h, w, t;
+ for (h = 0; h < outputH; ++h) {
+ for (w = 0; w < outputW; ++w) {
+ for (t = 0; t < outputT; ++t) {
+ long outputIndex = h * outputW * outputT + w * outputT + t;
+ long index = indicesForPlane[outputIndex] - TH_INDEX_BASE;
+ THAssert(index >= 0 && index < inputT * inputW * inputH);
+
+ gradInputForPlane[index] += gradOutputForPlane[outputIndex];
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricFractionalMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int outputT, int outputW, int outputH,
+ int poolSizeT, int poolSizeW, int poolSizeH,
+ THIndexTensor *indices) {
+
+ long numBatch = 1;
+ int planeDim = 0;
+ int heightDim = 1;
+ int widthDim = 2;
+ int timeDim = 3;
+
+ long numInputDims = THTensor_(nDimension)(input);
+ if (numInputDims == 5) {
+ numBatch = THTensor_(size)(input, 0);
+ planeDim = 1;
+ heightDim++;
+ widthDim++;
+ timeDim++;
+ }
+
+ /* sizes */
+ long numPlanes = THTensor_(size)(input, planeDim);
+ long inputH = THTensor_(size)(input, heightDim);
+ long inputW = THTensor_(size)(input, widthDim);
+ long inputT = THTensor_(size)(input, timeDim);
+
+ THArgCheck(outputT == THTensor_(size)(gradOutput, timeDim), 3,
+ "gradOutput time unexpected");
+ THArgCheck(outputW == THTensor_(size)(gradOutput, widthDim), 3,
+ "gradOutput width unexpected");
+ THArgCheck(outputH == THTensor_(size)(gradOutput, heightDim), 3,
+ "gradOutput height unexpected");
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* backprop */
+ if (numInputDims == 4) {
+ THNN_(VolumetricFractionalMaxPooling_updateGradInput_frame)(
+ THTensor_(data)(gradInput),
+ THTensor_(data)(gradOutput),
+ THIndexTensor_(data)(indices),
+ numPlanes, inputT, inputW, inputH, outputT, outputW, outputH);
+ } else {
+ long batch;
+#pragma omp parallel for private(batch)
+ for (batch = 0; batch < numBatch; ++batch) {
+ THNN_(VolumetricFractionalMaxPooling_updateGradInput_frame)(
+ THTensor_(data)(gradInput) + batch * numPlanes * inputH * inputW * inputT,
+ THTensor_(data)(gradOutput) + batch * numPlanes * outputH * outputW * outputT,
+ THIndexTensor_(data)(indices) + batch * numPlanes * outputH * outputW * outputT,
+ numPlanes, inputT, inputW, inputH, outputT, outputW, outputH);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFullConvolution.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFullConvolution.c
new file mode 100644
index 000000000..c974fab50
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricFullConvolution.c
@@ -0,0 +1,541 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricFullConvolution.c"
+#else
+
+static void THNN_(vol2col)(
+ const real *data_vol, const int channels,
+ const int depth, const int height, const int width,
+ const int kT, const int kH, const int kW,
+ const int pT, const int pH, const int pW,
+ const int dT, const int dH, const int dW,
+ const int dilationT, const int dilationH, const int dilationW,
+ real *data_col)
+{
+ int c, t, h, w;
+ int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ int channels_col = channels * kT * kH * kW;
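+  /* Each column channel c encodes one (input channel, kernel offset) pair:
+     c = ((c_vol * kT + t_offset) * kH + h_offset) * kW + w_offset,
+     which the mod/div chain below inverts. */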
+ for (c = 0; c < channels_col; ++c)
+ {
+ int w_offset = c % kW;
+ int h_offset = (c / kW) % kH;
+ int t_offset = (c / kW / kH) % kT;
+ int c_vol = c / kT / kH / kW;
+ for (t = 0; t < depth_col; ++t)
+ {
+ for (h = 0; h < height_col; ++h)
+ {
+ for (w = 0; w < width_col; ++w)
+ {
+ int t_pad = t * dT - pT + t_offset * dilationT;
+ int h_pad = h * dH - pH + h_offset * dilationH;
+ int w_pad = w * dW - pW + w_offset * dilationW;
+ if (t_pad >= 0 && t_pad < depth &&
+ h_pad >= 0 && h_pad < height &&
+ w_pad >= 0 && w_pad < width)
+ data_col[((c * depth_col + t) * height_col + h) * width_col + w] =
+ data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad];
+ else
+ data_col[((c * depth_col + t) * height_col + h) * width_col + w] = 0;
+ }
+ }
+ }
+ }
+}
+
+static void THNN_(col2vol)(
+ const real* data_col, const int channels,
+ const int depth, const int height, const int width,
+ const int kT, const int kH, const int kW,
+ const int pT, const int pH, const int pW,
+ const int dT, const int dH, const int dW,
+ const int dilationT, const int dilationH, const int dilationW,
+ real* data_vol)
+{
+ int c, t, h, w;
+ memset(data_vol, 0, sizeof(real) * depth * height * width * channels);
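+  /* col2vol is the adjoint of vol2col: every column entry is scattered
+     back (accumulated, since windows overlap) into its source voxel. */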
+ int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ int channels_col = channels * kT * kH * kW;
+ for (c = 0; c < channels_col; ++c)
+ {
+ int w_offset = c % kW;
+ int h_offset = (c / kW) % kH;
+ int t_offset = (c / kW / kH) % kT;
+ int c_vol = c / kT / kH / kW;
+ for (t = 0; t < depth_col; ++t)
+ {
+ for (h = 0; h < height_col; ++h)
+ {
+ for (w = 0; w < width_col; ++w)
+ {
+ int t_pad = t * dT - pT + t_offset * dilationT;
+ int h_pad = h * dH - pH + h_offset * dilationH;
+ int w_pad = w * dW - pW + w_offset * dilationW;
+ if (t_pad >= 0 && t_pad < depth &&
+ h_pad >= 0 && h_pad < height &&
+ w_pad >= 0 && w_pad < width)
+ data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad] +=
+ data_col[((c * depth_col + t) * height_col + h) * width_col + w];
+ }
+ }
+ }
+ }
+}
+
+static inline void THNN_(VolumetricFullConvolution_shapeCheck)(
+ THTensor *input, THTensor *gradOutput,
+ THTensor *weight, THTensor *bias,
+ int dT, int dW, int dH, int pT, int pW, int pH,
+ int aT, int aW, int aH) {
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+  // the numbers of input and output planes and the kernel size are indirectly defined by the weight tensor
+ THNN_ARGCHECK(weight->nDimension == 5, 4, weight,
+ "5D (nOutputPlane x nInputPlane x kT x kH x kW) tensor "
+ "expected for weight, but got: %s");
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 11,
+ "stride should be greater than zero, but got dT: %d dH: %d dW: %d", dT, dH, dW);
+ THArgCheck(aT < dT && aW < dW && aH < dH, 15,
+ "output adjustment must be smaller than stride, but got "
+ "adjT: %d adjH: %d adjW: %d dT: %d dH: %d dW: %d",
+ aT, aH, aW, dT, dH, dW);
+
+ int ndim = input->nDimension;
+ const int nInputPlane = (int)weight->size[0];
+ const int nOutputPlane = (int)weight->size[1];
+ const int kT = (int)weight->size[2];
+ const int kH = (int)weight->size[3];
+ const int kW = (int)weight->size[4];
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[1]);
+ }
+
+ int dimf = 0;
+ int dimd = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ if (ndim == 5) {
+ dimf++;
+ dimd++;
+ dimh++;
+ dimw++;
+ }
+
+ const long inputWidth = input->size[dimw];
+ const long inputHeight = input->size[dimh];
+ const long inputDepth = input->size[dimd];
+ const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
+ const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
+ const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
+
+ if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
+ nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth);
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane);
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimd, outputDepth);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
+ }
+}
+
+void THNN_(VolumetricFullConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input, // 4D or 5D (batch) tensor
+ THTensor *output,
+ THTensor *weight, // weight tensor (nInputPlane x nOutputPlane x kT x kH x kW)
+ THTensor *bias,
+ THTensor *finput, // internal columns buffer
+ THTensor *fgradInput, // internal ones buffer
+ int dT, int dW, int dH, // stride of the convolution
+ int pT, int pW, int pH, // padding
+ int aT, int aW, int aH) // extra output adjustment
+{
+ THTensor *columns = finput;
+ THTensor *ones = fgradInput;
+
+ THNN_(VolumetricFullConvolution_shapeCheck)(
+ input, NULL, weight, bias,
+ dT, dW, dH, pT, pW, pH, aT, aW, aH);
+
+ const int nInputPlane = (int)weight->size[0];
+ const int nOutputPlane = (int)weight->size[1];
+ const int kT = (int)weight->size[2];
+ const int kH = (int)weight->size[3];
+ const int kW = (int)weight->size[4];
+
+ input = THTensor_(newContiguous)(input);
+ weight = THTensor_(newContiguous)(weight);
+ bias = bias ? THTensor_(newContiguous)(bias) : bias;
+ int batch = 1;
+ if (input->nDimension == 4)
+ {
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ }
+
+ const long inputWidth = input->size[4];
+ const long inputHeight = input->size[3];
+ const long inputDepth = input->size[2];
+ const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
+ const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
+ const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
+
+ // Batch size + input planes
+ const long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
+ THTensor_(zero)(columns);
+
+ // Define a buffer of ones, for bias accumulation
+  // Note: this buffer can be shared with other modules; it only ever grows
+  // and always contains ones.
+ if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth)
+ {
+ // Resize plane and fill with ones...
+ THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *output_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+ for (elt = 0; elt < batchSize; ++elt)
+ {
+    // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(output_n, output, 0, elt);
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ const long m = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4];
+ const long n = columns->size[1];
+ const long k = weight->size[0];
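+    // In row-major terms this computes columns[m x n] = weight^T[m x k] *
+    // input_n[k x n]: each input location is projected onto all kernel
+    // elements, and col2vol below scatters the overlapping contributions.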
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 't',
+ n, m, k,
+ 1,
+ THTensor_(data)(input_n), n,
+ THTensor_(data)(weight), m,
+ 0,
+ THTensor_(data)(columns), n
+ );
+
+ // Unpack columns back into input:
+ THNN_(col2vol)(
+ THTensor_(data)(columns),
+ nOutputPlane, outputDepth, outputHeight, outputWidth,
+ kT, kH, kW,
+ pT, pH, pW,
+ dT, dH, dW,
+ 1, 1, 1,
+ THTensor_(data)(output_n)
+ );
+
+ // Do Bias after:
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ const long m_ = nOutputPlane;
+ const long n_ = outputDepth * outputHeight * outputWidth;
+ const long k_ = 1;
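+    // The GEMM below is a rank-1 update: output_n += bias * ones^T, which
+    // broadcasts each bias value over its entire output plane.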
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ if (bias) {
+ THBlas_(gemm)(
+ 't', 'n',
+ n_, m_, k_,
+ 1,
+ THTensor_(data)(ones), k_,
+ THTensor_(data)(bias), k_,
+ 1,
+ THTensor_(data)(output_n), n_
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(output_n);
+
+ // Resize output
+ if (batch == 0)
+ {
+ THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(weight);
+ if (bias) THTensor_(free)(bias);
+}
+
+void THNN_(VolumetricFullConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput, // only used by cuda impl
+ int dT, int dW, int dH, // stride
+ int pT, int pW, int pH, // padding
+ int aT, int aW, int aH) // extra output adjustment
+{
+ THTensor *gradColumns = finput;
+
+  // the numbers of input and output planes and the kernel size are indirectly defined by the weight tensor
+ THNN_(VolumetricFullConvolution_shapeCheck)(
+ input, gradOutput, weight, NULL,
+ dT, dW, dH, pT, pW, pH, aT, aW, aH);
+
+ const int nInputPlane = (int)weight->size[0];
+ const int nOutputPlane = (int)weight->size[1];
+ const int kT = (int)weight->size[2];
+ const int kH = (int)weight->size[3];
+ const int kW = (int)weight->size[4];
+
+ input = THTensor_(newContiguous)(input);
+ weight = THTensor_(newContiguous)(weight);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ int batch = 1;
+ if (input->nDimension == 4)
+ {
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+ }
+
+ const long inputWidth = input->size[4];
+ const long inputHeight = input->size[3];
+ const long inputDepth = input->size[2];
+ const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
+ const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
+ const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
+
+ // Batch size + input planes
+ const long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth);
+ THTensor_(zero)(gradInput);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
+
+ // Helpers
+ THTensor *gradInput_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+ for (elt = 0; elt < batchSize; ++elt)
+ {
+    // Matrix multiply per sample:
+ THTensor_(select)(gradInput_n, gradInput, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(vol2col)(
+ THTensor_(data)(gradOutput_n),
+ nOutputPlane, outputDepth, outputHeight, outputWidth,
+ kT, kH, kW,
+ pT, pH, pW,
+ dT, dH, dW,
+ 1, 1, 1,
+ THTensor_(data)(gradColumns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ const long m = weight->size[0];
+ const long n = gradColumns->size[1];
+ const long k = weight->size[1] * weight->size[2] * weight->size[3] * weight->size[4];
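+    // In row-major terms: gradInput_n[m x n] = weight[m x k] * gradColumns[k x n];
+    // the backward pass of a transposed convolution is itself an ordinary
+    // convolution of gradOutput with the same weights.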
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 'n',
+ n, m, k,
+ 1,
+ THTensor_(data)(gradColumns), n,
+ THTensor_(data)(weight), k,
+ 0,
+ THTensor_(data)(gradInput_n), n
+ );
+ }
+
+ // Free
+ THTensor_(free)(gradInput_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize output
+ if (batch == 0)
+ {
+ THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ THTensor_(free)(weight);
+}
+
+void THNN_(VolumetricFullConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int dT, int dW, int dH, // stride
+ int pT, int pW, int pH, // padding
+ int aT, int aW, int aH, // extra output adjustment
+ accreal scale_)
+{
+ real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
+  // the numbers of input and output planes and the kernel size are indirectly defined by the gradWeight tensor
+ THNN_(VolumetricFullConvolution_shapeCheck)(
+ input, gradOutput, gradWeight, gradBias,
+ dT, dW, dH, pT, pW, pH, aT, aW, aH);
+
+ int nInputPlane = (int)gradWeight->size[0];
+ int nOutputPlane = (int)gradWeight->size[1];
+ int kT = (int)gradWeight->size[2];
+ int kH = (int)gradWeight->size[3];
+ int kW = (int)gradWeight->size[4];
+
+ THTensor *columns = finput;
+ THTensor *ones = fgradInput;
+
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
+ if (gradBias)
+ THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+
+ int batch = 1;
+ if (input->nDimension == 4)
+ {
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+ }
+
+ const long inputWidth = input->size[4];
+ const long inputHeight = input->size[3];
+ const long inputDepth = input->size[2];
+ const long outputWidth = (inputWidth - 1) * dW - 2*pW + kW + aW;
+ const long outputHeight = (inputHeight - 1) * dH - 2*pH + kH + aH;
+ const long outputDepth = (inputDepth - 1) * dT - 2*pT + kT + aT;
+
+ // Batch size + input planes
+ const long batchSize = input->size[0];
+
+ // Define a buffer of ones, for bias accumulation
+ if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth)
+ {
+ // Resize plane and fill with ones...
+ THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ int elt;
+ // For each elt in batch, do:
+ for (elt = 0; elt < batchSize; ++elt)
+ {
+    // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(vol2col)(
+ THTensor_(data)(gradOutput_n), nOutputPlane,
+ outputDepth, outputHeight, outputWidth,
+ kT, kH, kW,
+ pT, pH, pW,
+ dT, dH, dW,
+ 1, 1, 1,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+    const long n = columns->size[0];   // nOutputPlane * kT * kH * kW
+    const long m = input_n->size[0];   // nInputPlane
+    const long k = columns->size[1];   // inputDepth * inputHeight * inputWidth
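+    // In row-major terms: gradWeight[m x n] += scale * input_n[m x k] *
+    // columns^T[k x n], accumulating the outer products of input planes
+    // and unrolled gradOutput windows over all spatial locations.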
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 't', 'n',
+ n, m, k,
+ scale,
+ THTensor_(data)(columns), k,
+ THTensor_(data)(input_n), k,
+ 1,
+ THTensor_(data)(gradWeight), n
+ );
+
+ // Do Bias:
+ // M,N,K are dims of matrix A and B
+ // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
+ const long m_ = nOutputPlane;
+ const long k_ = outputDepth * outputHeight * outputWidth;
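+    // The GEMV below dots each output plane of gradOutput_n with the ones
+    // vector, i.e. gradBias[j] += scale * (sum of plane j of gradOutput_n).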
+
+ // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
+ if (gradBias) {
+ THBlas_(gemv)(
+ 't',
+ k_, m_,
+ scale,
+ THTensor_(data)(gradOutput_n), k_,
+ THTensor_(data)(ones), 1,
+ 1,
+ THTensor_(data)(gradBias), 1
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize
+ if (batch == 0)
+ {
+ THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxPooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxPooling.c
new file mode 100644
index 000000000..a3601e0b6
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxPooling.c
@@ -0,0 +1,50 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c"
+#else
+
+void THNN_(VolumetricMaxPooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ bool ceilMode)
+{
+ THNN_(VolumetricDilatedMaxPooling_updateOutput)(
+ state, input, output, indices,
+ kT, kW, kH, dT, dW, dH,
+ pT, pW, pH, 1, 1, 1, ceilMode);
+}
+
+void THNN_(VolumetricMaxPooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int kT,
+ int kW,
+ int kH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH,
+ bool ceilMode)
+{
+ THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
+ state, input, gradOutput, gradInput, indices,
+ kT, kW, kH, dT, dW, dH,
+ pT, pW, pH, 1, 1, 1, ceilMode);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxUnpooling.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxUnpooling.c
new file mode 100644
index 000000000..d9d9e5951
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricMaxUnpooling.c
@@ -0,0 +1,373 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricMaxUnpooling.c"
+#else
+
+static inline void THNN_(VolumetricMaxUnpooling_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THIndexTensor *indices,
+ int oT,
+ int oW,
+ int oH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+
+ THNN_CHECK_SHAPE_INDICES(input, indices);
+
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 10,
+ "stride should be greater than zero, but got dT: %d dH: %d dW: %d",
+ dT, dH, dW);
+
+ int dimw = 3;
+ int dimh = 2;
+ int dimt = 1;
+ int dimn = 0;
+
+ if (input->nDimension == 5)
+ {
+ dimt++;
+ dimw++;
+ dimh++;
+ dimn++;
+ }
+ int nslices = input->size[dimn];
+
+ if (gradOutput != NULL) {
+ if (oT != gradOutput->size[dimt] || oW != gradOutput->size[dimw] || oH != gradOutput->size[dimh])
+ {
+ THError(
+ "Inconsistent gradOutput size. oT= %d, oH= %d, oW= %d, gradOutput: %dx%dx%d",
+ oT, oH, oW, gradOutput->size[dimt], gradOutput->size[dimh], gradOutput->size[dimw]
+ );
+ }
+
+ THNN_CHECK_DIM_SIZE(gradOutput, input->nDimension, dimn, nslices);
+ }
+}
+
+static void THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
+ real *input_p,
+ real *output_p,
+ THIndex_t *ind_p,
+ int nslices,
+ int iT,
+ int iW,
+ int iH,
+ int oT,
+ int oW,
+ int oH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ int k;
+ int has_error = 0;
+ THIndex_t error_index;
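+  /* THError must not be raised from inside an OpenMP parallel region, so
+     invalid indices are only recorded here and reported after the loop. */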
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ int ti, i, j, maxz, maxy, maxx;
+ for (ti = 0; ti < iT; ti++)
+ {
+ for (i = 0; i < iH; i++)
+ {
+ for (j = 0; j < iW; j++)
+ {
+ int start_t = ti * dT - pT;
+ int start_h = i * dH - pH;
+ int start_w = j * dW - pW;
+
+ real *input_p_k = input_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
+ THIndex_t *ind_p_k = ind_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
+
+ maxz = ((unsigned char*)(ind_p_k))[0]; /* retrieve position of max */
+ maxy = ((unsigned char*)(ind_p_k))[1];
+ maxx = ((unsigned char*)(ind_p_k))[2];
+
+ THIndex_t idx = k*oT*oW*oH + oH*oW*(start_t+maxz) + oW*(start_h+maxy) + (start_w+maxx);
+ if (start_t+maxz<0 || start_h+maxy<0 || start_w+maxx<0 || start_t+maxz>=oT
+ || start_h+maxy>=oH || start_w+maxx>=oW)
+ {
+#pragma omp critical
+ {
+ has_error = 1;
+ error_index = idx;
+ }
+ } else {
+ output_p[idx] = *input_p_k; /* update output */
+ }
+ }
+ }
+ }
+ }
+ if (has_error) {
+ THError(
+ "found an invalid max index %ld (output volumes are of size %dx%dx%d)",
+ error_index, oT, oH, oW
+ );
+ }
+}
+
+void THNN_(VolumetricMaxUnpooling_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THIndexTensor *indices,
+ int oT,
+ int oW,
+ int oH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ int dimw = 3;
+ int dimh = 2;
+ int dimt = 1;
+ int nbatch = 1;
+ int nslices;
+ int iT;
+ int iH;
+ int iW;
+ real *input_data;
+ real *output_data;
+ THIndex_t *indices_data;
+
+ THNN_(VolumetricMaxUnpooling_shapeCheck)(
+ state, input, NULL, indices,
+ oT, oW, oH, dT, dW, dH, pT, pW, pH);
+
+ if (input->nDimension == 5)
+ {
+ nbatch = input->size[0];
+ dimt++;
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimt-1];
+ iT = input->size[dimt];
+ iH = input->size[dimh];
+ iW = input->size[dimw];
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+ indices = THIndexTensor_(newContiguous)(indices);
+
+ /* resize output */
+ if (input->nDimension == 4)
+ {
+ THTensor_(resize4d)(output, nslices, oT, oH, oW);
+ THTensor_(zero)(output);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
+ input_data, output_data,
+ indices_data,
+ nslices,
+ iT, iW, iH,
+ oT, oW, oH,
+ dT, dW, dH, pT, pW, pH
+ );
+ }
+ else
+ {
+ int p;
+
+ THTensor_(resize5d)(output, nbatch, nslices, oT, oH, oW);
+ THTensor_(zero)(output);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THIndexTensor_(data)(indices);
+
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(VolumetricMaxUnpooling_updateOutput_frame)(
+ input_data+p*nslices*iT*iW*iH,
+ output_data+p*nslices*oT*oW*oH,
+ indices_data+p*nslices*iT*iW*iH,
+ nslices,
+ iT, iW, iH,
+ oT, oW, oH,
+ dT, dW, dH,
+ pT, pW, pH
+ );
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+ THIndexTensor_(free)(indices);
+}
+
+static void THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
+ real *gradInput_p,
+ real *gradOutput_p,
+ THIndex_t *ind_p,
+ int nslices,
+ int iT,
+ int iW,
+ int iH,
+ int oT,
+ int oW,
+ int oH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ int k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ int ti, i, j, maxz, maxy, maxx;
+ for (ti = 0; ti < iT; ti++)
+ {
+ for (i = 0; i < iH; i++)
+ {
+ for (j = 0; j < iW; j++)
+ {
+ int start_t = ti * dT - pT;
+ int start_h = i * dH - pH;
+ int start_w = j * dW - pW;
+
+ real *gradInput_p_k = gradInput_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
+ THIndex_t *ind_p_k = ind_p + k*iT*iW*iH + ti*iW*iH + i*iW + j;
+
+ maxz = ((unsigned char*)(ind_p_k))[0]; /* retrieve position of max */
+ maxy = ((unsigned char*)(ind_p_k))[1];
+ maxx = ((unsigned char*)(ind_p_k))[2];
+
+ if (start_t+maxz<0 || start_h+maxy<0 || start_w+maxx<0
+ || start_t+maxz>=oT || start_h+maxy>=oH || start_w+maxx>=oW)
+ {
+ THError(
+ "invalid max index z= %d, y= %d, x= %d, oT= %d, oW= %d, oH= %d",
+ start_t+maxz, start_h+maxy, start_w+maxx, oT, oW, oH
+ );
+ }
+ *gradInput_p_k = gradOutput_p[k*oT*oW*oH + oH*oW*(start_t+maxz)
+ + oW*(start_h+maxy) + (start_w+maxx)]; /* update gradient */
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricMaxUnpooling_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THIndexTensor *indices,
+ int oT,
+ int oW,
+ int oH,
+ int dT,
+ int dW,
+ int dH,
+ int pT,
+ int pW,
+ int pH)
+{
+ int dimw = 3;
+ int dimh = 2;
+ int dimt = 1;
+ int nbatch = 1;
+ int nslices;
+ int iT;
+ int iH;
+ int iW;
+ real *gradInput_data;
+ real *gradOutput_data;
+ THIndex_t *indices_data;
+
+ THNN_(VolumetricMaxUnpooling_shapeCheck)(
+ state, input, gradOutput, indices,
+ oT, oW, oH, dT, dW, dH, pT, pW, pH);
+
+ // TODO: check gradOutput shape
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ indices = THIndexTensor_(newContiguous)(indices);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (input->nDimension == 5)
+ {
+ nbatch = input->size[0];
+ dimt++;
+ dimw++;
+ dimh++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimt-1];
+ iT = input->size[dimt];
+ iH = input->size[dimh];
+ iW = input->size[dimw];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THIndexTensor_(data)(indices);
+
+ /* backprop */
+ if (input->nDimension == 4)
+ {
+ THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
+ gradInput_data, gradOutput_data,
+ indices_data,
+ nslices,
+ iT, iW, iH,
+ oT, oW, oH,
+ dT, dW, dH,
+ pT, pW, pH
+ );
+ }
+ else
+ {
+ int p;
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(VolumetricMaxUnpooling_updateGradInput_frame)(
+ gradInput_data+p*nslices*iT*iW*iH,
+ gradOutput_data+p*nslices*oT*oW*oH,
+ indices_data+p*nslices*iT*iW*iH,
+ nslices,
+ iT, iW, iH,
+ oT, oW, oH,
+ dT, dW, dH,
+ pT, pW, pH
+ );
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+ THIndexTensor_(free)(indices);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricReplicationPadding.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricReplicationPadding.c
new file mode 100644
index 000000000..4d8993ec2
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricReplicationPadding.c
@@ -0,0 +1,357 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricReplicationPadding.c"
+#else
+
+static inline void THNN_(VolumetricReplicationPadding_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback) {
+ int dimw = 3;
+ int dimh = 2;
+ int dimd = 1;
+ int dimslices = 0;
+ long nslices;
+ long idepth;
+ long iheight;
+ long iwidth;
+ long odepth;
+ long oheight;
+ long owidth;
+
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D (batch mode) tensor expected for input, but got: %s");
+
+ if (input->nDimension == 5)
+ {
+ dimw++;
+ dimh++;
+ dimd++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ idepth = input->size[dimd];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ odepth = idepth + pfront + pback;
+ oheight = iheight + ptop + pbottom;
+ owidth = iwidth + pleft + pright;
+
+  THArgCheck(owidth >= 1 && oheight >= 1 && odepth >= 1, 2,
+             "input (D: %ld, H: %ld, W: %ld) is too small."
+             " Calculated output D: %ld H: %ld W: %ld",
+             idepth, iheight, iwidth, odepth, oheight, owidth);
+
+ if (gradOutput != NULL) {
+    THArgCheck(nslices == THTensor_(size)(gradOutput, dimslices), 3,
+               "gradOutput nslices unexpected. Expected: %ld, Got: %ld",
+               nslices, THTensor_(size)(gradOutput, dimslices));
+    THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
+               "gradOutput width unexpected. Expected: %ld, Got: %ld",
+               owidth, THTensor_(size)(gradOutput, dimw));
+    THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
+               "gradOutput height unexpected. Expected: %ld, Got: %ld",
+               oheight, THTensor_(size)(gradOutput, dimh));
+    THArgCheck(odepth == THTensor_(size)(gradOutput, dimd), 3,
+               "gradOutput depth unexpected. Expected: %ld, Got: %ld",
+               odepth, THTensor_(size)(gradOutput, dimd));
+ }
+}
+
+static void THNN_(VolumetricReplicationPadding_updateOutput_frame)(
+ real *input_p, real *output_p,
+ long nslices,
+ long iwidth, long iheight, long idepth,
+ long owidth, long oheight, long odepth,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback)
+{
+ int iStartX = fmax(0, -pleft);
+ int iStartY = fmax(0, -ptop);
+ int iStartZ = fmax(0, -pfront);
+ int oStartX = fmax(0, pleft);
+ int oStartY = fmax(0, ptop);
+ int oStartZ = fmax(0, pfront);
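+  /* Replication padding: every output coordinate is mapped to the nearest
+     valid input coordinate, so border outputs replicate the first or last
+     input column, row, or slice along each axis. */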
+
+ long k, ip_x, ip_y, ip_z;
+#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
+ for (k = 0; k < nslices; k++) {
+ long i, j, z;
+ for (z = 0; z < odepth; z++) {
+ for (i = 0; i < oheight; i++) {
+ for (j = 0; j < owidth; j++) {
+ if (j < pleft) {
+ ip_x = pleft;
+ } else if (j >= pleft && j < iwidth + pleft) {
+ ip_x = j;
+ } else {
+ ip_x = iwidth + pleft - 1;
+ }
+ ip_x = ip_x - oStartX + iStartX;
+
+ if (i < ptop) {
+ ip_y = ptop;
+ } else if (i >= ptop && i < iheight + ptop) {
+ ip_y = i;
+ } else {
+ ip_y = iheight + ptop - 1;
+ }
+ ip_y = ip_y - oStartY + iStartY;
+
+ if (z < pfront) {
+ ip_z = pfront;
+ } else if (z >= pfront && z < idepth + pfront) {
+ ip_z = z;
+ } else {
+ ip_z = idepth + pfront - 1;
+ }
+ ip_z = ip_z - oStartZ + iStartZ;
+
+ real *dest_p = output_p + k * owidth * oheight * odepth +
+ z * owidth * oheight + i * owidth + j;
+ real *src_p = input_p + k * iwidth * iheight * idepth +
+ ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
+ *dest_p = *src_p;
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricReplicationPadding_updateOutput)(THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback)
+{
+ int dimw = 3;
+ int dimh = 2;
+ int dimd = 1;
+ int dimslices = 0;
+ long nbatch = 1;
+ long nslices;
+ long idepth;
+ long iheight;
+ long iwidth;
+ long odepth;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+
+  THNN_(VolumetricReplicationPadding_shapeCheck)(
+ state, input, NULL, pleft, pright,
+ ptop, pbottom, pfront, pback);
+
+ if (input->nDimension == 5)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimd++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ idepth = input->size[dimd];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ odepth = idepth + pfront + pback;
+ oheight = iheight + ptop + pbottom;
+ owidth = iwidth + pleft + pright;
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ /* resize output */
+ if (input->nDimension == 4)
+ {
+ THTensor_(resize4d)(output, nslices, odepth, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+ THNN_(VolumetricReplicationPadding_updateOutput_frame)(
+ input_data, output_data, nslices, iwidth, iheight, idepth,
+ owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront,
+ pback);
+ }
+ else
+ {
+ long p;
+
+ THTensor_(resize5d)(output, nbatch, nslices, odepth, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(VolumetricReplicationPadding_updateOutput_frame)(
+ input_data + p * nslices * iwidth * iheight * idepth,
+ output_data + p * nslices * owidth * oheight * odepth,
+ nslices,
+ iwidth, iheight, idepth,
+ owidth, oheight, odepth,
+ pleft, pright,
+ ptop, pbottom,
+ pfront, pback);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
+ real *ginput_p, real *goutput_p,
+ long nslices,
+ long iwidth, long iheight, long idepth,
+ long owidth, long oheight, long odepth,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback)
+{
+ int iStartX = fmax(0, -pleft);
+ int iStartY = fmax(0, -ptop);
+ int iStartZ = fmax(0, -pfront);
+ int oStartX = fmax(0, pleft);
+ int oStartY = fmax(0, ptop);
+ int oStartZ = fmax(0, pfront);
+
+ long k, ip_x, ip_y, ip_z;
+#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
+ for (k = 0; k < nslices; k++) {
+ long i, j, z;
+ for (z = 0; z < odepth; z++) {
+ for (i = 0; i < oheight; i++) {
+ for (j = 0; j < owidth; j++) {
+ if (j < pleft) {
+ ip_x = pleft;
+ } else if (j >= pleft && j < iwidth + pleft) {
+ ip_x = j;
+ } else {
+ ip_x = iwidth + pleft - 1;
+ }
+ ip_x = ip_x - oStartX + iStartX;
+
+ if (i < ptop) {
+ ip_y = ptop;
+ } else if (i >= ptop && i < iheight + ptop) {
+ ip_y = i;
+ } else {
+ ip_y = iheight + ptop - 1;
+ }
+ ip_y = ip_y - oStartY + iStartY;
+
+ if (z < pfront) {
+ ip_z = pfront;
+ } else if (z >= pfront && z < idepth + pfront) {
+ ip_z = z;
+ } else {
+ ip_z = idepth + pfront - 1;
+ }
+ ip_z = ip_z - oStartZ + iStartZ;
+
+ real *src_p = goutput_p + k * owidth * oheight * odepth +
+ z * owidth * oheight + i * owidth + j;
+ real *dest_p = ginput_p + k * iwidth * iheight * idepth +
+ ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
+ *dest_p += *src_p;
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricReplicationPadding_updateGradInput)(THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback)
+{
+ int dimw = 3;
+ int dimh = 2;
+ int dimd = 1;
+ int dimslices = 0;
+ long nbatch = 1;
+ long nslices;
+ long idepth;
+ long iheight;
+ long iwidth;
+ long odepth;
+ long oheight;
+ long owidth;
+
+ if (input->nDimension == 5)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimd++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ idepth = input->size[dimd];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ odepth = idepth + pfront + pback;
+ oheight = iheight + ptop + pbottom;
+ owidth = iwidth + pleft + pright;
+
+  THNN_(VolumetricReplicationPadding_shapeCheck)(
+        state, input, gradOutput, pleft, pright,
+        ptop, pbottom, pfront, pback);
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* backprop */
+ if (input->nDimension == 4) {
+ THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
+ THTensor_(data)(gradInput),
+ THTensor_(data)(gradOutput),
+ nslices,
+ iwidth, iheight, idepth,
+ owidth, oheight, odepth,
+ pleft, pright,
+ ptop, pbottom,
+ pfront, pback);
+ } else {
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++) {
+ THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
+ THTensor_(data)(gradInput) + p * nslices * idepth * iheight * iwidth,
+ THTensor_(data)(gradOutput) + p * nslices * odepth * oheight * owidth,
+ nslices,
+ iwidth, iheight, idepth,
+ owidth, oheight, odepth,
+ pleft, pright,
+ ptop, pbottom,
+ pfront, pback);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingNearest.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingNearest.c
new file mode 100644
index 000000000..9068fb58d
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingNearest.c
@@ -0,0 +1,226 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricUpSamplingNearest.c"
+#else
+
+static inline void THNN_(VolumetricUpSamplingNearest_shapeCheck)
+ (THTensor *input, THTensor *gradOutput,
+ int scale_factor) {
+  THArgCheck(input != NULL, 2, "4D or 5D input tensor expected but got NULL");
+ THArgCheck(scale_factor > 1, 4,
+ "scale_factor must be greater than 1, but got: %d", scale_factor);
+ THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
+ "4D or 5D input tensor expected but got: %s");
+ if (input->nDimension == 4) {
+ int nChannels = THTensor_(size)(input, 0);
+ int inputDepth = THTensor_(size)(input, 1);
+ int inputHeight = THTensor_(size)(input, 2);
+ int inputWidth = THTensor_(size)(input, 3);
+ int outputDepth = inputDepth * scale_factor;
+ int outputHeight = inputHeight * scale_factor;
+ int outputWidth = inputWidth * scale_factor;
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 0, nChannels);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 1, outputDepth);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 2, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, 4, 3, outputWidth);
+ }
+ } else {
+ int nBatch = THTensor_(size)(input, 0);
+ int nChannels = THTensor_(size)(input, 1);
+ int inputDepth = THTensor_(size)(input, 2);
+ int inputHeight = THTensor_(size)(input, 3);
+ int inputWidth = THTensor_(size)(input, 4);
+ int outputDepth = inputDepth * scale_factor;
+ int outputHeight = inputHeight * scale_factor;
+ int outputWidth = inputWidth * scale_factor;
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch);
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels);
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth);
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth);
+ }
+ }
+}
+
+void THNN_(VolumetricUpSamplingNearest_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int scale_factor)
+{
+ THNN_(VolumetricUpSamplingNearest_shapeCheck)(input, NULL, scale_factor);
+ int inputDepth = THTensor_(size)(input, input->nDimension-3);
+ int inputHeight = THTensor_(size)(input, input->nDimension-2);
+ int inputWidth = THTensor_(size)(input, input->nDimension-1);
+ int outputDepth = inputDepth * scale_factor;
+ int outputHeight = inputHeight * scale_factor;
+ int outputWidth = inputWidth * scale_factor;
+
+ if (input->nDimension == 4) {
+ THTensor_(resize4d)(output,
+ THTensor_(size)(input, 0),
+ outputDepth, outputHeight, outputWidth);
+ } else {
+ THTensor_(resize5d)(output,
+ THTensor_(size)(input, 0),
+ THTensor_(size)(input, 1),
+ outputDepth, outputHeight, outputWidth);
+ }
+
+ int dT = scale_factor;
+ int dW = scale_factor;
+ int dH = scale_factor;
+ int xDim = input->nDimension-3;
+ int yDim = input->nDimension-2;
+ int zDim = input->nDimension-1;
+
+ // dims
+ int idim = input->nDimension;
+ int osz0 = output->size[0];
+ int osz1 = output->size[1];
+ int osz2 = output->size[2];
+ int osz3 = output->size[3];
+ int osz4 = 1;
+ if (idim > 4) {
+ osz4 = output->size[4];
+ }
+
+ // get strides
+ long *is = input->stride;
+ long *os = output->stride;
+
+ // get raw pointers
+ real *pin = THTensor_(data)(input);
+ real *pout = THTensor_(data)(output);
+
+ // perform the upsampling
+ int i0, i1, i2, i3, i4, isrc, idst;
+ int iout[5]; // Output indices
+ int iin[5]; // Input indices
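+  // Nearest-neighbour rule: output voxel (x, y, z) reads input voxel
+  // (x / dW, y / dH, z / dT) via integer (floor) division.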
+
+ for (i0 = 0; i0 < osz0; i0++) {
+ iout[0] = i0;
+ iin[0] = i0;
+ for (i1 = 0; i1 < osz1; i1++) {
+ iout[1] = i1;
+ iin[1] = i1;
+ for (i2 = 0; i2 < osz2; i2++) {
+ iout[2] = i2;
+ iin[2] = i2;
+ for (i3 = 0; i3 < osz3; i3++) {
+ iout[3] = i3;
+ iin[3] = i3;
+ for (i4 = 0; i4 < osz4; i4++) {
+ iout[4] = i4;
+ iin[4] = i4;
+
+ // set the indices for the upsampled dimensions
+ iin[xDim] = iout[xDim] / dW;
+ iin[yDim] = iout[yDim] / dH;
+ iin[zDim] = iout[zDim] / dT;
+
+ idst = i0*os[0] + i1*os[1] + i2*os[2] + i3*os[3];
+ isrc = iin[0]*is[0] + iin[1]*is[1] + iin[2]*is[2] + iin[3]*is[3];
+ if (idim > 4) {
+ idst += i4*os[4];
+ isrc += iin[4]*is[4];
+ }
+
+ pout[idst] = pin[isrc];
+ }
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricUpSamplingNearest_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int scale_factor)
+{
+ THNN_(VolumetricUpSamplingNearest_shapeCheck)(input, gradOutput, scale_factor);
+ THTensor_(resizeAs)(gradInput, input);
+
+ int dW = scale_factor;
+ int dH = scale_factor;
+ int dT = scale_factor;
+ int xDim = gradInput->nDimension-3;
+ int yDim = gradInput->nDimension-2;
+ int zDim = gradInput->nDimension-1;
+
+ // dims
+  int idim = gradInput->nDimension;  // Guaranteed to be 4 or 5
+ int isz0 = gradInput->size[0];
+ int isz1 = gradInput->size[1];
+ int isz2 = gradInput->size[2];
+ int isz3 = gradInput->size[3];
+ int isz4 = 1;
+ if (idim > 4) {
+ isz4 = gradInput->size[4];
+ }
+
+ // get strides
+ long *is = gradInput->stride;
+ long *os = gradOutput->stride;
+
+ // get raw pointers
+ real *pin = THTensor_(data)(gradInput);
+ real *pout = THTensor_(data)(gradOutput);
+
+ // perform the upsampling
+ int i0, i1, i2, i3, i4, isrc, idst, x, y, z;
+ int iin[5]; // Input indices
+ int iout[5]; // Output indices
+
+ THTensor_(zero)(gradInput);
+
+ for (i0 = 0; i0 < isz0; i0++) {
+ iin[0] = i0;
+ iout[0] = i0;
+ for (i1 = 0; i1 < isz1; i1++) {
+ iin[1] = i1;
+ iout[1] = i1;
+ for (i2 = 0; i2 < isz2; i2++) {
+ iin[2] = i2;
+ iout[2] = i2;
+ for (i3 = 0; i3 < isz3; i3++) {
+ iin[3] = i3;
+ iout[3] = i3;
+
+ for (i4 = 0; i4 < isz4; i4++) {
+ iin[4] = i4;
+ iout[4] = i4;
+
+ idst = i0*is[0] + i1*is[1] + i2*is[2] + i3*is[3];
+ if (idim > 4) {
+ idst += i4*is[4];
+ }
+
+ // Now accumulate the gradients from gradOutput
+ for (z = 0; z < dT; z++) {
+ for (y = 0; y < dH; y++) {
+ for (x = 0; x < dW; x++) {
+ iout[xDim] = dW * iin[xDim] + x;
+ iout[yDim] = dH * iin[yDim] + y;
+ iout[zDim] = dT * iin[zDim] + z;
+ isrc = iout[0]*os[0] + iout[1]*os[1] + iout[2]*os[2] + iout[3]*os[3];
+ if (idim > 4) {
+ isrc += iout[4]*os[4];
+ }
+ pin[idst] += pout[isrc];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingTrilinear.c b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingTrilinear.c
new file mode 100644
index 000000000..f2b04dba9
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/VolumetricUpSamplingTrilinear.c
@@ -0,0 +1,213 @@
+// Adapted from interp.cpp from Caffe util by Pauline Luc
+// Originally developed by George Papandreou
+
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricUpSamplingTrilinear.c"
+#else
+
+static inline void THNN_(VolumetricUpSamplingTrilinear_shapeCheck)
+ (THTensor *input, THTensor *gradOutput,
+ int nBatch, int nChannels,
+ int inputDepth, int inputHeight, int inputWidth,
+ int outputDepth, int outputHeight, int outputWidth) {
+ THArgCheck(inputDepth > 0 && inputHeight > 0 && inputWidth > 0
+ && outputDepth > 0 && outputHeight > 0 && outputWidth > 0, 2,
+ "input and output sizes should be greater than 0,"
+ " but got input (D: %d, H: %d, W: %d) output (D: %d, H: %d, W: %d)",
+ inputDepth, inputHeight, inputWidth, outputDepth, outputHeight, outputWidth);
+ if (input != NULL) {
+ THNN_ARGCHECK(input->nDimension == 5, 2, input,
+ "5D input tensor expected but got: %s");
+ }
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 0, nBatch);
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 1, nChannels);
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 2, outputDepth);
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 3, outputHeight);
+ THNN_CHECK_DIM_SIZE(gradOutput, 5, 4, outputWidth);
+ }
+}
+
+void THNN_(VolumetricUpSamplingTrilinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int outputDepth,
+ int outputHeight,
+ int outputWidth){
+
+ int nbatch = THTensor_(size)(input, 0);
+ int channels = THTensor_(size)(input, 1);
+ int inputDepth = THTensor_(size)(input, 2);
+ int inputHeight = THTensor_(size)(input, 3);
+ int inputWidth = THTensor_(size)(input, 4);
+
+ THNN_(VolumetricUpSamplingTrilinear_shapeCheck)
+ (input, NULL,
+ nbatch, channels,
+ inputDepth, inputHeight, inputWidth,
+ outputDepth, outputHeight, outputWidth);
+
+ input = THTensor_(newContiguous)(input);
+ THTensor_(resize5d)(output,
+ THTensor_(size)(input, 0),
+ THTensor_(size)(input, 1),
+ outputDepth, outputHeight, outputWidth);
+ THTensor_(zero)(output);
+ real *idata = THTensor_(data)(input);
+ real *odata = THTensor_(data)(output);
+ channels = nbatch * channels;
+ THAssert(inputDepth > 0 && inputHeight > 0 && inputWidth > 0 &&
+ outputDepth > 0 && outputHeight > 0 && outputWidth > 0);
+ // special case: just copy
+ if (inputDepth == outputDepth && inputHeight == outputHeight && inputWidth == outputWidth) {
+ for (int t2 = 0; t2 < outputDepth; ++t2) {
+ const int t1 = t2;
+ for (int h2 = 0; h2 < outputHeight; ++h2) {
+ const int h1 = h2;
+ for (int w2 = 0; w2 < outputWidth; ++w2) {
+ const int w1 = w2;
+ const real* pos1 = &idata[t1 * inputHeight * inputWidth + h1 * inputWidth + w1];
+ real* pos2 = &odata[t2 * outputHeight * outputWidth + h2 * outputWidth + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos2[0] = pos1[0];
+ pos1 += inputWidth * inputHeight * inputDepth;
+ pos2 += outputWidth * outputHeight * outputDepth;
+ }
+ }
+ }
+ }
+ return;
+ }
+ const float rdepth = (outputDepth > 1) ? (float)(inputDepth - 1)/(outputDepth - 1) : 0.f;
+ const float rheight = (outputHeight > 1) ? (float)(inputHeight - 1)/(outputHeight - 1) : 0.f;
+ const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1) / (outputWidth - 1) : 0.f;
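+  // Align-corners mapping: each output index is scaled by
+  // r = (inSize - 1) / (outSize - 1) and split into an integer base index
+  // (t1, h1, w1) plus a fractional lambda that blends the two neighbouring
+  // input samples along that axis; the eight corner weights below are the
+  // products of the per-axis lambdas.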
+ for (int t2 = 0; t2 < outputDepth; ++t2) {
+ const float t1r = rdepth * t2;
+ const int t1 = t1r;
+ const int t1p = (t1 < inputDepth - 1) ? 1 : 0;
+ const real t1lambda = t1r - t1;
+ const real t0lambda = (real)1. - t1lambda;
+ for (int h2 = 0; h2 < outputHeight; ++h2) {
+ const float h1r = rheight * h2;
+ const int h1 = h1r;
+ const int h1p = (h1 < inputHeight - 1) ? 1 : 0;
+ const real h1lambda = h1r - h1;
+ const real h0lambda = (real)1. - h1lambda;
+ for (int w2 = 0; w2 < outputWidth; ++w2) {
+ const float w1r = rwidth * w2;
+ const int w1 = w1r;
+ const int w1p = (w1 < inputWidth - 1) ? 1 : 0;
+ const real w1lambda = w1r - w1;
+ const real w0lambda = (real)1. - w1lambda;
+ const real* pos1 = &idata[t1 * inputHeight * inputWidth + h1 * inputWidth + w1];
+ real* pos2 = &odata[t2 * outputHeight * outputWidth + h2 * outputWidth + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos2[0] = t0lambda * (h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p])
+ + h1lambda * (w0lambda * pos1[h1p * inputWidth]
+ + w1lambda * pos1[h1p * inputWidth + w1p]))
+ + t1lambda * (h0lambda * (w0lambda * pos1[t1p * inputHeight * inputWidth]
+ + w1lambda * pos1[t1p * inputHeight * inputWidth
+ + w1p])
+ + h1lambda * (w0lambda * pos1[t1p * inputHeight * inputWidth
+ + h1p * inputWidth]
+ + w1lambda * pos1[t1p * inputHeight * inputWidth
+ + h1p * inputWidth + w1p]));
+ pos1 += inputWidth * inputHeight * inputDepth;
+ pos2 += outputWidth * outputHeight * outputDepth;
+ }
+ }
+ }
+ }
+ THTensor_(free)(input);
+}
+
+void THNN_(VolumetricUpSamplingTrilinear_updateGradInput)(
+ THNNState *state,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int nbatch,
+ int channels,
+ int inputDepth,
+ int inputHeight,
+ int inputWidth,
+ int outputDepth,
+ int outputHeight,
+ int outputWidth){
+
+ THNN_(VolumetricUpSamplingTrilinear_shapeCheck)
+ (NULL, gradOutput,
+ nbatch, channels,
+ inputDepth, inputHeight, inputWidth,
+ outputDepth, outputHeight, outputWidth);
+
+ THTensor_(resize5d)(gradInput, nbatch, channels, inputDepth, inputHeight, inputWidth);
+ THTensor_(zero)(gradInput);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ real *data1 = THTensor_(data)(gradInput);
+ real *data2 = THTensor_(data)(gradOutput);
+ channels = nbatch * channels;
+
+ // special case: same-size matching grids
+ if (inputDepth == outputDepth && inputHeight == outputHeight && inputWidth == outputWidth) {
+ for (int t2 = 0; t2 < outputDepth; ++t2) {
+ const int t1 = t2;
+ for (int h2 = 0; h2 < outputHeight; ++h2) {
+ const int h1 = h2;
+ for (int w2 = 0; w2 < outputWidth; ++w2) {
+ const int w1 = w2;
+ real* pos1 = &data1[t1 * inputHeight * inputWidth + h1 * inputWidth + w1];
+ const real* pos2 = &data2[t2 * outputHeight * outputWidth + h2 * outputWidth + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos1[0] += pos2[0];
+ pos1 += inputWidth * inputHeight * inputDepth;
+ pos2 += outputWidth * outputHeight * outputDepth;
+ }
+ }
+ }
+ }
+ return;
+ }
+ const float rdepth = (outputDepth > 1) ? (float)(inputDepth - 1)/(outputDepth - 1) : 0.f;
+ const float rheight = (outputHeight > 1) ? (float)(inputHeight - 1)/(outputHeight - 1) : 0.f;
+ const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1)/(outputWidth - 1) : 0.f;
+ for (int t2 = 0; t2 < outputDepth; ++t2) {
+ const float t1r = rdepth * t2;
+ const int t1 = t1r;
+ const int t1p = (t1 < inputDepth - 1) ? 1 : 0;
+ const real t1lambda = t1r - t1;
+ const real t0lambda = (real)1. - t1lambda;
+ for (int h2 = 0; h2 < outputHeight; ++h2) {
+ const float h1r = rheight * h2;
+ const int h1 = h1r;
+ const int h1p = (h1 < inputHeight - 1) ? 1 : 0;
+ const real h1lambda = h1r - h1;
+ const real h0lambda = (real)1. - h1lambda;
+ for (int w2 = 0; w2 < outputWidth; ++w2) {
+ const float w1r = rwidth * w2;
+ const int w1 = w1r;
+ const int w1p = (w1 < inputWidth - 1) ? 1 : 0;
+ const real w1lambda = w1r - w1;
+ const real w0lambda = (real)1. - w1lambda;
+ real* pos1 = &data1[t1 * inputHeight * inputWidth + h1 * inputWidth + w1];
+        const real* pos2 = &data2[t2 * outputHeight * outputWidth + h2 * outputWidth + w2];
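+        // Scatter each output gradient onto the 8 input corners it was
+        // interpolated from, using the same trilinear weights as forward.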
+ for (int c = 0; c < channels; ++c) {
+ pos1[0] += t0lambda * h0lambda * w0lambda * pos2[0];
+ pos1[w1p] += t0lambda * h0lambda * w1lambda * pos2[0];
+ pos1[h1p * inputWidth] += t0lambda * h1lambda * w0lambda * pos2[0];
+ pos1[h1p * inputWidth + w1p] += t0lambda * h1lambda * w1lambda * pos2[0];
+ pos1[t1p * inputHeight * inputWidth] += t1lambda * h0lambda * w0lambda * pos2[0];
+ pos1[t1p * inputHeight * inputWidth + w1p] += t1lambda * h0lambda * w1lambda * pos2[0];
+ pos1[t1p * inputHeight * inputWidth + h1p * inputWidth] += t1lambda * h1lambda * w0lambda * pos2[0];
+ pos1[t1p * inputHeight * inputWidth + h1p * inputWidth + w1p] += t1lambda * h1lambda * w1lambda * pos2[0];
+ pos1 += inputWidth * inputHeight * inputDepth;
+ pos2 += outputWidth * outputHeight * outputDepth;
+ }
+ }
+ }
+ }
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/generic/unfold.c b/contrib/lua-torch/nn/lib/THNN/generic/unfold.c
new file mode 100644
index 000000000..14a73b567
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/generic/unfold.c
@@ -0,0 +1,166 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/unfold.c"
+#else
+
+/* note: several kernel positions accumulate into the same input location, so this cannot be parallelized as finely as unfolded_copy (only across input planes) */
+void THNN_(unfolded_acc)(
+ THTensor *finput,
+ THTensor *input,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ int nInputPlane,
+ int inputWidth,
+ int inputHeight,
+ int outputWidth,
+ int outputHeight)
+{
+ // This function assumes that
+ // outputHeight*dH does not overflow a long
+ // outputWidth*dW does not overflow a long
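+  // finput is the unfolded (im2col-style) matrix of shape
+  // (nInputPlane*kH*kW) x (outputHeight*outputWidth); this routine is the
+  // adjoint of unfolded_copy and accumulates each row back into `input`.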
+
+ int nip;
+
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+
+#pragma omp parallel for private(nip)
+ for(nip = 0; nip < nInputPlane; nip++)
+ {
+ int kw, kh, y, x;
+ long ix, iy;
+ for(kh = 0; kh < kH; kh++)
+ {
+ for(kw = 0; kw < kW; kw++)
+ {
+ real *src = finput_data + nip*((size_t)kH*kW*outputHeight*outputWidth) + kh*((size_t)kW*outputHeight*outputWidth) + kw*((size_t)outputHeight*outputWidth);
+ real *dst = input_data + nip*((size_t)inputHeight*inputWidth);
+ if (padW > 0 || padH > 0) {
+ int lpad,rpad;
+ for(y = 0; y < outputHeight; y++) {
+ iy = (long)y*dH - padH + kh;
+ if (iy < 0 || iy >= inputHeight) {
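+            /* this output row maps entirely into the padding: nothing to accumulate */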
+ } else {
+ if (dW==1){
+ ix = 0 - padW + kw;
+ lpad = fmaxf(0,padW-kw);
+ rpad = fmaxf(0,padW-(kW-kw-1));
+ real *dst_slice = dst+(size_t)iy*inputWidth+ix+lpad;
+ THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+lpad, 1, outputWidth - lpad - rpad); /* note: THVector_add could handle 1 value better */
+ }
+ else{
+ for (x=0; x<outputWidth; x++){
+ ix = (long)x*dW - padW + kw;
+ if (ix < 0 || ix >= inputWidth){
+ }else{
+ real *dst_slice = dst+(size_t)iy*inputWidth+ix;
+ THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+x, 1, 1);
+ }
+ }
+ }
+ }
+ }
+ } else {
+ for(y = 0; y < outputHeight; y++) {
+ iy = (long)y*dH + kh;
+ ix = 0 + kw;
+ if (dW == 1 ) {
+ real *dst_slice = dst+(size_t)iy*inputWidth+ix;
+ THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth, 1, outputWidth); /* note: THVector_add could handle 1 value better */
+ }else{
+ for(x = 0; x < outputWidth; x++) {
+ real *dst_slice = dst+(size_t)iy*inputWidth+ix+x*dW;
+ THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+x, 1, 1);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void THNN_(unfolded_copy)(
+ THTensor *finput,
+ THTensor *input,
+ int kW,
+ int kH,
+ int dW,
+ int dH,
+ int padW,
+ int padH,
+ int nInputPlane,
+ int inputWidth,
+ int inputHeight,
+ int outputWidth,
+ int outputHeight)
+{
+ // This function assumes that
+ // kH*kW does not overflow an int
+ // nInputPlane*kH*kW does not overflow a long
+ // outputHeight*dH does not overflow a long
+ // outputWidth*dW does not overflow a long
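+  // im2col-style unfold: each (plane, kh, kw) triple fills one row of
+  // finput from a strided window of the input; padded positions are
+  // zero-filled, and the contiguous dW == 1 case copies whole rows at once.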
+
+ long k;
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+
+#pragma omp parallel for private(k)
+ for(k = 0; k < (long)nInputPlane*kH*kW; k++) {
+ long nip = k / (kH*kW);
+ long rest = k % (kH*kW);
+ long kh = rest / kW;
+ long kw = rest % kW;
+ int x, y;
+ long ix, iy;
+ real *dst = finput_data + nip*((size_t)kH*kW*outputHeight*outputWidth) + kh*((size_t)kW*outputHeight*outputWidth) + kw*((size_t)outputHeight*outputWidth);
+ real *src = input_data + nip*((size_t)inputHeight*inputWidth);
+ if (padW > 0 || padH > 0) {
+ long lpad,rpad;
+ for(y = 0; y < outputHeight; y++) {
+ iy = (long)y*dH - padH + kh;
+ if (iy < 0 || iy >= inputHeight) {
+ memset(dst+(size_t)y*outputWidth, 0, sizeof(real)*outputWidth);
+ } else {
+ if (dW==1){
+ ix = 0 - padW + kw;
+ lpad = fmaxf(0,padW-kw);
+ rpad = fmaxf(0,padW-(kW-kw-1));
+ if (outputWidth-rpad-lpad <= 0) {
+ memset(dst+(size_t)y*outputWidth, 0, sizeof(real)*outputWidth);
+ } else {
+ if (lpad > 0) memset(dst+(size_t)y*outputWidth, 0, sizeof(real)*lpad);
+ memcpy(dst+(size_t)y*outputWidth+lpad, src+(size_t)iy*inputWidth+ix+lpad, sizeof(real)*(outputWidth-rpad-lpad));
+ if (rpad > 0) memset(dst+(size_t)y*outputWidth + outputWidth - rpad, 0, sizeof(real)*rpad);
+ }
+ }
+ else{
+ for (x=0; x<outputWidth; x++){
+ ix = (long)x*dW - padW + kw;
+ if (ix < 0 || ix >= inputWidth)
+ memset(dst+(size_t)y*outputWidth+x, 0, sizeof(real)*1);
+ else
+ memcpy(dst+(size_t)y*outputWidth+x, src+(size_t)iy*inputWidth+ix, sizeof(real)*(1));
+ }
+ }
+ }
+ }
+ } else {
+ for(y = 0; y < outputHeight; y++) {
+ iy = (long)y*dH + kh;
+ ix = 0 + kw;
+ if (dW == 1)
+ memcpy(dst+(size_t)y*outputWidth, src+(size_t)iy*inputWidth+ix, sizeof(real)*outputWidth);
+ else{
+ for (x=0; x<outputWidth; x++)
+ memcpy(dst+(size_t)y*outputWidth+x, src+(size_t)iy*inputWidth+ix+(long)x*dW, sizeof(real)*(1));
+ }
+ }
+ }
+ }
+}
+
+#endif
diff --git a/contrib/lua-torch/nn/lib/THNN/init.c b/contrib/lua-torch/nn/lib/THNN/init.c
new file mode 100644
index 000000000..5c8c023dc
--- /dev/null
+++ b/contrib/lua-torch/nn/lib/THNN/init.c
@@ -0,0 +1,280 @@
+#include "TH.h"
+#include "THNN.h"
+
+#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
+#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME)
+
+#define THNN_CHECK_SHAPE(I1, I2) \
+ if (I1 != NULL && I2 != NULL && !THTensor_(isSameSizeAs)(I1, I2)) \
+ { \
+ THDescBuff s1 = THTensor_(sizeDesc)(I1); \
+ THDescBuff s2 = THTensor_(sizeDesc)(I2); \
+ THError(#I1 " and " #I2 " shapes do not match: " \
+ #I1 " %s, " #I2 " %s", s1.str, s2.str); \
+ }
+
+#define THNN_CHECK_SHAPE_INDICES(I1, I2) \
+ THLongStorage *size2 = THLongTensor_newSizeOf(I2); \
+ if (I1 != NULL && I2 != NULL && !THTensor_(isSize)(I1, size2)) \
+ { \
+ THDescBuff s1 = THTensor_(sizeDesc)(I1); \
+ THDescBuff s2 = THLongTensor_sizeDesc(I2); \
+ THLongStorage_free(size2); \
+ THError(#I1 " and " #I2 " shapes do not match: " \
+ #I1 " %s, " #I2 " %s", s1.str, s2.str); \
+ } else { \
+ THLongStorage_free(size2); \
+ }
+
+#define THNN_CHECK_NELEMENT(I1, I2) \
+ if (I1 != NULL && I2 != NULL ) { \
+ ptrdiff_t n1 = THTensor_(nElement)(I1); \
+ ptrdiff_t n2 = THTensor_(nElement)(I2); \
+ if (n1 != n2) \
+ { \
+ THDescBuff s1 = THTensor_(sizeDesc)(I1); \
+ THDescBuff s2 = THTensor_(sizeDesc)(I2); \
+ THError(#I1 " and " #I2 " have different number of elements: " \
+            #I1 " %s has %ld elements, while " \
+            #I2 " %s has %ld elements", s1.str, n1, s2.str, n2); \
+ } \
+ }
+
+#define THNN_CHECK_DIM_SIZE(T, DIM, DIM_SIZE, SIZE) \
+ if (THTensor_(nDimension)(T) != DIM || \
+ THTensor_(size)(T, DIM_SIZE) != SIZE) { \
+ THDescBuff s1 = THTensor_(sizeDesc)(T); \
+ THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
+ " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
+ }
+
+#define THNN_CHECK_DIM_SIZE_INDICES(T, DIM, DIM_SIZE, SIZE) \
+ if (THIndexTensor_(nDimension)(T) != DIM || \
+ THIndexTensor_(size)(T, DIM_SIZE) != SIZE) { \
+ THDescBuff s1 = THIndexTensor_(sizeDesc)(T); \
+ THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \
+ " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
+ }
+
+#define THNN_ARGCHECK(COND, ARG, T, FORMAT) \
+ if (!(COND)) { \
+ THDescBuff s1 = THTensor_(sizeDesc)(T); \
+ THArgCheck(COND, ARG, FORMAT, s1.str); \
+ }
+
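+/* Each generic/*.c below is included once per floating-point type via
+   THGenerateFloatTypes.h, which defines `real` and expands the
+   THTensor_()/THNN_() name macros for both float and double. */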
+#include "generic/Abs.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/AbsCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/BCECriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/ClassNLLCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialClassNLLCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/DistKLDivCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/ELU.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/HardShrink.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/HardTanh.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/GatedLinearUnit.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/L1Cost.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/LeakyReLU.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/FusedRNNKernel.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/LogSigmoid.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/LogSoftMax.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/LookupTable.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/MSECriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/MarginCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SoftMarginCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/MultiLabelMarginCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/MultiMarginCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Linear.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/PReLU.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/RReLU.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Sigmoid.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SmoothL1Criterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SoftMax.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SoftPlus.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SoftShrink.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SparseLinear.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/IndexLinear.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Sqrt.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Square.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Tanh.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Threshold.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/TemporalConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/TemporalSubSampling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/TemporalMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/TemporalRowConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/BatchNormalization.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/unfold.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialConvolutionMap.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialConvolutionMM.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialDepthWiseConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialConvolutionLocal.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialFullConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialFullConvolutionMap.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialDilatedConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialAdaptiveMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialAdaptiveAveragePooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialAveragePooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialFractionalMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialDilatedMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialMaxUnpooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialSubSampling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialUpSamplingNearest.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialUpSamplingBilinear.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricAveragePooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricConvolutionMM.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricFullConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricDilatedConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricDilatedMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricFractionalMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricMaxUnpooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialReflectionPadding.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialReplicationPadding.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricReplicationPadding.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricUpSamplingNearest.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricUpSamplingTrilinear.c"
+#include "THGenerateFloatTypes.h"
+
diff --git a/contrib/lua-torch/nn/mkdocs.yml b/contrib/lua-torch/nn/mkdocs.yml
new file mode 100644
index 000000000..a37a34fb0
--- /dev/null
+++ b/contrib/lua-torch/nn/mkdocs.yml
@@ -0,0 +1,18 @@
+site_name: nn
+theme : simplex
+repo_url : https://github.com/torch/nn
+use_directory_urls : false
+markdown_extensions: [extra]
+docs_dir : doc
+pages:
+- [index.md, Home]
+- [module.md, Modules, Module Interface]
+- [containers.md, Modules, Containers]
+- [transfer.md, Modules, Transfer Functions]
+- [simple.md, Modules, Simple Layers]
+- [table.md, Modules, Table Layers]
+- [convolution.md, Modules, Convolution Layers]
+- [criterion.md, Criterion, Criterions]
+- [overview.md, Additional Documentation, Overview]
+- [training.md, Additional Documentation, Training]
+- [testing.md, Additional Documentation, Testing]
diff --git a/contrib/lua-torch/nn/test.lua b/contrib/lua-torch/nn/test.lua
new file mode 100755
index 000000000..4e3f627fc
--- /dev/null
+++ b/contrib/lua-torch/nn/test.lua
@@ -0,0 +1,8787 @@
+-- you can easily test specific units like this:
+-- th -lnn -e "nn.test{'LookupTable'}"
+-- th -lnn -e "nn.test{'LookupTable', 'Add'}"
+
+local mytester = torch.Tester()
+local jac
+local sjac
+
+local precision = 1e-5
+local expprecision = 1.1e-4
+
+local nntest = torch.TestSuite()
+
+local function equal(t1, t2, msg)
+ if (torch.type(t1) == "table") then
+ for k, v in pairs(t2) do
+ equal(t1[k], t2[k], msg)
+ end
+ else
+ mytester:eq(t1, t2, 0.00001, msg)
+ end
+end
+
+
+--[[ Generate tests to exercise the tostring component of modules. ]]
+local tostringTestModules = {
+ nnLinear = nn.Linear(1, 2),
+ nnReshape = nn.Reshape(10),
+ nnSpatialZeroPadding = nn.SpatialZeroPadding(1, 1, 1, 1)}
+for test_name, component in pairs(tostringTestModules) do
+ nntest['tostring' .. test_name] =
+ function ()
+ mytester:assert(tostring(component):find(
+ torch.type(component) .. '(', 1, true) ~= nil,
+ 'nn components should have a descriptive tostring' ..
+ ' beginning with the classname')
+ end
+end
+
+function nntest.Add()
+ local inj_vals = {math.random(3,5), 1} -- Also test the inj = 1 spatial case
+ local ini = math.random(3,5)
+ local ink = math.random(3,5)
+
+ for ind, inj in pairs(inj_vals) do
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Add(ini,inj,ink)
+
+ -- 1D
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format('error on bias [%s]', t))
+ end
+
+ -- 2D
+ local nframe = math.random(50,70)
+ local input = torch.Tensor(nframe, ini,inj,ink):zero()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format('error on bias [%s]', t))
+ end
+
+ -- IO
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+ end
+end
+
+function nntest.Bottle()
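+   -- nn.Bottle applies the wrapped module on a 2D view of the input
+   -- (collapsing the leading dimensions), then restores the original
+   -- shape; both tests check equivalence against an explicit
+   -- View -> module -> View pipeline.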
+ local ini = 2
+ local inj = 3
+ local ink = 4
+ local out = 5
+ local input = torch.Tensor(ini,inj,ink):normal()
+ local linear = nn.Linear(ink, out)
+ local module1 = nn.Bottle(linear)
+ local module2 = nn.Sequential()
+ module2:add(nn.View(ini*inj, ink))
+ module2:add(linear)
+ module2:add(nn.View(ini, inj, out))
+ local output1 = module1:forward(input)
+ local output2 = module2:forward(input)
+ mytester:eq(output1, output2, 0.0001, 'Bottle output not the same as Module')
+
+ local shape = {4, 5, 6, 7, 8, 1, 3}
+ local input = torch.Tensor(table.unpack(shape)):normal()
+ local module = nn.Sequential()
+ module:add(nn.Squeeze(2))
+ module:add(nn.Linear(3, 3))
+ local module1 = nn.Bottle(module, 3, 2)
+ local outShape = {4, 5, 6, 7, 8, 3}
+ local module2 = nn.Sequential()
+ module2:add(nn.View(4*5*6*7*8, 1, 3))
+ module2:add(module)
+ module2:add(nn.View(table.unpack(outShape)))
+ local output1 = module1:forward(input)
+ local grad = torch.Tensor(output1:size()):normal()
+ local gradOutput1 = module1:backward(input, grad):clone()
+ local output2 = module2:forward(input)
+ local gradOutput2 = module2:backward(input, grad):clone()
+ mytester:eq(output1, output2, 0.0001, 'Bottle output not the same as Module')
+ mytester:eq(gradOutput1, gradOutput2, 0.0001, 'Bottle gradOutput not the same as Module')
+end
+
+function nntest.WeightNorm()
+ local input = torch.rand(10, 5)
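+   -- nn.WeightNorm reparameterizes the wrapped module's weight as
+   -- w = g * v / ||v||, so the Jacobian is checked w.r.t. g and v
+   -- (plus the untouched bias) instead of w itself.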
+
+ -- temporal convolution
+ local model = nn.WeightNorm(nn.TemporalConvolution(5, 20, 2, 1))
+ local err = nn.Jacobian.testJacobianParameters(model, input,
+ model.bias, model.gradBias)
+ mytester:assert(err < precision, 'Temporal Convolution bias')
+ err = nn.Jacobian.testJacobianParameters(model, input,
+ model.g, model.gradG)
+ mytester:assert(err < precision, 'Temporal Convolution g')
+ err = nn.Jacobian.testJacobianParameters(model, input,
+ model.v, model.gradV)
+ mytester:assert(err < precision, 'Temporal Convolution v')
+
+ -- linear
+ model = nn.WeightNorm(nn.Linear(5, 20))
+ err = nn.Jacobian.testJacobianParameters(model, input,
+ model.bias, model.gradBias)
+ mytester:assert(err < precision, 'Linear bias')
+ err = nn.Jacobian.testJacobianParameters(model, input, model.g, model.gradG)
+ mytester:assert(err < precision, 'Linear g')
+ err = nn.Jacobian.testJacobianParameters(model, input,
+ model.v, model.gradV)
+ mytester:assert(err < precision, 'Linear v')
+
+ -- euclidean with weight but no bias
+ input = torch.rand(10, 5)
+ model = nn.WeightNorm(nn.Euclidean(5, 20))
+ err = nn.Jacobian.testJacobianParameters(model, input, model.g, model.gradG)
+ mytester:assert(err < precision, 'Euclidean g')
+ err = nn.Jacobian.testJacobianParameters(model, input,
+ model.v, model.gradV)
+ mytester:assert(err < precision, 'Euclidean v')
+
+ -- spatial convolution with 4D weights
+ input = torch.rand(5, 10, 10)
+ model = nn.WeightNorm(nn.SpatialConvolution(5, 20, 2, 2, 3, 3, 1, 1), 2)
+ err = nn.Jacobian.testJacobianParameters(model, input,
+ model.bias, model.gradBias)
+ mytester:assert(err < precision, 'Spatial Convolution bias')
+ err = nn.Jacobian.testJacobianParameters(model, input,
+ model.g, model.gradG)
+ mytester:assert(err < precision, 'Spatial Convolution g')
+ err = nn.Jacobian.testJacobianParameters(model, input,
+ model.v, model.gradV)
+ mytester:assert(err < precision, 'Spatial Convolution v')
+
+ -- linear save/load
+ model = nn.WeightNorm(nn.Linear(5, 20))
+ input = torch.rand(10, 5)
+ local out = model:forward(input)
+ local modelr = torch.deserialize(torch.serialize(model))
+ local outr = modelr:forward(input)
+ mytester:assertTensorEq(out, outr)
+end
+
+function nntest.LinearWeightNorm()
+ local input = torch.rand(10, 5)
+ local model = nn.LinearWeightNorm(5, 20)
+
+ -- check gradient
+ local err = nn.Jacobian.testJacobianParameters(model, input, model.bias, model.gradBias)
+ mytester:assert(err < precision, 'bias')
+ err = nn.Jacobian.testJacobianParameters(model, input, model.g, model.gradG)
+ mytester:assert(err < precision, 'g')
+ err = nn.Jacobian.testJacobianParameters(model, input, model.v, model.gradV)
+ mytester:assert(err < precision, 'v')
+
+ -- check conversion functions
+ local linear = nn.Linear(5,20)
+ local wnFromLin = nn.LinearWeightNorm.fromLinear(linear)
+ local linFromWn = wnFromLin:toLinear()
+
+ local linOut = linear:forward(input)
+ local wnOut = wnFromLin:forward(input)
+ local linFromWnOut = linFromWn:forward(input)
+
+ mytester:assertTensorEq(linOut, wnOut, precision, "outputs are not equivalent")
+ mytester:assertTensorEq(wnOut, linFromWnOut, precision, "outputs are not equivalent")
+
+ -- check conversion with nobias
+ linear = nn.Linear(5,20,false)
+ wnFromLin = nn.LinearWeightNorm.fromLinear(linear)
+ linFromWn = wnFromLin:toLinear()
+
+ linOut = linear:forward(input)
+ wnOut = wnFromLin:forward(input)
+ linFromWnOut = linFromWn:forward(input)
+
+ mytester:assertTensorEq(linear.weight, wnFromLin.weight, precision, "weights are not equivalent")
+ mytester:assert(not wnFromLin.bias)
+ mytester:assert(not linear.bias)
+ mytester:assertTensorEq(linOut, wnOut, precision, "outputs are not equivalent")
+ mytester:assertTensorEq(wnOut, linFromWnOut, precision, "outputs are not equivalent")
+
+ -- check gradient with nobias
+ model = wnFromLin
+
+ err = nn.Jacobian.testJacobianParameters(model, input, model.g, model.gradG)
+ mytester:assert(err < precision, 'g')
+ err = nn.Jacobian.testJacobianParameters(model, input, model.v, model.gradV)
+ mytester:assert(err < precision, 'v')
+end
+
+function nntest.CAdd()
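+   -- nn.CAdd adds a learned bias that is broadcast over singleton
+   -- dimensions; each helper below exercises one broadcasting pattern
+   -- (per channel, per row, per column, per element, ...).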
+ local function testBackwardPass(module, input, params, dparams)
+ local err = jac.testJacobian(module,input)
+      mytester:assertlt(err,precision, "error computing gradients w.r.t. inputs")
+
+      err = jac.testJacobianParameters(module, input, params, dparams)
+      mytester:assertlt(err,precision, "error computing gradients w.r.t. params")
+
+      err = jac.testJacobianUpdateParameters(module, input, module.bias)
+      mytester:assertlt(err,precision, "error in update using gradients w.r.t. parameters")
+
+ --Test all of the various update methods
+ for test, err in pairs(jac.testAllUpdate(module, input, "bias", "gradBias")) do
+ mytester:assertlt(err, precision, string.format("error on bias [%s]", test))
+ end
+ end
+
+ local function testModuleIO(module, input)
+ local fwdErr,bkwdErr = jac.testIO(module,input)
+ mytester:asserteq(fwdErr, 0, torch.typename(module) .. " - i/o forward err ")
+ mytester:asserteq(bkwdErr, 0, torch.typename(module) .. " - i/o backward err ")
+ end
+
+ local function testCAddWithNonBatchedInput()
+ local channels = math.random(3,5)
+ local width = math.random(3,5)
+ local height = math.random(3,5)
+
+ local input = torch.Tensor(channels, height, width):zero()
+
+ --Per channel bias
+ local module = nn.CAdd(channels, 1, 1)
+ local params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ local output = module:forward(input)
+ mytester:assert(output:isSameSizeAs(input))
+
+ for i = 1, module.bias:view(-1):size(1) do
+ local bias = module.bias:view(-1)[i]
+ local result = output[i]:view(-1)
+ local expectedResult = torch.Tensor({bias}):expandAs(result)
+ mytester:assertTensorEq(result, expectedResult, precision)
+ end
+
+ --Per row bias
+ module = nn.CAdd(1, height, 1)
+ params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ output = module:forward(input)
+ mytester:assert(output:isSameSizeAs(input))
+
+ for i = 1, module.bias:view(-1):size(1) do
+ local bias = module.bias:view(-1)[i]
+ local result = output[{{}, {i}, {}}]:contiguous():view(-1)
+ local expectedResult = torch.Tensor({bias}):expandAs(result)
+ mytester:assertTensorEq(result, expectedResult, precision)
+ end
+
+ --Per column bias
+ module = nn.CAdd(1, 1, width)
+ params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ output = module:forward(input)
+ mytester:assert(output:isSameSizeAs(input))
+
+ for i = 1, module.bias:view(-1):size(1) do
+ local bias = module.bias:view(-1)[i]
+ local result = output[{{}, {}, {i}}]:contiguous():view(-1)
+ local expectedResult = torch.Tensor({bias}):expandAs(result)
+ mytester:assertTensorEq(result, expectedResult, precision)
+ end
+
+ --Per input component bias
+ module = nn.CAdd(channels, height, width)
+ params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ output = module:forward(input)
+
+ mytester:assert(output:isSameSizeAs(input))
+ mytester:assert(module.bias:isSameSizeAs(input))
+ mytester:assertTensorEq(module.bias, output, precision)
+
+ testModuleIO(module, input)
+ end
+
+ local function testCAddWithBatchedInput()
+ local batchSize = math.random(3,5)
+ local channels = math.random(3,5)
+ local width = math.random(3,5)
+ local height = math.random(3,5)
+
+ local input = torch.Tensor(batchSize, channels, height, width):zero()
+
+ --Per batch bias
+ local module = nn.CAdd(batchSize, 1, 1, 1)
+ local params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ local output = module:forward(input)
+ mytester:assert(output:isSameSizeAs(input))
+
+ for i = 1, module.bias:view(-1):size(1) do
+ local bias = module.bias:view(-1)[i]
+ local result = output[i]:view(-1)
+ local expectedResult = torch.Tensor({bias}):expandAs(result)
+ mytester:assertTensorEq(result, expectedResult, precision)
+ end
+
+ --Per channel bias
+ module = nn.CAdd(1, channels, 1, 1)
+ params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ output = module:forward(input)
+ mytester:assert(output:isSameSizeAs(input))
+
+ for i = 1, module.bias:view(-1):size(1) do
+ local bias = module.bias:view(-1)[i]
+ local result = output[{{}, {i}, {}, {}}]:contiguous():view(-1)
+ local expectedResult = torch.Tensor({bias}):expandAs(result)
+ mytester:assertTensorEq(result, expectedResult, precision)
+ end
+
+ --Per row bias
+ module = nn.CAdd(1, 1, height, 1)
+ params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ output = module:forward(input)
+ mytester:assert(output:isSameSizeAs(input))
+
+ for i = 1, module.bias:view(-1):size(1) do
+ local bias = module.bias:view(-1)[i]
+ local result = output[{{}, {}, {i}, {}}]:contiguous():view(-1)
+ local expectedResult = torch.Tensor({bias}):expandAs(result)
+ mytester:assertTensorEq(result, expectedResult, precision)
+ end
+
+ --Per column bias
+ module = nn.CAdd(1, 1, 1, width)
+ params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ output = module:forward(input)
+ mytester:assert(output:isSameSizeAs(input))
+
+ for i = 1, module.bias:view(-1):size(1) do
+ local bias = module.bias:view(-1)[i]
+ local result = output[{{}, {}, {}, {i}}]:contiguous():view(-1)
+ local expectedResult = torch.Tensor({bias}):expandAs(result)
+ mytester:assertTensorEq(result, expectedResult, precision)
+ end
+
+ --Per input component bias
+ module = nn.CAdd(batchSize, channels, height, width)
+ params, gradParams = module:getParameters()
+
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ output = module:forward(input)
+
+ mytester:assert(output:isSameSizeAs(input))
+ mytester:assert(module.bias:isSameSizeAs(input))
+ mytester:assertTensorEq(module.bias, output, precision)
+
+ testModuleIO(module, input)
+ end
+
+
+ local function testCAddWithLessDimsThanInput()
+ local input = torch.rand(4,5)
+ local module = nn.CAdd(5)
+ local params, gradParams = module:getParameters()
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+ local output = module:forward(input)
+ local expandedBias = module.bias:view(1,5):expand(4,5):clone()
+ mytester:assert(output:isSameSizeAs(input))
+ mytester:assertTensorEq(expandedBias, output, precision)
+
+ testModuleIO(module, input)
+
+ input = torch.rand(4,5,6)
+ module = nn.CAdd(5,6)
+ params, gradParams = module:getParameters()
+ testBackwardPass(module, input, params, gradParams)
+
+ input:zero()
+      output = module:forward(input)
+ expandedBias = module.bias:view(1,5,6):expand(4,5,6):clone()
+ mytester:assert(output:isSameSizeAs(input))
+ mytester:assertTensorEq(expandedBias, output, precision)
+
+ testModuleIO(module, input)
+ end
+
+
+ testCAddWithNonBatchedInput()
+ testCAddWithBatchedInput()
+ testCAddWithLessDimsThanInput()
+end
+
+function nntest.CMul()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local inl = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.CMul(1, ini, inj, ink, 1)
+
+ -- 1D
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ -- 2D
+ local nframe = math.random(3,14)
+ local input = torch.randn(nframe, ini,inj,ink)
+ local output = module:forward(input)
+ local output2 = torch.cmul(input, module.weight:view(1,ini,inj,ink):expandAs(input))
+ mytester:assertTensorEq(output2, output, 0.000001, 'CMul forward 2D err')
+
+ module:zeroGradParameters()
+ local gradWeight = module.gradWeight:clone()
+ local gradInput = module:backward(input, output)
+ local gradInput2 = gradInput:clone():zero()
+ local outputView = output:view(input:size(1), -1)
+ gradInput2:view(input:size(1), -1):addcmul(1, module.weight:view(1,-1):expandAs(outputView), outputView)
+ mytester:assertTensorEq(gradInput2, gradInput, 0.000001, 'CMul updateGradInput 2D err')
+ mytester:assert(gradInput:isSameSizeAs(input), 'CMul gradInput 2D size err')
+
+ local inputView = input:view(nframe, -1)
+ local gradWeightView = gradWeight:view(1, -1)
+ for i=1,nframe do
+ gradWeightView:addcmul(1, inputView[i], outputView[i])
+ end
+ mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'CMul accGradParameters 2D err')
+ mytester:assert(module.weight:isSameSizeAs(module.gradWeight), 'CMul gradWeight size err')
+
+ -- Expansion
+ input = torch.randn(nframe, ini,inj,ink,inl)
+ output = module:forward(input)
+ output2 = torch.cmul(input, module.weight:expandAs(input))
+ mytester:assertTensorEq(output2, output, 0.000001, 'CMul forward expand err')
+
+ module:zeroGradParameters()
+ gradWeight:zero()
+ gradInput = module:backward(input, output)
+ gradInput2 = gradInput:clone():zero()
+ gradInput2:addcmul(1, module.weight:expandAs(output), output)
+ mytester:assertTensorEq(gradInput2, gradInput, 0.000001, 'CMul updateGradInput expansion err')
+ mytester:assert(gradInput:isSameSizeAs(input), 'CMul gradInput expand size err')
+
+ for i=1,nframe do
+ -- 4 is the [non-batch] singleton dim
+ gradWeight:add(torch.cmul(input[i], output[i]):sum(4))
+ end
+ mytester:assertTensorEq(gradWeight:sum(5), module.gradWeight, 0.000001, 'CMul accGradParameters expand err')
+ mytester:assert(module.weight:isSameSizeAs(module.gradWeight), 'CMul accGradParameters expand size err')
+
+ input:zero()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format('error on weight [%s]', t))
+ end
+
+ -- Non-contiguous input or gradOutput
+ local testModule = nn.CMul(4, 3, 5)
+ local testInput = torch.rand(10, 3, 5):resize(10, 1, 3, 5):expand(10, 4, 3, 5)
+ local testOutput = testModule:forward(testInput)
+
+ mytester:assert(testOutput:isSameSizeAs(testInput), 'CMul non-contiguous forward err')
+
+ local testGradOutput = torch.rand(10, 3, 5):resize(10, 1, 3, 5):expand(10, 4, 3, 5)
+ testOutput = testModule:forward(testInput)
+   local testGradInput = testModule:backward(testInput, testGradOutput)
+
+ mytester:assert(testGradInput:isSameSizeAs(testGradOutput), 'CMul non-contiguous backward err')
+
+ -- IO
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Contiguous()
+ local module = nn.Contiguous()
+
+ -- Contiguous input
+ local input = torch.rand(30,20,10)
+ local output = module:forward(input)
+
+ mytester:assert(output:ne(input):sum() == 0, 'output not equal to input')
+
+ -- Make input non-contiguous
+ local input2 = output:transpose(1,2)
+ local output2 = module:forward(input2)
+
+   mytester:assert(output2:ne(input2):sum() == 0, 'output not equal to non-contiguous input')
+end
+
+function nntest.Dropout()
+   local p = 0.2 -- probability of dropping out a neuron
+ local input = torch.Tensor(1000):fill((1-p))
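+   -- At train time nn.Dropout keeps each unit with probability (1-p) and
+   -- scales it by 1/(1-p), so the mask has mean 1 and a constant input of
+   -- (1-p) keeps an output mean close to (1-p); likewise for gradInput.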
+ local module = nn.Dropout(p)
+ -- version 2
+ local output = module:forward(input)
+ mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+ local gradInput = module:backward(input, input)
+ mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+ -- test inplace version
+ local module = nn.Dropout(p,nil,true)
+ local output = module:forward(input:clone())
+ mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+ local gradInput = module:backward(input:clone(), input:clone())
+ mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+
+ -- version 1 (old nnx version)
+ local input = input:fill(1)
+ local module = nn.Dropout(p,true)
+ local output = module:forward(input)
+ mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+ local gradInput = module:backward(input, input)
+ mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.SpatialDropout()
+   local p = 0.2 -- probability of dropping out a neuron
+ local w = math.random(1,5)
+ local h = math.random(1,5)
+ local nfeats = 1000
+ local input = torch.Tensor(nfeats, w, h):fill(1)
+ local module = nn.SpatialDropout(p)
+ module.train = true
+ local output = module:forward(input)
+ mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+ local gradInput = module:backward(input, input)
+ mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.SpatialDropoutBatch()
+   local p = 0.2 -- probability of dropping out a neuron
+ local bsz = math.random(1,5)
+ local w = math.random(1,5)
+ local h = math.random(1,5)
+ local nfeats = 1000
+ local input = torch.Tensor(bsz, nfeats, w, h):fill(1)
+ local module = nn.SpatialDropout(p)
+ module.train = true
+ local output = module:forward(input)
+ mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+ local gradInput = module:backward(input, input)
+ mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.VolumetricDropout()
+   local p = 0.2 -- probability of dropping out a neuron
+ local t = math.random(1,5)
+ local w = math.random(1,5)
+ local h = math.random(1,5)
+ local nfeats = 1000
+ local input = torch.Tensor(nfeats, t, w, h):fill(1)
+ local module = nn.VolumetricDropout(p)
+ module.train = true
+ local output = module:forward(input)
+ mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+ local gradInput = module:backward(input, input)
+ mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.VolumetricDropoutBatch()
+   local p = 0.2 -- probability of dropping out a neuron
+ local bsz = math.random(1,5)
+ local t = math.random(1,5)
+ local w = math.random(1,5)
+ local h = math.random(1,5)
+ local nfeats = 1000
+ local input = torch.Tensor(bsz, nfeats, t, w, h):fill(1)
+ local module = nn.VolumetricDropout(p)
+ module.train = true
+ local output = module:forward(input)
+ mytester:assert(math.abs(output:mean() - (1-p)) < 0.05, 'dropout output')
+ local gradInput = module:backward(input, input)
+ mytester:assert(math.abs(gradInput:mean() - (1-p)) < 0.05, 'dropout gradInput')
+end
+
+function nntest.ReLU()
+ local input = torch.randn(3,4)
+ local gradOutput = torch.randn(3,4)
+ local module = nn.ReLU()
+ local output = module:forward(input)
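+   -- reference: gt() yields a 0/1 mask of the positive entries, which
+   -- cmul() then applies to the input (and to gradOutput below)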
+ local output2 = input:clone():gt(input, 0):cmul(input)
+ mytester:assertTensorEq(output, output2, 0.000001, 'ReLU output')
+ local gradInput = module:backward(input, gradOutput)
+ local gradInput2 = input:clone():gt(input, 0):cmul(gradOutput)
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ReLU gradInput')
+end
+
+function nntest.ReLU6()
+ for inplace = 0, 1 do
+ local input = torch.randn(3, 4):mul(6)
+ local gradOutput = torch.randn(3,4)
+ local module = nn.ReLU6(inplace == 1)
+ local output = module:forward(input:clone())
+ local gt = input:clone():gt(input, 0)
+ local lt = input:clone():lt(input, 6)
+ local output2 = gt:clone():cmul(lt):cmul(input)
+ output2:add(6, input:clone():gt(input, 6))
+      mytester:assertTensorEq(output, output2, 0.000001, 'ReLU6 output '..(inplace == 1 and '(inplace)' or ''))
+ local gradInput = module:backward(input, gradOutput:clone())
+ local gradInput2 = gt:clone():cmul(lt):cmul(gradOutput)
+      mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ReLU6 gradInput '..(inplace == 1 and '(inplace)' or ''))
+ end
+end
+
+function nntest.GatedLinearUnit()
+ local model = nn.GatedLinearUnit()
+ local t = torch.Tensor({{1, 1}, {2, 2}, {3, 3}})
+ local thalf = torch.Tensor():resizeAs(t):copy(t):narrow(2, 1, 1)
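+   -- GLU splits the gated dimension (the last one by default) into halves
+   -- a and b and returns a * sigmoid(b); both halves of t are identical
+   -- here, so the expected output is thalf:cmul(torch.sigmoid(thalf)).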
+ mytester:assertTensorEq(
+ thalf:cmul(torch.sigmoid(thalf)),
+ model:forward(t):resizeAs(thalf),
+ 0.000001,
+ 'Gated Linear output'
+ )
+ t = torch.Tensor({{1, 1, 1, 1}, {2, 2, 2, 2}, {3, 3, 3, 3}})
+ thalf = torch.Tensor():resizeAs(t):copy(t):narrow(2, 1, 2)
+ mytester:assertTensorEq(
+ thalf:cmul(torch.sigmoid(thalf)),
+ model:forward(t),
+ 0.000001,
+ 'Gated Linear Unit output'
+ )
+
+ local input = torch.rand(1, 10)
+ local err = jac.testJacobian(model, input)
+ mytester:assert(err < precision, 'Gated Linear gradient')
+
+ input = torch.rand(5, 10, 6)
+ model = nn.GatedLinearUnit(2)
+ err = jac.testJacobian(model, input)
+ mytester:assert(err < precision, 'Gated Linear gradient, non-default dim')
+
+ input = torch.rand(5, 10, 6)
+ model = nn.GatedLinearUnit(3)
+ err = jac.testJacobian(model, input)
+ mytester:assert(err < precision, 'Gated Linear gradient, non-default dim')
+
+ input = torch.rand(5, 10)
+ model = nn.Sequential()
+ model:add(nn.Linear(10, 10))
+ model:add(nn.GatedLinearUnit())
+ model:add(nn.ReLU())
+ model:add(nn.LogSoftMax())
+ err = jac.testJacobian(model, input)
+ mytester:assert(err < precision, 'Gated Linear gradient with other layers')
+end
+
+function nntest.CReLU()
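+   -- CReLU concatenates ReLU(x) and ReLU(-x) along the feature dimension,
+   -- doubling the number of feature maps.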
+ local function _verifyCReLU(featureMaps, concatenatedFeatureMaps)
+ local rectifiedFeatureMaps = nn.ReLU():forward(featureMaps)
+ local rectifiedNegFeatureMaps = nn.ReLU():forward(-featureMaps)
+
+ mytester:asserteq(concatenatedFeatureMaps:size(1), featureMaps:size(1) * 2,
+ "CReLU should double the number of feature maps")
+
+ for i = 1, rectifiedFeatureMaps:size(1) do
+ local found = false
+ for j = 1, concatenatedFeatureMaps:size(1) do
+ found = found or rectifiedFeatureMaps[i]:equal(concatenatedFeatureMaps[j])
+ end
+ mytester:assert(found, "Original (rectified) feature maps should be in the output of CReLU")
+ end
+
+ for i = 1, rectifiedNegFeatureMaps:size(1) do
+ local found = false
+ for j = 1, concatenatedFeatureMaps:size(1) do
+            found = found or rectifiedNegFeatureMaps[i]:equal(concatenatedFeatureMaps[j])
+ end
+ mytester:assert(found, "The negative of the original (rectified) feature maps should be in the output of CReLU")
+ end
+ end
+
+ local model = nn.Sequential()
+ model:add(nn.SpatialConvolution(1, 3, 3, 3, 1, 1, 1, 1))
+
+ for _, inplace in pairs({true, false}) do
+ --batched
+ local crelu = nn.CReLU(3, inplace)
+ local input = torch.Tensor(2, 1, 20, 20):uniform()
+ local featureMaps = model:forward(input)
+ local concatenatedFeatureMaps = crelu:forward(featureMaps)
+ for i = 1, input:size(1) do
+ _verifyCReLU(featureMaps[i], concatenatedFeatureMaps[i])
+ end
+
+ --non-batched
+ local input = torch.Tensor(1, 20, 20):uniform()
+ local featureMaps = model:forward(input)
+ local concatenatedFeatureMaps = crelu:forward(featureMaps)
+ _verifyCReLU(featureMaps, concatenatedFeatureMaps)
+ end
+
+ --test gradients w.r.t input
+ local jac = nn.Jacobian
+
+ for _, inplace in pairs({true, false}) do
+ local crelu = nn.CReLU(3, inplace)
+ --batched
+ local input = torch.Tensor(2, 3, 20, 20):uniform()
+ local err = jac.testJacobian(crelu, input)
+ mytester:assertlt(err, precision, "error computing gradients w.r.t. inputs")
+
+ --I/O
+ local fwdErr,bkwdErr = jac.testIO(crelu,input)
+ mytester:asserteq(fwdErr, 0, torch.typename(crelu) .. " - i/o forward err ")
+ mytester:asserteq(bkwdErr, 0, torch.typename(crelu) .. " - i/o backward err ")
+
+ --non-batched
+ input = torch.Tensor(3, 20, 20):uniform()
+ err = jac.testJacobian(crelu,input)
+ mytester:assertlt(err, precision, "error computing gradients w.r.t. inputs")
+
+ --I/O
+ local fwdErr,bkwdErr = jac.testIO(crelu,input)
+ mytester:asserteq(fwdErr, 0, torch.typename(crelu) .. " - i/o forward err ")
+ mytester:asserteq(bkwdErr, 0, torch.typename(crelu) .. " - i/o backward err ")
+ end
+
+end
+
+function nntest.Exp()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Exp()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Log()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Log()
+
+ local err = jac.testJacobian(module,input, 0.1, 10)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input, 0.1, 10)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.HardTanh()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.HardTanh()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision , 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+   -- test inclusive bounds: HardTanh(1, inf) should behave like Threshold(1)
+ local input = torch.Tensor({1})
+ local gradOutput = torch.Tensor({1})
+ local gradOutputClone = gradOutput:clone()
+ local module = nn.HardTanh(1, math.huge, true)
+ local tanhGradInput = module:backward(input, gradOutput)
+
+ local input = input:clone()
+ local gradOutput = gradOutputClone
+ local module = nn.Threshold(1, 0, true)
+ local threshGradInput = module:backward(input, gradOutput)
+ mytester:assertTensorEq(tanhGradInput, threshGradInput, 0.000001, 'HardTanh gradInput')
+end
+
+function nntest.Clamp()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local max_value = math.abs(math.random())
+ local min_value = -math.abs(math.random())
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Clamp(min_value, max_value)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision , 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Abs()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Abs()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision , 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Threshold()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Threshold(torch.uniform(-2,2),torch.uniform(-2,2))
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.ELU()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.ELU(0.3)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision , 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.ELUIP()
+ local input = torch.randn(3,4)
+ local input2 = input:clone()
+ local gradOutput = torch.randn(3,4)
+ local gradOutput2 = gradOutput:clone()
+
+ -- Compare in-place to not in-place
+ local module = nn.ELU(0.3, true)
+ local module2 = nn.ELU(0.3, false)
+
+ local output = module:forward(input)
+ local output2 = module2:forward(input2)
+ mytester:assertTensorEq(output, output2, 0.000001, 'ELU output')
+ local gradInput = module:backward(input, gradOutput)
+ local gradInput2 = module2:backward(input2, gradOutput2)
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ELU gradInput')
+end
+
+function nntest.PReLU()
+ local ini = math.random(3,5)
+ local input = torch.Tensor(ini):zero()
+
+ local module = nn.PReLU(ini)
+
+ -- 1D
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ -- 2D
+ local nframe = math.random(1,7)
+ local input = torch.Tensor(nframe, ini):zero()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ -- 4D
+ local nframe = math.random(1,7)
+ local kW, kH = math.random(1,8), math.random(1,8)
+ local input = torch.Tensor(nframe, ini, kW, kH):zero()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ -- IO
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.RReLU()
+ local nframe = math.random(1,7)
+ local size = math.random(1,7)
+ local kW, kH = math.random(1,8), math.random(1,8)
+ local input = torch.Tensor(nframe, size, kW, kH):zero()
+
+ local l = 1/math.random(5,8)
+ local u = 1/math.random(3,5)
+
+ -- test in evaluation mode (not inplace), RReLU behaves like LeakyReLU
+ local module = nn.RReLU(l, u, false)
+ mytester:assert(module.train, 'default mode ')
+ module:evaluate()
+
+ -- gradient check
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ -- IO
+ local ferr,berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+   -- test training and evaluation modes
+ for _,train in ipairs({true,false}) do
+ -- test with separate output buffer and inplace
+ for _,inplace in ipairs({false,true}) do
+ module = nn.RReLU(l, u, inplace)
+ if train then
+ module:training()
+ else
+ module:evaluate()
+ end
+ input = torch.rand(nframe, size, kW, kH) - 0.5
+ input:storage()[1] = -1
+ local original_input = input:clone()
+ local output = module:forward(input)
+ mytester:assert(output:sign():eq(original_input:sign()):all(), 'sign flipped forward ')
+ local gradOutput = torch.ones(output:size())
+ local gradInput = module:backward(input, gradOutput)
+ mytester:assert(gradInput:gt(0):eq(input:ne(0)):all(), 'gradient ')
+ mytester:assert(gradInput:lt(1):eq(input:le(0)):all(), 'backward negative inputs ')
+ mytester:assert(gradInput:eq(1):eq(input:gt(0)):all(), 'backward positive inputs ')
+ if not train then
+            local err = math.abs(gradInput[input:le(0)]:mean() - (module.lower + module.upper)/2)
+ mytester:assertlt(err, precision, 'error on gradient ')
+ end
+
+ input = -torch.rand(1000)
+ module:forward(input) -- fill internal noise tensor
+ local g = module:backward(input, torch.ones(1000))
+ local err = math.abs(g[input:le(0)]:mean()-(module.lower+module.upper)/2)
+ mytester:assertlt(err, 0.05, 'mean deviation of gradient for negative inputs ')
+ end
+ end
+end
+
+function nntest.LeakyReLU()
+ local input = torch.randn(3,4)
+ local gradOutput = torch.randn(3,4)
+ local negval = math.random()
+ local module = nn.LeakyReLU(negval)
+ local output = module:forward(input)
+ local output2 = input:clone():gt(input, 0):cmul(input) + input:clone():le(input,0):cmul(input) * module.negval
+ mytester:assertTensorEq(output, output2, 0.000001, 'LeakyReLU output')
+ local gradInput = module:backward(input, gradOutput)
+ local gradInput2 = input:clone():gt(input, 0):cmul(gradOutput) + input:clone():le(input,0):cmul(gradOutput) * module.negval
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'LeakyReLU gradInput')
+end
+
+function nntest.LeakyReLUIP()
+ local input = torch.randn(3,4)
+ local gradOutput = torch.randn(3,4)
+ local negval = math.random()
+ local module = nn.LeakyReLU(negval,true)
+ local output = input:clone():gt(input, 0):cmul(input) + input:clone():le(input,0):cmul(input) * module.negval
+ local output2 = module:forward(input)
+ mytester:assertTensorEq(output2, output, 0.000001, 'LeakyReLU output')
+ local gradInput = input:clone():gt(input, 0):cmul(gradOutput) + input:clone():le(input,0):cmul(gradOutput) * module.negval
+ local gradInput2 = module:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInput2, gradInput, 0.000001, 'LeakyReLU gradInput')
+end
+
+function nntest.HardShrink()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.HardShrink(math.random()/2)
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SoftShrink()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.SoftShrink(math.random()/2)
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Power()
+ local in1 = torch.rand(5,7)
+ local module = nn.Power(2)
+ local out = module:forward(in1)
+ local err = out:dist(in1:cmul(in1))
+ mytester:assertlt(err, 1e-15, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local pw = torch.uniform()*math.random(1,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Power(pw)
+
+ local err = nn.Jacobian.testJacobian(module, input, 0.1, 2)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module,input, 0.1, 2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Normalize()
+ -- compare forward against torch implementation
+ -- and check gradient
+ for _,p in pairs({1,2,3,4,1.5}) do
+ local ini = math.random(3,10)
+ local input = torch.randn(ini)
+ local module = nn.Normalize(p)
+ local out = module:forward(input)
+ local expected = torch.div(input,input:norm(p))
+ mytester:assertTensorEq(out, expected, 1e-7,
+ torch.typename(module) ..' (' .. p ..') - forward err ')
+
+ local err = jac.testJacobian(module, input, -2, 2)
+ mytester:assertlt(err, precision, 'error norm '..p..' on state ')
+ end
+
+ -- batch mode
+ for _,p in pairs({1,2,3,4,torch.uniform()*math.random(1,10),math.huge}) do
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(inj, ini):zero()
+
+ local module = nn.Normalize(p)
+
+ local err = jac.testJacobian(module, input, -2, 2)
+ mytester:assertlt(err, precision, 'error norm '..p..' on state ')
+ end
+
+ -- test IO correctness
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(inj, ini):zero()
+
+ local module = nn.Normalize(2)
+
+ local ferr, berr = jac.testIO(module,input, 0.1, 2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+end
+
+function nntest.Square()
+ local in1 = torch.rand(5,7)
+ local module = nn.Square()
+ local out = module:forward(in1)
+ local err = out:dist(in1:cmul(in1))
+ mytester:assertlt(err, 1e-15, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Square()
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Sqrt()
+ local in1 = torch.rand(5,7)
+ local module = nn.Sqrt()
+ local out = module:forward(in1)
+ local err = out:dist(in1:sqrt())
+ mytester:assertlt(err, 1e-15, torch.typename(module) .. ' - forward err ')
+
+ -- Test zero inputs: the backward pass must avoid the div-by-zero singularity by returning zero gradients
+ local zin = torch.DoubleTensor(5, 7):zero()
+ module:forward(zin)
+ local zgradout = torch.rand(5, 7)
+ local zgradin = module:backward(zin, zgradout)
+ mytester:assertTensorEq(zgradin, torch.DoubleTensor(5, 7):zero(), 0.000001, "error in sqrt backward singularity")
+
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Sqrt()
+
+ local err = nn.Jacobian.testJacobian(module, input, 0.1, 2)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input, 0, 2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Linear()
+ local ini = math.random(3,5)
+ local inj_vals = {math.random(3,5), 1} -- Also test the inj = 1 spatial case
+ local input = torch.Tensor(ini):zero()
+
+ for ind, inj in pairs(inj_vals) do
+ local module = nn.Linear(ini,inj)
+
+ local function jacTests(module)
+ -- 1D
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+ end
+
+ nn.hessian.enable()
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ if module.bias then
+ local err = jac.testDiagHessianBias(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianBias')
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+ end
+
+ -- 2D
+ local nframe = math.random(50,70)
+ local input = torch.Tensor(nframe, ini):zero()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+ end
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ if module.bias then
+ local err = jac.testDiagHessianBias(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianBias')
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+ end
+
+ -- IO
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+ end
+
+ jacTests(module)
+ module:noBias()
+ jacTests(module)
+ module.bias = torch.Tensor(inj):zero()
+ module.gradBias = torch.Tensor(inj):zero()
+ module:reset()
+ jacTests(module)
+ end -- for ind, inj in pairs(inj_vals) do
+end
+
+local function test_sparse_linear(inb, ini, inj, numNonzero)
+ local module = nn.SparseLinear(ini,inj, true)
+ local linear = nn.Linear(ini, inj)
+ linear.weight = module.weight:clone()
+ linear.bias = module.bias:clone()
+ module:zeroGradParameters()
+ linear:zeroGradParameters()
+
+ -- Create a random sparse vector
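+ -- each sample is an nnz x 2 tensor: column 1 holds indices, column 2 holds values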
+ local input = {}
+ local nonsparse = torch.zeros(inb, ini)
+ for i=1,inb do
+ local nnz = math.random(1, 3) + numNonzero
+ local inds = torch.randperm(ini)[{{1,nnz}}]
+ input[i] = torch.Tensor(nnz, 2)
+ input[i]:select(2,1):copy(inds)
+ input[i]:select(2,2):copy(torch.rand(nnz))
+ nonsparse[i]:scatter(1, input[i]:select(2,1):long(), input[i]:select(2,2))
+ end
+ local gradOutput = torch.rand(inb, inj)
+
+ local cmps = {'weight', 'bias', 'gradWeight', 'gradBias'}
+
+ -- Check output wrt linear, non-batch
+ local actual = module:forward(input[1])
+ local expected = linear:forward(nonsparse[1])
+ local actualgi = module:backward(input[1], gradOutput[1])
+ local expectedgi = linear:backward(nonsparse[1], gradOutput[1])
+ module:updateParameters(1)
+ linear:updateParameters(1)
+ local err = (expected - actual):abs():max()
+ local gierr = (expectedgi - actualgi[1]:select(2,2)):abs():max()
+ mytester:assertle(err, precision, 'error on result')
+ mytester:assertle(gierr, precision, 'error on gradInput')
+
+ for _,var in ipairs(cmps) do
+ local err = (module[var] - linear[var]):abs():max()
+ mytester:assertle(err, precision, 'error on '..var)
+ end
+ module:zeroGradParameters()
+ linear:zeroGradParameters()
+
+ -- Check output wrt linear, batch
+ -- running this n times exercises the fast-path parameter updates for repeated inputs
+ local test_n_times = function(ntimes)
+ local actual, expected, actualgi, expectedgi
+ for i=1, ntimes do
+ actual = module:forward(input)
+ expected = linear:forward(nonsparse)
+ actualgi = module:backward(input, gradOutput)
+ expectedgi = linear:backward(nonsparse, gradOutput)
+ end
+ module:updateParameters(1)
+ linear:updateParameters(1)
+ local err = (expected - actual):abs():max()
+ local gicheck = torch.Tensor():resizeAs(expectedgi)
+ for i=1,#actualgi do gicheck[i]:copy(actualgi[i]:select(2,2)) end
+ local gierr = (expectedgi - gicheck):abs():max()
+ mytester:assertle(err, precision, 'error on result with ntimes = '..ntimes)
+ mytester:assertle(gierr, precision, 'error on gradInput with ntimes = '..ntimes)
+
+ for _,var in ipairs(cmps) do
+ local err = (module[var] - linear[var]):abs():max()
+ mytester:assertle(err, precision, 'error on '..var..' with ntimes = '..ntimes)
+ end
+
+ module:zeroGradParameters()
+ linear:zeroGradParameters()
+ mytester:assertle(module.gradWeight:sum(), precision, 'error zeroing gradweight')
+ mytester:assertle(module.gradBias:sum(), precision, 'error zeroing gradbias')
+
+ end
+
+ test_n_times(1)
+ test_n_times(2)
+ test_n_times(3)
+
+ -- legacy batch mode
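+ -- legacy format: a dense batch x numNonzero x 2 tensor of (index, value) pairs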
+ local batch = math.random(2,5)
+
+ local input = torch.Tensor(batch, numNonzero, 2):zero()
+ for k=1,batch do
+ local N = {}
+ for i = 1, ini do N[i] = i end
+ for i = 1, numNonzero do
+ local j = math.random(i,ini)
+ N[i], N[j] = N[j], N[i]
+ end
+ for i = 1, numNonzero do input[{k,i,1}] = N[i] end
+ end
+ local values = input:select(3,2)
+ values:copy(torch.rand(values:nElement())):mul(2):add(-1)
+
+ -- Check output
+ local actual = module:forward(input):clone()
+ local expected = torch.Tensor(batch, inj)
+ for k = 1, batch do
+ expected[k]:copy(module:forward(input[k]))
+ end
+ local err = (expected - actual):abs():max()
+ mytester:assertle(err, precision, 'error on batch result forward')
+end
+
+function nntest.SparseLinear()
+ local inb = math.random(5,10)
+ local ini = math.random(50,100)
+ local inj = math.random(5,10)
+ local numNonzero = math.random(3,5)
+
+ test_sparse_linear(inb, ini, inj, numNonzero)
+ -- Tests OMP parallelism
+ test_sparse_linear(1, 50000, 10, 20000)
+ test_sparse_linear(1000, 1000, 10, 100)
+end
+
+local function testIndexLinear(bsize, iSize, oSize, nnz)
+ local inb = bsize
+ local ini = iSize
+ local inj = oSize
+
+ local ilinear = nn.IndexLinear(ini,inj, true, nil, nil, nil, false)
+ local ilinear2 = nn.IndexLinear(ini,inj, true, nil, nil, nil, false)
+ local linear = nn.Linear(ini, inj)
+ ilinear.weight:zero()
+ ilinear.weight:copy(linear.weight:t():clone())
+ ilinear.bias = linear.bias:clone()
+ ilinear:zeroGradParameters()
+
+ ilinear2.weight:zero()
+ ilinear2.weight:copy(linear.weight:t():clone())
+ ilinear2.bias = linear.bias:clone()
+ ilinear2:zeroGradParameters()
+
+ linear:zeroGradParameters()
+
+ -- Create a random sparse vector
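+ -- input is a table {indices, values} per sample; flatInput is the batched form
+ -- {flatIndices, flatValues, sizes}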
+ local input = {{},{}}
+ local flatInput = {torch.LongTensor(), torch.Tensor(), torch.LongTensor()}
+ local nonsparse = torch.zeros(inb, ini)
+ local sizes = flatInput[3]
+ sizes:resize(inb)
+ for i=1,inb do
+ sizes[i] = nnz
+ input[1][i] = torch.randperm(ini)[{{1,nnz}}]:long()
+ input[2][i] = torch.ones(nnz):uniform()
+ nonsparse[i]:scatter(1, input[1][i], input[2][i])
+ end
+ flatInput[1]:cat(input[1])
+ flatInput[2]:cat(input[2])
+
+ local gradOutput = torch.rand(inb, inj)
+ local cmps = {'weight', 'bias', 'gradBias'}
+ -- Check output wrt linear, non-batch
+ local actual = ilinear:forward({input[1][1], input[2][1]})
+ local actual2 = ilinear2:forward({input[1][1], input[2][1], flatInput[3][1]})
+ local expected = linear:forward(nonsparse[1])
+
+ local actualgi = ilinear:backward({input[1][1], input[2][1]}, gradOutput[1])
+ local actualgi2 = ilinear2:backward({input[1][1], input[2][1], flatInput[3][1]}, gradOutput[1])
+ local expectedgi = linear:backward(nonsparse[1], gradOutput[1])
+
+ ilinear:updateParameters(1)
+ ilinear2:updateParameters(1)
+ linear:updateParameters(1)
+
+ local err = (expected - actual):abs():max()
+ local err2 = (expected - actual2):abs():max()
+
+ local gierr = (expectedgi - actualgi[2]):abs():max()
+ local gierr2 = (expectedgi - actualgi2[2]):abs():max()
+
+ mytester:assertle(err, precision, 'error on result for tensor array')
+ mytester:assertle(gierr, precision, 'error on gradInput for tensor array')
+
+ mytester:assertle(err2, precision, 'error on result for batched tensor')
+ mytester:assertle(gierr2, precision, 'error on gradInput for batched tensor')
+
+ for _,var in ipairs(cmps) do
+ local err, err2
+ if var == 'weight' then
+ err = (ilinear[var]:t() - linear[var]):abs():max()
+ err2 = (ilinear2[var]:t() - linear[var]):abs():max()
+ else
+ err = (ilinear[var] - linear[var]):abs():max()
+ err2 = (ilinear2[var] - linear[var]):abs():max()
+ end
+ mytester:assertle(err, precision, 'error on '..var..' for tensor array')
+ mytester:assertle(err2, precision, 'error on '..var..' for batched tensor')
+ end
+ ilinear:zeroGradParameters()
+ ilinear2:zeroGradParameters()
+ linear:zeroGradParameters()
+
+ -- Check output wrt linear, batch
+ -- running this n times exercises the fast-path parameter updates for repeated inputs
+ local test_n_times = function(ntimes)
+ local actual, expected, actualgi, expectedgi
+ for i=1, ntimes do
+ actual = ilinear:forward(input)
+ actual2 = ilinear2:forward(flatInput)
+ expected = linear:forward(nonsparse)
+
+ actualgi = ilinear:backward(input, gradOutput)
+ actualgi2 = ilinear2:backward(flatInput, gradOutput)
+ expectedgi = linear:backward(nonsparse, gradOutput)
+ end
+ ilinear:updateParameters(1)
+ ilinear2:updateParameters(1)
+ linear:updateParameters(1)
+
+ local err = (expected - actual):abs():max()
+ local err2 = (expected - actual2):abs():max()
+
+ local gicheck = torch.Tensor():resizeAs(expectedgi)
+ local gicheck2 = actualgi2[2]
+
+ for i=1,#actualgi[2] do
+ gicheck[i]:copy(actualgi[2][i])
+ end
+ local gierr = (expectedgi - gicheck):abs():max()
+ local gierr2 = (expectedgi - gicheck2):abs():max()
+
+ mytester:assertle(err, precision, 'error on result for tensor array with ntimes = '..ntimes)
+ mytester:assertle(err2, precision, 'error on result for batched tensor with ntimes = '..ntimes)
+
+ mytester:assertle(gierr, precision, 'error on gradInput for tensor array with ntimes = '..ntimes)
+ mytester:assertle(gierr2, precision, 'error on gradInput for batched tensor with ntimes = '..ntimes)
+
+ for _,var in ipairs(cmps) do
+ local err, err2
+ if var == 'weight' then
+ err = (ilinear[var]:t() - linear[var]):abs():max()
+ err2 = (ilinear2[var]:t() - linear[var]):abs():max()
+ else
+ err = (ilinear[var] - linear[var]):abs():max()
+ err2 = (ilinear2[var] - linear[var]):abs():max()
+ end
+ mytester:assertle(err, precision, 'error on '..var..' for tensor array')
+ mytester:assertle(err2, precision, 'error on '..var..' for batched tensor')
+ end
+
+ ilinear:zeroGradParameters()
+ ilinear2:zeroGradParameters()
+ linear:zeroGradParameters()
+ mytester:assertle(ilinear.gradBias:sum(), precision, 'error zeroing gradbias for tensor array')
+ mytester:assertle(ilinear2.gradBias:sum(), precision, 'error zeroing gradbias for batched tensor')
+ end
+ test_n_times(1)
+ test_n_times(2)
+ test_n_times(3)
+end
+
+function nntest.IndexLinear()
+ testIndexLinear(4, 40 , 10, 30)
+ testIndexLinear(4, 40 , 500, 30)
+ testIndexLinear(4, 200000 , 5, 150000)
+
+ local sizes = {
+ {osize = 1, isize = 10000, nnz = 10000, bsize = 16},
+ {osize = 10, isize = 10000, nnz = 10000, bsize = 16},
+ {osize = 100, isize = 10000, nnz = 10000, bsize = 16},
+
+ {osize = 1, isize = 10000, nnz = 200000, bsize = 1},
+ {osize = 10, isize = 10000, nnz = 200000, bsize = 1},
+ {osize = 100, isize = 10000, nnz = 200000, bsize = 1},
+
+ {osize = 1, isize = 10000, nnz = 200000, bsize = 2},
+ {osize = 10, isize = 10000, nnz = 200000, bsize = 2},
+ {osize = 100, isize = 10000, nnz = 200000, bsize = 2},
+ }
+
+ for i, lsizes in ipairs(sizes) do
+ -- Test multithreaded updates
+ local isize = lsizes.isize
+ local osize = lsizes.osize
+ local il = nn.IndexLinear(isize, osize)
+ local batch = {{},{}}
+ local idx = 100
+ local nnz = lsizes.nnz
+ local bsize = lsizes.bsize
+ for i=1,bsize do
+ batch[1][i] = torch.LongTensor(nnz):fill(idx)
+ batch[2][i] = torch.DoubleTensor(nnz):fill(1)
+ end
+ local totalSize = bsize*nnz
+ local lr = 0.01
+ -- Update the same index all over
+ local out = il:updateOutput(batch)
+ out:fill(1)
+ il:backwardUpdate(batch, out, lr)
+ il:backward(batch, out, 1)
+ il:updateParameters(lr)
+ for i=1,osize do
+ mytester:assertlt(math.abs(il.weight[idx][i] + totalSize * lr * 2), precision, 'parameters update was wrong.')
+ end
+ end
+end
+
+function nntest.Bilinear()
+
+ -- set up data:
+ local N = 10
+ local D1 = 5
+ local D2 = 4
+ local K = 3
+ local input = {torch.randn(N, D1), torch.randn(N, D2)}
+ local target = torch.randn(N, K)
+
+ -- test forward
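+ -- reference: output[n][k] = input1[n] * weight[k] * input2[n]^T + bias[k], built per output slice k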
+ local module = nn.Bilinear(D1, D2, K)
+ local expected = torch.zeros(N,K)
+ for k = 1, K do
+ local temp = torch.mm(module.weight[k], input[2]:t())
+ temp:cmul(input[1]:t())
+ temp = temp:sum(1)
+ temp:add(module.bias[k])
+ expected[{{},k}] = temp:view(-1)
+ end
+ local output = module:forward(input)
+ mytester:assertTensorEq(expected, output, 0.000001, 'Bilinear forward 2D err')
+
+ -- For testing grads we'll follow the nn.DotProduct strategy of using a SplitTable
+ local input2 = torch.randn(2, N, D1)
+ local module2 = nn.Sequential()
+ module2:add(nn.SplitTable(1))
+ module2:add(nn.ParallelTable():add(nn.Linear(D1,D1)):add(nn.Linear(D1,D2)))
+ module2:add(nn.Bilinear(D1, D2, K))
+ module2:add(nn.Linear(K,1))
+
+ local err = jac.testJacobian(module2, input2)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module2, input2, module2:get(3).weight, module2:get(3).gradWeight)
+ mytester:assertlt(err, precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module2, input2, module2:get(3).bias, module2:get(3).gradBias)
+ mytester:assertlt(err, precision, 'error on bias ')
+
+end
+
+function nntest.PartialLinear()
+
+ -- settings for experiment:
+ local N = 10
+ local D = 5
+ local K = 15
+
+ -- test forward-backward pass of module:
+ local module = nn.PartialLinear(D, K)
+ for sub_K = 1,K do
+
+ -- get random test case:
+ local input = torch.randn(N, D)
+ local partition = torch.randperm(K):narrow(1, 1, sub_K)
+
+ -- do forward-backward pass:
+ module:setPartition(partition)
+ module:forward(input)
+ mytester:asserteq(module.output:size(1), N)
+ mytester:asserteq(module.output:size(2), sub_K)
+ module:backward(input, torch.ones(N, sub_K))
+ mytester:asserteq(module.gradInput:size(1), input:size(1))
+ mytester:asserteq(module.gradInput:size(2), input:size(2))
+
+ -- do parameter update:
+ local lr = .01
+ module:updateParameters(lr)
+ end
+ module:resetPartition()
+
+ -- compare output with linear layer:
+ local module2 = nn.Linear(D, K)
+ module2.weight:copy(module.network:get(1):get(2).weight)
+ module2.bias:fill(0)
+ if module.bias then module2.bias:copy(module.bias) end
+ local input = torch.randn(N, D)
+ local diff = (module:forward(input) - module2:forward(input)):abs():sum()
+ mytester:assertlt(diff, 1e-7)
+
+ -- gradient checks:
+ local sub_K = 5
+ local partition = torch.randperm(K):narrow(1, 1, sub_K)
+ module:setPartition(partition)
+ local err = sjac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = sjac.testJacobianParameters(module, input, module.network:get(1):get(2).weight, module.network:get(1):get(2).gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = sjac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = sjac.testJacobianUpdateParameters(module, input, module.network:get(1):get(2).weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ local err = sjac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ local ferr, berr = sjac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Euclidean()
+ local ini = math.random(5,7)
+ local inj = math.random(5,7)
+ local input = torch.randn(ini)
+ local gradOutput = torch.randn(inj)
+ local module = nn.Euclidean(ini,inj)
+ local output = module:forward(input):clone()
+
+ local output2 = torch.Tensor(inj):zero()
+ for o = 1,module.weight:size(2) do
+ output2[o] = input:dist(module.weight:select(2,o))
+ end
+ mytester:assertTensorEq(output, output2, 0.000001, 'Euclidean forward 1D err')
+
+ local input2 = torch.randn(8, ini)
+ input2[2]:copy(input)
+ local output2 = module:forward(input2)
+ mytester:assertTensorEq(output2[2], output, 0.000001, 'Euclidean forward 2D err')
+
+ local output = module:forward(input):clone()
+ module:zeroGradParameters()
+ local gradInput = module:backward(input, gradOutput, 1):clone()
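+ -- reference gradInput: sum over columns o of gradOutput[o] * (input - weight[:,o]) / output[o]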
+ local gradInput2 = torch.zeros(ini)
+ local temp = input:clone()
+ for o = 1,module.weight:size(2) do
+ temp:copy(input)
+ temp:add(-1,module.weight:select(2,o))
+ temp:mul(gradOutput[o]/output[o])
+ gradInput2:add(temp)
+ end
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'Euclidean updateGradInput 1D err')
+
+ local gradWeight = module.gradWeight:clone():zero()
+ for o = 1,module.weight:size(2) do
+ temp:copy(module.weight:select(2,o)):add(-1,input)
+ temp:mul(gradOutput[o]/output[o])
+ gradWeight:select(2,o):add(1, temp)
+ end
+ mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'Euclidean accGradParameters 1D err')
+
+ local input2 = input:view(1, -1):repeatTensor(8, 1)
+ local gradOutput2 = gradOutput:view(1, -1):repeatTensor(8, 1)
+ local output2 = module:forward(input2)
+ module:zeroGradParameters()
+ local gradInput2 = module:backward(input2, gradOutput2, 1/8)
+ mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, 'Euclidean updateGradInput 2D err')
+
+ mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'Euclidean accGradParameters 2D err')
+
+ input:zero()
+ module.fastBackward = false
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.WeightedEuclidean()
+ local ini = math.random(5,7)
+ local inj = math.random(5,7)
+ local input = torch.randn(ini)
+ local gradOutput = torch.randn(inj)
+ local module = nn.WeightedEuclidean(ini,inj)
+
+ local output = module:forward(input):clone()
+
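+ -- reference forward: output[o] = sqrt(sum_i diagCov[i][o]^2 * (input[i] - weight[i][o])^2)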
+ local output2 = torch.Tensor(inj):zero()
+ local temp = input:clone()
+ for o = 1,module.weight:size(2) do
+ temp:copy(input):add(-1,module.weight:select(2,o))
+ temp:cmul(temp)
+ temp:cmul(module.diagCov:select(2,o)):cmul(module.diagCov:select(2,o))
+ output2[o] = math.sqrt(temp:sum())
+ end
+ mytester:assertTensorEq(output, output2, 0.000001, 'WeightedEuclidean forward 1D err')
+
+ local input2 = torch.randn(8, ini)
+ input2[2]:copy(input)
+ local output2 = module:forward(input2)
+ mytester:assertTensorEq(output2[2], output, 0.000001, 'WeightedEuclidean forward 2D err')
+
+ local output = module:forward(input):clone()
+ module:zeroGradParameters()
+ local gradInput = module:backward(input, gradOutput, 1):clone()
+ local gradInput2 = torch.zeros(ini)
+ for o = 1,module.weight:size(2) do
+ temp:copy(input)
+ temp:add(-1,module.weight:select(2,o))
+ temp:cmul(module.diagCov:select(2,o)):cmul(module.diagCov:select(2,o))
+ temp:mul(gradOutput[o]/output[o])
+ gradInput2:add(temp)
+ end
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'WeightedEuclidean updateGradInput 1D err')
+
+ local gradWeight = module.gradWeight:clone():zero()
+ local gradDiagCov = module.gradDiagCov:clone():zero()
+ for o = 1,module.weight:size(2) do
+ if output[o] ~= 0 then
+ temp:copy(module.weight:select(2,o)):add(-1,input)
+ temp:cmul(module.diagCov:select(2,o)):cmul(module.diagCov:select(2,o))
+ temp:mul(gradOutput[o]/output[o])
+ gradWeight:select(2,o):add(temp)
+
+ temp:copy(module.weight:select(2,o)):add(-1,input)
+ temp:cmul(temp)
+ temp:cmul(module.diagCov:select(2,o))
+ temp:mul(gradOutput[o]/output[o])
+ gradDiagCov:select(2,o):add(temp)
+ end
+ end
+ mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'WeightedEuclidean accGradParameters gradWeight 1D err')
+ mytester:assertTensorEq(gradDiagCov, module.gradDiagCov, 0.000001, 'WeightedEuclidean accGradParameters gradDiagCov 1D err')
+
+ local input2 = input:view(1, -1):repeatTensor(8, 1)
+ local gradOutput2 = gradOutput:view(1, -1):repeatTensor(8, 1)
+ local output2 = module:forward(input2)
+ module:zeroGradParameters()
+ local gradInput2 = module:backward(input2, gradOutput2, 1/8)
+ mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, 'WeightedEuclidean updateGradInput 2D err')
+
+ mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'WeightedEuclidean accGradParameters gradWeight 2D err')
+ mytester:assertTensorEq(gradDiagCov, module.gradDiagCov, 0.000001, 'WeightedEuclidean accGradParameters gradDiagCov 2D err')
+
+ input:zero()
+ module.fastBackward = false
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.diagCov, module.gradDiagCov)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ input:zero()
+ module:zeroGradParameters()
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.diagCov, module.gradDiagCov)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local ferr,berr = jac.testIO(module,input2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
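+-- Checks a criterion's analytic gradient (:backward) against a central-difference
+-- estimate of the forward loss: f'(x_i) ~ (f(x_i + h) - f(x_i - h)) / (2h).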
+local function criterionJacobianTest(cri, input, target)
+ local eps = 1e-6
+ local _ = cri:forward(input, target)
+ local dfdx = cri:backward(input, target)
+ -- for each input perturbation, do central difference
+ local centraldiff_dfdx = torch.Tensor():resizeAs(dfdx)
+ local input_s = input:storage()
+ local centraldiff_dfdx_s = centraldiff_dfdx:storage()
+ for i=1,input:nElement() do
+ -- f(xi + h)
+ input_s[i] = input_s[i] + eps
+ local fx1 = cri:forward(input, target)
+ -- f(xi - h)
+ input_s[i] = input_s[i] - 2*eps
+ local fx2 = cri:forward(input, target)
+ -- f'(xi) = (f(xi + h) - f(xi - h)) / 2h
+ local cdfx = (fx1 - fx2) / (2*eps)
+ -- store f' in appropriate place
+ centraldiff_dfdx_s[i] = cdfx
+ -- reset input[i]
+ input_s[i] = input_s[i] + eps
+ end
+
+ -- compare centraldiff_dfdx with :backward()
+ local err = (centraldiff_dfdx - dfdx):abs():max()
+ mytester:assertlt(err, precision, 'error in difference between central difference and :backward')
+end
+
+local function criterionJacobianTest1DTable(cri, input0, target)
+ -- assumes input is a tensor, which is split along the first dimension
+ local input = input0:split(1,1)
+ for i=1,#input do
+ input[i] = input[i][1]
+ end
+ local eps = 1e-6
+ local _ = cri:forward(input, target)
+ local dfdx = cri:backward(input, target)
+ -- for each input perturbation, do central difference
+ local centraldiff_dfdx = torch.Tensor():resizeAs(input0)
+ local input_s = input0:storage()
+ local centraldiff_dfdx_s = centraldiff_dfdx:storage()
+ for i=1,input0:nElement() do
+ -- f(xi + h)
+ input_s[i] = input_s[i] + eps
+ local fx1 = cri:forward(input, target)
+ -- f(xi - h)
+ input_s[i] = input_s[i] - 2*eps
+ local fx2 = cri:forward(input, target)
+ -- f'(xi) = (f(xi + h) - f(xi - h)) / 2h
+ local cdfx = (fx1 - fx2) / (2*eps)
+ -- store f' in appropriate place
+ centraldiff_dfdx_s[i] = cdfx
+ -- reset input[i]
+ input_s[i] = input_s[i] + eps
+ end
+ local centraldiff_dfdx_t = centraldiff_dfdx:split(1,1)
+ for i=1,#centraldiff_dfdx_t do
+ centraldiff_dfdx_t[i] = centraldiff_dfdx_t[i][1]
+ end
+ for i=1,#centraldiff_dfdx_t do
+ -- compare centraldiff_dfdx with :backward()
+ local err = (centraldiff_dfdx_t[i] - dfdx[i]):abs():max()
+ mytester:assertlt(err, precision, 'error in difference between central difference and :backward')
+ end
+end
+
+function nntest.SmoothL1Criterion()
+ local input = torch.rand(10)
+ local target = input:clone():add(torch.rand(10))
+ local cri = nn.SmoothL1Criterion()
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.MSECriterion()
+ local input = torch.rand(10)
+ local target = input:clone():add(torch.rand(10))
+ local cri = nn.MSECriterion()
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.SpatialAutoCropMSECriterion()
+ -- Tests the assumptions on input and target dimensions for the
+ -- nn.SpatialAutoCropMSECriterion criterion
+ local function testInputBounds()
+ for _, average in pairs({true, false}) do
+ local sMSE = nn.SpatialAutoCropMSECriterion(average)
+
+ local input = torch.Tensor(3, 3, 3)
+ local target = torch.Tensor(4, 3, 3)
+ mytester:assertError(function() sMSE:forward(input, target) end,
+ "Target and input must have same number of channels")
+
+ input = torch.Tensor(2, 4, 3, 3)
+ target = torch.Tensor(2, 3, 3, 3)
+ mytester:assertError(function() sMSE:forward(input, target) end,
+ "Target and input must have same number of channels")
+
+ input = torch.Tensor(2, 3, 3, 3)
+ target = torch.Tensor(1, 3, 3, 3)
+ mytester:assertError(function() sMSE:forward(input, target) end,
+ "Target and input must have same batch size")
+
+ input = torch.Tensor(2, 5, 5)
+ target = torch.Tensor(2, 5, 4)
+ mytester:assertError(function() sMSE:forward(input, target) end,
+ "input resolution must be smaller or equal to the spatial resolution of the target")
+
+ input = torch.Tensor(1, 2, 5, 5)
+ target = torch.Tensor(1, 2, 4, 5)
+ mytester:assertError(function() sMSE:forward(input, target) end,
+ "input resolution must be smaller or equal to the spatial resolution of the target")
+ end
+ end
+
+ -- Tests that the forward pass of nn.SpatialAutoCropMSECriterion
+ -- is equivalent to the forward pass of nn.MSECriterion with a pre-cropped target
+ local function testSpatialAutoCropMSECriterionBatched()
+ for _, average in pairs({true, false}) do
+ local sMSE = nn.SpatialAutoCropMSECriterion(average)
+ local MSE = nn.MSECriterion(average)
+
+ local batchSize = math.random(1,10)
+ local channels = math.random(1,10)
+ local inputHeight = math.random(1, 50)
+ local inputWidth = math.random(1, 50)
+ local targetHeight = inputHeight + math.random(0,5)
+ local targetWidth = inputWidth + math.random(0,5)
+
+ local input = torch.Tensor(batchSize, channels, inputHeight, inputWidth):uniform()
+ local target = torch.Tensor(batchSize, channels, targetHeight, targetWidth):uniform()
+
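+ -- crop indices: the target is cropped symmetrically (centered) down to the input's spatial size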
+ local heightStartIdx = 1 + math.floor((targetHeight - inputHeight)/2.0)
+ local heightEndIdx = heightStartIdx + inputHeight - 1
+ local widthStartIdx = 1 + math.floor((targetWidth - inputWidth)/2.0)
+ local widthEndIdx = widthStartIdx + inputWidth - 1
+
+ local croppedTarget = target[{{}, {}, {heightStartIdx, heightEndIdx}, {widthStartIdx, widthEndIdx}}]
+
+ local sMSEOut = sMSE:forward(input, target)
+ local MSEOut = MSE:forward(input, croppedTarget)
+ mytester:asserteq(sMSEOut, MSEOut)
+
+ local gradOutput = torch.Tensor():resizeAs(croppedTarget):uniform()
+ local sMSEGradInput = sMSE:backward(input, gradOutput)
+ local MSEGradInput = MSE:backward(input, gradOutput)
+ mytester:assertTensorEq(sMSEGradInput, MSEGradInput, 1e-7)
+ criterionJacobianTest(sMSE, input, gradOutput)
+ end
+ end
+
+ local function testSpatialAutoCropMSECriterionNonBatched()
+ for _, average in pairs({true, false}) do
+ local sMSE = nn.SpatialAutoCropMSECriterion(average)
+ local MSE = nn.MSECriterion(average)
+
+ local channels = math.random(1,10)
+ local inputHeight = math.random(1, 50)
+ local inputWidth = math.random(1, 50)
+ local targetHeight = inputHeight + math.random(0,5)
+ local targetWidth = inputWidth + math.random(0,5)
+
+ local input = torch.Tensor(channels, inputHeight, inputWidth):uniform()
+ local target = torch.Tensor(channels, targetHeight, targetWidth):uniform()
+
+ local heightStartIdx = 1 + math.floor((targetHeight - inputHeight)/2.0)
+ local heightEndIdx = heightStartIdx + inputHeight - 1
+ local widthStartIdx = 1 + math.floor((targetWidth - inputWidth)/2.0)
+ local widthEndIdx = widthStartIdx + inputWidth - 1
+
+ local croppedTarget = target[{{}, {heightStartIdx, heightEndIdx}, {widthStartIdx, widthEndIdx}}]
+
+ local sMSEOut = sMSE:forward(input, target)
+ local MSEOut = MSE:forward(input, croppedTarget)
+ mytester:asserteq(sMSEOut, MSEOut)
+
+ local gradOutput = torch.Tensor():resizeAs(croppedTarget):uniform()
+ local sMSEGradInput = sMSE:backward(input, gradOutput)
+ local MSEGradInput = MSE:backward(input, gradOutput)
+ mytester:assertTensorEq(sMSEGradInput, MSEGradInput, 1e-7)
+ criterionJacobianTest(sMSE, input, gradOutput)
+ end
+ end
+
+ testInputBounds()
+ testSpatialAutoCropMSECriterionBatched()
+ testSpatialAutoCropMSECriterionNonBatched()
+end
+
+function nntest.ClassSimplexCriterion()
+ local nClasses = torch.random(3,15)
+ local input = torch.rand(nClasses)
+ local target = torch.random(1,nClasses)
+ local cri = nn.ClassSimplexCriterion(nClasses)
+ criterionJacobianTest(cri, input, target)
+end
+
+
+function nntest.MarginCriterion()
+ local input = torch.rand(100)
+ local target = input:clone():add(torch.rand(100))
+ local cri = nn.MarginCriterion()
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.SoftMarginCriterion()
+ local input = torch.rand(100)
+ local target = input:clone():add(torch.rand(100))
+ local cri = nn.SoftMarginCriterion()
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.MultiMarginCriterion()
+ local input = torch.rand(100)
+ local target = math.random(1,100)
+ local cri = nn.MultiMarginCriterion(math.random(1,2), nil, 0.1)
+ criterionJacobianTest(cri, input, target)
+
+ local cri = nn.MultiMarginCriterion()
+ criterionJacobianTest(cri, input, target)
+
+ local cri = nn.MultiMarginCriterion(2)
+ criterionJacobianTest(cri, input, target)
+
+ local weights = torch.randn(100)
+ local cri = nn.MultiMarginCriterion(1, weights)
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.MarginRankingCriterion()
+ local input = {torch.rand(1), torch.rand(1)}
+ local mrc = nn.MarginRankingCriterion()
+ local output = mrc:forward(input, 1)
+ local gradInput = mrc:backward(input, 1)
+ -- cast to float
+ local input2 = {input[1]:float(), input[2]:float()}
+ local mrc2 = mrc:clone():float()
+ local output2 = mrc2:forward(input2, 1)
+ local gradInput2 = mrc2:backward(input2, 1)
+ mytester:assert(math.abs(output2 - output) < 0.00001, "MRC:type() forward error")
+ mytester:assertTensorEq(gradInput[1]:float(), gradInput2[1], 0.00001, "MRC:type() backward error 1")
+ mytester:assert(torch.type(gradInput2[1]) == 'torch.FloatTensor', "MRC:type() error 1")
+ mytester:assertTensorEq(gradInput[2]:float(), gradInput2[2], 0.00001, "MRC:type() backward error 2")
+ mytester:assert(torch.type(gradInput2[2]) == 'torch.FloatTensor', "MRC:type() error 2")
+
+ -- batch, sizeAverage true, jacobian
+ local margin = math.random() * 2 - 1
+ local batch_size = math.random(1,10)
+ local crit = nn.MarginRankingCriterion(margin)
+ crit.sizeAverage = true
+ local v = torch.rand(2, batch_size)
+ local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1)
+ criterionJacobianTest1DTable(crit,v,t)
+
+ -- batch, sizeAverage false, jacobian
+ local margin = math.random() * 2 - 1
+ local crit = nn.MarginRankingCriterion(margin)
+ crit.sizeAverage = false
+ local v = torch.rand(2, batch_size)
+ local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1)
+ criterionJacobianTest1DTable(crit,v,t)
+end
+
+function nntest.ModuleCriterion()
+ local input = torch.randn(8,4)
+ local target = torch.randn(8,4)
+ local inputModule = nn.Tanh()
+ local criterion = nn.MSECriterion()
+ local mc = nn.ModuleCriterion(criterion, inputModule)
+
+ local err = mc:forward(input, target)
+ local gradInput = mc:backward(input, target)
+
+ local output = inputModule:forward(input)
+ local err2 = criterion:forward(output, target)
+ local gradOutput = criterion:backward(output, target)
+ local gradInput2 = inputModule:backward(input, gradOutput)
+
+ mytester:assert(err == err2, "ModuleCriterion backward err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "ModuleCriterion backward err")
+end
+
+function nntest.MaskedSelect()
+ local input = torch.randn(4, 5)
+ local mask = torch.ByteTensor(4, 5):bernoulli()
+ local module = nn.MaskedSelect()
+ local out = module:forward({input, mask})
+ local err = out:dist(input:maskedSelect(mask))
+ mytester:assertlt(err, 1e-15, torch.typename(module) .. ' - forward err ')
+
+ local gradOut = torch.Tensor({20, 80})
+ input = torch.Tensor({{10, 20}, {30, 40}})
+ local inTarget = torch.Tensor({{20, 0}, {0, 80}})
+ local mask = torch.ByteTensor({{1, 0}, {0, 1}})
+ local module = nn.MaskedSelect()
+ module:forward({input, mask})
+ local gradIn = module:backward({input, mask}, gradOut)
+ mytester:assertTensorEq(inTarget, gradIn[1], 1e-15, torch.typename(module) .. ' - backward err ')
+end
+
+function nntest.ParallelCriterion()
+ local input = {torch.rand(2,10), torch.randn(2,10)}
+ local target = {torch.IntTensor{1,8}, torch.randn(2,10)}
+ local nll = nn.ClassNLLCriterion()
+ local mse = nn.MSECriterion()
+ local pc = nn.ParallelCriterion():add(nll, 0.5):add(mse)
+ local output = pc:forward(input, target)
+ local output2 = nll:forward(input[1], target[1])/2 + mse:forward(input[2], target[2])
+ mytester:assert(math.abs(output2 - output) < 0.00001, "ParallelCriterion forward error")
+ local gradInput2 = {nll:backward(input[1], target[1]):clone():div(2), mse:backward(input[2], target[2])}
+ local gradInput = pc:backward(input, target)
+ mytester:assertTensorEq(gradInput[1], gradInput2[1], 0.000001, "ParallelCriterion backward error 1")
+ mytester:assertTensorEq(gradInput[2], gradInput2[2], 0.000001, "ParallelCriterion backward error 2")
+
+ -- test type
+ pc:float()
+ gradInput[1], gradInput[2] = gradInput[1]:clone(), gradInput[2]:clone()
+ local input3 = {input[1]:float(), input[2]:float()}
+ local target3 = {target[1]:float(), target[2]:float()}
+ local output3 = pc:forward(input3, target3)
+ local gradInput3 = pc:backward(input3, target3)
+ mytester:assert(math.abs(output3 - output) < 0.00001, "ParallelCriterion forward error type")
+ mytester:assertTensorEq(gradInput[1]:float(), gradInput3[1], 0.000001, "ParallelCriterion backward error 1 type")
+ mytester:assertTensorEq(gradInput[2]:float(), gradInput3[2], 0.000001, "ParallelCriterion backward error 2 type")
+
+ -- test repeatTarget
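+ -- repeatTarget = true broadcasts the single target to every sub-criterion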
+ local input = {torch.rand(2,10), torch.randn(2,10)}
+ local target = torch.randn(2,10)
+ local mse = nn.MSECriterion()
+ local pc = nn.ParallelCriterion(true):add(mse, 0.5):add(mse:clone())
+ local output = pc:forward(input, target)
+ local output2 = mse:forward(input[1], target)/2 + mse:forward(input[2], target)
+ mytester:assert(math.abs(output2 - output) < 0.00001, "ParallelCriterion repeatTarget forward error")
+ local gradInput = pc:backward(input, target)
+ local gradInput2 = {mse:backward(input[1], target):clone():div(2), mse:backward(input[2], target)}
+ mytester:assertTensorEq(gradInput[1], gradInput2[1], 0.000001, "ParallelCriterion repeatTarget backward error 1")
+ mytester:assertTensorEq(gradInput[2], gradInput2[2], 0.000001, "ParallelCriterion repeatTarget backward error 2")
+
+ -- table input
+ local input = {torch.randn(2,10), {torch.rand(2,10), torch.randn(2,10)}}
+ local target = {torch.IntTensor{2,5}, {torch.IntTensor{1,8}, torch.randn(2,10)}}
+ local nll2 = nn.ClassNLLCriterion()
+ local nll = nn.ClassNLLCriterion()
+ local mse = nn.MSECriterion()
+ local pc = nn.ParallelCriterion():add(nll, 0.5):add(mse)
+ local pc2 = nn.ParallelCriterion():add(nll2, 0.4):add(pc)
+ local output = pc2:forward(input, target)
+ local output2 = nll2:forward(input[1], target[1])*0.4 + nll:forward(input[2][1], target[2][1])/2 + mse:forward(input[2][2], target[2][2])
+ mytester:assert(math.abs(output2 - output) < 0.00001, "ParallelCriterion table forward error")
+ local gradInput2 = {
+ nll2:backward(input[1], target[1]):clone():mul(0.4),
+ {nll:backward(input[2][1], target[2][1]):clone():div(2), mse:backward(input[2][2], target[2][2])}
+ }
+ local gradInput = pc2:backward(input, target)
+ mytester:assertTensorEq(gradInput[1], gradInput2[1], 0.000001, "ParallelCriterion table backward error 1")
+ mytester:assertTensorEq(gradInput[2][1], gradInput2[2][1], 0.000001, "ParallelCriterion table backward error 2")
+ mytester:assertTensorEq(gradInput[2][2], gradInput2[2][2], 0.000001, "ParallelCriterion table backward error 3")
+end
+
+function nntest.MultiCriterion()
+ local input = torch.rand(2,10)
+ local target = torch.IntTensor{1,8}
+ local nll = nn.ClassNLLCriterion()
+ local nll2 = nn.CrossEntropyCriterion()
+ local mc = nn.MultiCriterion():add(nll, 0.5):add(nll2)
+ local output = mc:forward(input, target)
+ local output2 = nll:forward(input, target)/2 + nll2:forward(input, target)
+ mytester:assert(math.abs(output2 - output) < 0.00001, "MultiCriterion forward error")
+ local gradInput = mc:backward(input, target)
+ local gradInput2 = nll:backward(input, target):clone():div(2):add(nll2:backward(input, target))
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "MultiCriterion backward error ")
+
+ -- test type
+ mc:float()
+ gradInput = gradInput:clone()
+ local input3 = input:float()
+ local target3 = target:float()
+ local output3 = mc:forward(input3, target3)
+ local gradInput3 = mc:backward(input3, target3)
+ mytester:assert(math.abs(output3 - output) < 0.00001, "MultiCriterion forward error type")
+ mytester:assertTensorEq(gradInput:float(), gradInput3, 0.000001, "MultiCriterion backward error type")
+
+ -- test table input
+ mc:double()
+ local input = {torch.randn(2,10), {torch.randn(2,10), torch.randn(2,10)}}
+ local target = {torch.IntTensor{1,8}, {torch.IntTensor{5,6}, torch.IntTensor{4,3}}}
+ local pnllc = nn.ParallelCriterion():add(nll):add(nn.ParallelCriterion():add(nll:clone()):add(nll:clone()))
+ local pnllc2 = nn.ParallelCriterion():add(nll2):add(nn.ParallelCriterion():add(nll2:clone()):add(nll2:clone()))
+ local mc = nn.MultiCriterion():add(pnllc, 0.5):add(pnllc2)
+ local output = mc:forward(input, target)
+ local output2 = pnllc:forward(input, target)/2 + pnllc2:forward(input, target)
+ mytester:assert(math.abs(output2 - output) < 0.00001, "MultiCriterion forward table error")
+ local gradInput = mc:backward(input, target)
+ local gradInput2 = pnllc:clone():backward(input, target)
+ local gradInput2b = pnllc2:backward(input, target)
+ gradInput2[1]:div(2):add(gradInput2b[1])
+ gradInput2[2][1]:div(2):add(gradInput2b[2][1])
+ gradInput2[2][2]:div(2):add(gradInput2b[2][2])
+ mytester:assertTensorEq(gradInput[1], gradInput2[1], 0.000001, "MultiCriterion backward table 1 error ")
+ mytester:assertTensorEq(gradInput[2][1], gradInput2[2][1], 0.000001, "MultiCriterion backward table 2 error ")
+ mytester:assertTensorEq(gradInput[2][2], gradInput2[2][2], 0.000001, "MultiCriterion backward table 3 error ")
+end
+
+function nntest.WeightedMSECriterion()
+ local input = torch.rand(10)
+ local target = input:clone():add(torch.rand(10))
+ local cri = nn.WeightedMSECriterion(torch.rand(10))
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.BCECriterion()
+ local eps = 1e-2
+ local input = torch.rand(10)*(1-eps) + eps/2
+ local target = torch.rand(10)*(1-eps) + eps/2
+ local cri = nn.BCECriterion()
+ criterionJacobianTest(cri, input, target)
+ --with weights
+ local weights= torch.rand(10)*(1-eps) + eps/2
+ local cri = nn.BCECriterion(weights)
+ criterionJacobianTest(cri, input, target)
+ -- with weights + batch
+ local bsz = 5
+ local input = torch.rand(bsz, 10)*(1-eps) + eps/2
+ local target = torch.rand(bsz, 10)*(1-eps) + eps/2
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.DistKLDivCriterion()
+ local input = torch.rand(10)
+ local target = input:clone():add(torch.rand(10))
+ local cri = nn.DistKLDivCriterion(true) -- sizeAverage = true
+ criterionJacobianTest(cri, input, target)
+ cri = nn.DistKLDivCriterion(false) -- sizeAverage = false
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.ClassNLLCriterion()
+ local batchsize = math.random(2,4)
+ local numLabels = math.random(5,10)
+
+ local function testclassnll(input, target)
+ -- default ClassNLLCriterion
+ local cri = nn.ClassNLLCriterion()
+ criterionJacobianTest(cri, input, target)
+
+ -- ClassNLLCriterion with weights
+ local weights = torch.rand(numLabels)
+ weights = weights / weights:sum()
+ cri = nn.ClassNLLCriterion(weights)
+ criterionJacobianTest(cri, input, target)
+ end
+
+ -- input/target: 1D/number
+ testclassnll(torch.rand(numLabels), math.random(1,numLabels))
+ -- input/target: 1D/1D
+ testclassnll(torch.rand(numLabels), torch.LongTensor(1):random(1, numLabels))
+ -- input/target: 2D/1D
+ testclassnll(torch.rand(batchsize, numLabels), torch.LongTensor(batchsize):random(1,numLabels))
+ -- test ignoreIndex
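+ -- targets equal to ignoreIndex must contribute zero loss and zero gradient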
+ local ignoreIndex = -1
+ local cri = nn.ClassNLLCriterion(nil, nil, ignoreIndex)
+ local input = torch.randn(numLabels)
+ local target = ignoreIndex
+ mytester:assert(cri:forward(input, target) == 0)
+ mytester:assert(cri:backward(input, target):abs():sum() == 0)
+ local input = torch.randn(batchsize, numLabels)
+ local target = torch.LongTensor(batchsize):random(1,numLabels)
+ target[1] = ignoreIndex
+ local output = cri:forward(input, target)
+ local gradInput = cri:backward(input, target):clone()
+ mytester:assert(gradInput[1]:abs():sum() == 0)
+ local input, target = input:sub(2,batchsize), target:sub(2,batchsize)
+ local output2 = cri:forward(input, target)
+ mytester:assert(math.abs(output2 - output) < 0.0000001)
+ local gradInput2 = cri:backward(input, target)
+ mytester:assertTensorEq(gradInput2, gradInput:sub(2,batchsize), 0.0000001)
+end
+
+function nntest.SpatialClassNLLCriterion()
+ local numLabels = math.random(5,10)
+ local h = math.random(5, 20)
+ local w = math.random(5, 20)
+ local batchSize = math.random(1, 4)
+ local input = torch.rand(batchSize, numLabels, h, w)
+ local target = torch.Tensor(batchSize, h, w)
+ target:apply(function() return math.random(1, numLabels) end)
+
+ -- default ClassNLLCriterion
+ local cri = nn.SpatialClassNLLCriterion()
+ criterionJacobianTest(cri, input, target)
+
+ -- ClassNLLCriterion with weights
+ local weights = torch.rand(numLabels)
+ cri = nn.SpatialClassNLLCriterion(weights)
+ criterionJacobianTest(cri, input, target)
+
+ -- check with ClassNLLCriterion
+ local spatial = nn.SpatialClassNLLCriterion(weights)
+ local regular = nn.ClassNLLCriterion(weights)
+ local spatial_out = spatial:forward(input, target)
+ local regular_out = regular:forward(input:permute(1, 3, 4, 2):contiguous():view(-1, numLabels),
+ target:view(-1))
+ mytester:eq(spatial_out, regular_out, 1e-6,
+ "spatial and regular criterions give different results")
+end
+
+function nntest.MultiLabelSoftMarginCriterion()
+ -- test w/o weights
+
+ local cri = nn.MultiLabelSoftMarginCriterion()
+
+ -- stochastic
+ local numLabels = math.random(5, 10)
+ local input = torch.randn(numLabels)
+ local target = torch.round(torch.rand(numLabels))
+ criterionJacobianTest(cri, input, target)
+
+ -- batch
+ local numLabels = math.random(5, 10)
+ local bsz = math.random(3, 7)
+ local input = torch.randn(bsz, numLabels)
+ local target = torch.round(torch.rand(bsz, numLabels))
+ criterionJacobianTest(cri, input, target)
+
+ -- test weights
+
+ local numLabels = math.random(5, 10)
+ local weights = torch.randn(numLabels)
+ local cri = nn.MultiLabelSoftMarginCriterion(weights)
+
+ -- stochastic
+ local input = torch.randn(numLabels)
+ local target = torch.round(torch.rand(numLabels))
+ criterionJacobianTest(cri, input, target)
+
+ -- batch
+ local bsz = math.random(3, 7)
+ local input = torch.randn(bsz, numLabels)
+ local target = torch.round(torch.rand(bsz, numLabels))
+ criterionJacobianTest(cri, input, target)
+end
+
+function nntest.CrossEntropyCriterion()
+ -- stochastic
+ local numLabels = math.random(5, 10)
+ local input = torch.zeros(numLabels)
+ local target = torch.random(1, numLabels)
+
+ local cri = nn.CrossEntropyCriterion()
+ criterionJacobianTest(cri, input, target)
+
+ -- batch
+ local numLabels = math.random(5,10)
+ local bsz = math.random(3, 7)
+ local input = torch.zeros(bsz, numLabels)
+ local target = torch.Tensor(bsz):random(1, numLabels)
+
+ local cri = nn.CrossEntropyCriterion()
+ criterionJacobianTest(cri, input, target)
+
+ -- with weights
+ local weights = torch.rand(numLabels)
+ weights = weights / weights:sum()
+ cri = nn.CrossEntropyCriterion(weights)
+ criterionJacobianTest(cri, input, target)
+
+ -- verify nll.sizeAverage preservation
+ cri = nn.CrossEntropyCriterion(weights)
+ cri.nll.sizeAverage = false
+ criterionJacobianTest(cri, input, target)
+ mytester:eq(cri.nll.sizeAverage, false,
+ "ClassNLLCriterion.sizeAverage overwritten")
+
+ -- verify nll.sizeAverage propagation
+ cri = nn.CrossEntropyCriterion(weights)
+ cri.sizeAverage = false
+ criterionJacobianTest(cri, input, target)
+ mytester:eq(cri.nll.sizeAverage, false,
+ "ClassNLLCriterion.sizeAverage not propagated")
+end
+
+function nntest.LogSigmoid()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.LogSigmoid()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.LogSoftmax()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local input = torch.Tensor(ini,inj):zero()
+ local module = nn.LogSoftMax()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err, 1e-3, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- test logsoftmax when gradOutput is non-contiguous
+ local layer = nn.LogSoftMax()
+ layer:zeroGradParameters()
+ local input = torch.randn(4, 10)
+ local data = torch.randn(4, 20)
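+ -- narrowing dim 2 of a wider tensor yields a non-contiguous view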
+ local gradOutput = data:narrow(2, 1, 10):fill(0)
+ local output = layer:forward(input)
+ local gradInput1 = layer:backward(input, gradOutput):clone()
+ local output = layer:forward(input)
+ gradOutput = gradOutput:clone()
+ local gradInput2 = layer:backward(input, gradOutput):clone()
+
+ mytester:assertlt(gradInput1:add(-1, gradInput2):abs():max(),
+ 1e-10,
+ torch.typename(layer)
+ .. ' non-contiguous gradOutput check')
+
+end
+
+function nntest.SpatialLogSoftMax()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local inl = math.random(3,5)
+ local input = torch.Tensor(inl, ink, inj, ini):zero()
+ local module = nn.SpatialLogSoftMax()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,expprecision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+-- function nntest.TemporalLogSoftmax()
+-- local ini = math.random(10,20)
+-- local inj = math.random(10,20)
+-- local input = torch.Tensor(ini,inj):zero()
+-- local module = nn.TemporalLogSoftMax()
+
+-- local err = jac.testJacobian(module,input)
+-- mytester:assertlt(err,precision, 'error on state ')
+
+-- local ferr,berr = jac.testIO(module,input)
+-- mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+-- mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+-- end
+
+function nntest.Max()
+ -- 1D
+ local ini = math.random(3,7)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.Max(1)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- negative dimension
+ local module = nn.Max(-1)
+ local input = torch.Tensor({1, 2, 3})
+ local expected = torch.Tensor({3})
+ local output = module:forward(input)
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+ -- batch
+ local module = nn.Max(1, 1)
+ local input = torch.Tensor({{1, 2, 3},{4, 5, 6}})
+ local expected = torch.Tensor({3, 6})
+ local output = module:forward(input)
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+
+ -- 3D
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj*ink):zero()
+ local module = nn.Max(1)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Min()
+ -- 1D
+ local ini = math.random(3,7)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.Min(1)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- negative dimension
+ local module = nn.Min(-1)
+ local input = torch.Tensor({1, 2, 3})
+ local expected = torch.Tensor({1})
+ local output = module:forward(input)
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+ -- batch
+ local module = nn.Min(1, 1)
+ local input = torch.Tensor({{1, 2, 3},{4, 5, 6}})
+ local expected = torch.Tensor({1, 4})
+ local output = module:forward(input)
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+
+   -- 2D (a 3D volume flattened to ini x inj*ink)
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj*ink):zero()
+ local module = nn.Min(1)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Mean()
+ -- 1D
+ local ini = math.random(3,7)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.Mean(1)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- negative dimension
+ local module = nn.Mean(-1)
+ local input = torch.Tensor({1, 2, 3})
+ local expected = torch.Tensor({2})
+ local output = module:forward(input)
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+ -- batch
+ local module = nn.Mean(1, 1)
+ local input = torch.Tensor({{1, 2, 3},{4, 5, 6}})
+ local expected = torch.Tensor({2, 5})
+ local output = module:forward(input)
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+
+ -- 3D
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Mean(torch.random(1,3))
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Mul()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Mul()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Sigmoid()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Sigmoid()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Softmax()
+ local ini = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, ini):zero()
+ local module = nn.SoftMax()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,expprecision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialSoftMax()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local inl = math.random(3,5)
+ local input = torch.Tensor(inl, ink, inj, ini):zero()
+ local module = nn.SpatialSoftMax()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,expprecision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Softmin()
+ local ini = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, ini):zero()
+ local module = nn.SoftMin()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,expprecision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Softsign()
+ local ini = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, ini):zero()
+ local module = nn.SoftSign()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SoftPlus()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.SoftPlus()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialSubtractiveNormalization_2dkernel()
+ local inputSize = math.random(6,9)
+ local kersize = 3
+ local nbfeatures = math.random(3,5)
+ local kernel = torch.Tensor(kersize,kersize):fill(1)
+ local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel)
+ local input = torch.rand(nbfeatures,inputSize,inputSize/2)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- test batch mode
+ local output = module:forward(input):clone()
+ local gradOutput = output:clone():uniform(0,1)
+ local gradInput = module:backward(input, gradOutput):clone()
+ local batchSize = 4
+ local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
+ input2[2]:copy(input)
+
+ local output2 = module:forward(input2)
+ local gradOutput2 = output2:clone():uniform(0,1)
+ gradOutput2[2]:copy(gradOutput)
+ local gradInput2 = module:backward(input2, gradOutput2)
+
+   mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubtractiveNormalization 2d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, "SpatialSubtractiveNormalization 2d backward batch err")
+
+ local err = jac.testJacobian(module,input2)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+end
+
+function nntest.SpatialSubtractiveNormalization_1dkernel()
+ local inputSize = math.random(6,9)
+ local kersize = 3
+ local nbfeatures = math.random(3,5)
+ local kernel = torch.Tensor(kersize):fill(1)
+ local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel)
+ local input = torch.rand(nbfeatures,inputSize,inputSize/2)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- test batch mode
+ local output = module:forward(input):clone()
+ local gradOutput = output:clone():uniform(0,1)
+ local gradInput = module:backward(input, gradOutput):clone()
+ local batchSize = 4
+ local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
+ input2[2]:copy(input)
+
+ local output2 = module:forward(input2)
+ local gradOutput2 = output2:clone():uniform(0,1)
+ gradOutput2[2]:copy(gradOutput)
+ local gradInput2 = module:backward(input2, gradOutput2)
+
+   mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubtractiveNormalization 1d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, "SpatialSubtractiveNormalization 1d backward batch err")
+
+ local err = jac.testJacobian(module,input2)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialDivisiveNormalization_2dkernel()
+ local inputSize = math.random(6,9)
+ local kersize = 3
+ local nbfeatures = math.random(3,5)
+ local kernel = torch.Tensor(kersize,kersize):fill(1)
+ local module = nn.SpatialDivisiveNormalization(nbfeatures,kernel)
+ local input = torch.rand(nbfeatures,inputSize,inputSize/2)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- test batch mode
+ local output = module:forward(input):clone()
+ local gradOutput = output:clone():uniform(0,1)
+ local gradInput = module:backward(input, gradOutput):clone()
+ local batchSize = 4
+ local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
+ input2[2]:copy(input)
+
+ local output2 = module:forward(input2)
+ local gradOutput2 = output2:clone():uniform(0,1)
+ gradOutput2[2]:copy(gradOutput)
+ local gradInput2 = module:backward(input2, gradOutput2)
+
+ mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 2d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, "SpatialDivisiveNormalization 2d backward batch err")
+
+ local err = jac.testJacobian(module,input2)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialDivisiveNormalization_1dkernel()
+ local inputSize = math.random(6,9)
+ local kersize = 3
+ local nbfeatures = math.random(3,5)
+ local kernel = torch.Tensor(kersize):fill(1)
+ local module = nn.SpatialDivisiveNormalization(nbfeatures,kernel)
+ local input = torch.rand(nbfeatures,inputSize,inputSize/2)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- test batch mode
+ local output = module:forward(input):clone()
+ local gradOutput = output:clone():uniform(0,1)
+ local gradInput = module:backward(input, gradOutput):clone()
+ local batchSize = 4
+ local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
+ input2[2]:copy(input)
+
+ local output2 = module:forward(input2)
+ local gradOutput2 = output2:clone():uniform(0,1)
+ gradOutput2[2]:copy(gradOutput)
+ local gradInput2 = module:backward(input2, gradOutput2)
+
+ mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 1d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput, 0.000001, "SpatialDivisiveNormalization 1d backward batch err")
+
+ local err = jac.testJacobian(module,input2)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialContrastiveNormalization()
+ local inputSize = math.random(6,9)
+ local kersize = 3
+ local nbfeatures = math.random(3,5)
+ local kernel = torch.Tensor(kersize,kersize):fill(1)
+ local module = nn.SpatialContrastiveNormalization(nbfeatures,kernel)
+ local input = torch.rand(nbfeatures,inputSize,inputSize/2)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- test batch mode and type
+ local output = module:forward(input):clone()
+ local gradOutput = output:clone():uniform(0,1)
+ local gradInput = module:backward(input, gradOutput):clone()
+ local batchSize = 4
+ local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2):float()
+ input2[2]:copy(input)
+
+ module:float() -- type-cast
+ local output2 = module:forward(input2)
+ local gradOutput2 = output2:clone():uniform(0,1)
+ gradOutput2[2]:copy(gradOutput)
+ local gradInput2 = module:backward(input2, gradOutput2)
+
+ mytester:assertTensorEq(output2[2], output:float(), 0.000002, "SpatialContrastiveNormalization 2d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput:float(), 0.000002, "SpatialContrastiveNormalization 2d backward batch err")
+
+ module:double()
+ input2 = input2:double()
+ local err = jac.testJacobian(module,input2)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialCrossMapLRN()
+ local inputSize = math.random(6,9)
+ local size = math.random(1,3)*2+1
+ local nbfeatures = math.random(3,8)
+ local alpha = math.random(1,100)/100
+ local beta = math.random(0,100)/100
+ local k = math.random(1,3)
+ local module = nn.SpatialCrossMapLRN(size, alpha, beta, k)
+ local input = torch.rand(nbfeatures,inputSize,inputSize)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- test batch mode and type
+ local output = module:forward(input):clone()
+ local gradOutput = output:clone():uniform(0,1)
+ local gradInput = module:backward(input, gradOutput):clone()
+ local batchSize = 4
+ local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize):float()
+ input2[2]:copy(input)
+
+ module:float() -- type-cast
+ local output2 = module:forward(input2)
+ local gradOutput2 = output2:clone():uniform(0,1)
+ gradOutput2[2]:copy(gradOutput)
+ local gradInput2 = module:backward(input2, gradOutput2)
+
+ mytester:assertTensorEq(output2[2], output:float(), 0.000001, "SpatialCrossMapLRN 2d forward batch err")
+   mytester:assertTensorEq(gradInput2[2], gradInput:float(), 0.000001, "SpatialCrossMapLRN 2d backward batch err")
+
+ module:double()
+ input2 = input2:double()
+ local err = jac.testJacobian(module,input2)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input2)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialConvolution()
+ local from = math.random(1,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(5,7)
+ local outj = math.random(5,7)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
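+   -- shape arithmetic: a valid (unpadded) convolution satisfies
+   --   out = (in - k)/stride + 1   <=>   in = (out-1)*stride + k
+   -- so choosing the output size first guarantees an integer input size,
+   -- e.g. out=5, k=3, s=2 gives in = 4*2+3 = 11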
+ local module = nn.SpatialConvolution(from, to, ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ local function jacTests(module)
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+ end
+
+ nn.hessian.enable()
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ if module.bias then
+ local err = jac.testDiagHessianBias(module, input)
+         mytester:assertlt(err , precision, 'error on diagHessianBias')
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+ end
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,5)
+ outi = math.random(4,8)
+ outj = math.random(4,8)
+ ini = (outi-1)*si+ki
+ inj = (outj-1)*sj+kj
+ module = nn.SpatialConvolution(from, to, ki, kj, si, sj)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+ end
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ if module.bias then
+ local err = jac.testDiagHessianBias(module, input)
+         mytester:assertlt(err , precision, 'error on diagHessianBias')
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+ end
+
+ jacTests(module)
+ module:noBias()
+ jacTests(module)
+ module.bias = torch.Tensor(module.nOutputPlane):zero()
+ module.gradBias = torch.Tensor(module.nOutputPlane):zero()
+ module:reset()
+ jacTests(module)
+
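+   -- simulate the aliased-storage setting produced by getParameters(): the
+   -- weight becomes a narrow view into a larger storage deliberately filled
+   -- with NaNs, so any read outside the view (or a stale storage pointer)
+   -- poisons the results and fails the exact comparisons below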
+ local output = module:forward(input):clone()
+ local gradOutput = output:clone():normal()
+   local gradInput = module:backward(input, gradOutput):clone()
+ local bigWeight = module.weight.new(module.weight:nElement() * 4):fill(0/0) -- fill with nans
+ local newWeight = bigWeight:narrow(1, module.weight:nElement() * 3, module.weight:nElement())
+ newWeight = newWeight:viewAs(module.weight):copy(module.weight)
+ module.weight = newWeight
+ local newOutput = module:forward(input)
+   local newGradInput = module:backward(input, gradOutput)
+ mytester:asserteq((newOutput - output):abs():max(), 0,
+ torch.typename(module) .. ' forward failure case in a getParameters setting ')
+ mytester:asserteq((newGradInput - gradInput):abs():max(), 0,
+ torch.typename(module) .. ' backward failure case in a getParameters setting ')
+
+end
+
+function nntest.SpatialConvolutionMM()
+ local from = math.random(2,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local di = math.random(1,4)
+ local dj = math.random(1,4)
+ local padW = math.random(0,2)
+ local padH = math.random(0,2)
+ local outi = math.random(5,9)
+ local outj = math.random(5,9)
+ local ini = (outi-1)*di+ki-padW*2
+ local inj = (outj-1)*dj+kj-padH*2
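+   -- padded shape arithmetic: in = (out-1)*stride + k - 2*pad, e.g. out=5,
+   -- k=3, stride=1, pad=1 gives in = 4 + 3 - 2 = 5 (a "same" convolution)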
+ local module = nn.SpatialConvolutionMM(from, to, ki, kj, di, dj, padW, padH)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,5)
+
+ module = nn.SpatialConvolutionMM(from, to, ki, kj, di, dj, padW, padH)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- non-contiguous
+ local input = torch.randn(batch,from,ini,inj):transpose(3,4) -- non-contiguous
+ local inputc = input:contiguous() -- contiguous
+ local output = module:forward(input):clone()
+ local outputc = module:forward(inputc):clone()
+   mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous forward err ')
+ local gradInput = module:backward(input, output):clone()
+ local gradInputc = module:backward(inputc, outputc):clone()
+   mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous backward err ')
+end
+
+function nntest.SpatialConvolutionLocal()
+ local from = math.random(1,4)
+ local to = math.random(1,4)
+ local ki = math.random(1,3)
+ local kj = math.random(1,3)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local outi = math.random(5,6)
+ local outj = math.random(5,6)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.SpatialConvolutionLocal(from, to, ini, inj, ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ nn.hessian.enable()
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ local err = jac.testDiagHessianBias(module, input)
+   mytester:assertlt(err , precision, 'error on diagHessianBias')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,5)
+ outi = math.random(4,6)
+ outj = math.random(4,6)
+ ini = (outi-1)*si+ki
+ inj = (outj-1)*sj+kj
+ module = nn.SpatialConvolutionLocal(from, to, ini, inj, ki, kj, si, sj)
+ input = torch.Tensor(batch, from, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ local err = jac.testDiagHessianBias(module, input)
+   mytester:assertlt(err , precision, 'error on diagHessianBias')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- check against nn.SpatialConvolution
+ local conv = nn.SpatialConvolution(from, to, ki, kj, si, sj)
+ torch.repeatTensor(module.bias, conv.bias:view(to, 1, 1), 1, outj, outi)
+ torch.repeatTensor(module.weight, conv.weight:view(1, 1, from, to, ki, kj), outi, outj, 1, 1, 1, 1)
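+   -- tiling one shared kernel and bias across every output location makes the
+   -- untied (locally connected) layer compute exactly what the weight-sharing
+   -- nn.SpatialConvolution computes, so the two outputs must agree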
+ local input = torch.rand(batch, from, inj, ini)
+ local output = module:forward(input)
+ local outputConv = conv:forward(input)
+ local err = torch.dist(output, outputConv)
+ mytester:assertlt(err, precision, 'error checking against nn.SpatialConvolution')
+end
+
+function nntest.SpatialFullConvolution()
+ local from = math.random(2,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local di = math.random(1,4)
+ local dj = math.random(1,4)
+ local padW = math.random(0,2)
+ local padH = math.random(0,2)
+ local outi = math.random(5,9)
+ local outj = math.random(5,9)
+ local adjW = (outi + padW*2 - ki) % di
+ local adjH = (outj + padH*2 - kj) % dj
+ local ini = math.floor((outi + padW*2 - ki)/di + 1)
+ local inj = math.floor((outj + padH*2 - kj)/dj + 1)
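+   -- a full (transposed) convolution inverts the forward shape formula:
+   --   out = (in-1)*stride - 2*pad + k + adj
+   -- adjW/adjH absorb the remainder lost to floor() above, so the module is
+   -- guaranteed to hit the requested outi/outj exactly (asserted in the batch
+   -- test below)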
+ local module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ local function jacTests(module)
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+ end
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,5)
+
+ module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+ -- Check that the required output size matches the actual output size
+ local output = module:forward(input)
+ mytester:asserteq(output:size(3), outj, 'output height error')
+ mytester:asserteq(output:size(4), outi, 'output width error')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+ end
+
+ jacTests(module)
+ module:noBias()
+ jacTests(module)
+ module.bias = torch.Tensor(module.nOutputPlane):zero()
+ module.gradBias = torch.Tensor(module.nOutputPlane):zero()
+ module:reset()
+ jacTests(module)
+
+ -- non-contiguous
+ local batch = math.random(2,5)
+ local input = torch.randn(batch,from,ini,inj):transpose(3,4) -- non-contiguous
+ local inputc = input:contiguous() -- contiguous
+ local output = module:forward(input)
+ local outputc = module:forward(inputc)
+   mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous forward err ')
+ local gradInput = module:backward(input, output)
+ local gradInputc = module:backward(inputc, outputc)
+   mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous backward err ')
+end
+
+function nntest.SpatialFullConvolutionDualInput()
+ local from = math.random(2,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local di = math.random(1,4)
+ local dj = math.random(1,4)
+ local padW = math.random(0,2)
+ local padH = math.random(0,2)
+ local outi = math.random(5,9)
+ local outj = math.random(5,9)
+ local ini = math.floor((outi + padW*2 - ki)/di + 1)
+ local inj = math.floor((outj + padH*2 - kj)/dj + 1)
+ local adjW = (outi + 2 * padW - ki) % di
+ local adjH = (outj + 2 * padH - kj) % dj
+ local targetTensor = torch.Tensor(outj, outi):zero()
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ local module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH)
+ local moduleRef = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH)
+ moduleRef.weight:copy(module.weight)
+ moduleRef.bias:copy(module.bias)
+
+ -- Check that the required output size matches the actual output size
+ -- when using the dual input mode
+ local output = module:forward({input, targetTensor})
+ mytester:asserteq(output:size(2), outj, 'output height error')
+ mytester:asserteq(output:size(3), outi, 'output width error')
+
+ -- Check that backward and forward match the reference module
+ local outputRef = moduleRef:forward(input)
+ mytester:asserteq(0, (output-outputRef):abs():max(), torch.typename(module) .. ' - output err ')
+ local gradOutput = outputRef:clone():uniform()
+ local gradInputRef = moduleRef:backward(input, gradOutput)
+ local gradInput = module:backward({input, targetTensor}, gradOutput)
+ mytester:asserteq(0, (gradInput[1]-gradInputRef):abs():max(), torch.typename(module) .. ' - gradInput[1] err ')
+
+ -- Check that gradInput[2] is the singleton tensor {0}
+ mytester:asserteq(gradInput[2]:storage():size(), 1, torch.typename(module) .. ' - gradInput[2] size err ')
+ mytester:asserteq(gradInput[2]:storage()[1], 0, torch.typename(module) .. ' - gradInput[2] value err ')
+end
+
+function nntest.SpatialDilatedConvolution()
+ local from = math.random(1,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local di = math.random(1,4)
+ local dj = math.random(1,4)
+ local padW = math.random(0,2)
+ local padH = math.random(0,2)
+ local outi = math.random(5,9)
+ local outj = math.random(5,9)
+ local dilationW = math.random(1,10)
+ local dilationH = math.random(1,10)
+ local ini = (outi - 1) * di - 2 * padW + dilationW * (ki-1) + 1
+ local inj = (outj - 1) * dj - 2 * padH + dilationH * (kj-1) + 1
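+   -- dilation inflates the effective kernel to dilation*(k-1)+1, hence
+   --   in = (out-1)*stride - 2*pad + dilation*(k-1) + 1;
+   -- dilation=1 recovers the plain convolution formula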
+
+ local module = nn.SpatialDilatedConvolution(from, to, ki, kj, di, dj, padW, padH, dilationW, dilationH)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,5)
+
+ module = nn.SpatialDilatedConvolution(from, to, ki, kj, di, dj, padW, padH, dilationW, dilationH)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+ -- Check that the required output size matches the actual output size
+ local output = module:forward(input)
+ mytester:asserteq(output:size(3), outj, 'output height error')
+ mytester:asserteq(output:size(4), outi, 'output width error')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- non-contiguous
+ local input = torch.randn(batch,from,ini,inj):transpose(3,4) -- non-contiguous
+ local inputc = input:contiguous() -- contiguous
+ local output = module:forward(input)
+ local outputc = module:forward(inputc)
+   mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous forward err ')
+ local gradInput = module:backward(input, output)
+ local gradInputc = module:backward(inputc, outputc)
+   mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous backward err ')
+end
+
+function nntest.SpatialConvolutionMap()
+ local from = math.random(1,5)
+ local fanin = math.random(1, from)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local outi = math.random(5,9)
+ local outj = math.random(5,9)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+
+ local module = nn.SpatialConvolutionMap(nn.tables.random(from, to, fanin), ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ nn.hessian.enable()
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ local err = jac.testDiagHessianBias(module, input)
+   mytester:assertlt(err , precision, 'error on diagHessianBias')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,6)
+ module = nn.SpatialConvolutionMap(nn.tables.random(from, to, fanin), ki, kj, si, sj)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ local err = jac.testDiagHessianBias(module, input)
+   mytester:assertlt(err , precision, 'error on diagHessianBias')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialFullConvolutionMap()
+ local from = math.random(2,4)
+ local to = math.random(2,5)
+ local fanin = math.random(1, from)
+ local tt = nn.tables.random(from, to, fanin)
+ local ki = math.random(2,5)
+ local kj = math.random(2,5)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local ini = math.random(5,7)
+ local inj = math.random(5,7)
+ local module = nn.SpatialFullConvolutionMap(tt, ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ -- stochastic
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ nn.hessian.enable()
+
+ local err = jac.testDiagHessianInput(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianInput')
+
+ local err = jac.testDiagHessianWeight(module, input)
+ mytester:assertlt(err , precision, 'error on diagHessianWeight')
+
+ local err = jac.testDiagHessianBias(module, input)
+   mytester:assertlt(err , precision, 'error on diagHessianBias')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialFullConvolutionCompare()
+ local from = math.random(2,4)
+ local to = math.random(2,5)
+ local tt = nn.tables.full(from, to)
+ local ki = math.random(2,5)
+ local kj = math.random(2,5)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local ini = math.random(7,8)
+ local inj = math.random(7,8)
+ local module1 = nn.SpatialFullConvolutionMap(tt, ki, kj, si, sj)
+ local module2 = nn.SpatialFullConvolution(from, to, ki, kj, si, sj)
+ local input = torch.rand(from, inj, ini)
+   module1.bias:copy(module2.bias)
+   for k=1,tt:size(1) do
+      module1.weight[k]:copy(module2.weight[tt[k][1]][tt[k][2]])
+   end
+
+ local o1 = module1:updateOutput(input)
+ local o2 = module2:updateOutput(input)
+ mytester:assertlt(o1:dist(o2), precision, 'error on output')
+
+ local go1 = torch.rand(o1:size())
+ local go2 = go1:clone()
+
+ local gi1= module1:updateGradInput(input,go1)
+ local gi2 = module2:updateGradInput(input,go2)
+ mytester:assertlt(gi1:dist(gi2), precision, 'error on gradInput')
+
+ module1:zeroGradParameters()
+ module2:zeroGradParameters()
+
+ module1:accGradParameters(input,go1)
+ module2:accGradParameters(input,go2)
+ for k=1,tt:size(1) do
+ mytester:assertlt(module1.gradWeight[k]:dist(module2.gradWeight[tt[k][1]][tt[k][2]]),precision,'error on gradWeight ' .. k)
+ end
+ mytester:assertlt(module1.gradBias:dist(module2.gradBias),precision,'error on gradBias ')
+end
+
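+-- generic single-sample vs. batch consistency check: wrap one sample into a
+-- batch of size 1, push it through a clone of the module, and require the
+-- batched forward/backward/accGradParameters results to match the per-sample
+-- ones on every module field named in plist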
+local function batchcompare(smod, sin, plist)
+ local bs = torch.LongStorage(sin:dim()+1)
+ bs[1] = 1
+ for i=1,sin:dim() do bs[i+1] = sin:size()[i] end
+ local bin = torch.Tensor(bs):copy(sin)
+ local bmod = smod:clone()
+
+ local sout = smod:forward(sin):clone()
+ local bout = bmod:forward(bin):clone()
+
+ local sgout = torch.randn(sout:size())
+ local bgout = torch.Tensor(bout:size())
+ bgout:copy(sgout)
+
+ local sgin = smod:backward(sin, sgout)
+ local bgin = bmod:backward(bin, bgout)
+
+ smod:accGradParameters(sin, sgout, 1)
+ bmod:accGradParameters(bin, bgout, 1)
+
+ mytester:assertTensorEq(sout,bout:select(1,1), 1e-8, 'batchcompare error on output')
+ mytester:assertTensorEq(sgin,bgin:select(1,1), 1e-8, 'batchcompare error on gradInput')
+
+ for i,v in pairs(plist) do
+ mytester:assertTensorEq(smod[v],bmod[v], 1e-8, 'batchcompare error on ' .. v)
+ end
+end
+
+function nntest.SpatialConvolutionBatchCompare()
+ local from = math.random(1,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(5,9)
+ local outj = math.random(5,9)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+
+ local module = nn.SpatialConvolution(from, to, ki, kj, si, sj)
+ module:zeroGradParameters()
+ local input = torch.randn(from,inj,ini)
+
+ batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
+end
+
+function nntest.SpatialFullConvolutionBatchCompare()
+ local from = math.random(1,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local ini = math.random(5,9)
+ local inj = math.random(5,9)
+
+ local module = nn.SpatialFullConvolution(from, to, ki, kj, si, sj)
+ module:zeroGradParameters()
+ local input = torch.randn(from, inj, ini)
+
+ batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
+end
+
+function nntest.SpatialSubSamplingBatchCompare()
+ local from = math.random(1,6)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(6,10)
+ local outj = math.random(6,10)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ module:zeroGradParameters()
+   local input = torch.randn(from,inj,ini)
+
+ batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
+end
+
+function nntest.SpatialSubSampling()
+ local from = math.random(1,6)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(6,10)
+ local outj = math.random(6,10)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local batch = math.random(2,5)
+ outi = math.random(4,8)
+ outj = math.random(4,8)
+ ini = (outi-1)*si+ki
+ inj = (outj-1)*sj+kj
+ module = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.SpatialMaxPooling()
+ for _,ceil_mode in pairs({true,false}) do
+ local from = math.random(1,5)
+ local ki = math.random(1,4)
+ local kj = math.random(1,4)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local outi = math.random(4,5)
+ local outj = math.random(4,5)
+ local padW = math.min(math.random(0,1),math.floor(ki/2))
+ local padH = math.min(math.random(0,1),math.floor(kj/2))
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ local ceil_string = ceil_mode and 'ceil' or 'floor'
+ local module = nn.SpatialMaxPooling(ki,kj,si,sj,padW,padH)
+ if ceil_mode then module:ceil() else module:floor() end
+ local input = torch.rand(from,inj,ini)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local nbatch = math.random(2,5)
+ input = torch.rand(nbatch,from,inj,ini)
+ module = nn.SpatialMaxPooling(ki,kj,si,sj,padW,padH)
+ if ceil_mode then module:ceil() else module:floor() end
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state (Batch)')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+ end
+end
+
+function nntest.SpatialMaxUnpooling()
+ for _,ceil_mode in pairs({true,false}) do
+ local from = math.random(1,5)
+ local ki = math.random(2,4)
+ local kj = math.random(2,4)
+ local si, sj = ki, kj
+ local outi = math.random(4,5)
+ local outj = math.random(4,5)
+ local padW = math.min(math.random(0,1),math.floor(ki/2))
+ local padH = math.min(math.random(0,1),math.floor(kj/2))
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ local ceil_string = ceil_mode and 'ceil' or 'floor'
+ local poolingModule = nn.SpatialMaxPooling(ki,kj,si,sj,padW,padH)
+ if ceil_mode then poolingModule:ceil() else poolingModule:floor() end
+ local module = nn.SpatialMaxUnpooling(poolingModule)
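+      -- the unpooling module is paired with poolingModule and reuses the
+      -- argmax indices saved by its most recent forward(), so the pooling
+      -- forward below must run first, on the same data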
+
+ local original = torch.rand(from,inj,ini)
+ local input = poolingModule:forward(original)
+ local output = module:forward(input)
+
+ mytester:assert(output:isSameSizeAs(original),'SpatialMaxUnpooling output size err')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local nbatch = math.random(2,5)
+ original = torch.rand(nbatch,from,inj,ini)
+ input = poolingModule:forward(original)
+ output = module:forward(input)
+
+ mytester:assert(output:isSameSizeAs(original),'SpatialMaxUnpooling batch output size err')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state (Batch)')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+ end
+end
+
+function nntest.SpatialDilatedMaxPooling()
+ for _,ceil_mode in pairs({true,false}) do
+ local from = math.random(1,5)
+ local ki = math.random(1,4)
+ local kj = math.random(1,4)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local outi = math.random(4,5)
+ local outj = math.random(4,5)
+ local padW = math.min(math.random(0,1),math.floor(ki/2))
+ local padH = math.min(math.random(0,1),math.floor(kj/2))
+ local dilationW = math.random(1,5)
+ local dilationH = math.random(1,5)
+ local ini = (outi-1)*si+(dilationW*(ki-1)+1)-2*padW
+ local inj = (outj-1)*sj+(dilationH*(kj-1)+1)-2*padH
+
+ local ceil_string = ceil_mode and 'ceil' or 'floor'
+ local module = nn.SpatialDilatedMaxPooling(ki,kj,si,sj,padW,padH,dilationW, dilationH)
+ if ceil_mode then module:ceil() else module:floor() end
+ local input = torch.rand(from,inj,ini)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+ -- batch
+ local nbatch = math.random(2,5)
+ input = torch.rand(nbatch,from,inj,ini)
+ module = nn.SpatialDilatedMaxPooling(ki,kj,si,sj,padW,padH,dilationW,dilationH)
+ if ceil_mode then module:ceil() else module:floor() end
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state (Batch)')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
+ end
+end
+
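+-- Illustrative note: with dilation the kernel's effective extent grows to
+-- dilation*(k-1) + 1, which is why the input sizes above are computed as
+-- in = (out-1)*stride + (dilation*(k-1)+1) - 2*pad:
+do
+   local k, d = 3, 2
+   assert(d*(k - 1) + 1 == 5) -- a 3-tap kernel with dilation 2 spans 5 cells
+end
+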
+function nntest.SpatialFractionalMaxPooling()
+ local batch = math.random(1, 3)
+ local plane = math.random(1, 3)
+ local outW = math.random(1, 7)
+ local outH = math.random(1, 7)
+ local poolSizeW = math.random(2, 4)
+ local poolSizeH = math.random(2, 4)
+
+ local minInW = outW + poolSizeW
+ local minInH = outH + poolSizeH
+
+ local inW = math.random(minInW, minInW + 6)
+ local inH = math.random(minInH, minInH + 6)
+
+ -- fix the pooling regions so they aren't regenerated with every
+ -- forward(), so testJacobian can work properly
+ local module =
+ nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH, outW, outH)
+ :fixPoolingRegions()
+ local input = nil
+ if batch == 1 then
+ input = torch.Tensor(plane, inH, inW):zero()
+ else
+ input = torch.Tensor(batch, plane, inH, inW):zero()
+ end
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state')
+end
+
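+-- Why fixPoolingRegions() is needed (illustrative sketch): testJacobian calls
+-- forward() repeatedly with perturbed inputs and assumes the module computes a
+-- fixed function. Fractional max pooling normally resamples its pooling
+-- regions on every forward(), so the regions must be pinned first:
+do
+   local m = nn.SpatialFractionalMaxPooling(2, 2, 3, 3):fixPoolingRegions()
+   local x = torch.rand(1, 8, 8)
+   local y1 = m:forward(x):clone()
+   local y2 = m:forward(x):clone()
+   assert((y1 - y2):abs():max() == 0) -- deterministic once regions are fixed
+end
+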
+function nntest.SpatialFractionalMaxPooling_Ratio()
+ -- Fix a reduction ratio, and test with two different input sizes
+ local reductionRatioW = torch.uniform(0.4, 0.74)
+ local reductionRatioH = torch.uniform(0.4, 0.74)
+
+ for tries = 1, 2 do
+ local batch = math.random(1, 3)
+ local plane = math.random(1, 3)
+ local poolSizeW = math.random(2, 3)
+ local poolSizeH = math.random(2, 3)
+
+ local minInW = math.random(5, 8) + poolSizeW
+ local minInH = math.random(5, 8) + poolSizeH
+
+ local inW = math.random(minInW, minInW + 6)
+ local inH = math.random(minInH, minInH + 6)
+
+ -- fix the pooling regions so they aren't regenerated with every
+ -- forward(), so testJacobian can work properly
+ local module =
+ nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH,
+ reductionRatioW, reductionRatioH)
+ :fixPoolingRegions()
+ local input = nil
+ if batch == 1 then
+ input = torch.Tensor(plane, inH, inW):zero()
+ else
+ input = torch.Tensor(batch, plane, inH, inW):zero()
+ end
+
+ -- Make sure that the output size is based on our ratio
+ local output = module:updateOutput(input)
+ if batch == 1 then
+ mytester:asserteq(output:size(3), math.floor(reductionRatioW * inW))
+ mytester:asserteq(output:size(2), math.floor(reductionRatioH * inH))
+ else
+ mytester:asserteq(output:size(4), math.floor(reductionRatioW * inW))
+ mytester:asserteq(output:size(3), math.floor(reductionRatioH * inH))
+ end
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state')
+ end
+end
+
+function nntest.SpatialAveragePooling()
+ for _,count_include_pad in pairs({true,false}) do
+ for _,ceil_mode in pairs({true,false}) do
+ local from = math.random(1,5)
+ local ki = math.random(1,4)
+ local kj = math.random(1,4)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local outi = math.random(4,5)
+ local outj = math.random(4,5)
+ local padW = math.min(math.random(0,1),math.floor(ki/2))
+ local padH = math.min(math.random(0,1),math.floor(kj/2))
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ local mode_string = ceil_mode and 'ceil' or 'floor'
+
+ local module = nn.SpatialAveragePooling(ki, kj, si, sj, padW, padH)
+ if ceil_mode then module:ceil() else module:floor() end
+ if count_include_pad then
+ module:setCountIncludePad()
+ mode_string = mode_string .. ' - count include padding'
+ else
+ module:setCountExcludePad()
+ mode_string = mode_string .. ' - count exclude padding'
+ end
+ local input = torch.Tensor(from, inj, ini):uniform()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error'..mode_string..' on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local batch = math.random(2,5)
+ outi = math.random(4,5)
+ outj = math.random(4,5)
+ local padW = math.min(math.random(0,1),math.floor(ki/2))
+ local padH = math.min(math.random(0,1),math.floor(kj/2))
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ module = nn.SpatialAveragePooling(ki, kj, si, sj, padW, padH)
+ if ceil_mode then module:ceil() else module:floor() end
+ if count_include_pad then
+ module:setCountIncludePad()
+ else
+ module:setCountExcludePad()
+ end
+ input = torch.Tensor(batch,from,inj,ini):uniform()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error'..mode_string..' on state ')
+
+      local ferr, berr = jac.testIO(module, input)
+      mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+      mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+ end
+ end
+ -- test against SpatialSubSampling
+ local from = math.random(1,6)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(6,10)
+ local outj = math.random(6,10)
+ local padW = 0
+ local padH = 0
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ local module = nn.SpatialAveragePooling(ki, kj, si, sj, padW, padH)
+ local sap = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ sap.weight:fill(1.0/(ki*kj))
+ sap.bias:fill(0.0)
+
+ local input = torch.Tensor(from, inj, ini):uniform()
+
+ local output = module:forward(input)
+ local gradInput = module:backward(input, output)
+ local output2 = sap:forward(input)
+ local gradInput2 = sap:updateGradInput(input, output)
+
+ mytester:assertTensorEq(output, output2, 0.000001, torch.typename(module) .. ' forward err ')
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, torch.typename(module) .. ' backward err ')
+
+ -- test against SpatialSubSampling, batch mode
+ local batch = math.random(2,5)
+ outi = math.random(4,8)
+ outj = math.random(4,8)
+ local padW = 0
+ local padH = 0
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ module = nn.SpatialAveragePooling(ki, kj, si, sj, padW, padH)
+ input = torch.Tensor(batch,from,inj,ini):uniform()
+
+ local sap = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ sap.weight:fill(1.0/(ki*kj))
+ sap.bias:fill(0.0)
+
+ local output = module:forward(input)
+ local gradInput = module:backward(input, output)
+ local output2 = sap:forward(input)
+ local gradInput2 = sap:updateGradInput(input, output)
+
+ mytester:assertTensorEq(output, output2, 0.000001, torch.typename(module) .. ' forward err (Batch) ')
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, torch.typename(module) .. ' backward err (Batch) ')
+
+end
+
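+-- Arithmetic note (illustrative): setCountIncludePad()/setCountExcludePad()
+-- only change the divisor of windows overlapping the zero padding. For a 2x2
+-- window on a corner with pad 1 covering a single real pixel v, include-pad
+-- averages v/4 (padded zeros count toward the divisor), exclude-pad gives v/1:
+do
+   local m = nn.SpatialAveragePooling(2, 2, 2, 2, 1, 1)
+   local x = torch.Tensor(1, 2, 2):fill(1)
+   m:setCountIncludePad()
+   assert(math.abs(m:forward(x)[1][1][1] - 0.25) < 1e-6)
+   m:setCountExcludePad()
+   assert(math.abs(m:forward(x)[1][1][1] - 1.0) < 1e-6)
+end
+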
+function nntest.SpatialAdaptiveMaxPooling()
+ local from = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local ini = math.random(1,16)
+ local inj = math.random(1,16)
+
+ local module = nn.SpatialAdaptiveMaxPooling(ki,kj)
+ local input = torch.rand(from,ini,inj)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local nbatch = math.random(1,3)
+ input = torch.rand(nbatch,from,ini,inj)
+ module = nn.SpatialAdaptiveMaxPooling(ki,kj)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state (Batch) ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+
+ -- non-contiguous
+
+ input = torch.rand(from,ini,inj):transpose(2,3)
+ module = nn.SpatialAdaptiveMaxPooling(ki,kj)
+ local inputc = input:contiguous() -- contiguous
+ local output = module:forward(input):clone()
+ local outputc = module:forward(inputc):clone()
+ mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
+ local gradInput = module:backward(input, output):clone()
+ local gradInputc = module:backward(inputc, outputc):clone()
+ mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
+
+ -- non-contiguous batch
+ local nbatch = math.random(1,3)
+ input = torch.rand(nbatch,from,ini,inj):transpose(1,3):transpose(2,4)
+ local inputc = input:contiguous() -- contiguous
+ module = nn.SpatialAdaptiveMaxPooling(ki,kj)
+
+ local output = module:forward(input):clone()
+ local outputc = module:forward(inputc):clone()
+ mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ')
+ local gradInput = module:backward(input, output):clone()
+ local gradInputc = module:backward(inputc, outputc):clone()
+ mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ')
+
+end
+
+function nntest.SpatialAdaptiveAveragePooling()
+ local from = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local ini = math.random(1,16)
+ local inj = math.random(1,16)
+
+ local module = nn.SpatialAdaptiveAveragePooling(ki,kj)
+ local input = torch.rand(from,ini,inj)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local nbatch = math.random(1,3)
+ input = torch.rand(nbatch,from,ini,inj)
+ module = nn.SpatialAdaptiveAveragePooling(ki,kj)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state (Batch) ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+
+ -- non-contiguous
+
+ input = torch.rand(from,ini,inj):transpose(2,3)
+ module = nn.SpatialAdaptiveAveragePooling(ki,kj)
+ local inputc = input:contiguous() -- contiguous
+ local output = module:forward(input):clone()
+ local outputc = module:forward(inputc):clone()
+ mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
+ local gradInput = module:backward(input, output):clone()
+ local gradInputc = module:backward(inputc, outputc):clone()
+ mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
+
+ -- non-contiguous batch
+ local nbatch = math.random(1,3)
+ input = torch.rand(nbatch,from,ini,inj):transpose(1,3):transpose(2,4)
+ local inputc = input:contiguous() -- contiguous
+ module = nn.SpatialAdaptiveAveragePooling(ki,kj)
+
+ local output = module:forward(input):clone()
+ local outputc = module:forward(inputc):clone()
+ mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ')
+ local gradInput = module:backward(input, output):clone()
+ local gradInputc = module:backward(inputc, outputc):clone()
+ mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ')
+
+end
+
+function nntest.SpatialLPPooling()
+ local fanin = math.random(1,4)
+ local osizex = math.random(1,4)
+ local osizey = math.random(1,4)
+ local p = 2
+ local mx = math.random(2,6)
+ local my = math.random(2,6)
+ local dx = math.random(2,mx)
+ local dy = math.random(2,my)
+ local sizex = osizex*mx
+ local sizey = osizey*my
+ local module = nn.SpatialLPPooling(fanin,p,mx,my,dx,dy)
+ local input = torch.rand(fanin,sizey,sizex)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
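+-- Formula note (illustrative): LP pooling computes (sum_i x_i^p)^(1/p) over
+-- each window; with p = 2 as tested above this is the windowed L2 norm, e.g.
+-- a 2x2 window holding {1, 2, 2, 4} pools to sqrt(1 + 4 + 4 + 16) = 5:
+do
+   assert(math.sqrt(1^2 + 2^2 + 2^2 + 4^2) == 5)
+end
+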
+function nntest.Sum()
+ -- 1D
+ local ini = math.random(3,7)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.Sum(1)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- negative dimension
+ local module = nn.Sum(-1)
+ local input = torch.Tensor({1, 2, 3})
+ local expected = torch.Tensor({6})
+ local output = module:forward(input)
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+
+ -- batch
+ local dimension = 1
+ local module = nn.Sum(dimension, 1)
+ local input = torch.Tensor({{1, 2, 3},{4, 5, 6}})
+ local expected = torch.Tensor({6, 15})
+ local output = module:forward(input)
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- mean + batch
+ local dimension = 1
+ local module = nn.Sum(dimension, 1, true)
+ local input = torch.Tensor({{1, 2, 3},{4, 5, 6}})
+ local expected = input:mean(dimension + 1)
+ local output = module:forward(input)
+
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- squeeze
+ local dimension = 1
+ local module = nn.Sum(dimension, nil, nil, false)
+ local input = torch.Tensor({{1, 2, 3},{4, 5, 6}})
+ local expected = torch.Tensor({5, 7, 9}):view(1, 3)
+ local output = module:forward(input)
+
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+ mytester:assert(output:isSameSizeAs(expected), 'sizes mismatch')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- squeeze + batch
+ local dimension = 1
+ local module = nn.Sum(dimension, 1, nil, false)
+ local input = torch.Tensor({{1, 2, 3},{4, 5, 6}})
+ local expected = torch.Tensor({6, 15}):view(2, 1)
+ local output = module:forward(input)
+
+ mytester:assertlt(torch.norm(output-expected), precision, 'error on forward ')
+ mytester:assert(output:isSameSizeAs(expected), 'sizes mismatch')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- 3D
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Sum(torch.random(1,3))
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
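+-- Signature note (inferred from the calls above, so treat as a sketch):
+-- nn.Sum(dimension[, nInputDims[, sizeAverage[, squeeze]]]). nInputDims lets
+-- the module distinguish batched input, sizeAverage turns the sum into a
+-- mean, and squeeze=false keeps the reduced dimension with size 1:
+do
+   local m = nn.Sum(1, nil, nil, false)
+   local y = m:forward(torch.Tensor({{1, 2}, {3, 4}}))
+   assert(y:isSameSizeAs(torch.Tensor(1, 2))) -- reduced dim kept as size 1
+end
+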
+function nntest.Tanh()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Tanh()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision , 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.TemporalConvolution()
+ -- 1D
+ local from = math.random(1,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local si = math.random(1,4)
+ local outi = math.random(5,7)
+ local ini = (outi-1)*si+ki
+ local module = nn.TemporalConvolution(from, to, ki,si)
+ local input = torch.Tensor(ini, from):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update]')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update]')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- 2D
+ local nBatchFrame = 4
+ local input = torch.Tensor(nBatchFrame, ini, from):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update]')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update]')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- 2D matches 1D
+ local output = module:forward(input):clone()
+ local outputGrad = torch.randn(output:size())
+ local inputGrad = module:backward(input, outputGrad):clone()
+
+ local input1D = input:select(1, 2)
+ local output1D = module:forward(input1D)
+ local outputGrad1D = outputGrad:select(1, 2)
+ local inputGrad1D = module:backward(input1D, outputGrad1D)
+
+   mytester:assertTensorEq(output:select(1,2), output1D, 0.000001, 'error on 2D vs 1D forward')
+   mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001, 'error on 2D vs 1D backward')
+end
+
+function nntest.TemporalDynamicKMaxPooling()
+ local features = math.random(5,10)
+ local seqLen = math.random(6,9)
+ local minK = math.random(3,6)
+ local factor = math.random(1,100)*0.01
+ local nBatchFrame = math.random(2,4)
+ local module = nn.TemporalDynamicKMaxPooling(minK, factor)
+
+ -- 1D
+ local input = torch.Tensor(seqLen, features)
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+
+ -- 2D
+ local input = torch.Tensor(nBatchFrame, seqLen, features)
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+
+ -- 2D matches 1D
+ local output = module:forward(input):clone()
+ local outputGrad = torch.randn(output:size())
+ local inputGrad = module:backward(input, outputGrad):clone()
+
+ local input1D = input:select(1, 2)
+ local output1D = module:forward(input1D)
+ local outputGrad1D = outputGrad:select(1, 2)
+ local inputGrad1D = module:backward(input1D, outputGrad1D)
+
+   mytester:assertTensorEq(output:select(1,2), output1D, 0.000001, 'error on 2D vs 1D forward')
+   mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001, 'error on 2D vs 1D backward')
+end
+
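+-- Note (illustrative, assuming the usual dynamic k-max formulation): the
+-- module keeps, per feature, the k largest values along the sequence in their
+-- original order, with k derived from the sequence length roughly as
+-- k = max(minK, ceil(factor * seqLen)), so longer sequences keep more frames
+-- but never fewer than minK:
+do
+   local minK, factor, seqLen = 4, 0.5, 9
+   assert(math.max(minK, math.ceil(factor * seqLen)) == 5)
+end
+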
+function nntest.TemporalSubSampling()
+ local from = math.random(1,5)
+ local ki = math.random(1,6)
+ local si = math.random(1,4)
+ local outi = math.random(6,9)
+ local ini = (outi-1)*si+ki
+ local module = nn.TemporalSubSampling(from, ki, si)
+ local input = torch.Tensor(ini, from):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.TemporalRowConvolution()
+ if true then return end -- until this unit test is fixed...
+ local from = math.random(1,5)
+ local ki = math.random(1,5)
+ local si = math.random(1,2)
+ local outi = math.random(5,7)
+ local ini = (outi-1)*si+ki
+
+ local function jacTest(module)
+
+ local input
+ if module.featFirst then
+ input = torch.Tensor(from, ini):zero()
+ else
+ input = torch.Tensor(ini, from):zero()
+ end
+
+ -- 1D
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, "error on state" )
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err, precision, "error on weight ")
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err, precision, "error on bias ")
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err, precision, "error on weight [direct update] ")
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err, precision, "error on bias [direct update] ")
+ end
+
+ for t, err in pairs(jac.testAllUpdate(module, input, "weight", "gradWeight")) do
+ mytester:assertlt(err, precision, string.format(
+ "error on weight [%s] ", t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ "error on bias [%s] ", t))
+ end
+ end
+
+ -- 2D
+ local nBatchFrame = 4
+ if module.featFirst then
+ input = torch.Tensor(nBatchFrame, from, ini):zero()
+ else
+ input = torch.Tensor(nBatchFrame, ini, from):zero()
+ end
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, "error on state" )
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err, precision, "error on weight ")
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err, precision, "error on bias ")
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err, precision, "error on weight [direct update] ")
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err, precision, "error on bias [direct update] ")
+ end
+
+ for t, err in pairs(jac.testAllUpdate(module, input, "weight", "gradWeight")) do
+ mytester:assertlt(err, precision, string.format(
+ "error on weight [%s] ", t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ "error on bias [%s] ", t))
+ end
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. " - i/o forward err ", precision)
+      mytester:eq(0, berr, torch.typename(module) .. " - i/o backward err ", precision)
+
+ -- 2D matches 1D
+ local output = module:forward(input):clone()
+ local outputGrad = torch.randn(output:size())
+ local inputGrad = module:backward(input, outputGrad):clone()
+
+ local input1D = input:select(1, 2)
+ local output1D = module:forward(input1D)
+ local outputGrad1D = outputGrad:select(1, 2)
+ local inputGrad1D = module:backward(input1D, outputGrad1D)
+
+ mytester:assertTensorEq(output:select(1,2), output1D, 0.000001,
+ "error on 2D vs 1D forward")
+ mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001,
+ "error on 2D vs 1D backward")
+ end
+
+ local module = nn.TemporalRowConvolution(from, ki, si)
+ jacTest(module)
+ module:noBias()
+ jacTest(module)
+ module.bias = torch.Tensor(module.inputFrameSize):zero()
+ module.gradBias = torch.Tensor(module.inputFrameSize):zero()
+ module:reset()
+ module.featFirst = true
+ jacTest(module)
+ module:noBias()
+   jacTest(module)
+end
+
+function nntest.TemporalMaxPooling()
+ local from = math.random(2,4)
+ local ki = math.random(5,7)
+ local si = math.random(1,2)
+ local outi = math.random(30,40)
+ local ini = (outi-1)*si+ki
+ local module = nn.TemporalMaxPooling(ki, si)
+ local input = torch.Tensor(ini, from):zero()
+
+ -- 1D
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- 2D
+ local nBatchFrame = 2
+ local input = torch.Tensor(nBatchFrame, ini, from):zero()
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- 2D matches 1D
+ local output = module:forward(input):clone()
+ local outputGrad = torch.randn(output:size())
+ local inputGrad = module:backward(input, outputGrad):clone()
+
+ local input1D = input:select(1, 2)
+ local output1D = module:forward(input1D)
+ local outputGrad1D = outputGrad:select(1, 2)
+ local inputGrad1D = module:backward(input1D, outputGrad1D)
+
+   mytester:assertTensorEq(output:select(1,2), output1D, 0.000001, 'error on 2D vs 1D forward')
+   mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001, 'error on 2D vs 1D backward')
+end
+
+function nntest.VolumetricFullConvolution_simple_test()
+ local module = nn.VolumetricFullConvolution(3, 1, 3, 3, 3, 3, 3, 3);
+ module.weight:fill(1);
+ module.bias:fill(0.1);
+
+ local input = torch.Tensor(1, 3, 2, 2, 2):zero();
+ for c = 1,3 do
+ input[1][c][1][1][1] = 1
+ end
+ local output = module:forward(input)
+ for t = 1,6 do
+ for h = 1,6 do
+ for w = 1,6 do
+ if t <= 3 and h <= 3 and w <= 3 then
+ mytester:assertlt(output[1][1][t][h][w] - 3.1, precision, 'error on forward ')
+ else
+ mytester:assertlt(output[1][1][t][h][w] - 0.1, precision, 'error on forward ')
+ end
+ end
+ end
+ end
+
+ module:zeroGradParameters()
+ local gradOut = torch.Tensor(1, 1, 6, 6, 6):fill(0.1);
+ local gradIn = module:backward(input, gradOut)
+ for t = 1,2 do
+ for h = 1,2 do
+ for w = 1,2 do
+ mytester:assertlt(gradIn[1][1][t][h][w] - 2.7, precision,
+ 'error on backward input gradients ')
+ end
+ end
+ end
+
+ mytester:assertlt(module.gradBias[1] - 21.6, precision,
+ 'error on backward gradBias ')
+ for c = 1,3 do
+ for t = 1,3 do
+ for h = 1,3 do
+ for w = 1,3 do
+ mytester:assertlt(module.gradWeight[c][1][t][h][w] - 0.1, precision,
+ 'error on backward weight gradients ')
+ end
+ end
+ end
+ end
+end
+
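+-- Size note (illustrative): full (transposed) convolution inverts the usual
+-- convolution size formula: out = (in-1)*stride - 2*pad + k + adj. The simple
+-- test above uses in=2, k=3, stride=3, pad=0, adj=0, hence its 6x6x6 output:
+do
+   local inp, k, s, pad, adj = 2, 3, 3, 0, 0
+   assert((inp - 1)*s - 2*pad + k + adj == 6)
+end
+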
+function nntest.VolumetricFullConvolution()
+ local from = math.random(2,3)
+ local to = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = ki
+ local st = math.random(1,3)
+ local si = math.random(1,3)
+ local sj = si
+ local int = math.random(3,4)
+ local ini = math.random(3,4)
+ local inj = math.random(3,4)
+ local bs = math.random(1, 6)
+ local module = nn.VolumetricFullConvolution(from, to, kt, ki, kj, st, si, sj)
+
+ local input = torch.Tensor(bs, from, int, ini, inj):zero()
+
+ local function jacTests(module)
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+ end
+
+ jacTests(module)
+ module:noBias()
+ jacTests(module)
+ module.bias = torch.Tensor(module.nOutputPlane):zero()
+ module.gradBias = torch.Tensor(module.nOutputPlane):zero()
+ module:reset()
+ jacTests(module)
+end
+
+function nntest.VolumetricFullConvolutionDualInput()
+ local from = math.random(2,3)
+ local to = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local dt = math.random(1,3)
+ local di = math.random(1,3)
+ local dj = math.random(1,3)
+ local padT = math.random(0,2)
+ local padW = math.random(0,2)
+ local padH = math.random(0,2)
+ local outt = math.random(5,9)
+ local outi = math.random(5,9)
+ local outj = math.random(5,9)
+ local int = math.floor((outt + padT*2 - kt)/dt + 1)
+ local ini = math.floor((outi + padW*2 - ki)/di + 1)
+ local inj = math.floor((outj + padH*2 - kj)/dj + 1)
+ local adjT = (outt + 2 * padT - kt) % dt
+ local adjW = (outi + 2 * padW - ki) % di
+ local adjH = (outj + 2 * padH - kj) % dj
+ local targetTensor = torch.Tensor(outt, outj, outi):zero()
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local module = nn.VolumetricFullConvolution(from, to, kt, ki, kj, dt, di, dj, padT, padW, padH)
+ local moduleRef = nn.VolumetricFullConvolution(from, to, kt, ki, kj, dt, di, dj, padT, padW, padH, adjT, adjW, adjH)
+ moduleRef.weight:copy(module.weight)
+ moduleRef.bias:copy(module.bias)
+
+ -- Check that the required output size matches the actual output size
+ -- when using the dual input mode
+ local output = module:forward({input, targetTensor})
+ mytester:asserteq(output:size(2), outt, 'output depth error')
+ mytester:asserteq(output:size(3), outj, 'output height error')
+ mytester:asserteq(output:size(4), outi, 'output width error')
+
+ -- Check that backward and forward match the reference module
+ local outputRef = moduleRef:forward(input)
+ mytester:asserteq(0, (output-outputRef):abs():max(), torch.typename(module) .. ' - output err ')
+ local gradOutput = outputRef:clone():uniform()
+ local gradInputRef = moduleRef:backward(input, gradOutput)
+ local gradInput = module:backward({input, targetTensor}, gradOutput)
+ mytester:asserteq(0, (gradInput[1]-gradInputRef):abs():max(), torch.typename(module) .. ' - gradInput[1] err ')
+
+ -- Check that gradInput[2] is the singleton tensor {0}
+ mytester:asserteq(gradInput[2]:storage():size(), 1, torch.typename(module) .. ' - gradInput[2] size err ')
+ mytester:asserteq(gradInput[2]:storage()[1], 0, torch.typename(module) .. ' - gradInput[2] value err ')
+end
+
+function nntest.VolumetricConvolution()
+ local from = math.random(2,4)
+ local to = math.random(1,4)
+ local kt = math.random(1,4)
+ local ki = math.random(1,4)
+ local kj = math.random(1,4)
+ local st = math.random(1,3)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local padT = math.random(0,2)
+ local padW = math.random(0,2)
+ local padH = math.random(0,2)
+ local outt = math.random(5,7)
+ local outi = math.random(5,7)
+ local outj = math.random(5,7)
+ local int = (outt-1)*st+kt-padT*2
+ local ini = (outi-1)*si+ki-padW*2
+ local inj = (outj-1)*sj+kj-padH*2
+ local module = nn.VolumetricConvolution(from, to, kt, ki, kj, st, si, sj, padT, padW, padH)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local function jacTests(module)
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+ end
+
+ jacTests(module)
+ module:noBias()
+ jacTests(module)
+ module.bias = torch.Tensor(module.nOutputPlane):zero()
+ module.gradBias = torch.Tensor(module.nOutputPlane):zero()
+ module:reset()
+ jacTests(module)
+end
+
+function nntest.VolumetricDilatedConvolution()
+ local from = math.random(1,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local kk = math.random(1,5)
+ local di = math.random(1,4)
+ local dj = math.random(1,4)
+ local dk = math.random(1,4)
+ local padW = 0 -- math.random(0,2)
+ local padH = 0 -- math.random(0,2)
+ local padT = 0 -- math.random(0,2)
+ local outi = math.random(2,3)
+ local outj = math.random(2,5)
+ local outk = math.random(2,5)
+ local dilationW = math.random(1,3)
+ local dilationH = math.random(1,3)
+ local dilationT = math.random(1,3)
+ local ini = (outi - 1) * di - 2 * padW + dilationW * (ki-1) + 1
+ local inj = (outj - 1) * dj - 2 * padH + dilationH * (kj-1) + 1
+ local ink = (outk - 1) * dk - 2 * padT + dilationT * (kk-1) + 1
+
+ local module = nn.VolumetricDilatedConvolution(from, to, kk, ki, kj, dk, di, dj, padT, padW, padH, dilationT, dilationW, dilationH)
+ local input = torch.Tensor(from, ink, inj, ini):zero()
+
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- batch
+
+ local batch = math.random(2,5)
+
+ module = nn.VolumetricDilatedConvolution(from, to, kk, ki, kj, dk, di, dj, padT, padW, padH, dilationT, dilationW, dilationH)
+ input = torch.Tensor(batch,from,ink,inj,ini):zero()
+
+ -- Check that the required output size matches the actual output size
+ local output = module:forward(input)
+   mytester:asserteq(output:size(3), outk, 'output depth error')
+ mytester:asserteq(output:size(4), outj, 'output height error')
+ mytester:asserteq(output:size(5), outi, 'output width error')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+         'batch error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- non-contiguous
+ local input = torch.randn(batch,from,ink,ini,inj):transpose(4,5) -- non-contiguous
+ local inputc = input:contiguous() -- contiguous
+ local output = module:forward(input)
+ local outputc = module:forward(inputc)
+   mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
+   local gradInput = module:backward(input, output)
+   local gradInputc = module:backward(inputc, outputc)
+   mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
+end
+
+function nntest.VolumetricConvolutionBatchCompare()
+ local from = math.random(2,3)
+ local to = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local st = math.random(2,3)
+ local si = math.random(2,3)
+ local sj = math.random(2,3)
+ local padT = math.random(0,2)
+ local padW = math.random(0,2)
+ local padH = math.random(0,2)
+ local outt = math.random(3,4)
+ local outi = math.random(3,4)
+ local outj = math.random(3,4)
+ local int = (outt-1)*st+kt-padT*2
+ local ini = (outi-1)*si+ki-padW*2
+ local inj = (outj-1)*sj+kj-padH*2
+ local module = nn.VolumetricConvolution(from, to, kt, ki, kj, st, si, sj, padT, padW, padH)
+ module:zeroGradParameters()
+ local input = torch.randn(from, int, inj, ini)
+ batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
+end
+
+function nntest.VolumetricAveragePooling()
+ local from = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local st = math.random(2,3)
+ local si = math.random(2,3)
+ local sj = math.random(2,3)
+ local outt = math.random(3,4)
+ local outi = math.random(3,4)
+ local outj = math.random(3,4)
+ local int = (outt-1)*st+kt
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local nbatch = math.random(2,3)
+ module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj)
+ input = torch.Tensor(nbatch, from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state (Batch) ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+end
+
+function nntest.VolumetricMaxPooling()
+ local from = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local st = math.random(2,3)
+ local si = math.random(2,3)
+ local sj = math.random(2,3)
+ local outt = math.random(3,4)
+ local outi = math.random(3,4)
+ local outj = math.random(3,4)
+ local padT = math.min(math.random(0,2),math.floor(kt/2))
+ local padW = math.min(math.random(0,2),math.floor(ki/2))
+ local padH = math.min(math.random(0,2),math.floor(kj/2))
+ local int = (outt-1)*st+kt-2*padT
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+ local module = nn.VolumetricMaxPooling(kt, ki, kj, st, si, sj, padT, padW, padH)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local nbatch = math.random(2,3)
+ module = nn.VolumetricMaxPooling(kt, ki, kj, st, si, sj, padT, padW, padH)
+ input = torch.Tensor(nbatch, from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state (Batch) ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+end
+
+function nntest.VolumetricDilatedMaxPooling()
+ for _,ceil_mode in pairs({true,false}) do
+ local from = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local st = math.random(2,3)
+ local si = math.random(2,3)
+ local sj = math.random(2,3)
+ local outt = math.random(3,4)
+ local outi = math.random(3,4)
+ local outj = math.random(3,4)
+ local padT = math.min(math.random(0,1),math.floor(kt/2))
+ local padW = math.min(math.random(0,1),math.floor(ki/2))
+ local padH = math.min(math.random(0,1),math.floor(kj/2))
+ local dilationT = math.random(1,3)
+ local dilationW = math.random(1,3)
+ local dilationH = math.random(1,3)
+ local int = (outt-1)*st+(dilationT*(kt-1)+1)-2*padT
+ local ini = (outi-1)*si+(dilationW*(ki-1)+1)-2*padW
+ local inj = (outj-1)*sj+(dilationH*(kj-1)+1)-2*padH
+
+ local ceil_string = ceil_mode and 'ceil' or 'floor'
+ local module = nn.VolumetricDilatedMaxPooling(kt,ki,kj,st,si,sj,padT,padW,padH,dilationT,dilationW,dilationH)
+ if ceil_mode then module:ceil() else module:floor() end
+ local input = torch.rand(from,int,inj,ini)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+ -- batch
+ local nbatch = math.random(2,5)
+ input = torch.rand(nbatch,from,int,inj,ini)
+ module = nn.VolumetricDilatedMaxPooling(kt,ki,kj,st,si,sj,padT,padW,padH,dilationT,dilationW,dilationH)
+ if ceil_mode then module:ceil() else module:floor() end
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state (Batch)')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
+ end
+end
+
+function nntest.VolumetricFractionalMaxPooling()
+ local batch = math.random(1, 3)
+ local plane = math.random(1, 3)
+ local outT = math.random(1, 7)
+ local outW = math.random(1, 7)
+ local outH = math.random(1, 7)
+ local poolSizeT = math.random(2, 4)
+ local poolSizeW = math.random(2, 4)
+ local poolSizeH = math.random(2, 4)
+
+ local minInT = outT + poolSizeT
+ local minInW = outW + poolSizeW
+ local minInH = outH + poolSizeH
+
+ local inT = math.random(minInT, minInT + 6)
+ local inW = math.random(minInW, minInW + 6)
+ local inH = math.random(minInH, minInH + 6)
+
+ -- fix the pooling regions so they aren't regenerated with every
+ -- forward(), so testJacobian can work properly
+ local module =
+ nn.VolumetricFractionalMaxPooling(poolSizeT, poolSizeW, poolSizeH, outT, outW, outH)
+ :fixPoolingRegions()
+ local input = nil
+ if batch == 1 then
+ input = torch.Tensor(plane, inH, inW, inT):zero()
+ else
+ input = torch.Tensor(batch, plane, inH, inW, inT):zero()
+ end
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state')
+end
+
+function nntest.VolumetricFractionalMaxPooling_Ratio()
+ -- Fix a reduction ratio, and test with two different input sizes
+ local reductionRatioT = torch.uniform(0.4, 0.74)
+ local reductionRatioW = torch.uniform(0.4, 0.74)
+ local reductionRatioH = torch.uniform(0.4, 0.74)
+
+ for tries = 1, 2 do
+ local batch = math.random(1, 3)
+ local plane = math.random(1, 3)
+ local poolSizeT = math.random(2, 3)
+ local poolSizeW = math.random(2, 3)
+ local poolSizeH = math.random(2, 3)
+
+ local minInT = math.random(5, 8) + poolSizeT
+ local minInW = math.random(5, 8) + poolSizeW
+ local minInH = math.random(5, 8) + poolSizeH
+
+ local inT = math.random(minInT, minInT + 6)
+ local inW = math.random(minInW, minInW + 6)
+ local inH = math.random(minInH, minInH + 6)
+
+ -- fix the pooling regions so they aren't regenerated with every
+ -- forward(), so testJacobian can work properly
+ local module =
+ nn.VolumetricFractionalMaxPooling(poolSizeT, poolSizeW, poolSizeH,
+ reductionRatioT, reductionRatioW,
+ reductionRatioH)
+ :fixPoolingRegions()
+ local input = nil
+ if batch == 1 then
+ input = torch.Tensor(plane, inH, inW, inT):zero()
+ else
+ input = torch.Tensor(batch, plane, inH, inW, inT):zero()
+ end
+
+ -- Make sure that the output size is based on our ratio
+ local output = module:updateOutput(input)
+ if batch == 1 then
+ mytester:asserteq(output:size(4), math.floor(reductionRatioT * inT))
+ mytester:asserteq(output:size(3), math.floor(reductionRatioW * inW))
+ mytester:asserteq(output:size(2), math.floor(reductionRatioH * inH))
+ else
+ mytester:asserteq(output:size(5), math.floor(reductionRatioT * inT))
+ mytester:asserteq(output:size(4), math.floor(reductionRatioW * inW))
+ mytester:asserteq(output:size(3), math.floor(reductionRatioH * inH))
+ end
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state')
+ end
+end
+
+function nntest.VolumetricMaxUnpooling()
+ local from = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local st, si, sj = kt, ki, kj
+ local outt = math.random(3,4)
+ local outi = math.random(3,4)
+ local outj = math.random(3,4)
+ local padT = math.min(math.random(0,2),math.floor(kt/2))
+ local padW = math.min(math.random(0,2),math.floor(ki/2))
+ local padH = math.min(math.random(0,2),math.floor(kj/2))
+ local int = (outt-1)*st+kt-2*padT
+ local ini = (outi-1)*si+ki-2*padW
+ local inj = (outj-1)*sj+kj-2*padH
+
+ local poolingModule = nn.VolumetricMaxPooling(kt, ki, kj, st, si, sj, padT, padW, padH)
+ local module = nn.VolumetricMaxUnpooling(poolingModule)
+
+ local original = torch.rand(from,int,inj,ini)
+ local input = poolingModule:forward(original)
+ local output = module:forward(input)
+ mytester:assert(output:isSameSizeAs(original),'VolumetricMaxUnpooling output size err')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ local nbatch = math.random(2,3)
+ original = torch.rand(nbatch,from,int,inj,ini)
+ input = poolingModule:forward(original)
+ output = module:forward(input)
+
+ mytester:assert(output:isSameSizeAs(original),'VolumetricMaxUnpooling batch output size err')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on Batch')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ', precision)
+end
+
+function nntest.VolumetricMaxPooling_boundary()
+ -- simple kernel 2x2x2 with striding 2x2x2
+ local module = nn.VolumetricMaxPooling(2, 2, 2, 2, 2, 2):ceil()
+ local nip = math.random(3,256)
+ local input = torch.rand(nip, 2, 7, 7)
+
+ -- do a forward pass
+ local output = module:forward(input)
+
+ -- checking output size
+ mytester:asserteq(output:size(1), nip, 'wrong output channels')
+ mytester:asserteq(output:size(2), 1, 'wrong output temporal length')
+ mytester:asserteq(output:size(3), 4, 'wrong output height')
+ mytester:asserteq(output:size(4), 4, 'wrong output width')
+
+ -- checking output signals at top right
+ for c = 1,nip do
+ local max_val = input[c][1][1][7]
+ for t = 1,2 do
+ for h = 1,2 do
+ max_val = math.max(max_val, input[c][t][h][7])
+ end
+ end
+ mytester:asserteq(output[c][1][1][4], max_val, 'wrong forward execution')
+ end
+ -- checking output signals at bottom left
+ for c = 1,nip do
+ local max_val = input[c][1][7][1]
+ for t = 1,2 do
+ for w = 1,2 do
+ max_val = math.max(max_val, input[c][t][7][w])
+ end
+ end
+ mytester:asserteq(output[c][1][4][1], max_val, 'wrong forward execution')
+ end
+
+ -- check output signals at right bottom
+ for c = 1,nip do
+ local max_val = math.max(input[c][1][7][7], input[c][2][7][7])
+ mytester:asserteq(output[c][1][4][4], max_val, 'wrong forward execution')
+ end
+
+   -- backward is already exercised by nntest.VolumetricMaxPooling;
+   -- this test only covers the boundary cases
+end
+
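+-- Boundary arithmetic (illustrative): in ceil mode the output size is
+-- ceil((in + 2*pad - k)/stride) + 1 rather than floor(...) + 1. For the 7x7
+-- planes above with k=2, stride=2, pad=0 that is ceil(2.5)+1 = 4, so the last
+-- window hangs off the edge and covers a single input row/column:
+do
+   local inp, k, s = 7, 2, 2
+   assert(math.floor((inp - k)/s) + 1 == 3) -- floor mode would give 3
+   assert(math.ceil((inp - k)/s) + 1 == 4)  -- ceil mode gives the 4 tested
+end
+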
+function nntest.Module_getParameters_1()
+ local n = nn.Sequential()
+ n:add( nn.Linear(10,10) )
+ local p = n:getParameters()
+
+ mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'getParameters(): weights wrong')
+ mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'getParameters(): bias wrong')
+end
+
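+-- Sketch (illustrative): getParameters() gathers the module's weight and bias
+-- into one contiguous flat vector and re-points them at views into it, so
+-- writes through the flat vector are visible in the module and vice versa:
+do
+   local lin = nn.Linear(10, 10)
+   local p = lin:getParameters() -- 100 weights followed by 10 biases
+   p:fill(0.5)
+   assert(lin.weight[1][1] == 0.5 and lin.bias[1] == 0.5)
+end
+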
+function nntest.Module_getParameters_2()
+ local n = nn.Sequential()
+ n:add( nn.Linear(10,10) )
+ local _ = n:getParameters()
+
+ n:add( nn.Linear(10,10) )
+ local p = n:getParameters()
+
+ mytester:asserteq((p[{ {111,210} }] - n.modules[2].weight):norm(), 0, 'error when appending new module')
+ mytester:asserteq((p[{ {211,220} }] - n.modules[2].bias):norm(), 0, 'error when appending new module')
+end
+
+function nntest.Module_getParameters_3()
+ local n = nn.Sequential()
+ n:add( nn.Linear(10,10) )
+ n:add( n.modules[1]:clone() )
+ local p = n:getParameters()
+
+ mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'error when using cloning')
+ mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'error when using cloning')
+
+ mytester:asserteq((p[{ {111,210} }] - n.modules[2].weight):norm(), 0, 'error when using cloning')
+ mytester:asserteq((p[{ {211,220} }] - n.modules[2].bias):norm(), 0, 'error when using cloning')
+
+ mytester:asserteq((p[{ {111,210} }] - n.modules[1].weight):norm(), 0, 'error when using cloning')
+ mytester:asserteq((p[{ {211,220} }] - n.modules[1].bias):norm(), 0, 'error when using cloning')
+
+ n:reset()
+
+ mytester:assertgt((p[{ {111,210} }] - n.modules[1].weight):norm(), 0, 'error when using cloning')
+ mytester:assertgt((p[{ {211,220} }] - n.modules[1].bias):norm(), 0, 'error when using cloning')
+end
+
+function nntest.Module_getParameters_4()
+ local n = nn.Sequential()
+ n:add( nn.Linear(10,10) )
+ n:add( n.modules[1]:clone() )
+ local _ = n:getParameters()
+
+ n:add(nn.Linear(10,10))
+ local p = n:getParameters()
+
+ mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'error when using cloning')
+ mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'error when using cloning')
+
+ mytester:asserteq((p[{ {111,210} }] - n.modules[2].weight):norm(), 0, 'error when using cloning')
+ mytester:asserteq((p[{ {211,220} }] - n.modules[2].bias):norm(), 0, 'error when using cloning')
+
+ mytester:asserteq((p[{ {221,320} }] - n.modules[3].weight):norm(), 0, 'error when using cloning')
+ mytester:asserteq((p[{ {321,330} }] - n.modules[3].bias):norm(), 0, 'error when using cloning')
+
+ mytester:asserteq(p:nElement(), 3*(10*10+10), 'error: incorrect number of elements in flat vector')
+end
+
+function nntest.Module_getParameters_5()
+ local n = nn.Sequential()
+ n:add( nn.Linear(10,10) )
+ n:add( n.modules[1]:clone('weight','bias','gradWeight','gradBias') )
+ local p = n:getParameters()
+
+ mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'error when using cloning+sharing')
+ mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'error when using cloning+sharing')
+
+ mytester:asserteq((p[{ {1,100} }] - n.modules[2].weight):norm(), 0, 'error when using cloning+sharing')
+ mytester:asserteq((p[{ {101,110} }] - n.modules[2].bias):norm(), 0, 'error when using cloning+sharing')
+
+ n:reset()
+
+ mytester:asserteq((p[{ {1,100} }] - n.modules[2].weight):norm(), 0, 'error when using cloning+sharing')
+ mytester:asserteq((p[{ {101,110} }] - n.modules[2].bias):norm(), 0, 'error when using cloning+sharing')
+
+ mytester:asserteq(p:nElement(), (10*10+10), 'error: incorrect number of elements in flat vector')
+end
+
+function nntest.Module_getParameters_6()
+ local n = nn.Sequential()
+ n:add( nn.Linear(10,10) )
+ n:add( n.modules[1]:clone('weight','bias','gradWeight','gradBias') )
+ local _ = n:getParameters()
+
+ n:add(nn.Linear(10,10))
+ local p = n:getParameters()
+
+ mytester:asserteq((p[{ {1,100} }] - n.modules[1].weight):norm(), 0, 'error when using cloning+sharing')
+ mytester:asserteq((p[{ {101,110} }] - n.modules[1].bias):norm(), 0, 'error when using cloning+sharing')
+
+ mytester:asserteq((p[{ {1,100} }] - n.modules[2].weight):norm(), 0, 'error when using cloning+sharing')
+ mytester:asserteq((p[{ {101,110} }] - n.modules[2].bias):norm(), 0, 'error when using cloning+sharing')
+
+ mytester:asserteq((p[{ {111,210} }] - n.modules[3].weight):norm(), 0, 'error when using cloning+sharing')
+ mytester:asserteq((p[{ {211,220} }] - n.modules[3].bias):norm(), 0, 'error when using cloning+sharing')
+
+ mytester:asserteq(p:nElement(), 2*(10*10+10), 'error: incorrect number of elements in flat vector')
+end
+
+function nntest.Module_getParameters_7()
+ local n = nn.Sequential()
+ n:add( nn.Linear(10,10) )
+ n:add( n.modules[1]:clone('weight','bias','gradWeight','gradBias') )
+ local _ = n:getParameters()
+
+ n:add(nn.Linear(10,10))
+ local _ = n:getParameters()
+
+ local n1 = nn.Sequential()
+ n1:add( nn.Linear(10,10) )
+
+ local n2 = nn.Sequential()
+ n2:add( nn.Linear(10,10) )
+
+ local n = nn.Sequential()
+ n:add( n1 )
+ n:add( n2 )
+
+ local _ = n:getParameters()
+
+ local nf = nn.Sequential()
+ nf:add( n1 )
+ nf:add( nn.Linear(10,1) )
+
+ local p = nf:getParameters()
+
+ mytester:asserteq((p[{ {1,100} }] - n1.modules[1].weight):norm(), 0, 'error when using cloning+partial realloc')
+ mytester:asserteq((p[{ {101,110} }] - n1.modules[1].bias):norm(), 0, 'error when using cloning+partial realloc')
+
+ mytester:asserteq((p[{ {111,120} }] - nf.modules[2].weight):norm(), 0, 'error when using cloning+partial realloc')
+ mytester:asserteq((p[{ {121,121} }] - nf.modules[2].bias):norm(), 0, 'error when using cloning+partial realloc')
+
+ mytester:asserteq(p:nElement(), 121, 'error: incorrect number of elements in flat vector')
+end
+
+function nntest.Module_getParameters_8()
+ local function makeMLP(nin, ns)
+ local net = nn.Sequential()
+
+ for k,v in ipairs(ns) do
+ net:add(nn.Linear(nin, v))
+ nin = v
+ end
+ local _,_ = net:getParameters()
+ return net
+ end
+
+ local mlp1 = makeMLP(10, {10,10})
+ local mlp2 = makeMLP(10, {10,10})
+
+ local net = nn.Sequential():add(mlp1:get(1))
+ :add(mlp2:get(1))
+
+ -- clone the second MLP to ensure that the weights before calling getParameters are preserved
+ mlp2 = mlp2:clone()
+
+ local p, _ = net:getParameters()
+
+ mytester:asserteq((p[{ {1,100} }] - net.modules[1].weight):norm(), 0, 'error when using partial realloc')
+ mytester:asserteq((p[{ {111,210} }] - net.modules[2].weight):norm(), 0, 'error when using partial realloc')
+ -- check that the weights have the same values as before getParameters was called
+ mytester:asserteq((net.modules[1].weight - mlp1.modules[1].weight):norm(), 0, 'error when using partial realloc')
+ mytester:asserteq((net.modules[2].weight - mlp2.modules[1].weight):norm(), 0, 'error when using partial realloc')
+
+end
+
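+-- The getParameters tests above all exercise the same contract: calling
+-- nn.Module:getParameters() gathers every parameter tensor into one flat
+-- storage and re-points each module's weight and bias at a slice of it, so
+-- writes to the flat vector are visible through the modules and vice versa.
+-- A minimal illustrative sketch of that contract (not part of the suite):
+--
+--   local lin = nn.Linear(10, 10)
+--   local flat = lin:getParameters() -- 110 elements: 100 weights + 10 biases
+--   flat:zero()                      -- writes through to lin.weight/lin.bias
+--   assert(lin.weight:sum() == 0 and lin.bias:sum() == 0)
+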
+function nntest.Module_getParameters_10()
+ -- tensors are non-contiguous but compact; they can be gathered
+ local L = nn.Linear(10,10)
+ L.weight = torch.Tensor(10,10):t():fill(1)
+ local tmp = torch.Tensor(10,10):fill(2)
+ L.bias = tmp:select(1,2)
+ local P = L:getParameters()
+ mytester:asserteq(L.weight:mean(), 1)
+ mytester:asserteq(L.bias:mean(), 2)
+ mytester:asserteq(L.weight:storage(), L.bias:storage())
+ mytester:asserteq(P:nElement(), 110)
+ mytester:asserteq(P:storage():size(), 110)
+ mytester:assertlt(L.bias[{ {10} }]:storageOffset() - 1, L.bias:storage():size())
+end
+
+function nntest.Module_getParameters_11()
+ -- tensors are non-compact; they can't be gathered
+ local L = nn.Linear(10,10)
+ local tmp = torch.Tensor(10,10):fill(2)
+ L.bias = tmp:select(2,2)
+ local ok, err = pcall(L.getParameters, L)
+ mytester:assert(not ok)
+end
+
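+-- "Compact" in the two tests above means the tensor's elements occupy a
+-- contiguous block of its storage even if the tensor itself is not
+-- contiguous: the transposed weight and the row select(1,2) in test 10
+-- cover exactly nElement() storage slots and can be flattened, whereas the
+-- column select(2,2) in test 11 leaves gaps between its elements, so
+-- getParameters() cannot gather it and is expected to raise an error.
+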
+function nntest.Module_getParameters_12()
+ -- tensors are expanded (i.e. have stride 0 along the expanded dimension)
+ local L = nn.Linear(10,10)
+ L.weight = torch.expand(torch.Tensor(10, 1):fill(1), 10, 10)
+ L.gradWeight = torch.expand(torch.Tensor(10, 1):fill(1), 10, 10)
+ L.bias = torch.Tensor(10):fill(2)
+ local P = L:getParameters()
+ mytester:asserteq(L.weight:mean(), 1)
+ mytester:asserteq(L.bias:mean(), 2)
+ mytester:asserteq(L.weight:storage(), L.bias:storage())
+ mytester:asserteq(P:nElement(), 20)
+ mytester:asserteq(P:storage():size(), 20)
+ mytester:assertlt(L.bias[{ {10} }]:storageOffset() - 1, L.bias:storage():size())
+end
+
+function nntest.Module_listModules()
+ local batchSize = 4
+ local inputSize, outputSize = 7, 6
+ local linear = nn.Linear(inputSize, outputSize)
+ local tanh = nn.Tanh()
+ local reshape = nn.Reshape(outputSize/2, 2)
+ local mlp3 = nn.Sequential()
+ mlp3:add(linear)
+ mlp3:add(tanh)
+ mlp3:add(reshape)
+
+ local mlp2 = nn.Sequential()
+ local view = nn.View(outputSize)
+ local linear2 = nn.Linear(outputSize, inputSize)
+ local tanh2 = nn.Tanh()
+ mlp2:add(mlp3)
+ mlp2:add(view)
+ mlp2:add(linear2)
+ mlp2:add(tanh2)
+
+ local concat = nn.ConcatTable()
+ local id = nn.Identity()
+ concat:add(mlp2)
+ concat:add(id)
+ local mlp = nn.Sequential()
+ local add = nn.CAddTable()
+ mlp:add(concat)
+ mlp:add(add)
+
+ local modules2 = {mlp, concat, mlp2, mlp3, linear, tanh, reshape, view, linear2, tanh2, id, add}
+ local modules = mlp:listModules()
+
+ mytester:assert(#modules2 == #modules, 'missing modules error')
+
+ for i,module in ipairs(modules) do
+ mytester:assert(torch.type(module) == torch.type(modules2[i]), 'module error')
+ end
+end
+
+function nntest.PairwiseDistance()
+ -- Note: testJacobian doesn't support table inputs, and rather than re-write
+ -- it so that it does, I'll just use a split table module on the input.
+ -- I assume both SplitTable and Sequential do not have bugs, otherwise this
+ -- test will break.
+ for p = 1,4 do -- test a few Lp norms
+ -- TEST CASE 1: non-batch input, same code path but includes a resize
+ local ini = math.random(3,5)
+ local input = torch.Tensor(2, ini):zero()
+ local module = nn.Sequential()
+ module:add(nn.SplitTable(1))
+ module:add(nn.PairwiseDistance(p))
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err, 1e-4, ' error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module)..' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module)..' - i/o backward err ')
+
+ -- Also check that the forward prop result is correct.
+ input = torch.rand(2, ini)
+ err = torch.dist(input:select(1,1), input:select(1,2), p) -
+ module:forward(input)[1]
+ mytester:assertlt(err,precision, ' error on non-batch fprop ')
+
+ -- TEST CASE 2: batch input
+ local inj = math.random(3,5)
+ input = torch.Tensor(2, inj, ini):zero()
+
+ -- (Rebuild the module to avoid correlated tests)
+ module = nn.Sequential()
+ module:add(nn.SplitTable(1))
+ module:add(nn.PairwiseDistance(p))
+
+ err = jac.testJacobian(module,input)
+ mytester:assertlt(err, 1e-4, ' error on state ')
+
+ -- Also check that the forward prop result is correct.
+ -- manually calculate each distance separately
+ local inputa = torch.rand(inj,ini)
+ local inputb = torch.rand(inj,ini)
+ local dist_manual = torch.Tensor(inj)
+ for i=1, inputa:size(1) do
+ dist_manual[i] = torch.dist(inputa:select(1,i), inputb:select(1,i),p)
+ end
+ -- compare the distances to the module's fprop
+ local dist = module:forward(torch.cat(inputa,inputb,1):resize(2,inj,ini))
+ err = dist - dist_manual
+ mytester:assertlt(err:norm(), precision, torch.typename(module) ..
+ ' error on batch fprop ')
+ end
+end
+
+function nntest.Index()
+ local net = nn.Index(1)
+
+ -- test 1D
+ local input = {torch.Tensor{10, 20, 30}, torch.LongTensor{1, 2, 2, 3}}
+ local output = net:forward(input)
+ equal(output, torch.Tensor{10, 20, 20, 30}, "error in 1D forward pass")
+
+ local gradOutput = torch.Tensor{1, 1, 1, 3 }
+ local gradInput = net:backward(input, gradOutput)
+ equal(gradInput[1], torch.Tensor{1, 2, 3}, "error in 1D backward pass")
+
+ -- test 2D
+ local input = {torch.Tensor{{10, 20}, {30, 40}}, torch.LongTensor{1, 1}}
+ local output = net:forward(input)
+ equal(output, torch.Tensor{{10, 20}, {10, 20}}, "error in 2D forward pass")
+
+ local gradOutput = torch.Tensor{{1, 2}, {1, 2}}
+ local gradInput = net:backward(input, gradOutput)
+ equal(gradInput[1], torch.Tensor{{2, 4}, {0, 0}}, "error in 2D backward pass")
+
+ -- test clearState
+ local m = nn.Index(1)
+ local tensor = torch.Tensor(10, 3)
+ local indices = torch.LongTensor{ 2,3,4}
+
+ m:clearState()
+ m:forward({tensor, indices})
+ m:backward({tensor,indices}, torch.rand(3,3))
+
+end
+
+function nntest.Squeeze()
+ local input = torch.Tensor(2,1,3):zero()
+ local module = nn.Squeeze()
+ equal(module:forward(input), input:squeeze(), "error in forward pass")
+ local output = input:squeeze()
+ equal(module:backward(input, output), input, "error in backward pass")
+
+ -- testing the dimension option:
+ local input = torch.Tensor(2,1,1,3):zero()
+ local module = nn.Squeeze(2)
+ equal(module:forward(input), input:squeeze(2), "error in forward pass with dimension")
+ local output = input:squeeze(2)
+ equal(module:backward(input, output), input, "error in backward pass with dimension")
+
+ -- with batch
+ local input = torch.Tensor(2,1,1,3):zero()
+ local module = nn.Squeeze(2, 3)
+ equal(module:forward(input), input:squeeze(3), "error in forward pass with dimension")
+ local output = input:squeeze(3)
+ equal(module:backward(input, output), input, "error in backward pass with dimension")
+
+ -- ... of size one
+ local input = torch.Tensor(1,1,1,3):zero()
+ local module = nn.Squeeze(2, 3)
+ equal(module:forward(input), input:squeeze(3), "error in forward pass with dimension")
+ local output = input:squeeze(3)
+ equal(module:backward(input, output), input, "error in backward pass with dimension")
+end
+
+function nntest.Unsqueeze()
+ local function assertInputOutputSize(inputSize, outputSize, tf)
+ local input = torch.Tensor(table.unpack(inputSize)):zero()
+ local output = torch.Tensor(table.unpack(outputSize)):zero()
+ local gradInput = input:clone()
+ local gradOutput = output:clone()
+ equal(tf:forward(input), output, "error in forward pass")
+ equal(tf:backward(input, gradOutput), gradInput, "error in backward pass")
+ end
+
+ local function test_normal()
+ -- insert dim 1 at head
+ local inputSize, outputSize = {2,3,4}, {1, 2,3,4}
+ local pos = 1
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos))
+
+ -- insert dim 1 at tail
+ local inputSize, outputSize = {2,3,4}, {2,3,4, 1}
+ local pos = 4
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos))
+
+ -- insert dim 1 in between
+ local inputSize, outputSize = {2,3,4}, {2, 1, 3,4}
+ local pos = 2
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos))
+ end
+
+ local function test_batchmode()
+ -- batch mode: insert dim 1 at head
+ local inputSize, outputSize = {5, 2, 3, 4}, {5, 1, 2, 3, 4}
+ local pos = 1
+ local numInputDims = 3
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos, numInputDims))
+
+ -- batch mode: insert dim 1 at tail
+ local inputSize, outputSize = {5, 2, 3, 4}, {5, 2, 3, 4, 1}
+ local pos = 4
+ local numInputDims = 3
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos, numInputDims))
+
+ -- batch mode: insert dim 1 in between
+ local inputSize, outputSize = {5, 2, 3, 4}, {5, 2, 1, 3, 4}
+ local pos = 2
+ local numInputDims = 3
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos, numInputDims))
+ end
+
+ local function test_sizeone()
+ local inputSize, outputSize = {1,1,3,1}, {1,1, 1, 3,1}
+ local pos = 3
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos))
+
+ local inputSize, outputSize = {1,1,3,2}, {1,1,3,2, 1}
+ local pos = 3
+ local numInputDims = 2
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos, numInputDims))
+ end
+
+ local function test_sizestrange()
+ local inputSize, outputSize = {2}, {2,1}
+ local pos = 2
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos))
+
+ local inputSize, outputSize = {1}, {1, 1}
+ local pos = 1
+ assertInputOutputSize(inputSize,outputSize, nn.Unsqueeze(pos))
+ end
+
+ test_normal()
+ test_batchmode()
+ test_sizeone()
+ test_sizestrange()
+end
+
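+-- A note on the batch-mode cases above: when numInputDims is given,
+-- Unsqueeze interprets pos relative to the trailing numInputDims
+-- dimensions, so the actual insert position is
+-- pos + (input:dim() - numInputDims); e.g. input {5,2,3,4} with
+-- numInputDims = 3 and pos = 1 inserts at dimension 2, giving {5,1,2,3,4}.
+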
+function nntest.LookupTable()
+ local totalIndex = math.random(6,9)
+ local nIndex = math.random(3,5)
+ local entry_size = math.random(2,5)
+
+ local function dotest(module, input, minval, maxval)
+ local output = module:forward(input)
+ module:backwardUpdate(input, output, 0.1)
+ input:zero()
+
+ -- 1D
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight, minval, maxval)
+ mytester:assertlt(err,precision, '1D error on weight ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight, minval, maxval)
+ mytester:assertlt(err,precision, '1D error on weight [direct update] ')
+
+ module.gradWeight:zero()
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ '1D error on weight [%s]', t))
+ end
+
+ -- 2D
+ local nframe = math.random(2,5)
+ local input = torch.IntTensor(nframe, nIndex):zero()
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight, minval, maxval)
+ mytester:assertlt(err,precision, '2D error on weight ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight, minval, maxval)
+ mytester:assertlt(err,precision, '2D error on weight [direct update] ')
+
+ module.gradWeight:zero()
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ '2D error on weight [%s]', t))
+ end
+
+ -- IO
+ module.gradInput = torch.Tensor(3,4):zero() -- fixes an error
+ local ferr,berr = jac.testIO(module,input,minval,maxval)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- accUpdate
+ module:accUpdateOnly()
+ mytester:assert(not module.gradWeight, 'gradWeight is nil')
+ module:float()
+ local output = module:forward(input)
+ module:backwardUpdate(input, output, 0.1)
+ end
+ -- test without padding
+ local input = torch.randperm(totalIndex):narrow(1,1,nIndex):int()
+ local module = nn.LookupTable(totalIndex, entry_size)
+ dotest(module, input, 1, totalIndex)
+ -- test with padding set to 1, but no padding in inputs
+ local input = torch.randperm(totalIndex):narrow(1,1,nIndex):int()
+ local module = nn.LookupTable(totalIndex, entry_size, 1)
+ dotest(module, input, 2, totalIndex)
+ -- test whether padding weights remain unchanged
+ local paddingValue = math.random(totalIndex)
+ local module = nn.LookupTable(totalIndex, entry_size, paddingValue)
+ local padw = module.weight:select(1,paddingValue):fill(1)
+ local padw_sum = padw:sum()
+ local input = torch.IntTensor(nIndex)
+ for i = 1, 100 do
+ input:apply(
+ function() -- set randomly half of the input as padding
+ if torch.random(2) == 1 then return paddingValue end
+ return torch.random(totalIndex)
+ end)
+ local y = module:updateOutput(input)
+ module:updateGradInput(input, y)
+ module:accUpdateGradParameters(input, y, 0.1)
+ end
+ local err = padw_sum - padw:sum()
+ mytester:assertlt(err,precision, 'padding update error ')
+ -- test whether the weights change accordingly when maxNorm is not nil
+ local all_index = torch.randperm(totalIndex):int()
+ -- input can have duplicates
+ local input = torch.repeatTensor(all_index:narrow(1,1,nIndex), 2)
+ local maxNorm = math.random()
+ for _, normType in ipairs{1, 2, math.random()} do
+ local module = nn.LookupTable(totalIndex, entry_size, 0, maxNorm, normType)
+ local oriW = module.weight:clone()
+ local output = module:updateOutput(input)
+ -- check output is of small norm
+ for j = 1,output:size(1) do
+ local norm = torch.norm(output:select(1, j), normType)
+ if norm > maxNorm then
+ local err = norm - maxNorm
+ mytester:assertlt(math.abs(err), precision, string.format(
+ 'output after renorm exceeds maxNorm=[%f] with normType=[%f]', maxNorm, normType))
+ end
+ end
+ -- check the update of the module.weight
+ for j = 1,totalIndex do
+ local k = all_index[j]
+ if j <= nIndex then -- k is an index in "input"
+ local norm = torch.norm(module.weight:select(1, k), normType)
+ local oriNorm = torch.norm(oriW:select(1, k), normType)
+ if oriNorm > maxNorm then
+ local err = norm - maxNorm
+ mytester:assertlt(math.abs(err), precision, 'unexpected norm after renorm')
+ else
+ local err = norm - oriNorm
+ mytester:assertlt(math.abs(err), precision, 'unexpected norm after renorm')
+ end
+ else -- k is not an index in "input"
+ local err = module.weight:select(1,k):sum() - oriW:select(1,k):sum()
+ mytester:assertlt(math.abs(err), precision, 'unexpected changes in weight after renorm')
+ end
+ end
+ end
+end
+
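+-- The maxNorm checks above follow the usual renorm rule: for every weight
+-- row w that the input actually indexes, if ||w||_normType > maxNorm the
+-- row is rescaled as w <- w * maxNorm / ||w||_normType, while rows never
+-- referenced by the input keep their original values, which is what the
+-- second loop over all_index verifies.
+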
+function nntest.AddConstant()
+ local nbatch = torch.random(3, 5)
+ local f = torch.random(3, 5)
+ local h = torch.random(7,9)
+ local w = torch.random(7,9)
+ local input = torch.rand(nbatch, f, h, w):mul(20):add(-10) -- [-10, 10]
+
+ local constant = torch.randn(1):squeeze()
+ local mod = nn.AddConstant(constant)
+
+ -- Test FPROP
+ local output = mod:forward(input)
+ local delta = output - input
+ mytester:assertlt(delta:add(-constant):abs():max(), precision, 'fprop error')
+
+ -- Test BPROP
+ local err = jac.testJacobian(mod, input)
+ mytester:assertlt(err, precision, 'bprop error ')
+
+ -- inplace comparisons
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local constant = torch.uniform()*math.random(1,10)
+
+ local input1 = torch.rand(ink, inj, ini)
+ local input2 = input1:clone()
+
+ local module1 = nn.AddConstant(constant,true)
+ local module2 = nn.AddConstant(constant)
+
+ local gradOutput1 = torch.rand(ink, inj, ini)
+ local gradOutput2 = gradOutput1:clone()
+
+ local out1 = module1:forward(input1)
+ local out2 = module2:forward(input2)
+
+ mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) ..
+ ' - in-place forward err ')
+
+ local gradInput1 = module1:backward(input1, gradOutput1)
+ local gradInput2 = module2:backward(input2, gradOutput2)
+
+ mytester:asserteq(0, (gradInput1-gradInput2):abs():max(),
+ torch.typename(module1) .. ' - in-place backward err ')
+
+ local input1 = torch.rand(ink, inj, ini)
+ local input2 = input1:clone()
+
+ module1:forward(input1)
+ module1:backward(module1.output,torch.rand(input1:size()))
+
+ local err = (input1-input2):abs():max()
+ mytester:asserteq(err, 0, torch.typename(module1) ..
+ ' - inplace input change err ')
+
+ local module3 = nn.AddConstant(torch.Tensor{1,2,3})
+ local out3 = module3:forward(torch.Tensor{-1,-2,-3})
+ mytester:asserteq(0, out3:abs():max(), torch.typename(module3) ..
+ ' - tensor constant forward err ')
+ local module4 = nn.AddConstant(torch.Tensor{1,2,3})
+ local out4 = module4:forward(torch.Tensor{{-1,-2,-3},{-1,-2,-3}})
+ mytester:asserteq(0, out4:abs():max(), torch.typename(module4) ..
+ ' - batch tensor constant forward err ')
+end
+
+function nntest.MulConstant()
+ local nbatch = torch.random(3, 5)
+ local f = torch.random(3, 5)
+ local h = torch.random(7,9)
+ local w = torch.random(7,9)
+ local input = torch.rand(nbatch, f, h, w):mul(20):add(-10) -- [-10, 10]
+
+ local constant = torch.randn(1):squeeze()
+ local mod = nn.MulConstant(constant)
+
+ -- Test FPROP
+ local output = mod:forward(input)
+ local scale = output:clone():cdiv(input)
+ mytester:assertlt(scale:add(-constant):abs():max(), precision, 'fprop error')
+
+ -- Test BPROP
+ local err = jac.testJacobian(mod, input)
+ mytester:assertlt(err, precision, 'bprop error ')
+
+ -- inplace comparisons
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local constant = torch.uniform()*math.random(1,10)
+
+ local input1 = torch.rand(ink, inj, ini)
+ local input2 = input1:clone()
+
+ local module1 = nn.MulConstant(constant,true)
+ local module2 = nn.MulConstant(constant)
+
+ local gradOutput1 = torch.rand(ink, inj, ini)
+ local gradOutput2 = gradOutput1:clone()
+
+ local out1 = module1:forward(input1)
+ local out2 = module2:forward(input2)
+
+ mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) ..
+ ' - in-place forward err ')
+
+ local gradInput1 = module1:backward(input1, gradOutput1)
+ local gradInput2 = module2:backward(input2, gradOutput2)
+
+ mytester:asserteq(0, (gradInput1-gradInput2):abs():max(),
+ torch.typename(module1) .. ' - in-place backward err ')
+
+ local input1 = torch.rand(ink, inj, ini)
+ local input2 = input1:clone()
+
+ module1:forward(input1)
+ module1:backward(module1.output,torch.rand(input1:size()))
+
+ local err = (input1-input2):abs():max()
+ mytester:assertalmosteq(err, 0, 1e-15, torch.typename(module1) ..
+ ' - inplace input change err ')
+end
+
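+-- Why the final comparisons in the two tests above differ: the in-place
+-- variants restore the input during backward (AddConstant subtracts the
+-- constant back out, MulConstant divides it back out), so input1 should
+-- again match the untouched clone input2. The multiply/divide round trip
+-- can leave tiny floating-point rounding error, hence MulConstant uses
+-- assertalmosteq with a 1e-15 tolerance instead of exact equality.
+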
+function nntest.Copy()
+ local input = torch.randn(3,4):double()
+ local c = nn.Copy('torch.DoubleTensor', 'torch.FloatTensor')
+ local output = c:forward(input)
+ mytester:assert(torch.type(output) == 'torch.FloatTensor', 'copy forward type err')
+ mytester:assertTensorEq(output, input:float(), 0.000001, 'copy forward value err')
+ local gradInput = c:backward(input, output)
+ mytester:assert(torch.type(gradInput) == 'torch.DoubleTensor', 'copy backward type err')
+ mytester:assertTensorEq(gradInput, input, 0.000001, 'copy backward value err')
+ c.dontCast = true
+ c:double()
+ mytester:assert(torch.type(output) == 'torch.FloatTensor', 'copy forward type err')
+end
+
+function nntest.CMaxTable()
+ local input1 = torch.Tensor{{1,3},{2,4}}
+ local input2 = torch.Tensor{{4,2},{3,1}}
+ local input = {input1, input2}
+ local module = nn.CMaxTable()
+ local err1 = torch.add(module:forward(input), -1, torch.Tensor{{4,3},{3,4}})
+ mytester:assertalmosteq(err1:abs():max(), 0, 1e-15, "CMaxTable forward call")
+ local gradOutputs = torch.Tensor{5,6,7,8}
+ local gradInputs = module:backward(input, gradOutputs)
+ local err2 = torch.add(gradInputs[1], -1, torch.Tensor{{0,6},{0,8}})
+ local err3 = torch.add(gradInputs[2], -1, torch.Tensor{{5,0},{7,0}})
+ mytester:assertalmosteq(err2:abs():max(), 0, 1e-15, "CMaxTable backward call")
+ mytester:assertalmosteq(err3:abs():max(), 0, 1e-15, "CMaxTable backward call")
+end
+
+function nntest.CMinTable()
+ local input1 = torch.Tensor{{1,3},{2,4}}
+ local input2 = torch.Tensor{{4,2},{3,1}}
+ local input = {input1, input2}
+ local module = nn.CMinTable()
+ local err1 = torch.add(module:forward(input), -1, torch.Tensor{{1,2},{2,1}})
+ mytester:assertalmosteq(err1:abs():max(), 0, 1e-15, "CMinTable forward call")
+ local gradOutputs = torch.Tensor{5,6,7,8}
+ local gradInputs = module:backward(input, gradOutputs)
+ local err2 = torch.add(gradInputs[1], -1, torch.Tensor{{5,0},{7,0}})
+ local err3 = torch.add(gradInputs[2], -1, torch.Tensor{{0,6},{0,8}})
+ mytester:assertalmosteq(err2:abs():max(), 0, 1e-15, "CMinTable backward call")
+ mytester:assertalmosteq(err3:abs():max(), 0, 1e-15, "CMinTable backward call")
+end
+
+function nntest.JoinTable()
+ local tensor = torch.rand(3,4,5)
+ local input = {tensor, tensor}
+ local module
+ for d = 1,tensor:dim() do
+ module = nn.JoinTable(d)
+ mytester:asserteq(module:forward(input):size(d), tensor:size(d)*2, "dimension " .. d)
+ end
+
+ -- Minibatch
+ local tensor = torch.rand(3,4,5)
+ local input = {tensor, tensor}
+ local module
+ for d = 1,tensor:dim()-1 do
+ module = nn.JoinTable(d, 2)
+ mytester:asserteq(module:forward(input):size(d+1), tensor:size(d+1)*2, "dimension " .. d)
+ end
+end
+
+function nntest.SplitTable()
+ local input = torch.randn(3,4,5)
+ local module
+ for d = 1,input:dim() do
+ module = nn.SplitTable(d)
+ mytester:asserteq(#module:forward(input), input:size(d), "dimension " .. d)
+ end
+
+ -- Minibatch
+ local input = torch.randn(3,4,5)
+ local module
+ for d = 1,input:dim()-1 do
+ module = nn.SplitTable(d, 2)
+ mytester:asserteq(#module:forward(input), input:size(d+1), "dimension " .. d)
+ end
+
+ -- Negative indices
+ local module = nn.SplitTable(-3)
+ local input = torch.randn(3,4,5)
+ mytester:asserteq(#module:forward(input), 3, "negative index")
+ local input = torch.randn(2,3,4,5)
+ mytester:asserteq(#module:forward(input), 3, "negative index (minibatch)")
+end
+
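+-- Negative dimensions resolve relative to the input's last dimension:
+-- nn.SplitTable(-3) splits dimension input:dim() - 3 + 1, i.e. dimension 1
+-- of a 3D tensor but dimension 2 of a 4D minibatch, which is why both
+-- asserts above expect 3 output tensors.
+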
+function nntest.Select()
+ -- Test negative Select
+ local input = torch.Tensor{{4,6,7}, {8,0,1}}
+ mytester:asserteq(nn.Select(1,-1):forward(input)[1], 8, "negative index")
+ mytester:asserteq(nn.Select(1,-1):forward(input)[2], 0, "negative index")
+ mytester:asserteq(nn.Select(1,-2):forward(input)[2], 6, "negative index")
+ mytester:asserteq(nn.Select(-1,-1):forward(input)[1], 7, "negative dim + negative index")
+ mytester:asserteq(nn.Select(-1,-1):forward(input)[2], 1, "negative dim + negative index")
+end
+
+function nntest.SelectTable()
+ local input = {
+ torch.rand(3,4,5), torch.rand(3,4,5),
+ {torch.rand(3,4,5)},
+ {torch.rand(3,4,5), {torch.rand(3,4,5)}}
+ }
+ local gradOutputs = {
+ torch.rand(3,4,5), torch.rand(3,4,5),
+ {torch.rand(3,4,5)},
+ {torch.rand(3,4,5), {torch.rand(3,4,5)}}
+ }
+ local zeros = {
+ torch.Tensor(3,4,5):zero(), torch.Tensor(3,4,5):zero(),
+ {torch.Tensor(3,4,5):zero()},
+ {torch.Tensor(3,4,5):zero(), {torch.Tensor(3,4,5):zero()}}
+ }
+ local nonIdx = {2,3,4,1}
+ local module
+ for idx = 1,#input do
+ module = nn.SelectTable(idx)
+ local output = module:forward(input)
+ equal(output, input[idx], "output dimension " .. idx)
+ local gradInput = module:backward(input, gradOutputs[idx])
+ equal(gradInput[idx], gradOutputs[idx], "gradInput[idx] dimension " .. idx)
+ equal(gradInput[nonIdx[idx]], zeros[nonIdx[idx]], "gradInput[nonIdx] dimension " .. idx)
+ end
+
+ -- test negative index
+ local idx = -2
+ module = nn.SelectTable(idx)
+ local output = module:forward(input)
+ equal(output, input[#input+idx+1], "output dimension " .. idx)
+ local gradInput = module:backward(input, gradOutputs[#input+idx+1])
+ equal(gradInput[#input+idx+1], gradOutputs[#input+idx+1], "gradInput[idx] dimension " .. idx)
+ equal(gradInput[nonIdx[#input+idx+1]], zeros[nonIdx[#input+idx+1]], "gradInput[nonIdx] dimension " .. idx)
+
+ -- test typecast
+ local idx = #input
+ module = nn.SelectTable(idx)
+ module:float()
+ local output = module:forward(input)
+ equal(output, input[idx], "type output")
+ local gradInput = module:backward(input, gradOutputs[idx])
+ equal(gradInput[idx], gradOutputs[idx], "gradInput[idx] dimension " .. idx)
+ equal(gradInput[nonIdx[idx]], zeros[nonIdx[idx]], "gradInput[nonIdx] dimension " .. idx)
+
+ -- test on differently sized sub-input tables given consecutively
+ local input1 = {
+ torch.rand(3,4,5),
+ {torch.rand(3,4,5), torch.rand(3,4,5), torch.rand(3,4,5)}
+ }
+ local input2 = {
+ torch.rand(3,4,5),
+ {torch.rand(3,4,5), torch.rand(3,4,5)}
+ }
+
+ module = nn.SelectTable(1)
+ local output = module:forward(input1)
+ equal(output, input1[1], "output dimension 1")
+ local gradInput = module:backward(input1, output)
+ mytester:assert(#gradInput == #input1, "Table lengths")
+ mytester:assert(#gradInput[2] == #input1[2], "Sub-Table lengths")
+ output = module:forward(input2)
+ equal(output, input2[1], "output dimension 1")
+ gradInput = module:backward(input2, output)
+ mytester:assert(#gradInput == #input2, "Table lengths")
+ mytester:assert(#gradInput[2] == #input2[2], "Sub-Table lengths")
+
+ -- test on tables of increasing size
+ local input1 = {torch.rand(3,4,5), torch.rand(3,4,5)}
+ local input2 = {torch.rand(3,4,5), torch.rand(3,4,5), torch.rand(3,4,5)}
+ local gradOutput1 = torch.randn(3,4,5)
+ local gradOutput2 = torch.randn(3,4,5)
+
+ local module1 = nn.SelectTable(-1)
+ local output1 = module1:forward(input1):clone()
+ local output2 = module1:forward(input2)
+ local gradInput_ = module1:backward(input1, gradOutput1)
+ local gradInput1 = {}
+ for k,v in ipairs(gradInput_) do gradInput1[k] = v:clone() end
+ local gradInput2 = module1:backward(input2, gradOutput2)
+
+ local module3 = nn.SelectTable(-1)
+ local module4 = nn.SelectTable(-1)
+ local output3 = module3:forward(input1)
+ local output4 = module4:forward(input2)
+ local gradInput3 = module3:backward(input1, gradOutput1)
+ local gradInput4 = module4:backward(input2, gradOutput2)
+
+ equal(output1, output3, "output 1 and 3")
+ equal(output2, output4, "output 2 and 4")
+ equal(gradInput1, gradInput3, "gradInput 1 and 3")
+ equal(gradInput2, gradInput4, "gradInput 2 and 4")
+end
+
+function nntest.MixtureTable()
+ -- 2D
+ -- expertInput is a Table:
+ local expertInput = torch.randn(5,3,6)
+ local gradOutput = torch.randn(5,6)
+ local input = {
+ torch.rand(5,3),
+ {expertInput:select(2,1), expertInput:select(2,2), expertInput:select(2,3)}
+ }
+ local module = nn.MixtureTable()
+ local output = module:forward(input)
+ local output2 = torch.cmul(input[1]:view(5,3,1):expand(5,3,6), expertInput):sum(2):squeeze(2)
+ mytester:assertTensorEq(output, output2, 0.000001, "mixture output")
+ local gradInput = module:backward(input, gradOutput)
+ local gradOutput2 = torch.view(gradOutput, 5, 1, 6):expandAs(expertInput)
+ local gaterGradInput2 = torch.cmul(gradOutput2, expertInput):sum(3):select(3,1)
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture gater gradInput")
+ local expertGradInput2 = torch.cmul(input[1]:view(5,3,1):expand(5,3,6), gradOutput:view(5,1,6):expand(5,3,6))
+ for i, expertGradInput in ipairs(gradInput[2]) do
+ mytester:assertTensorEq(expertGradInput, expertGradInput2:select(2,i), 0.000001, "mixture expert "..i.." gradInput")
+ end
+ -- expertInput is a Tensor:
+ local input = {input[1], expertInput}
+ local module = nn.MixtureTable(2)
+ local output = module:forward(input)
+ mytester:assertTensorEq(output, output2, 0.000001, "mixture2 output")
+ local gradInput = module:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture2 gater gradInput")
+ mytester:assertTensorEq(gradInput[2], expertGradInput2, 0.000001, "mixture2 expert gradInput")
+
+ -- 3D
+ local expertInput = torch.randn(5,6,3,2)
+ local gradOutput = torch.randn(5,6,2)
+ -- expertInput is a Table:
+ local input = {
+ torch.rand(5,3),
+ {expertInput:select(3,1), expertInput:select(3,2), expertInput:select(3,3)}
+ }
+ local module = nn.MixtureTable()
+ local output = module:forward(input)
+ local output2 = torch.cmul(input[1]:view(5,1,3,1):expand(5,6,3,2), expertInput):sum(3):squeeze(3)
+ mytester:assertTensorEq(output, output2, 0.000001, "mixture3 output")
+ local gradInput = module:backward(input, gradOutput)
+ local gradOutput2 = torch.view(gradOutput,5,6,1,2):expandAs(expertInput)
+ local gaterGradInput2 = torch.cmul(gradOutput2, expertInput):sum(4):select(4,1):sum(2):select(2,1)
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture3 gater gradInput")
+ local expertGradInput2 = torch.cmul(input[1]:view(5,1,3,1):expand(5,6,3,2), gradOutput2)
+ for i, expertGradInput in ipairs(gradInput[2]) do
+ mytester:assertTensorEq(expertGradInput, expertGradInput2:select(3,i), 0.000001, "mixture3 expert "..i.." gradInput")
+ end
+ -- expertInput is a Tensor
+ local input = {input[1], expertInput}
+ local module = nn.MixtureTable(3)
+ local output = module:forward(input)
+ mytester:assertTensorEq(output, output2, 0.000001, "mixture4 output")
+ local gradInput = module:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture4 gater gradInput")
+ mytester:assertTensorEq(gradInput[2], expertGradInput2, 0.000001, "mixture4 expert gradInput")
+
+ -- 1D
+ -- expertInput is a Table:
+ local expertInput = torch.randn(3,6)
+ local gradOutput = torch.randn(6)
+ local input = {
+ torch.rand(3),
+ {expertInput:select(1,1), expertInput:select(1,2), expertInput:select(1,3)}
+ }
+ local module = nn.MixtureTable()
+ local output = module:forward(input)
+ local output2 = torch.cmul(input[1]:view(3,1):expand(3,6), expertInput):sum(1):squeeze(1)
+ mytester:assertTensorEq(output, output2, 0.000001, "mixture5 output")
+ local gradInput = module:backward(input, gradOutput)
+ local gradOutput2 = torch.view(gradOutput, 1, 6):expandAs(expertInput)
+ local gaterGradInput2 = torch.cmul(gradOutput2, expertInput):sum(2):select(2,1)
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture5 gater gradInput")
+ local expertGradInput2 = torch.cmul(input[1]:view(3,1):expand(3,6), gradOutput:view(1,6):expand(3,6))
+ for i, expertGradInput in ipairs(gradInput[2]) do
+ mytester:assertTensorEq(expertGradInput, expertGradInput2:select(1,i), 0.000001, "mixture5 expert "..i.." gradInput")
+ end
+ -- test type-cast
+ module:float()
+ local input2 = {
+ input[1]:float(),
+ {input[2][1]:float(), input[2][2]:float(), input[2][3]:float()}
+ }
+ local output = module:forward(input2)
+ mytester:assertTensorEq(output, output2:float(), 0.000001, "mixture5B output")
+ local gradInput = module:backward(input2, gradOutput:float())
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2:float(), 0.000001, "mixture5B gater gradInput")
+ for i, expertGradInput in ipairs(gradInput[2]) do
+ mytester:assertTensorEq(expertGradInput, expertGradInput2:select(1,i):float(), 0.000001, "mixture5B expert "..i.." gradInput")
+ end
+ -- expertInput is a Tensor:
+ local input = {input[1], expertInput}
+ local module = nn.MixtureTable(1)
+ local output = module:forward(input)
+ mytester:assertTensorEq(output, output2, 0.000001, "mixture6 output")
+ local gradInput = module:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture6 gater gradInput")
+ mytester:assertTensorEq(gradInput[2], expertGradInput2, 0.000001, "mixture6 expert gradInput")
+ -- test type-cast:
+ module:float()
+ local input2 = {input[1]:float(), expertInput:float()}
+ local output = module:forward(input2)
+ mytester:assertTensorEq(output, output2:float(), 0.000001, "mixture6B output")
+ local gradInput = module:backward(input2, gradOutput:float())
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2:float(), 0.000001, "mixture6B gater gradInput")
+ mytester:assertTensorEq(gradInput[2], expertGradInput2:float(), 0.000001, "mixture6B expert gradInput")
+
+ -- 2D gater, 1D expert
+ -- expertInput is a Table:
+ local expertInput = torch.randn(5,3)
+ local gradOutput = torch.randn(5)
+ local input = {
+ torch.rand(5,3),
+ {expertInput:select(2,1), expertInput:select(2,2), expertInput:select(2,3)}
+ }
+ local module = nn.MixtureTable()
+ local output = module:forward(input)
+ local output2 = torch.cmul(input[1], expertInput):sum(2):squeeze(2)
+ mytester:assertTensorEq(output, output2, 0.000001, "mixture7 output")
+ local gradInput = module:backward(input, gradOutput)
+ local gradOutput2 = torch.view(gradOutput, 5, 1):expandAs(expertInput)
+ local gaterGradInput2 = torch.cmul(gradOutput2, expertInput)
+ mytester:assertTensorEq(gradInput[1], gaterGradInput2, 0.000001, "mixture7 gater gradInput")
+ local expertGradInput2 = torch.cmul(input[1], gradOutput:view(5,1):expand(5,3))
+ for i, expertGradInput in ipairs(gradInput[2]) do
+ mytester:assertTensorEq(expertGradInput, expertGradInput2:select(2,i), 0.000001, "mixture7 expert "..i.." gradInput")
+ end
+end
+
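+-- The expected values in the MixtureTable tests above are hand-computed
+-- from the mixture-of-experts formula: with gater weights g and expert
+-- outputs e_i, the output is y = sum_i g_i * e_i, hence
+-- dL/dg_i = <gradOutput, e_i> and dL/de_i = g_i * gradOutput, which is
+-- exactly what the cmul/expand/sum chains reconstruct.
+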
+function nntest.Narrow()
+ -- check basic narrow functionality #1
+ local input = torch.rand(9, 4, 14)
+ local output = input:narrow(1, 3, 5)
+ local gradOutput = torch.rand(5, 4, 14)
+ local gradInput = torch.zeros(9, 4, 14)
+ gradInput:narrow(1, 3, 5):copy(gradOutput)
+ local module1 = nn.Narrow(1, 3, 5)
+ local output1 = module1:forward(input)
+ local gradInput1 = module1:backward(input, gradOutput)
+ local module2 = nn.Narrow(1, 3, -3)
+ local output2 = module2:forward(input)
+ local gradInput2 = module2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #1 output err")
+ mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #1 gradInput err")
+ mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #1 negative output err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #1 negative gradInput err")
+
+ -- check basic narrow functionality #2
+ local input = torch.rand(3, 10, 4)
+ local output = input:narrow(2, 5, 3)
+ local gradOutput = torch.rand(3, 3, 4)
+ local gradInput = torch.zeros(3, 10, 4)
+ gradInput:narrow(2, 5, 3):copy(gradOutput)
+ local module1 = nn.Narrow(2, 5, 3)
+ local output1 = module1:forward(input)
+ local gradInput1 = module1:backward(input, gradOutput)
+ local module2 = nn.Narrow(2, 5, -4)
+ local output2 = module2:forward(input)
+ local gradInput2 = module2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #2 output err")
+ mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #2 gradInput err")
+ mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #2 negative output err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #2 negative gradInput err")
+
+ -- check basic narrow functionality #3
+ local input = torch.rand(6, 11, 7)
+ local output = input:narrow(3, 1, 1)
+ local gradOutput = torch.rand(6, 11, 1)
+ local gradInput = torch.zeros(6, 11, 7)
+ gradInput:narrow(3, 1, 1):copy(gradOutput)
+ local module1 = nn.Narrow(3, 1, 1)
+ local output1 = module1:forward(input)
+ local gradInput1 = module1:backward(input, gradOutput)
+ local module2 = nn.Narrow(3, 1, -7)
+ local output2 = module2:forward(input)
+ local gradInput2 = module2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #3 output err")
+ mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #3 gradInput err")
+ mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #3 negative output err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #3 negative gradInput err")
+
+ -- check basic narrow functionality #4
+ local input = torch.rand(3, 10, 4)
+ local output = input:narrow(2, 5, 3)
+ local gradOutput = torch.rand(3, 3, 4)
+ local gradInput = torch.zeros(3, 10, 4)
+ gradInput:narrow(2, 5, 3):copy(gradOutput)
+ local module1 = nn.Narrow(-2, 5, 3)
+ local output1 = module1:forward(input)
+ local gradInput1 = module1:backward(input, gradOutput)
+ local module2 = nn.Narrow(-2, 5, -4)
+ local output2 = module2:forward(input)
+ local gradInput2 = module2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #4 output err")
+ mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #4 gradInput err")
+ mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #4 negative output err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #4 negative gradInput err")
+
+ -- check narrow negative offset
+ local input = torch.rand(3, 10, 4)
+ local output = input:narrow(2, 1, 3)
+ local gradOutput = torch.rand(3, 3, 4)
+ local gradInput = torch.zeros(3, 10, 4)
+ gradInput:narrow(2, 1, 3):copy(gradOutput)
+ local module1 = nn.Narrow(2, -1, 7)
+ local output1 = module1:forward(input)
+ local gradInput1 = module1:backward(input, gradOutput)
+ local module2 = nn.Narrow(2, 1, 3)
+ local output2 = module2:forward(input)
+ local gradInput2 = module2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #5 output err")
+ mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #5 gradInput err")
+ mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #5 negative output err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #5 negative gradInput err")
+end
+
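+-- The negative-length cases above rely on nn.Narrow's convention that a
+-- negative length counts from the end of the dimension: the slice ends at
+-- element size + length + 1, so the effective length is
+-- size - offset + length + 2; for case #1 that is 9 - 3 + (-3) + 2 = 5,
+-- matching nn.Narrow(1, 3, 5).
+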
+function nntest.NarrowTable()
+ local input = torch.randn(3,10,4)
+ local gradOutput = torch.randn(3,3,4)
+ local nt = nn.NarrowTable(5,3)
+ local seq = nn.Sequential()
+ seq:add(nn.SplitTable(1,2))
+ seq:add(nt)
+ seq:add(nn.JoinTable(1,1))
+ seq:add(nn.Reshape(3,3,4))
+ local seq2 = nn.Narrow(2,5,3)
+ local output = seq:forward(input)
+ local gradInput = seq:backward(input, gradOutput)
+ local output2 = seq2:forward(input)
+ local gradInput2 = seq2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output2, 0.0000001, "NarrowTable output err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "NarrowTable gradInput err")
+
+ -- now try it with a smaller input
+ local input = input:narrow(2, 1, 8)
+ local output = seq:forward(input)
+ local gradInput = seq:backward(input, gradOutput)
+ local output2 = seq2:forward(input)
+ local gradInput2 = seq2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output2, 0.0000001, "NarrowTable small output err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "NarrowTable small gradInput err")
+
+ -- test type-cast
+ local input = input:float()
+ local gradOutput = gradOutput:float()
+ seq:float()
+ seq2:float()
+ local output = seq:forward(input)
+ local gradInput = seq:backward(input, gradOutput)
+ local output2 = seq2:forward(input)
+ local gradInput2 = seq2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output2, 0.0000001, "NarrowTable output float err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "NarrowTable gradInput float err")
+end
+
+function nntest.View()
+ local input = torch.rand(10)
+ local template = torch.rand(5,2)
+ local target = template:size():totable()
+ local module = nn.View(template:size())
+ mytester:assertTableEq(module:forward(input):size():totable(), target, "Error in forward (1)")
+ local module = nn.View(table.unpack(target))
+ mytester:assertTableEq(module:forward(input):size():totable(), target, "Error in forward (2)")
+
+ -- Minibatch
+ local minibatch = torch.rand(5,10)
+ mytester:asserteq(module:forward(minibatch):size(1),
+ minibatch:size(1),
+ "Error in minibatch dimension")
+ mytester:asserteq(module:forward(minibatch):nElement(),
+ minibatch:nElement(),
+ "Error in minibatch nElement")
+ local module = nn.View(-1):setNumInputDims(1)
+ mytester:asserteq(module:forward(minibatch):size(1),
+ minibatch:size(1),
+ "Error in minibatch dimension with size -1")
+ mytester:asserteq(module:forward(minibatch):nElement(),
+ minibatch:nElement(),
+ "Error in minibatch nElement with size -1")
+
+ -- another setNumInputDims case
+ local minibatch = torch.rand(5,4,10)
+ local module = nn.View(-1):setNumInputDims(2)
+ mytester:asserteq(module:forward(minibatch):size(1),
+ minibatch:size(1),
+ "Error in minibatch dimension with size -1")
+
+ -- another setNumInputDims case
+ local minibatch = torch.rand(2,5,4,10)
+ local module = nn.View(4,-1):setNumInputDims(2)
+ local out = module:forward(minibatch)
+ mytester:asserteq(out:size(1), minibatch:size(1)*minibatch:size(2),
+ "Error in minibatch dimension with size -1")
+ mytester:asserteq(out:size(2), minibatch:size(3),
+ "Error in minibatch dimension with size -1")
+ mytester:asserteq(out:size(3), minibatch:size(4),
+ "Error in minibatch dimension with size -1")
+
+ -- Minibatch Generalization
+ local minibatch = torch.rand(5,2,6)
+ local module = nn.View(6)
+ mytester:asserteq(
+ module:forward(minibatch):size(1),
+ minibatch:size(1)*minibatch:size(2),
+ "Error in minibatch generalization dimension")
+ mytester:asserteq(
+ module:forward(minibatch):nElement(),
+ minibatch:nElement(),
+ "Error in minibatch generalization nElement")
+end
+
+function nntest.Reshape()
+ local input = torch.rand(10)
+ local template = torch.rand(5,2)
+ local target = template:size():totable()
+ local module = nn.Reshape(template:size())
+ mytester:assertTableEq(module:forward(input):size():totable(), target, "Error in forward (1)")
+ local module = nn.Reshape(table.unpack(target))
+ mytester:assertTableEq(module:forward(input):size():totable(), target, "Error in forward (2)")
+
+ -- Minibatch
+ local minibatch = torch.rand(5,10)
+ mytester:asserteq(module:forward(minibatch):size(1),
+ minibatch:size(1),
+ "Error in minibatch dimension")
+ mytester:asserteq(module:forward(minibatch):nElement(),
+ minibatch:nElement(),
+ "Error in minibatch nElement")
+end
+
+-- Define a test for SpatialUpSamplingNearest
+function nntest.SpatialUpSamplingNearest()
+ local scale = torch.random(2,4)
+ for dim = 3,4 do
+ local m = nn.SpatialUpSamplingNearest(scale)
+
+ -- Create a randomly sized dim-D tensor
+ local shape = {}
+ for i = 1, dim do
+ table.insert(shape, torch.random(2, 2+dim-1))
+ end
+
+ -- Check that the gradient is correct by using finite differences
+ local input = torch.Tensor(table.unpack(shape)):zero()
+
+ local err = jac.testJacobian(m, input)
+ mytester:assertlt(err, precision, ' error on state ')
+
+ local ferr, berr = jac.testIO(m, input)
+ mytester:asserteq(ferr, 0, torch.typename(m)..' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(m)..' - i/o backward err ')
+ end
+end
+
+function nntest.SpatialUpSamplingBilinear()
+ for scale=2,4 do
+ for dim = 3,4 do
+ local m = nn.SpatialUpSamplingBilinear(scale)
+
+ -- Create a randomly sized dim-D tensor
+ local shape = {}
+ for i = 1, dim do
+ table.insert(shape, torch.random(2, 2+dim-1))
+ end
+
+ -- Check that the gradient is correct by using finite differences
+ local input = torch.DoubleTensor(table.unpack(shape)):normal()
+
+ local err = jac.testJacobian(m, input)
+ mytester:assertlt(err, precision, ' error on state ')
+
+ local ferr, berr = jac.testIO(m, input)
+ mytester:asserteq(ferr, 0, torch.typename(m)..' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(m)..' - i/o backward err ')
+ end
+ end
+end
+
+function nntest.Concat()
+ local input = torch.randn(4, 2)
+ local num_modules = math.random(2, 5)
+ local linears = {}
+ for i = 1,num_modules do
+ linears[i] = nn.Linear(2,5)
+ end
+
+ local m = nn.Concat(1)
+ for _,module in ipairs(linears) do
+ m:add(module)
+ module:zeroGradParameters()
+ module.weight:fill(1)
+ module.bias:fill(0)
+ end
+ mytester:asserteq(m:size(), num_modules)
+
+ local output = m:forward(input)
+ local output2 = input:sum(2):expand(4, 5):repeatTensor(num_modules, 1)
+ mytester:assertTensorEq(output2, output, 0.000001, 'Concat forward err')
+
+ local gradInput = m:backward(input, torch.ones(output2:size()))
+ local gradInput2 = torch.ones(4, 2):fill(num_modules * 5)
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'Concat backward err (gradInput)')
+
+ local gradWeight = input:sum(1):expand(5, 2)
+ for _,module in ipairs(linears) do
+ mytester:assertTensorEq(gradWeight, module.gradWeight, 0.000001, 'Concat backward err (gradWeight)')
+ end
+end
+
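+-- The expected values above follow from the hand-set parameters: with every
+-- Linear weight at 1 and bias at 0, each of the 5 output units computes
+-- sum_i x_i, so one module maps the 4x2 input to input:sum(2) expanded over
+-- 5 columns, and nn.Concat(1) stacks num_modules of these along dim 1.
+-- In backward, each of the num_modules * 5 output units passes gradient 1
+-- to every input, giving the constant gradInput2 of num_modules * 5.
+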
+function nntest.Parallel()
+ local input = torch.randn(3, 4, 5)
+ local m = nn.Parallel(1,3)
+ m:add(nn.View(4,5,1))
+ m:add(nn.View(4,5,1))
+ m:add(nn.View(4,5,1))
+
+ local output = m:forward(input)
+ local output2 = input:transpose(1,3):transpose(1,2)
+ mytester:assertTensorEq(output2, output, 0.000001, 'Parallel forward err')
+
+ local gradInput = m:backward(input, output2)
+ mytester:assertTensorEq(gradInput, input, 0.000001, 'Parallel backward err')
+end
+
+function nntest.ParallelTable()
+ local input = torch.randn(3, 4, 5)
+ local p = nn.ParallelTable()
+ p:add(nn.View(4,5,1))
+ p:add(nn.View(4,5,1))
+ p:add(nn.View(4,5,1))
+ local m = nn.Sequential()
+ m:add(nn.SplitTable(1))
+ m:add(p)
+ m:add(nn.JoinTable(3))
+
+ local output = m:forward(input)
+ local output2 = input:transpose(1,3):transpose(1,2)
+ mytester:assertTensorEq(output2, output, 0.000001, 'ParallelTable forward err')
+
+ local gradInput = m:backward(input, output2)
+ mytester:assertTensorEq(gradInput, input, 0.000001, 'ParallelTable backward err')
+end
+
+function nntest.ConcatTable()
+ -- Test tensor input
+ local input = torch.rand(5, 5, 5)
+ local m = nn.Sequential()
+
+ local concat = nn.ConcatTable()
+ concat:add(nn.Identity())
+
+ m:add(concat) -- Output of concat is a table of length 1
+ m:add(nn.JoinTable(1)) -- jac needs a tensor output
+
+ local err = jac.testJacobian(m, input)
+ mytester:assertlt(err, precision, ' error on state ')
+
+ local ferr, berr = jac.testIO(m, input)
+ mytester:asserteq(ferr, 0, torch.typename(m)..' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(m)..' - i/o backward err ')
+
+ -- Now test a table input
+ local input = {
+ torch.randn(3,4):float(), torch.randn(3,4):float(), {torch.randn(3,4):float()}
+ }
+ local _gradOutput = {
+ torch.randn(3,3,4):float(), torch.randn(3,3,4):float(), torch.randn(3,3,4):float()
+ }
+ local gradOutput = {
+ {_gradOutput[1][1], _gradOutput[2][1], {_gradOutput[3][1]}},
+ {_gradOutput[1][2], _gradOutput[2][2], {_gradOutput[3][2]}},
+ {_gradOutput[1][3], _gradOutput[2][3], {_gradOutput[3][3]}}
+ }
+ local module = nn.ConcatTable()
+ module:add(nn.Identity())
+ module:add(nn.Identity())
+ module:add(nn.Identity())
+ module:float()
+
+ local output = module:forward(input)
+ local output2 = {input, input, input}
+ equal(output2, output, "ConcatTable table output")
+ local gradInput = module:backward(input, gradOutput)
+ local gradInput2 = {_gradOutput[1]:sum(1):squeeze(1), _gradOutput[2]:sum(1):squeeze(1), {_gradOutput[3]:sum(1):squeeze(1)}}
+ equal(gradInput, gradInput2, "ConcatTable table gradInput")
+
+ -- test outputs for variable length inputs
+ local test = nn.ConcatTable()
+ test:add(nn.Identity())
+ test:add(nn.Identity())
+
+ local x = {torch.randn(5), torch.randn(5)}
+ local y = {torch.randn(5)}
+
+ local o1 = #(test:forward(x))
+ local go1 = #(test:backward(x, {x, x}))
+ local o2 = #(test:forward(y))
+ local go2 = #(test:backward(y, {y, y}))
+ mytester:assert(o1 == 2, "ConcatTable table variable length")
+ mytester:assert(go1 == 2, "ConcatTable table variable length")
+ mytester:assert(o2 == 2, "ConcatTable table variable length")
+ mytester:assert(go2 == 1, "ConcatTable table variable length")
+end
+
+function nntest.MapTable()
+ local map = nn.MapTable(nn.Linear(10,5))
+ local lin = map:get(1):clone()
+
+ -- ParallelTable with shared clones as reference
+ local parallel = nn.ParallelTable()
+ parallel:add(lin)
+ parallel:add(lin:clone('weight','bias'))
+ parallel:add(lin:clone('weight','bias'))
+
+ local input = {torch.rand(10), torch.rand(10), torch.rand(10)}
+ local gradOutput = {torch.ones(5), torch.ones(5), torch.ones(5)}
+
+ local outputM = map:forward(input)
+ local outputP = parallel:forward(input)
+ mytester:assertTensorEq(outputM[1], outputP[1])
+ mytester:assertTensorEq(outputM[2], outputP[2])
+ mytester:assertTensorEq(outputM[3], outputP[3])
+ mytester:assert(map:size() == #input)
+
+ map:zeroGradParameters()
+ parallel:zeroGradParameters()
+ local gradInputM = map:backward(input, gradOutput)
+ local gradInputP = parallel:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInputM[1], gradInputP[1])
+ mytester:assertTensorEq(gradInputM[2], gradInputP[2])
+ mytester:assertTensorEq(gradInputM[3], gradInputP[3])
+
+ map:updateParameters(1)
+ parallel:updateParameters(1)
+ mytester:assertTensorEq(map:get(1).weight, parallel:get(1).weight, 0.00001)
+
+ local output = map:forward({input[1], input[2], input[3], input[3]})
+ mytester:assert(#output == 4)
+ local output = map:forward({input[1], input[2]})
+ mytester:assert(#output == 2)
+
+ map:resize(10)
+ mytester:assert(map:size() == 10)
+ map:resize(4)
+ mytester:assert(map:size() == 4)
+ mytester:assert(torch.pointer(map:get(4).weight:storage())
+ == torch.pointer(map:get(1).weight:storage()))
+ map:clearState()
+ mytester:assert(map:size() == 1)
+
+ -- check if gradients are correctly reset
+ -- share weights and gradients
+ map = nn.MapTable(nn.Linear(10,5))
+ map:forward(input)
+ local _, gradParams = map:getParameters()
+ gradParams:uniform()
+ map:zeroGradParameters()
+ mytester:assertlt(gradParams:sum(),precision)
+
+ -- check if gradients are correctly reset
+ -- do not share weights and gradients
+ map = nn.MapTable(nn.Linear(10,5),false)
+ map:forward(input)
+ _, gradParams = map:getParameters()
+ gradParams:uniform()
+ map:zeroGradParameters()
+ mytester:assertlt(gradParams:sum(),precision)
+end
+
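+-- nn.MapTable applies one module to every entry of the input table, using
+-- clones that share their parameters with the original (sharing can be
+-- disabled via the second constructor argument, as the last block above
+-- shows); that is why an nn.ParallelTable of explicit
+-- clone('weight','bias') copies serves as the reference implementation.
+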
+function nntest.FlattenTable()
+ -- Create a nested table. Obviously we can't even stochastically test
+ -- the space of all possible nested tables (it's infinite), but here is a
+ -- hand-coded one that covers all the cases we need:
+ local input = {
+ torch.rand(1),
+ {
+ torch.rand(2),
+ {
+ torch.rand(3)
+ },
+ },
+ torch.rand(4)
+ }
+ local gradOutput = {
+ torch.rand(1),
+ torch.rand(2),
+ torch.rand(3),
+ torch.rand(4)
+ }
+
+ -- Check the FPROP
+ local m = nn.FlattenTable()
+ local output = m:forward(input)
+ mytester:assert(#output == 4, torch.typename(m)..' - fprop err ')
+ -- This is ugly, but check that the mapping from input to output is correct
+ mytester:assert(output[1] == input[1])
+ mytester:assert(output[2] == input[2][1])
+ mytester:assert(output[3] == input[2][2][1])
+ mytester:assert(output[4] == input[3])
+
+ -- Check the BPROP
+ local gradInput = m:backward(input, gradOutput)
+ -- Again, check that the mapping is correct
+ mytester:assert(gradOutput[1] == gradInput[1])
+ mytester:assert(gradOutput[2] == gradInput[2][1])
+ mytester:assert(gradOutput[3] == gradInput[2][2][1])
+ mytester:assert(gradOutput[4] == gradInput[3])
+
+ -- More ugliness: FlattenTable doesn't rebuild the table on every updateOutput
+ -- call, so we need to make sure that modifications to the input are
+ -- detected correctly (and that the table is correctly rebuilt).
+ -- CASE 1: Nothing changes so the output table shouldn't be redefined
+ local old_input_map = m.input_map
+ local old_output = m.output
+ local _ = m:forward(input)
+ mytester:assert(old_input_map == m.input_map and old_output == m.output)
+
+ -- CASE 2: An element is added to the input table
+ old_input_map = m.input_map
+ old_output = m.output
+ input[2][#(input[2])+1] = torch.rand(5)
+ m:forward(input)
+ mytester:assert(old_input_map ~= m.input_map and old_output ~= m.output)
+
+ -- CASE 3: An element is removed from the input table
+ old_input_map = m.input_map
+ old_output = m.output
+ input[#input] = nil
+ m:forward(input)
+ mytester:assert(old_input_map ~= m.input_map and old_output ~= m.output)
+
+ -- At this point further testing is probably not necessary, but just to be
+ -- consistent: perform a Jacobian test using SplitTable and JoinTable
+ -- modules
+ m = nn.Sequential()
+ local par = nn.ParallelTable()
+ par:add(nn.SplitTable(1))
+ par:add(nn.SplitTable(1))
+ m:add(nn.SplitTable(1))
+ m:add(par) -- this will create a nested table
+ m:add(nn.FlattenTable()) -- This will flatten the nested table
+ m:add(nn.JoinTable(1)) -- Finally, this will create a 1D tensor
+
+ input = torch.Tensor(2,2,2)
+ local err = jac.testJacobian(m, input)
+ mytester:assertlt(err, precision, 'error on bprop ')
+end
+
+function nntest.L1Penalty()
+ local weight = 1
+ local sizeAverage = false
+ local m = nn.L1Penalty(weight, sizeAverage, false)
+
+ local input = torch.rand(2,10):add(-0.5)
+ input[1][1] = 0
+
+ local _ = m:forward(input)
+ local grad = m:backward(input, torch.ones(input:size()))
+
+ local err = input:clone():abs():sum()*weight - m.loss
+ mytester:assertlt(math.abs(err), precision, 'error on fprop ')
+
+ local true_grad = (input:gt(0):typeAs(grad) +
+ input:lt(0):typeAs(grad):mul(-1)):mul(weight)
+ mytester:assertlt((true_grad - grad):abs():max(), precision,
+ 'error on bprop ')
+
+ -- Note: We cannot use the Jacobian test for this Module since the backward
+ -- gradient cannot be estimated using finite differences (ie, the loss
+ -- during BPROP is not included in the FPROP output)
+end
+
+function nntest.L1Cost()
+ local input = torch.rand(10) * 2 - 1
+ local m = nn.L1Cost()
+ local output = m:forward(input)
+ local err = output - torch.abs(input):sum()
+ mytester:assertalmosteq(err, 0, 1e-15, 'L1Cost forward')
+end
+
+function nntest.DepthConcat()
+ local outputSize = torch.IntTensor{5,6,7,8}
+ local input = torch.randn(2,3,12,12)
+ local gradOutput = torch.randn(2, outputSize:sum(), 12, 12)
+ local concat = nn.DepthConcat(2)
+ concat:add(nn.SpatialConvolutionMM(3, outputSize[1], 1, 1, 1, 1)) --> 2, 5, 12, 12
+ concat:add(nn.SpatialConvolutionMM(3, outputSize[2], 3, 3, 1, 1)) --> 2, 6, 10, 10
+ concat:add(nn.SpatialConvolutionMM(3, outputSize[3], 4, 4, 1, 1)) --> 2, 7, 9, 9
+ concat:add(nn.SpatialConvolutionMM(3, outputSize[4], 5, 5, 1, 1)) --> 2, 8, 8, 8
+ concat:zeroGradParameters()
+ -- forward/backward
+ local outputConcat = concat:forward(input)
+ local gradInputConcat = concat:backward(input, gradOutput)
+ -- the spatial dims are the largest, the nFilters is the sum
+ local output = torch.Tensor(2, outputSize:sum(), 12, 12):zero() -- zero for padding
+ local narrows = { {{},{1,5},{},{}}, {{},{6,11},{2,11},{2,11}}, {{},{12,18},{2,10},{2,10}}, {{},{19,26},{3,10},{3,10}} }
+ local gradInput = input:clone():zero()
+ for i=1,4 do
+ local conv = concat:get(i)
+ local gradWeight = conv.gradWeight:clone()
+ conv:zeroGradParameters()
+ output[narrows[i]]:copy(conv:forward(input))
+ gradInput:add(conv:backward(input, gradOutput[narrows[i]]))
+ mytester:assertTensorEq(gradWeight, conv.gradWeight, 0.000001, "Error in DepthConcat:accGradParameters for conv "..i)
+ end
+ mytester:assertTensorEq(output, outputConcat, 0.000001, "Error in DepthConcat:updateOutput")
+ mytester:assertTensorEq(gradInput, gradInputConcat, 0.000001, "Error in DepthConcat:updateGradInput")
+end
+
+function nntest.MV()
+ local mv = nn.MV(false)
+ local outdim = torch.random(10,20)
+ local indim = torch.random(10,20)
+ local M = torch.randn(outdim, indim)
+ local V = torch.randn(indim)
+
+ -- Test forward pass.
+ local output = mv:forward({M, V})
+ mytester:assertTableEq(output:size():totable(), {outdim},
+ 'Output has wrong dimensionality')
+ mytester:assertTensorEq(output, M * V, 1e-10,
+ 'Wrong output')
+
+ -- Test backward pass.
+ local gradOutput = torch.randn(outdim)
+ local gradInput = mv:backward({M, V}, gradOutput)
+ mytester:assert(#gradInput == 2, 'gradInput must be table of size 2')
+ local gradM, gradV = table.unpack(gradInput)
+ mytester:assertTableEq(gradM:size():totable(), M:size():totable(),
+ 'Gradient for input M has wrong size')
+ mytester:assertTableEq(gradV:size():totable(), V:size():totable(),
+ 'Gradient for input V has wrong size')
+ mytester:assertTensorEq(gradM, torch.ger(gradOutput, V), 1e-10,
+ 'Wrong gradient for input M')
+ -- output(i) = sum_j M(i,j) * V(j), so d output(i) / d V(j) = M(i,j),
+ -- i.e. gradV = M:t() * gradOutput
+ mytester:assertTensorEq(gradV, M:t() * gradOutput, 1e-10,
+ 'Wrong gradient for input V')
+end
+
+function nntest.BatchMVNoTranspose()
+ local mv = nn.MV()
+ local outdim = torch.random(10,20)
+ local indim = torch.random(10,20)
+ for bSize = 1, 11, 5 do
+ local M = torch.randn(bSize, outdim, indim)
+ local V = torch.randn(bSize, indim)
+
+ -- Test forward pass.
+ local output = mv:forward({M, V})
+ mytester:assertTableEq(output:size():totable(), {bSize, outdim},
+ 'Output has wrong dimensionality')
+ for i = 1, bSize do
+ mytester:assertTensorEq(output[i], M[i] * V[i], 1e-10,
+ 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+
+ -- Test backward pass.
+ local gradOutput = torch.randn(bSize, outdim)
+ local gradInput = mv:backward({M, V}, gradOutput)
+ mytester:assert(#gradInput == 2, 'gradInput must be table of size 2')
+ local gradM, gradV = table.unpack(gradInput)
+ mytester:assertTableEq(gradM:size():totable(), M:size():totable(),
+ 'Gradient for input M has wrong size')
+ mytester:assertTableEq(gradV:size():totable(), V:size():totable(),
+ 'Gradient for input V has wrong size')
+ for i = 1, bSize do
+ mytester:assertTensorEq(gradM[i], torch.ger(gradOutput[i], V[i]), 1e-10,
+ 'Gradient for input M wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ mytester:assertTensorEq(gradV[i], M[i]:t() * gradOutput[i], 1e-10,
+ 'Gradient for input V wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+ end
+end
+
+function nntest.BatchMVTranspose()
+ local mv = nn.MV(true)
+ local outdim = torch.random(10,20)
+ local indim = torch.random(10,20)
+ for bSize = 1, 11, 5 do
+ local M = torch.randn(bSize, indim, outdim)
+ local V = torch.randn(bSize, indim)
+
+ -- Test forward pass.
+ local output = mv:forward({M, V})
+ mytester:assertTableEq(output:size():totable(), {bSize, outdim},
+ 'Output has wrong dimensionality')
+ for i = 1, bSize do
+ mytester:assertTensorEq(output[i], M[i]:t() * V[i], 1e-10,
+ 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+
+ -- Test backward pass.
+ local gradOutput = torch.randn(bSize, outdim)
+ local gradInput = mv:backward({M, V}, gradOutput)
+ mytester:assert(#gradInput == 2, 'gradInput must be table of size 2')
+ local gradM, gradV = table.unpack(gradInput)
+ mytester:assertTableEq(gradM:size():totable(), M:size():totable(),
+ 'Gradient for input M has wrong size')
+ mytester:assertTableEq(gradV:size():totable(), V:size():totable(),
+ 'Gradient for input V has wrong size')
+ for i = 1, bSize do
+ mytester:assertTensorEq(gradM[i], torch.ger(V[i], gradOutput[i]), 1e-10,
+ 'Gradient for input M wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ mytester:assertTensorEq(gradV[i], M[i] * gradOutput[i], 1e-10,
+ 'Gradient for input V wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+ end
+end
+
+local function createMatrixInputSizes()
+ local M = torch.random(10, 20)
+ local N = torch.random(10, 20)
+ local P = torch.random(10, 20)
+ return M, N, P
+end
+
+function nntest.MM()
+ local mm = nn.MM(false, true)
+ local M, N, P = createMatrixInputSizes()
+ local A = torch.randn(M, N)
+ local B = torch.randn(P, N)
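+ -- For C = A * B:t() the chain rule gives gradA = gradC * B and
+ -- gradB = gradC:t() * A, which the backward assertions below verify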
+
+ -- Test forward pass.
+ local output = mm:forward({A, B})
+ mytester:assertTableEq(output:size():totable(), {M, P},
+ 'Output has wrong dimensionality')
+ mytester:assertTensorEq(output, A * B:t(), 1e-10,
+ 'Wrong output')
+
+ -- Test backward pass.
+ local gradOutput = torch.randn(M, P)
+ local gradInput = mm:backward({A, B}, gradOutput)
+ mytester:assert(#gradInput == 2, 'gradInput must be table of size 2')
+ local gradA, gradB = table.unpack(gradInput)
+ mytester:assertTableEq(gradA:size():totable(), A:size():totable(),
+ 'Gradient for input A has wrong size')
+ mytester:assertTableEq(gradB:size():totable(), B:size():totable(),
+ 'Gradient for input B has wrong size')
+ mytester:assertTensorEq(gradA, gradOutput * B, 1e-10,
+ 'Wrong gradient for input A')
+ mytester:assertTensorEq(gradB, gradOutput:t() * A, 1e-10,
+ 'Wrong gradient for input B')
+end
+
+function nntest.BatchMMNoTranspose()
+ local mm = nn.MM()
+ local M, N, P = createMatrixInputSizes()
+ for bSize = 1, 11, 5 do
+ local A = torch.randn(bSize, M, N)
+ local B = torch.randn(bSize, N, P)
+
+ -- Test forward pass.
+ local output = mm:forward({A, B})
+ mytester:assertTableEq(output:size():totable(), {bSize, M, P},
+ 'Output has wrong dimensionality')
+ for i = 1, bSize do
+ mytester:assertTensorEq(output[i], A[i] * B[i], 1e-10,
+ 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+
+ -- Test backward pass.
+ local gradOutput = torch.randn(bSize, M, P)
+ local gradInput = mm:backward({A, B}, gradOutput)
+ mytester:assert(#gradInput == 2, 'gradInput must be table of size 2')
+ local gradA, gradB = table.unpack(gradInput)
+ mytester:assertTableEq(gradA:size():totable(), A:size():totable(),
+ 'Gradient for input A has wrong size')
+ mytester:assertTableEq(gradB:size():totable(), B:size():totable(),
+ 'Gradient for input B has wrong size')
+ for i = 1, bSize do
+ mytester:assertTensorEq(gradA[i], gradOutput[i] * B[i]:t(), 1e-10,
+ 'Gradient for input A wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ mytester:assertTensorEq(gradB[i], A[i]:t() * gradOutput[i], 1e-10,
+ 'Gradient for input B wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+ end
+end
+
+function nntest.BatchMMTransposeA()
+ local mm = nn.MM(true, false)
+ local M, N, P = createMatrixInputSizes()
+ for bSize = 1, 11, 5 do
+ local A = torch.randn(bSize, N, M)
+ local B = torch.randn(bSize, N, P)
+
+ -- Test forward pass.
+ local output = mm:forward({A, B})
+ mytester:assertTableEq(output:size():totable(), {bSize, M, P},
+ 'Output has wrong dimensionality')
+ for i = 1, bSize do
+ mytester:assertTensorEq(output[i], A[i]:t() * B[i], 1e-10,
+ 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+
+ -- Test backward pass.
+ local gradOutput = torch.randn(bSize, M, P)
+ local gradInput = mm:backward({A, B}, gradOutput)
+ mytester:assert(#gradInput == 2, 'gradInput must be table of size 2')
+ local gradA, gradB = table.unpack(gradInput)
+ mytester:assertTableEq(gradA:size():totable(), A:size():totable(),
+ 'Gradient for input A has wrong size')
+ mytester:assertTableEq(gradB:size():totable(), B:size():totable(),
+ 'Gradient for input B has wrong size')
+ for i = 1, bSize do
+ mytester:assertTensorEq(gradA[i], B[i] * gradOutput[i]:t(), 1e-10,
+ 'Gradient for input A wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ mytester:assertTensorEq(gradB[i], A[i] * gradOutput[i], 1e-10,
+ 'Gradient for input B wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+ end
+end
+
+function nntest.BatchMMTransposeB()
+ local mm = nn.MM(false, true)
+ local M, N, P = createMatrixInputSizes()
+ for bSize = 1, 11, 5 do
+ local A = torch.randn(bSize, M, N)
+ local B = torch.randn(bSize, P, N)
+
+ -- Test forward pass.
+ local output = mm:forward({A, B})
+ mytester:assertTableEq(output:size():totable(), {bSize, M, P},
+ 'Output has wrong dimensionality')
+ for i = 1, bSize do
+ mytester:assertTensorEq(output[i], A[i] * B[i]:t(), 1e-10,
+ 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+
+ -- Test backward pass.
+ local gradOutput = torch.randn(bSize, M, P)
+ local gradInput = mm:backward({A, B}, gradOutput)
+ mytester:assert(#gradInput == 2, 'gradInput must be table of size 2')
+ local gradA, gradB = table.unpack(gradInput)
+ mytester:assertTableEq(gradA:size():totable(), A:size():totable(),
+ 'Gradient for input A has wrong size')
+ mytester:assertTableEq(gradB:size():totable(), B:size():totable(),
+ 'Gradient for input B has wrong size')
+ for i = 1, bSize do
+ mytester:assertTensorEq(gradA[i], gradOutput[i] * B[i], 1e-10,
+ 'Gradient for input A wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ mytester:assertTensorEq(gradB[i], gradOutput[i]:t() * A[i], 1e-10,
+ 'Gradient for input B wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+ end
+end
+
+function nntest.BatchMMTransposeBoth()
+ local mm = nn.MM(true, true)
+ local M, N, P = createMatrixInputSizes()
+ for bSize = 1, 11, 5 do
+ local A = torch.randn(bSize, N, M)
+ local B = torch.randn(bSize, P, N)
+
+ -- Test forward pass.
+ local output = mm:forward({A, B})
+ mytester:assertTableEq(output:size():totable(), {bSize, M, P},
+ 'Output has wrong dimensionality')
+ for i = 1, bSize do
+ mytester:assertTensorEq(output[i], A[i]:t() * B[i]:t(), 1e-10,
+ 'Output wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+
+ -- Test backward pass.
+ local gradOutput = torch.randn(bSize, M, P)
+ local gradInput = mm:backward({A, B}, gradOutput)
+ mytester:assert(#gradInput == 2, 'gradInput must be table of size 2')
+ local gradA, gradB = table.unpack(gradInput)
+ mytester:assertTableEq(gradA:size():totable(), A:size():totable(),
+ 'Gradient for input A has wrong size')
+ mytester:assertTableEq(gradB:size():totable(), B:size():totable(),
+ 'Gradient for input B has wrong size')
+ for i = 1, bSize do
+ mytester:assertTensorEq(gradA[i], B[i]:t() * gradOutput[i]:t(), 1e-10,
+ 'Gradient for input A wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ mytester:assertTensorEq(gradB[i], gradOutput[i]:t() * A[i]:t(), 1e-10,
+ 'Gradient for input B wrong for bSize = ' .. bSize .. ' and i = ' .. i)
+ end
+ end
+end
+
+function nntest.DotProduct()
+ local indim = math.random(1,10)
+
+ -- test 1D forward
+ local input = {torch.rand(indim),torch.rand(indim)}
+ local module = nn.DotProduct()
+ local expected = input[1]:dot(input[2])
+ local output = module:forward(input)
+ mytester:assertlt(math.abs(expected-output[1]), precision, 'error on forward ')
+
+ -- check gradients
+ -- Note: testJacobian doesn't support table inputs; rather than rewrite
+ -- it to do so, we feed the input through a SplitTable module.
+ -- This assumes both SplitTable and Sequential are bug-free, otherwise this
+ -- test will break.
+ local input = torch.rand(2,indim)
+ local module = nn.Sequential()
+ module:add(nn.SplitTable(1))
+ module:add(nn.DotProduct())
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- IO
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ -- rebuild module to avoid correlated tests
+ local module = nn.Sequential()
+ module:add(nn.SplitTable(1))
+ module:add(nn.DotProduct())
+
+ local nframes = math.random(1,10)
+ local indim = math.random(1,10)
+ local input = torch.rand(2,nframes,indim)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'batch error on state ')
+end
+
+function nntest.CosineDistance()
+ local indim = math.random(1,10)
+ local input = {torch.rand(indim),torch.rand(indim)}
+
+ -- check forward against previous implementation
+ local module = nn.CosineDistance()
+
+ local w1 = input[1]:dot(input[2])
+ local w2 = math.sqrt(input[1]:dot(input[1]))
+ local w3 = math.sqrt(input[2]:dot(input[2]))
+ local output_old = w1/w2/w3
+
+ local output = module:forward(input)
+
+ mytester:assertlt(math.abs(output_old-output[1]),precision,'error on forward ')
+
+ -- check gradients
+ -- Note: testJacobian doesn't support table inputs; rather than rewrite
+ -- it to do so, we feed the input through a SplitTable module.
+ -- This assumes both SplitTable and Sequential are bug-free, otherwise this
+ -- test will break.
+ local input = torch.rand(2,indim)
+ local module = nn.Sequential()
+ module:add(nn.SplitTable(1))
+ module:add(nn.CosineDistance())
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ -- IO
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+
+ -- batch
+ -- rebuild module to avoid correlated tests
+ local module = nn.Sequential()
+ module:add(nn.SplitTable(1))
+ module:add(nn.CosineDistance())
+
+ local nframes = math.random(1,10)
+ local indim = math.random(1,10)
+ local input = torch.rand(2,nframes,indim)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'batch error on state ')
+
+end
+
+function nntest.CosineEmbeddingCriterion()
+ local v1 = torch.Tensor{1, 0}
+ local v2 = torch.Tensor{0.5, math.sqrt(3)*0.5}
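+ -- cos(v1, v2) = 0.5 (the vectors are 60 degrees apart), so with margin 0.6
+ -- and target -1 the loss is max(0, 0.5 - 0.6) = 0 and the gradients vanish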
+
+ local crit = nn.CosineEmbeddingCriterion(0.6)
+ local output = crit:forward({v1, v2}, -1) -- must be called before backward
+ local grads = crit:backward({v1, v2}, -1)
+
+ local zero = torch.Tensor(2):zero()
+ equal(grads[1], zero, 'gradient should be zero')
+ equal(grads[2], zero, 'gradient should be zero')
+
+ -- check jacobians
+ local margin = math.random()*2-1
+ local dim = 5
+ local batch_size = 1
+ local crit = nn.CosineEmbeddingCriterion(margin)
+ local v = torch.rand(2,dim)
+ criterionJacobianTest1DTable(crit,v,1)
+ criterionJacobianTest1DTable(crit,v,-1)
+
+ -- batch with hand-computed values
+ local v1 = torch.Tensor{{1, 0}, {0.5, math.sqrt(3)*0.5}}
+ local v2 = torch.Tensor{{0.5, math.sqrt(3)*0.5}, {1, 0}}
+
+ local t = torch.Tensor{-1,-1}
+ local crit = nn.CosineEmbeddingCriterion(0.6)
+ local output = crit:forward({v1, v2}, t) -- must be called before backward
+ local grads = crit:backward({v1, v2}, t)
+
+ local zero = torch.Tensor(2,2):zero()
+ equal(grads[1], zero, 'gradient should be zero')
+ equal(grads[2], zero, 'gradient should be zero')
+
+ -- batch, sizeAverage true, jacobian
+ local margin = math.random()*2-1
+ local dim = 5
+ local batch_size = 2
+ local crit = nn.CosineEmbeddingCriterion(margin)
+ crit.sizeAverage = true
+ local v = torch.rand(2,batch_size,dim)
+ local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1)
+ criterionJacobianTest1DTable(crit,v,t)
+
+ -- batch, sizeAverage false, jacobian
+ local margin = math.random()*2-1
+ local crit = nn.CosineEmbeddingCriterion(margin)
+ crit.sizeAverage = false
+ local v = torch.rand(2,batch_size,dim)
+ local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1)
+ criterionJacobianTest1DTable(crit,v,t)
+end
+
+function nntest.HingeEmbeddingCriterion()
+ local x = torch.Tensor{0.3,2.1,1.8,0}
+ local y = torch.Tensor{1,-1,-1,1}
+ local expgrads = torch.Tensor{1,0,-1,1} / 4
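+ -- With margin 2: loss(x,1) = x and loss(x,-1) = max(0, 2 - x), so the
+ -- per-sample losses are {0.3, 0, 0.2, 0} and sizeAverage divides by 4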
+
+ local crit = nn.HingeEmbeddingCriterion(2)
+ local output = crit:forward(x, y) -- must be called before backward
+ local grads = crit:backward(x, y)
+
+ mytester:assert(math.abs(output - (0.3 + 0.2) / 4) < 1e-10)
+ equal(grads, expgrads)
+end
+
+function nntest.Replicate()
+ local vector = torch.rand(3)
+
+ local r1 = nn.Replicate(2, 1)
+ local r2 = nn.Replicate(2, 2)
+
+ local vOutput1 = r1:forward(vector):clone()
+ local vOutput2 = r2:forward(vector):clone()
+
+ local expected1 = torch.zeros(2, 3)
+ local expected2 = torch.zeros(3, 2)
+ expected1:select(1, 1):copy(vector)
+ expected1:select(1, 2):copy(vector)
+ expected2:select(2, 1):copy(vector)
+ expected2:select(2, 2):copy(vector)
+
+ mytester:assertTensorEq(vOutput1, expected1, precision, 'Wrong tiling of data when replicating vector.')
+ mytester:assertTensorEq(vOutput2, expected2, precision, 'Wrong tiling of data when replicating vector.')
+
+ -- batch mode
+ local vector = torch.rand(4,3)
+
+ local r1 = nn.Replicate(2, 1, 1)
+ local r2 = nn.Replicate(2, 2, 1)
+
+ local vOutput1 = r1:forward(vector):clone()
+ local vOutput2 = r2:forward(vector):clone()
+
+ local expected1 = torch.zeros(4, 2, 3)
+ local expected2 = torch.zeros(4, 3, 2)
+ expected1:select(2, 1):copy(vector)
+ expected1:select(2, 2):copy(vector)
+ expected2:select(3, 1):copy(vector)
+ expected2:select(3, 2):copy(vector)
+
+ mytester:assertTensorEq(vOutput1, expected1, precision, 'Wrong tiling of data when replicating batch vector.')
+ mytester:assertTensorEq(vOutput2, expected2, precision, 'Wrong tiling of data when replicating batch vector.')
+end
+
+local function testBatchNormalization(moduleName, dim, k)
+ local planes = torch.random(1,k)
+ local size = { torch.random(2, k), planes }
+ for i=1,dim do
+ table.insert(size, torch.random(1,k))
+ end
+ local input = torch.zeros(table.unpack(size)):uniform()
+
+ local function jacTests(module, input, affine)
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ if affine then
+ local err = jac.testJacobianParameters(module, input,
+ module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input,
+ module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format('error on bias [%s]', t))
+ end
+ end
+
+ -- IO
+ local ferr,berr = jac.testIO(module,input)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+ end
+
+ local module = nn[moduleName](planes)
+ module:training()
+ jacTests(module, input, true)
+ module:evaluate()
+ jacTests(module, input, true)
+ jacTests(module, input[1], true)
+
+ -- batch norm without affine transform
+ module = nn[moduleName](planes, 1e-5, 0.1, false)
+ module:training()
+ jacTests(module, input, false)
+ module:evaluate()
+ jacTests(module, input, false)
+ jacTests(module, input[1], false)
+end
+
+function nntest.BatchNormalization()
+ testBatchNormalization('BatchNormalization', 0, 20)
+end
+
+function nntest.SpatialBatchNormalization()
+ testBatchNormalization('SpatialBatchNormalization', 2, 6)
+end
+
+function nntest.VolumetricBatchNormalization()
+ testBatchNormalization('VolumetricBatchNormalization', 3, 4)
+end
+
+function nntest.GradientReversal()
+ local ini = math.random(3,5)
+ local inj = math.random(3,5)
+ local ink = math.random(3,5)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ -- Two GradientReversal layers should cancel each other out
+ local module = nn.Sequential()
+ module:add(nn.GradientReversal())
+ module:add(nn.GradientReversal())
+
+ local err = jac.testJacobian(module,input, 0.1, 10)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input, 0.1, 10)
+ mytester:eq(ferr, 0, torch.typename(module) .. ' - i/o forward err ', precision)
+ mytester:eq(berr, 0, torch.typename(module) .. ' - i/o backward err ', precision)
+end
+
+function nntest.Padding()
+ local fanin = math.random(1,3)
+ local sizex = math.random(4,16)
+ local sizey = math.random(4,16)
+ local pad = math.random(-3,3)
+ local index = math.random(1, fanin)
+ local val = torch.randn(1):squeeze()
+ local module = nn.Padding(1, pad, 3, val, index)
+ local input = torch.rand(fanin,sizey,sizex)
+ local size = input:size():totable()
+ size[1] = size[1] + math.abs(pad)
+
+ local output = module:forward(input)
+ mytester:assertTableEq(size, output:size():totable(), 0.00001, "Padding size error")
+
+ local gradInput = module:backward(input, output)
+ mytester:assertTensorEq(gradInput, input, 0.00001, "Padding backward error")
+end
+
+function nntest.addSingletonDimension()
+ local dims = torch.random(5)
+ local size = torch.LongTensor(dims):random(10)
+ local perm = torch.randperm(dims):totable()
+ local tensor = torch.Tensor(table.unpack(size:totable())):uniform():permute(table.unpack(perm))
+ size = torch.gather(size, 1, torch.LongTensor(perm))
+
+ local firstDim = nn.utils.addSingletonDimension(tensor)
+ mytester:assertTableEq(firstDim:size():totable(), {1, table.unpack(size:totable())},
+ "wrong size for singleton dimension 1")
+ mytester:assertTensorEq(firstDim[1], tensor, 0,
+ "wrong content for singleton dimension 1")
+
+ local dim = torch.random(dims + 1)
+ local result = nn.utils.addSingletonDimension(tensor, dim)
+ local resultSize = size:totable()
+ table.insert(resultSize, dim, 1)
+ mytester:assertTableEq(result:size():totable(), resultSize,
+ "wrong size for random singleton dimension")
+ mytester:assertTensorEq(result:select(dim, 1), tensor, 0,
+ "wrong content for random singleton dimension")
+
+ mytester:assertError(function() nn.utils.addSingletonDimension(tensor, dims + 2) end,
+ "invalid dimension not detected")
+
+ -- passing output tensor as argument
+ local resultArg = torch.Tensor()
+ local resultR = nn.utils.addSingletonDimension(resultArg, tensor, dim)
+ mytester:eq(resultArg:size():totable(), resultSize,
+ 'wrong size for random singleton dimension '..
+ 'when the result is passed as argument')
+ mytester:eq(resultArg, result, 'wrong content for random singleton dimension '..
+ 'when the result is passed as argument')
+
+ mytester:eq(resultR == resultArg, true,
+ 'new tensor is created when it should use the provided tensor')
+end
+
+function nntest.SpatialReflectionPadding()
+ local batch = math.random(1,3)
+ local plane = math.random(1,3)
+ local sizeY = math.random(7,16)
+ local sizeX = math.random(7,16)
+ local padL = math.random(-3,3)
+ local padR = math.random(-3,3)
+ local padT = math.random(-3,3)
+ local padB = math.random(-3,3)
+ local jac = nn.Jacobian
+ local layer = nn.SpatialReflectionPadding(padL, padR, padT, padB)
+ local input = torch.rand(batch, plane, sizeY, sizeX)
+ local err = jac.testJacobian(layer, input)
+ mytester:assertalmosteq(err, 0.0, 1e-7)
+end
+
+function nntest.SpatialReplicationPadding()
+ local batch = math.random(1,3)
+ local plane = math.random(1,3)
+ local sizeY = math.random(7,16)
+ local sizeX = math.random(7,16)
+ local padL = math.random(-3,3)
+ local padR = math.random(-3,3)
+ local padT = math.random(-3,3)
+ local padB = math.random(-3,3)
+ local jac = nn.Jacobian
+ local layer = nn.SpatialReplicationPadding(padL, padR, padT, padB)
+ local input = torch.rand(batch, plane, sizeY, sizeX)
+ local err = jac.testJacobian(layer, input)
+ mytester:assertalmosteq(err, 0.0, 1e-7)
+end
+
+function nntest.VolumetricReplicationPadding()
+ for batch = 0, 1 do
+ local nbatch
+ if batch == 1 then
+ nbatch = math.random(1,3)
+ end
+ local plane = math.random(1,3)
+ local sizeZ = math.random(1,4)
+ local sizeY = math.random(7,11)
+ local sizeX = math.random(7,11)
+ local padLeft = math.random(-3,3)
+ local padRight = math.random(-3,3)
+ local padTop = math.random(-3,3)
+ local padBottom = math.random(-3,3)
+ local padFront = math.random(-3,3)
+ local padBack = math.random(-3,3)
+ local jac = nn.Jacobian
+ local layer =
+ nn.VolumetricReplicationPadding(padLeft, padRight, padTop,
+ padBottom, padFront, padBack)
+ local input
+ if batch == 1 then
+ input = torch.rand(nbatch, plane, sizeZ, sizeY, sizeX)
+ else
+ input = torch.rand(plane, sizeZ, sizeY, sizeX)
+ end
+ local err = jac.testJacobian(layer, input)
+ mytester:assertalmosteq(err, 0.0, 1e-7)
+ end
+end
+
+function nntest.PixelShuffle()
+ -- Checks whether a given tensor has the specified size
+ local function tensorHasSize(tensor, size)
+ local tensorSize = tensor:size()
+
+ if tensorSize:size() ~= #size then
+ return false
+ end
+ for i,v in ipairs(size) do
+ if tensorSize[i] ~= size[i] then
+ return false
+ end
+ end
+ return true
+ end
+
+ -- Verifies that the output is the input re-shuffled as per Eq. 4 in
+ -- "Real-Time Single Image and Video Super-Resolution Using an Efficient
+ -- Sub-Pixel Convolutional Neural Network", Shi et al.
+ -- @param _input - the input, low-resolution image of shape [1, c, h, w]
+ -- @param _output - the output, super-resolved image of shape [1, c, h, w]
+ -- @param upscaleFactor - upscale factor of the super-resolution
+ -- @returns true if the output complies with Eq. 4
+ local function verifyPixelShuffle(_input, _output, upscaleFactor)
+ local input = _input
+ local output = _output
+
+ if input:nDimension() == 3 then
+ input = input:view(1, input:size(1), input:size(2), input:size(3))
+ output = output:view(1, output:size(1), output:size(2), output:size(3))
+ end
+
+ for c = 1, output:size(2) do
+ for h = 1, output:size(3) do
+ for w = 1, output:size(4) do
+ local heightIdx = torch.floor((h - 1)/upscaleFactor) + 1
+ local widthIdx = torch.floor((w - 1)/upscaleFactor) + 1
+ --c does not need to be (c - 1) as it starts at 1 not zero
+ local channelIdx = upscaleFactor * ((h-1) % upscaleFactor) + ((w-1) % upscaleFactor) + 1 + (c-1)*upscaleFactor*upscaleFactor
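+ -- Worked example with upscaleFactor = 2: output voxel (c=1, h=2, w=3)
+ -- maps to channelIdx = 2*1 + 0 + 1 + 0 = 3, heightIdx = 1, widthIdx = 2,
+ -- i.e. output[1][2][3] comes from input channel 3 at position (1, 2)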
+
+ mytester:assertTensorEq(output[{{}, {c}, {h}, {w}}], input[{{}, {channelIdx}, {heightIdx}, {widthIdx}}], 0,
+ string.format("output at location (%d, %d, %d) is incorrect", c, h, w))
+ end
+ end
+ end
+ return true
+ end
+
+ -- Checks the nn.PixelShuffle layer's forward pass. It checks that it
+ -- re-arranges input pixels correctly according to Eq. 4 of
+ -- "Real-Time Single Image and Video Super-Resolution Using an Efficient
+ -- Sub-Pixel Convolutional Neural Network", Shi et al.
+ -- This function tests multiple batch sizes, channel counts and (square) input sizes.
+ -- It also tests normal (un-batched) tensors
+ local function testPixelShuffleUpdateOutput()
+ --Test with batched input
+ for h = 1, 3 do
+ local batchSize = torch.round(torch.uniform(1, 3))
+ for i = 1, 3 do
+ local upscaleFactor = torch.round(torch.uniform(2,5))
+ local pixelShuffle = nn.PixelShuffle(upscaleFactor)
+ for j = 1, 3 do
+ local channels = torch.round(torch.uniform(1, 4))
+ for k = 1, 3 do
+
+ local inputDim = torch.round(torch.uniform(5, 10))
+ local input = torch.Tensor(batchSize, channels * upscaleFactor * upscaleFactor, inputDim, inputDim)
+ input:uniform()
+
+ local output = pixelShuffle:forward(input)
+ local expectedOutputDim = inputDim * upscaleFactor
+ mytester:assert(tensorHasSize(output, {batchSize, channels, expectedOutputDim, expectedOutputDim}),
+ string.format("Output tensor should have size (%d, %d, %d, %d) not %s", batchSize, channels, expectedOutputDim, expectedOutputDim, tostring(output:size())))
+ verifyPixelShuffle(input, output, upscaleFactor)
+ end
+ end
+ end
+ end
+
+ --Test with non-batched input
+ local inputDim = torch.round(torch.uniform(5, 10))
+ local channels = torch.round(torch.uniform(1, 4))
+ local upscaleFactor = torch.round(torch.uniform(2,5))
+
+ local input = torch.Tensor(channels * upscaleFactor * upscaleFactor, inputDim, inputDim)
+ input:uniform()
+
+ local pixelShuffle = nn.PixelShuffle(upscaleFactor)
+ local output = pixelShuffle:forward(input)
+ local expectedOutputDim = inputDim * upscaleFactor
+ mytester:assert(tensorHasSize(output, {channels, expectedOutputDim, expectedOutputDim}),
+ string.format("Output tensor should have size (%d, %d, %d) not %s", channels, expectedOutputDim, expectedOutputDim, tostring(output:size())))
+
+ verifyPixelShuffle(input, output, upscaleFactor)
+ end
+
+ -- Checks the nn.PixelShuffle layer's backward pass. It checks that it
+ -- essentially performs the inverse of Eq. 4 in
+ -- "Real-Time Single Image and Video Super-Resolution Using an Efficient
+ -- Sub-Pixel Convolutional Neural Network", Shi et al.
+ -- This function tests multiple batch sizes, channel counts and (square) input sizes.
+ -- It also tests normal (un-batched) tensors
+ local function testPixelShuffleUpdateGradInput()
+ --Test with batched input
+ for h = 1, 3 do
+ local batchSize = torch.round(torch.uniform(1, 3))
+ for i = 1, 3 do
+ local upscaleFactor = torch.round(torch.uniform(2,5))
+ local pixelShuffle = nn.PixelShuffle(upscaleFactor)
+ for j = 1, 3 do
+ local channels = torch.round(torch.uniform(1, 4))
+ for k = 1, 3 do
+ local inputDim = torch.round(torch.uniform(5, 10))
+ local input = torch.Tensor(batchSize, channels * upscaleFactor * upscaleFactor, inputDim, inputDim)
+
+ input:uniform()
+
+ local output = pixelShuffle:forward(input)
+ -- reuse the output as gradOutput, since they have the same shape
+ local reconstructedInput = pixelShuffle:backward(input, output)
+ mytester:assertTensorEq(reconstructedInput, input, 0)
+ end
+ end
+ end
+ end
+
+ --Test with non-batched input
+ local inputDim = torch.round(torch.uniform(5, 10))
+ local channels = torch.round(torch.uniform(1, 4))
+ local upscaleFactor = torch.round(torch.uniform(2,5))
+ local input = torch.Tensor(channels * upscaleFactor * upscaleFactor, inputDim, inputDim)
+ input:uniform()
+
+ local pixelShuffle = nn.PixelShuffle(upscaleFactor)
+ local output = pixelShuffle:forward(input)
+ -- reuse the output as gradOutput, since they have the same shape
+ local reconstructedInput = pixelShuffle:backward(input, output)
+ mytester:assertTensorEq(reconstructedInput, input, 0)
+
+ local err = jac.testJacobian(pixelShuffle, input)
+ mytester:assertlt(err,precision, "error computing gradiens w.r.t. inputs")
+ end
+
+ local function testModuleIO()
+ --Test with non-batched input
+ local inputDim = torch.round(torch.uniform(5, 10))
+ local channels = torch.round(torch.uniform(1, 4))
+ local upscaleFactor = torch.round(torch.uniform(2,5))
+ local input = torch.Tensor(channels * upscaleFactor * upscaleFactor, inputDim, inputDim):uniform()
+ local pixelShuffle = nn.PixelShuffle(upscaleFactor)
+
+ local fwdErr,bkwdErr = jac.testIO(pixelShuffle,input)
+ mytester:asserteq(fwdErr, 0, torch.typename(pixelShuffle) .. " - i/o forward err ")
+ mytester:asserteq(bkwdErr, 0, torch.typename(pixelShuffle) .. " - i/o backward err ")
+ end
+
+ testPixelShuffleUpdateOutput()
+ testPixelShuffleUpdateGradInput()
+ testModuleIO()
+end
+
+function nntest.Typecast()
+ local function make_network()
+ local seq = nn.Sequential()
+ seq:add(nn.Linear(15, 10))
+ seq:add(nn.Linear(15, 10))
+ seq.modules[1].bias:fill(1)
+ seq.modules[2].bias:fill(2)
+ return seq
+ end
+
+ -- make sure that the typecasts aren't nops
+ assert(torch.getdefaulttensortype() == 'torch.DoubleTensor')
+
+ -- basic net
+ local net = make_network()
+ net.modules[1].empty_tensor = torch.Tensor()
+ net:float()
+ assert(net.modules[1].bias:type() == 'torch.FloatTensor',
+ net.modules[1].bias:type())
+ assert(net.modules[1].empty_tensor:type() == 'torch.FloatTensor')
+ assert(net.modules[1].bias ~= net.modules[2].bias)
+ net.modules[1].bias:fill(3)
+ assert(net.modules[1].bias[1] == 3)
+ assert(net.modules[2].bias[1] == 2)
+
+ -- shared tensors remain shared
+ local net = make_network()
+ net.modules[2].bias = net.modules[1].bias
+ net:float()
+ assert(net.modules[1].bias:type() == 'torch.FloatTensor')
+ assert(net.modules[1].bias == net.modules[2].bias)
+ assert(net.modules[1].bias[1] == 1)
+
+ -- shared storages remain shared
+ local net = make_network()
+ net.modules[2].bias:set(net.modules[1].bias)
+ local net = net:float()
+ assert(net.modules[1].bias:type() == 'torch.FloatTensor')
+ assert(net.modules[1].bias ~= net.modules[2].bias)
+ net.modules[1].bias:fill(3)
+ assert(net.modules[1].bias[1] == 3)
+ assert(net.modules[2].bias[1] == 3)
+
+ -- tricky: overlapping views on the same storage are preserved
+ local net = make_network()
+ local overlap_storage = torch.Tensor(15):fill(1)
+ net.modules[1].bias = overlap_storage:narrow(1, 1, 10)
+ net.modules[2].bias = overlap_storage:narrow(1, 6, 10)
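+ -- narrow(1, 1, 10) covers storage elements 1..10 and narrow(1, 6, 10)
+ -- covers 6..15, so the two biases share exactly 5 underlying elements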
+ net:float()
+ assert(net.modules[1].bias:type() == 'torch.FloatTensor')
+ assert(net.modules[1].bias ~= net.modules[2].bias)
+ net.modules[1].bias:fill(3)
+ assert(net.modules[1].bias[1] == 3)
+ assert(net.modules[2].bias[1] == 3)
+ assert(net.modules[2].bias[6] == 1) -- only the first 5 elements overlapped
+
+ -- check recursiveType on a table
+ local net1 = make_network()
+ local net2 = make_network()
+ net2.modules[1].bias:set(net1.modules[1].bias)
+ net1:float()
+ net2:float()
+ net1.modules[1].bias:fill(3)
+ assert(net2.modules[1].bias[1] == 1)
+
+ local net1 = make_network()
+ local net2 = make_network()
+ net2.modules[1].bias:set(net1.modules[1].bias)
+
+ local tensorCache = {}
+ net1:type('torch.FloatTensor', tensorCache)
+ net2:type('torch.FloatTensor', tensorCache)
+ net1.modules[1].bias:fill(3)
+ assert(net2.modules[1].bias[1] == 3)
+
+ local net1 = make_network()
+ local net2 = make_network()
+ net2.modules[1].bias:set(net1.modules[1].bias)
+
+ nn.utils.recursiveType({net1, net2}, 'torch.FloatTensor')
+ net1.modules[1].bias:fill(3)
+ assert(net2.modules[1].bias[1] == 3)
+
+ -- smoke test some modules with custom type methods
+ local custom_type_modules = {
+ nn.MixtureTable(3),
+ nn.ConcatTable(),
+ nn.Copy(),
+ nn.Copy(nil, nil, nil, true),
+ nn.SpatialContrastiveNormalization(),
+ nn.DotProduct(),
+ nn.PairwiseDistance(1),
+ nn.SpatialDivisiveNormalization(),
+ nn.SpatialSubtractiveNormalization()
+ }
+ for _, module in ipairs(custom_type_modules) do
+ module:float()
+ end
+end
+
+function nntest.Module_apply()
+ local s = nn.Sequential()
+ s:add(nn.Linear(10,10))
+ local s2 = nn.Sequential()
+ s2:add(nn.Linear(10,5))
+ s:add(s2)
+ s:add(nn.Tanh())
+
+ local seen = 0
+ s:apply(function(module)
+ if torch.type(module) == 'nn.Linear' then
+ module.bias:resize(20)
+ seen = seen + 1
+ end
+ end)
+ mytester:asserteq(seen, 2)
+ mytester:asserteq(s.modules[1].bias:size(1), 20)
+ mytester:asserteq(s2.modules[1].bias:size(1), 20)
+end
+
+function nntest.Module_replace()
+ -- test replace in container
+ local s = nn.Sequential()
+ s:add(nn.Linear(10,10))
+ s:add(nn.Sigmoid())
+ s:replace(function(module)
+ return torch.type(module) == 'nn.Sigmoid' and nn.Tanh() or module
+ end)
+ -- test replace of a single module
+ local single = nn.Tanh()
+ local replaced = single:replace(function(module)
+ return torch.type(module) == 'nn.Tanh' and nn.Sigmoid() or module
+ end)
+ mytester:asserteq(torch.type(s:get(2)), 'nn.Tanh', 'replace in container')
+ mytester:asserteq(torch.type(replaced), 'nn.Sigmoid', 'replace in single module')
+end
+
+function nntest.Cosine()
+ local inputSize = 4
+ local outputSize = 5
+
+ -- test 1D
+ local input = torch.randn(inputSize)
+ local gradOutput = torch.randn(outputSize)
+ local cosine = nn.Cosine(inputSize,outputSize)
+ local output = cosine:forward(input)
+ local inputNorm = input:norm()+1e-12
+ local weight2 = cosine.weight[2]
+ local output2 = torch.dot(weight2, input)/((weight2:norm()+1e-12)*inputNorm)
+ mytester:assert(math.abs(output2 - output[2]) < 0.000001,"Cosine output 1D err weight[2]")
+ local output2 = torch.mv(cosine.weight, input)
+ output2:cdiv(cosine.weight:norm(2,2)+1e-12):div(inputNorm)
+ mytester:assertTensorEq(output, output2, 0.000001, "Cosine output 1D err")
+ local gradInput = cosine:updateGradInput(input, gradOutput)
+ local gradInput2 = gradInput:clone():zero()
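+ -- reference formula: d cos(w_j, x) / d x_i
+ -- = w_j[i] / (||x|| ||w_j||) - output[j] * x[i] / ||x||^2, accumulated over j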
+ for j=1,outputSize do
+ local w_j = cosine.weight[j]
+ local nw_j = w_j:norm()+1e-12
+ for i=1,inputSize do
+ local w_ij = w_j[i]
+ local grad_i = (w_ij/(inputNorm*nw_j))
+ grad_i = grad_i - (output[j]*input[i]/(inputNorm*inputNorm))
+ grad_i = grad_i * gradOutput[j]
+ gradInput2[i] = gradInput2[i] + grad_i
+ end
+ end
+ mytester:assertTensorEq(gradInput2, gradInput, 0.000001, "Cosine gradInput 1D err")
+ cosine:zeroGradParameters()
+ cosine:accGradParameters(input, gradOutput, 1)
+ local gradWeight2 = cosine.weight:clone():zero()
+ for j=1,outputSize do
+ local w_j = cosine.weight[j]
+ local nw_j = w_j:norm()+1e-12
+ for i=1,inputSize do
+ local w_ij = w_j[i]
+ local gW_ij = (gradOutput[j]/nw_j) * ( ( input[i] / inputNorm ) - (output[j] * w_ij / nw_j) )
+ gradWeight2[{j,i}] = gW_ij
+ end
+ end
+ mytester:assertTensorEq(cosine.gradWeight, gradWeight2, 0.000001, "Cosine gradWeight 1D err")
+
+ -- test 2D
+ local batchSize = 3
+ local input = torch.randn(batchSize, inputSize)
+ local gradOutput = torch.randn(batchSize, outputSize)
+ cosine:zeroGradParameters()
+ local cosine2 = cosine:clone()
+ local output = cosine:forward(input)
+ local output2 = cosine2:forward(input[2])
+ mytester:assertTensorEq(output[2], output2, 0.000001, "Cosine output 2D err")
+ local gradInput = cosine:backward(input, gradOutput)
+
+ local gradInput2 = gradInput:clone():zero()
+ for i=1,batchSize do
+ cosine2:forward(input[i])
+ gradInput2[i]:copy(cosine2:backward(input[i], gradOutput[i]))
+ end
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "Cosine gradInput 2D err")
+ mytester:assertTensorEq(cosine.gradWeight, cosine2.gradWeight, 0.000001, "Cosine gradWeight 2D err")
+end
+
+function nntest.DistanceRatioCriterion()
+ local sizeAverage = true
+ local crit = nn.DistanceRatioCriterion(sizeAverage)
+ local X = torch.rand(32,1):fill(1)
+ local Y = torch.rand(32,1):fill(1)
+
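+ -- Assuming the criterion computes -log( exp(-X) / (exp(-X) + exp(-Y)) )
+ -- = X + log(exp(-X) + exp(-Y)) per sample: with X = Y = 1 everywhere each
+ -- sample contributes 1 + log(2*exp(-1)), matching trueLoss below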
+ -- Unit Test updateOutput
+ local loss = crit:forward({X, Y})
+ local trueLoss = 1 + math.log(math.exp(-1) + math.exp(-1))
+ assert(math.abs(loss - trueLoss) < 0.000001,
+ "DistanceRatioCriterion forward incorrect output")
+
+ -- Unit Test updateGradInput
+ local dxdy = crit:backward({X, Y})
+ local dx = dxdy[1]
+ local dy = dxdy[2]
+ assert(math.abs(dx:sum() - 0.5) < 0.000001,
+ "DistanceRatioCriterion backward (dx) incorrect output")
+ assert(math.abs(dy:sum() + 0.5) < 0.000001,
+ "DistanceRatioCriterion backward (dy) incorrect output")
+end
+
+function nntest.ErrorHandling()
+ local l = nn.Linear(1, 1)
+ local p = nn.Parallel(1, 1):add(l)
+ local c = nn.Concat(1):add(p)
+ local model = nn.Sequential():add(nn.Identity()):add(c):add(nn.Identity())
+ local function errmsg(module, i)
+ return 'In ' .. i .. ' module of ' .. torch.type(module) .. ':\n'
+ end
+ local expected_err = errmsg(model, 2) .. errmsg(c, 1) .. errmsg(p, 1)
+ mytester:assertErrorObj(
+ function()
+ model:forward(torch.randn(1,2,2))
+ end,
+ function(err)
+ return err:find(expected_err) and err:find('size mismatch')
+ end,
+ "Failure expected or bad error message (missing information or reason)"
+ )
+end
+
+function nntest.GPU()
+ -- this is a placeholder to let you know that the nn.GPU unit test
+ -- is located in the cunn package.
+end
+
+function nntest.Profile()
+ local mx_overhead = 0.05
+ local print_every = 3
+ local net = nn.Profile(nn.Linear(3,4), print_every)
+ local input, gradOutput = torch.randn(1, 3), torch.randn(1, 4)
+ local output, gradInput = net:forward(input), net:backward(input, gradOutput)
+ mytester:assertTensorEq(net.modules[1].output, output, 0.000001)
+ mytester:assertTensorEq(net.modules[1].gradInput, gradInput, 0.000001)
+end
+
+function nntest.NaN()
+ local _ = require 'moses'
+ local input = torch.randn(2,3)
+ local gradOutput = torch.randn(2,4)
+ local lin = nn.Linear(3,4)
+ lin:zeroGradParameters()
+ local nan = nn.NaN(lin)
+ mytester:assert(nan.id == 1)
+ -- test that it works when no NaNs are present
+ local output = nan:forward(input):clone()
+ local gradInput = nan:backward(input, gradOutput):clone()
+ local gradWeight = lin.gradWeight:clone()
+ local gradBias = lin.gradBias:clone()
+ lin:zeroGradParameters()
+ local output2 = lin:forward(input)
+ local gradInput2 = lin:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output2, 0.000001)
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001)
+ mytester:assertTensorEq(gradWeight, lin.gradWeight, 0.000001)
+ mytester:assertTensorEq(gradBias, lin.gradBias, 0.000001)
+ -- test with some NaNs
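+ -- log(0) = -inf and log(-inf) = nan, so this fills the input with NaNs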
+ input:zero():log():log()
+ local sum = input:sum()
+ mytester:assert(_.isNaN(sum))
+ mytester:assert(not pcall(function() nan:forward(input) end))
+ lin.bias:fill(sum)
+ input = torch.randn(2,3)
+ mytester:assert(not pcall(function() nan:forward(input) end))
+ lin.bias:uniform(0,1)
+ gradOutput:fill(sum)
+ mytester:assert(not pcall(function() nan:backward(input, gradOutput) end))
+ gradOutput:uniform(0,1)
+ lin.gradBias:fill(sum)
+ mytester:assert(not pcall(function() nan:backward(input, gradOutput) end))
+end
+
+function nntest.DontCast()
+ local input = torch.randn(3,4)
+ local gradOutput = torch.randn(3,2)
+ local linear = nn.Linear(4,2):float()
+ local mlp = nn.DontCast(linear, true)
+ linear:zeroGradParameters()
+ local linear = linear:clone()
+ local output = mlp:forward(input)
+ local gradInput = mlp:backward(input, gradOutput)
+ mytester:assert(torch.type(output) == 'torch.DoubleTensor')
+ mytester:assert(torch.type(gradInput) == 'torch.DoubleTensor')
+ local output2 = linear:forward(input:float())
+ local gradInput2 = linear:backward(input:float(), gradOutput:float())
+ mytester:assertTensorEq(output:float(), output2, 0.000001)
+ mytester:assertTensorEq(gradInput:float(), gradInput2, 0.000001)
+ local mlp3 = nn.DontCast(linear:clone())
+ mlp3:zeroGradParameters()
+ local output3 = mlp3:forward(input:float())
+ local gradInput3 = mlp3:backward(input:float(), gradOutput:float())
+ mytester:assert(torch.type(output3) == 'torch.FloatTensor')
+ mytester:assert(torch.type(gradInput3) == 'torch.FloatTensor')
+ mytester:assertTensorEq(output3, output2, 0.000001)
+ mytester:assertTensorEq(gradInput3, gradInput2, 0.000001)
+
+ mlp:float()
+ local output4 = mlp:forward(input:float())
+ local gradInput4 = mlp:backward(input:float(), gradOutput:float())
+ mytester:assert(torch.type(output4) == 'torch.FloatTensor')
+ mytester:assert(torch.type(gradInput4) == 'torch.FloatTensor')
+ mytester:assertTensorEq(output3, output4, 0.000001)
+ mytester:assertTensorEq(gradInput3, gradInput4, 0.000001)
+ mlp:double()
+ mytester:assert(torch.type(linear.output) == 'torch.FloatTensor')
+ local output = mlp:forward(input)
+ local gradInput = mlp:backward(input, gradOutput)
+ mytester:assert(torch.type(output4) == 'torch.FloatTensor')
+ mytester:assert(torch.type(gradInput4) == 'torch.FloatTensor')
+ mytester:assertTensorEq(output3, output:float(), 0.000001)
+ mytester:assertTensorEq(gradInput3, gradInput:float(), 0.000001)
+
+ -- test table inputs/outputs
+ local input = {torch.randn(3,4), torch.randn(3,4)}
+ local gradOutput = {torch.randn(3,2), torch.randn(3,2)}
+ local linear = nn.ParallelTable():add(nn.Linear(4,2)):add(nn.Linear(4,2)):float()
+ local mlp = nn.DontCast(linear, true)
+ linear:zeroGradParameters()
+ local linear = linear:clone()
+ local output = mlp:forward(input)
+ local gradInput = mlp:backward(input, gradOutput)
+ mytester:assert(torch.type(output[1]) == 'torch.DoubleTensor')
+ mytester:assert(torch.type(gradInput[1]) == 'torch.DoubleTensor')
+ mytester:assert(torch.type(output[2]) == 'torch.DoubleTensor')
+ mytester:assert(torch.type(gradInput[2]) == 'torch.DoubleTensor')
+ local _ = require 'moses'
+ local finput = _.map(input, function(k,v) return v:float() end)
+ local foutput = _.map(output, function(k,v) return v:float() end)
+ local fgradInput = _.map(gradInput, function(k,v) return v:float() end)
+ local fgradOutput = _.map(gradOutput, function(k,v) return v:float() end)
+ local output2 = linear:forward(finput)
+ local gradInput2 = linear:backward(finput, fgradOutput)
+ mytester:assertTensorEq(foutput[1], output2[1], 0.000001)
+ mytester:assertTensorEq(foutput[2], output2[2], 0.000001)
+ mytester:assertTensorEq(fgradInput[1], gradInput2[1], 0.000001)
+ mytester:assertTensorEq(fgradInput[2], gradInput2[2], 0.000001)
+ local mlp3 = nn.DontCast(linear:clone())
+ mlp3:zeroGradParameters()
+ local output3 = mlp3:forward(finput)
+ local gradInput3 = mlp3:backward(finput, fgradOutput)
+ mytester:assert(torch.type(output3[1]) == 'torch.FloatTensor')
+ mytester:assert(torch.type(gradInput3[1]) == 'torch.FloatTensor')
+ mytester:assert(torch.type(output3[2]) == 'torch.FloatTensor')
+ mytester:assert(torch.type(gradInput3[2]) == 'torch.FloatTensor')
+ mytester:assertTensorEq(output3[1], output2[1], 0.000001)
+ mytester:assertTensorEq(gradInput3[1], gradInput2[1], 0.000001)
+ mytester:assertTensorEq(output3[2], output2[2], 0.000001)
+ mytester:assertTensorEq(gradInput3[2], gradInput2[2], 0.000001)
+ mlp:float()
+ local output4 = mlp:forward(finput)
+ local gradInput4 = mlp:backward(finput, fgradOutput)
+ mytester:assert(torch.type(output4[1]) == 'torch.FloatTensor')
+ mytester:assert(torch.type(gradInput4[1]) == 'torch.FloatTensor')
+ mytester:assert(torch.type(output4[2]) == 'torch.FloatTensor')
+ mytester:assert(torch.type(gradInput4[2]) == 'torch.FloatTensor')
+ mytester:assertTensorEq(output3[1], output4[1], 0.000001)
+ mytester:assertTensorEq(gradInput3[1], gradInput4[1], 0.000001)
+ mytester:assertTensorEq(output3[2], output4[2], 0.000001)
+ mytester:assertTensorEq(gradInput3[2], gradInput4[2], 0.000001)
+ mlp:double()
+ mytester:assert(torch.type(linear.output) == 'table')
+ mytester:assert(torch.type(linear.output[1]) == 'torch.FloatTensor')
+ mytester:assert(torch.type(linear.output[2]) == 'torch.FloatTensor')
+ local output = mlp:forward(input)
+ local gradInput = mlp:backward(input, gradOutput)
+ mytester:assertTensorEq(output3[1], output[1]:float(), 0.000001)
+ mytester:assertTensorEq(gradInput3[1], gradInput[1]:float(), 0.000001)
+end
+
+function nntest.SpatialDepthWiseConvolution()
+ local epsilon = 0.00001
+
+ local SC = nn.SpatialConvolution
+ local SDWC = nn.SpatialDepthWiseConvolution
+
+ local function spatialDepthWiseConv(
+ nInputPlane, multiplier, kernel, stride, padding, inputSize, weight, bias
+ )
+ local conv = SDWC(nInputPlane, multiplier, kernel, kernel, stride, stride, padding, padding)
+ conv.weight = weight
+ conv.bias = bias
+ return conv
+ end
+
+ -- Utility spatialDepthWiseConv_util() function --------------------------------
+ -- By Alfredo Canziani, alfredo.canziani@gmail.com -----------------------------
+ local function spatialDepthWiseConv_util(
+ nInputPlane, multiplier, kernel, stride, padding, inputSize, weight, bias
+ )
+
+ local conv = nn.Sequential()
+ conv:add(nn.Contiguous())
+ conv:add(nn.View(-1, 1, inputSize, inputSize))
+ conv:add(SC(1, multiplier, kernel, kernel, stride, stride, padding, padding))
+
+ local depthWiseConv = nn.Parallel(2, 2)
+ for channel = 1, nInputPlane do
+ local tempConv = conv:clone()
+ tempConv:get(3).weight = weight:narrow(2, channel, 1):clone()
+ tempConv:get(3).bias = bias:select(2, channel):clone()
+ depthWiseConv:add(tempConv)
+ end
+ depthWiseConv:add(nn.Contiguous())
+ return depthWiseConv
+ end
+
+ local n = 3 -- nInputPlane
+ local s = 28 -- input height and width
+ local b = 3 -- batch size
+ local m = 4 -- multiplier
+ local k = 3 -- kernel size
+ local p = 1 -- padding
+ local st = 1 -- stride
+
+ local testBatch = 1e3 -- number of repetitions
+
+ local X = torch.rand(b, n, s, s) -- 3x3x28x28 images
+ local weight = torch.rand(m, n, k, k) -- weight
+ local bias = torch.rand(m, n) -- bias
+
+ local model = spatialDepthWiseConv(n, m, k, st, p, s, weight, bias)
+ local model_util = spatialDepthWiseConv_util(n, m, k, st, p, s, weight, bias)
+
+ local Y_util = model_util:forward(X)
+ local Y = model:forward(X)
+
+ local abs_diff = Y_util:clone():csub(Y):abs()
+ mytester:assert(torch.all(abs_diff:lt(epsilon)))
+end
+
+function nntest.Constant()
+ local input = torch.randn(20,3,7)
+ local gradOutput = torch.randn(20,30,6)
+ local value = torch.randn(30,6)
+ local const = nn.Constant(value:clone(), 2)
+ local output = const:forward(input)
+ local gradInput = const:backward(input, output)
+ local output2 = value:view(1,30,6):expand(20,30,6)
+ mytester:assertTensorEq(output2, output, 0.000001, "Constant forward err")
+ mytester:assertTensorEq(gradInput, input:zero(), 0.000001, "Constant backward err")
+end
+
+function nntest.WhiteNoise()
+ local input = torch.zeros(3, 28, 28)
+ local addNoise = nn.WhiteNoise()
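+ -- assuming the default constructor arguments (zero mean, small std),
+ -- the training-mode output should be near-zero-mean, low-variance noise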
+ local output = addNoise:forward(input)
+ local meanValue = output:mean()
+ local stdValue = output:std()
+ mytester:assert(meanValue > -0.01 and meanValue < 0.01)
+ mytester:assert(stdValue < 0.15 and stdValue >= 0)
+
+ -- Evaluate
+ addNoise:evaluate()
+ output = addNoise:forward(input)
+ meanValue = output:mean()
+ stdValue = output:std()
+ mytester:assert(meanValue == 0)
+ mytester:assert(stdValue == 0)
+
+ -- backprop
+ addNoise:training()
+ local gradOutput = torch.rand(3, 28, 28)
+ local gradInput = addNoise:updateGradInput(input, gradOutput)
+ mytester:assertTensorEq(gradOutput, gradInput, 0.000001, "WhiteNoise backward err")
+end
+
+function nntest.OneHot()
+ local nClass = 10
+
+ -- batch mode
+ local batchSize = 3
+ local input = torch.LongTensor(batchSize):random(1, nClass)
+ local gradOutput = torch.randn(batchSize, nClass)
+
+ local oh = nn.OneHot(nClass)
+
+ local output = oh:forward(input)
+ local output2 = torch.Tensor(batchSize, nClass):zero()
+ local eye = torch.eye(nClass)
+ output2:index(eye, 1, input)
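+ -- row k of the identity matrix is exactly the one-hot encoding of class k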
+ mytester:assertTensorEq(output, output2, 0.000001, "OneHot forward batch err")
+ mytester:assert(output:dim() == 2)
+
+ -- non-batch mode (number input)
+ local num = 3
+ local output3 = torch.zeros(nClass)
+ output3[num] = 1.0
+ mytester:assertTensorEq(oh:forward(num), output3, 0.000001, "OneHot forward number err")
+
+ local gradInput = oh:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInput, input:double():zero(), 0.000001, "OneHot backward batch err")
+
+ if pcall(function() require 'cunn' end) then
+ oh:cuda()
+
+ -- test with long input
+ local output = oh:forward(input)
+ mytester:assert(torch.type(output) == 'torch.CudaTensor')
+ mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot forward batch long-cuda err")
+
+ -- test with cuda input
+ local input = input:cuda()
+ gradOutput = gradOutput:cuda()
+
+ local output = oh:forward(input)
+ mytester:assert(torch.type(output) == 'torch.CudaTensor')
+ mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot forward batch cuda err")
+
+ local gradInput2 = oh:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInput, gradInput2:double(), 0.000001, "OneHot backward batch err")
+ cutorch.synchronize()
+
+ -- non-batch mode (number input)
+ mytester:assertTensorEq(oh:forward(num), output3:cuda(), 0.000001, "OneHot forward number err")
+ end
+
+ -- multi-dimensional input
+ local inputSize = 2
+ local input = torch.LongTensor(batchSize, inputSize):random(1, nClass)
+ local gradOutput = torch.randn(batchSize, inputSize, nClass)
+
+ local oh = nn.OneHot(nClass, 2)
+
+ local output = oh:forward(input)
+ local output2 = torch.Tensor(batchSize*inputSize, nClass):zero()
+ local eye = torch.eye(nClass)
+ output2:index(eye, 1, input:view(-1))
+ output2:resize(batchSize, inputSize, nClass)
+ mytester:assertTensorEq(output, output2, 0.000001, "OneHot 2d forward batch err")
+ mytester:assert(output:dim() == 3)
+
+ local gradInput = oh:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInput, input:double():zero(), 0.000001, "OneHot 2d backward batch err")
+
+ if pcall(function() require 'cunn' end) then
+ oh:cuda()
+
+ -- test with long input
+ local output = oh:forward(input)
+ mytester:assert(torch.type(output) == 'torch.CudaTensor')
+ mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot 2d forward batch long-cuda err")
+
+ -- test with cuda input
+ local input = input:cuda()
+ gradOutput = gradOutput:cuda()
+
+ local output = oh:forward(input)
+ mytester:assert(torch.type(output) == 'torch.CudaTensor')
+ mytester:assertTensorEq(output:double(), output2, 0.000001, "OneHot 2d forward batch cuda err")
+
+ local gradInput2 = oh:backward(input, gradOutput)
+ mytester:assertTensorEq(gradInput, gradInput2:double(), 0.000001, "OneHot 2d backward batch err")
+
+ local benchmark = false
+ if benchmark then
+ local input = torch.FloatTensor(50, 50):random(1,65):cuda()
+
+ local oh = nn.OneHot(65):cuda()
+
+ oh:forward(input)
+ cutorch.synchronize()
+ local a = torch.Timer()
+ for i=1,10 do
+ oh:forward(input)
+ end
+ cutorch.synchronize()
+ local gputime = a:time().real
+
+ oh:float()
+ input = input:float()
+ oh:forward(input)
+ a = torch.Timer()
+ for i=1,10 do
+ oh:forward(input)
+ end
+ local cputime = a:time().real
+ print("Onehot GPU vs CPU time", gputime, cputime)
+ end
+ end
+end
+
+function nntest.ZeroGrad()
+ local input = torch.randn(3,4)
+ local zg = nn.ZeroGrad()
+ local output = zg:forward(input)
+ mytester:assertTensorEq(input, output, 0.00000001)
+ local gradInput = zg:backward(input, input)
+ local gradInput2 = gradInput:clone():zero()
+ mytester:assertTensorEq(gradInput, gradInput2, 0.0000001)
+end
+
+function nntest.ZipTable()
+ -- input : { {a1,a2}, {b1,b2}, {c1,c2} }
+ -- output : { {a1,b1,c1}, {a2,b2,c2} }
+ local z = nn.ZipTable()
+ local input = {
+ {torch.randn(3,4), torch.randn(3,4)},
+ {torch.randn(3,4), torch.randn(3,4)},
+ {torch.randn(3,4), torch.randn(3,4)}
+ }
+ local output = z:forward(input)
+ mytester:assert(#output == 2, "ZipTable #output")
+ mytester:assert(#(output[1]) == 3, "ZipTable #output[1]")
+ mytester:assertTensorEq(input[1][1], output[1][1], 0.000001, "ZipTable input11")
+ mytester:assertTensorEq(input[1][2], output[2][1], 0.000001, "ZipTable input12")
+ mytester:assertTensorEq(input[3][2], output[2][3], 0.000001, "ZipTable input32")
+ local gradInput = z:backward(input, output)
+ mytester:assert(#gradInput == 3, "ZipTable #gradInput")
+ mytester:assert(#(gradInput[1]) == 2, "ZipTable #gradInput[1]")
+ mytester:assertTensorEq(input[1][1], gradInput[1][1], 0.000001, "ZipTable gradInput11")
+ mytester:assertTensorEq(input[1][2], gradInput[1][2], 0.000001, "ZipTable gradInput12")
+ mytester:assertTensorEq(input[3][2], gradInput[3][2], 0.000001, "ZipTable gradInput32")
+end
+
+function nntest.ZipTableOneToMany()
+ -- input : { v, {a,b,c} }
+ -- output : { {v,a}, {v,b}, {v,c} }
+ local z = nn.ZipTableOneToMany()
+ local input = { torch.randn(3), { torch.randn(4), torch.rand(4), torch.rand(4) } }
+ local output = z:forward(input)
+ mytester:assert(#output == 3, "ZipTableOneToMany #output")
+ mytester:assert(#(output[1]) == 2, "ZipTableOneToMany #output[1]")
+ mytester:assert(#(output[2]) == 2, "ZipTableOneToMany #output[2]")
+ mytester:assert(#(output[3]) == 2, "ZipTableOneToMany #output[3]")
+ mytester:assertTensorEq(input[1], output[1][1], 0.000001, "ZipTableOneToMany input1 output11")
+ mytester:assertTensorEq(input[1], output[2][1], 0.000001, "ZipTableOneToMany input1 output21")
+ mytester:assertTensorEq(input[1], output[3][1], 0.000001, "ZipTableOneToMany input1 output31")
+ mytester:assertTensorEq(input[2][1], output[1][2], 0.000001, "ZipTableOneToMany input21")
+ mytester:assertTensorEq(input[2][2], output[2][2], 0.000001, "ZipTableOneToMany input22")
+ mytester:assertTensorEq(input[2][3], output[3][2], 0.000001, "ZipTableOneToMany input23")
+ local gradInput = z:backward(input, output)
+ mytester:assert(#gradInput == 2, "ZipTableOneToMany #gradInput")
+ mytester:assert(#(gradInput[2]) == 3, "ZipTableOneToMany #gradInput[2]")
+ mytester:assertTensorEq(input[2][1], gradInput[2][1], 0.000001, "ZipTableOneToMany gradInput21")
+ mytester:assertTensorEq(input[2][2], gradInput[2][2], 0.000001, "ZipTableOneToMany gradInput22")
+ mytester:assertTensorEq(input[2][3], gradInput[2][3], 0.000001, "ZipTableOneToMany gradInput23")
+ mytester:assertTensorEq(torch.mul(input[1], 3), gradInput[1], 0.000001, "ZipTableOneToMany gradInput1")
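+ -- the single tensor v receives the sum of the gradients from all three
+ -- pairs; with gradOutput = output each pair contributes v itself, hence 3*v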
+end
+
+function nntest.Collapse()
+ local c = nn.Collapse(3)
+ local input = torch.randn(8,3,4,5)
+ local output = c:forward(input)
+ mytester:assertTensorEq(input:view(8,-1), output, 0.000001, "Collapse:forward")
+ local gradInput = c:backward(input, output)
+ mytester:assertTensorEq(gradInput, input, 0.000001, "Collapse:backward")
+ mytester:assertTableEq(gradInput:size():totable(), input:size():totable(), 0.000001, "Collapse:backward size")
+ local input2 = input:transpose(1,4)
+ local output2 = c:forward(input2)
+ mytester:assertTensorEq(input2:contiguous():view(5,-1), output2, 0.000001, "Collapse:forward non-contiguous")
+ local gradInput2 = c:backward(input2, output2)
+ mytester:assertTensorEq(gradInput2, input2, 0.000001, "Collapse:backward non-contiguous")
+ mytester:assertTableEq(gradInput2:size():totable(), input2:size():totable(), 0.000001, "Collapse:backward size non-contiguous")
+end
+
+function nntest.Convert()
+ -- batch mode
+ local c = nn.Convert('bchw', 'chwb')
+ local input = torch.randn(8,3,5,5)
+ local output = c:forward(input)
+ local output2 = input:transpose(1,4):transpose(1,3):transpose(1,2)
+ mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd bchw->chwb")
+ local gradInput = c:backward(input, output)
+ mytester:assertTensorEq(gradInput, input, 0.000001, "Convert bwd bchw->chwb")
+ local c = nn.Convert('bchw', 'bf')
+ local output = c:forward(input)
+ local output2 = input:view(8,-1)
+ mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd bchw->bf")
+ c:float()
+ local output = c:forward(input:float())
+ mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type()")
+ local output = c:forward(input)
+ mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() double->float")
+ -- non-batch mode
+ local c = nn.Convert('chw', 'hwc')
+ local input = torch.randn(3,5,5)
+ local output = c:forward(input)
+ local output2 = input:transpose(1,3):transpose(1,2)
+ mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd chw->hwc non-batch")
+ local gradInput = c:backward(input, output)
+ mytester:assertTensorEq(gradInput, input, 0.000001, "Convert bwd chw->hwc non-batch")
+ local c = nn.Convert('chw', 'f')
+ local output = c:forward(input)
+ local output2 = input:view(-1)
+ mytester:assertTensorEq(output, output2, 0.000001, "Convert fwd chw->bf non-batch")
+ c:float()
+ local output = c:forward(input:float())
+ mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() non-batch")
+ local output = c:forward(input)
+ mytester:assertTensorEq(output, output2:float(), 0.000001, "Convert:type() double->float non-batch")
+end
+
+function nntest.CAddTensorTable()
+ -- input : { v, {a,b,c} }
+ -- output : { v+a, v+b, v+c }
+ local z = nn.CAddTensorTable()
+ local input = { torch.randn(3), { torch.randn(3), torch.rand(3), torch.rand(3) } }
+ local output = z:forward(input)
+ mytester:assert(#output == 3, "CAddTensorTable #output")
+ mytester:assertTensorEq(input[1]+input[2][1], output[1], 0.00001, "CAddTensorTable input21 output1")
+ mytester:assertTensorEq(input[1]+input[2][2], output[2], 0.00001, "CAddTensorTable input22 output2")
+ mytester:assertTensorEq(input[1]+input[2][3], output[3], 0.00001, "CAddTensorTable input23 output3")
+ local gradInput = z:backward(input, output)
+ mytester:assert(#gradInput == 2, "CAddTensorTable #gradInput")
+ mytester:assert(#(gradInput[2]) == 3, "CAddTensorTable #gradInput[2]")
+ mytester:assertTensorEq(output[1], gradInput[2][1], 0.000001, "CAddTensorTable gradInput21")
+ mytester:assertTensorEq(output[2], gradInput[2][2], 0.000001, "CAddTensorTable gradInput22")
+ mytester:assertTensorEq(output[3], gradInput[2][3], 0.000001, "CAddTensorTable gradInput23")
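+   -- input[1] is added to every output, so its gradient is the sum of all gradOutputs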
+ mytester:assertTensorEq(output[1]+output[2]+output[3], gradInput[1], 0.000001, "CAddTensorTable gradInput1")
+end
+
+-- Unit Test Kmeans layer
+function nntest.Kmeans()
+ local k = 3
+ local dim = 5
+ local batchSize = 200
+ local input = torch.Tensor(batchSize, dim)
+ for i=1, batchSize do
+ input[i]:fill(torch.random(1, k))
+ end
+
+ local verbose = false
+
+ local attempts = 10
+ local iter = 100
+   local bestLoss = math.huge
+ local tempLoss = 0
+ local learningRate = 1
+
+ local initTypes = {'random', 'kmeans++'}
+ local useCudas = {false}
+ if pcall(function() require 'cunn' end) then
+ useCudas[2] = true
+ end
+ for _, initType in pairs(initTypes) do
+ for _, useCuda in pairs(useCudas) do
+
+ if useCuda then
+ input = input:cuda()
+ else
+ input = input:double()
+ end
+
+      local timer = torch.Timer()
+      bestLoss = math.huge -- reset per configuration so one run's result cannot mask another's
+ for j=1, attempts do
+ local km = nn.Kmeans(k, dim)
+ if useCuda then km:cuda() end
+
+ if initType == 'kmeans++' then
+ km:initKmeansPlus(input)
+ else
+ km:initRandom(input)
+ end
+
+ for i=1, iter do
+ km:zeroGradParameters()
+
+ km:forward(input)
+            km:backward(input) -- gradOutput is unused: Kmeans derives gradients from its own loss
+
+ -- Gradient descent
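+            -- weight <- weight - learningRate * gradWeight (add(alpha, tensor) is an in-place axpy)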
+ km.weight:add(-learningRate, km.gradWeight)
+ tempLoss = km.loss
+ end
+ if verbose then print("Attempt Loss " .. j ..": " .. tempLoss) end
+ if tempLoss < bestLoss then
+ bestLoss = tempLoss
+ end
+ if (initType == 'kmeans++' and bestLoss < 0.00001) or (initType == 'random' and bestLoss < 500) then
+ break
+ end
+ end
+ if verbose then
+ print("InitType: " .. initType .. " useCuda: " .. tostring(useCuda))
+ print("Best Loss: " .. bestLoss)
+ print("Total time: " .. timer:time().real)
+ end
+ if initType == 'kmeans++' then
+ mytester:assert(bestLoss < 0.00001, "Kmeans++ error ("..(useCuda and 'cuda' or 'double')..")")
+ else
+ mytester:assert(bestLoss < 500, "Kmeans error ("..(useCuda and 'cuda' or 'double')..")")
+ end
+ end
+ end
+end
+
+mytester:add(nntest)
+
+jac = nn.Jacobian
+sjac = nn.SparseJacobian
+function nn.test(tests, seed)
+ -- Limit number of threads since everything is small
+ local nThreads = torch.getnumthreads()
+ torch.setnumthreads(1)
+ -- randomize stuff
+   seed = seed or (1e5 * torch.tic())
+ print('Seed: ', seed)
+ math.randomseed(seed)
+ torch.manualSeed(seed)
+ mytester:run(tests)
+ torch.setnumthreads(nThreads)
+ return mytester
+end
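+
+-- Usage sketch (test names are the nntest.* keys defined above):
+-- > nn.test()                    -- run the full suite with a time-based seed
+-- > nn.test({'ZipTable'}, 1234)  -- run a subset with a fixed seed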
+
+function nn.testTHNN(tests, seed)
+ require 'test.LinearTHNN'
+ nn.Linear = nn.LinearTHNN
+ return nn.test(tests,seed)
+end
diff --git a/contrib/lua-torch/nn/utils.lua b/contrib/lua-torch/nn/utils.lua
new file mode 100644
index 000000000..17b52afb3
--- /dev/null
+++ b/contrib/lua-torch/nn/utils.lua
@@ -0,0 +1,223 @@
+nn.utils = {}
+
+-- oops; someone forgot to add torch.Storage.type
+-- TODO replace with torch.Storage.type when implemented
+local function torch_Storage_type(self, type)
+ local current = torch.typename(self)
+ if not type then return current end
+ if type ~= current then
+ local new = torch.getmetatable(type).new()
+ if self:size() > 0 then
+ new:resize(self:size()):copy(self)
+ end
+ return new
+ else
+ return self
+ end
+end
+
+-- tensorCache maintains a list of all tensors and storages that have been
+-- converted (recursively) by calls to recursiveType() and type().
+-- It caches conversions in order to preserve sharing semantics
+-- i.e. if two tensors share a common storage, then type conversion
+-- should preserve that.
+--
+-- You can preserve sharing semantics across multiple networks by
+-- passing tensorCache between the calls to type, e.g.
+--
+-- > tensorCache = {}
+-- > net1:type('torch.CudaTensor', tensorCache)
+-- > net2:type('torch.CudaTensor', tensorCache)
+-- > nn.utils.recursiveType(anotherTensor, 'torch.CudaTensor', tensorCache)
+--
+-- Implementation note: to make Lua table lookup behave correctly,
+-- tensor keys are stored as actual tensor objects, while storage
+-- keys are stored as the pointers themselves (as numbers).
+function nn.utils.recursiveType(param, type, tensorCache)
+ tensorCache = tensorCache or {}
+
+ if torch.type(param) == 'table' then
+ for k, v in pairs(param) do
+ param[k] = nn.utils.recursiveType(v, type, tensorCache)
+ end
+ elseif torch.isTypeOf(param, 'nn.Module') or
+ torch.isTypeOf(param, 'nn.Criterion') then
+ param:type(type, tensorCache)
+ elseif torch.isTensor(param) then
+ if torch.typename(param) ~= type then
+ local newparam
+ if tensorCache[param] then
+ newparam = tensorCache[param]
+ else
+ newparam = torch.Tensor():type(type)
+ local storageType = type:gsub('Tensor','Storage')
+ if param:storage() then
+ local storage_key = torch.pointer(param:storage())
+ if not tensorCache[storage_key] then
+ tensorCache[storage_key] = torch_Storage_type(
+ param:storage(), storageType)
+ end
+ assert(torch.type(tensorCache[storage_key]) == storageType)
+ newparam:set(
+ tensorCache[storage_key],
+ param:storageOffset(),
+ param:size(),
+ param:stride()
+ )
+ end
+ tensorCache[param] = newparam
+ end
+ assert(torch.type(newparam) == type)
+ param = newparam
+ end
+ end
+ return param
+end
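+
+-- Example (a sketch): tensors that share a storage keep sharing after
+-- conversion when the same tensorCache is reused across calls:
+-- > local cache = {}
+-- > local a = torch.DoubleTensor(10):fill(1)
+-- > local b = a:narrow(1, 1, 5)
+-- > a = nn.utils.recursiveType(a, 'torch.FloatTensor', cache)
+-- > b = nn.utils.recursiveType(b, 'torch.FloatTensor', cache)
+-- > a:fill(0) -- b reads zeros too; the shared storage was converted only once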
+
+function nn.utils.recursiveResizeAs(t1,t2)
+ if torch.type(t2) == 'table' then
+ t1 = (torch.type(t1) == 'table') and t1 or {t1}
+ for key,_ in pairs(t2) do
+ t1[key], t2[key] = nn.utils.recursiveResizeAs(t1[key], t2[key])
+ end
+ for key,_ in pairs(t1) do
+ if not t2[key] then
+ t1[key] = nil
+ end
+ end
+ elseif torch.isTensor(t2) then
+ t1 = torch.isTensor(t1) and t1 or t2.new()
+ t1:resize(t2:size())
+ else
+ error("expecting nested tensors or tables. Got "..
+ torch.type(t1).." and "..torch.type(t2).." instead")
+ end
+ return t1, t2
+end
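+
+-- Typical use (a sketch): size a gradInput buffer to match a possibly nested output:
+-- > self.gradInput = nn.utils.recursiveResizeAs(self.gradInput, self.output)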
+
+function nn.utils.recursiveFill(t2, val)
+ if torch.type(t2) == 'table' then
+ for key,_ in pairs(t2) do
+ t2[key] = nn.utils.recursiveFill(t2[key], val)
+ end
+ elseif torch.isTensor(t2) then
+ t2:fill(val)
+ else
+ error("expecting tensor or table thereof. Got "
+ ..torch.type(t2).." instead")
+ end
+ return t2
+end
+
+function nn.utils.recursiveAdd(t1, val, t2)
+ if not t2 then
+ assert(val, "expecting at least two arguments")
+ t2 = val
+ val = 1
+ end
+ val = val or 1
+ if torch.type(t2) == 'table' then
+ t1 = (torch.type(t1) == 'table') and t1 or {t1}
+ for key,_ in pairs(t2) do
+ t1[key], t2[key] = nn.utils.recursiveAdd(t1[key], val, t2[key])
+ end
+ elseif torch.isTensor(t1) and torch.isTensor(t2) then
+ t1:add(val, t2)
+ else
+ error("expecting nested tensors or tables. Got "..
+ torch.type(t1).." and "..torch.type(t2).." instead")
+ end
+ return t1, t2
+end
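+
+-- Typical use (a sketch): accumulate scaled gradients across nested tables:
+-- > self.gradInput = nn.utils.recursiveAdd(self.gradInput, scale, gradOutput)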
+
+function nn.utils.recursiveCopy(t1,t2,async)
+ if torch.type(t2) == 'table' then
+ t1 = (torch.type(t1) == 'table') and t1 or {t1}
+ for key,_ in pairs(t2) do
+ t1[key], t2[key] = nn.utils.recursiveCopy(t1[key], t2[key], async)
+ end
+ elseif torch.isTensor(t2) then
+ t1 = torch.isTensor(t1) and t1 or t2.new()
+ t1:resize(t2:size())
+ if async then
+ t1:copyAsync(t2)
+ else
+ t1:copy(t2)
+ end
+ else
+ error("expecting nested tensors or tables. Got "..
+ torch.type(t1).." and "..torch.type(t2).." instead")
+ end
+ return t1, t2
+end
+
+function nn.utils.addSingletonDimension(...)
+ local view, t, dim
+ if select('#',...) < 3 then
+ t, dim = select(1,...)
+ else
+ view, t, dim = select(1,...)
+ assert(torch.isTensor(view),
+ "output tensor expected, got " .. type(view))
+ end
+
+ assert(torch.isTensor(t), "input tensor expected")
+ dim = dim or 1
+ assert(dim > 0 and dim <= (t:dim() + 1), "invalid dimension: " .. dim
+ .. '. Tensor is of ' .. t:dim() .. ' dimensions.')
+
+ view = view or t.new()
+ local size = torch.LongStorage(t:dim() + 1)
+ local stride = torch.LongStorage(t:dim() + 1)
+
+ for d = 1, dim - 1 do
+ size[d] = t:size(d)
+ stride[d] = t:stride(d)
+ end
+ size[dim] = 1
+ stride[dim] = 1
+ for d = dim + 1, t:dim() + 1 do
+ size[d] = t:size(d - 1)
+ stride[d] = t:stride(d - 1)
+ end
+
+ view:set(t:storage(), t:storageOffset(), size, stride)
+ return view
+end
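+
+-- Example (a sketch): view a 3x4 tensor as 3x1x4 without copying:
+-- > local t = torch.randn(3, 4)
+-- > local v = nn.utils.addSingletonDimension(t, 2) -- v shares t's storage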
+
+function nn.utils.contiguousView(output, input, ...)
+ output = output or input.new()
+ if input:isContiguous() then
+ output:view(input, ...)
+ else
+ output:resize(input:size())
+ output:copy(input)
+ output:view(output, ...)
+ end
+ return output
+end
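+
+-- Example (a sketch): flatten each sample to a row, copying only when the
+-- input is not contiguous:
+-- > output = nn.utils.contiguousView(output, input, input:size(1), -1)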
+
+-- go over specified fields and clear them. accepts
+-- nn.utils.clearState(self, {'_buffer', '_buffer2'}) and
+-- nn.utils.clearState(self, '_buffer', '_buffer2')
+function nn.utils.clear(self, ...)
+ local arg = {...}
+ if #arg > 0 and type(arg[1]) == 'table' then
+ arg = arg[1]
+ end
+ local function clear(f)
+ if self[f] then
+ if torch.isTensor(self[f]) then
+ self[f]:set()
+ elseif type(self[f]) == 'table' then
+ self[f] = {}
+ else
+ self[f] = nil
+ end
+ end
+ end
+ for i,v in ipairs(arg) do clear(v) end
+ return self
+end
+
+-- Lua 5.1 compatibility: unpack only became table.unpack in Lua 5.2
+table.unpack = table.unpack or unpack