author    Vsevolod Stakhov <vsevolod@highsecure.ru>  2019-07-01 15:13:04 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru>  2019-07-01 15:13:04 +0100
commit    891b250b452f8e1963a99931f241ac75e34d0281 (patch)
tree      ab56b822aca3cc6d02a3c9afbe8ca2f6d1c0381f /contrib/lua-torch/decisiontree
parent    38691d998d019ac0fba95720c337e3f9badf55c4 (diff)
[Project] Remove torch
Diffstat (limited to 'contrib/lua-torch/decisiontree')
-rw-r--r--  contrib/lua-torch/decisiontree/CMakeLists.txt                    |  51
-rw-r--r--  contrib/lua-torch/decisiontree/CartNode.lua                      |  42
-rw-r--r--  contrib/lua-torch/decisiontree/CartTrainer.lua                   | 180
-rw-r--r--  contrib/lua-torch/decisiontree/CartTree.lua                      |  90
-rw-r--r--  contrib/lua-torch/decisiontree/DFD.lua                           | 182
-rw-r--r--  contrib/lua-torch/decisiontree/DataSet.lua                       | 142
-rw-r--r--  contrib/lua-torch/decisiontree/DecisionForest.lua                |  82
-rw-r--r--  contrib/lua-torch/decisiontree/DecisionForestTrainer.lua         |  22
-rw-r--r--  contrib/lua-torch/decisiontree/DecisionTree.lua                  |  12
-rw-r--r--  contrib/lua-torch/decisiontree/GBDT_common.h                     | 106
-rw-r--r--  contrib/lua-torch/decisiontree/GiniState.lua                     |  54
-rw-r--r--  contrib/lua-torch/decisiontree/GradientBoostState.lua            |  57
-rw-r--r--  contrib/lua-torch/decisiontree/GradientBoostTrainer.lua          | 244
-rw-r--r--  contrib/lua-torch/decisiontree/LICENSE                           | 201
-rw-r--r--  contrib/lua-torch/decisiontree/LogitBoostCriterion.lua           |  45
-rw-r--r--  contrib/lua-torch/decisiontree/MSECriterion.lua                  |  13
-rw-r--r--  contrib/lua-torch/decisiontree/README.md                         | 386
-rw-r--r--  contrib/lua-torch/decisiontree/RandomForestTrainer.lua           | 159
-rw-r--r--  contrib/lua-torch/decisiontree/Sparse2Dense.lua                  |  88
-rw-r--r--  contrib/lua-torch/decisiontree/SparseTensor.lua                  |  54
-rw-r--r--  contrib/lua-torch/decisiontree/TreeState.lua                     | 191
-rw-r--r--  contrib/lua-torch/decisiontree/WorkPool.lua                      | 156
-rw-r--r--  contrib/lua-torch/decisiontree/_env.lua                          |   5
-rw-r--r--  contrib/lua-torch/decisiontree/benchmark.lua                     | 171
-rw-r--r--  contrib/lua-torch/decisiontree/doc/benchmark.md                  | 291
-rw-r--r--  contrib/lua-torch/decisiontree/error.h                           |  24
-rw-r--r--  contrib/lua-torch/decisiontree/generic/CartTree.c                |  88
-rw-r--r--  contrib/lua-torch/decisiontree/generic/DFD.c                     | 157
-rw-r--r--  contrib/lua-torch/decisiontree/generic/GBDT.c                    | 392
-rw-r--r--  contrib/lua-torch/decisiontree/generic/GBDT_internal.c           | 312
-rw-r--r--  contrib/lua-torch/decisiontree/generic/GBDT_internal.h           |  34
-rw-r--r--  contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c     |  90
-rw-r--r--  contrib/lua-torch/decisiontree/generic/S2D.c                     |  90
-rw-r--r--  contrib/lua-torch/decisiontree/hash_map.c                        | 445
-rw-r--r--  contrib/lua-torch/decisiontree/hash_map.h                        |  36
-rw-r--r--  contrib/lua-torch/decisiontree/init.c                            |  77
-rw-r--r--  contrib/lua-torch/decisiontree/init.lua                          |  70
-rw-r--r--  contrib/lua-torch/decisiontree/internal_hash_map.h               |  13
-rw-r--r--  contrib/lua-torch/decisiontree/khash.h                           | 627
-rw-r--r--  contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec |  40
-rw-r--r--  contrib/lua-torch/decisiontree/test.lua                          | 817
-rw-r--r--  contrib/lua-torch/decisiontree/utils.h                           |  45
-rw-r--r--  contrib/lua-torch/decisiontree/utils.lua                         | 125
44 files changed, 0 insertions, 6590 deletions
diff --git a/contrib/lua-torch/decisiontree/CMakeLists.txt b/contrib/lua-torch/decisiontree/CMakeLists.txt
deleted file mode 100644
index b94b4deb2..000000000
--- a/contrib/lua-torch/decisiontree/CMakeLists.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-LIST(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}")
-
-SET(src
- init.c
- hash_map.c
-)
-SET(luasrc
- _env.lua
- benchmark.lua
- CartNode.lua
- CartTrainer.lua
- CartTree.lua
- DataSet.lua
- DecisionForest.lua
- DecisionForestTrainer.lua
- DecisionTree.lua
- DFD.lua
- GiniState.lua
- GradientBoostState.lua
- GradientBoostTrainer.lua
- init.lua
- LogitBoostCriterion.lua
- math.lua
- MSECriterion.lua
- RandomForestTrainer.lua
- Sparse2Dense.lua
- SparseTensor.lua
- test.lua
- TreeState.lua
- utils.lua
- WorkPool.lua
-)
-
-IF (WITH_OPENMP)
- FIND_PACKAGE(OpenMP)
- IF(OPENMP_FOUND)
- MESSAGE(STATUS "Compiling with OpenMP support")
- SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
- SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
- ENDIF(OPENMP_FOUND)
-ENDIF (WITH_OPENMP)
-
-ADD_TORCH_PACKAGE(decisiontree "${src}" "${luasrc}" "A decision tree library, for Torch")
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
-### Torch packages assume the library prefix is "lib"
-SET_TARGET_PROPERTIES(decisiontree PROPERTIES
- PREFIX "lib"
- IMPORT_PREFIX "lib")
-TARGET_LINK_LIBRARIES(decisiontree ${TH_LIBRARIES})
-INSTALL(TARGETS decisiontree DESTINATION ${RSPAMD_LIBDIR})
diff --git a/contrib/lua-torch/decisiontree/CartNode.lua b/contrib/lua-torch/decisiontree/CartNode.lua
deleted file mode 100644
index e6a85006c..000000000
--- a/contrib/lua-torch/decisiontree/CartNode.lua
+++ /dev/null
@@ -1,42 +0,0 @@
-local dt = require 'decisiontree._env'
-local CartNode = torch.class("dt.CartNode", dt)
-
-function CartNode:__init(nodeId, leftChild, rightChild, splitFeatureId, splitFeatureValue, score, splitGain)
- self.nodeId = nodeId or 0
- self.leftChild = leftChild
- self.rightChild = rightChild
- self.splitFeatureId = splitFeatureId or -1
- self.splitFeatureValue = splitFeatureValue or 0
- self.score = score or 0
- self.splitGain = splitGain
-end
-
-function CartNode:__tostring__()
- return self:recursivetostring()
-end
-
-function CartNode:recursivetostring(indent)
- indent = indent or ' '
-
- -- Is this a leaf node?
- local res = ''
- if not (self.leftChild or self.rightChild) then
- res = res .. self.score .. '\n'
- else
- -- Print the criteria
-      res = res .. 'input[' .. self.splitFeatureId .. '] < ' .. self.splitFeatureValue .. '?\n'
-
- -- Print the branches
- if self.leftChild then
- res = res .. indent .. 'True->' .. self.leftChild:recursivetostring(indent .. ' ')
- end
- if self.rightChild then
- res = res .. indent .. 'False->' .. self.rightChild:recursivetostring(indent .. ' ')
- end
- end
- return res
-end
-
-function CartNode:clone()
- return CartNode(self.nodeId, self.leftChild, self.rightChild, self.splitFeatureId, self.splitFeatureValue, self.score, self.splitGain)
-end
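
For orientation, a minimal sketch (not from the original sources) of how the constructor and recursivetostring above compose into a printable one-split tree, using only the dt environment defined in this file:

   local dt = require 'decisiontree._env'

   -- two leaves (no children): only nodeId and score matter here
   local leftLeaf  = dt.CartNode(1, nil, nil, -1, 0, 0.9)
   local rightLeaf = dt.CartNode(2, nil, nil, -1, 0, 0.1)
   -- root splits on input[7] < 0.5; left is the true branch, right the false branch
   local root = dt.CartNode(0, leftLeaf, rightLeaf, 7, 0.5)
   print(root)  -- __tostring__ delegates to recursivetostring()
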
diff --git a/contrib/lua-torch/decisiontree/CartTrainer.lua b/contrib/lua-torch/decisiontree/CartTrainer.lua
deleted file mode 100644
index 63ae6c148..000000000
--- a/contrib/lua-torch/decisiontree/CartTrainer.lua
+++ /dev/null
@@ -1,180 +0,0 @@
-local dt = require "decisiontree._env"
-local _ = require "moses"
-
-local CartTrainer = torch.class("dt.CartTrainer", dt)
-
--- Generic CART trainer
-function CartTrainer:__init(dataset, minLeafSize, maxLeafNodes)
- assert(torch.isTypeOf(dataset, 'dt.DataSet'))
- self.dataset = dataset
- self.minLeafSize = assert(minLeafSize) -- min examples per leaf
- self.maxLeafNodes = assert(maxLeafNodes) -- max leaf nodes in tree
-
- -- by default, single thread
- self.parallelMode = 'singlethread'
-end
-
-function CartTrainer:train(rootTreeState, activeFeatures)
- assert(torch.isTypeOf(rootTreeState, 'dt.TreeState'))
- assert(torch.isTensor(activeFeatures))
- local root = dt.CartNode()
- root.id = 0
- root.score = rootTreeState:score(self.dataset)
-
- local nleaf = 1
-
- -- TODO : nodeparallel: parallelize here. The queue is a workqueue.
- local queue = {}
- table.insert(queue, 1, {cartNode=root, treeState=rootTreeState})
-
- while #queue > 0 and nleaf < self.maxLeafNodes do
- local treeGrowerArgs = table.remove(queue, #queue)
- local currentTreeState = treeGrowerArgs.treeState
-
- -- Note: if minLeafSize = 1 and maxLeafNode = inf, then each example will be its own leaf...
- if self:hasEnoughTrainingExamplesToSplit(currentTreeState.exampleIds:size(1)) then
- nleaf = self:processNode(nleaf, queue, treeGrowerArgs.cartNode, currentTreeState, activeFeatures)
- end
- end
-
- -- CartTree with random branching (when feature is missing)
- local branchleft = function() return math.random() < 0.5 end
- return dt.CartTree(root, branchleft), nleaf
-end
-
-function CartTrainer:processNode(nleaf, queue, node, treeState, activeFeatures)
- local bestSplit
- if self.parallelMode == 'singlethread' then
- bestSplit = self:findBestSplitForAllFeatures(treeState, activeFeatures)
- elseif self.parallelMode == 'featureparallel' then
- bestSplit = self:findBestSplitForAllFeaturesFP(treeState, activeFeatures)
- else
- error("Unrecognized parallel mode: " .. self.parallelMode)
- end
-
- if bestSplit then
- local leftTreeState, rightTreeState = treeState:branch(bestSplit, self.dataset)
- assert(bestSplit.leftChildSize + bestSplit.rightChildSize == leftTreeState.exampleIds:size(1) + rightTreeState.exampleIds:size(1), "The left and right subtrees don't match the split found!")
- self:setValuesAndCreateChildrenForNode(node, bestSplit, leftTreeState, rightTreeState, nleaf)
-
- table.insert(queue, 1, {cartNode=node.leftChild, treeState=leftTreeState})
- table.insert(queue, 1, {cartNode=node.rightChild, treeState=rightTreeState})
-
- return nleaf + 1
- end
-
- return nleaf
-end
-
-function CartTrainer:findBestSplitForAllFeatures(treeState, activeFeatures)
- local timer = torch.Timer()
- local bestSplit = treeState:findBestSplit(self.dataset, activeFeatures, self.minLeafSize, -1, -1)
-
- if bestSplit then
- assert(torch.type(bestSplit) == 'table')
- end
-
- if dt.PROFILE then
- print("findBestSplitForAllFeatures time="..timer:time().real)
- end
- return bestSplit
-end
-
--- Updates the parentNode with the bestSplit information by creating left/right child nodes.
-function CartTrainer:setValuesAndCreateChildrenForNode(parentNode, bestSplit, leftState, rightState, nleaf)
- assert(torch.isTypeOf(parentNode, 'dt.CartNode'))
- assert(torch.type(bestSplit) == 'table')
- assert(torch.isTypeOf(leftState, 'dt.TreeState'))
- assert(torch.isTypeOf(rightState, 'dt.TreeState'))
- assert(torch.type(nleaf) == 'number')
-
- local leftChild = dt.CartNode()
- leftChild.score = leftState:score(self.dataset)
- leftChild.nodeId = 2 * nleaf - 1
-
- local rightChild = dt.CartNode()
- rightChild.score = rightState:score(self.dataset)
- rightChild.nodeId = 2 * nleaf
-
- parentNode.splitFeatureId = bestSplit.splitId
- parentNode.splitFeatureValue = bestSplit.splitValue
- parentNode.leftChild = leftChild
- parentNode.rightChild = rightChild
- parentNode.splitGain = bestSplit.splitGain
-end
-
--- We minimally need 2 * N examples in the parent to satisfy >= N examples per child
-function CartTrainer:hasEnoughTrainingExamplesToSplit(count)
- return count >= 2 * self.minLeafSize
-end
-
--- call before training to enable feature-parallelization
-function CartTrainer:featureParallel(workPool)
- assert(self.parallelMode == 'singlethread', self.parallelMode)
- self.parallelMode = 'featureparallel'
- self.workPool = torch.type(workPool) == 'number' and dt.WorkPool(workPool) or workPool
- assert(torch.isTypeOf(self.workPool, 'dt.WorkPool'))
-
- -- this deletes all SparseTensor hash maps so that they aren't serialized
- self.dataset:deleteIndex()
-
- -- require the dt package
- self.workPool:update('require', {libname='decisiontree',varname='dt'})
- -- setup worker store (each worker will have its own copy)
- local store = {
- dataset=self.dataset,
- minLeafSize=self.minLeafSize
- }
- self.workPool:update('storeKeysValues', store)
-end
-
--- feature parallel
-function CartTrainer:findBestSplitForAllFeaturesFP(treeState, activeFeatures)
- local timer = torch.Timer()
- local bestSplit
- if treeState.findBestSplitFP then
- bestSplit = treeState:findBestSplitFP(self.dataset, activeFeatures, self.minLeafSize, self.workPool.nThread)
- end
-
- if not bestSplit then
- for i=1,self.workPool.nThread do
- -- upvalues
- local treeState = treeState
- local shardId = i
- local nShard = self.workPool.nThread
- local featureIds = activeFeatures
- -- closure
- local task = function(store)
- assert(store.dataset)
- assert(store.minLeafSize)
- if treeState.threadInitialize then
- treeState:threadInitialize()
- end
-
- local bestSplit = treeState:findBestSplit(store.dataset, featureIds, store.minLeafSize, shardId, nShard)
- return bestSplit
- end
-
- self.workPool:writeup('execute', task)
- end
-
- for i=1,self.workPool.nThread do
- local taskname, candidateSplit = self.workPool:read()
- assert(taskname == 'execute')
- if candidateSplit then
- if ((not bestSplit) or candidateSplit.splitGain < bestSplit.splitGain) then
- bestSplit = candidateSplit
- end
- end
- end
- end
-
- if bestSplit then
- assert(torch.type(bestSplit) == 'table')
- end
-
- if dt.PROFILE then
- print("findBestSplitForAllFeaturesFP time="..timer:time().real)
- end
- return bestSplit
-end
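
To see how the pieces fit, a single-threaded training sketch using classes defined elsewhere in this diff (dt.DataSet, dt.GiniState); the tensor shapes are illustrative only:

   local dt = require 'decisiontree'

   local input  = torch.randn(100, 10)            -- 100 examples, 10 dense features
   local target = torch.Tensor(100):random(0, 1)  -- binary labels
   local dataset = dt.DataSet(input, target)

   local trainer = dt.CartTrainer(dataset, 5, 8)  -- minLeafSize=5, maxLeafNodes=8
   local rootState = dt.GiniState(dataset:getExampleIds())
   local tree, nleaf = trainer:train(rootState, dataset.featureIds)
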
diff --git a/contrib/lua-torch/decisiontree/CartTree.lua b/contrib/lua-torch/decisiontree/CartTree.lua
deleted file mode 100644
index c74dfda9e..000000000
--- a/contrib/lua-torch/decisiontree/CartTree.lua
+++ /dev/null
@@ -1,90 +0,0 @@
-local _ = require "moses"
-local dt = require 'decisiontree._env'
-
--- CART (Classification And Regression Tree).
--- The example is always branched to the left when the splitting feature is missing.
-local CartTree = torch.class("dt.CartTree", "dt.DecisionTree", dt)
-
-function CartTree:__init(root, branchleft)
- assert(torch.isTypeOf(root, 'dt.CartNode'))
- self.root = root
- self.branchleft = branchleft or function() return true end
-end
-
--- TODO optimize this
-function CartTree:score(input, stack, optimized)
- if optimized == true and stack == nil and torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
- return input.nn.CartTreeFastScore(input, self.root, input.new())
- end
- return self:recursivescore(self.root, input, stack)
-end
-
--- Continuous: if input[node.splitFeatureId] < node.splitFeatureValue then leftNode else rightNode
--- Binary: if input[node.splitFeatureId] == 0 then leftNode else rightNode
--- when stack is provided, it is returned as the third argument containing the stack of nodes from root to leaf
-function CartTree:recursivescore(node, input, stack)
- assert(torch.isTypeOf(node, 'dt.CartNode'))
-
- if stack then
- stack = torch.type(stack) == 'table' and stack or {}
- table.insert(stack, node)
- end
-
- if not (node.leftChild or node.rightChild) then
- return node.score, node.nodeId, stack
- elseif not node.leftChild then
- return self:recursivescore(node.rightChild, input, stack)
- elseif not node.rightChild then
- return self:recursivescore(node.leftChild, input, stack)
- end
-
- local splitId = node.splitFeatureId
- local splitVal = node.splitFeatureValue
-
- if input[splitId] then -- if has key
- local featureVal = input[splitId]
- local nextNode = featureVal < splitVal and node.leftChild or node.rightChild
- return self:recursivescore(nextNode, input, stack)
- end
-
- -- if feature is missing, branch left
- local nextNode = self.branchleft() and node.leftChild or node.rightChild
- return self:recursivescore(nextNode, input, stack)
-end
-
-function CartTree:__tostring__()
- return self.root:recursivetostring()
-end
-
--- expects a stack returned by score
-function CartTree:stackToString(stack, input)
- assert(torch.type(stack) == 'table')
- assert(torch.isTypeOf(stack[1], 'dt.CartNode'))
-
- local res = 'Stack nodes from root to leaf\n'
- for i,node in ipairs(stack) do
- if not (node.leftChild or node.rightChild) then
- res = res .. "score="..node.score .. '\n'
- else
- local istr = ''
- if input then
- istr = '=' .. (input[node.splitFeatureId] or 'nil')
- end
- res = res .. 'input[' .. node.splitFeatureId .. ']' .. istr ..' < ' .. node.splitFeatureValue .. ' ? '
- res = res .. '(' .. ((node.leftChild and node.rightChild) and 'LR' or node.leftChild and 'L' or node.rightChild and 'R' or 'WAT?') .. ') '
- if node.leftChild == stack[i+1] then
- res = res .. 'Left\n'
- elseif node.rightChild == stack[i+1] then
- res = res .. 'Right\n'
- else
- error"stackToString error"
- end
- end
- end
- return res .. #stack .. " nodes"
-end
-
-function CartTree:clone()
- return CartTree(self.root:clone(), self.branchleft)
-end
-
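To make the branching rules above concrete, a scoring sketch; 'tree' stands in for any dt.CartTree, for instance the one returned by CartTrainer:train earlier in this diff. Passing a table as the stack argument collects the root-to-leaf path:

   local input = {[7] = 0.3}                  -- sparse-style input: feature 7 only
   local score, nodeId, stack = tree:score(input, {})
   print(tree:stackToString(stack, input))    -- prints each decision on the path
   -- an input missing feature 7 would take the branchleft() path instead
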
diff --git a/contrib/lua-torch/decisiontree/DFD.lua b/contrib/lua-torch/decisiontree/DFD.lua
deleted file mode 100644
index e4746212a..000000000
--- a/contrib/lua-torch/decisiontree/DFD.lua
+++ /dev/null
@@ -1,182 +0,0 @@
--- nn.DFD: Decision Forest Discretizer
--- Takes a dense input and outputs a sparse output.
--- Each node in the forest is its own feature.
--- When a node is traversed, its corresponding feature takes on a value of 1.
--- For all non-traversed nodes, the feature is 0.
-local DFD, parent = torch.class("nn.DFD", "nn.Module")
-
--- TODO: add :type, as the default will convert the long tensors
-function DFD:__init(df, onlyLastNode)
- parent.__init(self)
- if torch.type(df) == 'table' then
- self:reconstructFromInfo(df)
- else
- assert(torch.type(df) == 'dt.DecisionForest')
-
- self.rootIds = torch.LongTensor()
- -- nodeId of left and right child nodes
- self.leftChild = torch.LongTensor()
- self.rightChild = torch.LongTensor()
- -- index and value of the feature that splits this node
- self.splitFeatureId = torch.LongTensor()
- self.splitFeatureValue = torch.Tensor()
- -- initialize state given df
- self:convertForest2Tensors(df)
- self:clearState()
- end
- self.onlyLastNode = onlyLastNode
- self.nTrees = self.rootIds:size(1)
-end
-
--- converts a DecisionForest to efficient tensor representation
-function DFD:convertForest2Tensors(df)
- self.rootIds:resize(#df.trees)
-
- -- nodeId will map to featureId
- local nodeId = 0
- -- sets nodeIds of all subnodes
- -- and measures the maximum depth over all trees
- local function recursiveTree(node, depth)
- depth = (depth or 0) + 1
- local rdepth = depth
- nodeId = nodeId + 1
- node._nodeId = nodeId
-
- if node.leftChild then
- rdepth = math.max(rdepth, recursiveTree(node.leftChild, depth))
- end
- if node.rightChild then
- rdepth = math.max(rdepth, recursiveTree(node.rightChild, depth))
- end
- return rdepth
- end
-
- -- sum over trees of max depth
- self.depth = 0
- for i,tree in ipairs(df.trees) do
- assert(torch.isTypeOf(tree.root, 'dt.CartNode'))
- self.depth = self.depth + recursiveTree(tree.root)
- end
- -- remove roots from depth
- self.depth = self.depth - self.rootIds:size(1)
-
- -- total number of nodes in all trees
- self.nNode = nodeId
-
- -- nodeId of left and right child nodes
- self.leftChild:resize(self.nNode):fill(-1)
- self.rightChild:resize(self.nNode):fill(-1)
- -- index and value of the feature that splits this node
- self.splitFeatureId:resize(self.nNode):fill(-1)
- self.splitFeatureValue:resize(self.nNode):fill(-1)
-
- -- aggregates CartNode attributes to an efficient tensor representation
- local function recursiveTree2(node)
- local nodeId = assert(node._nodeId)
- assert(self.splitFeatureId[nodeId] == -1)
-
- if node.leftChild then
- self.leftChild[nodeId] = assert(node.leftChild._nodeId)
- recursiveTree2(node.leftChild)
- else
- self.leftChild[nodeId] = 0
- end
-
- if node.rightChild then
- self.rightChild[nodeId] = assert(node.rightChild._nodeId)
- recursiveTree2(node.rightChild)
- else
- self.rightChild[nodeId] = 0
- end
-
- -- each node splits the dataset on a feature id-value pair
- self.splitFeatureId[nodeId] = assert(node.splitFeatureId)
- self.splitFeatureValue[nodeId] = assert(node.splitFeatureValue)
- end
-
- for i,tree in ipairs(df.trees) do
- self.rootIds[i] = assert(tree.root._nodeId)
- recursiveTree2(tree.root)
- end
-
- assert(self.leftChild:min() >= 0)
- assert(self.rightChild:min() >= 0)
-end
-
--- input is a batchsize x inputsize tensor
-function DFD:updateOutput(input)
- assert(torch.isTensor(input))
- assert(input:dim() == 2)
- input = input:contiguous()
-
- local batchsize, inputsize = input:size(1), input:size(2)
-   local size = self.onlyLastNode and self.nTrees or self.depth
-
-   -- each sample's output keys tensor is resized to the maximum depth, the largest size it can take on
- self.outputkeys = self.outputkeys or torch.LongTensor()
- self.outputkeys:resize(batchsize, size)
- -- values are 1
- self.outputvalues = self.outputvalues or input.new()
- self.outputvalues:resize(batchsize, size):fill(1)
-
- self.output = input.nn.DFD_computeOutput(self.outputkeys, self.outputvalues, self.rootIds, self.leftChild, self.rightChild, self.splitFeatureId, self.splitFeatureValue, input, self.onlyLastNode)
- return self.output
-end
-
-function DFD:type(type, tensorCache)
- if type then
- local info = self:getReconstructionInfo()
- for k, v in pairs(info) do
- if torch.type(v) ~= 'torch.LongTensor' then
- info[k] = nil
- end
- end
- parent.type(self, type, tensorCache)
- self:reconstructFromInfo(info)
- return self
- else
- return parent.type(self)
- end
-end
-
-function DFD:updateGradInput()
- error"Not Implemented"
-end
-
-function DFD:clearState()
- self.output = {{},{}}
- self.taskbuffer = {}
- self.outputkeys = nil
- self.outputvalues = nil
- self._range = nil
- self._indices = nil
- self._mask = nil
-end
-
-function DFD:reconstructFromInfo(DFDinfo)
- for k,v in pairs(DFDinfo) do
- self[k] = v
- end
- assert(self.leftChild:nDimension() == 1)
- assert(self.rightChild:nDimension() == 1)
- assert(self.leftChild:size(1) == self.nNode)
- assert(self.rightChild:size(1) == self.nNode)
- assert(self.leftChild:min() >= 0)
- assert(self.rightChild:min() >= 0)
- assert(self.splitFeatureId:nDimension() == 1)
- assert(self.splitFeatureValue:nDimension() == 1)
- assert(self.splitFeatureId:size(1) == self.splitFeatureValue:size(1))
-end
-
-function DFD:getReconstructionInfo()
- local DFDinfo = {
- nNode = self.nNode,
- rootIds = self.rootIds,
- leftChild = self.leftChild,
- rightChild = self.rightChild,
- splitFeatureId = self.splitFeatureId,
- splitFeatureValue = self.splitFeatureValue,
- depth = self.depth
- }
- return DFDinfo
-end
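
In short, nn.DFD turns forest traversal into sparse features: node k fires feature k whenever an example passes through it. A usage sketch, where 'forest' is assumed to be a trained dt.DecisionForest:

   require 'decisiontree'                 -- registers nn.DFD
   local dfd = nn.DFD(forest)             -- one sparse feature per forest node
   local output = dfd:forward(torch.randn(32, 10))  -- sparse keys/values per example
   -- with onlyLastNode=true, only the leaf reached in each tree is emitted
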
diff --git a/contrib/lua-torch/decisiontree/DataSet.lua b/contrib/lua-torch/decisiontree/DataSet.lua
deleted file mode 100644
index 15058a7c6..000000000
--- a/contrib/lua-torch/decisiontree/DataSet.lua
+++ /dev/null
@@ -1,142 +0,0 @@
-local dt = require "decisiontree._env"
-
-local DataSet = torch.class("dt.DataSet", dt)
-
-function DataSet:__init(input, target, nThreads)
- if torch.type(input) == 'table' then
- assert(torch.isTypeOf(input[1], 'torch.SparseTensor'))
- else
- assert(torch.isTensor(input))
- end
- self.input = input
- assert(torch.isTensor(target))
- self.target = target
- self.nThreads = nThreads or 1
-
- self.sortedFeatureValues, self.featureIds = self:sortFeatureValues(input)
-end
-
--- group examples by featureId. For each featureId, sort examples by featureValue (ascending order)
--- returns a table mapping featureIds to sorted lists of exampleIds
--- e.g. {featureId={example1,example2,example3}}
-function DataSet:sortFeatureValues(inputs)
- local isSparse = torch.typename(inputs[1]):match('torch.*SparseTensor')
- assert(isSparse or torch.isTensor(inputs))
-
- local featureIds = torch.LongTensor()
- local dataset = {} -- TODO use tds.Hash (will require SparseTensor to be userdata)
- if isSparse then
- local proto = inputs[1].values
- -- get list of featureIds
- local featureMap = {}
- for i,input in ipairs(inputs) do
- input.keys:apply(function(key)
- featureMap[key] = (featureMap[key] or 0) + 1
- end)
- end
- local _ = require "moses"
- featureIds = featureIds.new(_.keys(featureMap))
- local featureCounts = torch.LongTensor(featureIds:size(1))
- for i=1,featureIds:size(1) do
- featureCounts[i] = featureMap[featureIds[i]]
- end
-
- for i=1,featureIds:size(1) do
- local featureId = featureIds[i]
- local featureCount = featureCounts[i]
- dataset[featureId] = {
- values=proto.new(featureCount),
- examples=torch.LongTensor(featureCount),
- i=0
- }
- end
-
- for exampleId,input in ipairs(inputs) do
- local sparseIdx = 0
- input.keys:apply(function(key)
- sparseIdx = sparseIdx + 1
- local f = dataset[key]
- f.i = f.i + 1
- f.values[f.i] = input.values[sparseIdx]
- f.examples[f.i] = exampleId
- end)
- end
-
- local sortVal, sortIdx = proto.new(), torch.LongTensor()
- for featureId,f in pairs(dataset) do
- assert(f.values:size(1) == f.i)
- sortVal:sort(sortIdx, f.values, 1, false)
-
- local sortedExampleIds = torch.LongTensor(f.i)
- sortedExampleIds:index(f.examples, 1, sortIdx)
-
- dataset[featureId] = sortedExampleIds
- end
- else
- assert(torch.isTensor(inputs))
- featureIds:range(1,inputs:size(2))
-
- for i=1,inputs:size(2) do
- local featureId = i
- local values = inputs:select(2, i)
- local _, sortedFeatureExampleIds = values:sort(1, false)
- dataset[featureId] = sortedFeatureExampleIds
- end
- end
-
- return dataset, featureIds
-end
-
-function DataSet:getSortedFeature(featureId)
- assert(self.sortedFeatureValues)
- return self.sortedFeatureValues[featureId]
-end
-
-function DataSet:size()
- return self.target:size(1)
-end
-
-function DataSet:getExampleIds()
- if not self.exampleIds then
- self.exampleIds = torch.LongTensor():range(1,self:size())
- end
- return self.exampleIds
-end
-
-function DataSet:countPositive(exampleIds)
- assert(torch.type(exampleIds) == 'torch.LongTensor')
- local dt = require 'decisiontree'
- local buffer = dt.getBufferTable('DataSet')
- buffer.tensor = buffer.tensor or self.target.new()
- buffer.tensor:index(self.target, 1, exampleIds)
- local nPositive = 0
- buffer.tensor:apply(function(x)
- if x > 0 then nPositive = nPositive + 1 end
- end)
- return nPositive
-end
-
-function DataSet:initScore()
- self.score = self.score or torch.Tensor()
- self.score:resize(self:size()):fill(0)
-end
-
-function DataSet:buildIndex()
- if torch.type(self.input) == 'table' then
- for exampleId,input in ipairs(self.input) do
- if torch.isTypeOf(input, 'torch.SparseTensor') then
- input:buildIndex()
- end
- end
- end
-end
-
-function DataSet:deleteIndex()
- if torch.type(self.input) == 'table' then
- for exampleId,input in ipairs(self.input) do
- if torch.isTypeOf(input, 'torch.SparseTensor') then
- input:deleteIndex()
- end
- end
- end
-end
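
The key invariant built by sortFeatureValues: for every featureId, a LongTensor of exampleIds ordered by ascending feature value. A small dense sketch (shapes illustrative):

   local dataset = dt.DataSet(torch.randn(6, 3), torch.Tensor(6):random(0, 1))
   local sortedIds = dataset:getSortedFeature(2)  -- exampleIds sorted by the value of feature 2
   print(dataset:countPositive(sortedIds))        -- positive targets among those examples
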
diff --git a/contrib/lua-torch/decisiontree/DecisionForest.lua b/contrib/lua-torch/decisiontree/DecisionForest.lua
deleted file mode 100644
index cac748e7e..000000000
--- a/contrib/lua-torch/decisiontree/DecisionForest.lua
+++ /dev/null
@@ -1,82 +0,0 @@
-local dt = require "decisiontree._env"
-
--- Decision forest that ensembles a bag of decision trees.
-local DecisionForest = torch.class("dt.DecisionForest", "dt.DecisionTree", dt)
-
-function DecisionForest:__init(trees, weight, bias)
- assert(torch.type(trees) == 'table')
- self.trees = trees
- if #trees == 0 then
- self.weight = weight or torch.Tensor()
- assert(torch.isTensor(self.weight))
- assert(self.weight:nElement() == 0)
- else
- assert(torch.isTypeOf(trees[1], 'dt.DecisionTree'))
- self.weight = weight or torch.Tensor(#trees):fill(1)
- assert(torch.isTensor(self.weight))
- assert(self.weight:dim() == 1)
- assert(self.weight:min() >= 0, "Expecting positive weights")
- assert(#trees == self.weight:size(1))
- end
-
- self.bias = bias or 0
- assert(torch.type(self.bias) == 'number')
-end
-
-function DecisionForest:score(input, incrementalId)
- assert(torch.isTensor(input))
-
- local buffer = {}
- if incrementalId then
- self.buffers = self.buffers or {}
- self.buffers[incrementalId] = self.buffers[incrementalId] or {}
- buffer = self.buffers[incrementalId]
- end
- buffer.initialCounter = buffer.initialCounter or 0
-
- -- TODO: score in parallel
- local output
- if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
- buffer.output = buffer.output or input.new()
- output = buffer.output
- assert(output:nElement() == 0 or output:size(1) == input:size(1))
- if output:nElement() == 0 then
- output:resize(input:size(1)):fill(self.bias)
- end
- for i,tree in ipairs(self.trees) do
- if i > buffer.initialCounter then
- local score = tree:score(input, nil, true)
- output:add(self.weight[i], score)
- end
- end
- else
- output = buffer.output or self.bias
- for i,tree in ipairs(self.trees) do
- if i > buffer.initialCounter then
- output = output + tree:score(input) * self.weight[i]
- end
- end
- buffer.output = output
- end
-
- buffer.initialCounter = #self.trees
-
- return output
-end
-
-function DecisionForest:add(tree, weight)
- assert(torch.type(weight) == 'number')
- assert(weight > 0)
- table.insert(self.trees, tree)
- self.weight:resize(#self.trees)
- self.weight[#self.trees] = weight
- return self
-end
-
-function DecisionForest:clone()
- local trees = {}
- for i, tree in ipairs(self.trees) do
- trees[i] = tree:clone()
- end
- return DecisionForest(trees, self.weight:clone(), self.bias)
-end
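
The ensemble rule implemented by score() is simply output = bias + sum_i weight[i] * tree_i(input). A sketch of incremental assembly, where 'cartTree' stands in for a trained dt.CartTree:

   local forest = dt.DecisionForest({}, torch.Tensor(), 0.0)  -- empty forest, bias 0
   forest:add(cartTree, 0.1)                -- append one tree with weight 0.1
   local y = forest:score(torch.randn(10))  -- bias + 0.1 * cartTree:score(input)
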
diff --git a/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua b/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua
deleted file mode 100644
index fc903678b..000000000
--- a/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua
+++ /dev/null
@@ -1,22 +0,0 @@
-local dt = require "decisiontree._env"
-
--- Interface for all decisionForestTrainers
-local DFT = torch.class("dt.DecisionForestTrainer", dt)
-
--- Train a DecisionForest with examples, a table of valid featureIds and a dataset (i.e. sortedExamplesByFeatureId)
-function DFT:train(examples, validFeatureIds, dataset)
- assert(torch.type(examples) == "table")
- assert(torch.isTypeOf(examples[1], "dt.LabeledExample"))
-
- assert(torch.type(validFeatureIds) == 'table')
-
- assert(torch.type(dataset) == 'table')
- for k,v in pairs(dataset) do
- assert(torch.type(v) == 'table')
- assert(torch.isTypeOf(v[1], 'dt.LabeledExample'))
- break
- end
- -- dataset is a table mapping featureIds to sorted lists of LabeledExamples
- -- e.g. {featureId={example1,example2,example3}}
- error"Not Implemented"
-end
diff --git a/contrib/lua-torch/decisiontree/DecisionTree.lua b/contrib/lua-torch/decisiontree/DecisionTree.lua
deleted file mode 100644
index c61bc3757..000000000
--- a/contrib/lua-torch/decisiontree/DecisionTree.lua
+++ /dev/null
@@ -1,12 +0,0 @@
-local dt = require "decisiontree._env"
-
--- An interface for decision trees.
-local DecisionTree = torch.class("dt.DecisionTree", dt)
-
--- Score an input example and return the prediction score.
--- input is a Tensor or SparseTensor
--- return prediction score and nodeId
-function DecisionTree:score(input)
- error"Not Implemented"
- return score, nodeId
-end
diff --git a/contrib/lua-torch/decisiontree/GBDT_common.h b/contrib/lua-torch/decisiontree/GBDT_common.h
deleted file mode 100644
index eb993702d..000000000
--- a/contrib/lua-torch/decisiontree/GBDT_common.h
+++ /dev/null
@@ -1,106 +0,0 @@
-#include "khash.h"
-#include <pthread.h>
-
-#define computeGradientBoostLoss(g, h) (-(g)*(g)/(h))
-
-// we use khash to make iteration faster than lua tables
-KHASH_SET_INIT_INT64(long)
-
-// defines the data we need for running an instance of the algorithm and its constructor/destructor
-typedef struct {
- khash_t(long)* exampleMap;
- THLongTensor *exampleIdsWithFeature_cache;
- long minLeafSize;
-} GBRunData;
-
-
-// allocates data that cannot be shared between threads
-static void gb_local_create_run_data(GBRunData *run_data) {
- run_data->exampleMap = kh_init(long);
- run_data->exampleIdsWithFeature_cache = THLongTensor_new();
-}
-
-static void gb_create_run_data(GBRunData *run_data, int minLeafSize) {
- gb_local_create_run_data(run_data);
- run_data->minLeafSize = minLeafSize;
-}
-
-static void gb_destroy_run_data(GBRunData *run_data) {
- THLongTensor_free(run_data->exampleIdsWithFeature_cache);
- kh_destroy(long, run_data->exampleMap);
-}
-
-// initializes the data required by the optimizer for the given feature.
-static THLongTensor *gb_internal_prepare(lua_State *L, THLongTensor *exampleIds,
- THLongTensor *exampleIdsWithFeature_cache, int input_index, long feature_id,
- khash_t(long)* exampleMap) {
- long *exampleIds_data = THLongTensor_data(exampleIds);
- long exampleIds_size = THLongTensor_size(exampleIds, 0);
-
- int ret = 0;
-
-  // if the input is a table, then we have a sparse dataset
- if (lua_istable(L, input_index)) {
- if (exampleIds_size == 0) {
- return NULL;
- }
- else {
- // loops over the examples' ids that this node has to evaluate and, if they have the feature
- // we're looking for, marks them as present and stores them in the order provided by the
- // dataset
- THLongTensor_resize1d(exampleIdsWithFeature_cache, exampleIds_size);
- kh_clear(long, exampleMap);
- kh_resize(long, exampleMap, exampleIds_size*8);
- long *exampleIdsWithFeature_data = THLongTensor_data(exampleIdsWithFeature_cache);
- long j = 0;
- // for each sample to be evaluated
- for (long i = 0; i < exampleIds_size; i++) {
- // gets the representation for the example
- lua_pushinteger(L, exampleIds_data[i]);
- lua_gettable(L, input_index);
-
- // builds the index, which happens only once per thread for efficiency
- lua_pushstring(L, "buildIndex");
- lua_gettable(L, -2);
- lua_pushvalue(L, -2);
- lua_call(L, 1, 0);
-
- // tries to get the feature for this sample
- lua_pushinteger(L, feature_id);
- lua_gettable(L, -2);
- // if present, then...
- if (!lua_isnil(L, -1)) {
- // saves the example
- exampleIdsWithFeature_data[j] = exampleIds_data[i];
- j++;
-
- // marks it as present in the hash table
- kh_put(long, exampleMap, exampleIds_data[i], &ret);
- }
-
- lua_pop(L, 2);
- }
-
- // resizes to fit only the samples that have the feature
- THLongTensor_resize1d(exampleIdsWithFeature_cache, j);
- kh_resize(long, exampleMap, j*8);
- return exampleIdsWithFeature_cache;
- }
- }
- else {
-    // if the input isn't a table, then it's dense and we cannot have exampleIds missing, so the
-    // example map doesn't depend on feature_id
- // since exampleIds is fixed between calls and this is going to store the same values to the
- // same position, we can cache it between calls
- if (kh_size(exampleMap) == 0) {
- kh_resize(long, exampleMap, exampleIds_size*8);
- for (long i = 0; i < exampleIds_size; i++) {
- kh_put(long, exampleMap, exampleIds_data[i], &ret);
- }
- }
- // notice that we just return the given tensor of ids instead of copying it. the rest of the
- // code handles this transparently
- return exampleIds;
- }
-}
-
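The macro above encodes the Newton-step objective used throughout the GBDT code: for a node with summed gradient g and summed hessian h, the optimal leaf score is -g/h and the loss at that optimum is -g*g/h. Restated as a Lua sketch, for reference only:

   local function newtonScore(g, h)        -- optimal leaf value
      return -g / h
   end
   local function gradientBoostLoss(g, h)  -- loss at that optimum, as in the C macro
      return -(g * g) / h
   end
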
diff --git a/contrib/lua-torch/decisiontree/GiniState.lua b/contrib/lua-torch/decisiontree/GiniState.lua
deleted file mode 100644
index 6dfed2845..000000000
--- a/contrib/lua-torch/decisiontree/GiniState.lua
+++ /dev/null
@@ -1,54 +0,0 @@
-local dt = require 'decisiontree._env'
-
--- used by RandomForestTrainer
-local GiniState, parent = torch.class("dt.GiniState", "dt.TreeState", dt)
-
-function GiniState:__init(exampleIds)
- parent.__init(self, exampleIds)
- self.nPositiveInLeftBranch = 0
- self.nPositiveInRightBranch = 0
-end
-
-function GiniState:score(dataset)
- local dt = require 'decisiontree'
- local nPositive = dataset:countPositive(self.exampleIds)
- return dt.calculateLogitScore(nPositive, self.exampleIds:size(1))
-end
-
-function GiniState:initialize(exampleIdsWithFeature, dataset)
- assert(torch.type(exampleIdsWithFeature) == 'torch.LongTensor')
- assert(torch.isTypeOf(dataset, 'dt.DataSet'))
- self.nPositiveInLeftBranch = dataset:countPositive(exampleIdsWithFeature)
- self.nPositiveInRightBranch = 0
-
- self.nExampleInLeftBranch = exampleIdsWithFeature:size(1)
- self.nExampleInRightBranch = 0
-end
-
-function GiniState:update(exampleId, dataset)
- assert(torch.type(exampleId) == 'number')
- assert(torch.isTypeOf(dataset, 'dt.DataSet'))
- if dataset.target[exampleId] > 0 then
- self.nPositiveInLeftBranch = self.nPositiveInLeftBranch - 1
- self.nPositiveInRightBranch = self.nPositiveInRightBranch + 1
- end
-
- self.nExampleInLeftBranch = self.nExampleInLeftBranch - 1
- self.nExampleInRightBranch = self.nExampleInRightBranch + 1
-end
-
-function GiniState:computeSplitInfo(splitFeatureId, splitFeatureValue)
- local dt = require 'decisiontree'
- local gini = dt.computeGini(self.nExampleInLeftBranch, self.nPositiveInLeftBranch, self.nExampleInRightBranch, self.nPositiveInRightBranch)
- local splitInfo = {
- splitId = assert(splitFeatureId),
- splitValue = assert(splitFeatureValue),
- leftChildSize = assert(self.nExampleInLeftBranch),
- leftPositiveCount = assert(self.nPositiveInLeftBranch),
- rightChildSize = assert(self.nExampleInRightBranch),
- rightPositiveCount = assert(self.nPositiveInRightBranch),
- gini = assert(gini),
- splitGain = gini
- }
- return splitInfo
-end
\ No newline at end of file
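
The intended scan, schematically: initialize() places every example in the left branch, and each update() moves one example to the right while the candidate split is re-scored. Placeholder names below; the actual scan order lives in dt.TreeState:findBestSplit and the C kernels, not shown in this hunk:

   state:initialize(sortedExampleIds, dataset)      -- every example starts on the left
   for i = sortedExampleIds:size(1), 2, -1 do       -- assumed order over sorted feature values
      state:update(sortedExampleIds[i], dataset)    -- shift one example to the right branch
      local splitInfo = state:computeSplitInfo(featureId, candidateValue)
      -- keep the candidate with the lowest splitGain
   end
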
diff --git a/contrib/lua-torch/decisiontree/GradientBoostState.lua b/contrib/lua-torch/decisiontree/GradientBoostState.lua
deleted file mode 100644
index f268f3da8..000000000
--- a/contrib/lua-torch/decisiontree/GradientBoostState.lua
+++ /dev/null
@@ -1,57 +0,0 @@
-local dt = require 'decisiontree._env'
-
-local GradientBoostState, parent = torch.class("dt.GradientBoostState", "dt.TreeState", dt)
-
-function GradientBoostState:__init(exampleIds, gradInput, hessInput)
- parent.__init(self, exampleIds)
- self.gradInput = gradInput
- self.hessInput = hessInput
-end
-
-function GradientBoostState:score(dataset)
- local dt = require 'decisiontree'
- local gradInput = self.gradInput:index(1, self.exampleIds)
- local hessInput = self.hessInput:index(1, self.exampleIds)
- return dt.computeNewtonScore(gradInput:sum(), hessInput:sum())
-end
-
--- calls _branch and encapsulates the left and right exampleIds into a pair of TreeStates
-function GradientBoostState:branch(splitInfo, dataset)
- local leftExampleIds, rightExampleIds = self:_branch(splitInfo, dataset)
- return self.new(leftExampleIds, self.gradInput, self.hessInput), self.new(rightExampleIds, self.gradInput, self.hessInput)
-end
-
--- Partitions self given a splitInfo table, producing a pair of exampleIds corresponding to the left and right subtrees.
-function GradientBoostState:_branch(splitInfo, dataset)
- local input = dataset.input
- -- if the input is dense, we can use the optimized version
- if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
- return input.nn.GBDT_branch(splitInfo, input, self.exampleIds)
- end
- return parent._branch(self, splitInfo, dataset)
-end
-
--- The following methods are supersets of each other. You can comment them out to re-use the Lua
--- version with just the core optimized
-
--- THIS ONE CANNOT BE COMMENTED OUT
-function GradientBoostState:findBestFeatureSplit(dataset, featureId, minLeafSize)
- local ret = self.hessInput.nn.GBDT_findBestFeatureSplit(self.exampleIds, dataset, featureId, minLeafSize, self.gradInput, self.hessInput)
- return ret
-end
-
--- finds the best split of examples in treeState among featureIds
-function GradientBoostState:findBestSplit(dataset, featureIds, minLeafSize, shardId, nShard)
- local ret = self.hessInput.nn.GBDT_findBestSplit(self.exampleIds, dataset, featureIds, minLeafSize, shardId, nShard, self.gradInput, self.hessInput)
- return ret
-end
-
--- finds the best split like the previous one, but performs feature parallelism. Note that the
--- optimization is only applied if the input is dense
-function GradientBoostState:findBestSplitFP(dataset, featureIds, minLeafSize, nThread)
- local input = dataset.input
- if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
- local ret = self.hessInput.nn.GBDT_findBestSplitFP(self.exampleIds, dataset, featureIds, minLeafSize, self.gradInput, self.hessInput, nThread)
- return ret
- end
-end
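
Tying this to the trainer below: gradients and hessians come from the loss function's backward2, and a node's score is the Newton score of their sums. A sketch with placeholder names ('trainSet' is a dt.DataSet after initScore()):

   local lossFunction = nn.LogitBoostCriterion(false)
   local gradInput, hessInput = lossFunction:backward2(trainSet.score, trainSet.target)
   local rootState = dt.GradientBoostState(trainSet:getExampleIds(), gradInput, hessInput)
   print(rootState:score(trainSet))  -- == dt.computeNewtonScore(sum of grads, sum of hessians)
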
diff --git a/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua b/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua
deleted file mode 100644
index 51299b109..000000000
--- a/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua
+++ /dev/null
@@ -1,244 +0,0 @@
-local dt = require "decisiontree._env"
-
--- Gradient boosted decision tree trainer
-local GradientBoostTrainer = torch.class("dt.GradientBoostTrainer", "dt.DecisionForestTrainer", dt)
-
-function GradientBoostTrainer:__init(opt)
- assert(torch.type(opt) == 'table')
-
- assert(torch.isTypeOf(opt.treeTrainer, 'dt.CartTrainer'))
- self.treeTrainer = opt.treeTrainer
-
- assert(torch.isTypeOf(opt.lossFunction, 'nn.Criterion'))
- self.lossFunction = opt.lossFunction
-
- assert(torch.type(opt.shrinkage) == 'number')
- assert(opt.shrinkage > 0)
- self.shrinkage = opt.shrinkage
-
- assert(torch.type(opt.downsampleRatio) == 'number')
- assert(opt.downsampleRatio > 0)
- self.downsampleRatio = opt.downsampleRatio
-
- assert(torch.type(opt.nTree) == 'number')
- assert(opt.nTree > 0)
- self.nTree = opt.nTree
-
-   opt.evalFreq = opt.evalFreq or -1
- assert(torch.type(opt.evalFreq) == 'number')
- assert(torch.round(opt.evalFreq) == opt.evalFreq)
- self.evalFreq = opt.evalFreq
-
- -- when non-positive, no early-stopping
-   opt.earlyStop = opt.earlyStop or (opt.evalFreq-1)
- assert(torch.type(opt.earlyStop) == 'number')
- self.earlyStop = opt.earlyStop
-
- -- when non-positive, defaults to sqrt(#feature)
- assert(torch.type(opt.featureBaggingSize) == 'number')
- self.featureBaggingSize = opt.featureBaggingSize
-
- if opt.decisionForest then
- assert(torch.isTypeOf(opt.decisionForest, 'dt.DecisionForest'))
- end
- self.decisionForest = opt.decisionForest
-
- self.useInitBias = opt.useInitBias
-end
-
-function GradientBoostTrainer:computeBias(trainSet, verbose)
- assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
-
- if verbose then print("Use new bias generated from the training examples.") end
-
- return -0.5 * self.gradInput:sum() / self.hessInput:sum()
-end
-
-
-function GradientBoostTrainer:initialize(trainSet, verbose)
- assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
-
- trainSet:initScore()
- self.gradInput, self.hessInput = self.lossFunction:backward2(trainSet.score, trainSet.target)
-
- -- used for early-stopping (see validate())
- self.stopCount = 0
- self.prevTrainLoss = math.huge
- self.prevTestLoss = math.huge
-
- if verbose then print("Processing initial decision forest") end
-
- local decisionForest, bias
-
- if self.decisionForest then
- local bias = self.useInitBias and self.decisionForest.bias or self:computeBias(trainSet, verbose)
-
- decisionForest = dt.DecisionForest(self.decisionForest.trees, self.decisionForest.weight, bias)
-
-      local input = trainSet.input
-      local score = trainSet.score
-      if torch.isTensor(input) and input.isContiguous and input:isContiguous() then
-         score:copy(decisionForest:score(input))
-      else
-         score:resize(trainSet:size())
- for exampleId=1,trainSet:size() do
- score[exampleId] = decisionForest:score(input[exampleId])
- end
- end
- else
- local bias = self:computeBias(trainSet, verbose)
- decisionForest = dt.DecisionForest({}, torch.Tensor(), bias)
-
- trainSet.score:fill(bias)
- end
-
- if verbose then print("Finish loading initial decision forest") end
-
- return decisionForest
-end
-
--- Trains a decision forest of boosted decision trees.
--- examples are the training examples. validExamples are used for cross-validation.
-function GradientBoostTrainer:train(trainSet, featureIds, validSet, verbose)
- assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
- assert(torch.type(featureIds) == 'torch.LongTensor')
- assert(torch.isTypeOf(validSet, 'dt.DataSet'))
-
- local decisionForest = self:initialize(trainSet, verbose)
- local bestDecisionForest
-
- if verbose then print(string.format("Get %d featureIds.", featureIds:size(1))) end
-
- local baggingSize = self.featureBaggingSize > 0 and self.featureBaggingSize or torch.round(math.sqrt(featureIds:size(1)))
- local trainExampleIds = trainSet:getExampleIds()
- local baggingIndices, activeFeatures
- local treeExampleIds
-
- local timer = torch.Timer()
-
- for treeId = 1,self.nTree do
- timer:reset()
- if verbose then print(string.format("Begin processing tree number %d of %d", treeId, self.nTree)) end
-
- -- Get active features
- activeFeatures = activeFeatures or torch.LongTensor()
- if baggingSize < featureIds:size(1) then
- if verbose then print(string.format("Tree %d: Bagging %d from %d features", treeId, baggingSize, featureIds:size(1))) end
-
- baggingIndices = baggingIndices or torch.LongTensor()
- baggingIndices:randperm(featureIds:size(1))
- activeFeatures:index(featureIds, 1, baggingIndices:narrow(1,1,baggingSize))
- else
- activeFeatures = featureIds
- end
-
- -- Get data samples
- if self.downsampleRatio < 0.99 then
- local sampleSize = torch.round(trainSet:size() * self.downsampleRatio)
-
- if verbose then print(string.format("Tree %d: Downsampling %d of %d samples", treeId, sampleSize, trainSet:size())) end
-
- baggingIndices = baggingIndices or torch.LongTensor()
- baggingIndices:randperm(trainSet:size())
-
- treeExampleIds = treeExampleIds or torch.LongTensor()
- treeExampleIds:index(trainExampleIds, 1, baggingIndices:narrow(1,1,sampleSize))
- else
- treeExampleIds = trainExampleIds
- end
-
- if verbose then print(string.format("Tree %d: training CART tree", treeId)) end
-
- local rootTreeState = dt.GradientBoostState(treeExampleIds, self.gradInput, self.hessInput)
- local cartTree = self.treeTrainer:train(rootTreeState, activeFeatures)
-
- if verbose then print(string.format("Tree %d: finished training CART tree in %f seconds", treeId, timer:time().real)) end
-
- decisionForest:add(cartTree, self.shrinkage)
-
- -- update score
- local predictionScore
- local input = trainSet.input
- if torch.isTensor(input) and input:isContiguous() then
- predictionScore = cartTree:score(trainSet.input, nil, true)
- else
- local size = trainSet:size()
- predictionScore = torch.Tensor(size)
- for exampleId=1,size do
- predictionScore[exampleId] = cartTree:score(trainSet.input[exampleId])
- end
- end
- trainSet.score:add(self.shrinkage, predictionScore)
- self.gradInput, self.hessInput = self.lossFunction:backward2(trainSet.score, trainSet.target)
-
- if verbose then print(string.format("Tree %d: training complete in %f seconds", treeId, timer:time().real)) end
-
- -- cross-validation/early-stopping
- if self.evalFreq > 0 and treeId % self.evalFreq == 0 then
- timer:reset()
-         local stop, validLoss
-         stop, validLoss, bestDecisionForest = self:validate(trainSet, validSet, decisionForest, bestDecisionForest)
- if dt.PROFILE then print("validate tree time: "..timer:time().real) end
- if verbose then print(string.format("Loss: train=%7.4f, valid=%7.4f", 0, validLoss)) end
- if stop then
- if verbose then print(string.format("GBDT early stopped on tree %d", treeId)) end
- break
- end
-
- end
- end
-
- return bestDecisionForest or decisionForest
-end
-
-function dt.GradientBoostTrainer:validate(trainSet, validSet, decisionForest, bestDecisionForest)
- assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
- assert(torch.isTypeOf(validSet, 'dt.DataSet'))
- assert(torch.isTypeOf(decisionForest, 'dt.DecisionForest'))
-   assert(not bestDecisionForest or torch.isTypeOf(bestDecisionForest, 'dt.DecisionForest'))
-
- -- buffer
- local buffer = dt.getBufferTable('GradientBoost')
- buffer.tensor = buffer.tensor or trainSet.score.new()
- local score = buffer.tensor
-
- -- per thread loss function (tensors are shared)
- local lossname = torch.typename(self.lossFunction)
- buffer[lossname] = buffer[lossname] or self.lossFunction:clone()
- local lossFunction = buffer[lossname]
-
- -- TODO batch this for large datasets
- local input = validSet.input
- if torch.isTensor(input) and input.isContiguous and input:isContiguous() then
- score = decisionForest:score(input, 'val')
- else
- score:resize(validSet:size())
- for exampleId=1,validSet:size() do
- score[exampleId] = decisionForest:score(input[exampleId], 'val')
- end
- end
- local validLoss = lossFunction:forward(score, validSet.target)
-
- -- early stop is not enabled when earlyStop=0
- local stop = false
- if self.earlyStop > 0 then
- -- Track test loss and detect early stop
- if self.prevTestLoss - validLoss < 0 then
- self.stopCount = self.stopCount + 1
- else
- bestDecisionForest = decisionForest:clone()
- self.stopCount = 0
- end
-
- stop = self.stopCount >= self.earlyStop
- end
-
- self.prevTestLoss = validLoss
-
- return stop, validLoss, bestDecisionForest
-end
-
-function GradientBoostTrainer:getName()
- return string.format(
- "gbdt-dRatio-%s-maxLeaf-%s-minExample-%s-nTree-%s-shrinkage-%s",
-      self.downsampleRatio, self.treeTrainer.maxLeafNodes, self.treeTrainer.minLeafSize, self.nTree, self.shrinkage
- )
-end
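
For reference, a configuration sketch exercising only the options validated in __init above; 'trainSet' and 'validSet' are assumed dt.DataSets built as shown earlier in this diff:

   local trainer = dt.GradientBoostTrainer{
      treeTrainer = dt.CartTrainer(trainSet, 5, 8),  -- minLeafSize=5, maxLeafNodes=8
      lossFunction = nn.LogitBoostCriterion(false),
      shrinkage = 0.1,            -- weight given to each appended tree
      downsampleRatio = 0.8,      -- row bagging: 80% of examples per tree
      featureBaggingSize = -1,    -- non-positive: use sqrt(#features)
      nTree = 50,
      evalFreq = 5,               -- validate every 5 trees
      earlyStop = 4,              -- stop after 4 non-improving validations
   }
   local forest = trainer:train(trainSet, trainSet.featureIds, validSet)
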
diff --git a/contrib/lua-torch/decisiontree/LICENSE b/contrib/lua-torch/decisiontree/LICENSE
deleted file mode 100644
index 8dada3eda..000000000
--- a/contrib/lua-torch/decisiontree/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "{}"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright {yyyy} {name of copyright owner}
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua b/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua
deleted file mode 100644
index 5b9eb6028..000000000
--- a/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua
+++ /dev/null
@@ -1,45 +0,0 @@
-local dt = require "decisiontree._env"
-
--- Ref: slide 17 in https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf
-
--- equivalent to nn.Sigmoid() + nn.BCECriterion()
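--- For p = sigmoid(x) and target t in {0,1}: loss = -t*log(p) - (1-t)*log(1-p),
--- so gradInput = p - t and hessInput = p*(1-p), per example.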
-local LogitBoostCriterion, parent = torch.class("nn.LogitBoostCriterion", "nn.Criterion")
-
-function LogitBoostCriterion:__init(sizeAverage)
- parent.__init(self)
- self.sizeAverage = sizeAverage
- self.hessInput = self.gradInput.new()
- self._output = torch.Tensor()
-end
-
-function LogitBoostCriterion:updateOutput(input, target)
- input.nn.LogitBoostCriterion_updateOutput(input, target, self._output, self.sizeAverage)
- self.output = self._output[1]
- return self.output
-end
-
-function LogitBoostCriterion:updateGradInput(input, target)
- input.nn.LogitBoostCriterion_updateGradInput(input, target, self.gradInput)
- return self.gradInput
-end
-
-function LogitBoostCriterion:updateHessInput(input, target)
- input.nn.LogitBoostCriterion_updateHessInput(input, target, self.hessInput)
- return self.hessInput
-end
-
--- returns gradInput and hessInput
-function LogitBoostCriterion:backward2(input, target)
- return self:updateGradInput(input, target), self:updateHessInput(input, target)
-end
-
-local gradWrapper = function(input, target, grad)
- input.nn.LogitBoostCriterion_updateGradInput(input, target, grad)
-end
-local hessianWrapper = function(input, target, hessian)
- input.nn.LogitBoostCriterion_updateHessInput(input, target, hessian)
-end
-
-function LogitBoostCriterion:getWrappers()
- return gradWrapper, hessianWrapper
-end
diff --git a/contrib/lua-torch/decisiontree/MSECriterion.lua b/contrib/lua-torch/decisiontree/MSECriterion.lua
deleted file mode 100644
index 948c1a17e..000000000
--- a/contrib/lua-torch/decisiontree/MSECriterion.lua
+++ /dev/null
@@ -1,13 +0,0 @@
-local dt = require "decisiontree._env"
-
-function nn.MSECriterion.updateHessInput(self, input, target)
- self.hessInput = self.hessInput or input.new()
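-   -- the second derivative of (input - target)^2 w.r.t. input is the constant 2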
- self.hessInput:resize(input:size()):fill(2)
- return self.hessInput
-end
-
--- returns gradInput and hessInput
-function nn.MSECriterion.backward2(self, input, target)
- return self:updateGradInput(input, target), self:updateHessInput(input, target)
-end
-
diff --git a/contrib/lua-torch/decisiontree/README.md b/contrib/lua-torch/decisiontree/README.md
deleted file mode 100644
index db4622add..000000000
--- a/contrib/lua-torch/decisiontree/README.md
+++ /dev/null
@@ -1,386 +0,0 @@
-# Torch decision tree library
-
-```lua
-local dt = require 'decisiontree'
-```
-
-This project implements random forests and gradient boosted decision trees (GBDT).
-Both use ensemble learning to produce ensembles of decision trees (that is, forests).
-
-## `nn.DFD`
-
-One practical application for decision forests is to *discretize* an input feature space into a richer output feature space.
-The `nn.DFD` Module can be used as a decision forest discretizer (DFD):
-
-```lua
-local dfd = nn.DFD(df, onlyLastNode)
-```
-
-where `df` is a `dt.DecisionForest` instance or the table returned by the method `getReconstructionInfo()` on another `nn.DFD` module, and `onlyLastNode` is a boolean indicating that the module should return only the id of the last node visited in each tree (by default, it outputs all traversed nodes except the roots).
-The `nn.DFD` module requires dense `input` tensors.
-Sparse `input` tensors (tables of tensors) are not supported.
-The `output` returned by a call to `updateOutput` is a batch of sparse tensors.
-The `output` is a table where `output[1]` and `output[2]` are respectively a list of key tensors and a list of value tensors:
-
-```lua
-{
- { [torch.LongTensor], ... , [torch.LongTensor] },
- { [torch.Tensor], ... , [torch.Tensor] }
-}
-```
-
-This module doesn't support CUDA.
-
-### Example
-As a concrete example, let us first train a Random Forest on a dummy dense dataset:
-
-```lua
-local nExample = 100
-local batchsize = 2
-local inputsize = 10
-
--- train Random Forest
-local trainSet = dt.getDenseDummyData(nExample, nil, inputsize)
-local opt = {
- activeRatio=0.5,
- featureBaggingSize=5,
- nTree=4,
- maxLeafNodes=nExample/2,
- minLeafSize=nExample/10,
-}
-local trainer = dt.RandomForestTrainer(opt)
-local df = trainer:train(trainSet, trainSet.featureIds)
-assert(#df.trees == opt.nTree)
-```
-
-Now that we have `df`, a `dt.DecisionForest` instance, we can use it to initialize `nn.DFD`:
-
-```lua
-local dfd = nn.DFD(df)
-```
-
-The `dfd` instance holds no reference to `df`; instead, it extracts the relevant attributes from `df`.
-These attributes are stored in tensors for batching and efficiency.
-
-We can discretize a hypothetical `input` by calling `forward`:
-```lua
-local input = trainSet.input:sub(1,batchsize)
-local output = dfd:forward(input)
-```
-
-The resulting output is a table consisting of two tables: keys and values.
-The keys and values tables each contain `batchsize` tensors:
-
-```lua
-print(output)
-{
- 1 :
- {
- 1 : LongTensor - size: 14
- 2 : LongTensor - size: 16
- 3 : LongTensor - size: 15
- 4 : LongTensor - size: 13
- }
- 2 :
- {
- 1 : DoubleTensor - size: 14
- 2 : DoubleTensor - size: 16
- 3 : DoubleTensor - size: 15
- 4 : DoubleTensor - size: 13
- }
-}
-```
-
-An example's feature keys (`LongTensor`) and corresponding values (`DoubleTensor`) have the same number of elements.
-Each example has a variable number of key-value pairs, representing the nodes traversed in the forest.
-The output feature space has one dimension (that is, one possible feature key) per node in the forest.
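-
-For instance, the key-value pairs of the first example in the batch can be traversed as follows:
-
-```lua
-local keys, values = output[1][1], output[2][1]
-for i=1,keys:size(1) do
-   print(keys[i], values[i]) -- id of a visited node and its associated value
-end
-```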
-
-## `torch.SparseTensor`
-
-Suppose you have a set of `keys` mapped to `values`:
-```lua
-local keys = torch.LongTensor{1,3,4,7,2}
-local values = torch.Tensor{0.1,0.3,0.4,0.7,0.2}
-```
-
-You can use a `SparseTensor` to encapsulate these into a read-only tensor:
-
-```lua
-local st = torch.SparseTensor(keys, values)
-```
-
-The _decisiontree_ library uses `SparseTensors` to simulate the `__index` method of the `torch.Tensor`.
-For example, one can obtain the value associated to key 3 of the above `st` instance:
-
-```lua
-local value = st[3]
-assert(value == 0.3)
-```
-
-When a key is absent from the tensor, `nil` is returned instead:
-
-```lua
-local value = st[5]
-assert(value == nil)
-```
-
-The default implementation of this kind of indexing is slow (it uses a sequential scan of the `keys`).
-To speed up indexing, one can call the `buildIndex()` method beforehand:
-
-```lua
-st:buildIndex()
-```
-
-The `buildIndex()` method creates a hash map (a Lua table) of keys to their corresponding indices in the `values` tensor.
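-
-For instance, continuing the example above:
-
-```lua
-st:buildIndex()
-assert(st[3] == 0.3) -- now a hash lookup rather than a sequential scan
-st:deleteIndex() -- drops the index (e.g. before serializing to worker threads)
-```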
-
-## `dt.DataSet`
-
-The `CartTrainer`, `RandomForestTrainer` and `GradientBoostTrainer` require that data sets be encapsulated into a `DataSet`.
-Suppose you have a dataset of dense inputs and targets:
-
-```lua
-local nExample = 10
-local nFeature = 5
-local input = torch.randn(nExample, nFeature)
-local target = torch.Tensor(nExample):random(0,1)
-```
-
-these can be encapsulated into a `DataSet` object:
-
-```lua
-local dataset = dt.DataSet(input, target)
-```
-
-Now suppose you have a dataset where the `input` is a table of `SparseTensor` instances:
-
-```lua
-local input = {}
-for i=1,nExample do
- local nKeyVal = math.random(2,nFeature)
- local keys = torch.LongTensor(nKeyVal):random(1,nFeature)
- local values = torch.randn(nKeyVal)
- input[i] = torch.SparseTensor(keys, values)
-end
-```
-
-You can still use a `DataSet` to encapsulate the sparse dataset:
-
-```lua
-local dataset = dt.DataSet(input, target)
-```
-
-The main purpose of the `DataSet` class is to sort each feature by value.
-This is captured by the `sortFeatureValues(input)` method, which is called in the constructor:
-
-```lua
-local sortedFeatureValues, featureIds = self:sortFeatureValues(input)
-```
-
-The `featureIds` is a `torch.LongTensor` of all available feature IDs.
-For a dense `input` tensor, this is just `torch.LongTensor():range(1,input:size(2))`.
-But for a sparse `input` tensor, the `featureIds` tensor only contains the feature IDs present in the dataset.
-
-The resulting `sortedFeatureValues` is a table mapping `featureIds` to `exampleIds` sorted by `featureValues`.
-For each `featureId`, examples are sorted by `featureValue` in ascending order.
-For example, the table might look like `{featureId=exampleIds}` where `exampleIds={1,3,2}`.
-
-The `CartTrainer` accesses the `sortedFeatureValues` table by calling `getSortedFeature(featureId)`:
-
-```lua
-local exampleIdsWithFeature = dataset:getSortedFeature(featureId)
-```
-
-The ability to access examples IDs sorted by feature value, given a feature ID, is the main purpose of the `DataSet`.
-The `CartTrainer` relies on these sorted lists to find the best way to split a set of examples between two tree nodes.
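-
-As a quick sanity check (assuming the dense `input` defined earlier), the returned example IDs do order the feature column:
-
-```lua
-local featureId = 2
-local exampleIds = dataset:getSortedFeature(featureId)
-local col = input:select(2, featureId)
-for i=2,exampleIds:size(1) do
-   assert(col[exampleIds[i-1]] <= col[exampleIds[i]])
-end
-```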
-
-## `dt.CartTrainer`
-
-```lua
-local trainer = dt.CartTrainer(dataset, minLeafSize, maxLeafNodes)
-```
-
-The `CartTrainer` is used by the `RandomForestTrainer` and `GradientBoostTrainer` to train individual trees.
-CART stands for classification and regression trees.
-However, only binary classifiers are unit tested.
-
-The constructor takes the following arguments:
-
- * `dataset` is a `dt.DataSet` instance representing the training set.
- * `minLeafSize` is the minimum number of examples per leaf node in a tree. The larger the value, the stronger the regularization.
- * `maxLeafNodes` is the maximum number of leaf nodes in the tree. The lower the value, the stronger the regularization.
-
-Training is initiated by calling the `train()` method:
-
-```lua
-local trainSet = dt.DataSet(input, target)
-local rootTreeState = dt.GiniState(trainSet:getExampleIds())
-local activeFeatures = trainSet.featureIds
-local tree = trainer:train(rootTreeState, activeFeatures)
-```
-
-The resulting `tree` is a `CartTree` instance.
-The `rootTreeState` is a `TreeState` instance like `GiniState` (used by `RandomForestTrainer`) or `GradientBoostState` (used by `GradientBoostTrainer`).
-The `activeFeatures` is a `LongTensor` of the feature IDs used to build the tree.
-Every other feature ID is ignored during training, which is useful for feature bagging.
-
-By default, the `CartTrainer` runs in a single thread.
-The `featureParallel(nThread)` method can be called before calling `train()` to parallelize training using `nThread` workers:
-
-```lua
-local nThread = 3
-trainer:featureParallel(nThread)
-trainer:train(rootTreeState, activeFeatures)
-```
-
-Feature parallelization assigns a subset of the feature IDs to each thread.
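-Internally, features are sharded by ID (see `TreeState:findBestSplit`): worker `shardId` of `nShard` considers a `featureId` only when the following holds.
-
-```lua
-(nShard <= 1) or ((featureId % nShard) + 1 == shardId)
-```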
-
-The `CartTrainer` can be used as a stand-alone tree trainer.
-But it is recommended to use it within the context of a `RandomForestTrainer` or `GradientBoostTrainer` instead.
-The resulting ensembles typically generalize better.
-
-## RandomForestTrainer
-
-The `RandomForestTrainer` is used to train a random forest:
-
-```lua
-local nExample = trainSet:size()
-local opt = {
- activeRatio=0.5,
- featureBaggingSize=5,
- nTree=14,
- maxLeafNodes=nExample/2,
- minLeafSize=nExample/10,
-}
-local trainer = dt.RandomForestTrainer(opt)
-local forest = trainer:train(trainSet, trainSet.featureIds)
-```
-
-The returned `forest` is a `DecisionForest` instance.
-A `DecisionForest` has a similar interface to the `CartTree`.
-Indeed, they both sub-class the `DecisionTree` abstract class.
-
-The constructor takes a single `opt` table argument, which contains the actual arguments:
-
- * `activeRatio` is the ratio of active examples per tree. This is used for bootstrap sampling.
- * `featureBaggingSize` is the number of features per tree. This is used for feature bagging.
- * `nTree` is the number of trees to be trained.
- * `maxLeafNodes` and `minLeafSize` are passed to the underlying `CartTrainer` constructor (controls regularization).
-
-Internally, the `RandomForestTrainer` passes a `GiniState` to the `CartTrainer:train()` method.
-
-Training can be parallelized by calling `treeParallel(nThread)`:
-
-```lua
-local nThread = 3
-trainer:treeParallel(nThread)
-local forest = trainer:train(trainSet, trainSet.featureIds)
-```
-
-Each tree is then trained in its own worker thread.
-
-## GradientBoostTrainer
-
-References:
- * A. [Boosted Tree presentation](https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf)
-
-Gradient boosted decision trees (GBDT) can be trained as follows:
-```lua
-local nExample = trainSet:size()
-local maxLeafNode, minLeafSize = nExample/2, nExample/10
-local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNode)
-
-local opt = {
- lossFunction=nn.LogitBoostCriterion(false),
- treeTrainer=cartTrainer,
- shrinkage=0.1,
- downsampleRatio=0.8,
- featureBaggingSize=-1,
- nTree=14,
- evalFreq=8,
- earlyStop=0
-}
-
-local trainer = dt.GradientBoostTrainer(opt)
-local forest = trainer:train(trainSet, trainSet.featureIds, validSet)
-```
-
-The above code snippet uses the `LogitBoostCriterion` outlined in reference A.
-It is used for training binary classification trees.
-
-The returned `forest` is a `DecisionForest` instance.
-A `DecisionForest` has a similar interface to the `CartTree`.
-Indeed, they both sub-class the `DecisionTree` abstract class.
-
-The constructor takes a single `opt` table argument, which contains the actual arguments:
-
- * `lossFunction` is an `nn.Criterion` instance extended with the `updateHessInput(input, target)` and `backward2(input, target)` methods, which return the hessian with respect to the `input`.
- * `treeTrainer` is a `CartTrainer` instance. Its `featureParallel()` method can be called to implement feature parallelization.
- * `shrinkage` is the weight of each additional tree.
- * `downsampleRatio` is the ratio of examples to be sampled for each tree. Used for bootstrap sampling.
- * `featureBaggingSize` is the number of features to sample per tree. Used for feature bagging. `-1` defaults to `torch.round(math.sqrt(featureIds:size(1)))`.
- * `nTree` is the maximum number of trees.
- * `evalFreq` is the number of epochs between calls to `validate()` for cross-validation and early-stopping.
- * `earlyStop` is the maximum number of epochs to wait for early-stopping.
-
-Internally, the `GradientBoostTrainer` passes a `GradientBoostState` to the `CartTrainer:train()` method.
-
-## TreeState
-
-An abstract class that holds the state of a subtree during decision tree training.
-It also manages the state of candidate splits.
-
-```lua
-local treeState = dt.TreeState(exampleIds)
-```
-
-The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
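-
-Concrete subclasses implement the abstract methods declared in `TreeState.lua`.
-A minimal skeleton for a hypothetical `dt.MyState` (the bodies are placeholders, not a working splitter):
-
-```lua
-local MyState, parent = torch.class("dt.MyState", "dt.TreeState", dt)
-
--- computes and returns the score of the node based on its examples
-function MyState:score(dataset) end
-
--- initializes the split state; initially all examples are in the left branch
-function MyState:initialize(exampleIdsWithFeature, dataset) end
-
--- shifts one example from the left to the right branch
-function MyState:update(exampleId, dataset) end
-
--- returns the SplitInfo determined by the current split state
-function MyState:computeSplitInfo(splitFeatureId, splitFeatureValue) end
-```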
-
-## GiniState
-
-A `TreeState` subclass used internally by the `RandomForestTrainer`.
-Uses Gini impurity to determine how to split trees.
-
-```lua
-local treeState = dt.GiniState(exampleIds)
-```
-
-The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
-
-## GradientBoostState
-
-A `TreeState` subclass used internally by the `GradientBoostTrainer`.
-It implements the GBDT splitting algorithm, which uses a loss function.
-
-```lua
-local treeState = dt.GradientBoostState(exampleIds, lossFunction)
-```
-
-The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
-The `lossFunction` is an `nn.Criterion` instance (see `GradientBoostTrainer`).
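-
-For reference, `benchmark.lua` exercises this state directly; a condensed sketch (`initScore` initializes the example scores that the loss operates on):
-
-```lua
-trainSet:initScore()
-local treeState = dt.GradientBoostState(trainSet:getExampleIds(), nn.LogitBoostCriterion(false))
--- args: dataset, featureIds, minLeafSize, shardId, nShard
-local splitInfo = treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 1)
-```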
-
-
-## WorkPool
-
-Utility class that simplifies construction of a pool of daemon threads with which to execute tasks in parallel.
-
-```lua
-local workpool = dt.WorkPool(nThread)
-```
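-
-A sketch of typical usage, based on the task names handled by the worker loop in `WorkPool.lua`:
-
-```lua
-local workpool = dt.WorkPool(2)
--- make a package available in every worker thread
-workpool:update('require', {libname='decisiontree', varname='dt'})
--- run a task closure in a worker; writeup() serializes its upvalues
-workpool:writeup('execute', function(store, workerId) return workerId * 10 end)
-local taskname, result = workpool:read()
-assert(taskname == 'execute')
-workpool:terminate()
-```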
-
-## CartTree
-
-Implements a trained CART decision tree:
-
-```lua
-local tree = nn.CartTree(rootNode)
-```
-
-The `rootNode` is a `CartNode` instance.
-Each `CartNode` contains pointers to left and right branches, which are themselves `CartNode` instances.
-
-For inference, use the `score(input)` method:
-
-```lua
-local score = tree:score(input)
-```
diff --git a/contrib/lua-torch/decisiontree/RandomForestTrainer.lua b/contrib/lua-torch/decisiontree/RandomForestTrainer.lua
deleted file mode 100644
index 41040b25b..000000000
--- a/contrib/lua-torch/decisiontree/RandomForestTrainer.lua
+++ /dev/null
@@ -1,159 +0,0 @@
-local dt = require "decisiontree._env"
-
-local RandomForestTrainer = torch.class("dt.RandomForestTrainer", dt)
-
-function RandomForestTrainer:__init(opt)
- assert(torch.type(opt.nTree) == 'number')
- assert(opt.nTree > 0)
- self.nTree = opt.nTree
- -- max number of leaf nodes per tree
- assert(torch.type(opt.maxLeafNodes) == 'number')
- assert(opt.maxLeafNodes > 0)
- self.maxLeafNodes = opt.maxLeafNodes
- -- min number of examples per leaf
- assert(torch.type(opt.minLeafSize) == 'number')
- assert(opt.minLeafSize > 0)
- self.minLeafSize = opt.minLeafSize
-
- -- when non-positive, defaults to sqrt(#feature)
- assert(torch.type(opt.featureBaggingSize) == 'number')
- self.featureBaggingSize = opt.featureBaggingSize
-
- assert(torch.type(opt.activeRatio) == 'number')
- assert(opt.activeRatio > 0)
- self.activeRatio = opt.activeRatio
-
- -- default parallelization is singlethread
- self.parallelMode = 'singlethread'
-end
-
--- Train a DecisionForest
-function RandomForestTrainer:train(trainSet, featureIds, verbose)
- assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
- assert(torch.type(featureIds) == 'torch.LongTensor')
-
- if verbose then print(string.format("Begin training Decision Forest with %d trees", self.nTree)) end
-
- local weight = torch.Tensor(self.nTree):fill(1 / self.nTree) -- RF uses uniform weights
-
- local trees
- if self.parallelMode == 'singlethread' then
- trees = self:trainTrees(trainSet, featureIds, verbose)
- elseif self.parallelMode == 'treeparallel' then
- trainSet:deleteIndex() -- prevents serialization bottleneck
- trees = self:trainTreesTP(trainSet, featureIds, verbose)
- else
- error("Unrecognized parallel mode: " .. self.parallelMode)
- end
-
- if verbose then print(string.format("Successfully trained %d trees", #trees)) end
-
- -- set bias
-   local bias = 0
- for i, tree in ipairs(trees) do
- bias = bias + tree.root.score * weight[i]
- end
-
- return dt.DecisionForest(trees, weight, bias)
-end
-
-function RandomForestTrainer:trainTrees(trainSet, featureIds, verbose)
-
- -- the same CartTrainer will be used for each tree
- local cartTrainer = dt.CartTrainer(trainSet, self.minLeafSize, self.maxLeafNodes)
-
- local trees = {}
- for treeId=1,self.nTree do
- -- Train a CartTree
- local tree = self.trainTree(cartTrainer, featureIds, self.featureBaggingSize, self.activeRatio, treeId, verbose)
- table.insert(trees, tree)
- end
- return trees
-end
-
--- static function that returns a cartTree
-function RandomForestTrainer.trainTree(cartTrainer, featureIds, baggingSize, activeRatio, treeId, verbose)
- assert(torch.isTypeOf(cartTrainer, 'dt.CartTrainer'))
- assert(torch.type(featureIds) == 'torch.LongTensor')
- local baggingSize = baggingSize > 0 and baggingSize or torch.round(math.sqrt(featureIds:size(1)))
-
- if verbose then
-      print(string.format("Tree %d: Creating feature bootstrap sample with baggingSize %d, nFeatures %d", treeId, baggingSize, featureIds:size(1)))
- end
-
- local trainSet = cartTrainer.dataset
-
-   -- sample bootstrap features
- local baggingIndices = torch.LongTensor(baggingSize):random(1,featureIds:size(1))
- local activeFeatures = featureIds:index(1, baggingIndices)
-
-   -- sample bootstrap examples
- local sampleSize = torch.round(trainSet:size() * activeRatio)
-   if verbose then print(string.format("Creating bootstrap sample of size %d", sampleSize)) end
-
- baggingIndices:resize(sampleSize):random(1,trainSet:size())
- local bootStrapExampleIds = torch.LongTensor()
- bootStrapExampleIds:index(trainSet:getExampleIds(), 1, baggingIndices)
-
- local cartTree = cartTrainer:train(dt.GiniState(bootStrapExampleIds), activeFeatures)
-
-   if verbose then print(string.format("Completed processing tree number %d", treeId)) end
-
- return cartTree
-end
-
--- call before training to enable tree-level parallelization
-function RandomForestTrainer:treeParallel(workPool)
- assert(self.parallelMode == 'singlethread', self.parallelMode)
- self.parallelMode = 'treeparallel'
- self.workPool = torch.type(workPool) == 'number' and dt.WorkPool(workPool) or workPool
- assert(torch.isTypeOf(self.workPool, 'dt.WorkPool'))
-
- -- require the dt package
- self.workPool:update('require', {libname='decisiontree',varname='dt'})
-end
-
--- TP is for tree parallel (not toilet paper)
-function RandomForestTrainer:trainTreesTP(trainSet, featureIds, verbose)
- assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
- assert(torch.type(featureIds) == 'torch.LongTensor')
- local minLeafSize = self.minLeafSize
- local maxLeafNodes = self.maxLeafNodes
-
- -- setup worker store (each worker will have its own cartTrainer)
- self.workPool:updateup('execute', function(store)
- local dt = require 'decisiontree'
-
- store.cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
- store.featureIds = featureIds
- end)
-
- for treeId=1,self.nTree do
- -- upvalues
- local baggingSize = self.featureBaggingSize
- local activeRatio = self.activeRatio
- -- task closure that will be executed in worker-thread
- local function trainTreeTask(store)
- local dt = require 'decisiontree'
- return dt.RandomForestTrainer.trainTree(store.cartTrainer, store.featureIds, baggingSize, activeRatio, treeId, verbose)
- end
- self.workPool:writeup('execute', trainTreeTask)
- end
-
- local trees = {}
- for treeId=1,self.nTree do
- local taskname, tree = self.workPool:read()
- assert(taskname=='execute')
- assert(torch.isTypeOf(tree, 'dt.CartTree'))
- table.insert(trees, tree)
- end
- return trees
-end
-
-function RandomForestTrainer:getName()
- return string.format(
- "randomforest-aRatio-%4.2f-maxLeaf-%d-minExample-%d-nTree-%d",
- self.activeRatio, self.maxLeafNodes, self.minLeafSize, self.nTree
- )
-end
-
diff --git a/contrib/lua-torch/decisiontree/Sparse2Dense.lua b/contrib/lua-torch/decisiontree/Sparse2Dense.lua
deleted file mode 100644
index 4e5b79d2f..000000000
--- a/contrib/lua-torch/decisiontree/Sparse2Dense.lua
+++ /dev/null
@@ -1,88 +0,0 @@
-local S2D, parent = torch.class("nn.Sparse2Dense", "nn.Module")
-local dt = require 'decisiontree._env'
-
-function S2D:__init(features)
- parent.__init(self)
- if torch.type(features) == 'table' then
- assert(#features > 0)
- features = torch.LongTensor(features)
- end
- assert(torch.isTensor(features))
- self.features = features
- self.featureMap = nil
- self.masks = {}
- self.mappedKeys = {}
-end
-
-function S2D:updateOutput(input)
- if not self.featureMap then
- self.featureMap = dt.HashMap()
- self.featureMap:fill(self.features)
- end
- local batched, keys, values
- if torch.isTensor(input[1]) then
- keys = {input[1]}
- values = {input[2]}
- batched = false
- else
- keys = input[1]
- values = input[2]
- batched = true
- end
- assert(#keys == #values)
-
- local masks = self.masks
- local mappedKeys = self.mappedKeys
- local nKeys = #keys
- local nMasks = #masks
- if nMasks < nKeys then
- for i=nMasks+1,nKeys do
- masks[i] = torch.ByteTensor()
- mappedKeys[i] = torch.LongTensor()
- end
- elseif nMasks > nKeys then
- for i=nKeys+1,nMasks do
- masks[i] = nil
- mappedKeys[i] = nil
- end
- end
-
- self.featureMap:get(keys, mappedKeys, masks)
- self.output = self.output or torch.Tensor():type(self._type)
- self.output.nn.S2D_computeOutput(self.output, mappedKeys, values, masks, self.features)
- if not batched then
- self.output = self.output:view(-1)
- end
- return self.output
-end
-
-function S2D:type(type, tensorCache)
- if type then
- local features = self.features
- self.features = nil
- parent.type(self, type, tensorCache)
- self.features = features
- return self
- else
- return parent.type(self)
- end
-end
-
-function S2D:updateGradInput(input, gradOutput)
- error"Not Implemented"
-end
-
-function S2D:reset()
- parent.reset(self)
- self.featureMap = nil
-end
-
-function S2D:write(file)
- self.featureMap = nil
- parent.write(self, file)
-end
-
-function S2D:read(file)
- self.featureMap = nil
- parent.read(self, file)
-end
diff --git a/contrib/lua-torch/decisiontree/SparseTensor.lua b/contrib/lua-torch/decisiontree/SparseTensor.lua
deleted file mode 100644
index 4c620e618..000000000
--- a/contrib/lua-torch/decisiontree/SparseTensor.lua
+++ /dev/null
@@ -1,54 +0,0 @@
-
-local SparseTensor = torch.class("torch.SparseTensor")
-
-function SparseTensor:__init(keys, values)
- if keys and values then
- assert(torch.typename(keys):find('torch%..*LongTensor'))
- assert(torch.isTensor(values))
- assert(keys:nElement() == values:nElement(), "Expecting key and value tensors of same size")
- self.keys = keys
- self.values = values
- elseif not (keys or values) then
- self.keys = torch.LongTensor()
- self.values = torch.Tensor()
- else
- error"Expecting zero or two args"
- end
-end
-
-function SparseTensor:buildIndex(overwrite)
- if self._map and not overwrite then return end
- assert(self.keys and self.keys:dim() == 1)
- assert(self.values and self.values:dim() == 1)
- -- hash table
- self._map = {}
- for i=1,self.keys:size(1) do
- self._map[self.keys[i]] = i
- end
-end
-
-function SparseTensor:deleteIndex()
- self._map = nil
-end
-
-local __index = SparseTensor.__index
-function SparseTensor:__index(key)
- if key == nil then
- error"Attempt to index using a nil key"
- elseif torch.type(key) ~= 'number' then
- return __index(self, key)
- end
-
- if self._map then
- assert(torch.type(self._map) == 'table')
- local idx = self._map[key]
- return idx and self.values[idx] or nil
- elseif self.keys:nElement() > 0 then
- for i=1,self.keys:size(1) do
- if self.keys[i] == key then
- return self.values[i]
- end
- end
- end
- return nil
-end \ No newline at end of file
diff --git a/contrib/lua-torch/decisiontree/TreeState.lua b/contrib/lua-torch/decisiontree/TreeState.lua
deleted file mode 100644
index 3928649fd..000000000
--- a/contrib/lua-torch/decisiontree/TreeState.lua
+++ /dev/null
@@ -1,191 +0,0 @@
-local dt = require "decisiontree._env"
-
-local TreeState = torch.class("dt.TreeState", dt)
-
--- Holds the state of a subtree during decision tree training.
--- Also, manages the state of candidate splits
-function TreeState:__init(exampleIds)
- assert(torch.type(exampleIds) == 'torch.LongTensor')
- self.exampleIds = exampleIds
-
- self.nExampleInLeftBranch = 0
- self.nExampleInRightBranch = 0
-end
-
--- computes and returns the score of the node based on its examples
-function TreeState:score(dataset)
- error"NotImplemented"
-end
-
-
--- Initializes the split-state-updater. Initially all examples are in the left branch.
--- exampleIdsWithFeature is the list of examples to split (those having a particular feature)
-function TreeState:initialize(exampleIdsWithFeature, dataset)
- error"NotImplemented"
-end
-
--- Update the split state. This call has the effect of shifting the example from the left to the right branch.
-function TreeState:update(exampleId, dataset)
- error"NotImplemented"
-end
-
--- Computes the SplitInfo determined by the current split state
--- @param splitFeatureId the feature id of the split feature
--- @param splitFeatureValue the feature value of the split feature
--- @return the SplitInfo determined by the current split state
-function TreeState:computeSplitInfo(splitFeatureId, splitFeatureValue)
- error"NotImplemented"
-end
-
--- bottleneck
-function TreeState:findBestFeatureSplit(dataset, featureId, minLeafSize)
- local dt = require "decisiontree"
- assert(torch.isTypeOf(dataset, 'dt.DataSet'))
- assert(torch.type(featureId) == 'number')
- assert(torch.type(minLeafSize) == 'number')
-
-   -- all dataset examples having this feature, sorted by value
- local featureExampleIds = dataset:getSortedFeature(featureId)
-
- local buffer = dt.getBufferTable('TreeState')
- buffer.longtensor = buffer.longtensor or torch.LongTensor()
- local exampleIdsWithFeature = buffer.longtensor
-
- -- map and tensor of examples containing feature:
- local exampleMap = {}
- local getExampleFeatureValue
-
- local j = 0
- if torch.type(dataset.input) == 'table' then
- exampleIdsWithFeature:resize(self.exampleIds:size())
- self.exampleIds:apply(function(exampleId)
- local input = dataset.input[exampleId]
-      input:buildIndex() -- only builds the index the first time
- if input[featureId] then
- j = j + 1
- exampleIdsWithFeature[j] = exampleId
- exampleMap[exampleId] = j
- end
- end)
- if j == 0 then
- return
- end
- exampleIdsWithFeature:resize(j)
- getExampleFeatureValue = function(exampleId) return dataset.input[exampleId][featureId] end
- else
- exampleIdsWithFeature = self.exampleIds
- self.exampleIds:apply(function(exampleId)
- j = j + 1
- exampleMap[exampleId] = j
- end)
- local featureValues = dataset.input:select(2,featureId)
- getExampleFeatureValue = function(exampleId) return featureValues[exampleId] end
- end
-
-
- self:initialize(exampleIdsWithFeature, dataset)
-
- -- bottleneck
- local bestSplit, previousSplitValue, _tictoc
- for i=featureExampleIds:size(1),1,-1 do -- loop over examples sorted (desc) by feature value
- local exampleId = featureExampleIds[i]
-
- local exampleIdx = exampleMap[exampleId]
- if exampleIdx then
- local splitValue = getExampleFeatureValue(exampleId)
-
- if previousSplitValue and math.abs(splitValue - previousSplitValue) > dt.EPSILON then
- local splitInfo = self:computeSplitInfo(featureId, previousSplitValue, _tictoc)
- if (splitInfo.leftChildSize >= minLeafSize) and (splitInfo.rightChildSize >= minLeafSize) then
-
- if (not bestSplit) or (splitInfo.splitGain < bestSplit.splitGain) then
- _tictoc = bestSplit or {} -- reuse table
- bestSplit = splitInfo
- end
-
- end
- end
-
- previousSplitValue = splitValue
-
- -- bottleneck
- self:update(exampleId, dataset, exampleIdx)
- end
- end
-
- return bestSplit
-end
-
--- finds the best split of examples in treeState among featureIds
-function TreeState:findBestSplit(dataset, featureIds, minLeafSize, shardId, nShard)
- assert(torch.isTypeOf(dataset, 'dt.DataSet'))
- assert(torch.type(featureIds) == 'torch.LongTensor')
- assert(torch.type(minLeafSize) == 'number')
- assert(torch.type(shardId) == 'number')
- assert(torch.type(nShard) == 'number')
-
- local bestSplit
- for i=1,featureIds:size(1) do
- local featureId = featureIds[i]
- if (nShard <= 1) or ( (featureId % nShard) + 1 == shardId ) then -- feature sharded
- local splitCandidate = self:findBestFeatureSplit(dataset, featureId, minLeafSize)
- if splitCandidate and ((not bestSplit) or (splitCandidate.splitGain < bestSplit.splitGain)) then
- bestSplit = splitCandidate
- end
- end
- end
-
- return bestSplit
-end
-
--- Partitions self given a splitInfo table, producing a pair of exampleIds corresponding to the left and right subtrees.
-function TreeState:_branch(splitInfo, dataset)
- local leftIdx, rightIdx = 0, 0
- local nExample = self.exampleIds:size(1)
- local splitExampleIds = torch.LongTensor(nExample)
-
-
- for i=1,self.exampleIds:size(1) do
- local exampleId = self.exampleIds[i]
- local input = dataset.input[exampleId]
- local val = input[splitInfo.splitId]
-      -- Note: when the feature is not present in the example, the example is dropped from all sub-trees.
-      -- This means that for most sparse data, a tree cannot reach 100% accuracy...
- if val then
- if val < splitInfo.splitValue then
- leftIdx = leftIdx + 1
- splitExampleIds[leftIdx] = exampleId
- else
- rightIdx = rightIdx + 1
- splitExampleIds[nExample-rightIdx+1] = exampleId
- end
- end
- end
-
- local leftExampleIds = splitExampleIds:narrow(1,1,leftIdx)
- local rightExampleIds = splitExampleIds:narrow(1,nExample-rightIdx+1,rightIdx)
-
- assert(leftExampleIds:size(1) + rightExampleIds:size(1) <= self.exampleIds:size(1), "Left and right branches contain more data than the parent!")
- return leftExampleIds, rightExampleIds
-end
-
--- calls _branch and encapsulates the left and right exampleIds into two TreeStates
-function TreeState:branch(splitInfo, dataset)
- local leftExampleIds, rightExampleIds = self:_branch(splitInfo, dataset)
- return self.new(leftExampleIds), self.new(rightExampleIds)
-end
-
-function TreeState:size()
- return self.exampleIds:size(1)
-end
-
-function TreeState:contains(exampleId)
- local found = false
- self.exampleIds:apply(function(x)
- if x == exampleId then
- found = true
- end
- end)
- return found
-end
-
diff --git a/contrib/lua-torch/decisiontree/WorkPool.lua b/contrib/lua-torch/decisiontree/WorkPool.lua
deleted file mode 100644
index 8f473727e..000000000
--- a/contrib/lua-torch/decisiontree/WorkPool.lua
+++ /dev/null
@@ -1,156 +0,0 @@
-local dt = require "decisiontree._env"
-
--- Utility to simplify construction of a pool of daemon threads with which to execute tasks in parallel.
-local WorkPool = torch.class("dt.WorkPool", dt)
-
-function WorkPool:__init(nThread)
- self.nThread = nThread or 16
- assert(torch.type(self.nThread) == 'number')
- assert(self.nThread > 0)
-
- self:initialize()
-end
-
-function WorkPool:initialize()
- local ipc = require 'libipc'
- self.queuename = os.tmpname()
- self.queue = ipc.workqueue(self.queuename)
- self.queues = {}
- for i=1,self.nThread do
- self.queues[i] = ipc.workqueue(self.queuename.."/"..i)
- end
-
- -- spawn thread workers
- ipc.map(self.nThread, function(queuename, nThread, myId)
- assert(nThread)
- assert(myId)
- local ipc = require 'libipc'
-
- -- Open the queue by name (the main thread already created it)
- local mainqueue = ipc.workqueue(queuename)
- local workqueue = ipc.workqueue(queuename.."/"..myId)
-
-      local taskname, task
-
- local store = {}
- local queue = mainqueue
-
- repeat
- local msg = queue:read()
- assert(torch.type(msg) == 'table')
- taskname, task = unpack(msg)
- if taskname == nil then
- break
- elseif torch.type(taskname) ~= 'string' then
- error("Expecting taskname string. Got "..torch.type(taskname))
- elseif taskname == 'storeKeyValue' then
- assert(torch.type(task) == 'table')
- assert(queue == workqueue)
- store[task.key] = task.value
- queue:write({taskname})
- elseif taskname == 'storeKeysValues' then
- assert(torch.type(task) == 'table')
- assert(queue == workqueue)
- for key,value in pairs(task) do
- store[key] = value
- end
- queue:write({taskname})
- elseif taskname == 'require' then
- assert(torch.type(task) == 'table')
- assert(torch.type(task.libname) == 'string')
- assert(torch.type(task.varname) == 'string')
- _G[task.varname] = require(task.libname)
- assert(queue == workqueue)
- queue:write({taskname})
- elseif taskname == 'storeReset' then
- store = {}
- mainqueue:write({taskname})
- elseif taskname == 'echo' then
- mainqueue:write({taskname, task})
- elseif taskname == 'readWorkerQueue' then
- queue = workqueue
- elseif taskname == 'readMainQueue' then
- queue = mainqueue
- elseif taskname == 'execute' then
- if torch.type(task) == 'table' then
- assert(task.func and task.args)
- queue:write({taskname, task.func(store, task.args, myId)})
- else
- assert(torch.type(task) == 'function')
- queue:write({taskname, task(store, myId)})
- end
- else
- error("Unknown taskname: "..taskname)
- end
- until taskname == nil
- end, self.queuename, self.nThread)
-
-end
-
--- Terminates all daemon threads.
-function WorkPool:terminate()
- for i=1,self.nThread do
- self.queue:write({})
- end
-end
-
--- this function is used to update the store of data in each worker thread
-function WorkPool:_update(taskname, task, upval)
- assert(torch.type(taskname) == 'string')
- local _ = require 'moses'
- assert(_.contains({'storeKeyValue','storeKeysValues','require','execute'}, taskname))
- assert(torch.type(task) == 'table' or torch.type(task) == 'function')
-
- -- tell the workers to read their individual queue
- for i=1,self.nThread do
- self.queue:write({'readWorkerQueue'})
- end
-
- -- write to individual worker queues
- for i=1,self.nThread do
- if upval then
- self.queues[i]:writeup({taskname, task})
- else
- self.queues[i]:write({taskname, task})
- end
- end
-
- -- TODO use ipc.mutex:barrier(nThread+1)
- -- barrier: make sure that every worker has completed task by reading their queue
- for i=1,self.nThread do
- assert(self.queues[i]:read()[1] == taskname)
- end
-
- -- finally, tell them to read the main queue
- for i=1,self.nThread do
- self.queues[i]:write({'readMainQueue'})
- end
-end
-
-function WorkPool:update(taskname, task)
- return self:_update(taskname, task, false)
-end
-
-function WorkPool:updateup(taskname, task)
- return self:_update(taskname, task, true)
-end
-
-function WorkPool:write(taskname, task)
- assert(torch.type(taskname) == 'string')
-   assert(taskname ~= 'storeKeyValue' and taskname ~= 'storeKeysValues')
- self.queue:write({taskname, task})
-end
-
-function WorkPool:writeup(taskname, task)
- assert(torch.type(taskname) == 'string')
-   assert(taskname ~= 'storeKeyValue' and taskname ~= 'storeKeysValues')
- self.queue:writeup({taskname, task})
-end
-
-function WorkPool:read()
- local res = self.queue:read()
- assert(torch.type(res) == 'table')
-   assert(torch.type(res[1]) == 'string')
- return unpack(res)
-end
-
diff --git a/contrib/lua-torch/decisiontree/_env.lua b/contrib/lua-torch/decisiontree/_env.lua
deleted file mode 100644
index a92770152..000000000
--- a/contrib/lua-torch/decisiontree/_env.lua
+++ /dev/null
@@ -1,5 +0,0 @@
-
--- https://github.com/torch/torch7/issues/525
-
-local dl = {}
-return dl \ No newline at end of file
diff --git a/contrib/lua-torch/decisiontree/benchmark.lua b/contrib/lua-torch/decisiontree/benchmark.lua
deleted file mode 100644
index 2b6a03dc6..000000000
--- a/contrib/lua-torch/decisiontree/benchmark.lua
+++ /dev/null
@@ -1,171 +0,0 @@
-local dt = require "decisiontree._env"
-
-local bm = {}
-function bm.CartTrainer(opt)
- local timer = torch.Timer()
- local trainSet, validSet = dt.getSparseDummyData(opt)
- print(string.format("CartTrainer: sparse dataset create: %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
-
- local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
- local treeState = dt.GiniState(trainSet:getExampleIds())
- timer:reset()
- local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
- print(string.format("CartTrainer: train single-thread : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
-
- timer:reset()
- cartTrainer:featureParallel(opt.nThread)
- print(string.format("CartTrainer: setup feature-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
- timer:reset()
- local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
- print(string.format("CartTrainer: train feature-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
-end
-
-function bm.GradientBoostState(opt)
- local trainSet, validSet = dt.getSparseDummyData(opt)
-
- trainSet:initScore()
-
- local treeState = dt.GradientBoostState(trainSet:getExampleIds(), nn.LogitBoostCriterion(false))
-
- local timer = torch.Timer() -- first step also calls SparseTensor:buildIndex()
- treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
- print(string.format("GradientBoostState: findBestSplit (first) : %f sec", timer:time().real))
-
- timer:reset()
- treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
- print(string.format("GradientBoostState: findBestSplit (second) : %f sec", timer:time().real))
-
-end
-
-local function file_exists(name)
- local f=io.open(name,"r")
- if f~=nil then io.close(f) return true else return false end
-end
-
-function bm.GradientBoostTrainer(opt)
- local trainSet, validSet
- if file_exists("/tmp/train.bin") and file_exists("/tmp/valid.bin") then
- trainSet = torch.load("/tmp/train.bin")
- validSet = torch.load("/tmp/valid.bin")
- else
- if opt.sparse then
- trainSet, validSet = dt.getSparseDummyData(opt)
- else
- trainSet, validSet = dt.getDenseDummyData(opt)
- end
- torch.save("/tmp/train.bin", trainSet)
- torch.save("/tmp/valid.bin", validSet)
- end
-
- local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
- opt.lossFunction = nn.LogitBoostCriterion(false)
- opt.treeTrainer = cartTrainer
- local forestTrainer = dt.GradientBoostTrainer(opt)
-
- local timer = torch.Timer()
- local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds, validSet)
- local time = timer:time().real
- print(string.format("GradientBoostTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
-
- cartTrainer:featureParallel(opt.nThread)
- timer:reset()
- local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds, validSet)
- local time = timer:time().real
- print(string.format("GradientBoostTrainer: train feature-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
-end
-
-function bm.RandomForestTrainer(opt)
- local trainSet, validSet = dt.getSparseDummyData(opt)
-
- local forestTrainer = dt.RandomForestTrainer(opt)
- local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
-
- local timer = torch.Timer()
- local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
- local time = timer:time().real
- print(string.format("RandomForestTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
-
- timer:reset()
- forestTrainer:treeParallel(opt.nThread)
- print(string.format("RandomForestTrainer: setup tree-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
-
- timer:reset()
- local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
- local time = timer:time().real
- print(string.format("RandomForestTrainer: train tree-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
-end
-
-function bm.DFD(opt)
- local _ = require 'moses'
- local opt = _.clone(opt)
- opt.nExample = 200
- local trainSet, validSet = dt.getDenseDummyData(opt)
-
- local forestTrainer = dt.RandomForestTrainer(opt)
- forestTrainer:treeParallel(opt.nThread)
- local timer = torch.Timer()
- local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
- local time = timer:time().real
- print(string.format("DFD: train random forest in parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
-
-
- -- benchmark nn.DFD
- local input = trainSet.input:sub(1,opt.batchsize)
- local dfd = nn.DFD(decisionForest)
- dfd:forward(input)
- timer:reset()
- for i=1,opt.nloop do
- dfd:forward(input)
- end
- print(string.format("DFD: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/timer:time().real, timer:time().real))
-end
-
-function bm.Sparse2Dense(opt)
- local _ = require 'moses'
- local opt = _.clone(opt)
- opt.nExample = opt.batchsize
- local trainSet = dt.getSparseDummyData(opt)
-
- local input = {{},{}}
- for i=1,opt.batchsize do
- input[1][i] = trainSet.input[i].keys
- input[2][i] = trainSet.input[i].values
- end
- assert(#input[1] == opt.batchsize)
-
- -- benchmark nn.Sparse2Dense
- local s2d = nn.Sparse2Dense(torch.LongTensor():range(1,opt.nFeature))
- s2d:forward(input)
- local timer = torch.Timer()
- for i=1,opt.nloop do
- s2d:forward(input)
- end
- print(string.format("Sparse2Dense: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/timer:time().real, timer:time().real))
-end
-
-function dt.benchmark(benchmarks, opt2)
- local opt = {
- nExample=10000, nCluster=2, nFeature=1000, overlap=0, nValid=100, -- getSparseDummyData
- nTree=20, featureBaggingSize=-1, sparse=true, -- GradientBoostTrainer and RandomForestTrainer
- nThread=2, shrinkage=0.1, downsampleRatio=0.1, evalFreq=5, earlyStop=0, -- GradientBoostTrainer
- activeRatio=0.5, -- RandomForestTrainer
- batchsize=32, nloop=10
- }
-
- local _ = require 'moses'
- benchmarks = benchmarks or _.keys(bm)
- assert(torch.type(benchmarks) == 'table')
- for i,benchmark in ipairs(benchmarks) do
- local opt1 = _.clone(opt)
- for key, value in pairs(opt2 or {}) do
- opt1[key] = value
- end
- opt1.nActive = opt1.nActive or torch.round(opt1.nFeature/10)
- opt1.maxLeafNodes = opt1.maxLeafNodes or (opt1.nExample/10)
- opt1.minLeafSize = opt1.minLeafSize or (opt1.nExample/100)
-
- assert(torch.type(benchmark) == 'string', benchmark)
- assert(bm[benchmark], benchmark)
- bm[benchmark](opt1)
- end
-end
diff --git a/contrib/lua-torch/decisiontree/doc/benchmark.md b/contrib/lua-torch/decisiontree/doc/benchmark.md
deleted file mode 100644
index cb8f905d6..000000000
--- a/contrib/lua-torch/decisiontree/doc/benchmark.md
+++ /dev/null
@@ -1,291 +0,0 @@
-# Benchmarks
-
-This file outlines the roadmap (and the corresponding benchmarks) of optimizations and refactorings over time.
-
-## Baseline
-
-The baseline implementation is very slow.
-We converted the Twitter decision tree library (used internally) from Java to Lua.
-The objective was to replicate the GBDT and Random Forest implementations as is (more or less).
-The Java library is very good and reasonably fast. The same code in Lua is slow.
-The point of this Lua baseline was not to obtain the same computational performance as the Java library.
-Instead, we wanted the training and inference algorithms of the Lua lib to match those of the Java lib.
-As such, the training/validation error of the baseline Lua lib should match that of the Java lib.
-The unit tests seem to validate this claim, as performance on both the training and validation sets is unit tested.
-We also used the conversion exercise as a way to learn about decision tree implementation (our background is deep learning).
-That being said, the baseline performance is terrible:
-
-```
-th -e "dt = require 'decisiontree'; dt.benchmark()"
-CartTrainer: sparse dataset create: 2963.192386 samples/sec; 0.337479 sec
-CartTrainer: train single-thread : 14.165438 samples/sec; 70.594361 sec
-CartTrainer: setup feature-parallel : 5.129034 samples/sec; 194.968478 sec
-CartTrainer: train feature-parallel : 9.736592 samples/sec; 102.705344 sec
-```
-
-The original Java lib had approximately 43 classes.
-The baseline has about 24.
-This reduction is due to obvious merging of classes, but also to conversions of classes into functions.
-The next patches continue this process of reducing the number of classes.
-
-## Patch 1 (complete):
-
-This patch further reduces the number of classes, but adds the DataSet class.
-The code is much simpler to read. Examples are batched.
-
- * [x] examples are batched in dt.DataSet: {input, target, score}
- * [x] deprecate dt.LabeledExample
- * [x] list of examples are replaced with torch.LongTensors of exampleIds
- * [x] merge TreeBrancher into TreeState
- * [x] merge BestSplitFinder and SplitStateUpdater into TreeState
- * [x] TreeState subclasses: GradientBoostState and GiniState
-
-```
-th -e "dt = require 'decisiontree'; dt.benchmark()"
-CartTrainer: sparse dataset create: 3597.392294 samples/sec; 0.277984 sec
-CartTrainer: train single-thread : 35.763255 samples/sec; 27.961663 sec
-CartTrainer: setup feature-parallel : 36759.250495 samples/sec; 0.027220 sec
-CartTrainer: train feature-parallel : 72.523658 samples/sec; 13.788606 sec
-```
-
-The setup time for feature-parallelization improved the most.
-The run-time for feature-parallel training is also about half that of single-thread.
-Since it is using 2 threads, the parallelization is working quite well.
-
-We also added benchmarks for the `RandomForestTrainer` and `GradientBoostTrainer`:
-
-```
-GradientBoostTrainer: train single-thread : 599.895105 samples/sec; 0.083348 sec/tree, 1.666958 sec
-GradientBoostTrainer: train feature-parallel : 974.235273 samples/sec; 0.051322 sec/tree, 1.026446 sec
-RandomForestTrainer: train single-thread : 134.781044 samples/sec; 0.370972 sec/tree, 7.419441 sec
-RandomForestTrainer: setup tree-parallel : 73341.097064 samples/sec; 0.013649 sec
-RandomForestTrainer: train tree-parallel : 262.975891 samples/sec; 0.190131 sec/tree, 3.802630 sec
-```
-
-Looks good.
-
-## Patch 2 (complete):
-
- * [x] dt.LossFunction -> nn.Criterion (LogitBoost is done, missing MSE)
- * [x] use SparseTensor:buildIndex() to accelerate TreeState:findBestSplit()
- * [x] benchmarks use 10000 instead of 1000 examples
-
-The benchmarks indicate good improvements. Most improvements were made possible by the use of `buildIndex`:
-
-```
-th -e "dt = require 'decisiontree'; dt.benchmark()"
-GradientBoostState: findBestSplit (first) : 11.415645 sec
-GradientBoostState: findBestSplit (second) : 11.246336 sec
-CartTrainer: sparse dataset create: 3284.803629 samples/sec; 3.044327 sec
-CartTrainer: train single-thread : 239.544758 samples/sec; 41.745858 sec
-CartTrainer: setup feature-parallel : 10996.443063 samples/sec; 0.909390 sec
-CartTrainer: train feature-parallel : 473.888592 samples/sec; 21.102011 sec
-RandomForestTrainer: train single-thread : 892.985186 samples/sec; 0.559920 sec/tree, 11.198394 sec
-RandomForestTrainer: setup tree-parallel : 176806.252266 samples/sec; 0.056569 sec
-RandomForestTrainer: train tree-parallel : 1377.849291 samples/sec; 0.362884 sec/tree, 7.257688 sec
-GradientBoostTrainer: train single-thread : 2685.485128 samples/sec; 0.186186 sec/tree, 3.723722 sec
-GradientBoostTrainer: train feature-parallel : 3712.313215 samples/sec; 0.134687 sec/tree, 2.693738 sec
-```
-
-The main bottleneck is now in serializing the SparseTensor hash maps. We temporarily overcame this bottleneck by
-deleting indexes when calling `CartTrainer:featureParallel()` and `RandomForestTrainer:treeParallel()`.
-In this way, the indexes are recreated for each thread. Ideally, we would use a C hash map so that a pointer
-could be serialized instead. But `tds.Hash` does not serialize well, so for now we use Lua tables.
-
-This is the benchmark for `GradientBoostTrainer` on a large dataset of dense inputs:
-
-```
-th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})"
-GradientBoostTrainer: train single-thread : 152.463989 samples/sec; 131.178517 sec/tree, 655.892584 sec
-GradientBoostTrainer: train feature-parallel : 224.288488 samples/sec; 89.170872 sec/tree, 445.854358 sec
-[tw-mbp-nleonard decisiontree]$ th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8,nThread=4})"
-GradientBoostTrainer: train single-thread : 163.836896 samples/sec; 122.072625 sec/tree, 610.363126 sec
-GradientBoostTrainer: train feature-parallel : 407.981442 samples/sec; 49.021838 sec/tree, 245.109188 sec
-```
-
-## Patch 3:
-
-This patch optimizes GBDT for large datasets consisting of dense inputs. The benchmarks:
-
-```
-th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})"
-GradientBoostTrainer: train single-thread : 547.553407 samples/sec; 36.526117 sec/tree, 182.630587 sec
-GradientBoostTrainer: train feature-parallel : 792.964678 samples/sec; 25.221804 sec/tree, 126.109022 sec
-[tw-mbp-nleonard decisiontree]$ th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8,nThread=4})"
-GradientBoostTrainer: train single-thread : 555.793759 samples/sec; 35.984571 sec/tree, 179.922855 sec
-GradientBoostTrainer: train feature-parallel : 1289.977846 samples/sec; 15.504142 sec/tree, 77.520711 sec
-```
-
-For 1, 2 and 4 threads, the speedups of patch 3 over patch 2 are respectively: 3.39, 3.53, and 3.18.
-For this patch, the multi-threading speedups of 2 and 4 threads over a single thread are respectively 1.42 and 2.33.
-Improvements over the previous patch were obtained by optimizing two aspects:
-
- 1. Optimizing `TreeState.findBestFeatureSplit` for dense datasets (for example: `if dense, then ...`);
- 2. Removing `assert` clauses in `GradientBoostState.update`. The `update` method is called for every (example, feature), making it a major bottleneck.
-
-Converting the `update` to C could lead to further optimizations.
-
-This patch also improves the benchmark on sparse datasets:
-```
-$ th -e "dt = require 'decisiontree'; dt.benchmark()"
-RandomForestTrainer: train single-thread : 1121.311196 samples/sec; 0.445907 sec/tree, 8.918131 sec
-RandomForestTrainer: setup tree-parallel : 168773.323354 samples/sec; 0.059256 sec
-RandomForestTrainer: train tree-parallel : 1701.280938 samples/sec; 0.293896 sec/tree, 5.877924 sec
-GradientBoostState: findBestSplit (first) : 8.250646 sec
-GradientBoostState: findBestSplit (second) : 7.952077 sec
-GradientBoostTrainer: train single-thread : 3355.248596 samples/sec; 0.149020 sec/tree, 2.980405 sec
-GradientBoostTrainer: train feature-parallel : 4399.133369 samples/sec; 0.113659 sec/tree, 2.273175 sec
-CartTrainer: sparse dataset create: 3428.105601 samples/sec; 2.917069 sec
-CartTrainer: train single-thread : 282.172416 samples/sec; 35.439331 sec
-CartTrainer: setup feature-parallel : 9455.440801 samples/sec; 1.057598 sec
-CartTrainer: train feature-parallel : 594.054049 samples/sec; 16.833491 sec
-DFD: train random forest in parallel : 346.831378 samples/sec; 0.288325 sec/tree, 5.766491 sec
-DFD: updateOutput : 831.105546 samples/sec; 0.038509 sec
-```
-
-## Patch 4:
-
-This patch improves `nn.DFD` from
-
-```
-th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})"
-DFD: train random forest in parallel : 10.527251 samples/sec; 0.037997 sec/tree, 18.998313 sec
-DFD: updateOutput : 32.442950 samples/sec; 9.863472 sec
-```
-
-to
-
-```
-th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})"
-DFD: train random forest in parallel : 10.839547 samples/sec; 0.036902 sec/tree, 18.450956 sec
-DFD: updateOutput : 359.158353 samples/sec; 0.890975 sec
-Sparse2Dense: updateOutput : 15395.648952 samples/sec; 0.020791 sec
-```
-
-That is a 10x speedup for `nn.DFD`.
-
-The patch also adds a benchmark for `nn.Sparse2Dense`:
-
-```
-th -e "dt = require 'decisiontree'; dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})"
-Sparse2Dense: updateOutput : 17158.126406 samples/sec; 0.018653 sec
-```
-
-Indeed, `nn.Sparse2Dense` is not the bottleneck; `nn.DFD` is.
-
-## Patch 5:
-
-This patch improves `nn.DFD` inference from
-
-```
-for i in `seq 3`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=100})"; done
-DFD: train random forest in parallel : 8.452295 samples/sec; 0.047324 sec/tree, 23.662212 sec
-DFD: updateOutput : 176.617872 samples/sec; 9.059109 sec
-DFD: train random forest in parallel : 8.350019 samples/sec; 0.047904 sec/tree, 23.952042 sec
-DFD: updateOutput : 183.508204 samples/sec; 8.718962 sec
-DFD: train random forest in parallel : 8.525779 samples/sec; 0.046917 sec/tree, 23.458266 sec
-DFD: updateOutput : 178.877077 samples/sec; 8.944692 sec
-```
-
-to
-
-```
-for i in `seq 3`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=100})"; done
-DFD: train random forest in parallel : 8.434502 samples/sec; 0.047424 sec/tree, 23.712129 sec
-DFD: updateOutput : 6479.597179 samples/sec; 0.246933 sec
-DFD: train random forest in parallel : 8.334543 samples/sec; 0.047993 sec/tree, 23.996518 sec
-DFD: updateOutput : 6663.641184 samples/sec; 0.240114 sec
-DFD: train random forest in parallel : 8.353265 samples/sec; 0.047885 sec/tree, 23.942735 sec
-DFD: updateOutput : 6882.607456 samples/sec; 0.232475 sec
-```
-
-That is a 37x speedup for `nn.DFD`.
-
-## Patch 6:
-
-This patch improves `nn.DFD` from the previous result to
-
-```
-for i in `seq 5`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=10000})"; done
-DFD: train random forest in parallel : 8.353504 samples/sec; 0.047884 sec/tree, 23.942050 sec
-DFD: updateOutput : 91967.342339 samples/sec; 1.739753 sec
-DFD: train random forest in parallel : 8.528141 samples/sec; 0.046904 sec/tree, 23.451770 sec
-DFD: updateOutput : 91405.321702 samples/sec; 1.750451 sec
-DFD: train random forest in parallel : 8.184562 samples/sec; 0.048872 sec/tree, 24.436250 sec
-DFD: updateOutput : 91623.388867 samples/sec; 1.746284 sec
-DFD: train random forest in parallel : 8.779561 samples/sec; 0.045560 sec/tree, 22.780182 sec
-DFD: updateOutput : 93914.242852 samples/sec; 1.703686 sec
-DFD: train random forest in parallel : 8.636201 samples/sec; 0.046317 sec/tree, 23.158330 sec
-DFD: updateOutput : 94092.241963 samples/sec; 1.700465 sec
-```
-
-That is another 13.8x speedup.
-
-## Patch 7:
-
-This patch improves `nn.Sparse2Dense` computation from
-
-```
-for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
-Sparse2Dense: updateOutput : 1103.570777 samples/sec; 28.996786 sec
-Sparse2Dense: updateOutput : 1092.064331 samples/sec; 29.302309 sec
-Sparse2Dense: updateOutput : 1036.963572 samples/sec; 30.859334 sec
-```
-
-to
-
-```
-for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
-Sparse2Dense: updateOutput : 62995.834470 samples/sec; 0.507978 sec
-Sparse2Dense: updateOutput : 62471.568253 samples/sec; 0.512242 sec
-Sparse2Dense: updateOutput : 62965.099331 samples/sec; 0.508226 sec
-```
-
-This represents a speedup of about 57x.
-
-## Patch 8:
-
-This patch improves `nn.Sparse2Dense` from the previous result to
-
-```
-for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
-Sparse2Dense: updateOutput : 124268.079914 samples/sec; 0.257515 sec
-Sparse2Dense: updateOutput : 114750.039542 samples/sec; 0.278873 sec
-Sparse2Dense: updateOutput : 122863.314766 samples/sec; 0.260458 sec
-```
-
-which corresponds to another 1.95x speedup.
-
-## Patch 9:
-
-This patch moves the core of GBDT training, which used to be a big bottleneck, to C. It also
-performs small optimizations across the board (faster scoring, faster branching, ...) that provide a
-little more performance.
-
-The original commit had this performance:
-
-```
-th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})"
-GradientBoostTrainer: train single-thread : 500.414666 samples/sec; 39.966854 sec/tree, 199.834271 sec
-GradientBoostTrainer: train feature-parallel : 1227.228044 samples/sec; 16.296890 sec/tree, 81.484448 sec (4 threads)
-GradientBoostTrainer: train feature-parallel : 1385.926280 samples/sec; 14.430782 sec/tree, 72.153910 sec (8 threads)
-```
-
-and the new version has
-
-```
-GradientBoostTrainer: train single-thread : 15285.644631 samples/sec; 1.308417 sec/tree, 6.542086 sec
-GradientBoostTrainer: train feature-parallel : 43170.435932 samples/sec; 0.463280 sec/tree, 2.316400 sec (4 threads)
-GradientBoostTrainer: train feature-parallel : 50062.681239 samples/sec; 0.399499 sec/tree, 1.997496 sec (8 threads)
-```
-
-That represents a speedup of about 30.5x over the baseline for 1 thread and 36.1x for 8 threads.
-Note that performance doesn't increase much as we add threads: we use feature parallelism, and
-bagging leaves few features to evaluate per split (29 in this case, plausibly √836 rounded), so
-there are at most 29 independent units of work to spread across threads.
-If we disable bagging, then we have the following result with 8 threads and the new code:
-
-```
-GradientBoostTrainer: train single-thread : 590.823965 samples/sec; 33.851030 sec/tree, 169.255152 sec
-GradientBoostTrainer: train feature-parallel : 3232.188576 samples/sec; 6.187758 sec/tree, 30.938789 sec
-```
-
-So processing 836 features now is much faster than processing 29 before.
diff --git a/contrib/lua-torch/decisiontree/error.h b/contrib/lua-torch/decisiontree/error.h
deleted file mode 100644
index 18df3c939..000000000
--- a/contrib/lua-torch/decisiontree/error.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _ERROR_H_
-#define _ERROR_H_
-
-#include "luaT.h"
-#include <string.h>
-
-static inline int _lua_error(lua_State *L, int ret, const char* file, int line) {
- int pos_ret = ret >= 0 ? ret : -ret;
- return luaL_error(L, "ERROR: (%s, %d): (%d, %s)\n", file, line, pos_ret, strerror(pos_ret));
-}
-
-static inline int _lua_error_str(lua_State *L, const char *str, const char* file, int line) {
- return luaL_error(L, "ERROR: (%s, %d): (%s)\n", file, line, str);
-}
-
-static inline int _lua_error_str_str(lua_State *L, const char *str, const char* file, int line, const char *extra) {
- return luaL_error(L, "ERROR: (%s, %d): (%s: %s)\n", file, line, str, extra);
-}
-
-#define LUA_HANDLE_ERROR(L, ret) _lua_error(L, ret, __FILE__, __LINE__)
-#define LUA_HANDLE_ERROR_STR(L, str) _lua_error_str(L, str, __FILE__, __LINE__)
-#define LUA_HANDLE_ERROR_STR_STR(L, str, extra) _lua_error_str_str(L, str, __FILE__, __LINE__, extra)
-
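-// typical use in the C bindings (illustrative):
-//   if (!THTensor_(isContiguous)(grad))
-//     return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous");
-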
-#endif
diff --git a/contrib/lua-torch/decisiontree/generic/CartTree.c b/contrib/lua-torch/decisiontree/generic/CartTree.c
deleted file mode 100644
index eb29fcf02..000000000
--- a/contrib/lua-torch/decisiontree/generic/CartTree.c
+++ /dev/null
@@ -1,88 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/CartTree.c"
-#else
-
-static int nn_(tree_fast_score)(lua_State *L) {
- THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
- THTensor *score = luaT_checkudata(L, 3, torch_Tensor);
- long n_samples = THTensor_(size)(input, 0);
- long n_features = THTensor_(size)(input, 1);
- THTensor_(resize1d)(score, n_samples);
- real *input_data = THTensor_(data)(input);
- real *score_data = THTensor_(data)(score);
-
- lua_pushstring(L, "leftChild");
- const int left_child_string = 4;
- lua_pushstring(L, "rightChild");
- const int right_child_string = 5;
- lua_pushstring(L, "score");
- const int score_string = 6;
- lua_pushstring(L, "splitFeatureId");
- const int id_string = 7;
- lua_pushstring(L, "splitFeatureValue");
- const int value_string = 8;
-
- const int original_top = lua_gettop(L);
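-  // the five field-name strings above sit at fixed stack slots (4-8, right after the three
-  // arguments), so the hot loop below reuses them without re-interning; `node` holds the stack
-  // index of the table for the current tree node, starting at the root (argument 2)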
- for (long i = 0; i < n_samples; i++) {
- int node = 2;
- while (1) {
- int current_top = lua_gettop(L);
- lua_pushvalue(L, left_child_string);
- lua_rawget(L, node);
- lua_pushvalue(L, right_child_string);
- lua_rawget(L, node);
- if (lua_isnil(L, -2) && lua_isnil(L, -1)) {
- lua_pushvalue(L, score_string);
- lua_rawget(L, node);
- score_data[i] = lua_tonumber(L, -1);
- break;
- }
- if (lua_isnil(L, -2)) {
- // go to right
- node = current_top + 2;
- continue;
- }
- if (lua_isnil(L, -1)) {
- // go to left
- node = current_top + 1;
- continue;
- }
- lua_pushvalue(L, id_string);
- lua_rawget(L, node);
- lua_pushvalue(L, value_string);
- lua_rawget(L, node);
- long feature_id = lua_tointeger(L, -2);
- real feature_value = lua_tonumber(L, -1);
-
- real current_value = input_data[i * n_features + (feature_id-1)];
- if (current_value < feature_value) {
- // go to left
- node = current_top + 1;
- }
- else {
- // go to right
- node = current_top + 2;
- }
- }
- lua_pop(L, lua_gettop(L) - original_top);
- }
-
- lua_pop(L, 5);
-
- lua_pushvalue(L, 3);
- return 1;
-}
-
-static const struct luaL_Reg nn_(CT__) [] = {
- {"CartTreeFastScore", nn_(tree_fast_score)},
- {NULL, NULL}
-};
-
-static void nn_(CT_init)(lua_State *L)
-{
- luaT_pushmetatable(L, torch_Tensor);
- luaT_registeratname(L, nn_(CT__), "nn");
- lua_pop(L,1);
-}
-
-#endif
diff --git a/contrib/lua-torch/decisiontree/generic/DFD.c b/contrib/lua-torch/decisiontree/generic/DFD.c
deleted file mode 100644
index 599c4d794..000000000
--- a/contrib/lua-torch/decisiontree/generic/DFD.c
+++ /dev/null
@@ -1,157 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/DFD.c"
-#else
-
-static int nn_(DFD_computeOutput)(lua_State *L) {
- THLongTensor *outputkeys = luaT_checkudata(L, 1, "torch.LongTensor");
- THTensor *outputvalues = luaT_checkudata(L, 2, torch_Tensor);
- THLongTensor *root_ids = luaT_checkudata(L, 3, "torch.LongTensor");
- THLongTensor *left_child = luaT_checkudata(L, 4, "torch.LongTensor");
- THLongTensor *right_child = luaT_checkudata(L, 5, "torch.LongTensor");
- THLongTensor *split_feature_id = luaT_checkudata(L, 6, "torch.LongTensor");
- THTensor *split_feature_value = luaT_checkudata(L, 7, torch_Tensor);
- THTensor *input = luaT_checkudata(L, 8, torch_Tensor);
- char only_last_node = lua_toboolean(L, 9);
-
- // gets some important sizes from the input
- long batch_size = THTensor_(size)(input, 0);
- long input_size = THTensor_(size)(input, 1);
- long roots_size = THLongTensor_size(root_ids, 0);
- long depth = THLongTensor_size(outputkeys, 1);
-
- // keeps track of the number of nodes traversed in the trees by each sample.
- // each traversed node maps to an output feature having a value of 1
- long outputsize[batch_size];
- for (long i = 0; i < batch_size; i++)
- outputsize[i] = 0;
-
- // gets direct pointers to the memory of each tensor for efficiency
- long *root_ids_data = THLongTensor_data(root_ids);
- long *left_child_data = THLongTensor_data(left_child);
- long *right_child_data = THLongTensor_data(right_child);
- real *split_feature_value_data = THTensor_(data)(split_feature_value);
- long *split_feature_id_data = THLongTensor_data(split_feature_id);
- long *outputkeys_data = THLongTensor_data(outputkeys);
- real *input_data = THTensor_(data)(input);
-
- // for each sample in the batch
- for (long sample_index = 0; sample_index < batch_size; sample_index++) {
- // gets pointers to the direct memory associated with each sample for efficiency
- const long outputkeys_offset = sample_index * depth;
- const long input_offset = sample_index * input_size;
- long *local_outputkeys_data = &outputkeys_data[outputkeys_offset];
- real *local_input_data = &input_data[input_offset];
-
- // for each tree in the forest
- for (long i = 0; i < roots_size; i++) {
- int root = 1;
- long node_id = root_ids_data[i];
-
- // traverses the whole tree keeping track of which nodes were seen
- while (1) {
- if (root) {
- // root nodes aren't added to output because they are always traversed
- root = 0;
- }
- else if (!only_last_node) {
- long output_index = outputsize[sample_index];
- // updates the outputsize for all samples traversing this node
- outputsize[sample_index]++;
- // sets the traversed node as a feature in output for exampleIds
- local_outputkeys_data[output_index] = node_id;
- }
-
-        // gets the left and right nodes. values <= 0 represent a missing node
- long left_id = left_child_data[node_id-1];
- long right_id = right_child_data[node_id-1];
-
- if (left_id <= 0 && right_id <= 0) {
- if (only_last_node) {
- long output_index = outputsize[sample_index];
- outputsize[sample_index]++;
- local_outputkeys_data[output_index] = node_id;
- }
- // if no children, stops
- break;
- }
- else if (left_id <= 0) {
- // if no left child, traverses right node
- node_id = right_id;
- }
- else if (right_id <= 0) {
- // if no right child, traverses left node
- node_id = left_id;
- }
- else {
- // if both left and right children, finds the direction for this sample
- // first get the reference from the node
- real split_value = split_feature_value_data[node_id-1];
- long split_id = split_feature_id_data[node_id-1]-1;
-
- // then gets the value of the sample
- real node_value = local_input_data[split_id];
-          // and branches
- if (node_value < split_value)
- node_id = left_id;
- else
- node_id = right_id;
- }
- }
- }
- }
-
-  // now that we know which nodes were traversed for each sample, we can create the sparse output
- // with 1 entry pair for each sample
- THTensor *input_feature = THTensor_(new)();
- THLongTensor *indices = THLongTensor_new();
-
- // pushes the return table with 2 children tables
- lua_newtable(L);
- lua_pushinteger(L, 1);
- lua_newtable(L);
- lua_pushinteger(L, 2);
- lua_newtable(L);
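-  // stack layout from here on: {output, 1, keysTable, 2, valuesTable}; the loop below fills
-  // one entry per sample in each subtable, and the two lua_settable calls at the end store
-  // keysTable at output[1] and valuesTable at output[2]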
-
- // for each sample...
- for (long i = 0; i < batch_size; i++) {
- long j = outputsize[i];
- // selects the tensor lines from the dense output
- THLongTensor_select(indices, outputkeys, 0, i);
- THTensor_(select)(input_feature, outputvalues, 0, i);
-
- // narrows the keys to actual number of nodes traversed and saves to the output
- lua_pushinteger(L, i+1);
- luaT_pushudata(L, THLongTensor_newNarrow(indices, 0, 0, j), "torch.LongTensor");
- lua_settable(L, -5);
-
- // and narrows the values
- lua_pushinteger(L, i+1);
- luaT_pushudata(L, THTensor_(newNarrow)(input_feature, 0, 0, j), torch_Tensor);
- lua_settable(L, -3);
- }
-
- // pushes the two parts of the output into the output table
- lua_settable(L, -5);
- lua_settable(L, -3);
-
- THLongTensor_free(indices);
- THTensor_(free)(input_feature);
-
- return 1;
-}
-
-static const struct luaL_Reg nn_(DFD__) [] = {
- {"DFD_computeOutput", nn_(DFD_computeOutput)},
- {NULL, NULL}
-};
-
-static void nn_(DFD_init)(lua_State *L)
-{
- luaT_pushmetatable(L, torch_Tensor);
- luaT_registeratname(L, nn_(DFD__), "nn");
- lua_pop(L,1);
-}
-
-#endif
diff --git a/contrib/lua-torch/decisiontree/generic/GBDT.c b/contrib/lua-torch/decisiontree/generic/GBDT.c
deleted file mode 100644
index 31f5b025d..000000000
--- a/contrib/lua-torch/decisiontree/generic/GBDT.c
+++ /dev/null
@@ -1,392 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/GBDT.c"
-#else
-
-#include "GBDT_internal.h"
-#include "GBDT_internal.c"
-
-// note that each one of the functions to find the best split is a subset of the next.
-// first we have one that can only evaluate a single feature, using the logic in lua to control the
-// features
-// then we have one that can go over a shard of features, following the feature parallelism
-// introduced by the lua logic
-// and finally we have one that performs the feature parallelism itself in the special case of
-// dense tensors
-// these functions are provided for completeness and to test in case the logic is to be changed
-
-// finds the best split for a given node and feature
-static int nn_(gb_findBestFeatureSplit)(lua_State *L) {
- THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor");
- const int dataset_index = 2;
- if (!lua_isnumber(L, 3))
- return LUA_HANDLE_ERROR_STR(L, "third argument should be an integer");
- long feature_id = lua_tointeger(L, 3);
- if (!lua_isnumber(L, 4))
- return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer");
- long minLeafSize = lua_tointeger(L, 4);
- // Since minLeafSize == 1 corresponds to each sample in its own leaf, any value below it doesn't
- // make sense
- if (minLeafSize < 1)
- minLeafSize = 1;
- THTensor *grad = luaT_checkudata(L, 5, torch_Tensor);
- THTensor *hess = luaT_checkudata(L, 6, torch_Tensor);
-
- if (!THLongTensor_isContiguous(exampleIds))
- return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous");
- if (!THTensor_(isContiguous)(grad))
- return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous");
- if (!THTensor_(isContiguous)(hess))
- return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous");
-
- // initializes the static data
- nn_(GBInitialization) initialization_data;
- nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index);
-
- // initializes the dynamic data
- GBRunData run_data;
- gb_create_run_data(&run_data, minLeafSize);
-
- // finds the best state possible for the split
- nn_(GBBestState) bs;
- nn_(gb_find_best_feature_split)(L, &initialization_data, &bs, feature_id, &run_data);
-
- lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index);
-
-  // fills the table with the best split found; the lua logic above does everything else
- // if no state was found, returns nil
- if (bs.valid_state == 0) {
- lua_pop(L, 1);
- lua_pushnil(L);
- }
- else {
- nn_(gb_internal_split_info)(L, &bs, initialization_data.splitInfo_index);
- }
-
- gb_destroy_run_data(&run_data);
-
- return 1;
-}
-
-// finds the best split for a given node and shard of features
-// this is more efficient than calling the previous one multiple times
-static int nn_(gb_findBestSplit)(lua_State *L) {
- THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor");
- const int dataset_index = 2;
- THLongTensor *feature_ids = luaT_checkudata(L, 3, "torch.LongTensor");
- if (!lua_isnumber(L, 4))
- return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer");
- long minLeafSize = lua_tointeger(L, 4);
- // Since minLeafSize == 1 corresponds to each sample in its own leaf, any value below it doesn't
- // make sense
- if (minLeafSize < 1)
- minLeafSize = 1;
- if (!lua_isnumber(L, 5))
- return LUA_HANDLE_ERROR_STR(L, "fifth argument should be an integer");
- long shardId = lua_tointeger(L, 5);
- if (!lua_isnumber(L, 6))
- return LUA_HANDLE_ERROR_STR(L, "sixth argument should be an integer");
- long nShard = lua_tointeger(L, 6);
- THTensor *grad = luaT_checkudata(L, 7, torch_Tensor);
- THTensor *hess = luaT_checkudata(L, 8, torch_Tensor);
-
- if (!THLongTensor_isContiguous(exampleIds))
- return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous");
- if (!THTensor_(isContiguous)(grad))
- return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous");
- if (!THTensor_(isContiguous)(hess))
- return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous");
-
- // initializes the static data
- nn_(GBInitialization) initialization_data;
- nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index);
-
- // initializes the dynamic data
- GBRunData run_data;
- gb_create_run_data(&run_data, minLeafSize);
-
- // initializes to evaluate all the features in this shard
- nn_(GBBestState) global_bs;
- global_bs.valid_state = 0;
- long n_features = THLongTensor_size(feature_ids, 0);
- if (!THLongTensor_isContiguous(feature_ids))
- return LUA_HANDLE_ERROR_STR(L, "feature_ids must be contiguous");
- long *feature_ids_data = THLongTensor_data(feature_ids);
-
- // for every feature
- for (long i = 0; i < n_features; i++) {
- long feature_id = feature_ids_data[i];
- // if we are responsible for it
- if (nShard <= 1 || (feature_id % nShard) + 1 == shardId) {
- // finds the best state possible for the split
- nn_(GBBestState) bs;
- nn_(gb_find_best_feature_split)(L, &initialization_data, &bs, feature_id, &run_data);
-
- // if it's valid and better than one we found before, saves it
- if (bs.valid_state) {
- if (global_bs.valid_state == 0 || bs.gain < global_bs.gain) {
- global_bs = bs;
- }
- }
- }
- }
-
- lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index);
-
-  // fills the table with the best split found; the lua logic above does everything else
- // if no state was found, returns nil
- if (global_bs.valid_state == 0) {
- lua_pop(L, 1);
- lua_pushnil(L);
- }
- else {
- nn_(gb_internal_split_info)(L, &global_bs, initialization_data.splitInfo_index);
- }
-
- gb_destroy_run_data(&run_data);
-
- return 1;
-}
-
-// all the info we have to pass to the worker threads so that they can do their jobs
-// note that we do not pass the lua state since it isn't required. we perform direct C parallelism
-// instead of using lua's parallelism like with the previous version
-typedef struct {
- nn_(GBInitialization) *initialization_data;
- GBRunData *run_data;
- long *index;
- nn_(GBBestState) *global_bs;
- long n_features;
- long *feature_ids_data;
- pthread_mutex_t *mutex;
- THLongTensor *exampleIds;
- THTensor *input;
- THLongTensor **sorted_ids_per_feature;
-} nn_(ThreadInfo);
-
-// loops over all the features in parallel and finds the best global split
-static void* nn_(thread_worker)(void *arg) {
- nn_(ThreadInfo) *info = (nn_(ThreadInfo) *)arg;
-
- while (1) {
- pthread_mutex_lock(info->mutex);
- long index = (*info->index);
- (*info->index)++;
- pthread_mutex_unlock(info->mutex);
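-    // the shared index behaves as a dynamic work queue: each thread claims the next
-    // unclaimed feature, which load-balances features of uneven cost across threads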
-
- if (index >= info->n_features)
- break;
-
-    // performs part of steps (1) and (2) of gb_find_best_feature_split using pre-loaded data,
-    // without having to access the lua state
- long feature_id = info->feature_ids_data[index];
- THLongTensor *exampleIdsWithFeature_ret = info->exampleIds;
- THLongTensor *featureExampleIds = info->sorted_ids_per_feature[index];
- nn_(GBInitialization) *initialization_data = info->initialization_data;
- GBRunData *run_data = info->run_data;
-
- // performs steps (3) and (4) of gb_find_best_feature_split since (1) and (2) were already
- // performed before
- nn_(GBBestState) bs;
- nn_(gb_internal_create)(initialization_data->grad, initialization_data->hess,
- exampleIdsWithFeature_ret, &bs.state);
- nn_(gb_internal_get_best_split_special)(&bs, featureExampleIds, run_data->exampleMap,
- info->input, run_data->minLeafSize, feature_id);
-
- // saves to the global state if it's better
- if (bs.valid_state) {
- pthread_mutex_lock(info->mutex);
- if (info->global_bs->valid_state == 0 || bs.gain < info->global_bs->gain) {
- (*info->global_bs) = bs;
- }
- pthread_mutex_unlock(info->mutex);
- }
- }
-
- return NULL;
-}
-
-// finds the global best split by doing feature parallelism directly in C
-static int nn_(gb_findBestSplitFP)(lua_State *L) {
- THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor");
- const int dataset_index = 2;
- THLongTensor *feature_ids = luaT_checkudata(L, 3, "torch.LongTensor");
- if (!lua_isnumber(L, 4))
- return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer");
- long minLeafSize = lua_tointeger(L, 4);
- THTensor *grad = luaT_checkudata(L, 5, torch_Tensor);
- THTensor *hess = luaT_checkudata(L, 6, torch_Tensor);
- if (!lua_isnumber(L, 7))
- return LUA_HANDLE_ERROR_STR(L, "seventh argument should be an integer");
- long nThread = lua_tointeger(L, 7);
-
- if (!THLongTensor_isContiguous(exampleIds))
- return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous");
- if (!THTensor_(isContiguous)(grad))
- return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous");
- if (!THTensor_(isContiguous)(hess))
- return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous");
-
- pthread_mutex_t mutex;
- pthread_mutex_init(&mutex, NULL);
-
- // initializes the static data
- nn_(GBInitialization) initialization_data;
- nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index);
-
- // initializes the dynamic data
- GBRunData run_data;
- gb_create_run_data(&run_data, minLeafSize);
-
- // initializes to evaluate all the features
- nn_(GBBestState) global_bs;
- global_bs.valid_state = 0;
- long n_features = THLongTensor_size(feature_ids, 0);
- if (!THLongTensor_isContiguous(feature_ids))
- return LUA_HANDLE_ERROR_STR(L, "feature_ids must be contiguous");
- long *feature_ids_data = THLongTensor_data(feature_ids);
-
- THTensor *input = luaT_checkudata(L, initialization_data.input_index, torch_Tensor);
-
- // performs step (1) of gb_find_best_feature_split so that we don't have to pass the lua state
- THLongTensor *sorted_ids_per_feature[n_features];
- for (long i = 0; i < n_features; i++) {
- long feature_id = feature_ids_data[i];
- lua_pushvalue(L, initialization_data.getSortedFeature_index);
- lua_pushvalue(L, initialization_data.dataset_index);
- lua_pushinteger(L, feature_id);
- lua_call(L, 2, 1);
-
- THLongTensor *featureExampleIds = luaT_checkudata(L, -1, "torch.LongTensor");
- sorted_ids_per_feature[i] = featureExampleIds;
- }
-
-  // performs step (2) of gb_find_best_feature_split since it's the same for all features when the
- // data is dense
- long exampleIds_size = THLongTensor_size(initialization_data.exampleIds, 0);
- long *exampleIds_data = THLongTensor_data(initialization_data.exampleIds);
-
- int ret;
- kh_resize(long, run_data.exampleMap, exampleIds_size*8);
- for (long i = 0; i < exampleIds_size; i++)
- kh_put(long, run_data.exampleMap, exampleIds_data[i], &ret);
-
- // saves the info for the threads
- long index = 0;
- nn_(ThreadInfo) info;
- info.initialization_data = &initialization_data;
- info.run_data = &run_data;
- info.index = &index;
- info.global_bs = &global_bs;
- info.n_features = n_features;
- info.feature_ids_data = feature_ids_data;
- info.mutex = &mutex;
- info.exampleIds = exampleIds;
- info.input = input;
- info.sorted_ids_per_feature = sorted_ids_per_feature;
-
- pthread_t threads[nThread];
-
- // let the threads run like crazy over the features to find the minimum
- for (long i = 0; i < nThread; i++) {
- int ret = pthread_create(&threads[i], NULL, nn_(thread_worker), &info);
- if (ret)
-      return LUA_HANDLE_ERROR_STR(L, "failed to create thread");
- }
-
- for (long i = 0; i < nThread; i++) {
- int ret = pthread_join(threads[i], NULL);
- if (ret)
- return LUA_HANDLE_ERROR_STR(L, "failed to join thread");
- }
-
- lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index);
-
-  // fills the table with the best split found; the lua logic above does everything else
- // if no state was found, returns nil
- if (global_bs.valid_state == 0) {
- lua_pop(L, 1);
- lua_pushnil(L);
- }
- else {
- nn_(gb_internal_split_info)(L, &global_bs, initialization_data.splitInfo_index);
- }
-
- gb_destroy_run_data(&run_data);
- pthread_mutex_destroy(&mutex);
-
- return 1;
-}
-
-// performs an efficient branch of the current examples based on a split info provided
-static int nn_(gb_branch)(lua_State *L) {
- if (!lua_istable(L, 1))
- return LUA_HANDLE_ERROR_STR(L, "first argument must be a table");
- THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
- THLongTensor *exampleIds = luaT_checkudata(L, 3, "torch.LongTensor");
-
- // gets direct access to the dataset
- long n_exampleIds = THLongTensor_size(exampleIds, 0);
- long *exampleIds_data = THLongTensor_data(exampleIds);
- long n_features = THTensor_(size)(input, 1);
- real *input_data = THTensor_(data)(input);
-
- // creates the tensors to be returned
- luaT_pushudata(L, THLongTensor_new(), "torch.LongTensor");
- luaT_pushudata(L, THLongTensor_new(), "torch.LongTensor");
- THLongTensor *leftExampleIds = luaT_checkudata(L, 4, "torch.LongTensor");
- THLongTensor *rightExampleIds = luaT_checkudata(L, 5, "torch.LongTensor");
- THLongTensor_resize1d(leftExampleIds, n_exampleIds);
-
- // gets direct access to the examples
- THLongTensor *splitExampleIds = leftExampleIds;
- long *splitExampleIds_data = THLongTensor_data(splitExampleIds);
-
- // gets the split info
- lua_pushstring(L, "splitId");
- lua_rawget(L, 1);
- const long splitId = lua_tointeger(L, -1);
- lua_pushstring(L, "splitValue");
- lua_rawget(L, 1);
- const real splitValue = lua_tonumber(L, -1);
- lua_pop(L, 2);
-
- long leftIdx = 0, rightIdx = 0;
-
- // goes over all the samples dividing them into the two sides
- for (long i = 0; i < n_exampleIds; i++) {
- long exampleId = exampleIds_data[i];
- real val = input_data[(exampleId-1) * n_features + (splitId - 1)];
- if (val <= splitValue) {
- leftIdx++;
- splitExampleIds_data[leftIdx-1] = exampleId;
- }
- else {
- rightIdx++;
- splitExampleIds_data[n_exampleIds - rightIdx + 1 - 1] = exampleId;
- }
- }
-
-  // once done, the two returned tensors are just narrowed views over the same buffer. this is
-  // more efficient than allocating 2 separate tensors since we didn't know in advance where the
-  // split would land (how many examples go to each side), only that the total count is constant
- THLongTensor_narrow(rightExampleIds, splitExampleIds, 0, n_exampleIds-rightIdx+1-1, rightIdx);
- THLongTensor_narrow(leftExampleIds, splitExampleIds, 0, 0, leftIdx);
- return 2;
-}
-
-static const struct luaL_Reg nn_(GBDT__) [] = {
- {"GBDT_findBestFeatureSplit", nn_(gb_findBestFeatureSplit)},
- {"GBDT_findBestSplit", nn_(gb_findBestSplit)},
- {"GBDT_findBestSplitFP", nn_(gb_findBestSplitFP)},
- {"GBDT_branch", nn_(gb_branch)},
- {NULL, NULL}
-};
-
-static void nn_(GBDT_init)(lua_State *L)
-{
- luaT_pushmetatable(L, torch_Tensor);
- luaT_registeratname(L, nn_(GBDT__), "nn");
- lua_pop(L,1);
-}
-
-#endif
diff --git a/contrib/lua-torch/decisiontree/generic/GBDT_internal.c b/contrib/lua-torch/decisiontree/generic/GBDT_internal.c
deleted file mode 100644
index 739aabf25..000000000
--- a/contrib/lua-torch/decisiontree/generic/GBDT_internal.c
+++ /dev/null
@@ -1,312 +0,0 @@
-// initializes the optimization structure based on the arguments provided, either filling directly
-// or making calls to lua to load some kind of data
-static void nn_(gb_initialize)(lua_State *L, nn_(GBInitialization) *initialization_data,
- THLongTensor *exampleIds, THTensor *grad, THTensor *hess, int dataset_index) {
- initialization_data->dataset_index = dataset_index;
- initialization_data->exampleIds = exampleIds;
- initialization_data->grad = grad;
- initialization_data->hess = hess;
-
- lua_newtable(L);
- initialization_data->splitInfo_index = lua_gettop(L);
-
- lua_pushstring(L, "input");
- lua_gettable(L, dataset_index);
- initialization_data->input_index = lua_gettop(L);
-
- lua_pushstring(L, "getSortedFeature");
- lua_gettable(L, dataset_index);
- initialization_data->getSortedFeature_index = lua_gettop(L);
-}
-
-// initializes a state that will be passed to the optimizer
-static void nn_(gb_internal_create)(THTensor *grad, THTensor *hessian,
- THLongTensor *exampleIds, nn_(GBState)* s) {
- long *exampleIds_data = THLongTensor_data(exampleIds);
- long n_examples = THLongTensor_size(exampleIds, 0);
- accreal leftGradientSum = 0;
- accreal leftHessianSum = 0;
-
- real *grad_data = THTensor_(data)(grad);
- real *hessian_data = THTensor_(data)(hessian);
-
- // only sums the relevant gradients and hessians
- for (long i = 0; i < n_examples; i++) {
- long exampleId = exampleIds_data[i]-1;
- leftGradientSum += grad_data[exampleId];
- leftHessianSum += hessian_data[exampleId];
- }
-
- // we move data from the left branch to the right branch
- s->rightGradientSum = 0;
- s->rightHessianSum = 1;
- s->nExampleInRightBranch = 0;
- s->leftGradientSum = leftGradientSum;
- s->leftHessianSum = leftHessianSum + 1;
- s->nExampleInLeftBranch = n_examples;
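-  // (note that both hessian sums carry a +1 offset; this looks like a smoothing/regularization
-  // constant baked into the split scoring -- an assumption, as it isn't documented here)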
-
- // stores the loss in parent for efficiency
- real lossInParent = computeGradientBoostLoss(s->leftGradientSum + s->rightGradientSum,
- s->leftHessianSum + s->rightHessianSum);
- s->lossInParent = lossInParent;
-
- // caches the direct pointers to the data for efficiency
- s->grad_data = grad_data;
- s->hessian_data = hessian_data;
-}
-
-// computes the gain obtained by performing the split
-static real nn_(computeSplitGain)(nn_(GBState) *s) {
- real lossInLeftBranch = computeGradientBoostLoss(s->leftGradientSum, s->leftHessianSum);
- real lossInRightBranch = computeGradientBoostLoss(s->rightGradientSum, s->rightHessianSum);
- return lossInLeftBranch + lossInRightBranch - s->lossInParent;
-}
-
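-// (computeGradientBoostLoss is defined in GBDT_common.h, not shown here; presumably the usual
-// second-order objective of the form -gradSum^2 / hessianSum, so a more negative gain means a
-// better split -- which is why the callers keep the smallest gain)
-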
-// uses the state information to build the table required by the lua library about the best split
-static void nn_(gb_internal_split_info)(lua_State *L, nn_(GBBestState) *bs, int res) {
- long feature_id = bs->feature_id;
- real feature_value = bs->feature_value;
- real gain = bs->gain;
- nn_(GBState) *s = &bs->state;
- lua_pushstring(L, "splitGain");
- lua_pushnumber(L, gain);
- lua_rawset(L, res);
- lua_pushstring(L, "splitId");
- lua_pushinteger(L, feature_id);
- lua_rawset(L, res);
- lua_pushstring(L, "splitValue");
- lua_pushnumber(L, feature_value);
- lua_rawset(L, res);
- lua_pushstring(L, "leftChildSize");
- lua_pushinteger(L, s->nExampleInLeftBranch);
- lua_rawset(L, res);
- lua_pushstring(L, "rightChildSize");
- lua_pushinteger(L, s->nExampleInRightBranch);
- lua_rawset(L, res);
- lua_pushstring(L, "leftGradient");
- lua_pushnumber(L, s->leftGradientSum);
- lua_rawset(L, res);
- lua_pushstring(L, "rightGradient");
- lua_pushnumber(L, s->rightGradientSum);
- lua_rawset(L, res);
- lua_pushstring(L, "leftHessian");
- lua_pushnumber(L, s->leftHessianSum);
- lua_rawset(L, res);
- lua_pushstring(L, "rightHessian");
- lua_pushnumber(L, s->rightHessianSum);
- lua_rawset(L, res);
-}
-
-// core of the computation, where we loop over all the relevant samples looking for the best split
-// we can find
-static void nn_(gb_internal_get_best_split)(lua_State *L, nn_(GBBestState) *bs,
- THLongTensor *featureExampleIds, khash_t(long)* exampleMap, int input_table_index,
- long minLeafSize, long feature_id) {
- nn_(GBState) current_state;
- nn_(GBState) best_state;
- current_state = bs->state;
-
- real best_gain = INFINITY;
- real best_value = 0;
-
-  // if the data is dense, pre-loads direct access to it; sparse (table) inputs are
-  // looked up lazily inside the loop below
-  THTensor *input = NULL;
-  real *input_data = NULL;
-  long n_features = 0;
-  if (!lua_istable(L, input_table_index)) {
-    input = luaT_checkudata(L, input_table_index, torch_Tensor);
-    input_data = THTensor_(data)(input);
-    n_features = THTensor_(size)(input, 1);
-  }
-
- long stride = featureExampleIds->stride[0];
- long *featureExampleIds_data = THLongTensor_data(featureExampleIds);
-
- khiter_t k;
-
- real previousSplitValue = 0;
- // for each example with the given feature and from large to small value...
- for (long i = THLongTensor_size(featureExampleIds, 0)-1; i >= 0; i--) {
- long exampleId = featureExampleIds_data[i * stride];
-
- // checks if the sample is in the list of ones that have to be evaluated by this node
- k = kh_get(long, exampleMap, exampleId);
- if (k != kh_end(exampleMap)) {
- long exampleIdx = exampleId;
-
- // gets the split value, depending on whether the input is sparse or dense
- real splitValue;
- if (input_data) {
- splitValue = input_data[(exampleId-1) * n_features + feature_id-1];
- }
- else {
- lua_pushinteger(L, exampleId);
- lua_gettable(L, input_table_index);
- lua_pushinteger(L, feature_id);
- lua_gettable(L, -2);
- splitValue = lua_tonumber(L, -1);
- lua_pop(L, 2);
- }
-
- // performs one update of the state, moving a sample from the left branch to the right
- real gradient = current_state.grad_data[exampleIdx-1];
- real hessian = current_state.hessian_data[exampleIdx-1];
- current_state.leftGradientSum -= gradient;
- current_state.rightGradientSum += gradient;
- current_state.leftHessianSum -= hessian;
- current_state.rightHessianSum += hessian;
- current_state.nExampleInLeftBranch--;
- current_state.nExampleInRightBranch++;
-
- // since we remove from the left, once this becomes true, it stays true forever
- // hence we stop the loop
- if (current_state.nExampleInLeftBranch < minLeafSize)
- break;
-
- if (current_state.nExampleInRightBranch >= minLeafSize) {
- // if the values are equal between the steps, it doesn't make sense to evaluate the score
- // since we won't be able to separate the two
- if (previousSplitValue != splitValue) {
- // computes the gain **without including the parent** since it doesn't change as we move
- // examples between branches
- real lossInLeftBranch = computeGradientBoostLoss(current_state.leftGradientSum, current_state.leftHessianSum);
- real lossInRightBranch = computeGradientBoostLoss(current_state.rightGradientSum, current_state.rightHessianSum);
- real current_gain = lossInLeftBranch + lossInRightBranch;
- if (current_gain < best_gain) {
- best_gain = current_gain;
- best_value = splitValue;
- best_state = current_state;
- }
- }
- }
- previousSplitValue = splitValue;
- }
- }
-
- // if there is a valid gain, then marks the state as valid and fills the meta-info
- if (!isfinite(best_gain)) {
- bs->valid_state = 0;
- }
- else {
- bs->valid_state = 1;
- bs->state = best_state;
- bs->feature_id = feature_id;
- bs->gain = nn_(computeSplitGain)(&bs->state);
- bs->feature_value = best_value;
- }
-}
-
-// exactly like the previous version, but with direct access to the data for efficiency. since
-// the dense case doesn't touch the lua state at all, it can be evaluated from worker threads
-static void nn_(gb_internal_get_best_split_special)(nn_(GBBestState) *bs,
- THLongTensor *featureExampleIds, khash_t(long)* exampleMap, THTensor *input, long minLeafSize,
- long feature_id) {
- nn_(GBState) current_state;
- nn_(GBState) best_state;
- current_state = bs->state;
-
- real best_gain = INFINITY;
- real best_value = 0;
-
- real *input_data = NULL;
- long n_features = 0;
- input_data = THTensor_(data)(input);
- n_features = THTensor_(size)(input, 1);
-
- long stride = featureExampleIds->stride[0];
- long *featureExampleIds_data = THLongTensor_data(featureExampleIds);
-
- khiter_t k;
-
- real previousSplitValue = 0;
- for (long i = THLongTensor_size(featureExampleIds, 0)-1; i >= 0; i--) {
- long exampleId = featureExampleIds_data[i * stride];
-
- k = kh_get(long, exampleMap, exampleId);
- if (k != kh_end(exampleMap)) {
- long exampleIdx = exampleId;
-
- // THIS is the main part that changes. seems crazy to have a special case just for this, but
- // since there are a **lot** of samples to be evaluated, the "if" in the previous case can
- // become expensive
- real splitValue;
- splitValue = input_data[(exampleId-1) * n_features + feature_id-1];
-
- real gradient = current_state.grad_data[exampleIdx-1];
- real hessian = current_state.hessian_data[exampleIdx-1];
- current_state.leftGradientSum -= gradient;
- current_state.rightGradientSum += gradient;
- current_state.leftHessianSum -= hessian;
- current_state.rightHessianSum += hessian;
- current_state.nExampleInLeftBranch--;
- current_state.nExampleInRightBranch++;
-
- // since we remove from the left, once this becomes true, it stays true forever
- // hence we stop the loop
- if (current_state.nExampleInLeftBranch < minLeafSize)
- break;
-
-      // this check keeps failing until at least minLeafSize examples have moved to the right
-      // branch (nExampleInRightBranch starts at 0 and grows by one per pass)
- if (current_state.nExampleInRightBranch >= minLeafSize) {
- if (previousSplitValue != splitValue) {
- real lossInLeftBranch = computeGradientBoostLoss(current_state.leftGradientSum, current_state.leftHessianSum);
- real lossInRightBranch = computeGradientBoostLoss(current_state.rightGradientSum, current_state.rightHessianSum);
- real current_gain = lossInLeftBranch + lossInRightBranch;
- if (current_gain < best_gain) {
- best_gain = current_gain;
- best_value = splitValue;
- best_state = current_state;
- }
- }
- }
- previousSplitValue = splitValue;
- }
- }
-
- if (!isfinite(best_gain)) {
- bs->valid_state = 0;
- }
- else {
- bs->valid_state = 1;
- bs->state = best_state;
- bs->feature_id = feature_id;
- bs->gain = nn_(computeSplitGain)(&bs->state);
- bs->feature_value = best_value;
- }
-}
-
-// core of the computation to find the split for a given feature and is divided in 4 steps
-static void nn_(gb_find_best_feature_split)(lua_State *L,
- nn_(GBInitialization) *initialization_data, nn_(GBBestState) *bs, long feature_id,
- GBRunData *run_data) {
-
- // 1) loads the examples in the dataset ordered by their feature value
- lua_pushvalue(L, initialization_data->getSortedFeature_index);
- lua_pushvalue(L, initialization_data->dataset_index);
- lua_pushinteger(L, feature_id);
- lua_call(L, 2, 1);
-
- THLongTensor *featureExampleIds = luaT_checkudata(L, -1, "torch.LongTensor");
-
- // 2) processes the data to find the intersection between the examples in the dataset and the
- // examples the current node has to evaluate
- THLongTensor *exampleIdsWithFeature_ret = gb_internal_prepare(L, initialization_data->exampleIds,
- run_data->exampleIdsWithFeature_cache, initialization_data->input_index, feature_id,
- run_data->exampleMap);
- if (!exampleIdsWithFeature_ret) {
- bs->valid_state = 0;
- return;
- }
-
- // 3) creates a new state to be used by the optimizer
- nn_(gb_internal_create)(initialization_data->grad, initialization_data->hess,
- exampleIdsWithFeature_ret, &bs->state);
-
- // 4) optimize away!
- nn_(gb_internal_get_best_split)(L, bs, featureExampleIds, run_data->exampleMap,
- initialization_data->input_index, run_data->minLeafSize, feature_id);
-}
diff --git a/contrib/lua-torch/decisiontree/generic/GBDT_internal.h b/contrib/lua-torch/decisiontree/generic/GBDT_internal.h
deleted file mode 100644
index 7119365cf..000000000
--- a/contrib/lua-torch/decisiontree/generic/GBDT_internal.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// representation of a state used while searching for the best split
-typedef struct {
- real leftGradientSum, rightGradientSum;
- real leftHessianSum, rightHessianSum;
- real lossInParent;
- long nExampleInLeftBranch, nExampleInRightBranch;
- real *grad_data, *hessian_data;
-} nn_(GBState);
-
-// representation for the best state found for a given feature
-typedef struct {
- nn_(GBState) state;
- real gain;
- long feature_id;
- real feature_value;
- int valid_state;
-} nn_(GBBestState);
-
-// full data that must be initialized before calling the optimizer
-typedef struct {
- // *_index represent positions on the lua stack
- int dataset_index;
- int splitInfo_index;
- int input_index;
- // position of the dataset's function to return the samples ordered for a given feature
- int getSortedFeature_index;
-
- // samples that this node has to evaluate
- THLongTensor *exampleIds;
-
- // cached gradient and hessian for all data
- THTensor *grad;
- THTensor *hess;
-} nn_(GBInitialization);
diff --git a/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c b/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c
deleted file mode 100644
index f2ea1ef09..000000000
--- a/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/LogitBoostCriterion.c"
-#else
-
-#define EPS 1e-12
-
-static int nn_(LogitBoostCriterion_updateOutput)(lua_State *L)
-{
- THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
- THTensor *target = luaT_checkudata(L, 2, torch_Tensor);
- THTensor *output = luaT_checkudata(L, 3, torch_Tensor);
- int sizeAverage = lua_toboolean(L, 4);
-
- if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) {
- luaL_error(L, "inconsistent input and target size");
- }
- THTensor_(resize1d)(output, 1);
-
- real sum = 0;
-
- TH_TENSOR_APPLY2(real, input, real, target,
- real x = *input_data;
- real y = *target_data;
- // math.log(1 + math.exp(target[i] <= 0 and input[i] or -input[i]))
- sum += log(1 + exp(y <= 0 ? x : -x));
- );
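-  // i.e. the logistic loss log(1 + exp(-sign(y) * x)), with targets encoded by their sign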
-
- if (sizeAverage)
- sum /= THTensor_(nElement)(input);
-
- THTensor_(set1d)(output, 0, sum);
- return 0;
-}
-
-static int nn_(LogitBoostCriterion_updateGradInput)(lua_State *L)
-{
- THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
- THTensor *target = luaT_checkudata(L, 2, torch_Tensor);
- THTensor *gradInput = luaT_checkudata(L, 3, torch_Tensor);
-
- if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) {
- luaL_error(L, "inconsistent input and target size");
- }
- THTensor_(resizeAs)(gradInput, input);
-
- TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
- real x = *input_data;
- real y = *target_data;
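-    // numerically stable sigmoid: each branch only ever exponentiates a non-positive value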
- real p = (x >= 0) ? (1 / (1 + exp(-x))) : (1 - 1 / (1 + exp(x)));
- *gradInput_data = (y <= 0) ? p : (p - 1);
- );
-
- return 0;
-}
-
-static int nn_(LogitBoostCriterion_updateHessInput)(lua_State *L)
-{
- THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
- THTensor *target = luaT_checkudata(L, 2, torch_Tensor);
- THTensor *hessInput = luaT_checkudata(L, 3, torch_Tensor);
-
- if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) {
- luaL_error(L, "inconsistent input and target size");
- }
- THTensor_(resizeAs)(hessInput, input);
-
- TH_TENSOR_APPLY3(real, hessInput, real, input, real, target,
- real x = *input_data;
- real p = (x >= 0) ? (1 / (1 + exp(-x))) : (1 - 1 / (1 + exp(x)));
- *hessInput_data = p * (1.0 - p);
- );
-
- return 0;
-}
-
-static const struct luaL_Reg nn_(LogitBoostCriterion__) [] = {
- {"LogitBoostCriterion_updateOutput", nn_(LogitBoostCriterion_updateOutput)},
- {"LogitBoostCriterion_updateGradInput", nn_(LogitBoostCriterion_updateGradInput)},
- {"LogitBoostCriterion_updateHessInput", nn_(LogitBoostCriterion_updateHessInput)},
- {NULL, NULL}
-};
-
-static void nn_(LogitBoostCriterion_init)(lua_State *L)
-{
- luaT_pushmetatable(L, torch_Tensor);
- luaT_registeratname(L, nn_(LogitBoostCriterion__), "nn");
- lua_pop(L,1);
-}
-
-#endif
diff --git a/contrib/lua-torch/decisiontree/generic/S2D.c b/contrib/lua-torch/decisiontree/generic/S2D.c
deleted file mode 100644
index 2392ee7c8..000000000
--- a/contrib/lua-torch/decisiontree/generic/S2D.c
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/S2D.c"
-#else
-
-static int nn_(S2D_computeOutput)(lua_State *L) {
- THTensor *output = luaT_checkudata(L, 1, torch_Tensor);
- const int keys_index = 2;
- const int values_index = 3;
- const int masks_index = 4;
-
-  if (!lua_istable(L, keys_index))
-    return LUA_HANDLE_ERROR_STR(L, "expected position 2 to be a table");
-  if (!lua_istable(L, values_index))
-    return LUA_HANDLE_ERROR_STR(L, "expected position 3 to be a table");
-  if (!lua_istable(L, masks_index))
-    return LUA_HANDLE_ERROR_STR(L, "expected position 4 to be a table");
-
-
- THLongTensor *features = luaT_checkudata(L, 5, "torch.LongTensor");
-
- const int original_top = lua_gettop(L);
-
- long outputsize = THLongTensor_size(features, 0);
- long batch_size = lua_objlen(L, keys_index);
-
- // initializes output
- THTensor_(resize2d)(output, batch_size, outputsize);
- THTensor_(zero)(output);
- real *output_data = THTensor_(data)(output);
-
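-  // each sample's sparse (key, value) pairs get scattered into row i of the dense output;
-  // keys whose mask byte is 0 are skipped, leaving those entries at zero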
- // iterates over samples
- lua_pushnil(L);
- const int local_top = lua_gettop(L);
- while (lua_next(L, keys_index) != 0) {
- // gets data corresponding to the current sample
- long i = lua_tointeger(L, -2)-1;
- real *current_output_data = &output_data[i * outputsize];
- THLongTensor *keys = luaT_checkudata(L, -1, "torch.LongTensor");
- lua_rawgeti(L, values_index, i+1);
- THTensor *values = luaT_checkudata(L, -1, torch_Tensor);
- lua_rawgeti(L, masks_index, i+1);
- THByteTensor *mask = luaT_checkudata(L, -1, "torch.ByteTensor");
-
- long n_keys = THLongTensor_size(keys, 0);
- long n_values = THTensor_(size)(values, 0);
-
- // quick safety check
- if (n_keys != n_values)
- return LUA_HANDLE_ERROR_STR(L, "keys and values have to have the same size");
-
- // gets the direct memory pointers
- long *keys_data = THLongTensor_data(keys);
- real *values_data = THTensor_(data)(values);
- unsigned char *mask_data = THByteTensor_data(mask);
-
- // for each value in the sparse input...
- for (long j = 0; j < n_keys; j++) {
- // loads the value and key
- real current_value = values_data[j];
- long current_key = keys_data[j];
- unsigned char current_mask = mask_data[j];
-
- // if the feature is present in the map
- if (current_mask)
- // saves in the given position
- current_output_data[current_key-1] = current_value;
- }
-    // cleans up the trash we create while iterating over keys to keep the stack from overflowing
- lua_pop(L, lua_gettop(L) - local_top);
- }
-
- // cleans up the trash we added to the stack
- lua_pop(L, lua_gettop(L) - original_top);
-
- return 0;
-}
-
-static const struct luaL_Reg nn_(S2D__) [] = {
- {"S2D_computeOutput", nn_(S2D_computeOutput)},
- {NULL, NULL}
-};
-
-static void nn_(S2D_init)(lua_State *L)
-{
- luaT_pushmetatable(L, torch_Tensor);
- luaT_registeratname(L, nn_(S2D__), "nn");
- lua_pop(L,1);
-}
-
-#endif
diff --git a/contrib/lua-torch/decisiontree/hash_map.c b/contrib/lua-torch/decisiontree/hash_map.c
deleted file mode 100644
index 2c679e207..000000000
--- a/contrib/lua-torch/decisiontree/hash_map.c
+++ /dev/null
@@ -1,445 +0,0 @@
-#include "utils.h"
-#include "hash_map.h"
-#include "internal_hash_map.h"
-#include <pthread.h>
-
-hash_map_t hash_map_init(void) {
- return kh_init(long);
-}
-
-void hash_map_destroy(hash_map_t h_) {
- internal_hash_map_t h = (internal_hash_map_t) h_;
- kh_destroy(long, h);
-}
-
-void hash_map_clear(hash_map_t h_) {
- internal_hash_map_t h = (internal_hash_map_t) h_;
- kh_clear(long, h);
-}
-
-int hash_map_put(hash_map_t h_, long key, long val) {
- internal_hash_map_t h = (internal_hash_map_t) h_;
- int ret;
- khiter_t k = kh_put(long, h, key, &ret);
- ret = (ret >= 0);
- if (ret)
- kh_value(h, k) = val;
- return ret;
-}
-
-int hash_map_put_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_) {
- long *keys = THLongTensor_data(keys_);
- long *vals = THLongTensor_data(vals_);
- long size = get_tensor_size(keys_, Long);
- for (long i = 0; i < size; i++)
- if (!hash_map_put(h_, keys[i], vals[i]))
- return 0;
- return 1;
-}
-
-int hash_map_fill(hash_map_t h_, long key, long *counter) {
- internal_hash_map_t h = (internal_hash_map_t) h_;
- khiter_t k = kh_get(long, h, key);
- if (k == kh_end(h))
- return hash_map_put(h_, key, ++(*counter));
- return 1;
-}
-
-int hash_map_fill_tensor(hash_map_t h_, THLongTensor *keys_, long *counter) {
- long *keys = THLongTensor_data(keys_);
- long size = get_tensor_size(keys_, Long);
- for (long i = 0; i < size; i++)
- if (!hash_map_fill(h_, keys[i], counter))
- return 0;
- return 1;
-}
-
-int hash_map_get(hash_map_t h_, long key, long* val) {
- internal_hash_map_t h = (internal_hash_map_t) h_;
- khiter_t k = kh_get(long, h, key);
- if (k == kh_end(h))
- return 0;
- *val = kh_value(h, k);
- return 1;
-}
-
-void hash_map_get_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_, THByteTensor *mask_) {
- long *keys = THLongTensor_data(keys_);
-  long *vals = THLongTensor_data(vals_);
- unsigned char *mask = THByteTensor_data(mask_);
- long size = get_tensor_size(keys_, Long);
- for (long i = 0; i < size; i++)
- mask[i] = hash_map_get(h_, keys[i], &vals[i]);
-}
-
-void hash_map_del(hash_map_t h_, long key) {
- internal_hash_map_t h = (internal_hash_map_t) h_;
- khiter_t k = kh_get(long, h, key);
- if (k != kh_end(h))
- kh_del(long, h, k);
-}
-
-void hash_map_del_tensor(hash_map_t h_, THLongTensor *keys_) {
- long *keys = THLongTensor_data(keys_);
- long size = get_tensor_size(keys_, Long);
- for (long i = 0; i < size; i++)
- hash_map_del(h_, keys[i]);
-}
-
-size_t hash_map_size(hash_map_t h_) {
- internal_hash_map_t h = (internal_hash_map_t) h_;
- return kh_size(h);
-}
-
-void hash_map_to_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_) {
- internal_hash_map_t h = (internal_hash_map_t) h_;
- long *keys = THLongTensor_data(keys_);
- long *vals = THLongTensor_data(vals_);
- long key, val, i = 0;
- kh_foreach(h, key, val, {
- keys[i] = key;
- vals[i] = val;
- i++;
- });
-}
-
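-// illustrative sketch (not from the original file): typical use of the plain C API above
-//
-//   hash_map_t h = hash_map_init();
-//   hash_map_put(h, 42, 7);           /* map key 42 -> value 7 */
-//   long val;
-//   if (hash_map_get(h, 42, &val))    /* sets val to 7; returns 1 on hit, 0 on miss */
-//     { /* ... */ }
-//   hash_map_del(h, 42);
-//   hash_map_destroy(h);
-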
-static void autolock(hash_map_lua_t *h) {
- if (h->autolock) {
- pthread_mutex_lock(&h->mutex);
- }
-}
-
-static void autounlock(hash_map_lua_t *h) {
- if (h->autolock) {
- pthread_mutex_unlock(&h->mutex);
- }
-}
-
-int hash_map_autolock_on_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- h->autolock = 1;
- return 0;
-}
-
-int hash_map_autolock_off_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- h->autolock = 0;
- return 0;
-}
-
-int hash_map_init_lua(lua_State *L) {
- hash_map_lua_t **hp = (hash_map_lua_t**)lua_newuserdata(L, sizeof(hash_map_lua_t*));
- *hp = (hash_map_lua_t*)malloc(sizeof(hash_map_lua_t));
- hash_map_lua_t *h = *hp;
- h->refcount = 1;
- h->counter = 0;
- h->autolock = 0;
- h->h = hash_map_init();
-
- pthread_mutexattr_t mutex_attr;
- pthread_mutexattr_init(&mutex_attr);
- pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_RECURSIVE);
- pthread_mutex_init(&h->mutex, &mutex_attr);
-
- luaL_getmetatable(L, "dt.HashMap");
- lua_setmetatable(L, -2);
- return 1;
-}
-
-int hash_map_gc_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- if (THAtomicDecrementRef(&h->refcount)) {
- pthread_mutex_destroy(&h->mutex);
- hash_map_destroy(h->h);
- free(h);
- }
- return 0;
-}
-
-int hash_map_retain_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- THAtomicIncrementRef(&h->refcount);
- return 0;
-}
-
-int hash_map_metatablename_lua(lua_State *L) {
- lua_pushstring(L, "dt.HashMap");
- return 1;
-}
-
-int hash_map_clear_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- autolock(h);
- hash_map_clear(h->h);
- autounlock(h);
- return 0;
-}
-
-int hash_map_put_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- int ret;
-#if LUA_VERSION_NUM <= 501
-#define lua_isinteger lua_isnumber
-#endif
- if (lua_isinteger(L, 2)) {
- if (!lua_isinteger(L, 3))
- return LUA_HANDLE_ERROR_STR(L, "second parameter is not a number");
- long key = lua_tointeger(L, 2);
- long val = lua_tointeger(L, 3);
- autolock(h);
- ret = hash_map_put(h->h, key, val);
- autounlock(h);
- }
- else {
- THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
- THLongTensor *vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor");
- check_tensor(L, keys, THLongTensor);
- check_tensor(L, vals, THLongTensor);
- check_tensors(L, keys, vals);
- autolock(h);
- ret = hash_map_put_tensor(h->h, keys, vals);
- autounlock(h);
- }
- if (!ret)
- return LUA_HANDLE_ERROR_STR(L, "failed to put into hash map");
- return 0;
-}
-
-int hash_map_fill_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- int ret;
- if (lua_isinteger(L, 2)) {
- long key = lua_tointeger(L, 2);
- autolock(h);
- ret = hash_map_fill(h->h, key, &h->counter);
- autounlock(h);
- }
- else {
- THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
- check_tensor(L, keys, THLongTensor);
- autolock(h);
- ret = hash_map_fill_tensor(h->h, keys, &h->counter);
- autounlock(h);
- }
- if (!ret)
- return LUA_HANDLE_ERROR_STR(L, "failed to fill into hash map");
- return 0;
-}
-
-int hash_map_adjust_counter_lua(lua_State *L) {
- hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1);
- internal_hash_map_t h = (internal_hash_map_t) h_->h;
-
- long val;
- kh_foreach_value(h, val, {
- if (val >= h_->counter)
- h_->counter = val;
- });
- return 0;
-}
-
-int hash_map_set_counter_lua(lua_State *L) {
- hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1);
- h_->counter = lua_tointeger(L, 2);
- return 0;
-}
-
-int hash_map_get_counter_lua(lua_State *L) {
- hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1);
- lua_pushinteger(L, h_->counter);
- return 1;
-}
-
-static int hash_map_get_tensor_lua(lua_State *L, hash_map_lua_t *h, int inplace) {
- THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
- check_tensor(L, keys, THLongTensor);
- THLongTensor *vals = inplace ? keys : NULL;
- THByteTensor *mask = NULL;
-
- int maskIdx = inplace ? 3 : 4;
-
- if (!inplace) {
- if (lua_gettop(L) < 3) {
- vals = THLongTensor_new();
- } else {
- vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor");
- check_tensor(L, vals, THLongTensor);
- }
- }
-
- if (lua_gettop(L) < maskIdx) {
- mask = THByteTensor_new();
- } else {
- mask = (THByteTensor *)luaT_checkudata(L, maskIdx, "torch.ByteTensor");
- check_tensor(L, mask, THByteTensor);
- }
-
- int n_dim = THLongTensor_nDimension(keys);
- THLongStorage *st = THLongStorage_newWithSize1(n_dim);
- for (int i = 0; i < n_dim; i++) {
- THLongStorage_set(st, i, THLongTensor_size(keys, i));
- }
- THByteTensor_resize(mask, st, NULL);
- if (!inplace) THLongTensor_resize(vals, st, NULL);
- THLongStorage_free(st);
-
- autolock(h);
- hash_map_get_tensor(h->h, keys, vals, mask);
- autounlock(h);
-
- if (!inplace && lua_gettop(L) < 3)
- luaT_pushudata(L, vals, "torch.LongTensor");
- if (lua_gettop(L) < maskIdx)
- luaT_pushudata(L, mask, "torch.ByteTensor");
-
- return 2;
-}
-
-static int hash_map_get_table_lua(lua_State *L, hash_map_lua_t *h, int inplace) {
- const int kidx = 2;
- const int vidx = inplace ? 2 : 3;
- const int midx = inplace ? 3 : 4;
- const int narg = lua_gettop(L);
-
- if (inplace) {
- if (narg < 3) {
- LUA_HANDLE_ERROR_STR(L, "HashMap.getInplace requires two arguments.");
- }
- } else {
- if (narg < 4) {
- LUA_HANDLE_ERROR_STR(L, "HashMap.get requires three arguments.");
- }
- }
-
- int count = push_table_contents(L, kidx);
- verify_push_table_contents(L, vidx, count);
- verify_push_table_contents(L, midx, count);
-
- THLongTensor *keys;
- THLongTensor *vals;
- THByteTensor *mask;
- for (int i = count - 1; i >= 0; i--) {
- int maskIdx = i - count;
- int valIdx = maskIdx - count;
- int keyIdx = inplace ? valIdx : (valIdx - count);
-
- keys = (THLongTensor *)luaT_checkudata(L, keyIdx, "torch.LongTensor");
- check_tensor(L, keys, THLongTensor);
- if (inplace) {
- vals = keys;
- } else {
- vals = (THLongTensor *)luaT_checkudata(L, valIdx, "torch.LongTensor");
- }
- mask = (THByteTensor *)luaT_checkudata(L, maskIdx, "torch.ByteTensor");
-
- int n_dim = THLongTensor_nDimension(keys);
- THLongStorage *st = THLongStorage_newWithSize1(n_dim);
-      for (int d = 0; d < n_dim; d++) {
-         THLongStorage_set(st, d, THLongTensor_size(keys, d));
-      }
- THByteTensor_resize(mask, st, NULL);
- THLongTensor_resize(vals, st, NULL);
- THLongStorage_free(st);
-
- autolock(h);
- hash_map_get_tensor(h->h, keys, vals, mask);
- autounlock(h);
- }
- lua_pop(L, (narg - 1) * count);
- return 2;
-}
-
-int hash_map_get_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- if (lua_isinteger(L, 2)) {
- long key = lua_tointeger(L, 2);
- long val;
- autolock(h);
- int ret = hash_map_get(h->h, key, &val);
- autounlock(h);
- if (ret) {
- lua_pushinteger(L, val);
- lua_pushinteger(L, 1);
- }
- else {
- lua_pushinteger(L, 0);
- lua_pushinteger(L, 0);
- }
- } else if (lua_istable(L, 2)) {
- return hash_map_get_table_lua(L, h, 0);
- } else {
- return hash_map_get_tensor_lua(L, h, 0);
- }
- return 2;
-}
-
-int hash_map_get_inplace_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- if (lua_isinteger(L, 2)) {
- LUA_HANDLE_ERROR_STR(L, "HashMap.getInplace does not support integer arguments.");
- } else if (lua_istable(L, 2)) {
- return hash_map_get_table_lua(L, h, 1);
- } else {
- return hash_map_get_tensor_lua(L, h, 1);
- }
- return 2;
-}
-
-int hash_map_del_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- if (lua_isinteger(L, 2)) {
- long key = lua_tointeger(L, 2);
- autolock(h);
- hash_map_del(h->h, key);
- autounlock(h);
- }
- else {
- THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
- autolock(h);
- hash_map_del_tensor(h->h, keys);
- autounlock(h);
- }
- return 0;
-}
-
-int hash_map_size_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- long size = hash_map_size(h->h);
- lua_pushinteger(L, size);
- return 1;
-}
-
-int hash_map_to_tensor_lua(lua_State *L) {
- hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
- THLongTensor *keys, *vals;
-
- if (lua_gettop(L) < 2) {
- keys = THLongTensor_new();
- }
- else {
- keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
- check_tensor(L, keys, THLongTensor);
- }
-
- if (lua_gettop(L) < 3) {
- vals = THLongTensor_new();
- }
- else {
- vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor");
- check_tensor(L, vals, THLongTensor);
- }
-
- size_t size = hash_map_size(h->h);
- THLongTensor_resize1d(keys, size);
- THLongTensor_resize1d(vals, size);
-
- autolock(h);
- hash_map_to_tensor(h->h, keys, vals);
- autounlock(h);
-
- if (lua_gettop(L) < 2)
- luaT_pushudata(L, keys, "torch.LongTensor");
- if (lua_gettop(L) < 3)
- luaT_pushudata(L, vals, "torch.LongTensor");
- return 2;
-}
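-
-/* Lua-side usage sketch (illustrative only; it assumes the constructor is
- * exposed as dt.HashMap and the methods are registered as in init.c below):
- *
- *   local h = dt.HashMap()
- *   h:put(1, 10)                        -- scalar key/value pair
- *   local val, found = h:get(1)         -- val == 10, found == 1
- *   h:put(torch.LongTensor{2,3}, torch.LongTensor{20,30})
- *   print(h:size())                     -- 3
- *   local keys, vals = h:toTensors()    -- dump contents to two LongTensors
- */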
diff --git a/contrib/lua-torch/decisiontree/hash_map.h b/contrib/lua-torch/decisiontree/hash_map.h
deleted file mode 100644
index 5b215e4ca..000000000
--- a/contrib/lua-torch/decisiontree/hash_map.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "luaT.h"
-#include "TH.h"
-
-typedef void* hash_map_t;
-
-hash_map_t hash_map_init(void);
-void hash_map_destroy(hash_map_t);
-void hash_map_clear(hash_map_t);
-int hash_map_put(hash_map_t, long key, long val);
-int hash_map_put_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_);
-int hash_map_fill(hash_map_t, long key, long *counter);
-int hash_map_fill_tensor(hash_map_t, THLongTensor *keys_, long *counter);
-int hash_map_get(hash_map_t, long key, long *val);
-void hash_map_get_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_, THByteTensor *mask_);
-void hash_map_del(hash_map_t, long key);
-void hash_map_del_tensor(hash_map_t, THLongTensor *keys_);
-size_t hash_map_size(hash_map_t);
-void hash_map_to_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_);
-
-int hash_map_autolock_on_lua(lua_State *L);
-int hash_map_autolock_off_lua(lua_State *L);
-int hash_map_init_lua(lua_State *L);
-int hash_map_gc_lua(lua_State *L);
-int hash_map_retain_lua(lua_State *L);
-int hash_map_metatablename_lua(lua_State *L);
-int hash_map_clear_lua(lua_State *L);
-int hash_map_put_lua(lua_State *L);
-int hash_map_fill_lua(lua_State *L);
-int hash_map_adjust_counter_lua(lua_State *L);
-int hash_map_set_counter_lua(lua_State *L);
-int hash_map_get_counter_lua(lua_State *L);
-int hash_map_get_lua(lua_State *L);
-int hash_map_get_inplace_lua(lua_State *L);
-int hash_map_del_lua(lua_State *L);
-int hash_map_size_lua(lua_State *L);
-int hash_map_to_tensor_lua(lua_State *L);
diff --git a/contrib/lua-torch/decisiontree/init.c b/contrib/lua-torch/decisiontree/init.c
deleted file mode 100644
index 276241e8e..000000000
--- a/contrib/lua-torch/decisiontree/init.c
+++ /dev/null
@@ -1,77 +0,0 @@
-#include "TH.h"
-#include "luaT.h"
-
-#ifdef _OPENMP
-#include "omp.h"
-#endif
-
-#include "error.h"
-#include "hash_map.h"
-
-#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
-#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor)
-#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME)
-
-#include "generic/LogitBoostCriterion.c"
-#include "THGenerateFloatTypes.h"
-
-#include "generic/DFD.c"
-#include "THGenerateFloatTypes.h"
-
-#include "generic/S2D.c"
-#include "THGenerateFloatTypes.h"
-
-#include "generic/CartTree.c"
-#include "THGenerateFloatTypes.h"
-
-#include "GBDT_common.h"
-#include "generic/GBDT.c"
-#include "THGenerateFloatTypes.h"
-
-static const struct luaL_Reg decisiontree_hash_map_routines[] = {
- {"__gc", hash_map_gc_lua},
- {"retain", hash_map_retain_lua},
- {"metatablename", hash_map_metatablename_lua},
- {"clear", hash_map_clear_lua},
- {"put", hash_map_put_lua},
- {"fill", hash_map_fill_lua},
- {"adjustCounter", hash_map_adjust_counter_lua},
- {"getCounter", hash_map_get_counter_lua},
- {"setCounter", hash_map_set_counter_lua},
- {"get", hash_map_get_lua},
- {"getInplace", hash_map_get_inplace_lua},
- {"del", hash_map_del_lua},
- {"size", hash_map_size_lua},
- {"safe", hash_map_autolock_on_lua},
- {"unsafe", hash_map_autolock_off_lua},
- {"toTensors", hash_map_to_tensor_lua},
- {"new", hash_map_init_lua},
- {NULL, NULL}
-};
-
-DLL_EXPORT int luaopen_libdecisiontree(lua_State *L)
-{
- // HashMap
- luaL_newmetatable(L, "dt.HashMap");
- lua_pushstring(L, "__index");
- lua_pushvalue(L, -2);
- lua_settable(L, -3);
- luaT_setfuncs(L, decisiontree_hash_map_routines, 0);
-
- nn_FloatLogitBoostCriterion_init(L);
- nn_DoubleLogitBoostCriterion_init(L);
-
- nn_FloatDFD_init(L);
- nn_DoubleDFD_init(L);
-
- nn_FloatS2D_init(L);
- nn_DoubleS2D_init(L);
-
- nn_FloatCT_init(L);
- nn_DoubleCT_init(L);
-
- nn_FloatGBDT_init(L);
- nn_DoubleGBDT_init(L);
-
- return 1;
-}
diff --git a/contrib/lua-torch/decisiontree/init.lua b/contrib/lua-torch/decisiontree/init.lua
deleted file mode 100644
index 26f790b60..000000000
--- a/contrib/lua-torch/decisiontree/init.lua
+++ /dev/null
@@ -1,70 +0,0 @@
-require 'paths'
---require 'xlua'
-require 'string'
-require 'os'
---require 'sys'
-require 'nn'
-
--- these requires return local variables; code that needs them will
--- re-require them later. Requiring here just ensures they are loaded.
-require 'moses'
-
-unpack = unpack or table.unpack
-
-local dt = require 'decisiontree._env'
-
--- c lib:
-require "paths"
-paths.require 'libdecisiontree'
-
-dt.HashMap = torch.getmetatable("dt.HashMap").new
-
-dt.EPSILON = 1e-6
-
--- experimental Tensor-like container
-require 'decisiontree.SparseTensor'
-
--- functions
-require 'decisiontree.math'
-require 'decisiontree.utils'
-
--- for multi-threading
---require 'decisiontree.WorkPool'
-
--- abstract classes
-require 'decisiontree.DecisionTree'
-require 'decisiontree.DecisionForest'
-require 'decisiontree.DecisionForestTrainer'
-require 'decisiontree.TreeState'
-
--- for CaRTree inference
-require 'decisiontree.CartNode'
-require 'decisiontree.CartTree'
-
--- Criterions (extended with updateHessInput and backward2)
-require 'decisiontree.MSECriterion'
-require 'decisiontree.LogitBoostCriterion'
-
--- Used by both RandomForestTrainer and GradientBoostTrainer
-require 'decisiontree.CartTrainer'
-
--- Used by CartTrainer
-require 'decisiontree.DataSet'
-
--- Random Forest Training
-require 'decisiontree.RandomForestTrainer'
-require 'decisiontree.GiniState' -- TreeState subclass
-
--- Gradient Boosted Decision Tree Training
-require 'decisiontree.GradientBoostTrainer'
-require 'decisiontree.GradientBoostState' -- TreeState subclass
-
--- unit tests and benchmarks
---require 'decisiontree.test'
---require 'decisiontree.benchmark'
-
--- nn.Module
-require 'decisiontree.DFD'
-require 'decisiontree.Sparse2Dense'
-
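--- Usage sketch (illustrative; mirrors the patterns used in test.lua below):
---
---   local dt = require 'decisiontree'
---   local trainSet = dt.DataSet(inputs, targets)
---   local trainer = dt.RandomForestTrainer{
---      nTree=14, activeRatio=0.5, featureBaggingSize=5,
---      maxLeafNodes=50, minLeafSize=10
---   }
---   local forest = trainer:train(trainSet, trainSet.featureIds)
---   local score = forest:score(trainSet.input[1])
-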
-return dt
diff --git a/contrib/lua-torch/decisiontree/internal_hash_map.h b/contrib/lua-torch/decisiontree/internal_hash_map.h
deleted file mode 100644
index bc8c523ef..000000000
--- a/contrib/lua-torch/decisiontree/internal_hash_map.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "khash.h"
-#include <pthread.h>
-
-KHASH_MAP_INIT_INT64(long, long)
-typedef khash_t(long)* internal_hash_map_t;
-
-typedef struct {
- hash_map_t h;
- int refcount;
- pthread_mutex_t mutex;
- int autolock;
- long counter;
-} hash_map_lua_t;
diff --git a/contrib/lua-torch/decisiontree/khash.h b/contrib/lua-torch/decisiontree/khash.h
deleted file mode 100644
index 20e994063..000000000
--- a/contrib/lua-torch/decisiontree/khash.h
+++ /dev/null
@@ -1,627 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/*
- An example:
-
-#include "khash.h"
-KHASH_MAP_INIT_INT(32, char)
-int main() {
- int ret, is_missing;
- khiter_t k;
- khash_t(32) *h = kh_init(32);
- k = kh_put(32, h, 5, &ret);
- kh_value(h, k) = 10;
- k = kh_get(32, h, 10);
- is_missing = (k == kh_end(h));
- k = kh_get(32, h, 5);
- kh_del(32, h, k);
- for (k = kh_begin(h); k != kh_end(h); ++k)
- if (kh_exist(h, k)) kh_value(h, k) = 1;
- kh_destroy(32, h);
- return 0;
-}
-*/
-
-/*
- 2013-05-02 (0.2.8):
-
- * Use quadratic probing. When the capacity is a power of 2, the stepping
-   function i*(i+1)/2 is guaranteed to traverse each bucket. It has better
-   cache performance than double hashing and is more robust than linear probing.
-
-   In theory, double hashing should be more robust than quadratic probing.
-   However, my implementation is probably not suited to large hash tables,
-   because the second hash function is closely tied to the first, which
-   reduces the effectiveness of double hashing.
-
- Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
-
- 2011-12-29 (0.2.7):
-
- * Minor code clean up; no actual effect.
-
- 2011-09-16 (0.2.6):
-
- * The capacity is a power of 2. This seems to dramatically improve the
-   speed for simple keys. Thanks to Zilong Tan for the suggestion. Reference:
-
- - http://code.google.com/p/ulib/
- - http://nothings.org/computer/judy/
-
- * Allow optional use of linear probing, which usually has better
-   performance for random input. Double hashing is still the default as it
- is more robust to certain non-random input.
-
- * Added Wang's integer hash function (not used by default). This hash
- function is more robust to certain non-random input.
-
- 2011-02-14 (0.2.5):
-
- * Allow declaring global functions.
-
- 2009-09-26 (0.2.4):
-
- * Improve portability
-
- 2008-09-19 (0.2.3):
-
- * Corrected the example
- * Improved interfaces
-
- 2008-09-11 (0.2.2):
-
- * Improved speed a little in kh_put()
-
- 2008-09-10 (0.2.1):
-
- * Added kh_clear()
- * Fixed a compiling error
-
- 2008-09-02 (0.2.0):
-
- * Changed to token concatenation which increases flexibility.
-
- 2008-08-31 (0.1.2):
-
- * Fixed a bug in kh_get(), which has not been tested previously.
-
- 2008-08-31 (0.1.1):
-
- * Added destructor
-*/
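-
-/* Why the quadratic step above visits every bucket (a short justification of
-   the 0.2.8 note): with n_buckets = 2^m, probe i inspects bucket
-   (h + i*(i+1)/2) mod 2^m. The triangular numbers i*(i+1)/2 are pairwise
-   distinct modulo 2^m for i = 0 .. 2^m - 1, so the first 2^m probes cover
-   every bucket exactly once before the sequence repeats. */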
-
-
-#ifndef __AC_KHASH_H
-#define __AC_KHASH_H
-
-/*!
- @header
-
- Generic hash table library.
- */
-
-#define AC_VERSION_KHASH_H "0.2.8"
-
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-
-/* compiler specific configuration */
-
-#if UINT_MAX == 0xffffffffu
-typedef unsigned int khint32_t;
-#elif ULONG_MAX == 0xffffffffu
-typedef unsigned long khint32_t;
-#endif
-
-#if ULONG_MAX == ULLONG_MAX
-typedef unsigned long khint64_t;
-#else
-typedef unsigned long long khint64_t;
-#endif
-
-#ifndef kh_inline
-#ifdef _MSC_VER
-#define kh_inline __inline
-#else
-#define kh_inline inline
-#endif
-#endif /* kh_inline */
-
-#ifndef klib_unused
-#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
-#define klib_unused __attribute__ ((__unused__))
-#else
-#define klib_unused
-#endif
-#endif /* klib_unused */
-
-typedef khint32_t khint_t;
-typedef khint_t khiter_t;
-
-#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
-#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
-#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
-#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
-#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
-#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
-#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
-
-#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-#ifndef kcalloc
-#define kcalloc(N,Z) calloc(N,Z)
-#endif
-#ifndef kmalloc
-#define kmalloc(Z) malloc(Z)
-#endif
-#ifndef krealloc
-#define krealloc(P,Z) realloc(P,Z)
-#endif
-#ifndef kfree
-#define kfree(P) free(P)
-#endif
-
-static const double __ac_HASH_UPPER = 0.77;
-
-#define __KHASH_TYPE(name, khkey_t, khval_t) \
- typedef struct kh_##name##_s { \
- khint_t n_buckets, size, n_occupied, upper_bound; \
- khint32_t *flags; \
- khkey_t *keys; \
- khval_t *vals; \
- } kh_##name##_t;
-
-#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
- extern kh_##name##_t *kh_init_##name(void); \
- extern void kh_destroy_##name(kh_##name##_t *h); \
- extern void kh_clear_##name(kh_##name##_t *h); \
- extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
- extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
- extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
- extern void kh_del_##name(kh_##name##_t *h, khint_t x);
-
-#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
- SCOPE kh_##name##_t *kh_init_##name(void) { \
- return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
- } \
- SCOPE void kh_destroy_##name(kh_##name##_t *h) \
- { \
- if (h) { \
- kfree((void *)h->keys); kfree(h->flags); \
- kfree((void *)h->vals); \
- kfree(h); \
- } \
- } \
- SCOPE void kh_clear_##name(kh_##name##_t *h) \
- { \
- if (h && h->flags) { \
- memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
- h->size = h->n_occupied = 0; \
- } \
- } \
- SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
- { \
- if (h->n_buckets) { \
- khint_t k, i, last, mask, step = 0; \
- mask = h->n_buckets - 1; \
- k = __hash_func(key); i = k & mask; \
- last = i; \
- while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
- i = (i + (++step)) & mask; \
- if (i == last) return h->n_buckets; \
- } \
- return __ac_iseither(h->flags, i)? h->n_buckets : i; \
- } else return 0; \
- } \
- SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
- { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
- khint32_t *new_flags = 0; \
- khint_t j = 1; \
- { \
- kroundup32(new_n_buckets); \
- if (new_n_buckets < 4) new_n_buckets = 4; \
- if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
- else { /* hash table size to be changed (shrink or expand); rehash */ \
- new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
- if (!new_flags) return -1; \
- memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
- if (h->n_buckets < new_n_buckets) { /* expand */ \
- khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
- if (!new_keys) { kfree(new_flags); return -1; } \
- h->keys = new_keys; \
- if (kh_is_map) { \
- khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
- if (!new_vals) { kfree(new_flags); return -1; } \
- h->vals = new_vals; \
- } \
- } /* otherwise shrink */ \
- } \
- } \
- if (j) { /* rehashing is needed */ \
- for (j = 0; j != h->n_buckets; ++j) { \
- if (__ac_iseither(h->flags, j) == 0) { \
- khkey_t key = h->keys[j]; \
- khval_t val; \
- khint_t new_mask; \
- new_mask = new_n_buckets - 1; \
- if (kh_is_map) val = h->vals[j]; \
- __ac_set_isdel_true(h->flags, j); \
- while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
- khint_t k, i, step = 0; \
- k = __hash_func(key); \
- i = k & new_mask; \
- while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
- __ac_set_isempty_false(new_flags, i); \
- if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
- { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
- if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
- __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
- } else { /* write the element and jump out of the loop */ \
- h->keys[i] = key; \
- if (kh_is_map) h->vals[i] = val; \
- break; \
- } \
- } \
- } \
- } \
- if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
- h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
- if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
- } \
- kfree(h->flags); /* free the working space */ \
- h->flags = new_flags; \
- h->n_buckets = new_n_buckets; \
- h->n_occupied = h->size; \
- h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
- } \
- return 0; \
- } \
- SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
- { \
- khint_t x; \
- if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
- if (h->n_buckets > (h->size<<1)) { \
- if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
- *ret = -1; return h->n_buckets; \
- } \
- } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
- *ret = -1; return h->n_buckets; \
- } \
-   } /* TODO: implement automatic shrinking; resize() already supports shrinking */ \
- { \
- khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
- x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
- if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
- else { \
- last = i; \
- while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
- if (__ac_isdel(h->flags, i)) site = i; \
- i = (i + (++step)) & mask; \
- if (i == last) { x = site; break; } \
- } \
- if (x == h->n_buckets) { \
- if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
- else x = i; \
- } \
- } \
- } \
- if (__ac_isempty(h->flags, x)) { /* not present at all */ \
- h->keys[x] = key; \
- __ac_set_isboth_false(h->flags, x); \
- ++h->size; ++h->n_occupied; \
- *ret = 1; \
- } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
- h->keys[x] = key; \
- __ac_set_isboth_false(h->flags, x); \
- ++h->size; \
- *ret = 2; \
- } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
- return x; \
- } \
- SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
- { \
- if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
- __ac_set_isdel_true(h->flags, x); \
- --h->size; \
- } \
- }
-
-#define KHASH_DECLARE(name, khkey_t, khval_t) \
- __KHASH_TYPE(name, khkey_t, khval_t) \
- __KHASH_PROTOTYPES(name, khkey_t, khval_t)
-
-#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
- __KHASH_TYPE(name, khkey_t, khval_t) \
- __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
-
-#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
- KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
-
-/* --- BEGIN OF HASH FUNCTIONS --- */
-
-/*! @function
- @abstract Integer hash function
- @param key The integer [khint32_t]
- @return The hash value [khint_t]
- */
-#define kh_int_hash_func(key) (khint32_t)(key)
-/*! @function
- @abstract Integer comparison function
- */
-#define kh_int_hash_equal(a, b) ((a) == (b))
-/*! @function
- @abstract 64-bit integer hash function
- @param key The integer [khint64_t]
- @return The hash value [khint_t]
- */
-#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
-/*! @function
- @abstract 64-bit integer comparison function
- */
-#define kh_int64_hash_equal(a, b) ((a) == (b))
-/*! @function
- @abstract const char* hash function
- @param s Pointer to a null terminated string
- @return The hash value
- */
-static kh_inline khint_t __ac_X31_hash_string(const char *s)
-{
- khint_t h = (khint_t)*s;
- if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
- return h;
-}
-/*! @function
- @abstract Another interface to const char* hash function
- @param key Pointer to a null terminated string [const char*]
- @return The hash value [khint_t]
- */
-#define kh_str_hash_func(key) __ac_X31_hash_string(key)
-/*! @function
- @abstract Const char* comparison function
- */
-#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
-
-static kh_inline khint_t __ac_Wang_hash(khint_t key)
-{
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
-}
-#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key)
-
-/* --- END OF HASH FUNCTIONS --- */
-
-/* Other convenient macros... */
-
-/*!
- @abstract Type of the hash table.
- @param name Name of the hash table [symbol]
- */
-#define khash_t(name) kh_##name##_t
-
-/*! @function
- @abstract Initiate a hash table.
- @param name Name of the hash table [symbol]
- @return Pointer to the hash table [khash_t(name)*]
- */
-#define kh_init(name) kh_init_##name()
-
-/*! @function
- @abstract Destroy a hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- */
-#define kh_destroy(name, h) kh_destroy_##name(h)
-
-/*! @function
- @abstract Reset a hash table without deallocating memory.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- */
-#define kh_clear(name, h) kh_clear_##name(h)
-
-/*! @function
- @abstract Resize a hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param s New size [khint_t]
- */
-#define kh_resize(name, h, s) kh_resize_##name(h, s)
-
-/*! @function
- @abstract Insert a key to the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Key [type of keys]
- @param r Extra return code: -1 if the operation failed;
- 0 if the key is present in the hash table;
- 1 if the bucket is empty (never used); 2 if the element in
- the bucket has been deleted [int*]
- @return Iterator to the inserted element [khint_t]
- */
-#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
-
-/*! @function
- @abstract Retrieve a key from the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Key [type of keys]
- @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
- */
-#define kh_get(name, h, k) kh_get_##name(h, k)
-
-/*! @function
- @abstract Remove a key from the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Iterator to the element to be deleted [khint_t]
- */
-#define kh_del(name, h, k) kh_del_##name(h, k)
-
-/*! @function
- @abstract Test whether a bucket contains data.
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [khint_t]
- @return 1 if containing data; 0 otherwise [int]
- */
-#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
-
-/*! @function
- @abstract Get key given an iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [khint_t]
- @return Key [type of keys]
- */
-#define kh_key(h, x) ((h)->keys[x])
-
-/*! @function
- @abstract Get value given an iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [khint_t]
- @return Value [type of values]
- @discussion For hash sets, calling this results in a segfault.
- */
-#define kh_val(h, x) ((h)->vals[x])
-
-/*! @function
- @abstract Alias of kh_val()
- */
-#define kh_value(h, x) ((h)->vals[x])
-
-/*! @function
- @abstract Get the start iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @return The start iterator [khint_t]
- */
-#define kh_begin(h) (khint_t)(0)
-
-/*! @function
- @abstract Get the end iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @return The end iterator [khint_t]
- */
-#define kh_end(h) ((h)->n_buckets)
-
-/*! @function
- @abstract Get the number of elements in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @return Number of elements in the hash table [khint_t]
- */
-#define kh_size(h) ((h)->size)
-
-/*! @function
- @abstract Get the number of buckets in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @return Number of buckets in the hash table [khint_t]
- */
-#define kh_n_buckets(h) ((h)->n_buckets)
-
-/*! @function
- @abstract Iterate over the entries in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param kvar Variable to which key will be assigned
- @param vvar Variable to which value will be assigned
- @param code Block of code to execute
- */
-#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
- for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
- if (!kh_exist(h,__i)) continue; \
- (kvar) = kh_key(h,__i); \
- (vvar) = kh_val(h,__i); \
- code; \
- } }
-
-/*! @function
- @abstract Iterate over the values in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param vvar Variable to which value will be assigned
- @param code Block of code to execute
- */
-#define kh_foreach_value(h, vvar, code) { khint_t __i; \
- for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
- if (!kh_exist(h,__i)) continue; \
- (vvar) = kh_val(h,__i); \
- code; \
- } }
-
-/* More convenient interfaces */
-
-/*! @function
- @abstract Instantiate a hash set containing integer keys
- @param name Name of the hash table [symbol]
- */
-#define KHASH_SET_INIT_INT(name) \
- KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing integer keys
- @param name Name of the hash table [symbol]
- @param khval_t Type of values [type]
- */
-#define KHASH_MAP_INIT_INT(name, khval_t) \
- KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash set containing 64-bit integer keys
- @param name Name of the hash table [symbol]
- */
-#define KHASH_SET_INIT_INT64(name) \
- KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing 64-bit integer keys
- @param name Name of the hash table [symbol]
- @param khval_t Type of values [type]
- */
-#define KHASH_MAP_INIT_INT64(name, khval_t) \
- KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
-
-typedef const char *kh_cstr_t;
-/*! @function
- @abstract Instantiate a hash set containing const char* keys
- @param name Name of the hash table [symbol]
- */
-#define KHASH_SET_INIT_STR(name) \
- KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing const char* keys
- @param name Name of the hash table [symbol]
- @param khval_t Type of values [type]
- */
-#define KHASH_MAP_INIT_STR(name, khval_t) \
- KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
-
-#endif /* __AC_KHASH_H */
diff --git a/contrib/lua-torch/decisiontree/math.lua b/contrib/lua-torch/decisiontree/math.lua
deleted file mode 100644
index eb71b31ed..000000000
--- a/contrib/lua-torch/decisiontree/math.lua
+++ /dev/null
@@ -1,84 +0,0 @@
-local dt = require "decisiontree._env"
-
-local PSEUDOCOUNT = 1.0
-local MIN_LOGISTIC = 1E-8
-local MAX_LOGISTIC = 1.0 - MIN_LOGISTIC
-
--- Count the number of examples of each class (classes are read from inputset.target)
-function dt.uniquecounts(counts, inputset, nclass)
- counts = counts or inputset.input.new()
- nclass = nclass or inputset.target:max()
- counts:resize(nclass):zero()
-
- inputset.target:apply(function(c) counts[c] = counts[c] + 1 end)
- return counts
-end
-
--- Entropy is the sum of -p(x)log(p(x)) across all the different possible results
-local counts, logprobs
-function dt.entropy(inputset, nclass)
- local dt = require 'decisiontree'
- counts = dt.uniquecounts(counts, inputset, nclass)
- -- convert counts to categorical probabilities
- counts:add(0.0000001) -- prevent NaN
- counts:div(counts:sum())
-
- logprobs = logprobs or counts.new()
- logprobs:resize(counts:size())
- logprobs:log(counts):div(math.log(2)) -- log2(x)
-
- counts:cmul(logprobs)
-
- return -counts:sum()
-end
-
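--- Worked example (illustrative): with a 50/50 split over two classes the
--- probabilities are {0.5, 0.5}, so entropy = -(0.5*log2(0.5) + 0.5*log2(0.5))
--- = 1 bit, while a pure node ({1.0, 0.0}) has entropy close to 0.
-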
--- Compute and return the probability of a positive label.
-function dt.probabilityPositive(nPositive, nTotal)
-   return (nPositive + PSEUDOCOUNT) / (nTotal + 2.0 * PSEUDOCOUNT)
-end
-
--- Ref. https://en.wikipedia.org/wiki/Logit
--- Calculates the logit of a probability.
--- Logit represents the log-odds. Probabilities transformed to logit 'space' can be combined linearly.
-function dt.logit(p)
- assert(p >= 0.0 and p <= 1.0, "Expecting probability for arg 1")
- local truncatedP = math.max(MIN_LOGISTIC, math.min(MAX_LOGISTIC, p))
- return math.log(truncatedP / (1.0 - truncatedP))
-end
-
-function dt.logistic(x)
- return (x >= 0) and (1 / (1 + math.exp(-x))) or (1 - 1 / (1 + math.exp(x)))
-end
-
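--- Note (illustrative): logit and logistic are inverses on the truncated
--- range, e.g. logit(0.5) = 0 and logistic(0) = 0.5, so
--- logistic(logit(p)) == p for any p in [MIN_LOGISTIC, MAX_LOGISTIC].
-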
-function dt.computeGradientBoostLoss(gradient, hessian)
- return -gradient * gradient / hessian
-end
-
-function dt.computeNewtonScore(gradient, hessian)
-   return -0.5 * gradient / hessian
-end
-
--- Calculates the logit score for a Node in a Decision Tree based on the probability of a positive label.
--- params: number of positive examples and total number of examples.
-function dt.calculateLogitScore(nPositive, nTotal)
- local dt = require 'decisiontree'
- return dt.logit(dt.probabilityPositive(nPositive, nTotal))
-end
-
--- Compute and return the Gini impurity score based on an input contingency table.
-function dt.computeGini(leftCount, positiveLeftCount, rightCount, positiveRightCount)
-   assert(torch.type(leftCount) == 'number', 'Expecting total number of examples falling into the left branch.')
-   assert(torch.type(positiveLeftCount) == 'number', 'Expecting total number of positive examples falling into the left branch.')
- assert(torch.type(rightCount) == 'number', 'Expecting total number of examples falling into the right branch.')
- assert(torch.type(positiveRightCount) == 'number', 'Expecting total number of positive examples falling into the right branch.')
-
- local total = leftCount + rightCount
-
- local pPositiveLeft = leftCount == 0 and 0 or (positiveLeftCount / leftCount)
- local leftGini = pPositiveLeft * (1.0 - pPositiveLeft)
-
- local pPositiveRight = rightCount == 0 and 0 or (positiveRightCount / rightCount)
- local rightGini = pPositiveRight * (1.0 - pPositiveRight)
-
- return (leftCount * leftGini + rightCount * rightGini) / total
-end \ No newline at end of file
diff --git a/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec b/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec
deleted file mode 100644
index d5d916275..000000000
--- a/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec
+++ /dev/null
@@ -1,40 +0,0 @@
-package = "decisiontree"
-version = "scm-1"
-
-source = {
- url = "git://github.com/Twitter/decisiontree",
- tag = "master"
-}
-
-description = {
- summary = "Decision trees for Torch by Twitter",
- detailed = [[
- Classification and regression trees (CART).
-      Gradient boosted decision trees (GBDT).
- ]],
- homepage = "https://github.com/Twitter/decisiontree",
- license = "BSD"
-}
-
-dependencies = {
- "torch >= 7.0",
- "moses >= 1.3.1",
- "xlua >= 1.0",
- "image >= 1.0",
- "luafilesystem >= 1.6.2",
- "sys >= 1.1",
- "paths >= 1.0",
- "ipc >= 1.0",
- "nn >= 1.0"
-}
-
-build = {
- type = "command",
- build_command = [[
-cmake -E make_directory build;
-cd build;
-cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)" -DCMAKE_C_FLAGS=-fPIC -DCMAKE_CXX_FLAGS=-fPIC;
-$(MAKE)
- ]],
- install_command = "cd build && $(MAKE) install"
-}
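-
--- Build sketch (an assumption, not part of the rockspec itself): from the
--- package root, "luarocks make rocks/decisiontree-scm-1.rockspec" would run
--- the build_command above and then the install_command.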
diff --git a/contrib/lua-torch/decisiontree/test.lua b/contrib/lua-torch/decisiontree/test.lua
deleted file mode 100644
index 80510a4f6..000000000
--- a/contrib/lua-torch/decisiontree/test.lua
+++ /dev/null
@@ -1,817 +0,0 @@
-local dt = require "decisiontree._env"
-
-local dttest = {}
-local nloop = 50
-local epsilon = 0.000001
-local mytester
-
---e.g. usage: th -e "dt = require 'decisiontree'; dt.test()"
-
--- test 99% accuracy
-local function testAccuracy(cartTree, name, dataset, minacc)
- assert(torch.isTypeOf(dataset, 'dt.DataSet'))
- minacc = minacc or 0.99
- local output = torch.Tensor(dataset:size())
- local target, input = dataset.target, dataset.input
-
- for i=1,dataset:size() do
- local stack = {}
- local score = cartTree:score(input[i], stack)
- output[i] = score >= 0 and 1 or 0
-
- if dt.VERBOSE and torch.type(cartTree) == 'dt.CartTree' and target[i] ~= output[i] then
-         print(cartTree:stackToString(stack, input[i]))
- print(i, score, target[i], output[i])
- end
- end
-
- local accuracy = torch.eq(target, output):float():mean()
- mytester:assert(accuracy >= minacc, name .. ": insufficient accuracy: " .. accuracy .. " < " .. minacc)
-end
-
-function dttest.SparseTensor()
- local keys = torch.LongTensor{1,5,6,10}
- local values = torch.randn(keys:size(1))
- local st = torch.SparseTensor(keys, values)
-
- mytester:assert(st[1] == values[1])
- mytester:assert(st[5] == values[2])
- mytester:assert(st[6] == values[3])
- mytester:assert(st[10] == values[4])
-
- mytester:assert(st[2] == nil)
-
- st:buildIndex()
-
- mytester:assert(st[1] == values[1])
- mytester:assert(st[5] == values[2])
- mytester:assert(st[6] == values[3])
- mytester:assert(st[10] == values[4])
-
- mytester:assert(st[2] == nil)
-
- -- test empty sparse tensor
-
- local est = torch.SparseTensor()
-end
-
-function dttest.GiniState()
- local featureId = 2
- local minLeafSize = 0
-
- local input = torch.Tensor({{0,1,0},{0,2,0},{0,3,0}})
- local target = torch.Tensor({1, 1, 1})
- local dataset = dt.DataSet(input, target, 3)
-
- local splitInfo1 = {_id=1}
- local splitInfo2 = {_id=2, leftChildSize = 1, rightChildSize = 2, splitGain = 0}
- local splitInfo3 = {_id=3, leftChildSize = 2, rightChildSize = 1, splitGain = -1}
-
- local exampleIds = torch.LongTensor{1,2,3}
-
- local treeState = dt.GiniState(exampleIds)
-
- function treeState.computeSplitInfo(self, splitFeatureId, splitFeatureValue)
- if splitFeatureId == featureId and splitFeatureValue == 2 then
- return splitInfo2
- elseif splitFeatureId == featureId and splitFeatureValue == 3 then
- return splitInfo3
- else
- error("Unhandled computeSplitInfo call "..splitFeatureId.." "..splitFeatureValue)
- end
- end
-
- local splitInfo = treeState:findBestFeatureSplit(dataset, featureId, minLeafSize)
- mytester:assert(splitInfo._id == splitInfo3._id)
-end
-
-function dttest.CartTree()
-
- local splitFeatureId = 100
- local splitFeatureValue = 1.0
-
- local function getBinaryCartTreeRootNode()
-
- local leftNodeScore = 0.2
- local rightNodeScore = 0.4
-
- local rootNode = dt.CartNode()
- rootNode.nodeId = 0
- rootNode.score = 0.5
- rootNode.splitFeatureId = splitFeatureId
-      rootNode.splitFeatureValue = splitFeatureValue
-
- local leftChild = dt.CartNode()
- leftChild.score = leftNodeScore
- leftChild.nodeId = 1
-
- local rightChild = dt.CartNode()
- rightChild.score = rightNodeScore
- rightChild.nodeId = 2
-
- rootNode.leftChild = leftChild
- rootNode.rightChild = rightChild
-
- return rootNode
- end
-
- local function testScoreCartTreeBranchLeftIfMissing()
- local rootNode = getBinaryCartTreeRootNode()
-
- local cartTree = dt.CartTree(rootNode)
-
- local continuousFeatures = torch.SparseTensor()
-
- local score, nodeId = cartTree:score(continuousFeatures)
-
- mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon)
- mytester:assert(rootNode.leftChild.nodeId == nodeId)
- end
-
- local function testBranchRightWithFeature()
- local rootNode = getBinaryCartTreeRootNode()
-
- local cartTree = dt.CartTree(rootNode)
-
- local continuousFeatures = torch.zeros(100)
- continuousFeatures[splitFeatureId] = splitFeatureValue
-
- local score, nodeId = cartTree:score(continuousFeatures)
-
- mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon)
- mytester:assert(rootNode.rightChild.nodeId == nodeId)
- end
-
- local function testMissingRightNode()
- local rootNode = getBinaryCartTreeRootNode()
-
- rootNode.rightChild = nil
-
- local cartTree = dt.CartTree(rootNode)
-
- local continuousFeatures = torch.Tensor()
-
- local score, nodeId = cartTree:score(continuousFeatures)
-
- mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon)
- mytester:assert(rootNode.leftChild.nodeId == nodeId)
- end
-
- local function testMissingLeftNode()
- local rootNode = getBinaryCartTreeRootNode()
-
- rootNode.leftChild = nil
-
- local cartTree = dt.CartTree(rootNode)
-
- local continuousFeatures = torch.Tensor()
-
- local score, nodeId = cartTree:score(continuousFeatures)
-
- mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon)
- mytester:assert(rootNode.rightChild.nodeId == nodeId)
- end
-
- local function testMissingAllChildren()
- local rootNode = getBinaryCartTreeRootNode()
-
- rootNode.leftChild = nil
- rootNode.rightChild = nil
-
- local cartTree = dt.CartTree(rootNode)
-
- local continuousFeatures = torch.Tensor()
-
- local score, nodeId = cartTree:score(continuousFeatures)
-
- mytester:assert(math.abs(rootNode.score - score) < epsilon)
- mytester:assert(rootNode.nodeId == nodeId)
- end
-
- local function testScoreCartTreeBranchRandomlyRight()
-      local rootNode = getBinaryCartTreeRootNode()
-
-      -- Force Branch Right
-      local cartTree = dt.CartTree(rootNode, function() return false end)
-
- local continuousFeatures = torch.SparseTensor()
-
- local score, nodeId = cartTree:score(continuousFeatures)
-
- mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon)
- mytester:assert(rootNode.rightChild.nodeId == nodeId)
- end
-
- local function testScoreCartTreeBranchRandomlyLeft()
-      local rootNode = getBinaryCartTreeRootNode()
-
-      -- Force Branch Left
-      local cartTree = dt.CartTree(rootNode, function() return true end)
-
- local continuousFeatures = torch.SparseTensor()
-
- local score, nodeId = cartTree:score(continuousFeatures)
-
- mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon)
- mytester:assert(rootNode.leftChild.nodeId == nodeId)
- end
-
- testScoreCartTreeBranchLeftIfMissing()
- testBranchRightWithFeature()
- testMissingRightNode()
- testMissingLeftNode()
- testMissingAllChildren()
- testScoreCartTreeBranchRandomlyRight()
- testScoreCartTreeBranchRandomlyLeft()
-
-end
-
-function dttest.TreeState_branch()
- local _ = require 'moses'
- local binFeatureId = 1
- local featureId = 2
-
- local input = {
- torch.SparseTensor(torch.LongTensor{binFeatureId},torch.Tensor{1}),
- torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{1,1}),
- torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{0,2}),
- torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{0,3})
- }
- local target = torch.LongTensor(4):fill(1)
-
- local dataset = dt.DataSet(input, target)
-
- local treeState = dt.TreeState(torch.LongTensor():range(1,4))
- local splitInfo = {splitId = binFeatureId, splitValue = 1}
-
- local function testBranchBinaryFeature()
- splitInfo = {splitId = binFeatureId, splitValue = 1}
- local leftBranch, rightBranch = treeState:branch(splitInfo, dataset)
- mytester:assert(leftBranch ~= nil and rightBranch ~= nil)
-
- mytester:assert(2 == leftBranch:size())
- mytester:assert(leftBranch:contains(3))
- mytester:assert(leftBranch:contains(4))
-
- mytester:assert(2 == rightBranch:size())
- mytester:assert(rightBranch:contains(1))
- mytester:assert(rightBranch:contains(2))
- end
-
- local function testBranchContinuousFeature()
- local splitValue = 2
- splitInfo = {splitId = featureId, splitValue = splitValue}
-
- local leftBranch, rightBranch = treeState:branch(splitInfo, dataset)
- mytester:assert(leftBranch ~= nil and rightBranch ~= nil)
-
- mytester:assert(1 == leftBranch:size())
- mytester:assert(leftBranch:contains(2))
-
- mytester:assert(2 == rightBranch:size())
- mytester:assert(rightBranch:contains(3))
- mytester:assert(rightBranch:contains(4))
- end
-
- testBranchBinaryFeature()
- testBranchContinuousFeature()
-
-end
-
-function dttest.DecisionForest()
- -- Create test decision forest, each forest has only a single node, and returns score == score of root node.
-
- local function createCartTreeWithSingleNode(score)
- local cartNode = dt.CartNode()
- cartNode.score = score
- return dt.CartTree(cartNode)
- end
-
- local function getTestDecisionForest()
- local cartTrees = {
- createCartTreeWithSingleNode(1),
- createCartTreeWithSingleNode(2),
- createCartTreeWithSingleNode(3)
- }
- local weight = torch.Tensor{10,20,30}
- local bias = 0.5
-
- return dt.DecisionForest(cartTrees, weight, bias)
- end
-
- local function testScoreDecisionForest()
- local df = getTestDecisionForest()
- local continuousFeatures = torch.SparseTensor()
-
-      local expectedResult = 1.0 * 10.0 + 2.0 * 20.0 + 3.0 * 30.0 + 0.5
- local result = df:score(continuousFeatures)
-
- mytester:assert(math.abs(expectedResult - result) < epsilon)
- end
-
- testScoreDecisionForest()
-end
-
-function dttest.CartTrainer()
- local minLeafSize, maxLeafNodes = 1, 1000
- local nExample = 100
-
- -- 1. dense dataset
- local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample)
-
- -- assert that the dataset is valid
- for clusterId, exampleIds in ipairs(clusterExamples) do
- local exampleIdx = torch.LongTensor(exampleIds)
- local input = inputs:index(1,exampleIdx)
- local target = targets:index(1,exampleIdx)
- assert(input:std(1):mean() < 0.05)
- end
-
- local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
- local treeState = dt.GiniState(trainSet:getExampleIds())
- local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
-
-   mytester:assert(nleaf == nExample) -- with minLeafSize = 1 and maxLeafNodes = inf, dense inputs yield one leaf per example
- testAccuracy(cartTree, "dense train single-thread first", trainSet, 0.99)
- testAccuracy(cartTree, "dense valid single-thread first", validSet, 0.7) -- they don't generalize very well..
-
- local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
- testAccuracy(cartTree, "dense single-thread second", trainSet)
-
- -- test feature parallelization
- local nThread = 2
- cartTrainer:featureParallel(nThread)
- local treeState = dt.GiniState(trainSet:getExampleIds())
- local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
- testAccuracy(cartTree, "dense feature-parallel", trainSet)
-
- -- 2. sparse-dense dataset
- local trainSet, validSet, clusterExamples, inputs, targets = dt.getSparseDummyData(nExample, nil, 10, nil, nil, 10)
-
- -- assert that the dataset is valid
- for clusterId, exampleIds in ipairs(clusterExamples) do
- local input = torch.Tensor(#exampleIds, 10):zero()
- for i, exampleId in ipairs(exampleIds) do
- input[i]:indexCopy(1, inputs[exampleId].keys, inputs[exampleId].values)
- end
- assert(input:std(1):mean() < 0.05)
- end
-
- local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
- local treeState = dt.GiniState(trainSet:getExampleIds())
- local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
-
-   mytester:assert(nleaf == nExample) -- every example here has all 10 features, so this behaves like the dense case
- testAccuracy(cartTree, "sparse-dense train single-thread first", trainSet, 0.99)
-
- local shuffle = torch.LongTensor():randperm(10)
- for i, input in ipairs(inputs) do
- input.keys = input.keys:index(1, shuffle)
- input.values = input.values:index(1, shuffle)
- input._map = nil
- end
- testAccuracy(cartTree, "sparse-dense shuffled keys train single-thread first", trainSet, 0.99)
- testAccuracy(cartTree, "sparse-dense valid single-thread first", validSet, 0.8)
-
- -- 3. sparse dataset
- local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9)
-
- local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
- local treeState = dt.GiniState(trainSet:getExampleIds())
- local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
- cartTree.branchleft = function() return true end
-
-   mytester:assert(nleaf < nExample) -- with sparse inputs, examples missing the split feature are dropped, so there are fewer leaves than examples
- testAccuracy(cartTree, "sparse train single-thread first", trainSet, 0.9) -- the TreeBrancher drops examples with missing features, making it difficult to overfit
- testAccuracy(cartTree, "sparse valid single-thread first", validSet, 0.8)
-end
-
-function dttest.GradientBoostTrainer()
- local nExample = 100
- local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9)
-
- local maxLeafNode, minLeafSize = nExample/2, nExample/10
- local loss = nn.LogitBoostCriterion(false)
-
- local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNode)
-
- local opt = {
- lossFunction=loss,
- treeTrainer=cartTrainer,
- shrinkage=0.1,
- downsampleRatio=6,
- featureBaggingSize=-1,
- nTree=14,
- evalFreq=8,
- earlyStop=0 -- no early-stopping
- }
-
- -- test single-thread
- local trainer = dt.GradientBoostTrainer(opt)
- local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet)
-
- mytester:assert(#decisionForest.trees == opt.nTree)
- testAccuracy(decisionForest, "sparse train single-thread first", trainSet, 0.98)
- testAccuracy(decisionForest, "sparse valid single-thread first", validSet, 0.95)
-
- -- test stateless
- local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet)
-
- mytester:assert(#decisionForest.trees == opt.nTree)
- testAccuracy(decisionForest, "sparse train single-thread second", trainSet, 0.98)
- testAccuracy(decisionForest, "sparse valid single-thread second", validSet, 0.95)
-
- -- test feature-parallel
- local nThread = 2
- cartTrainer:featureParallel(nThread)
-
- local trainer = dt.GradientBoostTrainer(opt)
- local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet)
-
- mytester:assert(#decisionForest.trees == opt.nTree)
- testAccuracy(decisionForest, "sparse train feature-parallel first", trainSet, 0.98)
- testAccuracy(decisionForest, "sparse valid feature-parallel first", validSet, 0.95)
-
-end
-
-function dttest.RandomForestTrainer()
- local nExample = 100
- local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9)
-
- local opt = {
- activeRatio=0.5,
- featureBaggingSize=5,
- nTree=14,
- maxLeafNodes=nExample/2,
- minLeafSize=nExample/10,
- }
-
- local trainer = dt.RandomForestTrainer(opt)
-
- local decisionForest = trainer:train(trainSet, trainSet.featureIds)
- mytester:assert(#decisionForest.trees == opt.nTree)
- testAccuracy(decisionForest, "sparse train single-thread first", trainSet, 0.98)
- testAccuracy(decisionForest, "sparse valid single-thread first", validSet, 0.95)
-
- -- test stateless
- local decisionForest = trainer:train(trainSet, trainSet.featureIds)
-
- mytester:assert(#decisionForest.trees == opt.nTree)
- testAccuracy(decisionForest, "sparse train single-thread second", trainSet, 0.98)
- testAccuracy(decisionForest, "sparse valid single-thread second", validSet, 0.95)
-
- -- test tree-parallel
- local nThread = 2
- trainer:treeParallel(nThread)
-
- local trainer = dt.RandomForestTrainer(opt)
- local decisionForest = trainer:train(trainSet, trainSet.featureIds)
-
- mytester:assert(#decisionForest.trees == opt.nTree)
- testAccuracy(decisionForest, "sparse train tree-parallel first", trainSet, 0.98)
- testAccuracy(decisionForest, "sparse valid tree-parallel first", validSet, 0.95)
-end
-
-function dttest.WorkPool()
- local nThread = 2
- local wp = dt.WorkPool(nThread)
-
- -- 1. some easy tests
- local store = {key='nick',value=7}
- wp:update('storeKeyValue', store)
-
- wp:update('require', {libname='decisiontree', varname='dt'})
-
- local bias = 2
- local obj = nn.MSECriterion()
- wp:update('require', {libname='decisiontree', varname='dt'})
- wp:writeup('execute', function(store) return bias + obj:updateOutput(torch.Tensor{1},torch.Tensor{1}) + store.nick end)
-
- local taskname, res = wp:read()
- mytester:assert(taskname == 'execute')
- mytester:assert(res == 9)
-
- -- 2. trying to reproduce a difficult error
- local trainSet, validSet = dt.getSparseDummyData()
-
- -- setup worker store (each worker will have its own copy)
- local store = {
- trainSet=trainSet,
- minLeafSize=2
- }
- wp:update('storeKeysValues', store)
-
- -- arguments/upvalues
- local treeState = dt.GiniState(trainSet:getExampleIds())
- local shardId = 1
- local nShard = nThread
- local featureIds = trainSet.featureIds
-
- local task = function(store, args)
- assert(store.trainSet)
- assert(store.minLeafSize)
-
- local bestSplit = args.treeState:findBestSplit(store.trainSet, args.featureIds, store.minLeafSize, args.shardId, args.nShard)
- return bestSplit
- end
- local args = {treeState=treeState,featureIds=featureIds,shardId=shardId,nShard=nShard}
- wp:writeup("execute", {func=task,args=args})
-
- local taskname, bestSplit = wp:read()
- mytester:assert(taskname == 'execute')
- mytester:assert(torch.type(bestSplit) == 'table')
-
- -- closure
- local task = function(store)
- assert(store.trainSet)
- assert(store.minLeafSize)
-
- local bestSplit = treeState:findBestSplit(store.trainSet, featureIds, store.minLeafSize, shardId, nShard)
- return bestSplit
- end
- wp:writeup("execute", task)
-
- local taskname, bestSplit = wp:read()
- mytester:assert(taskname == 'execute')
- mytester:assert(torch.type(bestSplit) == 'table')
-
- local task = function(store, args)
- assert(store.trainSet)
- assert(torch.isTypeOf(treeState, 'dt.TreeState'), torch.type(treeState))
-
- local bestSplit = treeState:findBestSplit(store.trainSet, featureIds, store.minLeafSize, shardId, nShard)
- return bestSplit
- end
- local args = {featureIds=featureIds,shardId=shardId,nShard=nShard}
- wp:writeup("execute", {func=task,args=args})
-
-
- local taskname, bestSplit = wp:read()
- mytester:assert(taskname == 'execute')
- mytester:assert(torch.type(bestSplit) == 'table')
-
- wp:terminate()
-end
-
-function dttest.Sparse2Dense()
- local batchsize = 4
- local minFeatureId, maxFeatureId = 10, 100
- local input = {{},{}}
- for i=1,batchsize do
- local inputsize = math.random(5,10)
- input[1][i] = torch.LongTensor(inputsize):random(minFeatureId,maxFeatureId)
- input[2][i] = torch.Tensor(inputsize):uniform(0,1)
- end
- local s2d = nn.Sparse2Dense(torch.LongTensor():range(minFeatureId,maxFeatureId))
- -- test 2d forward
- local output = s2d:forward(input)
- local output2 = torch.Tensor(batchsize, maxFeatureId-minFeatureId+1):zero()
- local featureMap = {}
- local j = 0
- for i=minFeatureId,maxFeatureId do
- j = j + 1
- featureMap[i] = j
- end
- for i=1,batchsize do
- local keys, values = input[1][i], input[2][i]
- for j=1,keys:size(1) do
- output2[{i,featureMap[keys[j] ]}] = values[j]
- end
- end
- mytester:assertTensorEq(output, output2, 0.000001)
- -- test 1d forward
- local input = {input[1][batchsize], input[2][batchsize]}
- local output = s2d:forward(input)
- mytester:assertTensorEq(output, output2[batchsize], 0.000001)
-end
-
-function dttest.Sparse2DenseDouble()
- local batchsize = 4
- local minFeatureId, maxFeatureId = 10, 100
- local input = {{},{}}
- for i=1,batchsize do
- local inputsize = math.random(5,10)
- input[1][i] = torch.LongTensor(inputsize):random(minFeatureId,maxFeatureId)
- input[2][i] = torch.Tensor(inputsize):uniform(0,1):double()
- end
- local s2d = nn.Sparse2Dense(torch.LongTensor():range(minFeatureId,maxFeatureId))
- s2d:double()
- -- test 2d forward
- local output = s2d:forward(input)
- local output2 = torch.Tensor(batchsize, maxFeatureId-minFeatureId+1):zero():double()
- local featureMap = {}
- local j = 0
- for i=minFeatureId,maxFeatureId do
- j = j + 1
- featureMap[i] = j
- end
- for i=1,batchsize do
- local keys, values = input[1][i], input[2][i]
- for j=1,keys:size(1) do
- output2[{i,featureMap[keys[j] ]}] = values[j]
- end
- end
- mytester:assertTensorEq(output, output2, 0.000001)
- -- test 1d forward
- local input = {input[1][batchsize], input[2][batchsize]}
- local output = s2d:forward(input)
- mytester:assertTensorEq(output, output2[batchsize], 0.000001)
-end
-
-function dttest.LogitBoostCriterion()
- local input = torch.randn(10)
- local target = torch.LongTensor(10):random(0,1):type(torch.type(input))
-
- local lb = nn.LogitBoostCriterion(false)
- local loss = lb:updateOutput(input, target)
-
- local loss2 = 0
- for i=1,10 do
- loss2 = loss2 + math.log(1 + math.exp(target[i] <= 0 and input[i] or -input[i]))
- end
- mytester:assert(math.abs(loss - loss2) < 0.00001)
-
- local gradInput = lb:updateGradInput(input, target)
- local gradInput2 = gradInput:clone():zero()
- for i=1,10 do
- local p = dt.logistic(input[i])
- gradInput2[i] = (target[i] <= 0) and p or (p - 1)
- end
- mytester:assertTensorEq(gradInput, gradInput2, 0.000001)
-
- local hessInput = lb:updateHessInput(input, target)
- local hessInput2 = hessInput:clone():zero()
- for i=1,10 do
- local p = dt.logistic(input[i])
- hessInput2[i] = p * (1.0 - p)
- end
- mytester:assertTensorEq(hessInput, hessInput2, 0.000001)
-end
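-
--- The expected values above follow from the logistic loss: with
--- p = dt.logistic(x), the per-example loss is log(1 + exp(-x)) for a
--- positive target and log(1 + exp(x)) otherwise, so dL/dx = p - 1 or p
--- respectively, and d2L/dx2 = p*(1 - p) in both cases, matching
--- gradInput2 and hessInput2.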
-
-function dttest.DFD()
- local nExample = 100
- local batchsize = 4
- local inputsize = 10
-
- -- train Random Forest
- local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample, nil, inputsize)
- local opt = {
- activeRatio=0.5,
- featureBaggingSize=5,
- nTree=4,
- maxLeafNodes=nExample/2,
- minLeafSize=nExample/10,
- }
- local trainer = dt.RandomForestTrainer(opt)
- local df = trainer:train(trainSet, trainSet.featureIds)
- mytester:assert(#df.trees == opt.nTree)
-
- local dfd = nn.DFD(df)
- dfd = nn.DFD(dfd:getReconstructionInfo())
- local dfd2 = nn.DFD(dfd:getReconstructionInfo(), true)
- local input = validSet.input:sub(1,batchsize)
- local output = dfd:forward(input)
- local output2 = dfd2:forward(input)
-
- local _ = require 'moses'
-
- local function hasKey(keys,key)
- local found = false
- keys:apply(function(x)
- if x == key then
- found = true
- end
- end)
- return found
- end
-
- for i=1,batchsize do
- local nodes = {}
- local keys = output[1][i]
- local keys2 = output2[1][i]
- for j,tree in ipairs(df.trees) do
- local stack = {}
- tree:score(input[i], stack)
- mytester:assert(hasKey(keys2, stack[#stack]._nodeId))
-
- for k,node in ipairs(stack) do
- if k > 1 then
- assert(node._nodeId)
- mytester:assert(hasKey(keys, node._nodeId), string.format("missing key=%d in %s", node._nodeId, tostring(keys)))
- table.insert(nodes, node._nodeId)
- end
- end
- end
- mytester:assert(#nodes == keys:size(1))
- mytester:assert(#df.trees == keys2:size(1))
- end
-end
-
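--- Same as the DFD test above, but with both the module and the input cast to double precision.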
-function dttest.DFDDouble()
- local nExample = 100
- local batchsize = 4
- local inputsize = 10
-
- -- train Random Forest
- local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample, nil, inputsize)
- local opt = {
- activeRatio=0.5,
- featureBaggingSize=5,
- nTree=4,
- maxLeafNodes=nExample/2,
- minLeafSize=nExample/10,
- }
- local trainer = dt.RandomForestTrainer(opt)
- local df = trainer:train(trainSet, trainSet.featureIds)
- mytester:assert(#df.trees == opt.nTree)
-
- local dfd = nn.DFD(df)
- dfd:double()
- dfd = nn.DFD(dfd:getReconstructionInfo())
- local dfd2 = nn.DFD(dfd:getReconstructionInfo(), true)
- local input = validSet.input:sub(1,batchsize):double()
- local output = dfd:forward(input)
- local output2 = dfd2:forward(input)
-
- local _ = require 'moses'
-
- local function hasKey(keys,key)
- local found = false
- keys:apply(function(x)
- if x == key then
- found = true
- end
- end)
- return found
- end
-
- for i=1,batchsize do
- local nodes = {}
- local keys = output[1][i]
- local keys2 = output2[1][i]
- for j,tree in ipairs(df.trees) do
- local stack = {}
- tree:score(input[i], stack)
- mytester:assert(hasKey(keys2, stack[#stack]._nodeId))
-
- for k,node in ipairs(stack) do
- if k > 1 then
- assert(node._nodeId)
- mytester:assert(hasKey(keys, node._nodeId), string.format("missing key=%d in %s", node._nodeId, tostring(keys)))
- table.insert(nodes, node._nodeId)
- end
- end
- end
- mytester:assert(#nodes == keys:size(1))
- mytester:assert(#df.trees == keys2:size(1))
- end
-end
-
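--- dt.uniquecounts counts the number of occurrences of each target class in a dataset.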
-function dttest.uniquecounts() -- DEPRECATED
- local target = torch.LongTensor(100):random(1,3)
- local input = torch.Tensor()
- local inputset = {input=input, target=target}
-
- local counts = dt.uniquecounts(nil, inputset, 3)
-
- mytester:assert(counts:sum() == 100)
- mytester:assert(counts:nElement() == 3)
-
- local res = torch.Tensor(3):zero()
- target:apply(function(t) res[t] = res[t] + 1 end)
-
- mytester:assertTensorEq(counts, res)
-end
-
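--- dt.entropy measures label impurity: the full mixed set should score highest,
--- a purer subset lower, and a single-class subset (near) zero.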
-function dttest.entropy() -- DEPRECATED
- -- 2 clusters with a bit overlap between classes:
- local input = torch.Tensor(100,2)
- input:narrow(1,1,50):normal(-1,.01)
- input:narrow(1,51,50):normal(2,.01)
-
- local target = torch.LongTensor(100):fill(3)
- target:narrow(1,1,45):fill(1)
- target:narrow(1,56,45):fill(2)
-
- local inputset = {input=input, target=target}
-
- -- test entropy()
- local fullent = dt.entropy(inputset)
-
- local halfset = {input=input:narrow(1,1,50), target=target:narrow(1,1,50)}
- local halfent = dt.entropy(halfset)
-
- local perfectset = {input=input:narrow(1,56,45), target=target:narrow(1,56,45)}
- local perfectent = dt.entropy(perfectset)
-
- mytester:assert(fullent > halfent)
- mytester:assert(halfent > perfectent)
- mytester:assert(perfectent < 0.0000001 and perfectent >= 0)
-end
-
-function dt.test(tests)
- math.randomseed(os.time())
- mytester = torch.Tester()
- mytester:add(dttest)
- mytester:run(tests)
-end
diff --git a/contrib/lua-torch/decisiontree/utils.h b/contrib/lua-torch/decisiontree/utils.h
deleted file mode 100644
index 8a0196a58..000000000
--- a/contrib/lua-torch/decisiontree/utils.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "error.h"
-
-#define check_tensors(L, a, b) \
- do { \
- if ((a)->nDimension != (b)->nDimension) \
- return LUA_HANDLE_ERROR_STR((L), "different tensor dimensions"); \
- for (int __local__var = 0; __local__var < (a)->nDimension; __local__var++) \
- if ((a)->size[__local__var] != (b)->size[__local__var]) \
- return LUA_HANDLE_ERROR_STR((L), "different tensor sizes"); \
- } while (0)
-
-#define check_tensor(L, t, type) \
- do { \
- if (!type##_isContiguous(t)) \
- return LUA_HANDLE_ERROR_STR((L), "tensor should be contiguous"); \
- } while (0)
-
-#define get_tensor_size(t, type) \
- (TH##type##Tensor_nElement(t))
-
-#define get_tensor(L, idx, type) \
- (TH##type##Tensor *)luaT_checkudata(L, idx, "torch." #type "Tensor")
-
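-/* Pushes every element of the array part of the table at stack index `arg`
- * onto the Lua stack and returns how many elements were pushed. */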
-static int push_table_contents(lua_State *L, int arg)
-{
- int size = 0;
- while(1) {
- lua_checkstack(L, 1);
- lua_rawgeti(L, arg, size + 1);
- if (lua_isnil(L, -1)) {
- lua_pop(L, 1);
- break;
- }
- size++;
- }
- return size;
-}
-
-#define verify_push_table_contents(L, idx, count) do { \
- int __tmp_count = push_table_contents(L, idx); \
- if (__tmp_count != count) { \
- lua_pop(L, __tmp_count); \
- LUA_HANDLE_ERROR_STR(L, "Table sizes do not match"); \
- } \
- } while(0)
diff --git a/contrib/lua-torch/decisiontree/utils.lua b/contrib/lua-torch/decisiontree/utils.lua
deleted file mode 100644
index c32c3d08b..000000000
--- a/contrib/lua-torch/decisiontree/utils.lua
+++ /dev/null
@@ -1,125 +0,0 @@
-local dt = require "decisiontree._env"
-
--- returns a buffer table local to a thread (not serialized)
-function dt.getBufferTable(name)
- local dt = require 'decisiontree'
- assert(torch.type(name) == 'string')
- dt.buffer = dt.buffer or {}
- dt.buffer[name] = dt.buffer[name] or {}
- return dt.buffer[name]
-end
-
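--- Generates a synthetic binary-classification dataset of torch.SparseTensor inputs:
--- examples are sampled around nCluster cluster centers (label = cluster index mod 2),
--- and `overlap` is the probability of flipping an example's label.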
-function dt.getSparseDummyData(nExample, nCluster, nFeature, overlap, nValid, nActive)
- local dt = require 'decisiontree'
- if torch.type(nExample) == 'table' then
- local opt = nExample
- nExample = opt.nExample
- nCluster = opt.nCluster
- nFeature = opt.nFeature
- overlap = opt.overlap
- nValid = opt.nValid
- nActive = opt.nActive
- end
- nExample = nExample or 100 -- training set size
- nCluster = nCluster or 10
- assert(nCluster >= 2)
- nFeature = math.max(2, nFeature or 10)
- overlap = overlap or 0
- nValid = nValid or nExample/10 -- validation set size
- nActive = nActive or math.max(2, nFeature / 2)
-
- -- sample nCluster centers
- local clusterCenter = torch.rand(nCluster, nFeature)
- local clusterLabel = torch.LongTensor(nCluster)
- local clusterExamples = {}
- for i=1,nCluster do
- clusterCenter[i]:add(i)
- clusterLabel[i] = i % 2
- clusterExamples[i] = {}
- end
-
- local sparseCenter = torch.Tensor()
-
- local shuffle = torch.LongTensor()
-
-   -- build dataset in sparse format
- local inputs = {}
- local targets = torch.Tensor(nExample+nValid)
- for i=1,nExample+nValid do
- local clusterIdx = torch.random(1,nCluster)
- table.insert(clusterExamples[clusterIdx], i)
-
- shuffle:randperm(nFeature)
- local keys = torch.LongTensor(nActive):copy(shuffle:narrow(1,1,nActive))
- sparseCenter:index(clusterCenter[clusterIdx], 1, keys)
- local stdiv = i <= nExample and 100 or 1000
- local values = torch.randn(nActive):div(stdiv):add(sparseCenter)
-
- table.insert(inputs, torch.SparseTensor(keys, values))
-
- local label = clusterLabel[clusterIdx]
- if math.random() < overlap then
- targets[i] = label == 1 and 0 or 1
- else
- targets[i] = label
- end
- end
-
- local _ = require 'moses'
- local validSet = dt.DataSet(_.slice(inputs, nExample+1, nExample+nValid), targets:narrow(1,nExample+1,nValid))
- local trainSet = dt.DataSet(_.slice(inputs, 1, nExample), targets:narrow(1,1,nExample))
-
- return trainSet, validSet, clusterExamples, inputs, targets
-end
-
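--- Dense counterpart of getSparseDummyData: same cluster construction, but inputs are
--- rows of a dense torch.Tensor instead of torch.SparseTensor key/value pairs.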
-function dt.getDenseDummyData(nExample, nCluster, nFeature, overlap, nValid)
- local dt = require 'decisiontree'
- if torch.type(nExample) == 'table' then
- local opt = nExample
- nExample = opt.nExample
- nCluster = opt.nCluster
- nFeature = opt.nFeature
- overlap = opt.overlap
- nValid = opt.nValid
- end
- nExample = nExample or 100 -- training set size
- nCluster = nCluster or 10
- assert(nCluster >= 2)
- nFeature = math.max(2, nFeature or 10)
- overlap = overlap or 0
- nValid = nValid or nExample/10 -- validation set size
-
- -- sample nCluster centers
- local clusterCenter = torch.rand(nCluster, nFeature)
- local clusterLabel = torch.LongTensor(nCluster)
- local clusterExamples = {}
- for i=1,nCluster do
- clusterCenter[i]:add(i)
- clusterLabel[i] = i % 2
- clusterExamples[i] = {}
- end
-
-   -- build dataset in dense format
- local inputs = torch.Tensor(nExample+nValid, nFeature)
- local targets = torch.Tensor(nExample+nValid)
- for i=1,nExample+nValid do
- local clusterIdx = torch.random(1,nCluster)
- table.insert(clusterExamples[clusterIdx], i)
-
- local stdiv = i <= nExample and 100 or 1000
- inputs[i]:normal():div(stdiv):add(clusterCenter[clusterIdx])
-
- local label = clusterLabel[clusterIdx]
- if math.random() < overlap then
- targets[i] = label == 1 and 0 or 1
- else
- targets[i] = label
- end
- end
-
- local _ = require 'moses'
- local validSet = dt.DataSet(inputs:narrow(1,nExample+1,nValid), targets:narrow(1,nExample+1,nValid))
- local trainSet = dt.DataSet(inputs:narrow(1,1,nExample), targets:narrow(1,1,nExample))
-
- return trainSet, validSet, clusterExamples, inputs, targets
-end