path: root/contrib/lua-torch/decisiontree
author    Vsevolod Stakhov <vsevolod@highsecure.ru>  2018-05-23 18:14:15 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru>  2018-05-23 18:14:15 +0100
commit    714eb56e1760fdfb26afccde92664d3a2f1e8435 (patch)
tree      84d1399acbb92f852b4bd64f9ea5412680b0c6ab /contrib/lua-torch/decisiontree
parent    220a51ff68013dd668a45b78c60a7b8bfc10f074 (diff)
download  rspamd-714eb56e1760fdfb26afccde92664d3a2f1e8435.tar.gz
          rspamd-714eb56e1760fdfb26afccde92664d3a2f1e8435.zip
[Minor] Move lua contrib libraries to lua- prefix
Diffstat (limited to 'contrib/lua-torch/decisiontree')
-rw-r--r--  contrib/lua-torch/decisiontree/CMakeLists.txt  54
-rw-r--r--  contrib/lua-torch/decisiontree/CartNode.lua  42
-rw-r--r--  contrib/lua-torch/decisiontree/CartTrainer.lua  180
-rw-r--r--  contrib/lua-torch/decisiontree/CartTree.lua  90
-rw-r--r--  contrib/lua-torch/decisiontree/DFD.lua  182
-rw-r--r--  contrib/lua-torch/decisiontree/DataSet.lua  142
-rw-r--r--  contrib/lua-torch/decisiontree/DecisionForest.lua  82
-rw-r--r--  contrib/lua-torch/decisiontree/DecisionForestTrainer.lua  22
-rw-r--r--  contrib/lua-torch/decisiontree/DecisionTree.lua  12
-rw-r--r--  contrib/lua-torch/decisiontree/GBDT_common.h  106
-rw-r--r--  contrib/lua-torch/decisiontree/GiniState.lua  54
-rw-r--r--  contrib/lua-torch/decisiontree/GradientBoostState.lua  57
-rw-r--r--  contrib/lua-torch/decisiontree/GradientBoostTrainer.lua  244
-rw-r--r--  contrib/lua-torch/decisiontree/LICENSE  201
-rw-r--r--  contrib/lua-torch/decisiontree/LogitBoostCriterion.lua  45
-rw-r--r--  contrib/lua-torch/decisiontree/MSECriterion.lua  13
-rw-r--r--  contrib/lua-torch/decisiontree/README.md  386
-rw-r--r--  contrib/lua-torch/decisiontree/RandomForestTrainer.lua  159
-rw-r--r--  contrib/lua-torch/decisiontree/Sparse2Dense.lua  88
-rw-r--r--  contrib/lua-torch/decisiontree/SparseTensor.lua  54
-rw-r--r--  contrib/lua-torch/decisiontree/TreeState.lua  191
-rw-r--r--  contrib/lua-torch/decisiontree/WorkPool.lua  156
-rw-r--r--  contrib/lua-torch/decisiontree/_env.lua  5
-rw-r--r--  contrib/lua-torch/decisiontree/benchmark.lua  171
-rw-r--r--  contrib/lua-torch/decisiontree/doc/benchmark.md  291
-rw-r--r--  contrib/lua-torch/decisiontree/error.h  24
-rw-r--r--  contrib/lua-torch/decisiontree/generic/CartTree.c  88
-rw-r--r--  contrib/lua-torch/decisiontree/generic/DFD.c  157
-rw-r--r--  contrib/lua-torch/decisiontree/generic/GBDT.c  392
-rw-r--r--  contrib/lua-torch/decisiontree/generic/GBDT_internal.c  312
-rw-r--r--  contrib/lua-torch/decisiontree/generic/GBDT_internal.h  34
-rw-r--r--  contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c  90
-rw-r--r--  contrib/lua-torch/decisiontree/generic/S2D.c  90
-rw-r--r--  contrib/lua-torch/decisiontree/hash_map.c  445
-rw-r--r--  contrib/lua-torch/decisiontree/hash_map.h  36
-rw-r--r--  contrib/lua-torch/decisiontree/init.c  77
-rw-r--r--  contrib/lua-torch/decisiontree/init.lua  70
-rw-r--r--  contrib/lua-torch/decisiontree/internal_hash_map.h  13
-rw-r--r--  contrib/lua-torch/decisiontree/khash.h  627
-rw-r--r--  contrib/lua-torch/decisiontree/math.lua  84
-rw-r--r--  contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec  40
-rw-r--r--  contrib/lua-torch/decisiontree/test.lua  817
-rw-r--r--  contrib/lua-torch/decisiontree/utils.h  45
-rw-r--r--  contrib/lua-torch/decisiontree/utils.lua  125
44 files changed, 6593 insertions, 0 deletions
diff --git a/contrib/lua-torch/decisiontree/CMakeLists.txt b/contrib/lua-torch/decisiontree/CMakeLists.txt
new file mode 100644
index 000000000..6434eddd9
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/CMakeLists.txt
@@ -0,0 +1,54 @@
+SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
+CMAKE_POLICY(VERSION 2.6)
+
+SET(src
+ init.c
+ hash_map.c
+)
+SET(luasrc
+ _env.lua
+ benchmark.lua
+ CartNode.lua
+ CartTrainer.lua
+ CartTree.lua
+ DataSet.lua
+ DecisionForest.lua
+ DecisionForestTrainer.lua
+ DecisionTree.lua
+ DFD.lua
+ GiniState.lua
+ GradientBoostState.lua
+ GradientBoostTrainer.lua
+ init.lua
+ LogitBoostCriterion.lua
+ math.lua
+ MSECriterion.lua
+ RandomForestTrainer.lua
+ Sparse2Dense.lua
+ SparseTensor.lua
+ test.lua
+ TreeState.lua
+ utils.lua
+ WorkPool.lua
+)
+
+IF (WITH_OPENMP)
+ FIND_PACKAGE(OpenMP)
+ IF(OPENMP_FOUND)
+ MESSAGE(STATUS "Compiling with OpenMP support")
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+ SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+ ENDIF(OPENMP_FOUND)
+ENDIF (WITH_OPENMP)
+
+ADD_TORCH_PACKAGE(decisiontree "${src}" "${luasrc}" "A decision tree library, for Torch")
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+### Torch packages expect the library prefix to be "lib"
+SET_TARGET_PROPERTIES(decisiontree PROPERTIES
+ PREFIX "lib"
+ IMPORT_PREFIX "lib")
+TARGET_LINK_LIBRARIES(decisiontree ${TH_LIBRARIES})
+INSTALL(TARGETS decisiontree DESTINATION ${RSPAMD_LIBDIR})
diff --git a/contrib/lua-torch/decisiontree/CartNode.lua b/contrib/lua-torch/decisiontree/CartNode.lua
new file mode 100644
index 000000000..e6a85006c
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/CartNode.lua
@@ -0,0 +1,42 @@
+local dt = require 'decisiontree._env'
+local CartNode = torch.class("dt.CartNode", dt)
+
+function CartNode:__init(nodeId, leftChild, rightChild, splitFeatureId, splitFeatureValue, score, splitGain)
+ self.nodeId = nodeId or 0
+ self.leftChild = leftChild
+ self.rightChild = rightChild
+ self.splitFeatureId = splitFeatureId or -1
+ self.splitFeatureValue = splitFeatureValue or 0
+ self.score = score or 0
+ self.splitGain = splitGain
+end
+
+function CartNode:__tostring__()
+ return self:recursivetostring()
+end
+
+function CartNode:recursivetostring(indent)
+ indent = indent or ' '
+
+ -- Is this a leaf node?
+ local res = ''
+ if not (self.leftChild or self.rightChild) then
+ res = res .. self.score .. '\n'
+ else
+ -- Print the criteria
+ res = res .. 'input[' .. self.splitFeatureId .. '] <' .. self.splitFeatureValue .. '?\n'
+
+ -- Print the branches
+ if self.leftChild then
+ res = res .. indent .. 'True->' .. self.leftChild:recursivetostring(indent .. ' ')
+ end
+ if self.rightChild then
+ res = res .. indent .. 'False->' .. self.rightChild:recursivetostring(indent .. ' ')
+ end
+ end
+ return res
+end
+
+function CartNode:clone()
+ return CartNode(self.nodeId, self.leftChild, self.rightChild, self.splitFeatureId, self.splitFeatureValue, self.score, self.splitGain)
+end
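
For reference, a minimal sketch of how CartNode composes into a tree (not part of the diff; it assumes the decisiontree package is built and loadable):

    local dt = require 'decisiontree'

    -- a two-leaf stump built by hand: leaves carry scores, the root carries the split
    local leftLeaf = dt.CartNode(1, nil, nil, -1, 0, 0.9)    -- leaf with score 0.9
    local rightLeaf = dt.CartNode(2, nil, nil, -1, 0, -0.3)  -- leaf with score -0.3
    local root = dt.CartNode(0, leftLeaf, rightLeaf, 5, 2.5, 0) -- split on input[5] < 2.5
    print(root) -- __tostring__ prints the branch structure recursively
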
diff --git a/contrib/lua-torch/decisiontree/CartTrainer.lua b/contrib/lua-torch/decisiontree/CartTrainer.lua
new file mode 100644
index 000000000..63ae6c148
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/CartTrainer.lua
@@ -0,0 +1,180 @@
+local dt = require "decisiontree._env"
+local _ = require "moses"
+
+local CartTrainer = torch.class("dt.CartTrainer", dt)
+
+-- Generic CART trainer
+function CartTrainer:__init(dataset, minLeafSize, maxLeafNodes)
+ assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+ self.dataset = dataset
+ self.minLeafSize = assert(minLeafSize) -- min examples per leaf
+ self.maxLeafNodes = assert(maxLeafNodes) -- max leaf nodes in tree
+
+ -- by default, single thread
+ self.parallelMode = 'singlethread'
+end
+
+function CartTrainer:train(rootTreeState, activeFeatures)
+ assert(torch.isTypeOf(rootTreeState, 'dt.TreeState'))
+ assert(torch.isTensor(activeFeatures))
+ local root = dt.CartNode()
+ root.id = 0
+ root.score = rootTreeState:score(self.dataset)
+
+ local nleaf = 1
+
+ -- TODO : nodeparallel: parallelize here. The queue is a workqueue.
+ local queue = {}
+ table.insert(queue, 1, {cartNode=root, treeState=rootTreeState})
+
+ while #queue > 0 and nleaf < self.maxLeafNodes do
+ local treeGrowerArgs = table.remove(queue, #queue)
+ local currentTreeState = treeGrowerArgs.treeState
+
+ -- Note: if minLeafSize = 1 and maxLeafNode = inf, then each example will be its own leaf...
+ if self:hasEnoughTrainingExamplesToSplit(currentTreeState.exampleIds:size(1)) then
+ nleaf = self:processNode(nleaf, queue, treeGrowerArgs.cartNode, currentTreeState, activeFeatures)
+ end
+ end
+
+ -- CartTree with random branching (when feature is missing)
+ local branchleft = function() return math.random() < 0.5 end
+ return dt.CartTree(root, branchleft), nleaf
+end
+
+function CartTrainer:processNode(nleaf, queue, node, treeState, activeFeatures)
+ local bestSplit
+ if self.parallelMode == 'singlethread' then
+ bestSplit = self:findBestSplitForAllFeatures(treeState, activeFeatures)
+ elseif self.parallelMode == 'featureparallel' then
+ bestSplit = self:findBestSplitForAllFeaturesFP(treeState, activeFeatures)
+ else
+ error("Unrecognized parallel mode: " .. self.parallelMode)
+ end
+
+ if bestSplit then
+ local leftTreeState, rightTreeState = treeState:branch(bestSplit, self.dataset)
+ assert(bestSplit.leftChildSize + bestSplit.rightChildSize == leftTreeState.exampleIds:size(1) + rightTreeState.exampleIds:size(1), "The left and right subtrees don't match the split found!")
+ self:setValuesAndCreateChildrenForNode(node, bestSplit, leftTreeState, rightTreeState, nleaf)
+
+ table.insert(queue, 1, {cartNode=node.leftChild, treeState=leftTreeState})
+ table.insert(queue, 1, {cartNode=node.rightChild, treeState=rightTreeState})
+
+ return nleaf + 1
+ end
+
+ return nleaf
+end
+
+function CartTrainer:findBestSplitForAllFeatures(treeState, activeFeatures)
+ local timer = torch.Timer()
+ local bestSplit = treeState:findBestSplit(self.dataset, activeFeatures, self.minLeafSize, -1, -1)
+
+ if bestSplit then
+ assert(torch.type(bestSplit) == 'table')
+ end
+
+ if dt.PROFILE then
+ print("findBestSplitForAllFeatures time="..timer:time().real)
+ end
+ return bestSplit
+end
+
+-- Updates the parentNode with the bestSplit information by creating left/right child nodes.
+function CartTrainer:setValuesAndCreateChildrenForNode(parentNode, bestSplit, leftState, rightState, nleaf)
+ assert(torch.isTypeOf(parentNode, 'dt.CartNode'))
+ assert(torch.type(bestSplit) == 'table')
+ assert(torch.isTypeOf(leftState, 'dt.TreeState'))
+ assert(torch.isTypeOf(rightState, 'dt.TreeState'))
+ assert(torch.type(nleaf) == 'number')
+
+ local leftChild = dt.CartNode()
+ leftChild.score = leftState:score(self.dataset)
+ leftChild.nodeId = 2 * nleaf - 1
+
+ local rightChild = dt.CartNode()
+ rightChild.score = rightState:score(self.dataset)
+ rightChild.nodeId = 2 * nleaf
+
+ parentNode.splitFeatureId = bestSplit.splitId
+ parentNode.splitFeatureValue = bestSplit.splitValue
+ parentNode.leftChild = leftChild
+ parentNode.rightChild = rightChild
+ parentNode.splitGain = bestSplit.splitGain
+end
+
+-- We need at least 2 * minLeafSize examples in the parent to satisfy >= minLeafSize examples per child
+function CartTrainer:hasEnoughTrainingExamplesToSplit(count)
+ return count >= 2 * self.minLeafSize
+end
+
+-- call before training to enable feature-parallelization
+function CartTrainer:featureParallel(workPool)
+ assert(self.parallelMode == 'singlethread', self.parallelMode)
+ self.parallelMode = 'featureparallel'
+ self.workPool = torch.type(workPool) == 'number' and dt.WorkPool(workPool) or workPool
+ assert(torch.isTypeOf(self.workPool, 'dt.WorkPool'))
+
+ -- this deletes all SparseTensor hash maps so that they aren't serialized
+ self.dataset:deleteIndex()
+
+ -- require the dt package
+ self.workPool:update('require', {libname='decisiontree',varname='dt'})
+ -- setup worker store (each worker will have its own copy)
+ local store = {
+ dataset=self.dataset,
+ minLeafSize=self.minLeafSize
+ }
+ self.workPool:update('storeKeysValues', store)
+end
+
+-- feature parallel
+function CartTrainer:findBestSplitForAllFeaturesFP(treeState, activeFeatures)
+ local timer = torch.Timer()
+ local bestSplit
+ if treeState.findBestSplitFP then
+ bestSplit = treeState:findBestSplitFP(self.dataset, activeFeatures, self.minLeafSize, self.workPool.nThread)
+ end
+
+ if not bestSplit then
+ for i=1,self.workPool.nThread do
+ -- upvalues
+ local treeState = treeState
+ local shardId = i
+ local nShard = self.workPool.nThread
+ local featureIds = activeFeatures
+ -- closure
+ local task = function(store)
+ assert(store.dataset)
+ assert(store.minLeafSize)
+ if treeState.threadInitialize then
+ treeState:threadInitialize()
+ end
+
+ local bestSplit = treeState:findBestSplit(store.dataset, featureIds, store.minLeafSize, shardId, nShard)
+ return bestSplit
+ end
+
+ self.workPool:writeup('execute', task)
+ end
+
+ for i=1,self.workPool.nThread do
+ local taskname, candidateSplit = self.workPool:read()
+ assert(taskname == 'execute')
+ if candidateSplit then
+ if ((not bestSplit) or candidateSplit.splitGain < bestSplit.splitGain) then
+ bestSplit = candidateSplit
+ end
+ end
+ end
+ end
+
+ if bestSplit then
+ assert(torch.type(bestSplit) == 'table')
+ end
+
+ if dt.PROFILE then
+ print("findBestSplitForAllFeaturesFP time="..timer:time().real)
+ end
+ return bestSplit
+end
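
A minimal single-threaded training sketch, under stated assumptions: a dense binary-classification dataset, and the generic TreeState:findBestSplit provided by TreeState.lua (listed in the diffstat above but not shown here). The values are illustrative:

    local dt = require 'decisiontree'

    local input = torch.rand(100, 10)             -- 100 examples, 10 dense features
    local target = torch.Tensor(100):random(0, 1) -- binary targets
    local dataset = dt.DataSet(input, target)

    local trainer = dt.CartTrainer(dataset, 5, 16) -- minLeafSize=5, maxLeafNodes=16
    local state = dt.GiniState(dataset:getExampleIds())
    local activeFeatures = torch.LongTensor():range(1, 10)
    local tree, nleaf = trainer:train(state, activeFeatures)
    print(nleaf)
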
diff --git a/contrib/lua-torch/decisiontree/CartTree.lua b/contrib/lua-torch/decisiontree/CartTree.lua
new file mode 100644
index 000000000..c74dfda9e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/CartTree.lua
@@ -0,0 +1,90 @@
+local _ = require "moses"
+local dt = require 'decisiontree._env'
+
+-- CART (classification-regression decision tree).
+-- The example is always branched to the left when the splitting feature is missing.
+local CartTree = torch.class("dt.CartTree", "dt.DecisionTree", dt)
+
+function CartTree:__init(root, branchleft)
+ assert(torch.isTypeOf(root, 'dt.CartNode'))
+ self.root = root
+ self.branchleft = branchleft or function() return true end
+end
+
+-- TODO optimize this
+function CartTree:score(input, stack, optimized)
+ if optimized == true and stack == nil and torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
+ return input.nn.CartTreeFastScore(input, self.root, input.new())
+ end
+ return self:recursivescore(self.root, input, stack)
+end
+
+-- Continuous: if input[node.splitFeatureId] < node.splitFeatureValue then leftNode else rightNode
+-- Binary: if input[node.splitFeatureId] == 0 then leftNode else rightNode
+-- when stack is provided, it is returned as the third argument containing the stack of nodes from root to leaf
+function CartTree:recursivescore(node, input, stack)
+ assert(torch.isTypeOf(node, 'dt.CartNode'))
+
+ if stack then
+ stack = torch.type(stack) == 'table' and stack or {}
+ table.insert(stack, node)
+ end
+
+ if not (node.leftChild or node.rightChild) then
+ return node.score, node.nodeId, stack
+ elseif not node.leftChild then
+ return self:recursivescore(node.rightChild, input, stack)
+ elseif not node.rightChild then
+ return self:recursivescore(node.leftChild, input, stack)
+ end
+
+ local splitId = node.splitFeatureId
+ local splitVal = node.splitFeatureValue
+
+ if input[splitId] then -- if has key
+ local featureVal = input[splitId]
+ local nextNode = featureVal < splitVal and node.leftChild or node.rightChild
+ return self:recursivescore(nextNode, input, stack)
+ end
+
+ -- if feature is missing, branch left
+ local nextNode = self.branchleft() and node.leftChild or node.rightChild
+ return self:recursivescore(nextNode, input, stack)
+end
+
+function CartTree:__tostring__()
+ return self.root:recursivetostring()
+end
+
+-- expects a stack returned by score
+function CartTree:stackToString(stack, input)
+ assert(torch.type(stack) == 'table')
+ assert(torch.isTypeOf(stack[1], 'dt.CartNode'))
+
+ local res = 'Stack nodes from root to leaf\n'
+ for i,node in ipairs(stack) do
+ if not (node.leftChild or node.rightChild) then
+ res = res .. "score="..node.score .. '\n'
+ else
+ local istr = ''
+ if input then
+ istr = '=' .. (input[node.splitFeatureId] or 'nil')
+ end
+ res = res .. 'input[' .. node.splitFeatureId .. ']' .. istr ..' < ' .. node.splitFeatureValue .. ' ? '
+ res = res .. '(' .. ((node.leftChild and node.rightChild) and 'LR' or node.leftChild and 'L' or node.rightChild and 'R' or 'WAT?') .. ') '
+ if node.leftChild == stack[i+1] then
+ res = res .. 'Left\n'
+ elseif node.rightChild == stack[i+1] then
+ res = res .. 'Right\n'
+ else
+ error"stackToString error"
+ end
+ end
+ end
+ return res .. #stack .. " nodes"
+end
+
+function CartTree:clone()
+ return CartTree(self.root:clone(), self.branchleft)
+end
+
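A scoring sketch (assumptions: torch.SparseTensor(keys, values) is the constructor provided by SparseTensor.lua from this diff, and `tree` is a dt.CartTree such as the one returned by CartTrainer:train):

    local input = torch.SparseTensor(torch.LongTensor{5}, torch.Tensor{1.7})
    local score, nodeId, stack = tree:score(input, {})
    print(score, nodeId)
    print(tree:stackToString(stack, input)) -- root-to-leaf path taken by this input
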
diff --git a/contrib/lua-torch/decisiontree/DFD.lua b/contrib/lua-torch/decisiontree/DFD.lua
new file mode 100644
index 000000000..e4746212a
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DFD.lua
@@ -0,0 +1,182 @@
+-- nn.DFD: Decision Forest Discretizer
+-- Takes a dense input and outputs a sparse output.
+-- Each node in the forest is its own feature.
+-- When a node is traversed, its corresponding feature takes on a value of 1.
+-- For all non-traversed nodes, the feature is 0.
+local DFD, parent = torch.class("nn.DFD", "nn.Module")
+
+-- TODO: add :type, as the default will convert the long tensors
+function DFD:__init(df, onlyLastNode)
+ parent.__init(self)
+ if torch.type(df) == 'table' then
+ self:reconstructFromInfo(df)
+ else
+ assert(torch.type(df) == 'dt.DecisionForest')
+
+ self.rootIds = torch.LongTensor()
+ -- nodeId of left and right child nodes
+ self.leftChild = torch.LongTensor()
+ self.rightChild = torch.LongTensor()
+ -- index and value of the feature that splits this node
+ self.splitFeatureId = torch.LongTensor()
+ self.splitFeatureValue = torch.Tensor()
+ -- initialize state given df
+ self:convertForest2Tensors(df)
+ self:clearState()
+ end
+ self.onlyLastNode = onlyLastNode
+ self.nTrees = self.rootIds:size(1)
+end
+
+-- converts a DecisionForest to efficient tensor representation
+function DFD:convertForest2Tensors(df)
+ self.rootIds:resize(#df.trees)
+
+ -- nodeId will map to featureId
+ local nodeId = 0
+ -- sets nodeIds of all subnodes
+ -- and measures the maximum depth over all trees
+ local function recursiveTree(node, depth)
+ depth = (depth or 0) + 1
+ local rdepth = depth
+ nodeId = nodeId + 1
+ node._nodeId = nodeId
+
+ if node.leftChild then
+ rdepth = math.max(rdepth, recursiveTree(node.leftChild, depth))
+ end
+ if node.rightChild then
+ rdepth = math.max(rdepth, recursiveTree(node.rightChild, depth))
+ end
+ return rdepth
+ end
+
+ -- sum over trees of max depth
+ self.depth = 0
+ for i,tree in ipairs(df.trees) do
+ assert(torch.isTypeOf(tree.root, 'dt.CartNode'))
+ self.depth = self.depth + recursiveTree(tree.root)
+ end
+ -- remove roots from depth
+ self.depth = self.depth - self.rootIds:size(1)
+
+ -- total number of nodes in all trees
+ self.nNode = nodeId
+
+ -- nodeId of left and right child nodes
+ self.leftChild:resize(self.nNode):fill(-1)
+ self.rightChild:resize(self.nNode):fill(-1)
+ -- index and value of the feature that splits this node
+ self.splitFeatureId:resize(self.nNode):fill(-1)
+ self.splitFeatureValue:resize(self.nNode):fill(-1)
+
+ -- aggregates CartNode attributes to an efficient tensor representation
+ local function recursiveTree2(node)
+ local nodeId = assert(node._nodeId)
+ assert(self.splitFeatureId[nodeId] == -1)
+
+ if node.leftChild then
+ self.leftChild[nodeId] = assert(node.leftChild._nodeId)
+ recursiveTree2(node.leftChild)
+ else
+ self.leftChild[nodeId] = 0
+ end
+
+ if node.rightChild then
+ self.rightChild[nodeId] = assert(node.rightChild._nodeId)
+ recursiveTree2(node.rightChild)
+ else
+ self.rightChild[nodeId] = 0
+ end
+
+ -- each node splits the dataset on a feature id-value pair
+ self.splitFeatureId[nodeId] = assert(node.splitFeatureId)
+ self.splitFeatureValue[nodeId] = assert(node.splitFeatureValue)
+ end
+
+ for i,tree in ipairs(df.trees) do
+ self.rootIds[i] = assert(tree.root._nodeId)
+ recursiveTree2(tree.root)
+ end
+
+ assert(self.leftChild:min() >= 0)
+ assert(self.rightChild:min() >= 0)
+end
+
+-- input is a batchsize x inputsize tensor
+function DFD:updateOutput(input)
+ assert(torch.isTensor(input))
+ assert(input:dim() == 2)
+ input = input:contiguous()
+
+ local batchsize, inputsize = input:size(1), input:size(2)
+ local size = self.onlyLastNode and self.nTrees or self.depth
+
+ -- each example's output keys are resized to the maximum size they can take on (sum of tree depths, or nTrees if onlyLastNode)
+ self.outputkeys = self.outputkeys or torch.LongTensor()
+ self.outputkeys:resize(batchsize, size)
+ -- values are 1
+ self.outputvalues = self.outputvalues or input.new()
+ self.outputvalues:resize(batchsize, size):fill(1)
+
+ self.output = input.nn.DFD_computeOutput(self.outputkeys, self.outputvalues, self.rootIds, self.leftChild, self.rightChild, self.splitFeatureId, self.splitFeatureValue, input, self.onlyLastNode)
+ return self.output
+end
+
+function DFD:type(type, tensorCache)
+ if type then
+ local info = self:getReconstructionInfo()
+ for k, v in pairs(info) do
+ if torch.type(v) ~= 'torch.LongTensor' then
+ info[k] = nil
+ end
+ end
+ parent.type(self, type, tensorCache)
+ self:reconstructFromInfo(info)
+ return self
+ else
+ return parent.type(self)
+ end
+end
+
+function DFD:updateGradInput()
+ error"Not Implemented"
+end
+
+function DFD:clearState()
+ self.output = {{},{}}
+ self.taskbuffer = {}
+ self.outputkeys = nil
+ self.outputvalues = nil
+ self._range = nil
+ self._indices = nil
+ self._mask = nil
+end
+
+function DFD:reconstructFromInfo(DFDinfo)
+ for k,v in pairs(DFDinfo) do
+ self[k] = v
+ end
+ assert(self.leftChild:nDimension() == 1)
+ assert(self.rightChild:nDimension() == 1)
+ assert(self.leftChild:size(1) == self.nNode)
+ assert(self.rightChild:size(1) == self.nNode)
+ assert(self.leftChild:min() >= 0)
+ assert(self.rightChild:min() >= 0)
+ assert(self.splitFeatureId:nDimension() == 1)
+ assert(self.splitFeatureValue:nDimension() == 1)
+ assert(self.splitFeatureId:size(1) == self.splitFeatureValue:size(1))
+end
+
+function DFD:getReconstructionInfo()
+ local DFDinfo = {
+ nNode = self.nNode,
+ rootIds = self.rootIds,
+ leftChild = self.leftChild,
+ rightChild = self.rightChild,
+ splitFeatureId = self.splitFeatureId,
+ splitFeatureValue = self.splitFeatureValue,
+ depth = self.depth
+ }
+ return DFDinfo
+end
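
A discretization sketch (it assumes `forest` is a trained dt.DecisionForest); per clearState above, the output is a pair {keys, values}:

    local dfd = nn.DFD(forest)
    local batch = torch.rand(32, 10) -- batchsize x inputsize, dense
    local output = dfd:forward(batch)
    local keys, values = output[1], output[2]
    -- keys hold the ids of the nodes traversed by each example; values are all 1
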
diff --git a/contrib/lua-torch/decisiontree/DataSet.lua b/contrib/lua-torch/decisiontree/DataSet.lua
new file mode 100644
index 000000000..15058a7c6
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DataSet.lua
@@ -0,0 +1,142 @@
+local dt = require "decisiontree._env"
+
+local DataSet = torch.class("dt.DataSet", dt)
+
+function DataSet:__init(input, target, nThreads)
+ if torch.type(input) == 'table' then
+ assert(torch.isTypeOf(input[1], 'torch.SparseTensor'))
+ else
+ assert(torch.isTensor(input))
+ end
+ self.input = input
+ assert(torch.isTensor(target))
+ self.target = target
+ self.nThreads = nThreads or 1
+
+ self.sortedFeatureValues, self.featureIds = self:sortFeatureValues(input)
+end
+
+-- group examples by featureId. For each featureId, sort examples by featureValue (ascending order)
+-- returns a table mapping featureIds to sorted lists of exampleIds
+-- e.g. {featureId={example1,example2,example3}}
+function DataSet:sortFeatureValues(inputs)
+ local isSparse = torch.typename(inputs[1]):match('torch.*SparseTensor')
+ assert(isSparse or torch.isTensor(inputs))
+
+ local featureIds = torch.LongTensor()
+ local dataset = {} -- TODO use tds.Hash (will require SparseTensor to be userdata)
+ if isSparse then
+ local proto = inputs[1].values
+ -- get list of featureIds
+ local featureMap = {}
+ for i,input in ipairs(inputs) do
+ input.keys:apply(function(key)
+ featureMap[key] = (featureMap[key] or 0) + 1
+ end)
+ end
+ local _ = require "moses"
+ featureIds = featureIds.new(_.keys(featureMap))
+ local featureCounts = torch.LongTensor(featureIds:size(1))
+ for i=1,featureIds:size(1) do
+ featureCounts[i] = featureMap[featureIds[i]]
+ end
+
+ for i=1,featureIds:size(1) do
+ local featureId = featureIds[i]
+ local featureCount = featureCounts[i]
+ dataset[featureId] = {
+ values=proto.new(featureCount),
+ examples=torch.LongTensor(featureCount),
+ i=0
+ }
+ end
+
+ for exampleId,input in ipairs(inputs) do
+ local sparseIdx = 0
+ input.keys:apply(function(key)
+ sparseIdx = sparseIdx + 1
+ local f = dataset[key]
+ f.i = f.i + 1
+ f.values[f.i] = input.values[sparseIdx]
+ f.examples[f.i] = exampleId
+ end)
+ end
+
+ local sortVal, sortIdx = proto.new(), torch.LongTensor()
+ for featureId,f in pairs(dataset) do
+ assert(f.values:size(1) == f.i)
+ sortVal:sort(sortIdx, f.values, 1, false)
+
+ local sortedExampleIds = torch.LongTensor(f.i)
+ sortedExampleIds:index(f.examples, 1, sortIdx)
+
+ dataset[featureId] = sortedExampleIds
+ end
+ else
+ assert(torch.isTensor(inputs))
+ featureIds:range(1,inputs:size(2))
+
+ for i=1,inputs:size(2) do
+ local featureId = i
+ local values = inputs:select(2, i)
+ local _, sortedFeatureExampleIds = values:sort(1, false)
+ dataset[featureId] = sortedFeatureExampleIds
+ end
+ end
+
+ return dataset, featureIds
+end
+
+function DataSet:getSortedFeature(featureId)
+ assert(self.sortedFeatureValues)
+ return self.sortedFeatureValues[featureId]
+end
+
+function DataSet:size()
+ return self.target:size(1)
+end
+
+function DataSet:getExampleIds()
+ if not self.exampleIds then
+ self.exampleIds = torch.LongTensor():range(1,self:size())
+ end
+ return self.exampleIds
+end
+
+function DataSet:countPositive(exampleIds)
+ assert(torch.type(exampleIds) == 'torch.LongTensor')
+ local dt = require 'decisiontree'
+ local buffer = dt.getBufferTable('DataSet')
+ buffer.tensor = buffer.tensor or self.target.new()
+ buffer.tensor:index(self.target, 1, exampleIds)
+ local nPositive = 0
+ buffer.tensor:apply(function(x)
+ if x > 0 then nPositive = nPositive + 1 end
+ end)
+ return nPositive
+end
+
+function DataSet:initScore()
+ self.score = self.score or torch.Tensor()
+ self.score:resize(self:size()):fill(0)
+end
+
+function DataSet:buildIndex()
+ if torch.type(self.input) == 'table' then
+ for exampleId,input in ipairs(self.input) do
+ if torch.isTypeOf(input, 'torch.SparseTensor') then
+ input:buildIndex()
+ end
+ end
+ end
+end
+
+function DataSet:deleteIndex()
+ if torch.type(self.input) == 'table' then
+ for exampleId,input in ipairs(self.input) do
+ if torch.isTypeOf(input, 'torch.SparseTensor') then
+ input:deleteIndex()
+ end
+ end
+ end
+end
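
A small sketch (not part of the diff) of what DataSet precomputes: for each feature, the exampleIds sorted by ascending feature value:

    local dt = require 'decisiontree'

    local input = torch.Tensor{{0.3}, {0.1}, {0.2}} -- 3 examples, 1 feature
    local target = torch.Tensor{1, 0, 1}
    local dataset = dt.DataSet(input, target)
    print(dataset:getSortedFeature(1)) -- 2, 3, 1: examples ordered by feature value
    print(dataset:countPositive(dataset:getExampleIds())) -- 2 targets are > 0
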
diff --git a/contrib/lua-torch/decisiontree/DecisionForest.lua b/contrib/lua-torch/decisiontree/DecisionForest.lua
new file mode 100644
index 000000000..cac748e7e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DecisionForest.lua
@@ -0,0 +1,82 @@
+local dt = require "decisiontree._env"
+
+-- Decision forest that ensembles a bag of decision trees.
+local DecisionForest = torch.class("dt.DecisionForest", "dt.DecisionTree", dt)
+
+function DecisionForest:__init(trees, weight, bias)
+ assert(torch.type(trees) == 'table')
+ self.trees = trees
+ if #trees == 0 then
+ self.weight = weight or torch.Tensor()
+ assert(torch.isTensor(self.weight))
+ assert(self.weight:nElement() == 0)
+ else
+ assert(torch.isTypeOf(trees[1], 'dt.DecisionTree'))
+ self.weight = weight or torch.Tensor(#trees):fill(1)
+ assert(torch.isTensor(self.weight))
+ assert(self.weight:dim() == 1)
+ assert(self.weight:min() >= 0, "Expecting positive weights")
+ assert(#trees == self.weight:size(1))
+ end
+
+ self.bias = bias or 0
+ assert(torch.type(self.bias) == 'number')
+end
+
+function DecisionForest:score(input, incrementalId)
+ assert(torch.isTensor(input))
+
+ local buffer = {}
+ if incrementalId then
+ self.buffers = self.buffers or {}
+ self.buffers[incrementalId] = self.buffers[incrementalId] or {}
+ buffer = self.buffers[incrementalId]
+ end
+ buffer.initialCounter = buffer.initialCounter or 0
+
+ -- TODO: score in parallel
+ local output
+ if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
+ buffer.output = buffer.output or input.new()
+ output = buffer.output
+ assert(output:nElement() == 0 or output:size(1) == input:size(1))
+ if output:nElement() == 0 then
+ output:resize(input:size(1)):fill(self.bias)
+ end
+ for i,tree in ipairs(self.trees) do
+ if i > buffer.initialCounter then
+ local score = tree:score(input, nil, true)
+ output:add(self.weight[i], score)
+ end
+ end
+ else
+ output = buffer.output or self.bias
+ for i,tree in ipairs(self.trees) do
+ if i > buffer.initialCounter then
+ output = output + tree:score(input) * self.weight[i]
+ end
+ end
+ buffer.output = output
+ end
+
+ buffer.initialCounter = #self.trees
+
+ return output
+end
+
+function DecisionForest:add(tree, weight)
+ assert(torch.type(weight) == 'number')
+ assert(weight > 0)
+ table.insert(self.trees, tree)
+ self.weight:resize(#self.trees)
+ self.weight[#self.trees] = weight
+ return self
+end
+
+function DecisionForest:clone()
+ local trees = {}
+ for i, tree in ipairs(self.trees) do
+ trees[i] = tree:clone()
+ end
+ return DecisionForest(trees, self.weight:clone(), self.bias)
+end
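
The ensemble is additive: prediction = bias + sum_i weight[i] * tree_i(input). A usage sketch, assuming `tree` is any dt.DecisionTree:

    local forest = dt.DecisionForest({}, torch.Tensor(), 0.5) -- empty forest, bias 0.5
    forest:add(tree, 0.1) -- the weight plays the role of shrinkage in GBDT
    local prediction = forest:score(torch.rand(10))
    -- passing an incrementalId caches per-tree contributions between calls
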
diff --git a/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua b/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua
new file mode 100644
index 000000000..fc903678b
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua
@@ -0,0 +1,22 @@
+local dt = require "decisiontree._env"
+
+-- Interface for all decisionForestTrainers
+local DFT = torch.class("dt.DecisionForestTrainer", dt)
+
+-- Train a DecisionForest with examples, a table of valid featureIds and a dataset (i.e. sortedExamplesByFeatureId)
+function DFT:train(examples, validFeatureIds, dataset)
+ assert(torch.type(examples) == "table")
+ assert(torch.isTypeOf(examples[1], "dt.LabeledExample"))
+
+ assert(torch.type(validFeatureIds) == 'table')
+
+ assert(torch.type(dataset) == 'table')
+ for k,v in pairs(dataset) do
+ assert(torch.type(v) == 'table')
+ assert(torch.isTypeOf(v[1], 'dt.LabeledExample'))
+ break
+ end
+ -- dataset is a table mapping featureIds to sorted lists of LabeledExamples
+ -- e.g. {featureId={example1,example2,example3}}
+ error"Not Implemented"
+end
diff --git a/contrib/lua-torch/decisiontree/DecisionTree.lua b/contrib/lua-torch/decisiontree/DecisionTree.lua
new file mode 100644
index 000000000..c61bc3757
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DecisionTree.lua
@@ -0,0 +1,12 @@
+local dt = require "decisiontree._env"
+
+-- An interface for decision trees.
+local DecisionTree = torch.class("dt.DecisionTree", dt)
+
+-- Score an input example and return the prediction score.
+-- input is a Tensor or SparseTensor
+-- return prediction score and nodeId
+function DecisionTree:score(input)
+ error"Not Implemented"
+ return score, nodeId
+end
diff --git a/contrib/lua-torch/decisiontree/GBDT_common.h b/contrib/lua-torch/decisiontree/GBDT_common.h
new file mode 100644
index 000000000..eb993702d
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/GBDT_common.h
@@ -0,0 +1,106 @@
+#include "khash.h"
+#include <pthread.h>
+
+#define computeGradientBoostLoss(g, h) (-(g)*(g)/(h))
+
+// we use khash to make iteration faster than lua tables
+KHASH_SET_INIT_INT64(long)
+
+// defines the data we need for running an instance of the algorithm, plus its constructor/destructor
+typedef struct {
+ khash_t(long)* exampleMap;
+ THLongTensor *exampleIdsWithFeature_cache;
+ long minLeafSize;
+} GBRunData;
+
+
+// allocates data that cannot be shared between threads
+static void gb_local_create_run_data(GBRunData *run_data) {
+ run_data->exampleMap = kh_init(long);
+ run_data->exampleIdsWithFeature_cache = THLongTensor_new();
+}
+
+static void gb_create_run_data(GBRunData *run_data, int minLeafSize) {
+ gb_local_create_run_data(run_data);
+ run_data->minLeafSize = minLeafSize;
+}
+
+static void gb_destroy_run_data(GBRunData *run_data) {
+ THLongTensor_free(run_data->exampleIdsWithFeature_cache);
+ kh_destroy(long, run_data->exampleMap);
+}
+
+// initializes the data required by the optimizer for the given feature.
+static THLongTensor *gb_internal_prepare(lua_State *L, THLongTensor *exampleIds,
+ THLongTensor *exampleIdsWithFeature_cache, int input_index, long feature_id,
+ khash_t(long)* exampleMap) {
+ long *exampleIds_data = THLongTensor_data(exampleIds);
+ long exampleIds_size = THLongTensor_size(exampleIds, 0);
+
+ int ret = 0;
+
+ // if the input is a table, then we have a sparse dataset
+ if (lua_istable(L, input_index)) {
+ if (exampleIds_size == 0) {
+ return NULL;
+ }
+ else {
+ // loops over the examples' ids that this node has to evaluate and, if they have the feature
+ // we're looking for, marks them as present and stores them in the order provided by the
+ // dataset
+ THLongTensor_resize1d(exampleIdsWithFeature_cache, exampleIds_size);
+ kh_clear(long, exampleMap);
+ kh_resize(long, exampleMap, exampleIds_size*8);
+ long *exampleIdsWithFeature_data = THLongTensor_data(exampleIdsWithFeature_cache);
+ long j = 0;
+ // for each sample to be evaluated
+ for (long i = 0; i < exampleIds_size; i++) {
+ // gets the representation for the example
+ lua_pushinteger(L, exampleIds_data[i]);
+ lua_gettable(L, input_index);
+
+ // builds the index, which happens only once per thread for efficiency
+ lua_pushstring(L, "buildIndex");
+ lua_gettable(L, -2);
+ lua_pushvalue(L, -2);
+ lua_call(L, 1, 0);
+
+ // tries to get the feature for this sample
+ lua_pushinteger(L, feature_id);
+ lua_gettable(L, -2);
+ // if present, then...
+ if (!lua_isnil(L, -1)) {
+ // saves the example
+ exampleIdsWithFeature_data[j] = exampleIds_data[i];
+ j++;
+
+ // marks it as present in the hash table
+ kh_put(long, exampleMap, exampleIds_data[i], &ret);
+ }
+
+ lua_pop(L, 2);
+ }
+
+ // resizes to fit only the samples that have the feature
+ THLongTensor_resize1d(exampleIdsWithFeature_cache, j);
+ kh_resize(long, exampleMap, j*8);
+ return exampleIdsWithFeature_cache;
+ }
+ }
+ else {
+ // if the input isn't a table, then it's dense and no example can be missing the feature,
+ // so the result doesn't depend on feature_id
+ // since exampleIds is fixed between calls and this is going to store the same values to the
+ // same position, we can cache it between calls
+ if (kh_size(exampleMap) == 0) {
+ kh_resize(long, exampleMap, exampleIds_size*8);
+ for (long i = 0; i < exampleIds_size; i++) {
+ kh_put(long, exampleMap, exampleIds_data[i], &ret);
+ }
+ }
+ // notice that we just return the given tensor of ids instead of copying it. the rest of the
+ // code handles this transparently
+ return exampleIds;
+ }
+}
+
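A note on computeGradientBoostLoss above: for a leaf aggregating gradient g and hessian h, the second-order objective g*w + 0.5*h*w^2 is minimized by the Newton step w* = -g/h, with minimum -0.5*g^2/h; the macro drops the constant 0.5 since only comparisons between candidate splits matter. In Lua terms (a sketch, not part of the diff):

    local function newtonScore(g, h) return -g / h end
    local function gradientBoostLoss(g, h) return -(g * g) / h end -- smaller is better
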
diff --git a/contrib/lua-torch/decisiontree/GiniState.lua b/contrib/lua-torch/decisiontree/GiniState.lua
new file mode 100644
index 000000000..6dfed2845
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/GiniState.lua
@@ -0,0 +1,54 @@
+local dt = require 'decisiontree._env'
+
+-- used by RandomForestTrainer
+local GiniState, parent = torch.class("dt.GiniState", "dt.TreeState", dt)
+
+function GiniState:__init(exampleIds)
+ parent.__init(self, exampleIds)
+ self.nPositiveInLeftBranch = 0
+ self.nPositiveInRightBranch = 0
+end
+
+function GiniState:score(dataset)
+ local dt = require 'decisiontree'
+ local nPositive = dataset:countPositive(self.exampleIds)
+ return dt.calculateLogitScore(nPositive, self.exampleIds:size(1))
+end
+
+function GiniState:initialize(exampleIdsWithFeature, dataset)
+ assert(torch.type(exampleIdsWithFeature) == 'torch.LongTensor')
+ assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+ self.nPositiveInLeftBranch = dataset:countPositive(exampleIdsWithFeature)
+ self.nPositiveInRightBranch = 0
+
+ self.nExampleInLeftBranch = exampleIdsWithFeature:size(1)
+ self.nExampleInRightBranch = 0
+end
+
+function GiniState:update(exampleId, dataset)
+ assert(torch.type(exampleId) == 'number')
+ assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+ if dataset.target[exampleId] > 0 then
+ self.nPositiveInLeftBranch = self.nPositiveInLeftBranch - 1
+ self.nPositiveInRightBranch = self.nPositiveInRightBranch + 1
+ end
+
+ self.nExampleInLeftBranch = self.nExampleInLeftBranch - 1
+ self.nExampleInRightBranch = self.nExampleInRightBranch + 1
+end
+
+function GiniState:computeSplitInfo(splitFeatureId, splitFeatureValue)
+ local dt = require 'decisiontree'
+ local gini = dt.computeGini(self.nExampleInLeftBranch, self.nPositiveInLeftBranch, self.nExampleInRightBranch, self.nPositiveInRightBranch)
+ local splitInfo = {
+ splitId = assert(splitFeatureId),
+ splitValue = assert(splitFeatureValue),
+ leftChildSize = assert(self.nExampleInLeftBranch),
+ leftPositiveCount = assert(self.nPositiveInLeftBranch),
+ rightChildSize = assert(self.nExampleInRightBranch),
+ rightPositiveCount = assert(self.nPositiveInRightBranch),
+ gini = assert(gini),
+ splitGain = gini
+ }
+ return splitInfo
+end
\ No newline at end of file
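
For reference, a standard weighted-Gini formulation consistent with how computeSplitInfo consumes it. This is an assumption: dt.computeGini itself lives in math.lua, which this diff adds but which is not shown here:

    -- sketch of a weighted Gini impurity; lower is better, matching the
    -- "candidateSplit.splitGain < bestSplit.splitGain" comparison in CartTrainer
    local function impurity(n, nPositive)
       local p = nPositive / n
       return p * (1 - p)
    end
    local function computeGini(nLeft, posLeft, nRight, posRight)
       local n = nLeft + nRight
       return (nLeft * impurity(nLeft, posLeft) + nRight * impurity(nRight, posRight)) / n
    end
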
diff --git a/contrib/lua-torch/decisiontree/GradientBoostState.lua b/contrib/lua-torch/decisiontree/GradientBoostState.lua
new file mode 100644
index 000000000..f268f3da8
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/GradientBoostState.lua
@@ -0,0 +1,57 @@
+local dt = require 'decisiontree._env'
+
+local GradientBoostState, parent = torch.class("dt.GradientBoostState", "dt.TreeState", dt)
+
+function GradientBoostState:__init(exampleIds, gradInput, hessInput)
+ parent.__init(self, exampleIds)
+ self.gradInput = gradInput
+ self.hessInput = hessInput
+end
+
+function GradientBoostState:score(dataset)
+ local dt = require 'decisiontree'
+ local gradInput = self.gradInput:index(1, self.exampleIds)
+ local hessInput = self.hessInput:index(1, self.exampleIds)
+ return dt.computeNewtonScore(gradInput:sum(), hessInput:sum())
+end
+
+-- calls _branch and encapsulates the left and right exampleIds into a pair of TreeStates
+function GradientBoostState:branch(splitInfo, dataset)
+ local leftExampleIds, rightExampleIds = self:_branch(splitInfo, dataset)
+ return self.new(leftExampleIds, self.gradInput, self.hessInput), self.new(rightExampleIds, self.gradInput, self.hessInput)
+end
+
+-- Partitions self given a splitInfo table, producing a pair of exampleIds corresponding to the left and right subtrees.
+function GradientBoostState:_branch(splitInfo, dataset)
+ local input = dataset.input
+ -- if the input is dense, we can use the optimized version
+ if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
+ return input.nn.GBDT_branch(splitInfo, input, self.exampleIds)
+ end
+ return parent._branch(self, splitInfo, dataset)
+end
+
+-- The following methods are supersets of each other. You can comment them out
+-- to fall back to the Lua implementation, keeping only the optimized core.
+
+-- THIS ONE CANNOT BE COMMENTED OUT
+function GradientBoostState:findBestFeatureSplit(dataset, featureId, minLeafSize)
+ local ret = self.hessInput.nn.GBDT_findBestFeatureSplit(self.exampleIds, dataset, featureId, minLeafSize, self.gradInput, self.hessInput)
+ return ret
+end
+
+-- finds the best split of examples in treeState among featureIds
+function GradientBoostState:findBestSplit(dataset, featureIds, minLeafSize, shardId, nShard)
+ local ret = self.hessInput.nn.GBDT_findBestSplit(self.exampleIds, dataset, featureIds, minLeafSize, shardId, nShard, self.gradInput, self.hessInput)
+ return ret
+end
+
+-- finds the best split like the previous one, but performs feature parallelism. Note that the
+-- optimization is only applied if the input is dense
+function GradientBoostState:findBestSplitFP(dataset, featureIds, minLeafSize, nThread)
+ local input = dataset.input
+ if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
+ local ret = self.hessInput.nn.GBDT_findBestSplitFP(self.exampleIds, dataset, featureIds, minLeafSize, self.gradInput, self.hessInput, nThread)
+ return ret
+ end
+end
diff --git a/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua b/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua
new file mode 100644
index 000000000..51299b109
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua
@@ -0,0 +1,244 @@
+local dt = require "decisiontree._env"
+
+-- Gradient boosted decision tree trainer
+local GradientBoostTrainer = torch.class("dt.GradientBoostTrainer", "dt.DecisionForestTrainer", dt)
+
+function GradientBoostTrainer:__init(opt)
+ assert(torch.type(opt) == 'table')
+
+ assert(torch.isTypeOf(opt.treeTrainer, 'dt.CartTrainer'))
+ self.treeTrainer = opt.treeTrainer
+
+ assert(torch.isTypeOf(opt.lossFunction, 'nn.Criterion'))
+ self.lossFunction = opt.lossFunction
+
+ assert(torch.type(opt.shrinkage) == 'number')
+ assert(opt.shrinkage > 0)
+ self.shrinkage = opt.shrinkage
+
+ assert(torch.type(opt.downsampleRatio) == 'number')
+ assert(opt.downsampleRatio > 0)
+ self.downsampleRatio = opt.downsampleRatio
+
+ assert(torch.type(opt.nTree) == 'number')
+ assert(opt.nTree > 0)
+ self.nTree = opt.nTree
+
+ opt.evalFreq = opt.evalFreq or -1
+ assert(torch.type(opt.evalFreq) == 'number')
+ assert(torch.round(opt.evalFreq) == opt.evalFreq)
+ self.evalFreq = opt.evalFreq
+
+ -- when non-positive, no early-stopping
+ opt.earlyStop = opt.earlyStop or (opt.evalFreq - 1)
+ assert(torch.type(opt.earlyStop) == 'number')
+ self.earlyStop = opt.earlyStop
+
+ -- when non-positive, defaults to sqrt(#feature)
+ assert(torch.type(opt.featureBaggingSize) == 'number')
+ self.featureBaggingSize = opt.featureBaggingSize
+
+ if opt.decisionForest then
+ assert(torch.isTypeOf(opt.decisionForest, 'dt.DecisionForest'))
+ end
+ self.decisionForest = opt.decisionForest
+
+ self.useInitBias = opt.useInitBias
+end
+
+function GradientBoostTrainer:computeBias(trainSet, verbose)
+ assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+
+ if verbose then print("Use new bias generated from the training examples.") end
+
+ return -0.5 * self.gradInput:sum() / self.hessInput:sum()
+end
+
+
+function GradientBoostTrainer:initialize(trainSet, verbose)
+ assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+
+ trainSet:initScore()
+ self.gradInput, self.hessInput = self.lossFunction:backward2(trainSet.score, trainSet.target)
+
+ -- used for early-stopping (see validate())
+ self.stopCount = 0
+ self.prevTrainLoss = math.huge
+ self.prevTestLoss = math.huge
+
+ if verbose then print("Processing initial decision forest") end
+
+ local decisionForest, bias
+
+ if self.decisionForest then
+ local bias = self.useInitBias and self.decisionForest.bias or self:computeBias(trainSet, verbose)
+
+ decisionForest = dt.DecisionForest(self.decisionForest.trees, self.decisionForest.weight, bias)
+
+ local input = trainSet.input
+ local score = trainSet.score
+ if torch.isTensor(input) and input.isContiguous and input:isContiguous() then
+ score:copy(decisionForest:score(input))
+ else
+ score:resize(trainSet:size())
+ for exampleId=1,trainSet:size() do
+ score[exampleId] = decisionForest:score(input[exampleId])
+ end
+ end
+ else
+ local bias = self:computeBias(trainSet, verbose)
+ decisionForest = dt.DecisionForest({}, torch.Tensor(), bias)
+
+ trainSet.score:fill(bias)
+ end
+
+ if verbose then print("Finish loading initial decision forest") end
+
+ return decisionForest
+end
+
+-- Trains a decision forest of boosted decision trees.
+-- trainSet contains the training examples; validSet is used for cross-validation and early-stopping.
+function GradientBoostTrainer:train(trainSet, featureIds, validSet, verbose)
+ assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+ assert(torch.type(featureIds) == 'torch.LongTensor')
+ assert(torch.isTypeOf(validSet, 'dt.DataSet'))
+
+ local decisionForest = self:initialize(trainSet, verbose)
+ local bestDecisionForest
+
+ if verbose then print(string.format("Get %d featureIds.", featureIds:size(1))) end
+
+ local baggingSize = self.featureBaggingSize > 0 and self.featureBaggingSize or torch.round(math.sqrt(featureIds:size(1)))
+ local trainExampleIds = trainSet:getExampleIds()
+ local baggingIndices, activeFeatures
+ local treeExampleIds
+
+ local timer = torch.Timer()
+
+ for treeId = 1,self.nTree do
+ timer:reset()
+ if verbose then print(string.format("Begin processing tree number %d of %d", treeId, self.nTree)) end
+
+ -- Get active features
+ activeFeatures = activeFeatures or torch.LongTensor()
+ if baggingSize < featureIds:size(1) then
+ if verbose then print(string.format("Tree %d: Bagging %d from %d features", treeId, baggingSize, featureIds:size(1))) end
+
+ baggingIndices = baggingIndices or torch.LongTensor()
+ baggingIndices:randperm(featureIds:size(1))
+ activeFeatures:index(featureIds, 1, baggingIndices:narrow(1,1,baggingSize))
+ else
+ activeFeatures = featureIds
+ end
+
+ -- Get data samples
+ if self.downsampleRatio < 0.99 then
+ local sampleSize = torch.round(trainSet:size() * self.downsampleRatio)
+
+ if verbose then print(string.format("Tree %d: Downsampling %d of %d samples", treeId, sampleSize, trainSet:size())) end
+
+ baggingIndices = baggingIndices or torch.LongTensor()
+ baggingIndices:randperm(trainSet:size())
+
+ treeExampleIds = treeExampleIds or torch.LongTensor()
+ treeExampleIds:index(trainExampleIds, 1, baggingIndices:narrow(1,1,sampleSize))
+ else
+ treeExampleIds = trainExampleIds
+ end
+
+ if verbose then print(string.format("Tree %d: training CART tree", treeId)) end
+
+ local rootTreeState = dt.GradientBoostState(treeExampleIds, self.gradInput, self.hessInput)
+ local cartTree = self.treeTrainer:train(rootTreeState, activeFeatures)
+
+ if verbose then print(string.format("Tree %d: finished training CART tree in %f seconds", treeId, timer:time().real)) end
+
+ decisionForest:add(cartTree, self.shrinkage)
+
+ -- update score
+ local predictionScore
+ local input = trainSet.input
+ if torch.isTensor(input) and input:isContiguous() then
+ predictionScore = cartTree:score(trainSet.input, nil, true)
+ else
+ local size = trainSet:size()
+ predictionScore = torch.Tensor(size)
+ for exampleId=1,size do
+ predictionScore[exampleId] = cartTree:score(trainSet.input[exampleId])
+ end
+ end
+ trainSet.score:add(self.shrinkage, predictionScore)
+ self.gradInput, self.hessInput = self.lossFunction:backward2(trainSet.score, trainSet.target)
+
+ if verbose then print(string.format("Tree %d: training complete in %f seconds", treeId, timer:time().real)) end
+
+ -- cross-validation/early-stopping
+ if self.evalFreq > 0 and treeId % self.evalFreq == 0 then
+ timer:reset()
+ local stop, validLoss, bestDecisionForest = self:validate(trainSet, validSet, decisionForest, bestDecisionForest)
+ if dt.PROFILE then print("validate tree time: "..timer:time().real) end
+ if verbose then print(string.format("Loss: train=%7.4f, valid=%7.4f", 0, validLoss)) end
+ if stop then
+ if verbose then print(string.format("GBDT early stopped on tree %d", treeId)) end
+ break
+ end
+
+ end
+ end
+
+ return bestDecisionForest or decisionForest
+end
+
+function dt.GradientBoostTrainer:validate(trainSet, validSet, decisionForest, bestDecisionForest)
+ assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+ assert(torch.isTypeOf(validSet, 'dt.DataSet'))
+ assert(torch.isTypeOf(decisionForest, 'dt.DecisionForest'))
+ assert(not bestDecisionForest or torch.isTypeOf(bestDecisionForest, 'dt.DecisionForest'))
+
+ -- buffer
+ local buffer = dt.getBufferTable('GradientBoost')
+ buffer.tensor = buffer.tensor or trainSet.score.new()
+ local score = buffer.tensor
+
+ -- per thread loss function (tensors are shared)
+ local lossname = torch.typename(self.lossFunction)
+ buffer[lossname] = buffer[lossname] or self.lossFunction:clone()
+ local lossFunction = buffer[lossname]
+
+ -- TODO batch this for large datasets
+ local input = validSet.input
+ if torch.isTensor(input) and input.isContiguous and input:isContiguous() then
+ score = decisionForest:score(input, 'val')
+ else
+ score:resize(validSet:size())
+ for exampleId=1,validSet:size() do
+ score[exampleId] = decisionForest:score(input[exampleId], 'val')
+ end
+ end
+ local validLoss = lossFunction:forward(score, validSet.target)
+
+ -- early stop is not enabled when earlyStop=0
+ local stop = false
+ if self.earlyStop > 0 then
+ -- Track test loss and detect early stop
+ if self.prevTestLoss - validLoss < 0 then
+ self.stopCount = self.stopCount + 1
+ else
+ bestDecisionForest = decisionForest:clone()
+ self.stopCount = 0
+ end
+
+ stop = self.stopCount >= self.earlyStop
+ end
+
+ self.prevTestLoss = validLoss
+
+ return stop, validLoss, bestDecisionForest
+end
+
+function GradientBoostTrainer:getName()
+ return string.format(
+ "gbdt-dRatio-%s-maxLeaf-%s-minExample-%s-nTree-%s-shrinkage-%s",
+ self.downsampleRatio, self.maxLeafNodes, self.minLeafSize, self.nTree, self.shrinkage
+ )
+end
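
An end-to-end sketch wiring the pieces above together (not part of the diff). The parameter names follow the asserts in __init; the values are illustrative, not tuned:

    local dt = require 'decisiontree'

    local trainSet = dt.DataSet(torch.rand(1000, 20), torch.Tensor(1000):random(0, 1))
    local validSet = dt.DataSet(torch.rand(200, 20), torch.Tensor(200):random(0, 1))

    local trainer = dt.GradientBoostTrainer{
       treeTrainer = dt.CartTrainer(trainSet, 10, 32), -- minLeafSize=10, maxLeafNodes=32
       lossFunction = nn.LogitBoostCriterion(false),
       shrinkage = 0.1,
       downsampleRatio = 0.8,
       nTree = 50,
       evalFreq = 5,
       earlyStop = 4,
       featureBaggingSize = -1, -- non-positive: defaults to sqrt(#features)
    }
    local featureIds = torch.LongTensor():range(1, 20)
    local forest = trainer:train(trainSet, featureIds, validSet, true)
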
diff --git a/contrib/lua-torch/decisiontree/LICENSE b/contrib/lua-torch/decisiontree/LICENSE
new file mode 100644
index 000000000..8dada3eda
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua b/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua
new file mode 100644
index 000000000..5b9eb6028
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua
@@ -0,0 +1,45 @@
+local dt = require "decisiontree._env"
+
+-- Ref: slide 17 in https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf
+
+-- equivalent to nn.Sigmoid() + nn.BCECriterion()
+local LogitBoostCriterion, parent = torch.class("nn.LogitBoostCriterion", "nn.Criterion")
+
+function LogitBoostCriterion:__init(sizeAverage)
+ parent.__init(self)
+ self.sizeAverage = sizeAverage
+ self.hessInput = self.gradInput.new()
+ self._output = torch.Tensor()
+end
+
+function LogitBoostCriterion:updateOutput(input, target)
+ input.nn.LogitBoostCriterion_updateOutput(input, target, self._output, self.sizeAverage)
+ self.output = self._output[1]
+ return self.output
+end
+
+function LogitBoostCriterion:updateGradInput(input, target)
+ input.nn.LogitBoostCriterion_updateGradInput(input, target, self.gradInput)
+ return self.gradInput
+end
+
+function LogitBoostCriterion:updateHessInput(input, target)
+ input.nn.LogitBoostCriterion_updateHessInput(input, target, self.hessInput)
+ return self.hessInput
+end
+
+-- returns gradInput and hessInput
+function LogitBoostCriterion:backward2(input, target)
+ return self:updateGradInput(input, target), self:updateHessInput(input, target)
+end
+
+local gradWrapper = function(input, target, grad)
+ input.nn.LogitBoostCriterion_updateGradInput(input, target, grad)
+end
+local hessianWrapper = function(input, target, hessian)
+ input.nn.LogitBoostCriterion_updateHessInput(input, target, hessian)
+end
+
+function LogitBoostCriterion:getWrappers()
+ return gradWrapper, hessianWrapper
+end
diff --git a/contrib/lua-torch/decisiontree/MSECriterion.lua b/contrib/lua-torch/decisiontree/MSECriterion.lua
new file mode 100644
index 000000000..948c1a17e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/MSECriterion.lua
@@ -0,0 +1,13 @@
+local dt = require "decisiontree._env"
+
+function nn.MSECriterion.updateHessInput(self, input, target)
+ self.hessInput = self.hessInput or input.new()
+ self.hessInput:resize(input:size()):fill(2)
+ return self.hessInput
+end
+
+-- returns gradInput and hessInput
+function nn.MSECriterion.backward2(self, input, target)
+ return self:updateGradInput(input, target), self:updateHessInput(input, target)
+end
+
diff --git a/contrib/lua-torch/decisiontree/README.md b/contrib/lua-torch/decisiontree/README.md
new file mode 100644
index 000000000..db4622add
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/README.md
@@ -0,0 +1,386 @@
+# Torch decision tree library
+
+```lua
+local dt = require 'decisiontree'
+```
+
+This project implements random forests and gradient boosted decision trees (GBDT).
+Both use ensemble learning to produce ensembles of decision trees (that is, forests).
+
+## `nn.DFD`
+
+One practical application for decision forests is to *discretize* an input feature space into a richer output feature space.
+The `nn.DFD` Module can be used as a decision forest discretizer (DFD):
+
+```lua
+local dfd = nn.DFD(df, onlyLastNode)
+```
+
+where `df` is a `dt.DecisionForest` instance or the table returned by the method `getReconstructionInfo()` on another `nn.DFD` module, and `onlyLastNode` is a boolean indicating that the module should return only the ID of the last node visited in each tree (by default, it outputs all traversed nodes except the roots).
+The `nn.DFD` module requires dense `input` tensors.
+Sparse `input` tensors (tables of tensors) are not supported.
+The `output` returned by a call to `updateOutput` is a batch of sparse tensors.
+This `output` is a table where `output[1]` and `output[2]` are, respectively, lists of key and value tensors:
+
+```lua
+{
+ { [torch.LongTensor], ... , [torch.LongTensor] },
+ { [torch.Tensor], ... , [torch.Tensor] }
+}
+```
+
+This module doesn't support CUDA.
+
+### Example
+As a concrete example, let us first train a Random Forest on a dummy dense dataset:
+
+```lua
+local nExample = 100
+local batchsize = 2
+local inputsize = 10
+
+-- train Random Forest
+local trainSet = dt.getDenseDummyData(nExample, nil, inputsize)
+local opt = {
+ activeRatio=0.5,
+ featureBaggingSize=5,
+ nTree=4,
+ maxLeafNodes=nExample/2,
+ minLeafSize=nExample/10,
+}
+local trainer = dt.RandomForestTrainer(opt)
+local df = trainer:train(trainSet, trainSet.featureIds)
+assert(#df.trees == opt.nTree)
+```
+
+Now that we have `df`, a `dt.DecisionForest` instance, we can use it to initialize `nn.DFD`:
+
+```lua
+local dfd = nn.DFD(df)
+```
+
+The `dfd` instance holds no reference to `df`; instead, it extracts the relevant attributes from `df`.
+These attributes are stored in tensors for batching and efficiency.
+
+We can discretize a hypothetical `input` by calling `forward`:
+```lua
+local input = trainSet.input:sub(1,batchsize)
+local output = dfd:forward(input)
+```
+
+The resulting output is a table consisting of two tables: keys and values.
+The keys and values tables each contain `batchsize` tensors:
+
+```lua
+print(output)
+{
+ 1 :
+ {
+ 1 : LongTensor - size: 14
+ 2 : LongTensor - size: 16
+ 3 : LongTensor - size: 15
+ 4 : LongTensor - size: 13
+ }
+ 2 :
+ {
+ 1 : DoubleTensor - size: 14
+ 2 : DoubleTensor - size: 16
+ 3 : DoubleTensor - size: 15
+ 4 : DoubleTensor - size: 13
+ }
+}
+```
+
+An example's feature keys (`LongTensor`) and corresponding values (`DoubleTensor`) have the same number of elements.
+Each example has a variable number of key-value pairs representing the nodes it traversed in each tree.
+The output feature space has one dimension (that is, one possible feature key) per node in the forest.
+
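+If a dense representation is needed downstream, the sparse `{keys, values}` output can be densified with the `nn.Sparse2Dense` module included in this library. A minimal sketch, assuming `nNode` is an upper bound on the node IDs emitted by the forest (this count is an assumption, not something computed above):
+
+```lua
+local nNode = 100 -- assumption: covers all node ids used as feature keys
+local s2d = nn.Sparse2Dense(torch.LongTensor():range(1, nNode))
+local dense = s2d:forward(output) -- batchsize x nNode dense tensor
+```
+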
+## `torch.SparseTensor`
+
+Suppose you have a set of `keys` mapped to `values`:
+```lua
+local keys = torch.LongTensor{1,3,4,7,2}
+local values = torch.Tensor{0.1,0.3,0.4,0.7,0.2}
+```
+
+You can use a `SparseTensor` to encapsulate these into a read-only tensor:
+
+```lua
+local st = torch.SparseTensor(keys, values)
+```
+
+The _decisiontree_ library uses `SparseTensors` to simulate the `__index` method of the `torch.Tensor`.
+For example, one can obtain the value associated to key 3 of the above `st` instance:
+
+```lua
+local value = st[3]
+assert(value == 0.3)
+```
+
+When a key-value pair is missing, `nil` is returned instead:
+
+```lua
+local value = st[5]
+assert(value == nil)
+```
+
+The default implementation of this kind of indexing is slow (it uses a sequential scan of the `keys` tensor).
+To speed up indexing, one can call the `buildIndex()` method beforehand:
+
+```lua
+st:buildIndex()
+```
+
+The `buildIndex()` method creates a hash map (a Lua table) of keys to their corresponding indices in the `values` tensor.
+
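+To illustrate, a minimal sketch of what `buildIndex()` constructs for the `st` above (mirroring the `SparseTensor:buildIndex()` implementation in this library):
+
+```lua
+-- keys = {1,3,4,7,2}  =>  _map = { [1]=1, [3]=2, [4]=3, [7]=4, [2]=5 }
+st:buildIndex()
+assert(st[7] == 0.7) -- now a single hash lookup instead of a linear scan
+```
+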
+## `dt.DataSet`
+
+The `CartTrainer`, `RandomForestTrainer` and `GradientBoostTrainer` require that data sets be encapsulated into a `DataSet`.
+Suppose you have a dataset of dense inputs and targets:
+
+```lua
+local nExample = 10
+local nFeature = 5
+local input = torch.randn(nExample, nFeature)
+local target = torch.Tensor(nExample):random(0,1)
+```
+
+these can be encapsulated into a `DataSet` object:
+
+```lua
+local dataset = dt.DataSet(input, target)
+```
+
+Now suppose you have a dataset where the `input` is a table of `SparseTensor` instances:
+
+```lua
+local input = {}
+for i=1,nExample do
+ local nKeyVal = math.random(2,nFeature)
+ local keys = torch.LongTensor(nKeyVal):random(1,nFeature)
+ local values = torch.randn(nKeyVal)
+ input[i] = torch.SparseTensor(keys, values)
+end
+```
+
+You can still use a `DataSet` to encapsulate the sparse dataset:
+
+```lua
+local dataset = dt.DataSet(input, target)
+```
+
+The main purpose of the `DataSet` class is to sort each feature by value.
+This is captured by the `sortFeatureValues(input)` method, which is called in the constructor:
+
+```lua
+local sortedFeatureValues, featureIds = self:sortFeatureValues(input)
+```
+
+The `featureIds` is a `torch.LongTensor` of all available feature IDs.
+For a dense `input` tensor, this is just `torch.LongTensor():range(1,input:size(2))`.
+But for a sparse `input` tensor, the `featureIds` tensor only contains the feature IDs present in the dataset.
+
+The resulting `sortedFeatureValues` is a table mapping `featureIds` to `exampleIds` sorted by `featureValues`.
+For each `featureId`, examples are sorted by `featureValue` in ascending order.
+For example, the table might look like `{featureId=exampleIds}`, where `exampleIds={1,3,2}`.
+
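+A minimal concrete sketch, assuming a single hypothetical feature (ID 2) observed in three examples with values 0.2 (example 1), 0.5 (example 3) and 0.9 (example 2):
+
+```lua
+-- examples sorted ascending by the value of feature 2: 1 (0.2), 3 (0.5), 2 (0.9)
+local sortedFeatureValues = { [2] = torch.LongTensor{1, 3, 2} }
+```
+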
+The `CartTrainer` accesses the `sortedFeatureValues` tensor by calling `getSortedFeature(featureId)`:
+
+```lua
+local exampleIdsWithFeature = dataset:getSortedFeature(featureId)
+```
+
+The ability to access examples IDs sorted by feature value, given a feature ID, is the main purpose of the `DataSet`.
+The `CartTrainer` relies on these sorted lists to find the best way to split a set of examples between two tree nodes.
+
+## `dt.CartTrainer`
+
+```lua
+local trainer = dt.CartTrainer(dataset, minLeafSize, maxLeafNodes)
+```
+
+The `CartTrainer` is used by the `RandomForestTrainer` and `GradientBoostTrainer` to train individual trees.
+CART stands for classification and regression trees.
+However, only binary classifiers are unit tested.
+
+The constructor takes the following arguments:
+
+ * `dataset` is a `dt.DataSet` instance representing the training set.
+ * `minLeafSize` is the minimum number of examples per leaf node in a tree. The larger the value, the more regularization.
+ * `maxLeafNodes` is the maximum number of leaf nodes in the tree. The lower the value, the more regularization.
+
+Training is initiated by calling the `train()` method:
+
+```lua
+local trainSet = dt.DataSet(input, target)
+local rootTreeState = dt.GiniState(trainSet:getExampleIds())
+local activeFeatures = trainSet.featureIds
+local tree = trainer:train(rootTreeState, activeFeatures)
+```
+
+The resulting `tree` is a `CartTree` instance.
+The `rootTreeState` is a `TreeState` instance like `GiniState` (used by `RandomForestTrainer`) or `GradientBoostState` (used by `GradientBoostTrainer`).
+The `activeFeatures` is a `LongTensor` of the feature IDs used to build the tree.
+Every other feature ID is ignored during training. This is useful for feature bagging (see the sketch below).
+
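+A minimal feature-bagging sketch, mirroring what `RandomForestTrainer.trainTree` does internally (the bagging size of 5 is arbitrary):
+
+```lua
+-- sample 5 feature IDs (with replacement) to use as activeFeatures
+local baggingIndices = torch.LongTensor(5):random(1, trainSet.featureIds:size(1))
+local activeFeatures = trainSet.featureIds:index(1, baggingIndices)
+```
+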
+By default, the `CartTrainer` runs in a single thread.
+The `featureParallel(nThread)` method can be called before calling `train()` to parallelize training using `nThread` workers:
+
+```lua
+local nThread = 3
+trainer:featureParallel(nThread)
+trainer:train(rootTreeState, activeFeatures)
+```
+
+Feature parallelization assigns a set of feature IDs to each thread.
+
+The `CartTrainer` can be used as a stand-alone tree trainer.
+However, it is recommended to use it within a `RandomForestTrainer` or `GradientBoostTrainer` instead, as the resulting forests typically generalize better.
+
+## RandomForestTrainer
+
+The `RandomForestTrainer` is used to train a random forest:
+
+```lua
+local nExample = trainSet:size()
+local opt = {
+ activeRatio=0.5,
+ featureBaggingSize=5,
+ nTree=14,
+ maxLeafNodes=nExample/2,
+ minLeafSize=nExample/10,
+}
+local trainer = dt.RandomForestTrainer(opt)
+local forest = trainer:train(trainSet, trainSet.featureIds)
+```
+
+The returned `forest` is a `DecisionForest` instance.
+A `DecisionForest` has a similar interface to the `CartTree`.
+Indeed, they both sub-class the `DecisionTree` abstract class.
+
+The constructor takes a single `opt` table argument, which contains the actual arguments:
+
+ * `activeRatio` is the ratio of active examples per tree. This is used for bootstrap sampling.
+ * `featureBaggingSize` is the number of features per tree. This is used for feature bagging.
+ * `nTree` is the number of trees to be trained.
+ * `maxLeafNodes` and `minLeafSize` are passed to the underlying `CartTrainer` constructor (controls regularization).
+
+Internally, the `RandomForestTrainer` passes a `GiniState` to the `CartTrainer:train()` method.
+
+Training can be parallelized by calling `treeParallel(nThread)`:
+
+```lua
+local nThread = 3
+trainer:treeParallel(nThread)
+local forest = trainer:train(trainSet, trainSet.featureIds)
+```
+
+Each tree is then trained in its own worker thread.
+
+## GradientBoostTrainer
+
+References:
+ * A. [Boosted Tree presentation](https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf)
+
+Gradient boosted decision trees (GBDT) can be trained as follows:
+```lua
+local nExample = trainSet:size()
+local maxLeafNode, minLeafSize = nExample/2, nExample/10
+local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNode)
+
+local opt = {
+ lossFunction=nn.LogitBoostCriterion(false),
+ treeTrainer=cartTrainer,
+ shrinkage=0.1,
+ downsampleRatio=0.8,
+ featureBaggingSize=-1,
+ nTree=14,
+ evalFreq=8,
+ earlyStop=0
+}
+
+local trainer = dt.GradientBoostTrainer(opt)
+local forest = trainer:train(trainSet, trainSet.featureIds, validSet)
+```
+
+The above code snippet uses the `LogitBoostCriterion` outlined in reference A.
+It is used for training binary classification trees.
+
+The returned `forest` is a `DecisionForest` instance.
+A `DecisionForest` has a similar interface to the `CartTree`.
+Indeed, they both sub-class the `DecisionTree` abstract class.
+
+The constructor takes a single `opt` table argument, which contains the actual arguments:
+
+ * `lossFunction` is a `nn.Criterion` instance extended with the `updateHessInput(input, target)` and `backward2(input, target)` methods, which return the Hessian with respect to the `input`.
+ * `treeTrainer` is a `CartTrainer` instance. Its `featureParallel()` method can be called to implement feature parallelization.
+ * `shrinkage` is the weight of each additional tree.
+ * `downsampleRatio` is the ratio of examples to be sampled for each tree. Used for bootstrap sampling.
+ * `featureBaggingSize` is the number of features to sample per tree. Used for feature bagging. `-1` defaults to `torch.round(math.sqrt(featureIds:size(1)))`
+ * `nTree` is the maximum number of trees.
+ * `evalFreq` is the number of epochs between calls to `validate()` for cross-validation and early-stopping.
+ * `earlyStop` is the maximum number of epochs to wait for early-stopping.
+
+Internally, the `GradientBoostTrainer` passes a `GradientBoostState` to the `CartTrainer:train()` method.
+
+## TreeState
+
+An abstract class that holds the state of a subtree during decision tree training.
+It also manages the state of candidate splits.
+
+```lua
+local treeState = dt.TreeState(exampleIds)
+```
+
+The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
+
+## GiniState
+
+A `TreeState` subclass used internally by the `RandomForestTrainer`.
+It uses Gini impurity to determine how to split nodes.
+
+```lua
+local treeState = dt.GiniState(exampleIds)
+```
+
+The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
+
+## GradientBoostState
+
+A `TreeState` subclass used internally by the `GradientBoostTrainer`.
+It implements the GBDT splitting algorithm, which uses a loss function.
+
+```lua
+local treeState = dt.GradientBoostState(exampleIds, lossFunction)
+```
+
+The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
+The `lossFunction` is an `nn.Criterion` instance (see `GradientBoostTrainer`).
+
+
+## WorkPool
+
+Utility class that simplifies construction of a pool of daemon threads with which to execute tasks in parallel.
+
+```lua
+local workpool = dt.WorkPool(nThread)
+```
+
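+A minimal usage sketch based on the methods `WorkPool` defines (`update`, `writeup`, `read`, `terminate`); the task function is purely illustrative:
+
+```lua
+-- make the decisiontree package available in every worker thread
+workpool:update('require', {libname='decisiontree', varname='dt'})
+
+-- run an illustrative task in a worker; it receives the worker's store and id
+workpool:writeup('execute', function(store, myId)
+   return myId * 2 -- any serializable result
+end)
+
+local taskname, result = workpool:read() -- returns the taskname and the result
+assert(taskname == 'execute')
+
+workpool:terminate() -- shut down the daemon threads
+```
+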
+## CartTree
+
+Implements a trained CART decision tree:
+
+```lua
+local tree = nn.CartTree(rootNode)
+```
+
+The `rootNode` is a `CartNode` instance.
+Each `CartNode` contains pointers to left and right branches, which are themselves `CartNode` instances.
+
+For inference, use the `score(input)` method:
+
+```lua
+local score = tree:score(input)
+```
diff --git a/contrib/lua-torch/decisiontree/RandomForestTrainer.lua b/contrib/lua-torch/decisiontree/RandomForestTrainer.lua
new file mode 100644
index 000000000..41040b25b
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/RandomForestTrainer.lua
@@ -0,0 +1,159 @@
+local dt = require "decisiontree._env"
+
+local RandomForestTrainer = torch.class("dt.RandomForestTrainer", dt)
+
+function RandomForestTrainer:__init(opt)
+ assert(torch.type(opt.nTree) == 'number')
+ assert(opt.nTree > 0)
+ self.nTree = opt.nTree
+ -- max number of leaf nodes per tree
+ assert(torch.type(opt.maxLeafNodes) == 'number')
+ assert(opt.maxLeafNodes > 0)
+ self.maxLeafNodes = opt.maxLeafNodes
+ -- min number of examples per leaf
+ assert(torch.type(opt.minLeafSize) == 'number')
+ assert(opt.minLeafSize > 0)
+ self.minLeafSize = opt.minLeafSize
+
+ -- when non-positive, defaults to sqrt(#feature)
+ assert(torch.type(opt.featureBaggingSize) == 'number')
+ self.featureBaggingSize = opt.featureBaggingSize
+
+ assert(torch.type(opt.activeRatio) == 'number')
+ assert(opt.activeRatio > 0)
+ self.activeRatio = opt.activeRatio
+
+ -- default parallelization is singlethread
+ self.parallelMode = 'singlethread'
+end
+
+-- Train a DecisionForest
+function RandomForestTrainer:train(trainSet, featureIds, verbose)
+ assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+ assert(torch.type(featureIds) == 'torch.LongTensor')
+
+ if verbose then print(string.format("Begin training Decision Forest with %d trees", self.nTree)) end
+
+ local weight = torch.Tensor(self.nTree):fill(1 / self.nTree) -- RF uses uniform weights
+
+ local trees
+ if self.parallelMode == 'singlethread' then
+ trees = self:trainTrees(trainSet, featureIds, verbose)
+ elseif self.parallelMode == 'treeparallel' then
+ trainSet:deleteIndex() -- prevents serialization bottleneck
+ trees = self:trainTreesTP(trainSet, featureIds, verbose)
+ else
+ error("Unrecognized parallel mode: " .. self.parallelMode)
+ end
+
+ if verbose then print(string.format("Successfully trained %d trees", #trees)) end
+
+ -- set bias
+ local bias = 0;
+ for i, tree in ipairs(trees) do
+ bias = bias + tree.root.score * weight[i]
+ end
+
+ return dt.DecisionForest(trees, weight, bias)
+end
+
+function RandomForestTrainer:trainTrees(trainSet, featureIds, verbose)
+
+ -- the same CartTrainer will be used for each tree
+ local cartTrainer = dt.CartTrainer(trainSet, self.minLeafSize, self.maxLeafNodes)
+
+ local trees = {}
+ for treeId=1,self.nTree do
+ -- Train a CartTree
+ local tree = self.trainTree(cartTrainer, featureIds, self.featureBaggingSize, self.activeRatio, treeId, verbose)
+ table.insert(trees, tree)
+ end
+ return trees
+end
+
+-- static function that returns a cartTree
+function RandomForestTrainer.trainTree(cartTrainer, featureIds, baggingSize, activeRatio, treeId, verbose)
+ assert(torch.isTypeOf(cartTrainer, 'dt.CartTrainer'))
+ assert(torch.type(featureIds) == 'torch.LongTensor')
+ local baggingSize = baggingSize > 0 and baggingSize or torch.round(math.sqrt(featureIds:size(1)))
+
+ if verbose then
+ print(string.format("Tree %d: Creating features bootstrap sample with baggingSize %d, nFeatures %d", treeId, baggingSize, featureIds:size(1)))
+ end
+
+ local trainSet = cartTrainer.dataset
+
+ -- sample bootstrap features
+ local baggingIndices = torch.LongTensor(baggingSize):random(1,featureIds:size(1))
+ local activeFeatures = featureIds:index(1, baggingIndices)
+
+ -- sample bootstrap examples
+ local sampleSize = torch.round(trainSet:size() * activeRatio)
+ if verbose then print(string.format("Creating bootstrap sample of size %d", sampleSize)) end
+
+ baggingIndices:resize(sampleSize):random(1,trainSet:size())
+ local bootStrapExampleIds = torch.LongTensor()
+ bootStrapExampleIds:index(trainSet:getExampleIds(), 1, baggingIndices)
+
+ local cartTree = cartTrainer:train(dt.GiniState(bootStrapExampleIds), activeFeatures)
+
+ if verbose then print(string.format("Completed processing tree number %d", treeId)) end
+
+ return cartTree
+end
+
+-- call before training to enable tree-level parallelization
+function RandomForestTrainer:treeParallel(workPool)
+ assert(self.parallelMode == 'singlethread', self.parallelMode)
+ self.parallelMode = 'treeparallel'
+ self.workPool = torch.type(workPool) == 'number' and dt.WorkPool(workPool) or workPool
+ assert(torch.isTypeOf(self.workPool, 'dt.WorkPool'))
+
+ -- require the dt package
+ self.workPool:update('require', {libname='decisiontree',varname='dt'})
+end
+
+-- TP is for tree parallel (not toilet paper)
+function RandomForestTrainer:trainTreesTP(trainSet, featureIds, verbose)
+ assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+ assert(torch.type(featureIds) == 'torch.LongTensor')
+ local minLeafSize = self.minLeafSize
+ local maxLeafNodes = self.maxLeafNodes
+
+ -- setup worker store (each worker will have its own cartTrainer)
+ self.workPool:updateup('execute', function(store)
+ local dt = require 'decisiontree'
+
+ store.cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
+ store.featureIds = featureIds
+ end)
+
+ for treeId=1,self.nTree do
+ -- upvalues
+ local baggingSize = self.featureBaggingSize
+ local activeRatio = self.activeRatio
+ -- task closure that will be executed in worker-thread
+ local function trainTreeTask(store)
+ local dt = require 'decisiontree'
+ return dt.RandomForestTrainer.trainTree(store.cartTrainer, store.featureIds, baggingSize, activeRatio, treeId, verbose)
+ end
+ self.workPool:writeup('execute', trainTreeTask)
+ end
+
+ local trees = {}
+ for treeId=1,self.nTree do
+ local taskname, tree = self.workPool:read()
+ assert(taskname=='execute')
+ assert(torch.isTypeOf(tree, 'dt.CartTree'))
+ table.insert(trees, tree)
+ end
+ return trees
+end
+
+function RandomForestTrainer:getName()
+ return string.format(
+ "randomforest-aRatio-%4.2f-maxLeaf-%d-minExample-%d-nTree-%d",
+ self.activeRatio, self.maxLeafNodes, self.minLeafSize, self.nTree
+ )
+end
+
diff --git a/contrib/lua-torch/decisiontree/Sparse2Dense.lua b/contrib/lua-torch/decisiontree/Sparse2Dense.lua
new file mode 100644
index 000000000..4e5b79d2f
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/Sparse2Dense.lua
@@ -0,0 +1,88 @@
+local S2D, parent = torch.class("nn.Sparse2Dense", "nn.Module")
+local dt = require 'decisiontree._env'
+
+function S2D:__init(features)
+ parent.__init(self)
+ if torch.type(features) == 'table' then
+ assert(#features > 0)
+ features = torch.LongTensor(features)
+ end
+ assert(torch.isTensor(features))
+ self.features = features
+ self.featureMap = nil
+ self.masks = {}
+ self.mappedKeys = {}
+end
+
+function S2D:updateOutput(input)
+ if not self.featureMap then
+ self.featureMap = dt.HashMap()
+ self.featureMap:fill(self.features)
+ end
+ local batched, keys, values
+ if torch.isTensor(input[1]) then
+ keys = {input[1]}
+ values = {input[2]}
+ batched = false
+ else
+ keys = input[1]
+ values = input[2]
+ batched = true
+ end
+ assert(#keys == #values)
+
+ local masks = self.masks
+ local mappedKeys = self.mappedKeys
+ local nKeys = #keys
+ local nMasks = #masks
+ if nMasks < nKeys then
+ for i=nMasks+1,nKeys do
+ masks[i] = torch.ByteTensor()
+ mappedKeys[i] = torch.LongTensor()
+ end
+ elseif nMasks > nKeys then
+ for i=nKeys+1,nMasks do
+ masks[i] = nil
+ mappedKeys[i] = nil
+ end
+ end
+
+ self.featureMap:get(keys, mappedKeys, masks)
+ self.output = self.output or torch.Tensor():type(self._type)
+ self.output.nn.S2D_computeOutput(self.output, mappedKeys, values, masks, self.features)
+ if not batched then
+ self.output = self.output:view(-1)
+ end
+ return self.output
+end
+
+function S2D:type(type, tensorCache)
+ if type then
+ local features = self.features
+ self.features = nil
+ parent.type(self, type, tensorCache)
+ self.features = features
+ return self
+ else
+ return parent.type(self)
+ end
+end
+
+function S2D:updateGradInput(input, gradOutput)
+ error"Not Implemented"
+end
+
+function S2D:reset()
+ parent.reset(self)
+ self.featureMap = nil
+end
+
+function S2D:write(file)
+ self.featureMap = nil
+ parent.write(self, file)
+end
+
+function S2D:read(file)
+ self.featureMap = nil
+ parent.read(self, file)
+end
diff --git a/contrib/lua-torch/decisiontree/SparseTensor.lua b/contrib/lua-torch/decisiontree/SparseTensor.lua
new file mode 100644
index 000000000..4c620e618
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/SparseTensor.lua
@@ -0,0 +1,54 @@
+
+local SparseTensor = torch.class("torch.SparseTensor")
+
+function SparseTensor:__init(keys, values)
+ if keys and values then
+ assert(torch.typename(keys):find('torch%..*LongTensor'))
+ assert(torch.isTensor(values))
+ assert(keys:nElement() == values:nElement(), "Expecting key and value tensors of same size")
+ self.keys = keys
+ self.values = values
+ elseif not (keys or values) then
+ self.keys = torch.LongTensor()
+ self.values = torch.Tensor()
+ else
+ error"Expecting zero or two args"
+ end
+end
+
+function SparseTensor:buildIndex(overwrite)
+ if self._map and not overwrite then return end
+ assert(self.keys and self.keys:dim() == 1)
+ assert(self.values and self.values:dim() == 1)
+ -- hash table
+ self._map = {}
+ for i=1,self.keys:size(1) do
+ self._map[self.keys[i]] = i
+ end
+end
+
+function SparseTensor:deleteIndex()
+ self._map = nil
+end
+
+local __index = SparseTensor.__index
+function SparseTensor:__index(key)
+ if key == nil then
+ error"Attempt to index using a nil key"
+ elseif torch.type(key) ~= 'number' then
+ return __index(self, key)
+ end
+
+ if self._map then
+ assert(torch.type(self._map) == 'table')
+ local idx = self._map[key]
+ return idx and self.values[idx] or nil
+ elseif self.keys:nElement() > 0 then
+ for i=1,self.keys:size(1) do
+ if self.keys[i] == key then
+ return self.values[i]
+ end
+ end
+ end
+ return nil
+end \ No newline at end of file
diff --git a/contrib/lua-torch/decisiontree/TreeState.lua b/contrib/lua-torch/decisiontree/TreeState.lua
new file mode 100644
index 000000000..3928649fd
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/TreeState.lua
@@ -0,0 +1,191 @@
+local dt = require "decisiontree._env"
+
+local TreeState = torch.class("dt.TreeState", dt)
+
+-- Holds the state of a subtree during decision tree training.
+-- Also, manages the state of candidate splits
+function TreeState:__init(exampleIds)
+ assert(torch.type(exampleIds) == 'torch.LongTensor')
+ self.exampleIds = exampleIds
+
+ self.nExampleInLeftBranch = 0
+ self.nExampleInRightBranch = 0
+end
+
+-- computes and returns the score of the node based on its examples
+function TreeState:score(dataset)
+ error"NotImplemented"
+end
+
+
+-- Initializes the split-state-updater. Initially all examples are in the left branch.
+-- exampleIdsWithFeature is list of examples to split (those having a particular feature)
+function TreeState:initialize(exampleIdsWithFeature, dataset)
+ error"NotImplemented"
+end
+
+-- Update the split state. This call has the effect of shifting the example from the left to the right branch.
+function TreeState:update(exampleId, dataset)
+ error"NotImplemented"
+end
+
+-- Computes the SplitInfo determined by the current split state
+-- @param splitFeatureId the feature id of the split feature
+-- @param splitFeatureValue the feature value of the split feature
+-- @return the SplitInfo determined by the current split state
+function TreeState:computeSplitInfo(splitFeatureId, splitFeatureValue)
+ error"NotImplemented"
+end
+
+-- bottleneck
+function TreeState:findBestFeatureSplit(dataset, featureId, minLeafSize)
+ local dt = require "decisiontree"
+ assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+ assert(torch.type(featureId) == 'number')
+ assert(torch.type(minLeafSize) == 'number')
+
+ -- all dataset example having this feature, sorted by value
+ local featureExampleIds = dataset:getSortedFeature(featureId)
+
+ local buffer = dt.getBufferTable('TreeState')
+ buffer.longtensor = buffer.longtensor or torch.LongTensor()
+ local exampleIdsWithFeature = buffer.longtensor
+
+ -- map and tensor of examples containing feature:
+ local exampleMap = {}
+ local getExampleFeatureValue
+
+ local j = 0
+ if torch.type(dataset.input) == 'table' then
+ exampleIdsWithFeature:resize(self.exampleIds:size())
+ self.exampleIds:apply(function(exampleId)
+ local input = dataset.input[exampleId]
+ input:buildIndex()-- only builds index first time
+ if input[featureId] then
+ j = j + 1
+ exampleIdsWithFeature[j] = exampleId
+ exampleMap[exampleId] = j
+ end
+ end)
+ if j == 0 then
+ return
+ end
+ exampleIdsWithFeature:resize(j)
+ getExampleFeatureValue = function(exampleId) return dataset.input[exampleId][featureId] end
+ else
+ exampleIdsWithFeature = self.exampleIds
+ self.exampleIds:apply(function(exampleId)
+ j = j + 1
+ exampleMap[exampleId] = j
+ end)
+ local featureValues = dataset.input:select(2,featureId)
+ getExampleFeatureValue = function(exampleId) return featureValues[exampleId] end
+ end
+
+
+ self:initialize(exampleIdsWithFeature, dataset)
+
+ -- bottleneck
+ local bestSplit, previousSplitValue, _tictoc
+ for i=featureExampleIds:size(1),1,-1 do -- loop over examples sorted (desc) by feature value
+ local exampleId = featureExampleIds[i]
+
+ local exampleIdx = exampleMap[exampleId]
+ if exampleIdx then
+ local splitValue = getExampleFeatureValue(exampleId)
+
+ if previousSplitValue and math.abs(splitValue - previousSplitValue) > dt.EPSILON then
+ local splitInfo = self:computeSplitInfo(featureId, previousSplitValue, _tictoc)
+ if (splitInfo.leftChildSize >= minLeafSize) and (splitInfo.rightChildSize >= minLeafSize) then
+
+ if (not bestSplit) or (splitInfo.splitGain < bestSplit.splitGain) then
+ _tictoc = bestSplit or {} -- reuse table
+ bestSplit = splitInfo
+ end
+
+ end
+ end
+
+ previousSplitValue = splitValue
+
+ -- bottleneck
+ self:update(exampleId, dataset, exampleIdx)
+ end
+ end
+
+ return bestSplit
+end
+
+-- finds the best split of examples in treeState among featureIds
+function TreeState:findBestSplit(dataset, featureIds, minLeafSize, shardId, nShard)
+ assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+ assert(torch.type(featureIds) == 'torch.LongTensor')
+ assert(torch.type(minLeafSize) == 'number')
+ assert(torch.type(shardId) == 'number')
+ assert(torch.type(nShard) == 'number')
+
+ local bestSplit
+ for i=1,featureIds:size(1) do
+ local featureId = featureIds[i]
+ if (nShard <= 1) or ( (featureId % nShard) + 1 == shardId ) then -- feature sharded
+ local splitCandidate = self:findBestFeatureSplit(dataset, featureId, minLeafSize)
+ if splitCandidate and ((not bestSplit) or (splitCandidate.splitGain < bestSplit.splitGain)) then
+ bestSplit = splitCandidate
+ end
+ end
+ end
+
+ return bestSplit
+end
+
+-- Partitions self given a splitInfo table, producing a pair of exampleIds corresponding to the left and right subtrees.
+function TreeState:_branch(splitInfo, dataset)
+ local leftIdx, rightIdx = 0, 0
+ local nExample = self.exampleIds:size(1)
+ local splitExampleIds = torch.LongTensor(nExample)
+
+
+ for i=1,self.exampleIds:size(1) do
+ local exampleId = self.exampleIds[i]
+ local input = dataset.input[exampleId]
+ local val = input[splitInfo.splitId]
+ -- Note: when the feature is not present in the example, the example is dropped from all sub-trees.
+ -- This means that for most sparse data, a tree cannot reach 100% accuracy...
+ if val then
+ if val < splitInfo.splitValue then
+ leftIdx = leftIdx + 1
+ splitExampleIds[leftIdx] = exampleId
+ else
+ rightIdx = rightIdx + 1
+ splitExampleIds[nExample-rightIdx+1] = exampleId
+ end
+ end
+ end
+
+ local leftExampleIds = splitExampleIds:narrow(1,1,leftIdx)
+ local rightExampleIds = splitExampleIds:narrow(1,nExample-rightIdx+1,rightIdx)
+
+ assert(leftExampleIds:size(1) + rightExampleIds:size(1) <= self.exampleIds:size(1), "Left and right branches contain more data than the parent!")
+ return leftExampleIds, rightExampleIds
+end
+
+-- calls _branch and encapsulates the left and right exampleIds into two TreeStates
+function TreeState:branch(splitInfo, dataset)
+ local leftExampleIds, rightExampleIds = self:_branch(splitInfo, dataset)
+ return self.new(leftExampleIds), self.new(rightExampleIds)
+end
+
+function TreeState:size()
+ return self.exampleIds:size(1)
+end
+
+function TreeState:contains(exampleId)
+ local found = false
+ self.exampleIds:apply(function(x)
+ if x == exampleId then
+ found = true
+ end
+ end)
+ return found
+end
+
diff --git a/contrib/lua-torch/decisiontree/WorkPool.lua b/contrib/lua-torch/decisiontree/WorkPool.lua
new file mode 100644
index 000000000..8f473727e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/WorkPool.lua
@@ -0,0 +1,156 @@
+local dt = require "decisiontree._env"
+
+-- Utility to simplify construction of a pool of daemon threads with which to execute tasks in parallel.
+local WorkPool = torch.class("dt.WorkPool", dt)
+
+function WorkPool:__init(nThread)
+ self.nThread = nThread or 16
+ assert(torch.type(self.nThread) == 'number')
+ assert(self.nThread > 0)
+
+ self:initialize()
+end
+
+function WorkPool:initialize()
+ local ipc = require 'libipc'
+ self.queuename = os.tmpname()
+ self.queue = ipc.workqueue(self.queuename)
+ self.queues = {}
+ for i=1,self.nThread do
+ self.queues[i] = ipc.workqueue(self.queuename.."/"..i)
+ end
+
+ -- spawn thread workers
+ ipc.map(self.nThread, function(queuename, nThread, myId)
+ assert(nThread)
+ assert(myId)
+ local ipc = require 'libipc'
+
+ -- Open the queue by name (the main thread already created it)
+ local mainqueue = ipc.workqueue(queuename)
+ local workqueue = ipc.workqueue(queuename.."/"..myId)
+
+ local taskname, task
+
+ local store = {}
+ local queue = mainqueue
+
+ repeat
+ local msg = queue:read()
+ assert(torch.type(msg) == 'table')
+ taskname, task = unpack(msg)
+ if taskname == nil then
+ break
+ elseif torch.type(taskname) ~= 'string' then
+ error("Expecting taskname string. Got "..torch.type(taskname))
+ elseif taskname == 'storeKeyValue' then
+ assert(torch.type(task) == 'table')
+ assert(queue == workqueue)
+ store[task.key] = task.value
+ queue:write({taskname})
+ elseif taskname == 'storeKeysValues' then
+ assert(torch.type(task) == 'table')
+ assert(queue == workqueue)
+ for key,value in pairs(task) do
+ store[key] = value
+ end
+ queue:write({taskname})
+ elseif taskname == 'require' then
+ assert(torch.type(task) == 'table')
+ assert(torch.type(task.libname) == 'string')
+ assert(torch.type(task.varname) == 'string')
+ _G[task.varname] = require(task.libname)
+ assert(queue == workqueue)
+ queue:write({taskname})
+ elseif taskname == 'storeReset' then
+ store = {}
+ mainqueue:write({taskname})
+ elseif taskname == 'echo' then
+ mainqueue:write({taskname, task})
+ elseif taskname == 'readWorkerQueue' then
+ queue = workqueue
+ elseif taskname == 'readMainQueue' then
+ queue = mainqueue
+ elseif taskname == 'execute' then
+ if torch.type(task) == 'table' then
+ assert(task.func and task.args)
+ queue:write({taskname, task.func(store, task.args, myId)})
+ else
+ assert(torch.type(task) == 'function')
+ queue:write({taskname, task(store, myId)})
+ end
+ else
+ error("Unknown taskname: "..taskname)
+ end
+ until taskname == nil
+ end, self.queuename, self.nThread)
+
+end
+
+-- Terminates all daemon threads.
+function WorkPool:terminate()
+ for i=1,self.nThread do
+ self.queue:write({})
+ end
+end
+
+-- this function is used to update the store of data in each worker thread
+function WorkPool:_update(taskname, task, upval)
+ assert(torch.type(taskname) == 'string')
+ local _ = require 'moses'
+ assert(_.contains({'storeKeyValue','storeKeysValues','require','execute'}, taskname))
+ assert(torch.type(task) == 'table' or torch.type(task) == 'function')
+
+ -- tell the workers to read their individual queue
+ for i=1,self.nThread do
+ self.queue:write({'readWorkerQueue'})
+ end
+
+ -- write to individual worker queues
+ for i=1,self.nThread do
+ if upval then
+ self.queues[i]:writeup({taskname, task})
+ else
+ self.queues[i]:write({taskname, task})
+ end
+ end
+
+ -- TODO use ipc.mutex:barrier(nThread+1)
+ -- barrier: make sure that every worker has completed task by reading their queue
+ for i=1,self.nThread do
+ assert(self.queues[i]:read()[1] == taskname)
+ end
+
+ -- finally, tell them to read the main queue
+ for i=1,self.nThread do
+ self.queues[i]:write({'readMainQueue'})
+ end
+end
+
+function WorkPool:update(taskname, task)
+ return self:_update(taskname, task, false)
+end
+
+function WorkPool:updateup(taskname, task)
+ return self:_update(taskname, task, true)
+end
+
+function WorkPool:write(taskname, task)
+ assert(torch.type(taskname) == 'string')
+ assert(taskname ~= 'storeKeyValue' and taskname ~= 'storeKeysValues')
+ self.queue:write({taskname, task})
+end
+
+function WorkPool:writeup(taskname, task)
+ assert(torch.type(taskname) == 'string')
+ assert(taskname ~= 'storeKeyValue' and taskname ~= 'storeKeysValues')
+ self.queue:writeup({taskname, task})
+end
+
+function WorkPool:read()
+ local res = self.queue:read()
+ assert(torch.type(res) == 'table')
+ assert(torch.type(res[1]) == 'string')
+ return unpack(res)
+end
+
diff --git a/contrib/lua-torch/decisiontree/_env.lua b/contrib/lua-torch/decisiontree/_env.lua
new file mode 100644
index 000000000..a92770152
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/_env.lua
@@ -0,0 +1,5 @@
+
+-- https://github.com/torch/torch7/issues/525
+
+local dl = {}
+return dl \ No newline at end of file
diff --git a/contrib/lua-torch/decisiontree/benchmark.lua b/contrib/lua-torch/decisiontree/benchmark.lua
new file mode 100644
index 000000000..2b6a03dc6
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/benchmark.lua
@@ -0,0 +1,171 @@
+local dt = require "decisiontree._env"
+
+local bm = {}
+function bm.CartTrainer(opt)
+ local timer = torch.Timer()
+ local trainSet, validSet = dt.getSparseDummyData(opt)
+ print(string.format("CartTrainer: sparse dataset create: %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+
+ local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
+ local treeState = dt.GiniState(trainSet:getExampleIds())
+ timer:reset()
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+ print(string.format("CartTrainer: train single-thread : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+
+ timer:reset()
+ cartTrainer:featureParallel(opt.nThread)
+ print(string.format("CartTrainer: setup feature-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+ timer:reset()
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+ print(string.format("CartTrainer: train feature-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+end
+
+function bm.GradientBoostState(opt)
+ local trainSet, validSet = dt.getSparseDummyData(opt)
+
+ trainSet:initScore()
+
+ local treeState = dt.GradientBoostState(trainSet:getExampleIds(), nn.LogitBoostCriterion(false))
+
+ local timer = torch.Timer() -- first step also calls SparseTensor:buildIndex()
+ treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
+ print(string.format("GradientBoostState: findBestSplit (first) : %f sec", timer:time().real))
+
+ timer:reset()
+ treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
+ print(string.format("GradientBoostState: findBestSplit (second) : %f sec", timer:time().real))
+
+end
+
+local function file_exists(name)
+ local f=io.open(name,"r")
+ if f~=nil then io.close(f) return true else return false end
+end
+
+function bm.GradientBoostTrainer(opt)
+ local trainSet, validSet
+ if file_exists("/tmp/train.bin") and file_exists("/tmp/valid.bin") then
+ trainSet = torch.load("/tmp/train.bin")
+ validSet = torch.load("/tmp/valid.bin")
+ else
+ if opt.sparse then
+ trainSet, validSet = dt.getSparseDummyData(opt)
+ else
+ trainSet, validSet = dt.getDenseDummyData(opt)
+ end
+ torch.save("/tmp/train.bin", trainSet)
+ torch.save("/tmp/valid.bin", validSet)
+ end
+
+ local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
+ opt.lossFunction = nn.LogitBoostCriterion(false)
+ opt.treeTrainer = cartTrainer
+ local forestTrainer = dt.GradientBoostTrainer(opt)
+
+ local timer = torch.Timer()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds, validSet)
+ local time = timer:time().real
+ print(string.format("GradientBoostTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+
+ cartTrainer:featureParallel(opt.nThread)
+ timer:reset()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds, validSet)
+ local time = timer:time().real
+ print(string.format("GradientBoostTrainer: train feature-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+end
+
+function bm.RandomForestTrainer(opt)
+ local trainSet, validSet = dt.getSparseDummyData(opt)
+
+ local forestTrainer = dt.RandomForestTrainer(opt)
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
+
+ local timer = torch.Timer()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
+ local time = timer:time().real
+ print(string.format("RandomForestTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+
+ timer:reset()
+ forestTrainer:treeParallel(opt.nThread)
+ print(string.format("RandomForestTrainer: setup tree-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+
+ timer:reset()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
+ local time = timer:time().real
+ print(string.format("RandomForestTrainer: train tree-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+end
+
+function bm.DFD(opt)
+ local _ = require 'moses'
+ local opt = _.clone(opt)
+ opt.nExample = 200
+ local trainSet, validSet = dt.getDenseDummyData(opt)
+
+ local forestTrainer = dt.RandomForestTrainer(opt)
+ forestTrainer:treeParallel(opt.nThread)
+ local timer = torch.Timer()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
+ local time = timer:time().real
+ print(string.format("DFD: train random forest in parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+
+
+ -- benchmark nn.DFD
+ local input = trainSet.input:sub(1,opt.batchsize)
+ local dfd = nn.DFD(decisionForest)
+ dfd:forward(input)
+ timer:reset()
+ for i=1,opt.nloop do
+ dfd:forward(input)
+ end
+ print(string.format("DFD: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/timer:time().real, timer:time().real))
+end
+
+function bm.Sparse2Dense(opt)
+ local _ = require 'moses'
+ local opt = _.clone(opt)
+ opt.nExample = opt.batchsize
+ local trainSet = dt.getSparseDummyData(opt)
+
+ local input = {{},{}}
+ for i=1,opt.batchsize do
+ input[1][i] = trainSet.input[i].keys
+ input[2][i] = trainSet.input[i].values
+ end
+ assert(#input[1] == opt.batchsize)
+
+ -- benchmark nn.Sparse2Dense
+ local s2d = nn.Sparse2Dense(torch.LongTensor():range(1,opt.nFeature))
+ s2d:forward(input)
+ local timer = torch.Timer()
+ for i=1,opt.nloop do
+ s2d:forward(input)
+ end
+ print(string.format("Sparse2Dense: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/timer:time().real, timer:time().real))
+end
+
+function dt.benchmark(benchmarks, opt2)
+ local opt = {
+ nExample=10000, nCluster=2, nFeature=1000, overlap=0, nValid=100, -- getSparseDummyData
+ nTree=20, featureBaggingSize=-1, sparse=true, -- GradientBoostTrainer and RandomForestTrainer
+ nThread=2, shrinkage=0.1, downsampleRatio=0.1, evalFreq=5, earlyStop=0, -- GradientBoostTrainer
+ activeRatio=0.5, -- RandomForestTrainer
+ batchsize=32, nloop=10
+ }
+
+ local _ = require 'moses'
+ benchmarks = benchmarks or _.keys(bm)
+ assert(torch.type(benchmarks) == 'table')
+ for i,benchmark in ipairs(benchmarks) do
+ local opt1 = _.clone(opt)
+ for key, value in pairs(opt2 or {}) do
+ opt1[key] = value
+ end
+ opt1.nActive = opt1.nActive or torch.round(opt1.nFeature/10)
+ opt1.maxLeafNodes = opt1.maxLeafNodes or (opt1.nExample/10)
+ opt1.minLeafSize = opt1.minLeafSize or (opt1.nExample/100)
+
+ assert(torch.type(benchmark) == 'string', benchmark)
+ assert(bm[benchmark], benchmark)
+ bm[benchmark](opt1)
+ end
+end
diff --git a/contrib/lua-torch/decisiontree/doc/benchmark.md b/contrib/lua-torch/decisiontree/doc/benchmark.md
new file mode 100644
index 000000000..cb8f905d6
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/doc/benchmark.md
@@ -0,0 +1,291 @@
+# Benchmarks
+
+This file outlines the roadmap (and accompanying benchmarks) of optimizations and refactorings over time.
+
+## Baseline
+
+The baseline implementation is very slow.
+We converted the Twitter decision tree library (used internally) from Java to Lua.
+The objective was to replicate the GBDT and Random Forest implementations as is (more or less).
+The Java library is very good and reasonably fast. The same code in Lua is slow.
+The point of this Lua baseline was not to obtain the same computational performance as the Java library.
+Instead, we wanted the training and inference algorithms of the Lua lib to match those of the Java lib.
+As such, the training/validation error of the baseline Lua lib should match that of the Java lib.
+The unit tests seem to validate this claim as both training/validation set performance is unit tested.
+We also used the conversion exercise as a way to learn about decision tree implementation (our background is deep learning).
+That being said, the baseline performance is terrible:
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark()"
+CartTrainer: sparse dataset create: 2963.192386 samples/sec; 0.337479 sec
+CartTrainer: train single-thread : 14.165438 samples/sec; 70.594361 sec
+CartTrainer: setup feature-parallel : 5.129034 samples/sec; 194.968478 sec
+CartTrainer: train feature-parallel : 9.736592 samples/sec; 102.705344 sec
+```
+
+The original Java lib had approximately 43 classes.
+The baseline has about 24.
+This reduction is due to the obvious merging of classes, but also to the conversion of classes into functions.
+The next patches continue this process of reducing the number of classes.
+
+## Patch 1 (complete):
+
+This patch further reduces the number of classes, but adds the DataSet class.
+The code is much simpler to read. Examples are batched.
+
+ * [x] examples are batched in dt.DataSet: {input, target, score}
+ * [x] deprecate dt.LabeledExample
+ * [x] list of examples are replaced with torch.LongTensors of exampleIds
+ * [x] merge TreeBrancher into TreeState
+ * [x] merge BestSplitFinder and SplitStateUpdater into TreeState
+ * [x] TreeState subclasses: GradientBoostState and GiniState
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark()"
+CartTrainer: sparse dataset create: 3597.392294 samples/sec; 0.277984 sec
+CartTrainer: train single-thread : 35.763255 samples/sec; 27.961663 sec
+CartTrainer: setup feature-parallel : 36759.250495 samples/sec; 0.027220 sec
+CartTrainer: train feature-parallel : 72.523658 samples/sec; 13.788606 sec
+```
+
+The setup time for feature-parallelization is the most improved.
+The run-time for feature-parallel training is also about half that of single-thread.
+Since it uses 2 threads, this means the parallelization is working quite well.
+
+We also added benchmarks for the `RandomForestTrainer` and `GradientBoostTrainer`:
+
+```
+GradientBoostTrainer: train single-thread : 599.895105 samples/sec; 0.083348 sec/tree, 1.666958 sec
+GradientBoostTrainer: train feature-parallel : 974.235273 samples/sec; 0.051322 sec/tree, 1.026446 sec
+RandomForestTrainer: train single-thread : 134.781044 samples/sec; 0.370972 sec/tree, 7.419441 sec
+RandomForestTrainer: setup tree-parallel : 73341.097064 samples/sec; 0.013649 sec
+RandomForestTrainer: train tree-parallel : 262.975891 samples/sec; 0.190131 sec/tree, 3.802630 sec
+```
+
+Looks good.
+
+## Patch 2 (complete):
+
+ * [x] dt.LossFunction -> nn.Criterion (LogitBoost is done, missing MSE)
+ * [x] use SparseTensor:buildIndex() to accelerate TreeState:findBestSplit()
+ * [x] benchmarks use 10000 instead of 1000 examples
+
+The benchmarks indicate good improvements. Most improvements were made possible by the use of `buildIndex`:
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark()"
+GradientBoostState: findBestSplit (first) : 11.415645 sec
+GradientBoostState: findBestSplit (second) : 11.246336 sec
+CartTrainer: sparse dataset create: 3284.803629 samples/sec; 3.044327 sec
+CartTrainer: train single-thread : 239.544758 samples/sec; 41.745858 sec
+CartTrainer: setup feature-parallel : 10996.443063 samples/sec; 0.909390 sec
+CartTrainer: train feature-parallel : 473.888592 samples/sec; 21.102011 sec
+RandomForestTrainer: train single-thread : 892.985186 samples/sec; 0.559920 sec/tree, 11.198394 sec
+RandomForestTrainer: setup tree-parallel : 176806.252266 samples/sec; 0.056569 sec
+RandomForestTrainer: train tree-parallel : 1377.849291 samples/sec; 0.362884 sec/tree, 7.257688 sec
+GradientBoostTrainer: train single-thread : 2685.485128 samples/sec; 0.186186 sec/tree, 3.723722 sec
+GradientBoostTrainer: train feature-parallel : 3712.313215 samples/sec; 0.134687 sec/tree, 2.693738 sec
+```
+
+The main bottleneck is now in serializing the SparseTensor hash maps. We temporarily overcame this bottleneck by
+deleting the indexes when calling `CartTrainer:featureParallel()` and `RandomForestTrainer:treeParallel()`.
+In this way, the indexes are recreated for each thread. Ideally, we would use a C hash map such that a pointer
+could be serialized instead, but `tds.Hash` does not serialize well. For now, we use Lua tables.
+
+This is the benchmark for `GradientBoostTrainer` on a large dataset of dense inputs:
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})"
+GradientBoostTrainer: train single-thread : 152.463989 samples/sec; 131.178517 sec/tree, 655.892584 sec
+GradientBoostTrainer: train feature-parallel : 224.288488 samples/sec; 89.170872 sec/tree, 445.854358 sec
+[tw-mbp-nleonard decisiontree]$ th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8,nThread=4})"
+GradientBoostTrainer: train single-thread : 163.836896 samples/sec; 122.072625 sec/tree, 610.363126 sec
+GradientBoostTrainer: train feature-parallel : 407.981442 samples/sec; 49.021838 sec/tree, 245.109188 sec
+```
+
+## Patch 3 :
+
+Optimize GBDT for large datasets consisting of dense inputs. The benchmarks:
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})"
+GradientBoostTrainer: train single-thread : 547.553407 samples/sec; 36.526117 sec/tree, 182.630587 sec
+GradientBoostTrainer: train feature-parallel : 792.964678 samples/sec; 25.221804 sec/tree, 126.109022 sec
+[tw-mbp-nleonard decisiontree]$ th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8,nThread=4})"
+GradientBoostTrainer: train single-thread : 555.793759 samples/sec; 35.984571 sec/tree, 179.922855 sec
+GradientBoostTrainer: train feature-parallel : 1289.977846 samples/sec; 15.504142 sec/tree, 77.520711 sec
+```
+
+For 1, 2 and 4 threads, the speedups of patch 3 over patch 2 are respectively: 3.39, 3.53, and 3.18.
+For this patch, the multi-threading speedups of 2 and 4 threads over a single thread are respectively 1.42 and 2.33.
+Improvements over the previous patch were obtained by optimizing two aspects:
+
+ 1. Optimizing `TreeState.findBestFeatureSplit` for dense datasets (for example: `if dense, then ...`);
+ 2. Removing `assert` clauses in `GradientBoostState.update`. The `update` method is called for every (example, feature), making it a major bottleneck.
+
+Converting the `update` to C could lead to further optimizations.
+
+This patch also improves the benchmark on sparse datasets:
+```
+$ th -e "dt = require 'decisiontree'; dt.benchmark()"
+RandomForestTrainer: train single-thread : 1121.311196 samples/sec; 0.445907 sec/tree, 8.918131 sec
+RandomForestTrainer: setup tree-parallel : 168773.323354 samples/sec; 0.059256 sec
+RandomForestTrainer: train tree-parallel : 1701.280938 samples/sec; 0.293896 sec/tree, 5.877924 sec
+GradientBoostState: findBestSplit (first) : 8.250646 sec
+GradientBoostState: findBestSplit (second) : 7.952077 sec
+GradientBoostTrainer: train single-thread : 3355.248596 samples/sec; 0.149020 sec/tree, 2.980405 sec
+GradientBoostTrainer: train feature-parallel : 4399.133369 samples/sec; 0.113659 sec/tree, 2.273175 sec
+CartTrainer: sparse dataset create: 3428.105601 samples/sec; 2.917069 sec
+CartTrainer: train single-thread : 282.172416 samples/sec; 35.439331 sec
+CartTrainer: setup feature-parallel : 9455.440801 samples/sec; 1.057598 sec
+CartTrainer: train feature-parallel : 594.054049 samples/sec; 16.833491 sec
+DFD: train random forest in parallel : 346.831378 samples/sec; 0.288325 sec/tree, 5.766491 sec
+DFD: updateOutput : 831.105546 samples/sec; 0.038509 sec
+```
+
+## Patch 4 :
+
+This patch improves `nn.DFD` from
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})"
+DFD: train random forest in parallel : 10.527251 samples/sec; 0.037997 sec/tree, 18.998313 sec
+DFD: updateOutput : 32.442950 samples/sec; 9.863472 sec
+```
+
+to
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})"
+DFD: train random forest in parallel : 10.839547 samples/sec; 0.036902 sec/tree, 18.450956 sec
+DFD: updateOutput : 359.158353 samples/sec; 0.890975 sec
+Sparse2Dense: updateOutput : 15395.648952 samples/sec; 0.020791 sec
+```
+
+That is a 10x speedup for `nn.DFD`.
+
+The patch also adds a benchmark for `nn.Sparse2Dense`:
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})"
+Sparse2Dense: updateOutput : 17158.126406 samples/sec; 0.018653 sec
+```
+
+Indeed, `nn.Sparse2Dense` is not the bottleneck; `nn.DFD` is.
+
+## Patch 5 :
+
+This patch improves `nn.DFD` inference from
+
+```
+for i in `seq 3`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=100})"; done
+DFD: train random forest in parallel : 8.452295 samples/sec; 0.047324 sec/tree, 23.662212 sec
+DFD: updateOutput : 176.617872 samples/sec; 9.059109 sec
+DFD: train random forest in parallel : 8.350019 samples/sec; 0.047904 sec/tree, 23.952042 sec
+DFD: updateOutput : 183.508204 samples/sec; 8.718962 sec
+DFD: train random forest in parallel : 8.525779 samples/sec; 0.046917 sec/tree, 23.458266 sec
+DFD: updateOutput : 178.877077 samples/sec; 8.944692 sec
+```
+
+to
+
+```
+for i in `seq 3`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=100})"; done
+DFD: train random forest in parallel : 8.434502 samples/sec; 0.047424 sec/tree, 23.712129 sec
+DFD: updateOutput : 6479.597179 samples/sec; 0.246933 sec
+DFD: train random forest in parallel : 8.334543 samples/sec; 0.047993 sec/tree, 23.996518 sec
+DFD: updateOutput : 6663.641184 samples/sec; 0.240114 sec
+DFD: train random forest in parallel : 8.353265 samples/sec; 0.047885 sec/tree, 23.942735 sec
+DFD: updateOutput : 6882.607456 samples/sec; 0.232475 sec
+```
+
+That is a 37x speedup for `nn.DFD`.
+
+## Patch 6:
+
+This patch improves `nn.DFD` from the previous result to
+
+```
+for i in `seq 5`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=10000})"; done
+DFD: train random forest in parallel : 8.353504 samples/sec; 0.047884 sec/tree, 23.942050 sec
+DFD: updateOutput : 91967.342339 samples/sec; 1.739753 sec
+DFD: train random forest in parallel : 8.528141 samples/sec; 0.046904 sec/tree, 23.451770 sec
+DFD: updateOutput : 91405.321702 samples/sec; 1.750451 sec
+DFD: train random forest in parallel : 8.184562 samples/sec; 0.048872 sec/tree, 24.436250 sec
+DFD: updateOutput : 91623.388867 samples/sec; 1.746284 sec
+DFD: train random forest in parallel : 8.779561 samples/sec; 0.045560 sec/tree, 22.780182 sec
+DFD: updateOutput : 93914.242852 samples/sec; 1.703686 sec
+DFD: train random forest in parallel : 8.636201 samples/sec; 0.046317 sec/tree, 23.158330 sec
+DFD: updateOutput : 94092.241963 samples/sec; 1.700465 sec
+```
+
+That is another 13.8x speedup.
+
+## Patch 7:
+
+This patch improves `nn.Sparse2Dense` computation from
+
+```
+for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
+Sparse2Dense: updateOutput : 1103.570777 samples/sec; 28.996786 sec
+Sparse2Dense: updateOutput : 1092.064331 samples/sec; 29.302309 sec
+Sparse2Dense: updateOutput : 1036.963572 samples/sec; 30.859334 sec
+```
+
+to
+
+```
+for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
+Sparse2Dense: updateOutput : 62995.834470 samples/sec; 0.507978 sec
+Sparse2Dense: updateOutput : 62471.568253 samples/sec; 0.512242 sec
+Sparse2Dense: updateOutput : 62965.099331 samples/sec; 0.508226 sec
+```
+
+This represents a speedup of about 57x.
+
+## Patch 8:
+
+This patch improves `nn.Sparse2Dense` from the previous result to
+
+```
+for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
+Sparse2Dense: updateOutput : 124268.079914 samples/sec; 0.257515 sec
+Sparse2Dense: updateOutput : 114750.039542 samples/sec; 0.278873 sec
+Sparse2Dense: updateOutput : 122863.314766 samples/sec; 0.260458 sec
+```
+
+which corresponds to another 1.95x speedup.
+
+## Patch 9:
+
+This patch moves the core of GBDT training, which used to be a big bottleneck, to C. It also
+performs small optimizations across the board (faster scoring, faster branching, ...) that provide
+a little more performance.
+
+The original commit had this performance:
+
+```
+th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})"
+GradientBoostTrainer: train single-thread : 500.414666 samples/sec; 39.966854 sec/tree, 199.834271 sec
+GradientBoostTrainer: train feature-parallel : 1227.228044 samples/sec; 16.296890 sec/tree, 81.484448 sec (4 threads)
+GradientBoostTrainer: train feature-parallel : 1385.926280 samples/sec; 14.430782 sec/tree, 72.153910 sec (8 threads)
+```
+
+and the new version has
+
+```
+GradientBoostTrainer: train single-thread : 15285.644631 samples/sec; 1.308417 sec/tree, 6.542086 sec
+GradientBoostTrainer: train feature-parallel : 43170.435932 samples/sec; 0.463280 sec/tree, 2.316400 sec (4 threads)
+GradientBoostTrainer: train feature-parallel : 50062.681239 samples/sec; 0.399499 sec/tree, 1.997496 sec (8 threads)
+```
+
+That represents a speedup over the baseline of about 30.5x for 1 thread (15285.6 / 500.4) and
+36.1x for 8 threads (50062.7 / 1385.9). Note that the performance doesn't increase much with the
+number of threads: we use feature parallelism, and due to bagging only a small number of features
+(29 in this case) is evaluated.
+If we disable bagging, then we have the following result with 8 threads and the new code:
+
+```
+GradientBoostTrainer: train single-thread : 590.823965 samples/sec; 33.851030 sec/tree, 169.255152 sec
+GradientBoostTrainer: train feature-parallel : 3232.188576 samples/sec; 6.187758 sec/tree, 30.938789 sec
+```
+
+So processing all 836 features is now much faster than processing 29 was before.
diff --git a/contrib/lua-torch/decisiontree/error.h b/contrib/lua-torch/decisiontree/error.h
new file mode 100644
index 000000000..18df3c939
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/error.h
@@ -0,0 +1,24 @@
+#ifndef _ERROR_H_
+#define _ERROR_H_
+
+#include "luaT.h"
+#include <string.h>
+
+static inline int _lua_error(lua_State *L, int ret, const char* file, int line) {
+ int pos_ret = ret >= 0 ? ret : -ret;
+ return luaL_error(L, "ERROR: (%s, %d): (%d, %s)\n", file, line, pos_ret, strerror(pos_ret));
+}
+
+static inline int _lua_error_str(lua_State *L, const char *str, const char* file, int line) {
+ return luaL_error(L, "ERROR: (%s, %d): (%s)\n", file, line, str);
+}
+
+static inline int _lua_error_str_str(lua_State *L, const char *str, const char* file, int line, const char *extra) {
+ return luaL_error(L, "ERROR: (%s, %d): (%s: %s)\n", file, line, str, extra);
+}
+
+#define LUA_HANDLE_ERROR(L, ret) _lua_error(L, ret, __FILE__, __LINE__)
+#define LUA_HANDLE_ERROR_STR(L, str) _lua_error_str(L, str, __FILE__, __LINE__)
+#define LUA_HANDLE_ERROR_STR_STR(L, str, extra) _lua_error_str_str(L, str, __FILE__, __LINE__, extra)
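+
+// usage sketch from a lua C function:
+//   int rc = pthread_create(&thread, NULL, fn, arg);
+//   if (rc) return LUA_HANDLE_ERROR(L, rc);               // errno-style code
+//   if (bad_input) return LUA_HANDLE_ERROR_STR(L, "bad"); // plain message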
+
+#endif
diff --git a/contrib/lua-torch/decisiontree/generic/CartTree.c b/contrib/lua-torch/decisiontree/generic/CartTree.c
new file mode 100644
index 000000000..eb29fcf02
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/generic/CartTree.c
@@ -0,0 +1,88 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/CartTree.c"
+#else
+
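+// scores a batch of dense examples against a CART tree stored as nested lua tables; each node is
+// expected to expose the fields read below:
+//   leftChild / rightChild - child node tables (nil on a leaf)
+//   score - prediction held by a leaf
+//   splitFeatureId / splitFeatureValue - split of an internal node
+// stack: 1 = input (n_samples x n_features), 2 = root node table, 3 = output score tensor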
+static int nn_(tree_fast_score)(lua_State *L) {
+ THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
+ THTensor *score = luaT_checkudata(L, 3, torch_Tensor);
+ long n_samples = THTensor_(size)(input, 0);
+ long n_features = THTensor_(size)(input, 1);
+ THTensor_(resize1d)(score, n_samples);
+ real *input_data = THTensor_(data)(input);
+ real *score_data = THTensor_(data)(score);
+
+ lua_pushstring(L, "leftChild");
+ const int left_child_string = 4;
+ lua_pushstring(L, "rightChild");
+ const int right_child_string = 5;
+ lua_pushstring(L, "score");
+ const int score_string = 6;
+ lua_pushstring(L, "splitFeatureId");
+ const int id_string = 7;
+ lua_pushstring(L, "splitFeatureValue");
+ const int value_string = 8;
+
+ const int original_top = lua_gettop(L);
+ for (long i = 0; i < n_samples; i++) {
+ int node = 2;
+ while (1) {
+ int current_top = lua_gettop(L);
+ lua_pushvalue(L, left_child_string);
+ lua_rawget(L, node);
+ lua_pushvalue(L, right_child_string);
+ lua_rawget(L, node);
+ if (lua_isnil(L, -2) && lua_isnil(L, -1)) {
+ lua_pushvalue(L, score_string);
+ lua_rawget(L, node);
+ score_data[i] = lua_tonumber(L, -1);
+ break;
+ }
+ if (lua_isnil(L, -2)) {
+ // go to right
+ node = current_top + 2;
+ continue;
+ }
+ if (lua_isnil(L, -1)) {
+ // go to left
+ node = current_top + 1;
+ continue;
+ }
+ lua_pushvalue(L, id_string);
+ lua_rawget(L, node);
+ lua_pushvalue(L, value_string);
+ lua_rawget(L, node);
+ long feature_id = lua_tointeger(L, -2);
+ real feature_value = lua_tonumber(L, -1);
+
+ real current_value = input_data[i * n_features + (feature_id-1)];
+ if (current_value < feature_value) {
+ // go to left
+ node = current_top + 1;
+ }
+ else {
+ // go to right
+ node = current_top + 2;
+ }
+ }
+ lua_pop(L, lua_gettop(L) - original_top);
+ }
+
+ lua_pop(L, 5);
+
+ lua_pushvalue(L, 3);
+ return 1;
+}
+
+static const struct luaL_Reg nn_(CT__) [] = {
+ {"CartTreeFastScore", nn_(tree_fast_score)},
+ {NULL, NULL}
+};
+
+static void nn_(CT_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(CT__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/contrib/lua-torch/decisiontree/generic/DFD.c b/contrib/lua-torch/decisiontree/generic/DFD.c
new file mode 100644
index 000000000..599c4d794
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/generic/DFD.c
@@ -0,0 +1,157 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/DFD.c"
+#else
+
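+// computes the output of nn.DFD: every sample is run down every tree of the forest, and the ids
+// of the nodes it traverses (only the final leaf when only_last_node is set) become its active
+// sparse features; returns a table of two tables holding, per sample, a narrowed LongTensor of
+// node ids and the matching narrowed value tensor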
+static int nn_(DFD_computeOutput)(lua_State *L) {
+ THLongTensor *outputkeys = luaT_checkudata(L, 1, "torch.LongTensor");
+ THTensor *outputvalues = luaT_checkudata(L, 2, torch_Tensor);
+ THLongTensor *root_ids = luaT_checkudata(L, 3, "torch.LongTensor");
+ THLongTensor *left_child = luaT_checkudata(L, 4, "torch.LongTensor");
+ THLongTensor *right_child = luaT_checkudata(L, 5, "torch.LongTensor");
+ THLongTensor *split_feature_id = luaT_checkudata(L, 6, "torch.LongTensor");
+ THTensor *split_feature_value = luaT_checkudata(L, 7, torch_Tensor);
+ THTensor *input = luaT_checkudata(L, 8, torch_Tensor);
+ char only_last_node = lua_toboolean(L, 9);
+
+ // gets some important sizes from the input
+ long batch_size = THTensor_(size)(input, 0);
+ long input_size = THTensor_(size)(input, 1);
+ long roots_size = THLongTensor_size(root_ids, 0);
+ long depth = THLongTensor_size(outputkeys, 1);
+
+ // keeps track of the number of nodes traversed in the trees by each sample.
+ // each traversed node maps to an output feature having a value of 1
+ long outputsize[batch_size];
+ for (long i = 0; i < batch_size; i++)
+ outputsize[i] = 0;
+
+ // gets direct pointers to the memory of each tensor for efficiency
+ long *root_ids_data = THLongTensor_data(root_ids);
+ long *left_child_data = THLongTensor_data(left_child);
+ long *right_child_data = THLongTensor_data(right_child);
+ real *split_feature_value_data = THTensor_(data)(split_feature_value);
+ long *split_feature_id_data = THLongTensor_data(split_feature_id);
+ long *outputkeys_data = THLongTensor_data(outputkeys);
+ real *input_data = THTensor_(data)(input);
+
+ // for each sample in the batch
+ for (long sample_index = 0; sample_index < batch_size; sample_index++) {
+ // gets pointers to the direct memory associated with each sample for efficiency
+ const long outputkeys_offset = sample_index * depth;
+ const long input_offset = sample_index * input_size;
+ long *local_outputkeys_data = &outputkeys_data[outputkeys_offset];
+ real *local_input_data = &input_data[input_offset];
+
+ // for each tree in the forest
+ for (long i = 0; i < roots_size; i++) {
+ int root = 1;
+ long node_id = root_ids_data[i];
+
+ // traverses the whole tree keeping track of which nodes were seen
+ while (1) {
+ if (root) {
+ // root nodes aren't added to output because they are always traversed
+ root = 0;
+ }
+ else if (!only_last_node) {
+ // updates the outputsize for all samples traversing this node; and
+ // set the traversed node as a feature in output for exampleIds
+ long output_index = outputsize[sample_index];
+ // updates the outputsize for all samples traversing this node
+ outputsize[sample_index]++;
+ // sets the traversed node as a feature in output for exampleIds
+ local_outputkeys_data[output_index] = node_id;
+ }
+
+      // gets the left and right children. values <= 0 represent a missing node
+ long left_id = left_child_data[node_id-1];
+ long right_id = right_child_data[node_id-1];
+
+ if (left_id <= 0 && right_id <= 0) {
+ if (only_last_node) {
+ long output_index = outputsize[sample_index];
+ outputsize[sample_index]++;
+ local_outputkeys_data[output_index] = node_id;
+ }
+ // if no children, stops
+ break;
+ }
+ else if (left_id <= 0) {
+ // if no left child, traverses right node
+ node_id = right_id;
+ }
+ else if (right_id <= 0) {
+ // if no right child, traverses left node
+ node_id = left_id;
+ }
+ else {
+ // if both left and right children, finds the direction for this sample
+ // first get the reference from the node
+ real split_value = split_feature_value_data[node_id-1];
+ long split_id = split_feature_id_data[node_id-1]-1;
+
+ // then gets the value of the sample
+ real node_value = local_input_data[split_id];
+ // and branchs
+ if (node_value < split_value)
+ node_id = left_id;
+ else
+ node_id = right_id;
+ }
+ }
+ }
+ }
+
+  // now that we know which nodes were traversed for each sample, we can create the sparse output
+ // with 1 entry pair for each sample
+ THTensor *input_feature = THTensor_(new)();
+ THLongTensor *indices = THLongTensor_new();
+
+ // pushes the return table with 2 children tables
+ lua_newtable(L);
+ lua_pushinteger(L, 1);
+ lua_newtable(L);
+ lua_pushinteger(L, 2);
+ lua_newtable(L);
+
+ // for each sample...
+ for (long i = 0; i < batch_size; i++) {
+ long j = outputsize[i];
+ // selects the tensor lines from the dense output
+ THLongTensor_select(indices, outputkeys, 0, i);
+ THTensor_(select)(input_feature, outputvalues, 0, i);
+
+ // narrows the keys to actual number of nodes traversed and saves to the output
+ lua_pushinteger(L, i+1);
+ luaT_pushudata(L, THLongTensor_newNarrow(indices, 0, 0, j), "torch.LongTensor");
+ lua_settable(L, -5);
+
+ // and narrows the values
+ lua_pushinteger(L, i+1);
+ luaT_pushudata(L, THTensor_(newNarrow)(input_feature, 0, 0, j), torch_Tensor);
+ lua_settable(L, -3);
+ }
+
+ // pushes the two parts of the output into the output table
+ lua_settable(L, -5);
+ lua_settable(L, -3);
+
+ THLongTensor_free(indices);
+ THTensor_(free)(input_feature);
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(DFD__) [] = {
+ {"DFD_computeOutput", nn_(DFD_computeOutput)},
+ {NULL, NULL}
+};
+
+static void nn_(DFD_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(DFD__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/contrib/lua-torch/decisiontree/generic/GBDT.c b/contrib/lua-torch/decisiontree/generic/GBDT.c
new file mode 100644
index 000000000..31f5b025d
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/generic/GBDT.c
@@ -0,0 +1,392 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/GBDT.c"
+#else
+
+#include "GBDT_internal.h"
+#include "GBDT_internal.c"
+
+// note that each one of the functions to find the best split is a subset of the next.
+// first we have one that can only evaluate a single feature, using the logic in lua to control the
+// features
+// then we have one that can go over a shard of features, following the feature parallelism
+// introduced by the lua logic
+// and finally we have one that performs the feature parallelism itself in the special case of
+// dense tensors
+// all of them are kept for completeness and for testing in case the logic has to be changed
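+// from lua they are reached through the tensor metatable's "nn" field; a sketch of the dense
+// fast path (the exact call sites live in the lua trainer code):
+//   input.nn.GBDT_findBestSplitFP(exampleIds, dataset, featureIds, minLeafSize, grad, hess, nThread)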
+
+// finds the best split for a given node and feature
+static int nn_(gb_findBestFeatureSplit)(lua_State *L) {
+ THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor");
+ const int dataset_index = 2;
+ if (!lua_isnumber(L, 3))
+ return LUA_HANDLE_ERROR_STR(L, "third argument should be an integer");
+ long feature_id = lua_tointeger(L, 3);
+ if (!lua_isnumber(L, 4))
+ return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer");
+ long minLeafSize = lua_tointeger(L, 4);
+ // Since minLeafSize == 1 corresponds to each sample in its own leaf, any value below it doesn't
+ // make sense
+ if (minLeafSize < 1)
+ minLeafSize = 1;
+ THTensor *grad = luaT_checkudata(L, 5, torch_Tensor);
+ THTensor *hess = luaT_checkudata(L, 6, torch_Tensor);
+
+ if (!THLongTensor_isContiguous(exampleIds))
+ return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous");
+ if (!THTensor_(isContiguous)(grad))
+ return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous");
+ if (!THTensor_(isContiguous)(hess))
+ return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous");
+
+ // initializes the static data
+ nn_(GBInitialization) initialization_data;
+ nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index);
+
+ // initializes the dynamic data
+ GBRunData run_data;
+ gb_create_run_data(&run_data, minLeafSize);
+
+ // finds the best state possible for the split
+ nn_(GBBestState) bs;
+ nn_(gb_find_best_feature_split)(L, &initialization_data, &bs, feature_id, &run_data);
+
+ lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index);
+
+  // fills the table with the best split found; the lua logic above will do everything else.
+ // if no state was found, returns nil
+ if (bs.valid_state == 0) {
+ lua_pop(L, 1);
+ lua_pushnil(L);
+ }
+ else {
+ nn_(gb_internal_split_info)(L, &bs, initialization_data.splitInfo_index);
+ }
+
+ gb_destroy_run_data(&run_data);
+
+ return 1;
+}
+
+// finds the best split for a given node and shard of features
+// this is more efficient than calling the previous one multiple times
+static int nn_(gb_findBestSplit)(lua_State *L) {
+ THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor");
+ const int dataset_index = 2;
+ THLongTensor *feature_ids = luaT_checkudata(L, 3, "torch.LongTensor");
+ if (!lua_isnumber(L, 4))
+ return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer");
+ long minLeafSize = lua_tointeger(L, 4);
+ // Since minLeafSize == 1 corresponds to each sample in its own leaf, any value below it doesn't
+ // make sense
+ if (minLeafSize < 1)
+ minLeafSize = 1;
+ if (!lua_isnumber(L, 5))
+ return LUA_HANDLE_ERROR_STR(L, "fifth argument should be an integer");
+ long shardId = lua_tointeger(L, 5);
+ if (!lua_isnumber(L, 6))
+ return LUA_HANDLE_ERROR_STR(L, "sixth argument should be an integer");
+ long nShard = lua_tointeger(L, 6);
+ THTensor *grad = luaT_checkudata(L, 7, torch_Tensor);
+ THTensor *hess = luaT_checkudata(L, 8, torch_Tensor);
+
+ if (!THLongTensor_isContiguous(exampleIds))
+ return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous");
+ if (!THTensor_(isContiguous)(grad))
+ return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous");
+ if (!THTensor_(isContiguous)(hess))
+ return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous");
+
+ // initializes the static data
+ nn_(GBInitialization) initialization_data;
+ nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index);
+
+ // initializes the dynamic data
+ GBRunData run_data;
+ gb_create_run_data(&run_data, minLeafSize);
+
+ // initializes to evaluate all the features in this shard
+ nn_(GBBestState) global_bs;
+ global_bs.valid_state = 0;
+ long n_features = THLongTensor_size(feature_ids, 0);
+ if (!THLongTensor_isContiguous(feature_ids))
+ return LUA_HANDLE_ERROR_STR(L, "feature_ids must be contiguous");
+ long *feature_ids_data = THLongTensor_data(feature_ids);
+
+ // for every feature
+ for (long i = 0; i < n_features; i++) {
+ long feature_id = feature_ids_data[i];
+ // if we are responsible for it
+ if (nShard <= 1 || (feature_id % nShard) + 1 == shardId) {
+ // finds the best state possible for the split
+ nn_(GBBestState) bs;
+ nn_(gb_find_best_feature_split)(L, &initialization_data, &bs, feature_id, &run_data);
+
+ // if it's valid and better than one we found before, saves it
+ if (bs.valid_state) {
+ if (global_bs.valid_state == 0 || bs.gain < global_bs.gain) {
+ global_bs = bs;
+ }
+ }
+ }
+ }
+
+ lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index);
+
+  // fills the table with the best split found; the lua logic above will do everything else.
+ // if no state was found, returns nil
+ if (global_bs.valid_state == 0) {
+ lua_pop(L, 1);
+ lua_pushnil(L);
+ }
+ else {
+ nn_(gb_internal_split_info)(L, &global_bs, initialization_data.splitInfo_index);
+ }
+
+ gb_destroy_run_data(&run_data);
+
+ return 1;
+}
+
+// all the info we have to pass to the worker threads so that they can do their jobs
+// note that we do not pass the lua state since it isn't required. we perform direct C parallelism
+// instead of using lua's parallelism like the previous version does
+typedef struct {
+ nn_(GBInitialization) *initialization_data;
+ GBRunData *run_data;
+ long *index;
+ nn_(GBBestState) *global_bs;
+ long n_features;
+ long *feature_ids_data;
+ pthread_mutex_t *mutex;
+ THLongTensor *exampleIds;
+ THTensor *input;
+ THLongTensor **sorted_ids_per_feature;
+} nn_(ThreadInfo);
+
+// loops over all the features in parallel and finds the best global split
+static void* nn_(thread_worker)(void *arg) {
+ nn_(ThreadInfo) *info = (nn_(ThreadInfo) *)arg;
+
+ while (1) {
+ pthread_mutex_lock(info->mutex);
+ long index = (*info->index);
+ (*info->index)++;
+ pthread_mutex_unlock(info->mutex);
+
+ if (index >= info->n_features)
+ break;
+
+    // performs part of steps (1) and (2) of gb_find_best_feature_split using pre-loaded data,
+    // without having to access the lua state
+ long feature_id = info->feature_ids_data[index];
+ THLongTensor *exampleIdsWithFeature_ret = info->exampleIds;
+ THLongTensor *featureExampleIds = info->sorted_ids_per_feature[index];
+ nn_(GBInitialization) *initialization_data = info->initialization_data;
+ GBRunData *run_data = info->run_data;
+
+ // performs steps (3) and (4) of gb_find_best_feature_split since (1) and (2) were already
+ // performed before
+ nn_(GBBestState) bs;
+ nn_(gb_internal_create)(initialization_data->grad, initialization_data->hess,
+ exampleIdsWithFeature_ret, &bs.state);
+ nn_(gb_internal_get_best_split_special)(&bs, featureExampleIds, run_data->exampleMap,
+ info->input, run_data->minLeafSize, feature_id);
+
+ // saves to the global state if it's better
+ if (bs.valid_state) {
+ pthread_mutex_lock(info->mutex);
+ if (info->global_bs->valid_state == 0 || bs.gain < info->global_bs->gain) {
+ (*info->global_bs) = bs;
+ }
+ pthread_mutex_unlock(info->mutex);
+ }
+ }
+
+ return NULL;
+}
+
+// finds the global best split by doing feature parallelism directly in C
+static int nn_(gb_findBestSplitFP)(lua_State *L) {
+ THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor");
+ const int dataset_index = 2;
+ THLongTensor *feature_ids = luaT_checkudata(L, 3, "torch.LongTensor");
+ if (!lua_isnumber(L, 4))
+ return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer");
+ long minLeafSize = lua_tointeger(L, 4);
+ THTensor *grad = luaT_checkudata(L, 5, torch_Tensor);
+ THTensor *hess = luaT_checkudata(L, 6, torch_Tensor);
+ if (!lua_isnumber(L, 7))
+ return LUA_HANDLE_ERROR_STR(L, "seventh argument should be an integer");
+ long nThread = lua_tointeger(L, 7);
+
+ if (!THLongTensor_isContiguous(exampleIds))
+ return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous");
+ if (!THTensor_(isContiguous)(grad))
+ return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous");
+ if (!THTensor_(isContiguous)(hess))
+ return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous");
+
+ pthread_mutex_t mutex;
+ pthread_mutex_init(&mutex, NULL);
+
+ // initializes the static data
+ nn_(GBInitialization) initialization_data;
+ nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index);
+
+ // initializes the dynamic data
+ GBRunData run_data;
+ gb_create_run_data(&run_data, minLeafSize);
+
+ // initializes to evaluate all the features
+ nn_(GBBestState) global_bs;
+ global_bs.valid_state = 0;
+ long n_features = THLongTensor_size(feature_ids, 0);
+ if (!THLongTensor_isContiguous(feature_ids))
+ return LUA_HANDLE_ERROR_STR(L, "feature_ids must be contiguous");
+ long *feature_ids_data = THLongTensor_data(feature_ids);
+
+ THTensor *input = luaT_checkudata(L, initialization_data.input_index, torch_Tensor);
+
+ // performs step (1) of gb_find_best_feature_split so that we don't have to pass the lua state
+ THLongTensor *sorted_ids_per_feature[n_features];
+ for (long i = 0; i < n_features; i++) {
+ long feature_id = feature_ids_data[i];
+ lua_pushvalue(L, initialization_data.getSortedFeature_index);
+ lua_pushvalue(L, initialization_data.dataset_index);
+ lua_pushinteger(L, feature_id);
+ lua_call(L, 2, 1);
+
+ THLongTensor *featureExampleIds = luaT_checkudata(L, -1, "torch.LongTensor");
+ sorted_ids_per_feature[i] = featureExampleIds;
+ }
+
+  // performs step (2) of gb_find_best_feature_split since it's the same for all features when the
+ // data is dense
+ long exampleIds_size = THLongTensor_size(initialization_data.exampleIds, 0);
+ long *exampleIds_data = THLongTensor_data(initialization_data.exampleIds);
+
+ int ret;
+ kh_resize(long, run_data.exampleMap, exampleIds_size*8);
+ for (long i = 0; i < exampleIds_size; i++)
+ kh_put(long, run_data.exampleMap, exampleIds_data[i], &ret);
+
+ // saves the info for the threads
+ long index = 0;
+ nn_(ThreadInfo) info;
+ info.initialization_data = &initialization_data;
+ info.run_data = &run_data;
+ info.index = &index;
+ info.global_bs = &global_bs;
+ info.n_features = n_features;
+ info.feature_ids_data = feature_ids_data;
+ info.mutex = &mutex;
+ info.exampleIds = exampleIds;
+ info.input = input;
+ info.sorted_ids_per_feature = sorted_ids_per_feature;
+
+ pthread_t threads[nThread];
+
+ // let the threads run like crazy over the features to find the minimum
+ for (long i = 0; i < nThread; i++) {
+ int ret = pthread_create(&threads[i], NULL, nn_(thread_worker), &info);
+ if (ret)
+ return LUA_HANDLE_ERROR_STR(L, "falied to create thread");
+ }
+
+ for (long i = 0; i < nThread; i++) {
+ int ret = pthread_join(threads[i], NULL);
+ if (ret)
+ return LUA_HANDLE_ERROR_STR(L, "failed to join thread");
+ }
+
+ lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index);
+
+  // fills the table with the best split found; the lua logic above will do everything else.
+ // if no state was found, returns nil
+ if (global_bs.valid_state == 0) {
+ lua_pop(L, 1);
+ lua_pushnil(L);
+ }
+ else {
+ nn_(gb_internal_split_info)(L, &global_bs, initialization_data.splitInfo_index);
+ }
+
+ gb_destroy_run_data(&run_data);
+ pthread_mutex_destroy(&mutex);
+
+ return 1;
+}
+
+// performs an efficient branch of the current examples based on a split info provided
+static int nn_(gb_branch)(lua_State *L) {
+ if (!lua_istable(L, 1))
+ return LUA_HANDLE_ERROR_STR(L, "first argument must be a table");
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ THLongTensor *exampleIds = luaT_checkudata(L, 3, "torch.LongTensor");
+
+ // gets direct access to the dataset
+ long n_exampleIds = THLongTensor_size(exampleIds, 0);
+ long *exampleIds_data = THLongTensor_data(exampleIds);
+ long n_features = THTensor_(size)(input, 1);
+ real *input_data = THTensor_(data)(input);
+
+ // creates the tensors to be returned
+ luaT_pushudata(L, THLongTensor_new(), "torch.LongTensor");
+ luaT_pushudata(L, THLongTensor_new(), "torch.LongTensor");
+ THLongTensor *leftExampleIds = luaT_checkudata(L, 4, "torch.LongTensor");
+ THLongTensor *rightExampleIds = luaT_checkudata(L, 5, "torch.LongTensor");
+ THLongTensor_resize1d(leftExampleIds, n_exampleIds);
+
+ // gets direct access to the examples
+ THLongTensor *splitExampleIds = leftExampleIds;
+ long *splitExampleIds_data = THLongTensor_data(splitExampleIds);
+
+ // gets the split info
+ lua_pushstring(L, "splitId");
+ lua_rawget(L, 1);
+ const long splitId = lua_tointeger(L, -1);
+ lua_pushstring(L, "splitValue");
+ lua_rawget(L, 1);
+ const real splitValue = lua_tonumber(L, -1);
+ lua_pop(L, 2);
+
+ long leftIdx = 0, rightIdx = 0;
+
+ // goes over all the samples dividing them into the two sides
+ for (long i = 0; i < n_exampleIds; i++) {
+ long exampleId = exampleIds_data[i];
+ real val = input_data[(exampleId-1) * n_features + (splitId - 1)];
+ if (val <= splitValue) {
+ leftIdx++;
+ splitExampleIds_data[leftIdx-1] = exampleId;
+ }
+ else {
+ rightIdx++;
+ splitExampleIds_data[n_exampleIds - rightIdx + 1 - 1] = exampleId;
+ }
+ }
+
+ // once done, the resulting tensors are just splits of the sample base. this is more efficient
+ // than having 2 tensors since we didn't know where the split would happen (how much to each
+ // side), but we knew that the sum would be constant
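+  // e.g. 5 ids of which 3 go left: the buffer ends up as [l1, l2, l3, r2, r1], so
+  // left = narrow(0, 3) and right = narrow(3, 2); the right side is in reverse encounter order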
+ THLongTensor_narrow(rightExampleIds, splitExampleIds, 0, n_exampleIds-rightIdx+1-1, rightIdx);
+ THLongTensor_narrow(leftExampleIds, splitExampleIds, 0, 0, leftIdx);
+ return 2;
+}
+
+static const struct luaL_Reg nn_(GBDT__) [] = {
+ {"GBDT_findBestFeatureSplit", nn_(gb_findBestFeatureSplit)},
+ {"GBDT_findBestSplit", nn_(gb_findBestSplit)},
+ {"GBDT_findBestSplitFP", nn_(gb_findBestSplitFP)},
+ {"GBDT_branch", nn_(gb_branch)},
+ {NULL, NULL}
+};
+
+static void nn_(GBDT_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(GBDT__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/contrib/lua-torch/decisiontree/generic/GBDT_internal.c b/contrib/lua-torch/decisiontree/generic/GBDT_internal.c
new file mode 100644
index 000000000..739aabf25
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/generic/GBDT_internal.c
@@ -0,0 +1,312 @@
+// initializes the optimization structure based on the arguments provided, either filling directly
+// or making calls to lua to load some kind of data
+static void nn_(gb_initialize)(lua_State *L, nn_(GBInitialization) *initialization_data,
+ THLongTensor *exampleIds, THTensor *grad, THTensor *hess, int dataset_index) {
+ initialization_data->dataset_index = dataset_index;
+ initialization_data->exampleIds = exampleIds;
+ initialization_data->grad = grad;
+ initialization_data->hess = hess;
+
+ lua_newtable(L);
+ initialization_data->splitInfo_index = lua_gettop(L);
+
+ lua_pushstring(L, "input");
+ lua_gettable(L, dataset_index);
+ initialization_data->input_index = lua_gettop(L);
+
+ lua_pushstring(L, "getSortedFeature");
+ lua_gettable(L, dataset_index);
+ initialization_data->getSortedFeature_index = lua_gettop(L);
+}
+
+// initializes a state that will be passed to the optimizer
+static void nn_(gb_internal_create)(THTensor *grad, THTensor *hessian,
+ THLongTensor *exampleIds, nn_(GBState)* s) {
+ long *exampleIds_data = THLongTensor_data(exampleIds);
+ long n_examples = THLongTensor_size(exampleIds, 0);
+ accreal leftGradientSum = 0;
+ accreal leftHessianSum = 0;
+
+ real *grad_data = THTensor_(data)(grad);
+ real *hessian_data = THTensor_(data)(hessian);
+
+ // only sums the relevant gradients and hessians
+ for (long i = 0; i < n_examples; i++) {
+ long exampleId = exampleIds_data[i]-1;
+ leftGradientSum += grad_data[exampleId];
+ leftHessianSum += hessian_data[exampleId];
+ }
+
+ // we move data from the left branch to the right branch
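+  // note: both hessian sums below carry a constant +1, seemingly a smoothing/regularization term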
+ s->rightGradientSum = 0;
+ s->rightHessianSum = 1;
+ s->nExampleInRightBranch = 0;
+ s->leftGradientSum = leftGradientSum;
+ s->leftHessianSum = leftHessianSum + 1;
+ s->nExampleInLeftBranch = n_examples;
+
+ // stores the loss in parent for efficiency
+ real lossInParent = computeGradientBoostLoss(s->leftGradientSum + s->rightGradientSum,
+ s->leftHessianSum + s->rightHessianSum);
+ s->lossInParent = lossInParent;
+
+ // caches the direct pointers to the data for efficiency
+ s->grad_data = grad_data;
+ s->hessian_data = hessian_data;
+}
+
+// computes the gain obtained by performing the split
+static real nn_(computeSplitGain)(nn_(GBState) *s) {
+ real lossInLeftBranch = computeGradientBoostLoss(s->leftGradientSum, s->leftHessianSum);
+ real lossInRightBranch = computeGradientBoostLoss(s->rightGradientSum, s->rightHessianSum);
+ return lossInLeftBranch + lossInRightBranch - s->lossInParent;
+}
+
+// uses the state information to build the table required by the lua library about the best split
+static void nn_(gb_internal_split_info)(lua_State *L, nn_(GBBestState) *bs, int res) {
+ long feature_id = bs->feature_id;
+ real feature_value = bs->feature_value;
+ real gain = bs->gain;
+ nn_(GBState) *s = &bs->state;
+ lua_pushstring(L, "splitGain");
+ lua_pushnumber(L, gain);
+ lua_rawset(L, res);
+ lua_pushstring(L, "splitId");
+ lua_pushinteger(L, feature_id);
+ lua_rawset(L, res);
+ lua_pushstring(L, "splitValue");
+ lua_pushnumber(L, feature_value);
+ lua_rawset(L, res);
+ lua_pushstring(L, "leftChildSize");
+ lua_pushinteger(L, s->nExampleInLeftBranch);
+ lua_rawset(L, res);
+ lua_pushstring(L, "rightChildSize");
+ lua_pushinteger(L, s->nExampleInRightBranch);
+ lua_rawset(L, res);
+ lua_pushstring(L, "leftGradient");
+ lua_pushnumber(L, s->leftGradientSum);
+ lua_rawset(L, res);
+ lua_pushstring(L, "rightGradient");
+ lua_pushnumber(L, s->rightGradientSum);
+ lua_rawset(L, res);
+ lua_pushstring(L, "leftHessian");
+ lua_pushnumber(L, s->leftHessianSum);
+ lua_rawset(L, res);
+ lua_pushstring(L, "rightHessian");
+ lua_pushnumber(L, s->rightHessianSum);
+ lua_rawset(L, res);
+}
+
+// core of the computation, where we loop over all the relevant samples looking for the best split
+// we can find
+static void nn_(gb_internal_get_best_split)(lua_State *L, nn_(GBBestState) *bs,
+ THLongTensor *featureExampleIds, khash_t(long)* exampleMap, int input_table_index,
+ long minLeafSize, long feature_id) {
+ nn_(GBState) current_state;
+ nn_(GBState) best_state;
+ current_state = bs->state;
+
+ real best_gain = INFINITY;
+ real best_value = 0;
+
+ // if the data is dense, pre-loads direct access to it
+ THTensor *input = NULL;
+ real *input_data = NULL;
+ long n_features = 0;
+  if (!lua_istable(L, input_table_index)) {
+    input = luaT_checkudata(L, input_table_index, torch_Tensor);
+    input_data = THTensor_(data)(input);
+    n_features = THTensor_(size)(input, 1);
+  }
+
+ long stride = featureExampleIds->stride[0];
+ long *featureExampleIds_data = THLongTensor_data(featureExampleIds);
+
+ khiter_t k;
+
+ real previousSplitValue = 0;
+ // for each example with the given feature and from large to small value...
+ for (long i = THLongTensor_size(featureExampleIds, 0)-1; i >= 0; i--) {
+ long exampleId = featureExampleIds_data[i * stride];
+
+ // checks if the sample is in the list of ones that have to be evaluated by this node
+ k = kh_get(long, exampleMap, exampleId);
+ if (k != kh_end(exampleMap)) {
+ long exampleIdx = exampleId;
+
+ // gets the split value, depending on whether the input is sparse or dense
+ real splitValue;
+ if (input_data) {
+ splitValue = input_data[(exampleId-1) * n_features + feature_id-1];
+ }
+ else {
+ lua_pushinteger(L, exampleId);
+ lua_gettable(L, input_table_index);
+ lua_pushinteger(L, feature_id);
+ lua_gettable(L, -2);
+ splitValue = lua_tonumber(L, -1);
+ lua_pop(L, 2);
+ }
+
+ // performs one update of the state, moving a sample from the left branch to the right
+ real gradient = current_state.grad_data[exampleIdx-1];
+ real hessian = current_state.hessian_data[exampleIdx-1];
+ current_state.leftGradientSum -= gradient;
+ current_state.rightGradientSum += gradient;
+ current_state.leftHessianSum -= hessian;
+ current_state.rightHessianSum += hessian;
+ current_state.nExampleInLeftBranch--;
+ current_state.nExampleInRightBranch++;
+
+ // since we remove from the left, once this becomes true, it stays true forever
+ // hence we stop the loop
+ if (current_state.nExampleInLeftBranch < minLeafSize)
+ break;
+
+ if (current_state.nExampleInRightBranch >= minLeafSize) {
+ // if the values are equal between the steps, it doesn't make sense to evaluate the score
+ // since we won't be able to separate the two
+ if (previousSplitValue != splitValue) {
+ // computes the gain **without including the parent** since it doesn't change as we move
+ // examples between branches
+ real lossInLeftBranch = computeGradientBoostLoss(current_state.leftGradientSum, current_state.leftHessianSum);
+ real lossInRightBranch = computeGradientBoostLoss(current_state.rightGradientSum, current_state.rightHessianSum);
+ real current_gain = lossInLeftBranch + lossInRightBranch;
+ if (current_gain < best_gain) {
+ best_gain = current_gain;
+ best_value = splitValue;
+ best_state = current_state;
+ }
+ }
+ }
+ previousSplitValue = splitValue;
+ }
+ }
+
+ // if there is a valid gain, then marks the state as valid and fills the meta-info
+ if (!isfinite(best_gain)) {
+ bs->valid_state = 0;
+ }
+ else {
+ bs->valid_state = 1;
+ bs->state = best_state;
+ bs->feature_id = feature_id;
+ bs->gain = nn_(computeSplitGain)(&bs->state);
+ bs->feature_value = best_value;
+ }
+}
+
+// exactly like the previous version, but direct access to the data for efficiency. it also doesn't
+// rely on the lua state in the particular case of dense data, so we can evaluate this without using
+// the lua state
+static void nn_(gb_internal_get_best_split_special)(nn_(GBBestState) *bs,
+ THLongTensor *featureExampleIds, khash_t(long)* exampleMap, THTensor *input, long minLeafSize,
+ long feature_id) {
+ nn_(GBState) current_state;
+ nn_(GBState) best_state;
+ current_state = bs->state;
+
+ real best_gain = INFINITY;
+ real best_value = 0;
+
+ real *input_data = NULL;
+ long n_features = 0;
+ input_data = THTensor_(data)(input);
+ n_features = THTensor_(size)(input, 1);
+
+ long stride = featureExampleIds->stride[0];
+ long *featureExampleIds_data = THLongTensor_data(featureExampleIds);
+
+ khiter_t k;
+
+ real previousSplitValue = 0;
+ for (long i = THLongTensor_size(featureExampleIds, 0)-1; i >= 0; i--) {
+ long exampleId = featureExampleIds_data[i * stride];
+
+ k = kh_get(long, exampleMap, exampleId);
+ if (k != kh_end(exampleMap)) {
+ long exampleIdx = exampleId;
+
+ // THIS is the main part that changes. seems crazy to have a special case just for this, but
+ // since there are a **lot** of samples to be evaluated, the "if" in the previous case can
+ // become expensive
+ real splitValue;
+ splitValue = input_data[(exampleId-1) * n_features + feature_id-1];
+
+ real gradient = current_state.grad_data[exampleIdx-1];
+ real hessian = current_state.hessian_data[exampleIdx-1];
+ current_state.leftGradientSum -= gradient;
+ current_state.rightGradientSum += gradient;
+ current_state.leftHessianSum -= hessian;
+ current_state.rightHessianSum += hessian;
+ current_state.nExampleInLeftBranch--;
+ current_state.nExampleInRightBranch++;
+
+ // since we remove from the left, once this becomes true, it stays true forever
+ // hence we stop the loop
+ if (current_state.nExampleInLeftBranch < minLeafSize)
+ break;
+
+      // When minLeafSize > 1 this fails in the first passes, since nExampleInRightBranch
+      // starts at 0 and grows by one per update
+ if (current_state.nExampleInRightBranch >= minLeafSize) {
+ if (previousSplitValue != splitValue) {
+ real lossInLeftBranch = computeGradientBoostLoss(current_state.leftGradientSum, current_state.leftHessianSum);
+ real lossInRightBranch = computeGradientBoostLoss(current_state.rightGradientSum, current_state.rightHessianSum);
+ real current_gain = lossInLeftBranch + lossInRightBranch;
+ if (current_gain < best_gain) {
+ best_gain = current_gain;
+ best_value = splitValue;
+ best_state = current_state;
+ }
+ }
+ }
+ previousSplitValue = splitValue;
+ }
+ }
+
+ if (!isfinite(best_gain)) {
+ bs->valid_state = 0;
+ }
+ else {
+ bs->valid_state = 1;
+ bs->state = best_state;
+ bs->feature_id = feature_id;
+ bs->gain = nn_(computeSplitGain)(&bs->state);
+ bs->feature_value = best_value;
+ }
+}
+
+// core of the computation to find the split for a given feature and is divided in 4 steps
+static void nn_(gb_find_best_feature_split)(lua_State *L,
+ nn_(GBInitialization) *initialization_data, nn_(GBBestState) *bs, long feature_id,
+ GBRunData *run_data) {
+
+ // 1) loads the examples in the dataset ordered by their feature value
+ lua_pushvalue(L, initialization_data->getSortedFeature_index);
+ lua_pushvalue(L, initialization_data->dataset_index);
+ lua_pushinteger(L, feature_id);
+ lua_call(L, 2, 1);
+
+ THLongTensor *featureExampleIds = luaT_checkudata(L, -1, "torch.LongTensor");
+
+ // 2) processes the data to find the intersection between the examples in the dataset and the
+ // examples the current node has to evaluate
+ THLongTensor *exampleIdsWithFeature_ret = gb_internal_prepare(L, initialization_data->exampleIds,
+ run_data->exampleIdsWithFeature_cache, initialization_data->input_index, feature_id,
+ run_data->exampleMap);
+ if (!exampleIdsWithFeature_ret) {
+ bs->valid_state = 0;
+ return;
+ }
+
+ // 3) creates a new state to be used by the optimizer
+ nn_(gb_internal_create)(initialization_data->grad, initialization_data->hess,
+ exampleIdsWithFeature_ret, &bs->state);
+
+ // 4) optimize away!
+ nn_(gb_internal_get_best_split)(L, bs, featureExampleIds, run_data->exampleMap,
+ initialization_data->input_index, run_data->minLeafSize, feature_id);
+}
diff --git a/contrib/lua-torch/decisiontree/generic/GBDT_internal.h b/contrib/lua-torch/decisiontree/generic/GBDT_internal.h
new file mode 100644
index 000000000..7119365cf
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/generic/GBDT_internal.h
@@ -0,0 +1,34 @@
+// representation of a state used while searching for the best split
+typedef struct {
+ real leftGradientSum, rightGradientSum;
+ real leftHessianSum, rightHessianSum;
+ real lossInParent;
+ long nExampleInLeftBranch, nExampleInRightBranch;
+ real *grad_data, *hessian_data;
+} nn_(GBState);
+
+// representation for the best state found for a given feature
+typedef struct {
+ nn_(GBState) state;
+ real gain;
+ long feature_id;
+ real feature_value;
+ int valid_state;
+} nn_(GBBestState);
+
+// full data that must be initialized before calling the optimizer
+typedef struct {
+ // *_index represent positions on the lua stack
+ int dataset_index;
+ int splitInfo_index;
+ int input_index;
+ // position of the dataset's function to return the samples ordered for a given feature
+ int getSortedFeature_index;
+
+ // samples that this node has to evaluate
+ THLongTensor *exampleIds;
+
+ // cached gradient and hessian for all data
+ THTensor *grad;
+ THTensor *hess;
+} nn_(GBInitialization);
diff --git a/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c b/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c
new file mode 100644
index 000000000..f2ea1ef09
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c
@@ -0,0 +1,90 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/LogitBoostCriterion.c"
+#else
+
+#define EPS 1e-12
+
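+// logistic loss on a raw score x with target y, where y > 0 marks the positive class:
+//   L(x, y) = log(1 + exp(-x)) if y > 0, and log(1 + exp(x)) otherwise
+// writing p = sigmoid(x), the functions below compute
+//   dL/dx = p - 1 (y > 0) or p (y <= 0), and d2L/dx2 = p * (1 - p)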
+static int nn_(LogitBoostCriterion_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
+ THTensor *target = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *output = luaT_checkudata(L, 3, torch_Tensor);
+ int sizeAverage = lua_toboolean(L, 4);
+
+ if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) {
+ luaL_error(L, "inconsistent input and target size");
+ }
+ THTensor_(resize1d)(output, 1);
+
+ real sum = 0;
+
+ TH_TENSOR_APPLY2(real, input, real, target,
+ real x = *input_data;
+ real y = *target_data;
+ // math.log(1 + math.exp(target[i] <= 0 and input[i] or -input[i]))
+ sum += log(1 + exp(y <= 0 ? x : -x));
+ );
+
+ if (sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ THTensor_(set1d)(output, 0, sum);
+ return 0;
+}
+
+static int nn_(LogitBoostCriterion_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
+ THTensor *target = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *gradInput = luaT_checkudata(L, 3, torch_Tensor);
+
+ if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) {
+ luaL_error(L, "inconsistent input and target size");
+ }
+ THTensor_(resizeAs)(gradInput, input);
+
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ real x = *input_data;
+ real y = *target_data;
+ real p = (x >= 0) ? (1 / (1 + exp(-x))) : (1 - 1 / (1 + exp(x)));
+ *gradInput_data = (y <= 0) ? p : (p - 1);
+ );
+
+ return 0;
+}
+
+static int nn_(LogitBoostCriterion_updateHessInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
+ THTensor *target = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *hessInput = luaT_checkudata(L, 3, torch_Tensor);
+
+ if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) {
+ luaL_error(L, "inconsistent input and target size");
+ }
+ THTensor_(resizeAs)(hessInput, input);
+
+ TH_TENSOR_APPLY3(real, hessInput, real, input, real, target,
+ real x = *input_data;
+ real p = (x >= 0) ? (1 / (1 + exp(-x))) : (1 - 1 / (1 + exp(x)));
+ *hessInput_data = p * (1.0 - p);
+ );
+
+ return 0;
+}
+
+static const struct luaL_Reg nn_(LogitBoostCriterion__) [] = {
+ {"LogitBoostCriterion_updateOutput", nn_(LogitBoostCriterion_updateOutput)},
+ {"LogitBoostCriterion_updateGradInput", nn_(LogitBoostCriterion_updateGradInput)},
+ {"LogitBoostCriterion_updateHessInput", nn_(LogitBoostCriterion_updateHessInput)},
+ {NULL, NULL}
+};
+
+static void nn_(LogitBoostCriterion_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(LogitBoostCriterion__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/contrib/lua-torch/decisiontree/generic/S2D.c b/contrib/lua-torch/decisiontree/generic/S2D.c
new file mode 100644
index 000000000..2392ee7c8
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/generic/S2D.c
@@ -0,0 +1,90 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/S2D.c"
+#else
+
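+// densifies a batch of sparse samples: for every (key, value) pair of sample i whose mask byte
+// is set, writes output[i][key-1] = value; pairs masked out (features absent from the feature
+// mapping) are dropped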
+static int nn_(S2D_computeOutput)(lua_State *L) {
+ THTensor *output = luaT_checkudata(L, 1, torch_Tensor);
+ const int keys_index = 2;
+ const int values_index = 3;
+ const int masks_index = 4;
+
+  if (!lua_istable(L, keys_index))
+    return LUA_HANDLE_ERROR_STR(L, "expected position 2 to be a table");
+  if (!lua_istable(L, values_index))
+    return LUA_HANDLE_ERROR_STR(L, "expected position 3 to be a table");
+  if (!lua_istable(L, masks_index))
+    return LUA_HANDLE_ERROR_STR(L, "expected position 4 to be a table");
+
+
+ THLongTensor *features = luaT_checkudata(L, 5, "torch.LongTensor");
+
+ const int original_top = lua_gettop(L);
+
+ long outputsize = THLongTensor_size(features, 0);
+ long batch_size = lua_objlen(L, keys_index);
+
+ // initializes output
+ THTensor_(resize2d)(output, batch_size, outputsize);
+ THTensor_(zero)(output);
+ real *output_data = THTensor_(data)(output);
+
+ // iterates over samples
+ lua_pushnil(L);
+ const int local_top = lua_gettop(L);
+ while (lua_next(L, keys_index) != 0) {
+ // gets data corresponding to the current sample
+ long i = lua_tointeger(L, -2)-1;
+ real *current_output_data = &output_data[i * outputsize];
+ THLongTensor *keys = luaT_checkudata(L, -1, "torch.LongTensor");
+ lua_rawgeti(L, values_index, i+1);
+ THTensor *values = luaT_checkudata(L, -1, torch_Tensor);
+ lua_rawgeti(L, masks_index, i+1);
+ THByteTensor *mask = luaT_checkudata(L, -1, "torch.ByteTensor");
+
+ long n_keys = THLongTensor_size(keys, 0);
+ long n_values = THTensor_(size)(values, 0);
+
+ // quick safety check
+ if (n_keys != n_values)
+ return LUA_HANDLE_ERROR_STR(L, "keys and values have to have the same size");
+
+ // gets the direct memory pointers
+ long *keys_data = THLongTensor_data(keys);
+ real *values_data = THTensor_(data)(values);
+ unsigned char *mask_data = THByteTensor_data(mask);
+
+ // for each value in the sparse input...
+ for (long j = 0; j < n_keys; j++) {
+ // loads the value and key
+ real current_value = values_data[j];
+ long current_key = keys_data[j];
+ unsigned char current_mask = mask_data[j];
+
+ // if the feature is present in the map
+ if (current_mask)
+ // saves in the given position
+ current_output_data[current_key-1] = current_value;
+ }
+ // cleans up the trash we create by iterating over keys to avoid it from overflowing
+ lua_pop(L, lua_gettop(L) - local_top);
+ }
+
+ // cleans up the trash we added to the stack
+ lua_pop(L, lua_gettop(L) - original_top);
+
+ return 0;
+}
+
+static const struct luaL_Reg nn_(S2D__) [] = {
+ {"S2D_computeOutput", nn_(S2D_computeOutput)},
+ {NULL, NULL}
+};
+
+static void nn_(S2D_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(S2D__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/contrib/lua-torch/decisiontree/hash_map.c b/contrib/lua-torch/decisiontree/hash_map.c
new file mode 100644
index 000000000..2c679e207
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/hash_map.c
@@ -0,0 +1,445 @@
+#include "utils.h"
+#include "hash_map.h"
+#include "internal_hash_map.h"
+#include <pthread.h>
+
+hash_map_t hash_map_init(void) {
+ return kh_init(long);
+}
+
+void hash_map_destroy(hash_map_t h_) {
+ internal_hash_map_t h = (internal_hash_map_t) h_;
+ kh_destroy(long, h);
+}
+
+void hash_map_clear(hash_map_t h_) {
+ internal_hash_map_t h = (internal_hash_map_t) h_;
+ kh_clear(long, h);
+}
+
+int hash_map_put(hash_map_t h_, long key, long val) {
+ internal_hash_map_t h = (internal_hash_map_t) h_;
+ int ret;
+ khiter_t k = kh_put(long, h, key, &ret);
+ ret = (ret >= 0);
+ if (ret)
+ kh_value(h, k) = val;
+ return ret;
+}
+
+int hash_map_put_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_) {
+ long *keys = THLongTensor_data(keys_);
+ long *vals = THLongTensor_data(vals_);
+ long size = get_tensor_size(keys_, Long);
+ for (long i = 0; i < size; i++)
+ if (!hash_map_put(h_, keys[i], vals[i]))
+ return 0;
+ return 1;
+}
+
+int hash_map_fill(hash_map_t h_, long key, long *counter) {
+ internal_hash_map_t h = (internal_hash_map_t) h_;
+ khiter_t k = kh_get(long, h, key);
+ if (k == kh_end(h))
+ return hash_map_put(h_, key, ++(*counter));
+ return 1;
+}
+
+int hash_map_fill_tensor(hash_map_t h_, THLongTensor *keys_, long *counter) {
+ long *keys = THLongTensor_data(keys_);
+ long size = get_tensor_size(keys_, Long);
+ for (long i = 0; i < size; i++)
+ if (!hash_map_fill(h_, keys[i], counter))
+ return 0;
+ return 1;
+}
+
+int hash_map_get(hash_map_t h_, long key, long* val) {
+ internal_hash_map_t h = (internal_hash_map_t) h_;
+ khiter_t k = kh_get(long, h, key);
+ if (k == kh_end(h))
+ return 0;
+ *val = kh_value(h, k);
+ return 1;
+}
+
+void hash_map_get_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_, THByteTensor *mask_) {
+ long *keys = THLongTensor_data(keys_);
+  long *vals = THLongTensor_data(vals_);
+ unsigned char *mask = THByteTensor_data(mask_);
+ long size = get_tensor_size(keys_, Long);
+ for (long i = 0; i < size; i++)
+ mask[i] = hash_map_get(h_, keys[i], &vals[i]);
+}
+
+void hash_map_del(hash_map_t h_, long key) {
+ internal_hash_map_t h = (internal_hash_map_t) h_;
+ khiter_t k = kh_get(long, h, key);
+ if (k != kh_end(h))
+ kh_del(long, h, k);
+}
+
+void hash_map_del_tensor(hash_map_t h_, THLongTensor *keys_) {
+ long *keys = THLongTensor_data(keys_);
+ long size = get_tensor_size(keys_, Long);
+ for (long i = 0; i < size; i++)
+ hash_map_del(h_, keys[i]);
+}
+
+size_t hash_map_size(hash_map_t h_) {
+ internal_hash_map_t h = (internal_hash_map_t) h_;
+ return kh_size(h);
+}
+
+void hash_map_to_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_) {
+ internal_hash_map_t h = (internal_hash_map_t) h_;
+ long *keys = THLongTensor_data(keys_);
+ long *vals = THLongTensor_data(vals_);
+ long key, val, i = 0;
+ kh_foreach(h, key, val, {
+ keys[i] = key;
+ vals[i] = val;
+ i++;
+ });
+}
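+
+// typical C-side usage of the map (sketch):
+//   hash_map_t h = hash_map_init();
+//   hash_map_put(h, key, val);
+//   long out;
+//   if (hash_map_get(h, key, &out)) { /* hit */ }
+//   hash_map_destroy(h);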
+
+static void autolock(hash_map_lua_t *h) {
+ if (h->autolock) {
+ pthread_mutex_lock(&h->mutex);
+ }
+}
+
+static void autounlock(hash_map_lua_t *h) {
+ if (h->autolock) {
+ pthread_mutex_unlock(&h->mutex);
+ }
+}
+
+int hash_map_autolock_on_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ h->autolock = 1;
+ return 0;
+}
+
+int hash_map_autolock_off_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ h->autolock = 0;
+ return 0;
+}
+
+int hash_map_init_lua(lua_State *L) {
+ hash_map_lua_t **hp = (hash_map_lua_t**)lua_newuserdata(L, sizeof(hash_map_lua_t*));
+ *hp = (hash_map_lua_t*)malloc(sizeof(hash_map_lua_t));
+ hash_map_lua_t *h = *hp;
+ h->refcount = 1;
+ h->counter = 0;
+ h->autolock = 0;
+ h->h = hash_map_init();
+
+ pthread_mutexattr_t mutex_attr;
+ pthread_mutexattr_init(&mutex_attr);
+ pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_RECURSIVE);
+ pthread_mutex_init(&h->mutex, &mutex_attr);
+
+ luaL_getmetatable(L, "dt.HashMap");
+ lua_setmetatable(L, -2);
+ return 1;
+}
+
+int hash_map_gc_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ if (THAtomicDecrementRef(&h->refcount)) {
+ pthread_mutex_destroy(&h->mutex);
+ hash_map_destroy(h->h);
+ free(h);
+ }
+ return 0;
+}
+
+int hash_map_retain_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ THAtomicIncrementRef(&h->refcount);
+ return 0;
+}
+
+int hash_map_metatablename_lua(lua_State *L) {
+ lua_pushstring(L, "dt.HashMap");
+ return 1;
+}
+
+int hash_map_clear_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ autolock(h);
+ hash_map_clear(h->h);
+ autounlock(h);
+ return 0;
+}
+
+int hash_map_put_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ int ret;
+#if LUA_VERSION_NUM <= 501
+#define lua_isinteger lua_isnumber
+#endif
+ if (lua_isinteger(L, 2)) {
+ if (!lua_isinteger(L, 3))
+ return LUA_HANDLE_ERROR_STR(L, "second parameter is not a number");
+ long key = lua_tointeger(L, 2);
+ long val = lua_tointeger(L, 3);
+ autolock(h);
+ ret = hash_map_put(h->h, key, val);
+ autounlock(h);
+ }
+ else {
+ THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
+ THLongTensor *vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor");
+ check_tensor(L, keys, THLongTensor);
+ check_tensor(L, vals, THLongTensor);
+ check_tensors(L, keys, vals);
+ autolock(h);
+ ret = hash_map_put_tensor(h->h, keys, vals);
+ autounlock(h);
+ }
+ if (!ret)
+ return LUA_HANDLE_ERROR_STR(L, "failed to put into hash map");
+ return 0;
+}
+
+int hash_map_fill_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ int ret;
+ if (lua_isinteger(L, 2)) {
+ long key = lua_tointeger(L, 2);
+ autolock(h);
+ ret = hash_map_fill(h->h, key, &h->counter);
+ autounlock(h);
+ }
+ else {
+ THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
+ check_tensor(L, keys, THLongTensor);
+ autolock(h);
+ ret = hash_map_fill_tensor(h->h, keys, &h->counter);
+ autounlock(h);
+ }
+ if (!ret)
+ return LUA_HANDLE_ERROR_STR(L, "failed to fill into hash map");
+ return 0;
+}
+
+int hash_map_adjust_counter_lua(lua_State *L) {
+ hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ internal_hash_map_t h = (internal_hash_map_t) h_->h;
+
+ long val;
+ kh_foreach_value(h, val, {
+ if (val >= h_->counter)
+ h_->counter = val;
+ });
+ return 0;
+}
+
+int hash_map_set_counter_lua(lua_State *L) {
+ hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ h_->counter = lua_tointeger(L, 2);
+ return 0;
+}
+
+int hash_map_get_counter_lua(lua_State *L) {
+ hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ lua_pushinteger(L, h_->counter);
+ return 1;
+}
+
+static int hash_map_get_tensor_lua(lua_State *L, hash_map_lua_t *h, int inplace) {
+ THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
+ check_tensor(L, keys, THLongTensor);
+ THLongTensor *vals = inplace ? keys : NULL;
+ THByteTensor *mask = NULL;
+
+ int maskIdx = inplace ? 3 : 4;
+
+ if (!inplace) {
+ if (lua_gettop(L) < 3) {
+ vals = THLongTensor_new();
+ } else {
+ vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor");
+ check_tensor(L, vals, THLongTensor);
+ }
+ }
+
+ if (lua_gettop(L) < maskIdx) {
+ mask = THByteTensor_new();
+ } else {
+ mask = (THByteTensor *)luaT_checkudata(L, maskIdx, "torch.ByteTensor");
+ check_tensor(L, mask, THByteTensor);
+ }
+
+ int n_dim = THLongTensor_nDimension(keys);
+ THLongStorage *st = THLongStorage_newWithSize1(n_dim);
+ for (int i = 0; i < n_dim; i++) {
+ THLongStorage_set(st, i, THLongTensor_size(keys, i));
+ }
+ THByteTensor_resize(mask, st, NULL);
+ if (!inplace) THLongTensor_resize(vals, st, NULL);
+ THLongStorage_free(st);
+
+ autolock(h);
+ hash_map_get_tensor(h->h, keys, vals, mask);
+ autounlock(h);
+
+ if (!inplace && lua_gettop(L) < 3)
+ luaT_pushudata(L, vals, "torch.LongTensor");
+ if (lua_gettop(L) < maskIdx)
+ luaT_pushudata(L, mask, "torch.ByteTensor");
+
+ return 2;
+}
+
+static int hash_map_get_table_lua(lua_State *L, hash_map_lua_t *h, int inplace) {
+ const int kidx = 2;
+ const int vidx = inplace ? 2 : 3;
+ const int midx = inplace ? 3 : 4;
+ const int narg = lua_gettop(L);
+
+ if (inplace) {
+ if (narg < 3) {
+ LUA_HANDLE_ERROR_STR(L, "HashMap.getInplace requires two arguments.");
+ }
+ } else {
+ if (narg < 4) {
+ LUA_HANDLE_ERROR_STR(L, "HashMap.get requires three arguments.");
+ }
+ }
+
+ int count = push_table_contents(L, kidx);
+ verify_push_table_contents(L, vidx, count);
+ verify_push_table_contents(L, midx, count);
+
+ THLongTensor *keys;
+ THLongTensor *vals;
+ THByteTensor *mask;
+ for (int i = count - 1; i >= 0; i--) {
+ int maskIdx = i - count;
+ int valIdx = maskIdx - count;
+ int keyIdx = inplace ? valIdx : (valIdx - count);
+
+ keys = (THLongTensor *)luaT_checkudata(L, keyIdx, "torch.LongTensor");
+ check_tensor(L, keys, THLongTensor);
+ if (inplace) {
+ vals = keys;
+ } else {
+ vals = (THLongTensor *)luaT_checkudata(L, valIdx, "torch.LongTensor");
+ }
+ mask = (THByteTensor *)luaT_checkudata(L, maskIdx, "torch.ByteTensor");
+
+ int n_dim = THLongTensor_nDimension(keys);
+ THLongStorage *st = THLongStorage_newWithSize1(n_dim);
+ for (int i = 0; i < n_dim; i++) {
+ THLongStorage_set(st, i, THLongTensor_size(keys, i));
+ }
+ THByteTensor_resize(mask, st, NULL);
+ THLongTensor_resize(vals, st, NULL);
+ THLongStorage_free(st);
+
+ autolock(h);
+ hash_map_get_tensor(h->h, keys, vals, mask);
+ autounlock(h);
+ }
+ lua_pop(L, (narg - 1) * count);
+ return 2;
+}
+
+int hash_map_get_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ if (lua_isinteger(L, 2)) {
+ long key = lua_tointeger(L, 2);
+ long val;
+ autolock(h);
+ int ret = hash_map_get(h->h, key, &val);
+ autounlock(h);
+ if (ret) {
+ lua_pushinteger(L, val);
+ lua_pushinteger(L, 1);
+ }
+ else {
+ lua_pushinteger(L, 0);
+ lua_pushinteger(L, 0);
+ }
+ } else if (lua_istable(L, 2)) {
+ return hash_map_get_table_lua(L, h, 0);
+ } else {
+ return hash_map_get_tensor_lua(L, h, 0);
+ }
+ return 2;
+}
+
+int hash_map_get_inplace_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ if (lua_isinteger(L, 2)) {
+ LUA_HANDLE_ERROR_STR(L, "HashMap.getInplace does not support integer arguments.");
+ } else if (lua_istable(L, 2)) {
+ return hash_map_get_table_lua(L, h, 1);
+ } else {
+ return hash_map_get_tensor_lua(L, h, 1);
+ }
+ return 2;
+}
+
+int hash_map_del_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ if (lua_isinteger(L, 2)) {
+ long key = lua_tointeger(L, 2);
+ autolock(h);
+ hash_map_del(h->h, key);
+ autounlock(h);
+ }
+ else {
+ THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
+ autolock(h);
+ hash_map_del_tensor(h->h, keys);
+ autounlock(h);
+ }
+ return 0;
+}
+
+int hash_map_size_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ long size = hash_map_size(h->h);
+ lua_pushinteger(L, size);
+ return 1;
+}
+
+int hash_map_to_tensor_lua(lua_State *L) {
+ hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1);
+ THLongTensor *keys, *vals;
+
+ if (lua_gettop(L) < 2) {
+ keys = THLongTensor_new();
+ }
+ else {
+ keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor");
+ check_tensor(L, keys, THLongTensor);
+ }
+
+ if (lua_gettop(L) < 3) {
+ vals = THLongTensor_new();
+ }
+ else {
+ vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor");
+ check_tensor(L, vals, THLongTensor);
+ }
+
+ size_t size = hash_map_size(h->h);
+ THLongTensor_resize1d(keys, size);
+ THLongTensor_resize1d(vals, size);
+
+ autolock(h);
+ hash_map_to_tensor(h->h, keys, vals);
+ autounlock(h);
+
+ if (lua_gettop(L) < 2)
+ luaT_pushudata(L, keys, "torch.LongTensor");
+ if (lua_gettop(L) < 3)
+ luaT_pushudata(L, vals, "torch.LongTensor");
+ return 2;
+}
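+
+/* Illustrative usage note (not part of the original source): from Lua, the
+ * optional tensor arguments of toTensors/get may be omitted, in which case
+ * fresh tensors are allocated and returned, e.g.
+ *   local keys, vals = h:toTensors()  -- allocates and returns both LongTensors
+ *   h:toTensors(keys, vals)           -- reuses caller-supplied tensors
+ */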
diff --git a/contrib/lua-torch/decisiontree/hash_map.h b/contrib/lua-torch/decisiontree/hash_map.h
new file mode 100644
index 000000000..5b215e4ca
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/hash_map.h
@@ -0,0 +1,36 @@
+#include "luaT.h"
+#include "TH.h"
+
+typedef void* hash_map_t;
+
+hash_map_t hash_map_init(void);
+void hash_map_destroy(hash_map_t);
+void hash_map_clear(hash_map_t);
+int hash_map_put(hash_map_t, long key, long val);
+int hash_map_put_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_);
+int hash_map_fill(hash_map_t, long key, long *counter);
+int hash_map_fill_tensor(hash_map_t, THLongTensor *keys_, long *counter);
+int hash_map_get(hash_map_t, long key, long *val);
+void hash_map_get_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_, THByteTensor *mask_);
+void hash_map_del(hash_map_t, long key);
+void hash_map_del_tensor(hash_map_t, THLongTensor *keys_);
+size_t hash_map_size(hash_map_t);
+void hash_map_to_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_);
+
+int hash_map_autolock_on_lua(lua_State *L);
+int hash_map_autolock_off_lua(lua_State *L);
+int hash_map_init_lua(lua_State *L);
+int hash_map_gc_lua(lua_State *L);
+int hash_map_retain_lua(lua_State *L);
+int hash_map_metatablename_lua(lua_State *L);
+int hash_map_clear_lua(lua_State *L);
+int hash_map_put_lua(lua_State *L);
+int hash_map_fill_lua(lua_State *L);
+int hash_map_adjust_counter_lua(lua_State *L);
+int hash_map_set_counter_lua(lua_State *L);
+int hash_map_get_counter_lua(lua_State *L);
+int hash_map_get_lua(lua_State *L);
+int hash_map_get_inplace_lua(lua_State *L);
+int hash_map_del_lua(lua_State *L);
+int hash_map_size_lua(lua_State *L);
+int hash_map_to_tensor_lua(lua_State *L);
diff --git a/contrib/lua-torch/decisiontree/init.c b/contrib/lua-torch/decisiontree/init.c
new file mode 100644
index 000000000..276241e8e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/init.c
@@ -0,0 +1,77 @@
+#include "TH.h"
+#include "luaT.h"
+
+#ifdef _OPENMP
+#include "omp.h"
+#endif
+
+#include "error.h"
+#include "hash_map.h"
+
+#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
+#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor)
+#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME)
+
+#include "generic/LogitBoostCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/DFD.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/S2D.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/CartTree.c"
+#include "THGenerateFloatTypes.h"
+
+#include "GBDT_common.h"
+#include "generic/GBDT.c"
+#include "THGenerateFloatTypes.h"
+
+static const struct luaL_Reg decisiontree_hash_map_routines[] = {
+ {"__gc", hash_map_gc_lua},
+ {"retain", hash_map_retain_lua},
+ {"metatablename", hash_map_metatablename_lua},
+ {"clear", hash_map_clear_lua},
+ {"put", hash_map_put_lua},
+ {"fill", hash_map_fill_lua},
+ {"adjustCounter", hash_map_adjust_counter_lua},
+ {"getCounter", hash_map_get_counter_lua},
+ {"setCounter", hash_map_set_counter_lua},
+ {"get", hash_map_get_lua},
+ {"getInplace", hash_map_get_inplace_lua},
+ {"del", hash_map_del_lua},
+ {"size", hash_map_size_lua},
+ {"safe", hash_map_autolock_on_lua},
+ {"unsafe", hash_map_autolock_off_lua},
+ {"toTensors", hash_map_to_tensor_lua},
+ {"new", hash_map_init_lua},
+ {NULL, NULL}
+};
+
+DLL_EXPORT int luaopen_libdecisiontree(lua_State *L)
+{
+ // HashMap
+ luaL_newmetatable(L, "dt.HashMap");
+ lua_pushstring(L, "__index");
+ lua_pushvalue(L, -2);
+ lua_settable(L, -3);
+ luaT_setfuncs(L, decisiontree_hash_map_routines, 0);
+
+ nn_FloatLogitBoostCriterion_init(L);
+ nn_DoubleLogitBoostCriterion_init(L);
+
+ nn_FloatDFD_init(L);
+ nn_DoubleDFD_init(L);
+
+ nn_FloatS2D_init(L);
+ nn_DoubleS2D_init(L);
+
+ nn_FloatCT_init(L);
+ nn_DoubleCT_init(L);
+
+ nn_FloatGBDT_init(L);
+ nn_DoubleGBDT_init(L);
+
+ return 1;
+}
diff --git a/contrib/lua-torch/decisiontree/init.lua b/contrib/lua-torch/decisiontree/init.lua
new file mode 100644
index 000000000..26f790b60
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/init.lua
@@ -0,0 +1,70 @@
+require 'paths'
+--require 'xlua'
+require 'string'
+require 'os'
+--require 'sys'
+require 'nn'
+
+-- these actually return local variables but we will re-require them
+-- when needed. This is just to make sure they are loaded.
+require 'moses'
+
+unpack = unpack or table.unpack
+
+local dt = require 'decisiontree._env'
+
+-- c lib:
+require "paths"
+paths.require 'libdecisiontree'
+
+dt.HashMap = torch.getmetatable("dt.HashMap").new
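+
+-- Illustrative only (usage inferred from hash_map.c and init.c in this
+-- commit; not part of the original file):
+--   local h = dt.HashMap()
+--   h:put(torch.LongTensor{1,2}, torch.LongTensor{10,20})
+--   local vals, mask = h:get(torch.LongTensor{1,3}) -- mask marks found keys
+--   assert(h:size() == 2)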
+
+dt.EPSILON = 1e-6
+
+-- experimental Tensor-like container
+require 'decisiontree.SparseTensor'
+
+-- functions
+require 'decisiontree.math'
+require 'decisiontree.utils'
+
+-- for multi-threading
+--require 'decisiontree.WorkPool'
+
+-- abstract classes
+require 'decisiontree.DecisionTree'
+require 'decisiontree.DecisionForest'
+require 'decisiontree.DecisionForestTrainer'
+require 'decisiontree.TreeState'
+
+-- for CartTree inference
+require 'decisiontree.CartNode'
+require 'decisiontree.CartTree'
+
+-- Criterions (extended with updateHessInput and backward2)
+require 'decisiontree.MSECriterion'
+require 'decisiontree.LogitBoostCriterion'
+
+-- Used by both RandomForestTrainer and GradientBoostTrainer
+require 'decisiontree.CartTrainer'
+
+-- Used by CartTrainer
+require 'decisiontree.DataSet'
+
+-- Random Forest Training
+require 'decisiontree.RandomForestTrainer'
+require 'decisiontree.GiniState' -- TreeState subclass
+
+-- Gradient Boosted Decision Tree Training
+require 'decisiontree.GradientBoostTrainer'
+require 'decisiontree.GradientBoostState' -- TreeState subclass
+
+-- unit tests and benchmarks
+--require 'decisiontree.test'
+--require 'decisiontree.benchmark'
+
+-- nn.Module
+require 'decisiontree.DFD'
+require 'decisiontree.Sparse2Dense'
+
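+-- Illustrative end-to-end sketch (not part of this file; names follow the
+-- API as exercised in test.lua): train a random forest on a dt.DataSet
+-- called trainSet, then score one example.
+--   local trainer = dt.RandomForestTrainer{nTree=4, activeRatio=0.5,
+--      featureBaggingSize=5, maxLeafNodes=50, minLeafSize=10}
+--   local forest = trainer:train(trainSet, trainSet.featureIds)
+--   local score = forest:score(trainSet.input[1])
+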
+return dt
diff --git a/contrib/lua-torch/decisiontree/internal_hash_map.h b/contrib/lua-torch/decisiontree/internal_hash_map.h
new file mode 100644
index 000000000..bc8c523ef
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/internal_hash_map.h
@@ -0,0 +1,13 @@
+#include "khash.h"
+#include <pthread.h>
+
+KHASH_MAP_INIT_INT64(long, long)
+typedef khash_t(long)* internal_hash_map_t;
+
+typedef struct {
+ hash_map_t h;
+ int refcount;
+ pthread_mutex_t mutex;
+ int autolock;
+ long counter;
+} hash_map_lua_t;
diff --git a/contrib/lua-torch/decisiontree/khash.h b/contrib/lua-torch/decisiontree/khash.h
new file mode 100644
index 000000000..20e994063
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/khash.h
@@ -0,0 +1,627 @@
+/* The MIT License
+
+ Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/*
+ An example:
+
+#include "khash.h"
+KHASH_MAP_INIT_INT(32, char)
+int main() {
+ int ret, is_missing;
+ khiter_t k;
+ khash_t(32) *h = kh_init(32);
+ k = kh_put(32, h, 5, &ret);
+ kh_value(h, k) = 10;
+ k = kh_get(32, h, 10);
+ is_missing = (k == kh_end(h));
+ k = kh_get(32, h, 5);
+ kh_del(32, h, k);
+ for (k = kh_begin(h); k != kh_end(h); ++k)
+ if (kh_exist(h, k)) kh_value(h, k) = 1;
+ kh_destroy(32, h);
+ return 0;
+}
+*/
+
+/*
+ 2013-05-02 (0.2.8):
+
+ * Use quadratic probing. When the capacity is a power of 2, the stepping
+ function i*(i+1)/2 is guaranteed to traverse each bucket. It has better
+ cache performance than double hashing and is more robust than linear probing.
+
+ In theory, double hashing should be more robust than quadratic probing.
+ However, my implementation is probably not for large hash tables, because
+ the second hash function is closely tied to the first hash function,
+ which reduces the effectiveness of double hashing.
+
+ Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
+
+ 2011-12-29 (0.2.7):
+
+ * Minor code clean up; no actual effect.
+
+ 2011-09-16 (0.2.6):
+
+ * The capacity is a power of 2. This seems to dramatically improve the
+ speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
+
+ - http://code.google.com/p/ulib/
+ - http://nothings.org/computer/judy/
+
+ * Optionally allow linear probing, which usually has better
+ performance for random input. Double hashing is still the default as it
+ is more robust to certain non-random input.
+
+ * Added Wang's integer hash function (not used by default). This hash
+ function is more robust to certain non-random input.
+
+ 2011-02-14 (0.2.5):
+
+ * Allow declaring global functions.
+
+ 2009-09-26 (0.2.4):
+
+ * Improve portability
+
+ 2008-09-19 (0.2.3):
+
+ * Corrected the example
+ * Improved interfaces
+
+ 2008-09-11 (0.2.2):
+
+ * Improved speed a little in kh_put()
+
+ 2008-09-10 (0.2.1):
+
+ * Added kh_clear()
+ * Fixed a compiling error
+
+ 2008-09-02 (0.2.0):
+
+ * Changed to token concatenation which increases flexibility.
+
+ 2008-08-31 (0.1.2):
+
+ * Fixed a bug in kh_get(), which has not been tested previously.
+
+ 2008-08-31 (0.1.1):
+
+ * Added destructor
+*/
+
+
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
+
+/*!
+ @header
+
+ Generic hash table library.
+ */
+
+#define AC_VERSION_KHASH_H "0.2.8"
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+/* compiler specific configuration */
+
+#if UINT_MAX == 0xffffffffu
+typedef unsigned int khint32_t;
+#elif ULONG_MAX == 0xffffffffu
+typedef unsigned long khint32_t;
+#endif
+
+#if ULONG_MAX == ULLONG_MAX
+typedef unsigned long khint64_t;
+#else
+typedef unsigned long long khint64_t;
+#endif
+
+#ifndef kh_inline
+#ifdef _MSC_VER
+#define kh_inline __inline
+#else
+#define kh_inline inline
+#endif
+#endif /* kh_inline */
+
+#ifndef klib_unused
+#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
+#define klib_unused __attribute__ ((__unused__))
+#else
+#define klib_unused
+#endif
+#endif /* klib_unused */
+
+typedef khint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
+
+#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#ifndef kcalloc
+#define kcalloc(N,Z) calloc(N,Z)
+#endif
+#ifndef kmalloc
+#define kmalloc(Z) malloc(Z)
+#endif
+#ifndef krealloc
+#define krealloc(P,Z) realloc(P,Z)
+#endif
+#ifndef kfree
+#define kfree(P) free(P)
+#endif
+
+static const double __ac_HASH_UPPER = 0.77;
+
+#define __KHASH_TYPE(name, khkey_t, khval_t) \
+ typedef struct kh_##name##_s { \
+ khint_t n_buckets, size, n_occupied, upper_bound; \
+ khint32_t *flags; \
+ khkey_t *keys; \
+ khval_t *vals; \
+ } kh_##name##_t;
+
+#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
+ extern kh_##name##_t *kh_init_##name(void); \
+ extern void kh_destroy_##name(kh_##name##_t *h); \
+ extern void kh_clear_##name(kh_##name##_t *h); \
+ extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
+ extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
+ extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
+ extern void kh_del_##name(kh_##name##_t *h, khint_t x);
+
+#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+ SCOPE kh_##name##_t *kh_init_##name(void) { \
+ return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
+ } \
+ SCOPE void kh_destroy_##name(kh_##name##_t *h) \
+ { \
+ if (h) { \
+ kfree((void *)h->keys); kfree(h->flags); \
+ kfree((void *)h->vals); \
+ kfree(h); \
+ } \
+ } \
+ SCOPE void kh_clear_##name(kh_##name##_t *h) \
+ { \
+ if (h && h->flags) { \
+ memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
+ h->size = h->n_occupied = 0; \
+ } \
+ } \
+ SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
+ { \
+ if (h->n_buckets) { \
+ khint_t k, i, last, mask, step = 0; \
+ mask = h->n_buckets - 1; \
+ k = __hash_func(key); i = k & mask; \
+ last = i; \
+ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+ i = (i + (++step)) & mask; \
+ if (i == last) return h->n_buckets; \
+ } \
+ return __ac_iseither(h->flags, i)? h->n_buckets : i; \
+ } else return 0; \
+ } \
+ SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
+ { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
+ khint32_t *new_flags = 0; \
+ khint_t j = 1; \
+ { \
+ kroundup32(new_n_buckets); \
+ if (new_n_buckets < 4) new_n_buckets = 4; \
+ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
+ else { /* hash table size to be changed (shrink or expand); rehash */ \
+ new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
+ if (!new_flags) return -1; \
+ memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
+ if (h->n_buckets < new_n_buckets) { /* expand */ \
+ khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
+ if (!new_keys) { kfree(new_flags); return -1; } \
+ h->keys = new_keys; \
+ if (kh_is_map) { \
+ khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
+ if (!new_vals) { kfree(new_flags); return -1; } \
+ h->vals = new_vals; \
+ } \
+ } /* otherwise shrink */ \
+ } \
+ } \
+ if (j) { /* rehashing is needed */ \
+ for (j = 0; j != h->n_buckets; ++j) { \
+ if (__ac_iseither(h->flags, j) == 0) { \
+ khkey_t key = h->keys[j]; \
+ khval_t val; \
+ khint_t new_mask; \
+ new_mask = new_n_buckets - 1; \
+ if (kh_is_map) val = h->vals[j]; \
+ __ac_set_isdel_true(h->flags, j); \
+ while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
+ khint_t k, i, step = 0; \
+ k = __hash_func(key); \
+ i = k & new_mask; \
+ while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
+ __ac_set_isempty_false(new_flags, i); \
+ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
+ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+ if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
+ __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
+ } else { /* write the element and jump out of the loop */ \
+ h->keys[i] = key; \
+ if (kh_is_map) h->vals[i] = val; \
+ break; \
+ } \
+ } \
+ } \
+ } \
+ if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
+ h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
+ if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
+ } \
+ kfree(h->flags); /* free the working space */ \
+ h->flags = new_flags; \
+ h->n_buckets = new_n_buckets; \
+ h->n_occupied = h->size; \
+ h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
+ } \
+ return 0; \
+ } \
+ SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
+ { \
+ khint_t x; \
+ if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
+ if (h->n_buckets > (h->size<<1)) { \
+ if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
+ *ret = -1; return h->n_buckets; \
+ } \
+ } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
+ *ret = -1; return h->n_buckets; \
+ } \
+ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
+ { \
+ khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
+ x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
+ if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
+ else { \
+ last = i; \
+ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+ if (__ac_isdel(h->flags, i)) site = i; \
+ i = (i + (++step)) & mask; \
+ if (i == last) { x = site; break; } \
+ } \
+ if (x == h->n_buckets) { \
+ if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
+ else x = i; \
+ } \
+ } \
+ } \
+ if (__ac_isempty(h->flags, x)) { /* not present at all */ \
+ h->keys[x] = key; \
+ __ac_set_isboth_false(h->flags, x); \
+ ++h->size; ++h->n_occupied; \
+ *ret = 1; \
+ } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
+ h->keys[x] = key; \
+ __ac_set_isboth_false(h->flags, x); \
+ ++h->size; \
+ *ret = 2; \
+ } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
+ return x; \
+ } \
+ SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
+ { \
+ if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
+ __ac_set_isdel_true(h->flags, x); \
+ --h->size; \
+ } \
+ }
+
+#define KHASH_DECLARE(name, khkey_t, khval_t) \
+ __KHASH_TYPE(name, khkey_t, khval_t) \
+ __KHASH_PROTOTYPES(name, khkey_t, khval_t)
+
+#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+ __KHASH_TYPE(name, khkey_t, khval_t) \
+ __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
+
+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+ KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
+
+/* --- BEGIN OF HASH FUNCTIONS --- */
+
+/*! @function
+ @abstract Integer hash function
+ @param key The integer [khint32_t]
+ @return The hash value [khint_t]
+ */
+#define kh_int_hash_func(key) (khint32_t)(key)
+/*! @function
+ @abstract Integer comparison function
+ */
+#define kh_int_hash_equal(a, b) ((a) == (b))
+/*! @function
+ @abstract 64-bit integer hash function
+ @param key The integer [khint64_t]
+ @return The hash value [khint_t]
+ */
+#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
+/*! @function
+ @abstract 64-bit integer comparison function
+ */
+#define kh_int64_hash_equal(a, b) ((a) == (b))
+/*! @function
+ @abstract const char* hash function
+ @param s Pointer to a null terminated string
+ @return The hash value
+ */
+static kh_inline khint_t __ac_X31_hash_string(const char *s)
+{
+ khint_t h = (khint_t)*s;
+ if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
+ return h;
+}
+/*! @function
+ @abstract Another interface to const char* hash function
+ @param key Pointer to a null terminated string [const char*]
+ @return The hash value [khint_t]
+ */
+#define kh_str_hash_func(key) __ac_X31_hash_string(key)
+/*! @function
+ @abstract Const char* comparison function
+ */
+#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
+
+static kh_inline khint_t __ac_Wang_hash(khint_t key)
+{
+ key += ~(key << 15);
+ key ^= (key >> 10);
+ key += (key << 3);
+ key ^= (key >> 6);
+ key += ~(key << 11);
+ key ^= (key >> 16);
+ return key;
+}
+#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key)
+
+/* --- END OF HASH FUNCTIONS --- */
+
+/* Other convenient macros... */
+
+/*!
+ @abstract Type of the hash table.
+ @param name Name of the hash table [symbol]
+ */
+#define khash_t(name) kh_##name##_t
+
+/*! @function
+ @abstract Initiate a hash table.
+ @param name Name of the hash table [symbol]
+ @return Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_init(name) kh_init_##name()
+
+/*! @function
+ @abstract Destroy a hash table.
+ @param name Name of the hash table [symbol]
+ @param h Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_destroy(name, h) kh_destroy_##name(h)
+
+/*! @function
+ @abstract Reset a hash table without deallocating memory.
+ @param name Name of the hash table [symbol]
+ @param h Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_clear(name, h) kh_clear_##name(h)
+
+/*! @function
+ @abstract Resize a hash table.
+ @param name Name of the hash table [symbol]
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param s New size [khint_t]
+ */
+#define kh_resize(name, h, s) kh_resize_##name(h, s)
+
+/*! @function
+ @abstract Insert a key to the hash table.
+ @param name Name of the hash table [symbol]
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param k Key [type of keys]
+ @param r Extra return code: -1 if the operation failed;
+ 0 if the key is present in the hash table;
+ 1 if the bucket is empty (never used); 2 if the element in
+ the bucket has been deleted [int*]
+ @return Iterator to the inserted element [khint_t]
+ */
+#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
+
+/*! @function
+ @abstract Retrieve a key from the hash table.
+ @param name Name of the hash table [symbol]
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param k Key [type of keys]
+ @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
+ */
+#define kh_get(name, h, k) kh_get_##name(h, k)
+
+/*! @function
+ @abstract Remove a key from the hash table.
+ @param name Name of the hash table [symbol]
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param k Iterator to the element to be deleted [khint_t]
+ */
+#define kh_del(name, h, k) kh_del_##name(h, k)
+
+/*! @function
+ @abstract Test whether a bucket contains data.
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param x Iterator to the bucket [khint_t]
+ @return 1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
+
+/*! @function
+ @abstract Get key given an iterator
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param x Iterator to the bucket [khint_t]
+ @return Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x])
+
+/*! @function
+ @abstract Get value given an iterator
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param x Iterator to the bucket [khint_t]
+ @return Value [type of values]
+ @discussion For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->vals[x])
+
+/*! @function
+ @abstract Alias of kh_val()
+ */
+#define kh_value(h, x) ((h)->vals[x])
+
+/*! @function
+ @abstract Get the start iterator
+ @param h Pointer to the hash table [khash_t(name)*]
+ @return The start iterator [khint_t]
+ */
+#define kh_begin(h) (khint_t)(0)
+
+/*! @function
+ @abstract Get the end iterator
+ @param h Pointer to the hash table [khash_t(name)*]
+ @return The end iterator [khint_t]
+ */
+#define kh_end(h) ((h)->n_buckets)
+
+/*! @function
+ @abstract Get the number of elements in the hash table
+ @param h Pointer to the hash table [khash_t(name)*]
+ @return Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->size)
+
+/*! @function
+ @abstract Get the number of buckets in the hash table
+ @param h Pointer to the hash table [khash_t(name)*]
+ @return Number of buckets in the hash table [khint_t]
+ */
+#define kh_n_buckets(h) ((h)->n_buckets)
+
+/*! @function
+ @abstract Iterate over the entries in the hash table
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param kvar Variable to which key will be assigned
+ @param vvar Variable to which value will be assigned
+ @param code Block of code to execute
+ */
+#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
+ for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
+ if (!kh_exist(h,__i)) continue; \
+ (kvar) = kh_key(h,__i); \
+ (vvar) = kh_val(h,__i); \
+ code; \
+ } }
+
+/*! @function
+ @abstract Iterate over the values in the hash table
+ @param h Pointer to the hash table [khash_t(name)*]
+ @param vvar Variable to which value will be assigned
+ @param code Block of code to execute
+ */
+#define kh_foreach_value(h, vvar, code) { khint_t __i; \
+ for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
+ if (!kh_exist(h,__i)) continue; \
+ (vvar) = kh_val(h,__i); \
+ code; \
+ } }
+
+/* More convenient interfaces */
+
+/*! @function
+ @abstract Instantiate a hash set containing integer keys
+ @param name Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT(name) \
+ KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+ @abstract Instantiate a hash map containing integer keys
+ @param name Name of the hash table [symbol]
+ @param khval_t Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT(name, khval_t) \
+ KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+ @abstract Instantiate a hash set containing 64-bit integer keys
+ @param name Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT64(name) \
+ KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
+
+/*! @function
+ @abstract Instantiate a hash map containing 64-bit integer keys
+ @param name Name of the hash table [symbol]
+ @param khval_t Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT64(name, khval_t) \
+ KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
+
+typedef const char *kh_cstr_t;
+/*! @function
+ @abstract Instantiate a hash set containing const char* keys
+ @param name Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_STR(name) \
+ KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
+
+/*! @function
+ @abstract Instantiate a hash map containing const char* keys
+ @param name Name of the hash table [symbol]
+ @param khval_t Type of values [type]
+ */
+#define KHASH_MAP_INIT_STR(name, khval_t) \
+ KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
+
+#endif /* __AC_KHASH_H */
diff --git a/contrib/lua-torch/decisiontree/math.lua b/contrib/lua-torch/decisiontree/math.lua
new file mode 100644
index 000000000..eb71b31ed
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/math.lua
@@ -0,0 +1,84 @@
+local dt = require "decisiontree._env"
+
+local PSEUDOCOUNT = 1.0
+local MIN_LOGISTIC = 1E-8
+local MAX_LOGISTIC = 1.0 - MIN_LOGISTIC
+
+-- Create counts of the possible results (class labels are read from inputset.target)
+function dt.uniquecounts(counts, inputset, nclass)
+ counts = counts or inputset.input.new()
+ nclass = nclass or inputset.target:max()
+ counts:resize(nclass):zero()
+
+ inputset.target:apply(function(c) counts[c] = counts[c] + 1 end)
+ return counts
+end
+
+-- Entropy is the sum of -p(x)log(p(x)) across all the different possible results
+local counts, logprobs
+function dt.entropy(inputset, nclass)
+ local dt = require 'decisiontree'
+ counts = dt.uniquecounts(counts, inputset, nclass)
+ -- convert counts to categorical probabilities
+ counts:add(0.0000001) -- prevent NaN
+ counts:div(counts:sum())
+
+ logprobs = logprobs or counts.new()
+ logprobs:resize(counts:size())
+ logprobs:log(counts):div(math.log(2)) -- log2(x)
+
+ counts:cmul(logprobs)
+
+ return -counts:sum()
+end
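+
+-- Illustrative check (not part of the original file): with two equally
+-- frequent classes the entropy is one bit, e.g.
+--   dt.entropy({input=torch.Tensor(), target=torch.LongTensor{1,2,1,2}}, 2)
+-- returns approximately 1.0.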
+
+-- Compute and return the probability of a positive label.
+function dt.probabilityPositive(nPositive, nTotal)
+ return (nPositive + PSEUDOCOUNT) / (nTotal + 2.0 * PSEUDOCOUNT);
+end
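+
+-- Worked example (illustrative): dt.probabilityPositive(9, 10) == 10/12,
+-- i.e. the Laplace-smoothed estimate with a pseudocount of 1 per class.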
+
+-- Ref. https://en.wikipedia.org/wiki/Logit
+-- Calculates logit of the probability.
+-- Logit represents the log-odds. Probabilities transformed to logit 'space' can be combined linearly.
+function dt.logit(p)
+ assert(p >= 0.0 and p <= 1.0, "Expecting probability for arg 1")
+ local truncatedP = math.max(MIN_LOGISTIC, math.min(MAX_LOGISTIC, p))
+ return math.log(truncatedP / (1.0 - truncatedP))
+end
+
+function dt.logistic(x)
+ return (x >= 0) and (1 / (1 + math.exp(-x))) or (1 - 1 / (1 + math.exp(x)))
+end
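+
+-- Quick sanity sketch (illustrative, not part of the original file):
+--   assert(dt.logit(0.5) == 0) -- even odds have zero log-odds
+--   assert(math.abs(dt.logistic(dt.logit(0.8)) - 0.8) < 1e-6) -- inverses on (0,1)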
+
+function dt.computeGradientBoostLoss(gradient, hessian)
+ return -gradient * gradient / hessian
+end
+
+function dt.computeNewtonScore(gradient, hessian)
+ return -0.5 * gradient / hessian;
+end
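+
+-- Worked example (illustrative): for gradient -2 and hessian 1,
+-- dt.computeNewtonScore(-2, 1) == 1.0 (note the 0.5 factor relative to the
+-- textbook Newton step -g/h) and dt.computeGradientBoostLoss(-2, 1) == -4;
+-- more negative loss values indicate better candidate splits.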
+
+-- Calculates the logit score for a Node in a Decision Tree based on the probability of a positive label.
+-- params: number of positive examples and total number of examples.
+function dt.calculateLogitScore(nPositive, nTotal)
+ local dt = require 'decisiontree'
+ return dt.logit(dt.probabilityPositive(nPositive, nTotal))
+end
+
+-- Compute and return the Gini impurity score based on an input contingency table.
+function dt.computeGini(leftCount, positiveLeftCount, rightCount, positiveRightCount)
+ assert(torch.type(leftCount) == 'number', 'Expecting total number of examples falling into the left branch.')
+ assert(torch.type(positiveLeftCount) == 'number', 'Expecting total number of positive examples falling into left branch.')
+ assert(torch.type(rightCount) == 'number', 'Expecting total number of examples falling into the right branch.')
+ assert(torch.type(positiveRightCount) == 'number', 'Expecting total number of positive examples falling into the right branch.')
+
+ local total = leftCount + rightCount
+
+ local pPositiveLeft = leftCount == 0 and 0 or (positiveLeftCount / leftCount)
+ local leftGini = pPositiveLeft * (1.0 - pPositiveLeft)
+
+ local pPositiveRight = rightCount == 0 and 0 or (positiveRightCount / rightCount)
+ local rightGini = pPositiveRight * (1.0 - pPositiveRight)
+
+ return (leftCount * leftGini + rightCount * rightGini) / total
+end
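+
+-- Worked example (illustrative, not part of the original file): sending all
+-- 50 positives left and all 50 negatives right gives
+--   dt.computeGini(50, 50, 50, 0) == 0 (a pure split),
+-- while keeping both branches at 50% positive gives
+--   dt.computeGini(50, 25, 50, 25) == 0.25, the maximum impurity.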
diff --git a/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec b/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec
new file mode 100644
index 000000000..d5d916275
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec
@@ -0,0 +1,40 @@
+package = "decisiontree"
+version = "scm-1"
+
+source = {
+ url = "git://github.com/Twitter/decisiontree",
+ tag = "master"
+}
+
+description = {
+ summary = "Decision trees for Torch by Twitter",
+ detailed = [[
+ Classification and regression trees (CART).
+ Gradient boosted decision trees (GBDT).
+ ]],
+ homepage = "https://github.com/Twitter/decisiontree",
+ license = "BSD"
+}
+
+dependencies = {
+ "torch >= 7.0",
+ "moses >= 1.3.1",
+ "xlua >= 1.0",
+ "image >= 1.0",
+ "luafilesystem >= 1.6.2",
+ "sys >= 1.1",
+ "paths >= 1.0",
+ "ipc >= 1.0",
+ "nn >= 1.0"
+}
+
+build = {
+ type = "command",
+ build_command = [[
+cmake -E make_directory build;
+cd build;
+cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)" -DCMAKE_C_FLAGS=-fPIC -DCMAKE_CXX_FLAGS=-fPIC;
+$(MAKE)
+ ]],
+ install_command = "cd build && $(MAKE) install"
+}
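+
+-- Illustrative usage (not part of the rockspec): from the package root,
+-- `luarocks make rocks/decisiontree-scm-1.rockspec` runs the cmake build
+-- defined above and installs into the current rocks tree.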
diff --git a/contrib/lua-torch/decisiontree/test.lua b/contrib/lua-torch/decisiontree/test.lua
new file mode 100644
index 000000000..80510a4f6
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/test.lua
@@ -0,0 +1,817 @@
+local dt = require "decisiontree._env"
+
+local dttest = {}
+local nloop = 50
+local epsilon = 0.000001
+local mytester
+
+--e.g. usage: th -e "dt = require 'decisiontree'; dt.test()"
+
+-- test 99% accuracy
+local function testAccuracy(cartTree, name, dataset, minacc)
+ assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+ minacc = minacc or 0.99
+ local output = torch.Tensor(dataset:size())
+ local target, input = dataset.target, dataset.input
+
+ for i=1,dataset:size() do
+ local stack = {}
+ local score = cartTree:score(input[i], stack)
+ output[i] = score >= 0 and 1 or 0
+
+ if dt.VERBOSE and torch.type(cartTree) == 'dt.CartTree' and target[i] ~= output[i] then
+ print(cartTree:stackToString(stack, input[i]))
+ print(i, score, target[i], output[i])
+ end
+ end
+
+ local accuracy = torch.eq(target, output):float():mean()
+ mytester:assert(accuracy >= minacc, name .. ": insufficient accuracy: " .. accuracy .. " < " .. minacc)
+end
+
+function dttest.SparseTensor()
+ local keys = torch.LongTensor{1,5,6,10}
+ local values = torch.randn(keys:size(1))
+ local st = torch.SparseTensor(keys, values)
+
+ mytester:assert(st[1] == values[1])
+ mytester:assert(st[5] == values[2])
+ mytester:assert(st[6] == values[3])
+ mytester:assert(st[10] == values[4])
+
+ mytester:assert(st[2] == nil)
+
+ st:buildIndex()
+
+ mytester:assert(st[1] == values[1])
+ mytester:assert(st[5] == values[2])
+ mytester:assert(st[6] == values[3])
+ mytester:assert(st[10] == values[4])
+
+ mytester:assert(st[2] == nil)
+
+ -- test empty sparse tensor
+
+ local est = torch.SparseTensor()
+end
+
+function dttest.GiniState()
+ local featureId = 2
+ local minLeafSize = 0
+
+ local input = torch.Tensor({{0,1,0},{0,2,0},{0,3,0}})
+ local target = torch.Tensor({1, 1, 1})
+ local dataset = dt.DataSet(input, target, 3)
+
+ local splitInfo1 = {_id=1}
+ local splitInfo2 = {_id=2, leftChildSize = 1, rightChildSize = 2, splitGain = 0}
+ local splitInfo3 = {_id=3, leftChildSize = 2, rightChildSize = 1, splitGain = -1}
+
+ local exampleIds = torch.LongTensor{1,2,3}
+
+ local treeState = dt.GiniState(exampleIds)
+
+ function treeState.computeSplitInfo(self, splitFeatureId, splitFeatureValue)
+ if splitFeatureId == featureId and splitFeatureValue == 2 then
+ return splitInfo2
+ elseif splitFeatureId == featureId and splitFeatureValue == 3 then
+ return splitInfo3
+ else
+ error("Unhandled computeSplitInfo call "..splitFeatureId.." "..splitFeatureValue)
+ end
+ end
+
+ local splitInfo = treeState:findBestFeatureSplit(dataset, featureId, minLeafSize)
+ mytester:assert(splitInfo._id == splitInfo3._id)
+end
+
+function dttest.CartTree()
+
+ local splitFeatureId = 100
+ local splitFeatureValue = 1.0
+
+ local function getBinaryCartTreeRootNode()
+
+ local leftNodeScore = 0.2
+ local rightNodeScore = 0.4
+
+ local rootNode = dt.CartNode()
+ rootNode.nodeId = 0
+ rootNode.score = 0.5
+ rootNode.splitFeatureId = splitFeatureId
+ rootNode.splitFeatureValue = splitFeatureValue
+
+ local leftChild = dt.CartNode()
+ leftChild.score = leftNodeScore
+ leftChild.nodeId = 1
+
+ local rightChild = dt.CartNode()
+ rightChild.score = rightNodeScore
+ rightChild.nodeId = 2
+
+ rootNode.leftChild = leftChild
+ rootNode.rightChild = rightChild
+
+ return rootNode
+ end
+
+ local function testScoreCartTreeBranchLeftIfMissing()
+ local rootNode = getBinaryCartTreeRootNode()
+
+ local cartTree = dt.CartTree(rootNode)
+
+ local continuousFeatures = torch.SparseTensor()
+
+ local score, nodeId = cartTree:score(continuousFeatures)
+
+ mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon)
+ mytester:assert(rootNode.leftChild.nodeId == nodeId)
+ end
+
+ local function testBranchRightWithFeature()
+ local rootNode = getBinaryCartTreeRootNode()
+
+ local cartTree = dt.CartTree(rootNode)
+
+ local continuousFeatures = torch.zeros(100)
+ continuousFeatures[splitFeatureId] = splitFeatureValue
+
+ local score, nodeId = cartTree:score(continuousFeatures)
+
+ mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon)
+ mytester:assert(rootNode.rightChild.nodeId == nodeId)
+ end
+
+ local function testMissingRightNode()
+ local rootNode = getBinaryCartTreeRootNode()
+
+ rootNode.rightChild = nil
+
+ local cartTree = dt.CartTree(rootNode)
+
+ local continuousFeatures = torch.Tensor()
+
+ local score, nodeId = cartTree:score(continuousFeatures)
+
+ mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon)
+ mytester:assert(rootNode.leftChild.nodeId == nodeId)
+ end
+
+ local function testMissingLeftNode()
+ local rootNode = getBinaryCartTreeRootNode()
+
+ rootNode.leftChild = nil
+
+ local cartTree = dt.CartTree(rootNode)
+
+ local continuousFeatures = torch.Tensor()
+
+ local score, nodeId = cartTree:score(continuousFeatures)
+
+ mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon)
+ mytester:assert(rootNode.rightChild.nodeId == nodeId)
+ end
+
+ local function testMissingAllChildren()
+ local rootNode = getBinaryCartTreeRootNode()
+
+ rootNode.leftChild = nil
+ rootNode.rightChild = nil
+
+ local cartTree = dt.CartTree(rootNode)
+
+ local continuousFeatures = torch.Tensor()
+
+ local score, nodeId = cartTree:score(continuousFeatures)
+
+ mytester:assert(math.abs(rootNode.score - score) < epsilon)
+ mytester:assert(rootNode.nodeId == nodeId)
+ end
+
+ local function testScoreCartTreeBranchRandomlyRight()
+ local rootNode = getBinaryCartTreeRootNode();
+
+ -- Force Branch Right
+ local cartTree = dt.CartTree(rootNode, function() return false end);
+
+ local continuousFeatures = torch.SparseTensor()
+
+ local score, nodeId = cartTree:score(continuousFeatures)
+
+ mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon)
+ mytester:assert(rootNode.rightChild.nodeId == nodeId)
+ end
+
+ local function testScoreCartTreeBranchRandomlyLeft()
+ local rootNode = getBinaryCartTreeRootNode();
+
+ -- Force Branch Left
+ local cartTree = dt.CartTree(rootNode, function() return true end);
+
+ local continuousFeatures = torch.SparseTensor()
+
+ local score, nodeId = cartTree:score(continuousFeatures)
+
+ mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon)
+ mytester:assert(rootNode.leftChild.nodeId == nodeId)
+ end
+
+ testScoreCartTreeBranchLeftIfMissing()
+ testBranchRightWithFeature()
+ testMissingRightNode()
+ testMissingLeftNode()
+ testMissingAllChildren()
+ testScoreCartTreeBranchRandomlyRight()
+ testScoreCartTreeBranchRandomlyLeft()
+
+end
+
+function dttest.TreeState_branch()
+ local _ = require 'moses'
+ local binFeatureId = 1
+ local featureId = 2
+
+ local input = {
+ torch.SparseTensor(torch.LongTensor{binFeatureId},torch.Tensor{1}),
+ torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{1,1}),
+ torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{0,2}),
+ torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{0,3})
+ }
+ local target = torch.LongTensor(4):fill(1)
+
+ local dataset = dt.DataSet(input, target)
+
+ local treeState = dt.TreeState(torch.LongTensor():range(1,4))
+ local splitInfo = {splitId = binFeatureId, splitValue = 1}
+
+ local function testBranchBinaryFeature()
+ splitInfo = {splitId = binFeatureId, splitValue = 1}
+ local leftBranch, rightBranch = treeState:branch(splitInfo, dataset)
+ mytester:assert(leftBranch ~= nil and rightBranch ~= nil)
+
+ mytester:assert(2 == leftBranch:size())
+ mytester:assert(leftBranch:contains(3))
+ mytester:assert(leftBranch:contains(4))
+
+ mytester:assert(2 == rightBranch:size())
+ mytester:assert(rightBranch:contains(1))
+ mytester:assert(rightBranch:contains(2))
+ end
+
+ local function testBranchContinuousFeature()
+ local splitValue = 2
+ splitInfo = {splitId = featureId, splitValue = splitValue}
+
+ local leftBranch, rightBranch = treeState:branch(splitInfo, dataset)
+ mytester:assert(leftBranch ~= nil and rightBranch ~= nil)
+
+ mytester:assert(1 == leftBranch:size())
+ mytester:assert(leftBranch:contains(2))
+
+ mytester:assert(2 == rightBranch:size())
+ mytester:assert(rightBranch:contains(3))
+ mytester:assert(rightBranch:contains(4))
+ end
+
+ testBranchBinaryFeature()
+ testBranchContinuousFeature()
+
+end
+
+function dttest.DecisionForest()
+ -- Create a test decision forest; each tree has a single node, so scoring returns that root node's score.
+
+ local function createCartTreeWithSingleNode(score)
+ local cartNode = dt.CartNode()
+ cartNode.score = score
+ return dt.CartTree(cartNode)
+ end
+
+ local function getTestDecisionForest()
+ local cartTrees = {
+ createCartTreeWithSingleNode(1),
+ createCartTreeWithSingleNode(2),
+ createCartTreeWithSingleNode(3)
+ }
+ local weight = torch.Tensor{10,20,30}
+ local bias = 0.5
+
+ return dt.DecisionForest(cartTrees, weight, bias)
+ end
+
+ local function testScoreDecisionForest()
+ local df = getTestDecisionForest()
+ local continuousFeatures = torch.SparseTensor()
+
+ local expectedResult = 1.0 * 10.0 + 2.0 * 20.0 + 3.0 * 30.0 + 0.5;
+ local result = df:score(continuousFeatures)
+
+ mytester:assert(math.abs(expectedResult - result) < epsilon)
+ end
+
+ testScoreDecisionForest()
+end
+
+function dttest.CartTrainer()
+ local minLeafSize, maxLeafNodes = 1, 1000
+ local nExample = 100
+
+ -- 1. dense dataset
+ local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample)
+
+ -- assert that the dataset is valid
+ for clusterId, exampleIds in ipairs(clusterExamples) do
+ local exampleIdx = torch.LongTensor(exampleIds)
+ local input = inputs:index(1,exampleIdx)
+ local target = targets:index(1,exampleIdx)
+ assert(input:std(1):mean() < 0.05)
+ end
+
+ local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
+ local treeState = dt.GiniState(trainSet:getExampleIds())
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+
+ mytester:assert(nleaf == nExample) -- with dense inputs, minLeafSize=1 and maxLeafNodes=inf, every example gets its own leaf
+ testAccuracy(cartTree, "dense train single-thread first", trainSet, 0.99)
+ testAccuracy(cartTree, "dense valid single-thread first", validSet, 0.7) -- they don't generalize very well..
+
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+ testAccuracy(cartTree, "dense single-thread second", trainSet)
+
+ -- test feature parallelization
+ local nThread = 2
+ cartTrainer:featureParallel(nThread)
+ local treeState = dt.GiniState(trainSet:getExampleIds())
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+ testAccuracy(cartTree, "dense feature-parallel", trainSet)
+
+ -- 2. sparse-dense dataset
+ local trainSet, validSet, clusterExamples, inputs, targets = dt.getSparseDummyData(nExample, nil, 10, nil, nil, 10)
+
+ -- assert that the dataset is valid
+ for clusterId, exampleIds in ipairs(clusterExamples) do
+ local input = torch.Tensor(#exampleIds, 10):zero()
+ for i, exampleId in ipairs(exampleIds) do
+ input[i]:indexCopy(1, inputs[exampleId].keys, inputs[exampleId].values)
+ end
+ assert(input:std(1):mean() < 0.05)
+ end
+
+ local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
+ local treeState = dt.GiniState(trainSet:getExampleIds())
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+
+ mytester:assert(nleaf == nExample) -- with dense inputs, minLeafSize=1 and maxLeafNodes=inf, every example gets its own leaf
+ testAccuracy(cartTree, "sparse-dense train single-thread first", trainSet, 0.99)
+
+ local shuffle = torch.LongTensor():randperm(10)
+ for i, input in ipairs(inputs) do
+ input.keys = input.keys:index(1, shuffle)
+ input.values = input.values:index(1, shuffle)
+ input._map = nil
+ end
+ testAccuracy(cartTree, "sparse-dense shuffled keys train single-thread first", trainSet, 0.99)
+ testAccuracy(cartTree, "sparse-dense valid single-thread first", validSet, 0.8)
+
+ -- 3. sparse dataset
+ local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9)
+
+ local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
+ local treeState = dt.GiniState(trainSet:getExampleIds())
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+ cartTree.branchleft = function() return true end
+
+ mytester:assert(nleaf < nExample) -- with sparse inputs, examples missing the split feature are dropped, so there are fewer leaves than examples
+ testAccuracy(cartTree, "sparse train single-thread first", trainSet, 0.9) -- the TreeBrancher drops examples with missing features, making it difficult to overfit
+ testAccuracy(cartTree, "sparse valid single-thread first", validSet, 0.8)
+end
+
+function dttest.GradientBoostTrainer()
+ local nExample = 100
+ local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9)
+
+ local maxLeafNode, minLeafSize = nExample/2, nExample/10
+ local loss = nn.LogitBoostCriterion(false)
+
+ local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNode)
+
+ local opt = {
+ lossFunction=loss,
+ treeTrainer=cartTrainer,
+ shrinkage=0.1,
+ downsampleRatio=6,
+ featureBaggingSize=-1,
+ nTree=14,
+ evalFreq=8,
+ earlyStop=0 -- no early-stopping
+ }
+
+ -- test single-thread
+ local trainer = dt.GradientBoostTrainer(opt)
+ local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet)
+
+ mytester:assert(#decisionForest.trees == opt.nTree)
+ testAccuracy(decisionForest, "sparse train single-thread first", trainSet, 0.98)
+ testAccuracy(decisionForest, "sparse valid single-thread first", validSet, 0.95)
+
+ -- test stateless
+ local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet)
+
+ mytester:assert(#decisionForest.trees == opt.nTree)
+ testAccuracy(decisionForest, "sparse train single-thread second", trainSet, 0.98)
+ testAccuracy(decisionForest, "sparse valid single-thread second", validSet, 0.95)
+
+ -- test feature-parallel
+ local nThread = 2
+ cartTrainer:featureParallel(nThread)
+
+ local trainer = dt.GradientBoostTrainer(opt)
+ local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet)
+
+ mytester:assert(#decisionForest.trees == opt.nTree)
+ testAccuracy(decisionForest, "sparse train feature-parallel first", trainSet, 0.98)
+ testAccuracy(decisionForest, "sparse valid feature-parallel first", validSet, 0.95)
+
+end
+
+function dttest.RandomForestTrainer()
+ local nExample = 100
+ local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9)
+
+ local opt = {
+ activeRatio=0.5,
+ featureBaggingSize=5,
+ nTree=14,
+ maxLeafNodes=nExample/2,
+ minLeafSize=nExample/10,
+ }
+
+ local trainer = dt.RandomForestTrainer(opt)
+
+ local decisionForest = trainer:train(trainSet, trainSet.featureIds)
+ mytester:assert(#decisionForest.trees == opt.nTree)
+ testAccuracy(decisionForest, "sparse train single-thread first", trainSet, 0.98)
+ testAccuracy(decisionForest, "sparse valid single-thread first", validSet, 0.95)
+
+ -- test stateless
+ local decisionForest = trainer:train(trainSet, trainSet.featureIds)
+
+ mytester:assert(#decisionForest.trees == opt.nTree)
+ testAccuracy(decisionForest, "sparse train single-thread second", trainSet, 0.98)
+ testAccuracy(decisionForest, "sparse valid single-thread second", validSet, 0.95)
+
+ -- test tree-parallel
+ local nThread = 2
+ trainer:treeParallel(nThread)
+
+ local trainer = dt.RandomForestTrainer(opt)
+ local decisionForest = trainer:train(trainSet, trainSet.featureIds)
+
+ mytester:assert(#decisionForest.trees == opt.nTree)
+ testAccuracy(decisionForest, "sparse train tree-parallel first", trainSet, 0.98)
+ testAccuracy(decisionForest, "sparse valid tree-parallel first", validSet, 0.95)
+end
+
+function dttest.WorkPool()
+ local nThread = 2
+ local wp = dt.WorkPool(nThread)
+
+ -- 1. some easy tests
+ local store = {key='nick',value=7}
+ wp:update('storeKeyValue', store)
+
+ wp:update('require', {libname='decisiontree', varname='dt'})
+
+ local bias = 2
+ local obj = nn.MSECriterion()
+ wp:update('require', {libname='decisiontree', varname='dt'})
+ wp:writeup('execute', function(store) return bias + obj:updateOutput(torch.Tensor{1},torch.Tensor{1}) + store.nick end)
+
+ local taskname, res = wp:read()
+ mytester:assert(taskname == 'execute')
+ mytester:assert(res == 9)
+
+ -- 2. trying to reproduce a difficult error
+ local trainSet, validSet = dt.getSparseDummyData()
+
+ -- setup worker store (each worker will have its own copy)
+ local store = {
+ trainSet=trainSet,
+ minLeafSize=2
+ }
+ wp:update('storeKeysValues', store)
+
+ -- arguments/upvalues
+ local treeState = dt.GiniState(trainSet:getExampleIds())
+ local shardId = 1
+ local nShard = nThread
+ local featureIds = trainSet.featureIds
+
+ local task = function(store, args)
+ assert(store.trainSet)
+ assert(store.minLeafSize)
+
+ local bestSplit = args.treeState:findBestSplit(store.trainSet, args.featureIds, store.minLeafSize, args.shardId, args.nShard)
+ return bestSplit
+ end
+ local args = {treeState=treeState,featureIds=featureIds,shardId=shardId,nShard=nShard}
+ wp:writeup("execute", {func=task,args=args})
+
+ local taskname, bestSplit = wp:read()
+ mytester:assert(taskname == 'execute')
+ mytester:assert(torch.type(bestSplit) == 'table')
+
+ -- closure
+ local task = function(store)
+ assert(store.trainSet)
+ assert(store.minLeafSize)
+
+ local bestSplit = treeState:findBestSplit(store.trainSet, featureIds, store.minLeafSize, shardId, nShard)
+ return bestSplit
+ end
+ wp:writeup("execute", task)
+
+ local taskname, bestSplit = wp:read()
+ mytester:assert(taskname == 'execute')
+ mytester:assert(torch.type(bestSplit) == 'table')
+
+ local task = function(store, args)
+ assert(store.trainSet)
+ assert(torch.isTypeOf(treeState, 'dt.TreeState'), torch.type(treeState))
+
+ local bestSplit = treeState:findBestSplit(store.trainSet, featureIds, store.minLeafSize, shardId, nShard)
+ return bestSplit
+ end
+ local args = {featureIds=featureIds,shardId=shardId,nShard=nShard}
+ wp:writeup("execute", {func=task,args=args})
+
+
+ local taskname, bestSplit = wp:read()
+ mytester:assert(taskname == 'execute')
+ mytester:assert(torch.type(bestSplit) == 'table')
+
+ wp:terminate()
+end
+
+function dttest.Sparse2Dense()
+ local batchsize = 4
+ local minFeatureId, maxFeatureId = 10, 100
+ local input = {{},{}}
+ for i=1,batchsize do
+ local inputsize = math.random(5,10)
+ input[1][i] = torch.LongTensor(inputsize):random(minFeatureId,maxFeatureId)
+ input[2][i] = torch.Tensor(inputsize):uniform(0,1)
+ end
+ local s2d = nn.Sparse2Dense(torch.LongTensor():range(minFeatureId,maxFeatureId))
+ -- test 2d forward
+ local output = s2d:forward(input)
+ local output2 = torch.Tensor(batchsize, maxFeatureId-minFeatureId+1):zero()
+ local featureMap = {}
+ local j = 0
+ for i=minFeatureId,maxFeatureId do
+ j = j + 1
+ featureMap[i] = j
+ end
+ for i=1,batchsize do
+ local keys, values = input[1][i], input[2][i]
+ for j=1,keys:size(1) do
+ output2[{i,featureMap[keys[j] ]}] = values[j]
+ end
+ end
+ mytester:assertTensorEq(output, output2, 0.000001)
+ -- test 1d forward
+ local input = {input[1][batchsize], input[2][batchsize]}
+ local output = s2d:forward(input)
+ mytester:assertTensorEq(output, output2[batchsize], 0.000001)
+end
+
+function dttest.Sparse2DenseDouble()
+ local batchsize = 4
+ local minFeatureId, maxFeatureId = 10, 100
+ local input = {{},{}}
+ for i=1,batchsize do
+ local inputsize = math.random(5,10)
+ input[1][i] = torch.LongTensor(inputsize):random(minFeatureId,maxFeatureId)
+ input[2][i] = torch.Tensor(inputsize):uniform(0,1):double()
+ end
+ local s2d = nn.Sparse2Dense(torch.LongTensor():range(minFeatureId,maxFeatureId))
+ s2d:double()
+ -- test 2d forward
+ local output = s2d:forward(input)
+ local output2 = torch.Tensor(batchsize, maxFeatureId-minFeatureId+1):zero():double()
+ local featureMap = {}
+ local j = 0
+ for i=minFeatureId,maxFeatureId do
+ j = j + 1
+ featureMap[i] = j
+ end
+ for i=1,batchsize do
+ local keys, values = input[1][i], input[2][i]
+ for j=1,keys:size(1) do
+ output2[{i,featureMap[keys[j] ]}] = values[j]
+ end
+ end
+ mytester:assertTensorEq(output, output2, 0.000001)
+ -- test 1d forward
+ local input = {input[1][batchsize], input[2][batchsize]}
+ local output = s2d:forward(input)
+ mytester:assertTensorEq(output, output2[batchsize], 0.000001)
+end
+
+function dttest.LogitBoostCriterion()
+ local input = torch.randn(10)
+ local target = torch.LongTensor(10):random(0,1):type(torch.type(input))
+
+ local lb = nn.LogitBoostCriterion(false)
+ local loss = lb:updateOutput(input, target)
+
+ local loss2 = 0
+ for i=1,10 do
+ loss2 = loss2 + math.log(1 + math.exp(target[i] <= 0 and input[i] or -input[i]))
+ end
+ mytester:assert(math.abs(loss - loss2) < 0.00001)
+
+ local gradInput = lb:updateGradInput(input, target)
+ local gradInput2 = gradInput:clone():zero()
+ for i=1,10 do
+ local p = dt.logistic(input[i])
+ gradInput2[i] = (target[i] <= 0) and p or (p - 1)
+ end
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001)
+
+ local hessInput = lb:updateHessInput(input, target)
+ local hessInput2 = hessInput:clone():zero()
+ for i=1,10 do
+ local p = dt.logistic(input[i])
+ hessInput2[i] = p * (1.0 - p)
+ end
+ mytester:assertTensorEq(hessInput, hessInput2, 0.000001)
+end
+
+function dttest.DFD()
+ local nExample = 100
+ local batchsize = 4
+ local inputsize = 10
+
+ -- train Random Forest
+ local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample, nil, inputsize)
+ local opt = {
+ activeRatio=0.5,
+ featureBaggingSize=5,
+ nTree=4,
+ maxLeafNodes=nExample/2,
+ minLeafSize=nExample/10,
+ }
+ local trainer = dt.RandomForestTrainer(opt)
+ local df = trainer:train(trainSet, trainSet.featureIds)
+ mytester:assert(#df.trees == opt.nTree)
+
+ local dfd = nn.DFD(df)
+ dfd = nn.DFD(dfd:getReconstructionInfo())
+ local dfd2 = nn.DFD(dfd:getReconstructionInfo(), true)
+ local input = validSet.input:sub(1,batchsize)
+ local output = dfd:forward(input)
+ local output2 = dfd2:forward(input)
+
+ local _ = require 'moses'
+
+ local function hasKey(keys,key)
+ local found = false
+ keys:apply(function(x)
+ if x == key then
+ found = true
+ end
+ end)
+ return found
+ end
+
+ for i=1,batchsize do
+ local nodes = {}
+ local keys = output[1][i]
+ local keys2 = output2[1][i]
+ for j,tree in ipairs(df.trees) do
+ local stack = {}
+ tree:score(input[i], stack)
+ mytester:assert(hasKey(keys2, stack[#stack]._nodeId))
+
+ for k,node in ipairs(stack) do
+ if k > 1 then
+ assert(node._nodeId)
+ mytester:assert(hasKey(keys, node._nodeId), string.format("missing key=%d in %s", node._nodeId, tostring(keys)))
+ table.insert(nodes, node._nodeId)
+ end
+ end
+ end
+ mytester:assert(#nodes == keys:size(1))
+ mytester:assert(#df.trees == keys2:size(1))
+ end
+end
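+
+-- For reference (an informal reading of the assertions above): nn.DFD maps a
+-- dense input to sparse keys by routing it through each tree of the forest.
+-- The default module emits the ids of every visited node below the root,
+-- while constructing it with a second argument of true emits only the leaf
+-- node id reached in each tree (hence one key per tree in keys2).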
+
+function dttest.DFDDouble()
+ local nExample = 100
+ local batchsize = 4
+ local inputsize = 10
+
+ -- train Random Forest
+ local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample, nil, inputsize)
+ local opt = {
+ activeRatio=0.5,
+ featureBaggingSize=5,
+ nTree=4,
+ maxLeafNodes=nExample/2,
+ minLeafSize=nExample/10,
+ }
+ local trainer = dt.RandomForestTrainer(opt)
+ local df = trainer:train(trainSet, trainSet.featureIds)
+ mytester:assert(#df.trees == opt.nTree)
+
+ local dfd = nn.DFD(df)
+ dfd:double()
+ dfd = nn.DFD(dfd:getReconstructionInfo())
+ local dfd2 = nn.DFD(dfd:getReconstructionInfo(), true)
+ local input = validSet.input:sub(1,batchsize):double()
+ local output = dfd:forward(input)
+ local output2 = dfd2:forward(input)
+
+ local _ = require 'moses'
+
+ local function hasKey(keys,key)
+ local found = false
+ keys:apply(function(x)
+ if x == key then
+ found = true
+ end
+ end)
+ return found
+ end
+
+ for i=1,batchsize do
+ local nodes = {}
+ local keys = output[1][i]
+ local keys2 = output2[1][i]
+ for j,tree in ipairs(df.trees) do
+ local stack = {}
+ tree:score(input[i], stack)
+ mytester:assert(hasKey(keys2, stack[#stack]._nodeId))
+
+ for k,node in ipairs(stack) do
+ if k > 1 then
+ assert(node._nodeId)
+ mytester:assert(hasKey(keys, node._nodeId), string.format("missing key=%d in %s", node._nodeId, tostring(keys)))
+ table.insert(nodes, node._nodeId)
+ end
+ end
+ end
+ mytester:assert(#nodes == keys:size(1))
+ mytester:assert(#df.trees == keys2:size(1))
+ end
+end
+
+function dttest.uniquecounts() -- DEPRECATED
+ local target = torch.LongTensor(100):random(1,3)
+ local input = torch.Tensor()
+ local inputset = {input=input, target=target}
+
+ local counts = dt.uniquecounts(nil, inputset, 3)
+
+ mytester:assert(counts:sum() == 100)
+ mytester:assert(counts:nElement() == 3)
+
+ local res = torch.Tensor(3):zero()
+ target:apply(function(t) res[t] = res[t] + 1 end)
+
+ mytester:assertTensorEq(counts, res, 0.000001)
+end
+
+function dttest.entropy() -- DEPRECATED
+ -- 2 clusters with a bit overlap between classes:
+ local input = torch.Tensor(100,2)
+ input:narrow(1,1,50):normal(-1,.01)
+ input:narrow(1,51,50):normal(2,.01)
+
+ local target = torch.LongTensor(100):fill(3)
+ target:narrow(1,1,45):fill(1)
+ target:narrow(1,56,45):fill(2)
+
+ local inputset = {input=input, target=target}
+
+ -- test entropy()
+ local fullent = dt.entropy(inputset)
+
+ local halfset = {input=input:narrow(1,1,50), target=target:narrow(1,1,50)}
+ local halfent = dt.entropy(halfset)
+
+ local perfectset = {input=input:narrow(1,56,45), target=target:narrow(1,56,45)}
+ local perfectent = dt.entropy(perfectset)
+
+ mytester:assert(fullent > halfent)
+ mytester:assert(halfent > perfectent)
+ mytester:assert(perfectent < 0.0000001 and perfectent >= 0)
+end
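+
+-- For reference, dt.entropy is expected to behave like Shannon entropy over
+-- the class distribution (a sketch): H = -sum_c p_c * log(p_c), so a pure
+-- subset scores ~0 and mixing classes increases the score, as asserted above.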
+
+function dt.test(tests)
+ math.randomseed(os.time())
+ mytester = torch.Tester()
+ mytester:add(dttest)
+ mytester:run(tests)
+end
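+
+-- Typical invocation (an assumed usage sketch, mirroring other torch test
+-- suites built on torch.Tester): require 'decisiontree' and call dt.test()
+-- to run everything, or dt.test{'Sparse2Dense', 'DFD'} to run a subset.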
diff --git a/contrib/lua-torch/decisiontree/utils.h b/contrib/lua-torch/decisiontree/utils.h
new file mode 100644
index 000000000..8a0196a58
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/utils.h
@@ -0,0 +1,45 @@
+#include "error.h"
+
+#define check_tensors(L, a, b) \
+ do { \
+ if ((a)->nDimension != (b)->nDimension) \
+ return LUA_HANDLE_ERROR_STR((L), "different tensor dimensions"); \
+ for (int __local__var = 0; __local__var < (a)->nDimension; __local__var++) \
+ if ((a)->size[__local__var] != (b)->size[__local__var]) \
+ return LUA_HANDLE_ERROR_STR((L), "different tensor sizes"); \
+ } while (0)
+
+#define check_tensor(L, t, type) \
+ do { \
+ if (!type##_isContiguous(t)) \
+ return LUA_HANDLE_ERROR_STR((L), "tensor should be contiguous"); \
+ } while (0)
+
+#define get_tensor_size(t, type) \
+ (TH##type##Tensor_nElement(t))
+
+#define get_tensor(L, idx, type) \
+ (TH##type##Tensor *)luaT_checkudata(L, idx, "torch." #type "Tensor")
+
+static int push_table_contents(lua_State *L, int arg)
+{
+ int size = 0;
+ while(1) {
+ lua_checkstack(L, 1);
+ lua_rawgeti(L, arg, size + 1);
+ if (lua_isnil(L, -1)) {
+ lua_pop(L, 1);
+ break;
+ }
+ size++;
+ }
+ return size;
+}
+
+#define verify_push_table_contents(L, idx, count) do { \
+ int __tmp_count = push_table_contents(L, idx); \
+ if (__tmp_count != count) { \
+ lua_pop(L, __tmp_count); \
+ LUA_HANDLE_ERROR_STR(L, "Table sizes do not match"); \
+ } \
+ } while(0)
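+
+/* Usage sketch (illustrative only, assuming the usual TH tensor bindings):
+ *   THFloatTensor *a = get_tensor(L, 1, Float);
+ *   THFloatTensor *b = get_tensor(L, 2, Float);
+ *   check_tensors(L, a, b);               // same nDimension and sizes
+ *   check_tensor(L, a, THFloatTensor);    // contiguity check
+ *   long n = get_tensor_size(a, Float);   // element count
+ */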
diff --git a/contrib/lua-torch/decisiontree/utils.lua b/contrib/lua-torch/decisiontree/utils.lua
new file mode 100644
index 000000000..c32c3d08b
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/utils.lua
@@ -0,0 +1,125 @@
+local dt = require "decisiontree._env"
+
+-- returns a buffer table local to a thread (not serialized)
+function dt.getBufferTable(name)
+ local dt = require 'decisiontree'
+ assert(torch.type(name) == 'string')
+ dt.buffer = dt.buffer or {}
+ dt.buffer[name] = dt.buffer[name] or {}
+ return dt.buffer[name]
+end
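+
+-- Usage sketch (the buffer name 'mybuffers' is illustrative): each thread
+-- gets its own table, so tensors cached in it are reused across calls
+-- without being serialized between threads.
+-- local buffer = dt.getBufferTable('mybuffers')
+-- buffer.tensor = buffer.tensor or torch.Tensor()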
+
+function dt.getSparseDummyData(nExample, nCluster, nFeature, overlap, nValid, nActive)
+ local dt = require 'decisiontree'
+ if torch.type(nExample) == 'table' then
+ local opt = nExample
+ nExample = opt.nExample
+ nCluster = opt.nCluster
+ nFeature = opt.nFeature
+ overlap = opt.overlap
+ nValid = opt.nValid
+ nActive = opt.nActive
+ end
+ nExample = nExample or 100 -- training set size
+ nCluster = nCluster or 10
+ assert(nCluster >= 2)
+ nFeature = math.max(2, nFeature or 10)
+ overlap = overlap or 0
+ nValid = nValid or nExample/10 -- validation set size
+ nActive = nActive or math.max(2, nFeature / 2)
+
+ -- sample nCluster centers
+ local clusterCenter = torch.rand(nCluster, nFeature)
+ local clusterLabel = torch.LongTensor(nCluster)
+ local clusterExamples = {}
+ for i=1,nCluster do
+ clusterCenter[i]:add(i)
+ clusterLabel[i] = i % 2
+ clusterExamples[i] = {}
+ end
+
+ local sparseCenter = torch.Tensor()
+
+ local shuffle = torch.LongTensor()
+
+ -- build dataset in sparse (keys/values) format
+ local inputs = {}
+ local targets = torch.Tensor(nExample+nValid)
+ for i=1,nExample+nValid do
+ local clusterIdx = torch.random(1,nCluster)
+ table.insert(clusterExamples[clusterIdx], i)
+
+ shuffle:randperm(nFeature)
+ local keys = torch.LongTensor(nActive):copy(shuffle:narrow(1,1,nActive))
+ sparseCenter:index(clusterCenter[clusterIdx], 1, keys)
+ local stdiv = i <= nExample and 100 or 1000
+ local values = torch.randn(nActive):div(stdiv):add(sparseCenter)
+
+ table.insert(inputs, torch.SparseTensor(keys, values))
+
+ local label = clusterLabel[clusterIdx]
+ if math.random() < overlap then
+ targets[i] = label == 1 and 0 or 1
+ else
+ targets[i] = label
+ end
+ end
+
+ local _ = require 'moses'
+ local validSet = dt.DataSet(_.slice(inputs, nExample+1, nExample+nValid), targets:narrow(1,nExample+1,nValid))
+ local trainSet = dt.DataSet(_.slice(inputs, 1, nExample), targets:narrow(1,1,nExample))
+
+ return trainSet, validSet, clusterExamples, inputs, targets
+end
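+
+-- Usage sketch: build a small sparse binary-classification problem; the
+-- function also accepts a single table of options, as handled above, e.g.
+-- local trainSet, validSet = dt.getSparseDummyData{nExample=100, nCluster=2, nFeature=10}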
+
+function dt.getDenseDummyData(nExample, nCluster, nFeature, overlap, nValid)
+ local dt = require 'decisiontree'
+ if torch.type(nExample) == 'table' then
+ local opt = nExample
+ nExample = opt.nExample
+ nCluster = opt.nCluster
+ nFeature = opt.nFeature
+ overlap = opt.overlap
+ nValid = opt.nValid
+ end
+ nExample = nExample or 100 -- training set size
+ nCluster = nCluster or 10
+ assert(nCluster >= 2)
+ nFeature = math.max(2, nFeature or 10)
+ overlap = overlap or 0
+ nValid = nValid or nExample/10 -- validation set size
+
+ -- sample nCluster centers
+ local clusterCenter = torch.rand(nCluster, nFeature)
+ local clusterLabel = torch.LongTensor(nCluster)
+ local clusterExamples = {}
+ for i=1,nCluster do
+ clusterCenter[i]:add(i)
+ clusterLabel[i] = i % 2
+ clusterExamples[i] = {}
+ end
+
+ -- build dataset in dense format
+ local inputs = torch.Tensor(nExample+nValid, nFeature)
+ local targets = torch.Tensor(nExample+nValid)
+ for i=1,nExample+nValid do
+ local clusterIdx = torch.random(1,nCluster)
+ table.insert(clusterExamples[clusterIdx], i)
+
+ local stdiv = i <= nExample and 100 or 1000
+ inputs[i]:normal():div(stdiv):add(clusterCenter[clusterIdx])
+
+ local label = clusterLabel[clusterIdx]
+ if math.random() < overlap then
+ targets[i] = label == 1 and 0 or 1
+ else
+ targets[i] = label
+ end
+ end
+
+ local _ = require 'moses'
+ local validSet = dt.DataSet(inputs:narrow(1,nExample+1,nValid), targets:narrow(1,nExample+1,nValid))
+ local trainSet = dt.DataSet(inputs:narrow(1,1,nExample), targets:narrow(1,1,nExample))
+
+ return trainSet, validSet, clusterExamples, inputs, targets
+end
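+
+-- Usage sketch: dense counterpart of getSparseDummyData; this is the call
+-- made by dttest.DFD above:
+-- local trainSet, validSet, clusterExamples, inputs, targets =
+--    dt.getDenseDummyData(100, nil, 10)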