author:    Vsevolod Stakhov <vsevolod@highsecure.ru>  2018-05-23 18:14:15 +0100
committer: Vsevolod Stakhov <vsevolod@highsecure.ru>  2018-05-23 18:14:15 +0100
commit:    714eb56e1760fdfb26afccde92664d3a2f1e8435 (patch)
tree:      84d1399acbb92f852b4bd64f9ea5412680b0c6ab /contrib/lua-torch/decisiontree
parent:    220a51ff68013dd668a45b78c60a7b8bfc10f074 (diff)
download:  rspamd-714eb56e1760fdfb26afccde92664d3a2f1e8435.tar.gz
           rspamd-714eb56e1760fdfb26afccde92664d3a2f1e8435.zip
[Minor] Move lua contrib libraries to lua- prefix
Diffstat (limited to 'contrib/lua-torch/decisiontree')
44 files changed, 6593 insertions, 0 deletions
diff --git a/contrib/lua-torch/decisiontree/CMakeLists.txt b/contrib/lua-torch/decisiontree/CMakeLists.txt
new file mode 100644
index 000000000..6434eddd9
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/CMakeLists.txt
@@ -0,0 +1,54 @@
+SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
+CMAKE_POLICY(VERSION 2.6)
+
+SET(src
+  init.c
+  hash_map.c
+)
+SET(luasrc
+  _env.lua
+  benchmark.lua
+  CartNode.lua
+  CartTrainer.lua
+  CartTree.lua
+  DataSet.lua
+  DecisionForest.lua
+  DecisionForestTrainer.lua
+  DecisionTree.lua
+  DFD.lua
+  GiniState.lua
+  GradientBoostState.lua
+  GradientBoostTrainer.lua
+  init.lua
+  LogitBoostCriterion.lua
+  math.lua
+  MSECriterion.lua
+  RandomForestTrainer.lua
+  Sparse2Dense.lua
+  SparseTensor.lua
+  test.lua
+  TreeState.lua
+  utils.lua
+  WorkPool.lua
+)
+
+IF (WITH_OPENMP)
+  FIND_PACKAGE(OpenMP)
+  IF(OPENMP_FOUND)
+    MESSAGE(STATUS "Compiling with OpenMP support")
+    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+  ENDIF(OPENMP_FOUND)
+ENDIF (WITH_OPENMP)
+
+ADD_TORCH_PACKAGE(decisiontree "${src}" "${luasrc}" "A decision tree library, for Torch")
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+### Torch packages suppose the library prefix is "lib"
+SET_TARGET_PROPERTIES(decisiontree PROPERTIES
+  PREFIX "lib"
+  IMPORT_PREFIX "lib")
+TARGET_LINK_LIBRARIES(decisiontree ${TH_LIBRARIES})
+INSTALL(TARGETS decisiontree DESTINATION ${RSPAMD_LIBDIR})
diff --git a/contrib/lua-torch/decisiontree/CartNode.lua b/contrib/lua-torch/decisiontree/CartNode.lua
new file mode 100644
index 000000000..e6a85006c
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/CartNode.lua
@@ -0,0 +1,42 @@
+local dt = require 'decisiontree._env'
+local CartNode = torch.class("dt.CartNode", dt)
+
+function CartNode:__init(nodeId, leftChild, rightChild, splitFeatureId, splitFeatureValue, score, splitGain)
+   self.nodeId = nodeId or 0
+   self.leftChild = leftChild
+   self.rightChild = rightChild
+   self.splitFeatureId = splitFeatureId or -1
+   self.splitFeatureValue = splitFeatureValue or 0
+   self.score = score or 0
+   self.splitGain = splitGain
+end
+
+function CartNode:__tostring__()
+   return self:recursivetostring()
+end
+
+function CartNode:recursivetostring(indent)
+   indent = indent or ' '
+
+   -- Is this a leaf node?
+   local res = ''
+   if not (self.leftChild or self.rightChild) then
+      res = res .. self.score .. '\n'
+   else
+      -- Print the criteria
+      res = res .. 'input[' .. self.splitFeatureId .. '] <' .. self.splitFeatureValue .. '?\n'
+
+      -- Print the branches
+      if self.leftChild then
+         res = res .. indent .. 'True->' .. self.leftChild:recursivetostring(indent .. '  ')
+      end
+      if self.rightChild then
+         res = res .. indent .. 'False->' .. self.rightChild:recursivetostring(indent .. '  ')
+      end
+   end
+   return res
+end
+
+function CartNode:clone()
+   return CartNode(self.nodeId, self.leftChild, self.rightChild, self.splitFeatureId, self.splitFeatureValue, self.score, self.splitGain)
+end
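To make the node structure concrete, here is a minimal sketch (not part of the commit) that assembles a one-split stump by hand and prints it with the `__tostring__` machinery above; all values are illustrative:

```lua
local dt = require 'decisiontree'

-- hypothetical hand-built stump: split on feature 3 at value 0.5
local leftLeaf = dt.CartNode(2, nil, nil, nil, nil, -1.5)  -- leaf with score -1.5
local rightLeaf = dt.CartNode(3, nil, nil, nil, nil, 1.5)  -- leaf with score 1.5
local root = dt.CartNode(1, leftLeaf, rightLeaf, 3, 0.5)

print(root)
-- input[3] <0.5?
--  True->-1.5
--  False->1.5
```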
diff --git a/contrib/lua-torch/decisiontree/CartTrainer.lua b/contrib/lua-torch/decisiontree/CartTrainer.lua
new file mode 100644
index 000000000..63ae6c148
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/CartTrainer.lua
@@ -0,0 +1,180 @@
+local dt = require "decisiontree._env"
+local _ = require "moses"
+
+local CartTrainer = torch.class("dt.CartTrainer", dt)
+
+-- Generic CART trainer
+function CartTrainer:__init(dataset, minLeafSize, maxLeafNodes)
+   assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+   self.dataset = dataset
+   self.minLeafSize = assert(minLeafSize) -- min examples per leaf
+   self.maxLeafNodes = assert(maxLeafNodes) -- max leaf nodes in tree
+
+   -- by default, single thread
+   self.parallelMode = 'singlethread'
+end
+
+function CartTrainer:train(rootTreeState, activeFeatures)
+   assert(torch.isTypeOf(rootTreeState, 'dt.TreeState'))
+   assert(torch.isTensor(activeFeatures))
+   local root = dt.CartNode()
+   root.id = 0
+   root.score = rootTreeState:score(self.dataset)
+
+   local nleaf = 1
+
+   -- TODO : nodeparallel: parallelize here. The queue is a workqueue.
+   local queue = {}
+   table.insert(queue, 1, {cartNode=root, treeState=rootTreeState})
+
+   while #queue > 0 and nleaf < self.maxLeafNodes do
+      local treeGrowerArgs = table.remove(queue, #queue)
+      local currentTreeState = treeGrowerArgs.treeState
+
+      -- Note: if minLeafSize = 1 and maxLeafNode = inf, then each example will be its own leaf...
+      if self:hasEnoughTrainingExamplesToSplit(currentTreeState.exampleIds:size(1)) then
+         nleaf = self:processNode(nleaf, queue, treeGrowerArgs.cartNode, currentTreeState, activeFeatures)
+      end
+   end
+
+   -- CartTree with random branching (when feature is missing)
+   local branchleft = function() return math.random() < 0.5 end
+   return dt.CartTree(root, branchleft), nleaf
+end
+
+function CartTrainer:processNode(nleaf, queue, node, treeState, activeFeatures)
+   local bestSplit
+   if self.parallelMode == 'singlethread' then
+      bestSplit = self:findBestSplitForAllFeatures(treeState, activeFeatures)
+   elseif self.parallelMode == 'featureparallel' then
+      bestSplit = self:findBestSplitForAllFeaturesFP(treeState, activeFeatures)
+   else
+      error("Unrecognized parallel mode: " .. self.parallelMode)
+   end
+
+   if bestSplit then
+      local leftTreeState, rightTreeState = treeState:branch(bestSplit, self.dataset)
+      assert(bestSplit.leftChildSize + bestSplit.rightChildSize == leftTreeState.exampleIds:size(1) + rightTreeState.exampleIds:size(1), "The left and right subtrees don't match the split found!")
+      self:setValuesAndCreateChildrenForNode(node, bestSplit, leftTreeState, rightTreeState, nleaf)
+
+      table.insert(queue, 1, {cartNode=node.leftChild, treeState=leftTreeState})
+      table.insert(queue, 1, {cartNode=node.rightChild, treeState=rightTreeState})
+
+      return nleaf + 1
+   end
+
+   return nleaf
+end
+
+function CartTrainer:findBestSplitForAllFeatures(treeState, activeFeatures)
+   local timer = torch.Timer()
+   local bestSplit = treeState:findBestSplit(self.dataset, activeFeatures, self.minLeafSize, -1, -1)
+
+   if bestSplit then
+      assert(torch.type(bestSplit) == 'table')
+   end
+
+   if dt.PROFILE then
+      print("findBestSplitForAllFeatures time="..timer:time().real)
+   end
+   return bestSplit
+end
+
+-- Updates the parentNode with the bestSplit information by creating left/right child nodes.
+function CartTrainer:setValuesAndCreateChildrenForNode(parentNode, bestSplit, leftState, rightState, nleaf)
+   assert(torch.isTypeOf(parentNode, 'dt.CartNode'))
+   assert(torch.type(bestSplit) == 'table')
+   assert(torch.isTypeOf(leftState, 'dt.TreeState'))
+   assert(torch.isTypeOf(rightState, 'dt.TreeState'))
+   assert(torch.type(nleaf) == 'number')
+
+   local leftChild = dt.CartNode()
+   leftChild.score = leftState:score(self.dataset)
+   leftChild.nodeId = 2 * nleaf - 1
+
+   local rightChild = dt.CartNode()
+   rightChild.score = rightState:score(self.dataset)
+   rightChild.nodeId = 2 * nleaf
+
+   parentNode.splitFeatureId = bestSplit.splitId
+   parentNode.splitFeatureValue = bestSplit.splitValue
+   parentNode.leftChild = leftChild
+   parentNode.rightChild = rightChild
+   parentNode.splitGain = bestSplit.splitGain
+end
+
+-- We minimally need 2 * N examples in the parent to satisfy >= N examples per child
+function CartTrainer:hasEnoughTrainingExamplesToSplit(count)
+   return count >= 2 * self.minLeafSize
+end
+
+-- call before training to enable feature-parallelization
+function CartTrainer:featureParallel(workPool)
+   assert(self.parallelMode == 'singlethread', self.parallelMode)
+   self.parallelMode = 'featureparallel'
+   self.workPool = torch.type(workPool) == 'number' and dt.WorkPool(workPool) or workPool
+   assert(torch.isTypeOf(self.workPool, 'dt.WorkPool'))
+
+   -- this deletes all SparseTensor hash maps so that they aren't serialized
+   self.dataset:deleteIndex()
+
+   -- require the dt package
+   self.workPool:update('require', {libname='decisiontree',varname='dt'})
+   -- setup worker store (each worker will have its own copy)
+   local store = {
+      dataset=self.dataset,
+      minLeafSize=self.minLeafSize
+   }
+   self.workPool:update('storeKeysValues', store)
+end
+
+-- feature parallel
+function CartTrainer:findBestSplitForAllFeaturesFP(treeState, activeFeatures)
+   local timer = torch.Timer()
+   local bestSplit
+   if treeState.findBestSplitFP then
+      bestSplit = treeState:findBestSplitFP(self.dataset, activeFeatures, self.minLeafSize, self.workPool.nThread)
+   end
+
+   if not bestSplit then
+      for i=1,self.workPool.nThread do
+         -- upvalues
+         local treeState = treeState
+         local shardId = i
+         local nShard = self.workPool.nThread
+         local featureIds = activeFeatures
+         -- closure
+         local task = function(store)
+            assert(store.dataset)
+            assert(store.minLeafSize)
+            if treeState.threadInitialize then
+               treeState:threadInitialize()
+            end
+
+            local bestSplit = treeState:findBestSplit(store.dataset, featureIds, store.minLeafSize, shardId, nShard)
+            return bestSplit
+         end
+
+         self.workPool:writeup('execute', task)
+      end
+
+      for i=1,self.workPool.nThread do
+         local taskname, candidateSplit = self.workPool:read()
+         assert(taskname == 'execute')
+         if candidateSplit then
+            if ((not bestSplit) or candidateSplit.splitGain < bestSplit.splitGain) then
+               bestSplit = candidateSplit
+            end
+         end
+      end
+   end
+
+   if bestSplit then
+      assert(torch.type(bestSplit) == 'table')
+   end
+
+   if dt.PROFILE then
+      print("findBestSplitForAllFeaturesFP time="..timer:time().real)
+   end
+   return bestSplit
+end
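A brief usage sketch of the feature-parallel mode (assuming `trainSet`, `minLeafSize`, `maxLeafNodes` and `rootTreeState` are set up as in the README later in this diff):

```lua
local dt = require 'decisiontree'

local trainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
-- featureParallel accepts either a thread count or a pre-built dt.WorkPool
trainer:featureParallel(4)
-- subsequent train() calls dispatch findBestSplit shards to the worker pool
local tree, nleaf = trainer:train(rootTreeState, trainSet.featureIds)
```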
diff --git a/contrib/lua-torch/decisiontree/CartTree.lua b/contrib/lua-torch/decisiontree/CartTree.lua
new file mode 100644
index 000000000..c74dfda9e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/CartTree.lua
@@ -0,0 +1,90 @@
+local _ = require "moses"
+local dt = require 'decisiontree._env'
+
+-- CART (classification-regression decision tree).
+-- The example is always branched to the left when the splitting feature is missing.
+local CartTree = torch.class("dt.CartTree", "dt.DecisionTree", dt)
+
+function CartTree:__init(root, branchleft)
+   assert(torch.isTypeOf(root, 'dt.CartNode'))
+   self.root = root
+   self.branchleft = branchleft or function() return true end
+end
+
+-- TODO optimize this
+function CartTree:score(input, stack, optimized)
+   if optimized == true and stack == nil and torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
+      return input.nn.CartTreeFastScore(input, self.root, input.new())
+   end
+   return self:recursivescore(self.root, input, stack)
+end
+
+-- Continuous: if input[node.splitFeatureId] < node.splitFeatureValue then leftNode else rightNode
+-- Binary: if input[node.splitFeatureId] == 0 then leftNode else rightNode
+-- when stack is provided, it is returned as the third argument containing the stack of nodes from root to leaf
+function CartTree:recursivescore(node, input, stack)
+   assert(torch.isTypeOf(node, 'dt.CartNode'))
+
+   if stack then
+      stack = torch.type(stack) == 'table' and stack or {}
+      table.insert(stack, node)
+   end
+
+   if not (node.leftChild or node.rightChild) then
+      return node.score, node.nodeId, stack
+   elseif not node.leftChild then
+      return self:recursivescore(node.rightChild, input, stack)
+   elseif not node.rightChild then
+      return self:recursivescore(node.leftChild, input, stack)
+   end
+
+   local splitId = node.splitFeatureId
+   local splitVal = node.splitFeatureValue
+
+   if input[splitId] then -- if has key
+      local featureVal = input[splitId]
+      local nextNode = featureVal < splitVal and node.leftChild or node.rightChild
+      return self:recursivescore(nextNode, input, stack)
+   end
+
+   -- if feature is missing, branch left
+   local nextNode = self.branchleft() and node.leftChild or node.rightChild
+   return self:recursivescore(nextNode, input, stack)
+end
+
+function CartTree:__tostring__()
+   return self.root:recursivetostring()
+end
+
+-- expects a stack returned by score
+function CartTree:stackToString(stack, input)
+   assert(torch.type(stack) == 'table')
+   assert(torch.isTypeOf(stack[1], 'dt.CartNode'))
+
+   local res = 'Stack nodes from root to leaf\n'
+   for i,node in ipairs(stack) do
+      if not (node.leftChild or node.rightChild) then
+         res = res .. "score="..node.score .. '\n'
+      else
+         local istr = ''
+         if input then
+            istr = '=' .. (input[node.splitFeatureId] or 'nil')
+         end
+         res = res .. 'input[' .. node.splitFeatureId .. ']' .. istr ..' < ' .. node.splitFeatureValue .. ' ? '
+         res = res .. '(' .. ((node.leftChild and node.rightChild) and 'LR' or node.leftChild and 'L' or node.rightChild and 'R' or 'WAT?') .. ') '
+         if node.leftChild == stack[i+1] then
+            res = res .. 'Left\n'
+         elseif node.rightChild == stack[i+1] then
+            res = res .. 'Right\n'
+         else
+            error"stackToString error"
+         end
+      end
+   end
+   return res .. #stack .. " nodes"
+end
+
+function CartTree:clone()
+   return CartTree(self.root:clone(), self.branchleft)
+end
+
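Continuing the stump sketch from above, scoring with a stack makes the traversal inspectable (again an illustrative sketch, not part of the commit):

```lua
-- score a dense example and collect the root-to-leaf path
local tree = dt.CartTree(root)            -- root from the CartNode sketch above
local input = torch.Tensor{0.2, 0.7, 0.9}
local score, nodeId, stack = tree:score(input, {})
print(tree:stackToString(stack, input))
-- Stack nodes from root to leaf
-- input[3]=0.9 < 0.5 ? (LR) Right
-- score=1.5
-- 2 nodes
```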
diff --git a/contrib/lua-torch/decisiontree/DFD.lua b/contrib/lua-torch/decisiontree/DFD.lua
new file mode 100644
index 000000000..e4746212a
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DFD.lua
@@ -0,0 +1,182 @@
+-- nn.DFD: Decision Forest Discretizer
+-- Takes a dense input and outputs a sparse output.
+-- Each node in the forest is its own feature.
+-- When a node is traversed, its commensurate feature takes on a value of 1.
+-- For all non-traversed nodes, the feature is 0.
+local DFD, parent = torch.class("nn.DFD", "nn.Module")
+
+-- TODO: add :type, as the default will convert the long tensors
+function DFD:__init(df, onlyLastNode)
+   parent.__init(self)
+   if torch.type(df) == 'table' then
+      self:reconstructFromInfo(df)
+   else
+      assert(torch.type(df) == 'dt.DecisionForest')
+
+      self.rootIds = torch.LongTensor()
+      -- nodeId of left and right child nodes
+      self.leftChild = torch.LongTensor()
+      self.rightChild = torch.LongTensor()
+      -- index and value of the feature that splits this node
+      self.splitFeatureId = torch.LongTensor()
+      self.splitFeatureValue = torch.Tensor()
+      -- initialize state given df
+      self:convertForest2Tensors(df)
+      self:clearState()
+   end
+   self.onlyLastNode = onlyLastNode
+   self.nTrees = self.rootIds:size(1)
+end
+
+-- converts a DecisionForest to efficient tensor representation
+function DFD:convertForest2Tensors(df)
+   self.rootIds:resize(#df.trees)
+
+   -- nodeId will map to featureId
+   local nodeId = 0
+   -- sets nodeIds of all subnodes
+   -- and measures the maximum depth over all trees
+   local function recursiveTree(node, depth)
+      depth = (depth or 0) + 1
+      local rdepth = depth
+      nodeId = nodeId + 1
+      node._nodeId = nodeId
+
+      if node.leftChild then
+         rdepth = math.max(rdepth, recursiveTree(node.leftChild, depth))
+      end
+      if node.rightChild then
+         rdepth = math.max(rdepth, recursiveTree(node.rightChild, depth))
+      end
+      return rdepth
+   end
+
+   -- sum over trees of max depth
+   self.depth = 0
+   for i,tree in ipairs(df.trees) do
+      assert(torch.isTypeOf(tree.root, 'dt.CartNode'))
+      self.depth = self.depth + recursiveTree(tree.root)
+   end
+   -- remove roots from depth
+   self.depth = self.depth - self.rootIds:size(1)
+
+   -- total number of nodes in all trees
+   self.nNode = nodeId
+
+   -- nodeId of left and right child nodes
+   self.leftChild:resize(self.nNode):fill(-1)
+   self.rightChild:resize(self.nNode):fill(-1)
+   -- index and value of the feature that splits this node
+   self.splitFeatureId:resize(self.nNode):fill(-1)
+   self.splitFeatureValue:resize(self.nNode):fill(-1)
+
+   -- aggregates CartNode attributes to an efficient tensor representation
+   local function recursiveTree2(node)
+      local nodeId = assert(node._nodeId)
+      assert(self.splitFeatureId[nodeId] == -1)
+
+      if node.leftChild then
+         self.leftChild[nodeId] = assert(node.leftChild._nodeId)
+         recursiveTree2(node.leftChild)
+      else
+         self.leftChild[nodeId] = 0
+      end
+
+      if node.rightChild then
+         self.rightChild[nodeId] = assert(node.rightChild._nodeId)
+         recursiveTree2(node.rightChild)
+      else
+         self.rightChild[nodeId] = 0
+      end
+
+      -- each node splits the dataset on a feature id-value pair
+      self.splitFeatureId[nodeId] = assert(node.splitFeatureId)
+      self.splitFeatureValue[nodeId] = assert(node.splitFeatureValue)
+   end
+
+   for i,tree in ipairs(df.trees) do
+      self.rootIds[i] = assert(tree.root._nodeId)
+      recursiveTree2(tree.root)
+   end
+
+   assert(self.leftChild:min() >= 0)
+   assert(self.rightChild:min() >= 0)
+end
+
+-- input is a batchsize x inputsize tensor
+function DFD:updateOutput(input)
+   assert(torch.isTensor(input))
+   assert(input:dim() == 2)
+   input = input:contiguous()
+
+   local batchsize, inputsize = input:size(1), input:size(2)
+   local size = self.onlyLastNode and self.nTrees or self.depth
+
+   -- each sample's output keys is resized to maxdepth, which is the maximum size that it can take on
+   self.outputkeys = self.outputkeys or torch.LongTensor()
+   self.outputkeys:resize(batchsize, size)
+   -- values are 1
+   self.outputvalues = self.outputvalues or input.new()
+   self.outputvalues:resize(batchsize, size):fill(1)
+
+   self.output = input.nn.DFD_computeOutput(self.outputkeys, self.outputvalues, self.rootIds, self.leftChild, self.rightChild, self.splitFeatureId, self.splitFeatureValue, input, self.onlyLastNode)
+   return self.output
+end
+
+function DFD:type(type, tensorCache)
+   if type then
+      local info = self:getReconstructionInfo()
+      for k, v in pairs(info) do
+         if torch.type(v) ~= 'torch.LongTensor' then
+            info[k] = nil
+         end
+      end
+      parent.type(self, type, tensorCache)
+      self:reconstructFromInfo(info)
+      return self
+   else
+      return parent.type(self)
+   end
+end
+
+function DFD:updateGradInput()
+   error"Not Implemented"
+end
+
+function DFD:clearState()
+   self.output = {{},{}}
+   self.taskbuffer = {}
+   self.outputkeys = nil
+   self.outputvalues = nil
+   self._range = nil
+   self._indices = nil
+   self._mask = nil
+end
+
+function DFD:reconstructFromInfo(DFDinfo)
+   for k,v in pairs(DFDinfo) do
+      self[k] = v
+   end
+   assert(self.leftChild:nDimension() == 1)
+   assert(self.rightChild:nDimension() == 1)
+   assert(self.leftChild:size(1) == self.nNode)
+   assert(self.rightChild:size(1) == self.nNode)
+   assert(self.leftChild:min() >= 0)
+   assert(self.rightChild:min() >= 0)
+   assert(self.splitFeatureId:nDimension() == 1)
+   assert(self.splitFeatureValue:nDimension() == 1)
+   assert(self.splitFeatureId:size(1) == self.splitFeatureValue:size(1))
+end
+
+function DFD:getReconstructionInfo()
+   local DFDinfo = {
+      nNode = self.nNode,
+      rootIds = self.rootIds,
+      leftChild = self.leftChild,
+      rightChild = self.rightChild,
+      splitFeatureId = self.splitFeatureId,
+      splitFeatureValue = self.splitFeatureValue,
+      depth = self.depth
+   }
+   return DFDinfo
+end
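As the README later notes, the reconstruction info can also seed a new `nn.DFD` directly; a minimal sketch:

```lua
-- duplicate a discretizer from its tensor representation
-- (no reference to the original forest is needed)
local info = dfd:getReconstructionInfo()
local dfd2 = nn.DFD(info)
```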
diff --git a/contrib/lua-torch/decisiontree/DataSet.lua b/contrib/lua-torch/decisiontree/DataSet.lua
new file mode 100644
index 000000000..15058a7c6
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DataSet.lua
@@ -0,0 +1,142 @@
+local dt = require "decisiontree._env"
+
+local DataSet = torch.class("dt.DataSet", dt)
+
+function DataSet:__init(input, target, nThreads)
+   if torch.type(input) == 'table' then
+      assert(torch.isTypeOf(input[1], 'torch.SparseTensor'))
+   else
+      assert(torch.isTensor(input))
+   end
+   self.input = input
+   assert(torch.isTensor(target))
+   self.target = target
+   self.nThreads = nThreads or 1
+
+   self.sortedFeatureValues, self.featureIds = self:sortFeatureValues(input)
+end
+
+-- group examples by featureId. For each featureId, sort examples by featureValue (ascending order)
+-- returns a table mapping featureIds to sorted lists of exampleIds
+-- e.g. {featureId={example1,example2,example3}}
+function DataSet:sortFeatureValues(inputs)
+   local isSparse = torch.typename(inputs[1]):match('torch.*SparseTensor')
+   assert(isSparse or torch.isTensor(inputs))
+
+   local featureIds = torch.LongTensor()
+   local dataset = {} -- TODO use tds.Hash (will require SparseTensor to be userdata)
+   if isSparse then
+      local proto = inputs[1].values
+      -- get list of featureIds
+      local featureMap = {}
+      for i,input in ipairs(inputs) do
+         input.keys:apply(function(key)
+            featureMap[key] = (featureMap[key] or 0) + 1
+         end)
+      end
+      local _ = require "moses"
+      featureIds = featureIds.new(_.keys(featureMap))
+      local featureCounts = torch.LongTensor(featureIds:size(1))
+      for i=1,featureIds:size(1) do
+         featureCounts[i] = featureMap[featureIds[i]]
+      end
+
+      for i=1,featureIds:size(1) do
+         local featureId = featureIds[i]
+         local featureCount = featureCounts[i]
+         dataset[featureId] = {
+            values=proto.new(featureCount),
+            examples=torch.LongTensor(featureCount),
+            i=0
+         }
+      end
+
+      for exampleId,input in ipairs(inputs) do
+         local sparseIdx = 0
+         input.keys:apply(function(key)
+            sparseIdx = sparseIdx + 1
+            local f = dataset[key]
+            f.i = f.i + 1
+            f.values[f.i] = input.values[sparseIdx]
+            f.examples[f.i] = exampleId
+         end)
+      end
+
+      local sortVal, sortIdx = proto.new(), torch.LongTensor()
+      for featureId,f in pairs(dataset) do
+         assert(f.values:size(1) == f.i)
+         sortVal:sort(sortIdx, f.values, 1, false)
+
+         local sortedExampleIds = torch.LongTensor(f.i)
+         sortedExampleIds:index(f.examples, 1, sortIdx)
+
+         dataset[featureId] = sortedExampleIds
+      end
+   else
+      assert(torch.isTensor(inputs))
+      featureIds:range(1,inputs:size(2))
+
+      for i=1,inputs:size(2) do
+         local featureId = i
+         local values = inputs:select(2, i)
+         local _, sortedFeatureExampleIds = values:sort(1, false)
+         dataset[featureId] = sortedFeatureExampleIds
+      end
+   end
+
+   return dataset, featureIds
+end
+
+function DataSet:getSortedFeature(featureId)
+   assert(self.sortedFeatureValues)
+   return self.sortedFeatureValues[featureId]
+end
+
+function DataSet:size()
+   return self.target:size(1)
+end
+
+function DataSet:getExampleIds()
+   if not self.exampleIds then
+      self.exampleIds = torch.LongTensor():range(1,self:size())
+   end
+   return self.exampleIds
+end
+
+function DataSet:countPositive(exampleIds)
+   assert(torch.type(exampleIds) == 'torch.LongTensor')
+   local dt = require 'decisiontree'
+   local buffer = dt.getBufferTable('DataSet')
+   buffer.tensor = buffer.tensor or self.target.new()
+   buffer.tensor:index(self.target, 1, exampleIds)
+   local nPositive = 0
+   buffer.tensor:apply(function(x)
+      if x > 0 then nPositive = nPositive + 1 end
+   end)
+   return nPositive
+end
+
+function DataSet:initScore()
+   self.score = self.score or torch.Tensor()
+   self.score:resize(self:size()):fill(0)
+end
+
+function DataSet:buildIndex()
+   if torch.type(self.input) == 'table' then
+      for exampleId,input in ipairs(self.input) do
+         if torch.isTypeOf(input, 'torch.SparseTensor') then
+            input:buildIndex()
+         end
+      end
+   end
+end
+
+function DataSet:deleteIndex()
+   if torch.type(self.input) == 'table' then
+      for exampleId,input in ipairs(self.input) do
+         if torch.isTypeOf(input, 'torch.SparseTensor') then
+            input:deleteIndex()
+         end
+      end
+   end
+end
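A small sketch (toy values) of what this sorted view looks like:

```lua
local input = torch.Tensor{{0.9}, {0.1}, {0.5}}   -- 3 examples, 1 feature
local target = torch.Tensor{1, 0, 1}
local dataset = dt.DataSet(input, target)
-- example ids ordered by ascending value of feature 1: 2 (0.1), 3 (0.5), 1 (0.9)
print(dataset:getSortedFeature(1))                     -- 2 3 1
print(dataset:countPositive(dataset:getExampleIds()))  -- 2
```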
diff --git a/contrib/lua-torch/decisiontree/DecisionForest.lua b/contrib/lua-torch/decisiontree/DecisionForest.lua
new file mode 100644
index 000000000..cac748e7e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DecisionForest.lua
@@ -0,0 +1,82 @@
+local dt = require "decisiontree._env"
+
+-- Decision forest that ensembles a bag of decision trees.
+local DecisionForest = torch.class("dt.DecisionForest", "dt.DecisionTree", dt)
+
+function DecisionForest:__init(trees, weight, bias)
+   assert(torch.type(trees) == 'table')
+   self.trees = trees
+   if #trees == 0 then
+      self.weight = weight or torch.Tensor()
+      assert(torch.isTensor(self.weight))
+      assert(self.weight:nElement() == 0)
+   else
+      assert(torch.isTypeOf(trees[1], 'dt.DecisionTree'))
+      self.weight = weight or torch.Tensor(#trees):fill(1)
+      assert(torch.isTensor(self.weight))
+      assert(self.weight:dim() == 1)
+      assert(self.weight:min() >= 0, "Expecting positive weights")
+      assert(#trees == self.weight:size(1))
+   end
+
+   self.bias = bias or 0
+   assert(torch.type(self.bias) == 'number')
+end
+
+function DecisionForest:score(input, incrementalId)
+   assert(torch.isTensor(input))
+
+   local buffer = {}
+   if incrementalId then
+      self.buffers = self.buffers or {}
+      self.buffers[incrementalId] = self.buffers[incrementalId] or {}
+      buffer = self.buffers[incrementalId]
+   end
+   buffer.initialCounter = buffer.initialCounter or 0
+
+   -- TODO: score in parallel
+   local output
+   if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
+      buffer.output = buffer.output or input.new()
+      output = buffer.output
+      assert(output:nElement() == 0 or output:size(1) == input:size(1))
+      if output:nElement() == 0 then
+         output:resize(input:size(1)):fill(self.bias)
+      end
+      for i,tree in ipairs(self.trees) do
+         if i > buffer.initialCounter then
+            local score = tree:score(input, nil, true)
+            output:add(self.weight[i], score)
+         end
+      end
+   else
+      output = buffer.output or self.bias
+      for i,tree in ipairs(self.trees) do
+         if i > buffer.initialCounter then
+            output = output + tree:score(input) * self.weight[i]
+         end
+      end
+      buffer.output = output
+   end
+
+   buffer.initialCounter = #self.trees
+
+   return output
+end
+
+function DecisionForest:add(tree, weight)
+   assert(torch.type(weight) == 'number')
+   assert(weight > 0)
+   table.insert(self.trees, tree)
+   self.weight:resize(#self.trees)
+   self.weight[#self.trees] = weight
+   return self
+end
+
+function DecisionForest:clone()
+   local trees = {}
+   for i, tree in ipairs(self.trees) do
+      trees[i] = tree:clone()
+   end
+   return DecisionForest(trees, self.weight:clone(), self.bias)
+end
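The `incrementalId` argument keys a per-caller buffer so that trees already scored are skipped on subsequent calls; a minimal sketch (`df`, `input` and `newTree` are hypothetical):

```lua
local s1 = df:score(input, 'train')   -- scores all trees, caches in buffer 'train'
df:add(newTree, 0.1)                  -- grow the ensemble
local s2 = df:score(input, 'train')   -- only the newly added tree is evaluated
```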
diff --git a/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua b/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua
new file mode 100644
index 000000000..fc903678b
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DecisionForestTrainer.lua
@@ -0,0 +1,22 @@
+local dt = require "decisiontree._env"
+
+-- Interface for all decisionForestTrainers
+local DFT = torch.class("dt.DecisionForestTrainer", dt)
+
+-- Train a DecisionForest with examples, a table of valid featureIds and a dataset (i.e. sortedExamplesByFeatureId)
+function DFT:train(examples, validFeatureIds, dataset)
+   assert(torch.type(examples) == "table")
+   assert(torch.isTypeOf(examples[1], "dt.LabeledExample"))
+
+   assert(torch.type(validFeatureIds) == 'table')
+
+   assert(torch.type(dataset) == 'table')
+   for k,v in pairs(dataset) do
+      assert(torch.type(v) == 'table')
+      assert(torch.isTypeOf(v[1], 'dt.LabeledExample'))
+      break
+   end
+   -- dataset is a table mapping featureIds to sorted lists of LabeledExamples
+   -- e.g. {featureId={example1,example2,example3}}
+   error"Not Implemented"
+end
diff --git a/contrib/lua-torch/decisiontree/DecisionTree.lua b/contrib/lua-torch/decisiontree/DecisionTree.lua
new file mode 100644
index 000000000..c61bc3757
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/DecisionTree.lua
@@ -0,0 +1,12 @@
+local dt = require "decisiontree._env"
+
+-- An interface for decision trees.
+local DecisionTree = torch.class("dt.DecisionTree", dt)
+
+-- Score an input example and return the prediction score.
+-- input is a Tensor or SparseTensor
+-- return prediction score and nodeId
+function DecisionTree:score(input)
+   error"Not Implemented"
+   return score, nodeId
+end
diff --git a/contrib/lua-torch/decisiontree/GBDT_common.h b/contrib/lua-torch/decisiontree/GBDT_common.h
new file mode 100644
index 000000000..eb993702d
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/GBDT_common.h
@@ -0,0 +1,106 @@
+#include "khash.h"
+#include <pthread.h>
+
+#define computeGradientBoostLoss(g, h) (-(g)*(g)/(h))
+
+// we use khash to make iteration faster than lua tables
+KHASH_SET_INIT_INT64(long)
+
+// defines the data we need for running an instance of the trainer and its constructor/destructor
+typedef struct {
+  khash_t(long)* exampleMap;
+  THLongTensor *exampleIdsWithFeature_cache;
+  long minLeafSize;
+} GBRunData;
+
+
+// allocates data that cannot be shared between threads
+static void gb_local_create_run_data(GBRunData *run_data) {
+  run_data->exampleMap = kh_init(long);
+  run_data->exampleIdsWithFeature_cache = THLongTensor_new();
+}
+
+static void gb_create_run_data(GBRunData *run_data, int minLeafSize) {
+  gb_local_create_run_data(run_data);
+  run_data->minLeafSize = minLeafSize;
+}
+
+static void gb_destroy_run_data(GBRunData *run_data) {
+  THLongTensor_free(run_data->exampleIdsWithFeature_cache);
+  kh_destroy(long, run_data->exampleMap);
+}
+
+// initializes the data required by the optimizer for the given feature.
+static THLongTensor *gb_internal_prepare(lua_State *L, THLongTensor *exampleIds,
+    THLongTensor *exampleIdsWithFeature_cache, int input_index, long feature_id,
+    khash_t(long)* exampleMap) {
+  long *exampleIds_data = THLongTensor_data(exampleIds);
+  long exampleIds_size = THLongTensor_size(exampleIds, 0);
+
+  int ret = 0;
+
+  // if the input is a table, then we have a sparse dataset
+  if (lua_istable(L, input_index)) {
+    if (exampleIds_size == 0) {
+      return NULL;
+    }
+    else {
+      // loops over the examples' ids that this node has to evaluate and, if they have the feature
+      // we're looking for, marks them as present and stores them in the order provided by the
+      // dataset
+      THLongTensor_resize1d(exampleIdsWithFeature_cache, exampleIds_size);
+      kh_clear(long, exampleMap);
+      kh_resize(long, exampleMap, exampleIds_size*8);
+      long *exampleIdsWithFeature_data = THLongTensor_data(exampleIdsWithFeature_cache);
+      long j = 0;
+      // for each sample to be evaluated
+      for (long i = 0; i < exampleIds_size; i++) {
+        // gets the representation for the example
+        lua_pushinteger(L, exampleIds_data[i]);
+        lua_gettable(L, input_index);
+
+        // builds the index, which happens only once per thread for efficiency
+        lua_pushstring(L, "buildIndex");
+        lua_gettable(L, -2);
+        lua_pushvalue(L, -2);
+        lua_call(L, 1, 0);
+
+        // tries to get the feature for this sample
+        lua_pushinteger(L, feature_id);
+        lua_gettable(L, -2);
+        // if present, then...
+        if (!lua_isnil(L, -1)) {
+          // saves the example
+          exampleIdsWithFeature_data[j] = exampleIds_data[i];
+          j++;
+
+          // marks it as present in the hash table
+          kh_put(long, exampleMap, exampleIds_data[i], &ret);
+        }
+
+        lua_pop(L, 2);
+      }
+
+      // resizes to fit only the samples that have the feature
+      THLongTensor_resize1d(exampleIdsWithFeature_cache, j);
+      kh_resize(long, exampleMap, j*8);
+      return exampleIdsWithFeature_cache;
+    }
+  }
+  else {
+    // if the input isn't a table, then it's dense and we cannot have exampleIds missing, so it
+    // depends on feature_id
+    // since exampleIds is fixed between calls and this is going to store the same values to the
+    // same position, we can cache it between calls
+    if (kh_size(exampleMap) == 0) {
+      kh_resize(long, exampleMap, exampleIds_size*8);
+      for (long i = 0; i < exampleIds_size; i++) {
+        kh_put(long, exampleMap, exampleIds_data[i], &ret);
+      }
+    }
+    // notice that we just return the given tensor of ids instead of copying it. the rest of the
+    // code handles this transparently
+    return exampleIds;
+  }
+}
+
diff --git a/contrib/lua-torch/decisiontree/GiniState.lua b/contrib/lua-torch/decisiontree/GiniState.lua
new file mode 100644
index 000000000..6dfed2845
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/GiniState.lua
@@ -0,0 +1,54 @@
+local dt = require 'decisiontree._env'
+
+-- used by RandomForestTrainer
+local GiniState, parent = torch.class("dt.GiniState", "dt.TreeState", dt)
+
+function GiniState:__init(exampleIds)
+   parent.__init(self, exampleIds)
+   self.nPositiveInLeftBranch = 0
+   self.nPositiveInRightBranch = 0
+end
+
+function GiniState:score(dataset)
+   local dt = require 'decisiontree'
+   local nPositive = dataset:countPositive(self.exampleIds)
+   return dt.calculateLogitScore(nPositive, self.exampleIds:size(1))
+end
+
+function GiniState:initialize(exampleIdsWithFeature, dataset)
+   assert(torch.type(exampleIdsWithFeature) == 'torch.LongTensor')
+   assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+   self.nPositiveInLeftBranch = dataset:countPositive(exampleIdsWithFeature)
+   self.nPositiveInRightBranch = 0
+
+   self.nExampleInLeftBranch = exampleIdsWithFeature:size(1)
+   self.nExampleInRightBranch = 0
+end
+
+function GiniState:update(exampleId, dataset)
+   assert(torch.type(exampleId) == 'number')
+   assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+   if dataset.target[exampleId] > 0 then
+      self.nPositiveInLeftBranch = self.nPositiveInLeftBranch - 1
+      self.nPositiveInRightBranch = self.nPositiveInRightBranch + 1
+   end
+
+   self.nExampleInLeftBranch = self.nExampleInLeftBranch - 1
+   self.nExampleInRightBranch = self.nExampleInRightBranch + 1
+end
+
+function GiniState:computeSplitInfo(splitFeatureId, splitFeatureValue)
+   local dt = require 'decisiontree'
+   local gini = dt.computeGini(self.nExampleInLeftBranch, self.nPositiveInLeftBranch, self.nExampleInRightBranch, self.nPositiveInRightBranch)
+   local splitInfo = {
+      splitId = assert(splitFeatureId),
+      splitValue = assert(splitFeatureValue),
+      leftChildSize = assert(self.nExampleInLeftBranch),
+      leftPositiveCount = assert(self.nPositiveInLeftBranch),
+      rightChildSize = assert(self.nExampleInRightBranch),
+      rightPositiveCount = assert(self.nPositiveInRightBranch),
+      gini = assert(gini),
+      splitGain = gini
+   }
+   return splitInfo
+end
\ No newline at end of file
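To make the `TreeState` bookkeeping protocol concrete, here is a rough sketch of the sweep a split search can perform with it (the actual search lives in the `dt.TreeState` base class, part of this commit but not excerpted here; `featureId` and `splitValue` are hypothetical):

```lua
local state = dt.GiniState(dataset:getExampleIds())
local sortedIds = dataset:getSortedFeature(featureId)

state:initialize(sortedIds, dataset)      -- all examples start in the left branch
for i = 1, sortedIds:size(1) - 1 do
   state:update(sortedIds[i], dataset)    -- move one example to the right branch
   -- candidate split after the i-th sorted example
   local splitInfo = state:computeSplitInfo(featureId, splitValue)
end
```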
Note that the +-- optimization is only applied if the input is dense +function GradientBoostState:findBestSplitFP(dataset, featureIds, minLeafSize, nThread) + local input = dataset.input + if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then + local ret = self.hessInput.nn.GBDT_findBestSplitFP(self.exampleIds, dataset, featureIds, minLeafSize, self.gradInput, self.hessInput, nThread) + return ret + end +end diff --git a/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua b/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua new file mode 100644 index 000000000..51299b109 --- /dev/null +++ b/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua @@ -0,0 +1,244 @@ +local dt = require "decisiontree._env" + +-- Gradient boosted decision tree trainer +local GradientBoostTrainer = torch.class("dt.GradientBoostTrainer", "dt.DecisionForestTrainer", dt) + +function GradientBoostTrainer:__init(opt) + assert(torch.type(opt) == 'table') + + assert(torch.isTypeOf(opt.treeTrainer, 'dt.CartTrainer')) + self.treeTrainer = opt.treeTrainer + + assert(torch.isTypeOf(opt.lossFunction, 'nn.Criterion')) + self.lossFunction = opt.lossFunction + + assert(torch.type(opt.shrinkage) == 'number') + assert(opt.shrinkage > 0) + self.shrinkage = opt.shrinkage + + assert(torch.type(opt.downsampleRatio) == 'number') + assert(opt.downsampleRatio > 0) + self.downsampleRatio = opt.downsampleRatio + + assert(torch.type(opt.nTree) == 'number') + assert(opt.nTree > 0) + self.nTree = opt.nTree + + evalFreq = evalFreq or -1 + assert(torch.type(opt.evalFreq) == 'number') + assert(torch.round(opt.evalFreq) == opt.evalFreq) + self.evalFreq = opt.evalFreq + + -- when non-positive, no early-stopping + earlyStop = earlyStop or (evalFreq-1) + assert(torch.type(opt.earlyStop) == 'number') + self.earlyStop = opt.earlyStop + + -- when non-positive, defaults to sqrt(#feature) + assert(torch.type(opt.featureBaggingSize) == 'number') + self.featureBaggingSize = opt.featureBaggingSize + + if opt.decisionForest then + assert(torch.isTypeOf(opt.decisionForest, 'dt.DecisionForest')) + end + self.decisionForest = opt.decisionForest + + self.useInitBias = opt.useInitBias +end + +function GradientBoostTrainer:computeBias(trainSet, verbose) + assert(torch.isTypeOf(trainSet, 'dt.DataSet')) + + if verbose then print("Use new bias generated from the training examples.") end + + return -0.5 * self.gradInput:sum() / self.hessInput:sum() +end + + +function GradientBoostTrainer:initialize(trainSet, verbose) + assert(torch.isTypeOf(trainSet, 'dt.DataSet')) + + trainSet:initScore() + self.gradInput, self.hessInput = self.lossFunction:backward2(trainSet.score, trainSet.target) + + -- used for early-stopping (see validate()) + self.stopCount = 0 + self.prevTrainLoss = math.huge + self.prevTestLoss = math.huge + + if verbose then print("Processing initial decision forest") end + + local decisionForest, bias + + if self.decisionForest then + local bias = self.useInitBias and self.decisionForest.bias or self:computeBias(trainSet, verbose) + + decisionForest = dt.DecisionForest(self.decisionForest.trees, self.decisionForest.weight, bias) + + local input = trainSet.input + if torch.isTensor(input) and input.isContiguous and input:isContiguous() then + score = decisionForest:score(input) + else + score:resize(trainSet:size()) + for exampleId=1,trainSet:size() do + score[exampleId] = decisionForest:score(input[exampleId]) + end + end + else + local bias = self:computeBias(trainSet, verbose) + decisionForest 
diff --git a/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua b/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua
new file mode 100644
index 000000000..51299b109
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/GradientBoostTrainer.lua
@@ -0,0 +1,244 @@
+local dt = require "decisiontree._env"
+
+-- Gradient boosted decision tree trainer
+local GradientBoostTrainer = torch.class("dt.GradientBoostTrainer", "dt.DecisionForestTrainer", dt)
+
+function GradientBoostTrainer:__init(opt)
+   assert(torch.type(opt) == 'table')
+
+   assert(torch.isTypeOf(opt.treeTrainer, 'dt.CartTrainer'))
+   self.treeTrainer = opt.treeTrainer
+
+   assert(torch.isTypeOf(opt.lossFunction, 'nn.Criterion'))
+   self.lossFunction = opt.lossFunction
+
+   assert(torch.type(opt.shrinkage) == 'number')
+   assert(opt.shrinkage > 0)
+   self.shrinkage = opt.shrinkage
+
+   assert(torch.type(opt.downsampleRatio) == 'number')
+   assert(opt.downsampleRatio > 0)
+   self.downsampleRatio = opt.downsampleRatio
+
+   assert(torch.type(opt.nTree) == 'number')
+   assert(opt.nTree > 0)
+   self.nTree = opt.nTree
+
+   evalFreq = evalFreq or -1
+   assert(torch.type(opt.evalFreq) == 'number')
+   assert(torch.round(opt.evalFreq) == opt.evalFreq)
+   self.evalFreq = opt.evalFreq
+
+   -- when non-positive, no early-stopping
+   earlyStop = earlyStop or (evalFreq-1)
+   assert(torch.type(opt.earlyStop) == 'number')
+   self.earlyStop = opt.earlyStop
+
+   -- when non-positive, defaults to sqrt(#feature)
+   assert(torch.type(opt.featureBaggingSize) == 'number')
+   self.featureBaggingSize = opt.featureBaggingSize
+
+   if opt.decisionForest then
+      assert(torch.isTypeOf(opt.decisionForest, 'dt.DecisionForest'))
+   end
+   self.decisionForest = opt.decisionForest
+
+   self.useInitBias = opt.useInitBias
+end
+
+function GradientBoostTrainer:computeBias(trainSet, verbose)
+   assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+
+   if verbose then print("Use new bias generated from the training examples.") end
+
+   return -0.5 * self.gradInput:sum() / self.hessInput:sum()
+end
+
+
+function GradientBoostTrainer:initialize(trainSet, verbose)
+   assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+
+   trainSet:initScore()
+   self.gradInput, self.hessInput = self.lossFunction:backward2(trainSet.score, trainSet.target)
+
+   -- used for early-stopping (see validate())
+   self.stopCount = 0
+   self.prevTrainLoss = math.huge
+   self.prevTestLoss = math.huge
+
+   if verbose then print("Processing initial decision forest") end
+
+   local decisionForest, bias
+
+   if self.decisionForest then
+      local bias = self.useInitBias and self.decisionForest.bias or self:computeBias(trainSet, verbose)
+
+      decisionForest = dt.DecisionForest(self.decisionForest.trees, self.decisionForest.weight, bias)
+
+      local input = trainSet.input
+      if torch.isTensor(input) and input.isContiguous and input:isContiguous() then
+         score = decisionForest:score(input)
+      else
+         score:resize(trainSet:size())
+         for exampleId=1,trainSet:size() do
+            score[exampleId] = decisionForest:score(input[exampleId])
+         end
+      end
+   else
+      local bias = self:computeBias(trainSet, verbose)
+      decisionForest = dt.DecisionForest({}, torch.Tensor(), bias)
+
+      trainSet.score:fill(bias)
+   end
+
+   if verbose then print("Finish loading initial decision forest") end
+
+   return decisionForest
+end
+
+-- Trains a decision forest of boosted decision trees.
+-- examples are the training examples. validExamples are used for cross-validation.
+function GradientBoostTrainer:train(trainSet, featureIds, validSet, verbose)
+   assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+   assert(torch.type(featureIds) == 'torch.LongTensor')
+   assert(torch.isTypeOf(validSet, 'dt.DataSet'))
+
+   local decisionForest = self:initialize(trainSet, verbose)
+   local bestDecisionForest
+
+   if verbose then print(string.format("Get %d featureIds.", featureIds:size(1))) end
+
+   local baggingSize = self.featureBaggingSize > 0 and self.featureBaggingSize or torch.round(math.sqrt(featureIds:size(1)))
+   local trainExampleIds = trainSet:getExampleIds()
+   local baggingIndices, activeFeatures
+   local treeExampleIds
+
+   local timer = torch.Timer()
+
+   for treeId = 1,self.nTree do
+      timer:reset()
+      if verbose then print(string.format("Begin processing tree number %d of %d", treeId, self.nTree)) end
+
+      -- Get active features
+      activeFeatures = activeFeatures or torch.LongTensor()
+      if baggingSize < featureIds:size(1) then
+         if verbose then print(string.format("Tree %d: Bagging %d from %d features", treeId, baggingSize, featureIds:size(1))) end
+
+         baggingIndices = baggingIndices or torch.LongTensor()
+         baggingIndices:randperm(featureIds:size(1))
+         activeFeatures:index(featureIds, 1, baggingIndices:narrow(1,1,baggingSize))
+      else
+         activeFeatures = featureIds
+      end
+
+      -- Get data samples
+      if self.downsampleRatio < 0.99 then
+         local sampleSize = torch.round(trainSet:size() * self.downsampleRatio)
+
+         if verbose then print(string.format("Tree %d: Downsampling %d of %d samples", treeId, sampleSize, trainSet:size())) end
+
+         baggingIndices = baggingIndices or torch.LongTensor()
+         baggingIndices:randperm(trainSet:size())
+
+         treeExampleIds = treeExampleIds or torch.LongTensor()
+         treeExampleIds:index(trainExampleIds, 1, baggingIndices:narrow(1,1,sampleSize))
+      else
+         treeExampleIds = trainExampleIds
+      end
+
+      if verbose then print(string.format("Tree %d: training CART tree", treeId)) end
+
+      local rootTreeState = dt.GradientBoostState(treeExampleIds, self.gradInput, self.hessInput)
+      local cartTree = self.treeTrainer:train(rootTreeState, activeFeatures)
+
+      if verbose then print(string.format("Tree %d: finished training CART tree in %f seconds", treeId, timer:time().real)) end
+
+      decisionForest:add(cartTree, self.shrinkage)
+
+      -- update score
+      local predictionScore
+      local input = trainSet.input
+      if torch.isTensor(input) and input:isContiguous() then
+         predictionScore = cartTree:score(trainSet.input, nil, true)
+      else
+         local size = trainSet:size()
+         predictionScore = torch.Tensor(size)
+         for exampleId=1,size do
+            predictionScore[exampleId] = cartTree:score(trainSet.input[exampleId])
+         end
+      end
+      trainSet.score:add(self.shrinkage, predictionScore)
+      self.gradInput, self.hessInput = self.lossFunction:backward2(trainSet.score, trainSet.target)
+
+      if verbose then print(string.format("Tree %d: training complete in %f seconds", treeId, timer:time().real)) end
+
+      -- cross-validation/early-stopping
+      if self.evalFreq > 0 and treeId % self.evalFreq == 0 then
+         timer:reset()
+         local stop, validLoss, bestDecisionForest = self:validate(trainSet, validSet, decisionForest, bestDecisionForest)
+         if dt.PROFILE then print("validate tree time: "..timer:time().real) end
+         if verbose then print(string.format("Loss: train=%7.4f, valid=%7.4f", 0, validLoss)) end
+         if stop then
+            if verbose then print(string.format("GBDT early stopped on tree %d", treeId)) end
+            break
+         end
+
+      end
+   end
+
+   return bestDecisionForest or decisionForest
+end
+
+function dt.GradientBoostTrainer:validate(trainSet, validSet, decisionForest, bestDecisionForest)
+   assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+   assert(torch.isTypeOf(validSet, 'dt.DataSet'))
+   assert(torch.isTypeOf(decisionForest, 'dt.DecisionForest'))
+   assert(not bestDecisionForest or torch.isTypeOf(decisionForest, 'dt.DecisionForest'))
+
+   -- buffer
+   local buffer = dt.getBufferTable('GradientBoost')
+   buffer.tensor = buffer.tensor or trainSet.score.new()
+   local score = buffer.tensor
+
+   -- per thread loss function (tensors are shared)
+   local lossname = torch.typename(self.lossFunction)
+   buffer[lossname] = buffer[lossname] or self.lossFunction:clone()
+   local lossFunction = buffer[lossname]
+
+   -- TODO batch this for large datasets
+   local input = validSet.input
+   if torch.isTensor(input) and input.isContiguous and input:isContiguous() then
+      score = decisionForest:score(input, 'val')
+   else
+      score:resize(validSet:size())
+      for exampleId=1,validSet:size() do
+         score[exampleId] = decisionForest:score(input[exampleId], 'val')
+      end
+   end
+   local validLoss = lossFunction:forward(score, validSet.target)
+
+   -- early stop is not enabled when earlyStop=0
+   local stop = false
+   if self.earlyStop > 0 then
+      -- Track test loss and detect early stop
+      if self.prevTestLoss - validLoss < 0 then
+         self.stopCount = self.stopCount + 1
+      else
+         bestDecisionForest = decisionForest:clone()
+         self.stopCount = 0
+      end
+
+      stop = self.stopCount >= self.earlyStop
+   end
+
+   self.prevTestLoss = validLoss
+
+   return stop, validLoss, bestDecisionForest
+end
+
+function GradientBoostTrainer:getName()
+   return string.format(
+      "gbdt-dRatio-%s-maxLeaf-%s-minExample-%s-nTree-%s-shrinkage-%s",
+      self.downsampleRatio, self.maxLeafNodes, self.minLeafSize, self.nTree, self.shrinkage
+   )
+end
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua b/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua
new file mode 100644
index 000000000..5b9eb6028
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/LogitBoostCriterion.lua
@@ -0,0 +1,45 @@
+local dt = require "decisiontree._env"
+
+-- Ref: slide 17 in https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf
+
+-- equivalent to nn.Sigmoid() + nn.BCECriterion()
+local LogitBoostCriterion, parent = torch.class("nn.LogitBoostCriterion", "nn.Criterion")
+
+function LogitBoostCriterion:__init(sizeAverage)
+   parent.__init(self)
+   self.sizeAverage = sizeAverage
+   self.hessInput = self.gradInput.new()
+   self._output = torch.Tensor()
+end
+
+function LogitBoostCriterion:updateOutput(input, target)
+   input.nn.LogitBoostCriterion_updateOutput(input, target, self._output, self.sizeAverage)
+   self.output = self._output[1]
+   return self.output
+end
+
+function LogitBoostCriterion:updateGradInput(input, target)
+   input.nn.LogitBoostCriterion_updateGradInput(input, target, self.gradInput)
+   return self.gradInput
+end
+
+function LogitBoostCriterion:updateHessInput(input, target)
+   input.nn.LogitBoostCriterion_updateHessInput(input, target, self.hessInput)
+   return self.hessInput
+end
+
+-- returns gradInput and hessInput
+function LogitBoostCriterion:backward2(input, target)
+   return self:updateGradInput(input, target), self:updateHessInput(input, target)
+end
+
+local gradWrapper = function(input, target, grad)
+   input.nn.LogitBoostCriterion_updateGradInput(input, target, grad)
+end
+local hessianWrapper = function(input, target, hessian)
+   input.nn.LogitBoostCriterion_updateHessInput(input, target, hessian)
+end
+
+function LogitBoostCriterion:getWrappers()
+   return gradWrapper, hessianWrapper
+end
diff --git a/contrib/lua-torch/decisiontree/MSECriterion.lua b/contrib/lua-torch/decisiontree/MSECriterion.lua
new file mode 100644
index 000000000..948c1a17e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/MSECriterion.lua
@@ -0,0 +1,13 @@
+local dt = require "decisiontree._env"
+
+function nn.MSECriterion.updateHessInput(self, input, target)
+   self.hessInput = self.hessInput or input.new()
+   self.hessInput:resize(input:size()):fill(2)
+   return self.hessInput
+end
+
+-- returns gradInput and hessInput
+function nn.MSECriterion.backward2(self, input, target)
+   return self:updateGradInput(input, target), self:updateHessInput(input, target)
+end
+
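Through this extension, the stock `nn.MSECriterion` can drive the boosting loop for regression, with a constant Hessian of 2 for squared error; a minimal sketch (`score` and `target` are hypothetical tensors of matching shape):

```lua
local dt = require 'decisiontree'  -- installs backward2 on nn.MSECriterion

local criterion = nn.MSECriterion()
local grad, hess = criterion:backward2(score, target)
assert(hess[1] == 2)
```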
+The `nn.DFD` Module can be used as a decision forest discretizer (DFD):
+
+```lua
+local dfd = nn.DFD(df, onlyLastNode)
+```
+
+where `df` is a `dt.DecisionForest` instance or the table returned by the method `getReconstructionInfo()` on another `nn.DFD` module, and `onlyLastNode` is a boolean indicating whether the module should return only the ID of the last node visited in each tree (by default, it outputs all traversed nodes except the roots).
+The `nn.DFD` module requires dense `input` tensors.
+Sparse `input` tensors (tables of tensors) are not supported.
+The `output` returned by a call to `updateOutput` is a batch of sparse tensors, where `output[1]` and `output[2]` are, respectively, a list of key tensors and a list of value tensors:
+
+```lua
+{
+   { [torch.LongTensor], ... , [torch.LongTensor] },
+   { [torch.Tensor], ... , [torch.Tensor] }
+}
+```
+
+This module doesn't support CUDA.
+
+### Example
+
+As a concrete example, let us first train a Random Forest on a dummy dense dataset:
+
+```lua
+local nExample = 100
+local batchsize = 2
+local inputsize = 10
+
+-- train Random Forest
+local trainSet = dt.getDenseDummyData(nExample, nil, inputsize)
+local opt = {
+   activeRatio=0.5,
+   featureBaggingSize=5,
+   nTree=4,
+   maxLeafNodes=nExample/2,
+   minLeafSize=nExample/10,
+}
+local trainer = dt.RandomForestTrainer(opt)
+local df = trainer:train(trainSet, trainSet.featureIds)
+assert(#df.trees == opt.nTree)
+```
+
+Now that we have `df`, a `dt.DecisionForest` instance, we can use it to initialize `nn.DFD`:
+
+```lua
+local dfd = nn.DFD(df)
+```
+
+The `dfd` instance holds no reference to `df`; instead, it extracts the relevant attributes from `df`.
+These attributes are stored in tensors for batching and efficiency.
+
+We can discretize a hypothetical `input` by calling `forward`:
+
+```lua
+local input = trainSet.input:sub(1,batchsize)
+local output = dfd:forward(input)
+```
+
+The resulting `output` is a table consisting of two tables: keys and values.
+The keys and values tables each contain `batchsize` tensors:
+
+```lua
+print(output)
+{
+  1 :
+    {
+      1 : LongTensor - size: 14
+      2 : LongTensor - size: 16
+      3 : LongTensor - size: 15
+      4 : LongTensor - size: 13
+    }
+  2 :
+    {
+      1 : DoubleTensor - size: 14
+      2 : DoubleTensor - size: 16
+      3 : DoubleTensor - size: 15
+      4 : DoubleTensor - size: 13
+    }
+}
+```
+
+An example's feature keys (`LongTensor`) and corresponding values (`DoubleTensor`) have the same number of elements.
+Each example has a variable number of key-value pairs representing the nodes traversed in the trees.
+The output feature space has as many dimensions (that is, possible feature keys) as there are nodes in the forest.
+
+## `torch.SparseTensor`
+
+Suppose you have a set of `keys` mapped to `values`:
+
+```lua
+local keys = torch.LongTensor{1,3,4,7,2}
+local values = torch.Tensor{0.1,0.3,0.4,0.7,0.2}
+```
+
+You can use a `SparseTensor` to encapsulate these into a read-only tensor:
+
+```lua
+local st = torch.SparseTensor(keys, values)
+```
+
+The _decisiontree_ library uses `SparseTensors` to simulate the `__index` method of the `torch.Tensor`.
+For example, one can obtain the value associated with key 3 of the above `st` instance:
+
+```lua
+local value = st[3]
+assert(value == 0.3)
+```
+
+When a key-value pair is missing, `nil` is returned instead:
+
+```lua
+local value = st[5]
+assert(value == nil)
+```
+
+The default implementation of this kind of indexing is slow (it uses a sequential scan of the `keys` tensor).
+To speed up indexing, one can call the `buildIndex()` method beforehand:
+
+```lua
+st:buildIndex()
+```
+
+The `buildIndex()` method creates a hash map (a Lua table) from keys to their corresponding indices in the `values` tensor.
+
+## `dt.DataSet`
+
+The `CartTrainer`, `RandomForestTrainer` and `GradientBoostTrainer` require that data sets be encapsulated into a `DataSet`.
+Suppose you have a dataset of dense inputs and targets:
+
+```lua
+local nExample = 10
+local nFeature = 5
+local input = torch.randn(nExample, nFeature)
+local target = torch.Tensor(nExample):random(0,1)
+```
+
+These can be encapsulated into a `DataSet` object:
+
+```lua
+local dataset = dt.DataSet(input, target)
+```
+
+Now suppose you have a dataset where the `input` is a table of `SparseTensor` instances:
+
+```lua
+local input = {}
+for i=1,nExample do
+   local nKeyVal = math.random(2,nFeature)
+   local keys = torch.LongTensor(nKeyVal):random(1,nFeature)
+   local values = torch.randn(nKeyVal)
+   input[i] = torch.SparseTensor(keys, values)
+end
+```
+
+You can still use a `DataSet` to encapsulate the sparse dataset:
+
+```lua
+local dataset = dt.DataSet(input, target)
+```
+
+The main purpose of the `DataSet` class is to sort each feature by value.
+This is captured by the `sortFeatureValues(input)` method, which is called in the constructor:
+
+```lua
+local sortedFeatureValues, featureIds = self:sortFeatureValues(input)
+```
+
+The `featureIds` is a `torch.LongTensor` of all available feature IDs.
+For a dense `input` tensor, this is just `torch.LongTensor():range(1,input:size(2))`.
+But for a sparse `input` tensor, the `featureIds` tensor only contains the feature IDs present in the dataset.
+
+The resulting `sortedFeatureValues` is a table mapping `featureIds` to `exampleIds` sorted by feature value.
+For each `featureId`, examples are sorted by `featureValue` in ascending order.
+For example, the table might look like `{featureId=exampleIds}` where `exampleIds={1,3,2}`.
+
+The `CartTrainer` accesses the `sortedFeatureValues` table by calling `getSortedFeature(featureId)`:
+
+```lua
+local exampleIdsWithFeature = dataset:getSortedFeature(featureId)
+```
+
+The ability to access example IDs sorted by feature value, given a feature ID, is the main purpose of the `DataSet`.
+The `CartTrainer` relies on these sorted lists to find the best way to split a set of examples between two tree nodes.
+
+## `dt.CartTrainer`
+
+```lua
+local trainer = dt.CartTrainer(dataset, minLeafSize, maxLeafNodes)
+```
+
+The `CartTrainer` is used by the `RandomForestTrainer` and `GradientBoostTrainer` to train individual trees.
+CART stands for classification and regression trees.
+However, only binary classifiers are unit tested.
+
+The constructor takes the following arguments:
+
+ * `dataset` is a `dt.DataSet` instance representing the training set.
+ * `minLeafSize` is the minimum number of examples per leaf node in a tree. The larger the value, the more regularization.
+ * `maxLeafNodes` is the maximum number of leaf nodes in the tree. The lower the value, the more regularization.
+
+Training is initiated by calling the `train()` method:
+
+```lua
+local trainSet = dt.DataSet(input, target)
+local rootTreeState = dt.GiniState(trainSet:getExampleIds())
+local activeFeatures = trainSet.featureIds
+local tree = trainer:train(rootTreeState, activeFeatures)
+```
+
+The resulting `tree` is a `CartTree` instance.
+The `rootTreeState` is a `TreeState` instance like `GiniState` (used by `RandomForestTrainer`) or `GradientBoostState` (used by `GradientBoostTrainer`).
+The `activeFeatures` is a `LongTensor` of the feature IDs used to build the tree.
+Every other feature ID is ignored during training. This is useful for feature bagging.
+
+By default, the `CartTrainer` runs in a single thread.
+The `featureParallel(nThread)` method can be called before calling `train()` to parallelize training using `nThread` workers:
+
+```lua
+local nThread = 3
+trainer:featureParallel(nThread)
+trainer:train(rootTreeState, activeFeatures)
+```
+
+Feature parallelization assigns a set of feature IDs to each thread.
+
+The `CartTrainer` can be used as a stand-alone tree trainer.
+But it is recommended to use it within the context of a `RandomForestTrainer` or `GradientBoostTrainer` instead.
+The resulting forests typically generalize better than individual trees.
+
+## RandomForestTrainer
+
+The `RandomForestTrainer` is used to train a random forest:
+
+```lua
+local nExample = trainSet:size()
+local opt = {
+   activeRatio=0.5,
+   featureBaggingSize=5,
+   nTree=14,
+   maxLeafNodes=nExample/2,
+   minLeafSize=nExample/10,
+}
+local trainer = dt.RandomForestTrainer(opt)
+local forest = trainer:train(trainSet, trainSet.featureIds)
+```
+
+The returned `forest` is a `DecisionForest` instance.
+A `DecisionForest` has a similar interface to the `CartTree`.
+Indeed, they both sub-class the `DecisionTree` abstract class.
+
+The constructor takes a single `opt` table argument, which contains the actual arguments:
+
+ * `activeRatio` is the ratio of active examples per tree. This is used for bootstrap sampling.
+ * `featureBaggingSize` is the number of features per tree. This is used for feature bagging.
+ * `nTree` is the number of trees to be trained.
+ * `maxLeafNodes` and `minLeafSize` are passed to the underlying `CartTrainer` constructor (they control regularization).
+
+Internally, the `RandomForestTrainer` passes a `GiniState` to the `CartTrainer:train()` method.
+
+Training can be parallelized by calling `treeParallel(nThread)`:
+
+```lua
+local nThread = 3
+trainer:treeParallel(nThread)
+local forest = trainer:train(trainSet, trainSet.featureIds)
+```
+
+Training is then parallelized by building each tree in its own worker thread.
+
+## GradientBoostTrainer
+
+References:
+ * A. [Boosted Tree presentation](https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf)
+
+Gradient boosted decision trees (GBDT) can be trained as follows:
+
+```lua
+local nExample = trainSet:size()
+local maxLeafNode, minLeafSize = nExample/2, nExample/10
+local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNode)
+
+local opt = {
+   lossFunction=nn.LogitBoostCriterion(false),
+   treeTrainer=cartTrainer,
+   shrinkage=0.1,
+   downsampleRatio=0.8,
+   featureBaggingSize=-1,
+   nTree=14,
+   evalFreq=8,
+   earlyStop=0
+}
+
+local trainer = dt.GradientBoostTrainer(opt)
+local forest = trainer:train(trainSet, trainSet.featureIds, validSet)
+```
+
+The above code snippet uses the `LogitBoostCriterion` outlined in reference A.
+It is used for training binary classification trees.
+
+The returned `forest` is a `DecisionForest` instance.
+A `DecisionForest` has a similar interface to the `CartTree`.
+Indeed, they both sub-class the `DecisionTree` abstract class.
+
+The constructor takes a single `opt` table argument, which contains the actual arguments:
+
+ * `lossFunction` is an `nn.Criterion` instance extended to include the `updateHessInput(input, target)` and `backward2(input, target)` methods. These provide the Hessian of the loss with respect to the `input`.
+ * `treeTrainer` is a `CartTrainer` instance. Its `featureParallel()` method can be called to implement feature parallelization.
+ * `shrinkage` is the weight of each additional tree.
+ * `downsampleRatio` is the ratio of examples to be sampled for each tree. Used for bootstrap sampling.
+ * `featureBaggingSize` is the number of features to sample per tree. Used for feature bagging. `-1` defaults to `torch.round(math.sqrt(featureIds:size(1)))`.
+ * `nTree` is the maximum number of trees.
+ * `evalFreq` is the number of epochs between calls to `validate()` for cross-validation and early-stopping.
+ * `earlyStop` is the maximum number of epochs to wait for early-stopping.
+
+Internally, the `GradientBoostTrainer` passes a `GradientBoostState` to the `CartTrainer:train()` method.
+
+## TreeState
+
+An abstract class that holds the state of a subtree during decision tree training.
+It also manages the state of candidate splits.
+
+```lua
+local treeState = dt.TreeState(exampleIds)
+```
+
+The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
+
+## GiniState
+
+A `TreeState` subclass used internally by the `RandomForestTrainer`.
+Uses Gini impurity to determine how to split trees.
+
+```lua
+local treeState = dt.GiniState(exampleIds)
+```
+
+The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
+
+## GradientBoostState
+
+A `TreeState` subclass used internally by the `GradientBoostTrainer`.
+It implements the GBDT splitting algorithm, which uses a loss function.
+
+```lua
+local treeState = dt.GradientBoostState(exampleIds, lossFunction)
+```
+
+The `exampleIds` argument is a `LongTensor` containing the example IDs that make up the sub-tree.
+The `lossFunction` is an `nn.Criterion` instance (see `GradientBoostTrainer`).
+
+
+## WorkPool
+
+Utility class that simplifies construction of a pool of daemon threads with which to execute tasks in parallel.
+
+```lua
+local workpool = dt.WorkPool(nThread)
+```
+
+## CartTree
+
+Implements a trained CART decision tree:
+
+```lua
+local tree = dt.CartTree(rootNode)
+```
+
+The `rootNode` is a `CartNode` instance.
+Each `CartNode` contains pointers to left and right branches, which are themselves `CartNode` instances.
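+
+To make this concrete, here is a minimal hand-built stump (a sketch for illustration only; it assumes the `dt.CartNode` constructor signature `(nodeId, leftChild, rightChild, splitFeatureId, splitFeatureValue, score)`; in practice, trees are built by a trainer):
+
+```lua
+-- leaf nodes have no children, only a score
+local leftLeaf = dt.CartNode(2, nil, nil, nil, nil, -1)
+local rightLeaf = dt.CartNode(3, nil, nil, nil, nil, 1)
+-- the root sends examples with input[5] < 0.5 to the left leaf, the rest to the right leaf
+local rootNode = dt.CartNode(1, leftLeaf, rightLeaf, 5, 0.5, 0)
+local tree = dt.CartTree(rootNode)
+```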
+ +For inference, use the `score(input)` method: + +```lua +local score = tree:score(input) +``` diff --git a/contrib/lua-torch/decisiontree/RandomForestTrainer.lua b/contrib/lua-torch/decisiontree/RandomForestTrainer.lua new file mode 100644 index 000000000..41040b25b --- /dev/null +++ b/contrib/lua-torch/decisiontree/RandomForestTrainer.lua @@ -0,0 +1,159 @@ +local dt = require "decisiontree._env" + +local RandomForestTrainer = torch.class("dt.RandomForestTrainer", dt) + +function RandomForestTrainer:__init(opt) + assert(torch.type(opt.nTree) == 'number') + assert(opt.nTree > 0) + self.nTree = opt.nTree + -- max number of leaf nodes per tree + assert(torch.type(opt.maxLeafNodes) == 'number') + assert(opt.maxLeafNodes > 0) + self.maxLeafNodes = opt.maxLeafNodes + -- min number of examples per leaf + assert(torch.type(opt.minLeafSize) == 'number') + assert(opt.minLeafSize > 0) + self.minLeafSize = opt.minLeafSize + + -- when non-positive, defaults to sqrt(#feature) + assert(torch.type(opt.featureBaggingSize) == 'number') + self.featureBaggingSize = opt.featureBaggingSize + + assert(torch.type(opt.activeRatio) == 'number') + assert(opt.activeRatio > 0) + self.activeRatio = opt.activeRatio + + -- default parallelization is singlethread + self.parallelMode = 'singlethread' +end + +-- Train a DecisionForest +function RandomForestTrainer:train(trainSet, featureIds, verbose) + assert(torch.isTypeOf(trainSet, 'dt.DataSet')) + assert(torch.type(featureIds) == 'torch.LongTensor') + + if verbose then print(string.format("Begin training Decision Forest with %d trees", self.nTree)) end + + local weight = torch.Tensor(self.nTree):fill(1 / self.nTree) -- RF uses uniform weights + + local trees + if self.parallelMode == 'singlethread' then + trees = self:trainTrees(trainSet, featureIds, verbose) + elseif self.parallelMode == 'treeparallel' then + trainSet:deleteIndex() -- prevents serialization bottleneck + trees = self:trainTreesTP(trainSet, featureIds, verbose) + else + error("Unrecognized parallel mode: " .. 
self.parallelMode)
+   end
+
+   if verbose then print(string.format("Successfully trained %d trees", #trees)) end
+
+   -- set bias
+   local bias = 0
+   for i, tree in ipairs(trees) do
+      bias = bias + tree.root.score * weight[i]
+   end
+
+   return dt.DecisionForest(trees, weight, bias)
+end
+
+function RandomForestTrainer:trainTrees(trainSet, featureIds, verbose)
+
+   -- the same CartTrainer will be used for each tree
+   local cartTrainer = dt.CartTrainer(trainSet, self.minLeafSize, self.maxLeafNodes)
+
+   local trees = {}
+   for treeId=1,self.nTree do
+      -- Train a CartTree
+      local tree = self.trainTree(cartTrainer, featureIds, self.featureBaggingSize, self.activeRatio, treeId, verbose)
+      table.insert(trees, tree)
+   end
+   return trees
+end
+
+-- static function that returns a cartTree
+function RandomForestTrainer.trainTree(cartTrainer, featureIds, baggingSize, activeRatio, treeId, verbose)
+   assert(torch.isTypeOf(cartTrainer, 'dt.CartTrainer'))
+   assert(torch.type(featureIds) == 'torch.LongTensor')
+   local baggingSize = baggingSize > 0 and baggingSize or torch.round(math.sqrt(featureIds:size(1)))
+
+   if verbose then
+      print(string.format("Tree %d: Creating feature bootstrap sample with baggingSize %d, nFeatures %d", treeId, baggingSize, featureIds:size(1)))
+   end
+
+   local trainSet = cartTrainer.dataset
+
+   -- sample bootstrap features
+   local baggingIndices = torch.LongTensor(baggingSize):random(1,featureIds:size(1))
+   local activeFeatures = featureIds:index(1, baggingIndices)
+
+   -- sample bootstrap examples
+   local sampleSize = torch.round(trainSet:size() * activeRatio)
+   if verbose then print(string.format("Creating bootstrap sample of size %d", sampleSize)) end
+
+   baggingIndices:resize(sampleSize):random(1,trainSet:size())
+   local bootStrapExampleIds = torch.LongTensor()
+   bootStrapExampleIds:index(trainSet:getExampleIds(), 1, baggingIndices)
+
+   local cartTree = cartTrainer:train(dt.GiniState(bootStrapExampleIds), activeFeatures)
+
+   if verbose then print(string.format("Completed processing tree number %d", treeId)) end
+
+   return cartTree
+end
+
+-- call before training to enable tree-level parallelization
+function RandomForestTrainer:treeParallel(workPool)
+   assert(self.parallelMode == 'singlethread', self.parallelMode)
+   self.parallelMode = 'treeparallel'
+   self.workPool = torch.type(workPool) == 'number' and dt.WorkPool(workPool) or workPool
+   assert(torch.isTypeOf(self.workPool, 'dt.WorkPool'))
+
+   -- require the dt package
+   self.workPool:update('require', {libname='decisiontree',varname='dt'})
+end
+
+-- TP is for tree parallel (not toilet paper)
+function RandomForestTrainer:trainTreesTP(trainSet, featureIds, verbose)
+   assert(torch.isTypeOf(trainSet, 'dt.DataSet'))
+   assert(torch.type(featureIds) == 'torch.LongTensor')
+   local minLeafSize = self.minLeafSize
+   local maxLeafNodes = self.maxLeafNodes
+
+   -- setup worker store (each worker will have its own cartTrainer)
+   self.workPool:updateup('execute', function(store)
+      local dt = require 'decisiontree'
+
+      store.cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes)
+      store.featureIds = featureIds
+   end)
+
+   for treeId=1,self.nTree do
+      -- upvalues
+      local baggingSize = self.featureBaggingSize
+      local activeRatio = self.activeRatio
+      -- task closure that will be executed in worker-thread
+      local function trainTreeTask(store)
+         local dt = require 'decisiontree'
+         return dt.RandomForestTrainer.trainTree(store.cartTrainer, store.featureIds, baggingSize, activeRatio, treeId, verbose)
+      end
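+      -- writeup (rather than write) serializes the task closure together with its
+      -- upvalues (baggingSize, activeRatio, treeId, verbose), so that each worker
+      -- thread receives its own copy of the task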
self.workPool:writeup('execute', trainTreeTask) + end + + local trees = {} + for treeId=1,self.nTree do + local taskname, tree = self.workPool:read() + assert(taskname=='execute') + assert(torch.isTypeOf(tree, 'dt.CartTree')) + table.insert(trees, tree) + end + return trees +end + +function RandomForestTrainer:getName() + return string.format( + "randomforest-aRatio-%4.2f-maxLeaf-%d-minExample-%d-nTree-%d", + self.activeRatio, self.maxLeafNodes, self.minLeafSize, self.nTree + ) +end + diff --git a/contrib/lua-torch/decisiontree/Sparse2Dense.lua b/contrib/lua-torch/decisiontree/Sparse2Dense.lua new file mode 100644 index 000000000..4e5b79d2f --- /dev/null +++ b/contrib/lua-torch/decisiontree/Sparse2Dense.lua @@ -0,0 +1,88 @@ +local S2D, parent = torch.class("nn.Sparse2Dense", "nn.Module") +local dt = require 'decisiontree._env' + +function S2D:__init(features) + parent.__init(self) + if torch.type(features) == 'table' then + assert(#features > 0) + features = torch.LongTensor(features) + end + assert(torch.isTensor(features)) + self.features = features + self.featureMap = nil + self.masks = {} + self.mappedKeys = {} +end + +function S2D:updateOutput(input) + if not self.featureMap then + self.featureMap = dt.HashMap() + self.featureMap:fill(self.features) + end + local batched, keys, values + if torch.isTensor(input[1]) then + keys = {input[1]} + values = {input[2]} + batched = false + else + keys = input[1] + values = input[2] + batched = true + end + assert(#keys == #values) + + local masks = self.masks + local mappedKeys = self.mappedKeys + local nKeys = #keys + local nMasks = #masks + if nMasks < nKeys then + for i=nMasks+1,nKeys do + masks[i] = torch.ByteTensor() + mappedKeys[i] = torch.LongTensor() + end + elseif nMasks > nKeys then + for i=nKeys+1,nMasks do + masks[i] = nil + mappedKeys[i] = nil + end + end + + self.featureMap:get(keys, mappedKeys, masks) + self.output = self.output or torch.Tensor():type(self._type) + self.output.nn.S2D_computeOutput(self.output, mappedKeys, values, masks, self.features) + if not batched then + self.output = self.output:view(-1) + end + return self.output +end + +function S2D:type(type, tensorCache) + if type then + local features = self.features + self.features = nil + parent.type(self, type, tensorCache) + self.features = features + return self + else + return parent.type(self) + end +end + +function S2D:updateGradInput(input, gradOutput) + error"Not Implemented" +end + +function S2D:reset() + parent.reset(self) + self.featureMap = nil +end + +function S2D:write(file) + self.featureMap = nil + parent.write(self, file) +end + +function S2D:read(file) + self.featureMap = nil + parent.read(self, file) +end diff --git a/contrib/lua-torch/decisiontree/SparseTensor.lua b/contrib/lua-torch/decisiontree/SparseTensor.lua new file mode 100644 index 000000000..4c620e618 --- /dev/null +++ b/contrib/lua-torch/decisiontree/SparseTensor.lua @@ -0,0 +1,54 @@ + +local SparseTensor = torch.class("torch.SparseTensor") + +function SparseTensor:__init(keys, values) + if keys and values then + assert(torch.typename(keys):find('torch%..*LongTensor')) + assert(torch.isTensor(values)) + assert(keys:nElement() == values:nElement(), "Expecting key and value tensors of same size") + self.keys = keys + self.values = values + elseif not (keys or values) then + self.keys = torch.LongTensor() + self.values = torch.Tensor() + else + error"Expecting zero or two args" + end +end + +function SparseTensor:buildIndex(overwrite) + if self._map and not overwrite then return end + 
assert(self.keys and self.keys:dim() == 1) + assert(self.values and self.values:dim() == 1) + -- hash table + self._map = {} + for i=1,self.keys:size(1) do + self._map[self.keys[i]] = i + end +end + +function SparseTensor:deleteIndex() + self._map = nil +end + +local __index = SparseTensor.__index +function SparseTensor:__index(key) + if key == nil then + error"Attempt to index using a nil key" + elseif torch.type(key) ~= 'number' then + return __index(self, key) + end + + if self._map then + assert(torch.type(self._map) == 'table') + local idx = self._map[key] + return idx and self.values[idx] or nil + elseif self.keys:nElement() > 0 then + for i=1,self.keys:size(1) do + if self.keys[i] == key then + return self.values[i] + end + end + end + return nil +end
\ No newline at end of file diff --git a/contrib/lua-torch/decisiontree/TreeState.lua b/contrib/lua-torch/decisiontree/TreeState.lua new file mode 100644 index 000000000..3928649fd --- /dev/null +++ b/contrib/lua-torch/decisiontree/TreeState.lua @@ -0,0 +1,191 @@ +local dt = require "decisiontree._env" + +local TreeState = torch.class("dt.TreeState", dt) + +-- Holds the state of a subtree during decision tree training. +-- Also, manages the state of candidate splits +function TreeState:__init(exampleIds) + assert(torch.type(exampleIds) == 'torch.LongTensor') + self.exampleIds = exampleIds + + self.nExampleInLeftBranch = 0 + self.nExampleInRightBranch = 0 +end + +-- computes and returns the score of the node based on its examples +function TreeState:score(dataset) + error"NotImplemented" +end + + +-- Initializes the split-state-updater. Initially all examples are in the left branch. +-- exampleIdsWithFeature is list of examples to split (those having a particular feature) +function TreeState:initialize(exampleIdsWithFeature, dataset) + error"NotImplemented" +end + +-- Update the split state. This call has the effect of shifting the example from the left to the right branch. +function TreeState:update(exampleId, dataset) + error"NotImplemented" +end + +-- Computes the SplitInfo determined by the current split state +-- @param splitFeatureId the feature id of the split feature +-- @param splitFeatureValue the feature value of the split feature +-- @return the SplitInfo determined by the current split state +function TreeState:computeSplitInfo(splitFeatureId, splitFeatureValue) + error"NotImplemented" +end + +-- bottleneck +function TreeState:findBestFeatureSplit(dataset, featureId, minLeafSize) + local dt = require "decisiontree" + assert(torch.isTypeOf(dataset, 'dt.DataSet')) + assert(torch.type(featureId) == 'number') + assert(torch.type(minLeafSize) == 'number') + + -- all dataset example having this feature, sorted by value + local featureExampleIds = dataset:getSortedFeature(featureId) + + local buffer = dt.getBufferTable('TreeState') + buffer.longtensor = buffer.longtensor or torch.LongTensor() + local exampleIdsWithFeature = buffer.longtensor + + -- map and tensor of examples containing feature: + local exampleMap = {} + local getExampleFeatureValue + + local j = 0 + if torch.type(dataset.input) == 'table' then + exampleIdsWithFeature:resize(self.exampleIds:size()) + self.exampleIds:apply(function(exampleId) + local input = dataset.input[exampleId] + input:buildIndex()-- only builds index first time + if input[featureId] then + j = j + 1 + exampleIdsWithFeature[j] = exampleId + exampleMap[exampleId] = j + end + end) + if j == 0 then + return + end + exampleIdsWithFeature:resize(j) + getExampleFeatureValue = function(exampleId) return dataset.input[exampleId][featureId] end + else + exampleIdsWithFeature = self.exampleIds + self.exampleIds:apply(function(exampleId) + j = j + 1 + exampleMap[exampleId] = j + end) + local featureValues = dataset.input:select(2,featureId) + getExampleFeatureValue = function(exampleId) return featureValues[exampleId] end + end + + + self:initialize(exampleIdsWithFeature, dataset) + + -- bottleneck + local bestSplit, previousSplitValue, _tictoc + for i=featureExampleIds:size(1),1,-1 do -- loop over examples sorted (desc) by feature value + local exampleId = featureExampleIds[i] + + local exampleIdx = exampleMap[exampleId] + if exampleIdx then + local splitValue = getExampleFeatureValue(exampleId) + + if previousSplitValue and math.abs(splitValue - 
previousSplitValue) > dt.EPSILON then
+            local splitInfo = self:computeSplitInfo(featureId, previousSplitValue, _tictoc)
+            if (splitInfo.leftChildSize >= minLeafSize) and (splitInfo.rightChildSize >= minLeafSize) then
+
+               if (not bestSplit) or (splitInfo.splitGain < bestSplit.splitGain) then
+                  _tictoc = bestSplit or {} -- reuse table
+                  bestSplit = splitInfo
+               end
+
+            end
+         end
+
+         previousSplitValue = splitValue
+
+         -- bottleneck
+         self:update(exampleId, dataset, exampleIdx)
+      end
+   end
+
+   return bestSplit
+end
+
+-- finds the best split of examples in treeState among featureIds
+function TreeState:findBestSplit(dataset, featureIds, minLeafSize, shardId, nShard)
+   assert(torch.isTypeOf(dataset, 'dt.DataSet'))
+   assert(torch.type(featureIds) == 'torch.LongTensor')
+   assert(torch.type(minLeafSize) == 'number')
+   assert(torch.type(shardId) == 'number')
+   assert(torch.type(nShard) == 'number')
+
+   local bestSplit
+   for i=1,featureIds:size(1) do
+      local featureId = featureIds[i]
+      if (nShard <= 1) or ( (featureId % nShard) + 1 == shardId ) then -- feature sharded
+         local splitCandidate = self:findBestFeatureSplit(dataset, featureId, minLeafSize)
+         if splitCandidate and ((not bestSplit) or (splitCandidate.splitGain < bestSplit.splitGain)) then
+            bestSplit = splitCandidate
+         end
+      end
+   end
+
+   return bestSplit
+end
+
+-- Partitions self given a splitInfo table, producing a pair of exampleIds corresponding to the left and right subtrees.
+function TreeState:_branch(splitInfo, dataset)
+   local leftIdx, rightIdx = 0, 0
+   local nExample = self.exampleIds:size(1)
+   local splitExampleIds = torch.LongTensor(nExample)
+
+   for i=1,self.exampleIds:size(1) do
+      local exampleId = self.exampleIds[i]
+      local input = dataset.input[exampleId]
+      local val = input[splitInfo.splitId]
+      -- Note: when the feature is not present in the example, the example is dropped from all sub-trees.
+      -- This means that for most sparse data, a tree cannot reach 100% accuracy...
+      if val then
+         if val < splitInfo.splitValue then
+            leftIdx = leftIdx + 1
+            splitExampleIds[leftIdx] = exampleId
+         else
+            rightIdx = rightIdx + 1
+            splitExampleIds[nExample-rightIdx+1] = exampleId
+         end
+      end
+   end
+
+   local leftExampleIds = splitExampleIds:narrow(1,1,leftIdx)
+   local rightExampleIds = splitExampleIds:narrow(1,nExample-rightIdx+1,rightIdx)
+
+   assert(leftExampleIds:size(1) + rightExampleIds:size(1) <= self.exampleIds:size(1), "Left and right branches contain more data than the parent!")
+   return leftExampleIds, rightExampleIds
+end
+
+-- calls _branch and encapsulates the left and right exampleIds into two new TreeState instances
+function TreeState:branch(splitInfo, dataset)
+   local leftExampleIds, rightExampleIds = self:_branch(splitInfo, dataset)
+   return self.new(leftExampleIds), self.new(rightExampleIds)
+end
+
+function TreeState:size()
+   return self.exampleIds:size(1)
+end
+
+function TreeState:contains(exampleId)
+   local found = false
+   self.exampleIds:apply(function(x)
+      if x == exampleId then
+         found = true
+      end
+   end)
+   return found
+end
+
diff --git a/contrib/lua-torch/decisiontree/WorkPool.lua b/contrib/lua-torch/decisiontree/WorkPool.lua
new file mode 100644
index 000000000..8f473727e
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/WorkPool.lua
@@ -0,0 +1,156 @@
+local dt = require "decisiontree._env"
+
+-- Utility to simplify construction of a pool of daemon threads with which to execute tasks in parallel.
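+--
+-- A typical exchange with the pool looks like this (a sketch based on the
+-- methods defined below):
+--
+--   local pool = dt.WorkPool(2)
+--   pool:update('require', {libname='decisiontree', varname='dt'}) -- setup each worker
+--   pool:write('execute', function(store, myId) return myId * 10 end)
+--   local taskname, result = pool:read() -- answer from whichever worker took the task
+--   pool:terminate()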
+local WorkPool = torch.class("dt.WorkPool", dt)
+
+function WorkPool:__init(nThread)
+   self.nThread = nThread or 16
+   assert(torch.type(self.nThread) == 'number')
+   assert(self.nThread > 0)
+
+   self:initialize()
+end
+
+function WorkPool:initialize()
+   local ipc = require 'libipc'
+   self.queuename = os.tmpname()
+   self.queue = ipc.workqueue(self.queuename)
+   self.queues = {}
+   for i=1,self.nThread do
+      self.queues[i] = ipc.workqueue(self.queuename.."/"..i)
+   end
+
+   -- spawn thread workers
+   ipc.map(self.nThread, function(queuename, nThread, myId)
+      assert(nThread)
+      assert(myId)
+      local ipc = require 'libipc'
+
+      -- Open the queue by name (the main thread already created it)
+      local mainqueue = ipc.workqueue(queuename)
+      local workqueue = ipc.workqueue(queuename.."/"..myId)
+
+      local taskname, task
+
+      local store = {}
+      local queue = mainqueue
+
+      repeat
+         local msg = queue:read()
+         assert(torch.type(msg) == 'table')
+         taskname, task = unpack(msg)
+         if taskname == nil then
+            break
+         elseif torch.type(taskname) ~= 'string' then
+            error("Expecting taskname string. Got "..torch.type(taskname))
+         elseif taskname == 'storeKeyValue' then
+            assert(torch.type(task) == 'table')
+            assert(queue == workqueue)
+            store[task.key] = task.value
+            queue:write({taskname})
+         elseif taskname == 'storeKeysValues' then
+            assert(torch.type(task) == 'table')
+            assert(queue == workqueue)
+            for key,value in pairs(task) do
+               store[key] = value
+            end
+            queue:write({taskname})
+         elseif taskname == 'require' then
+            assert(torch.type(task) == 'table')
+            assert(torch.type(task.libname) == 'string')
+            assert(torch.type(task.varname) == 'string')
+            _G[task.varname] = require(task.libname)
+            assert(queue == workqueue)
+            queue:write({taskname})
+         elseif taskname == 'storeReset' then
+            store = {}
+            mainqueue:write({taskname})
+         elseif taskname == 'echo' then
+            mainqueue:write({taskname, task})
+         elseif taskname == 'readWorkerQueue' then
+            queue = workqueue
+         elseif taskname == 'readMainQueue' then
+            queue = mainqueue
+         elseif taskname == 'execute' then
+            if torch.type(task) == 'table' then
+               assert(task.func and task.args)
+               queue:write({taskname, task.func(store, task.args, myId)})
+            else
+               assert(torch.type(task) == 'function')
+               queue:write({taskname, task(store, myId)})
+            end
+         else
+            error("Unknown taskname: "..taskname)
+         end
+      until taskname == nil
+   end, self.queuename, self.nThread)
+
+end
+
+-- Terminates all daemon threads.
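+-- Writing an empty message to the shared queue makes a worker's `taskname` nil,
+-- which breaks that worker out of its repeat/until loop.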
+function WorkPool:terminate()
+   for i=1,self.nThread do
+      self.queue:write({})
+   end
+end
+
+-- this function is used to update the store of data in each worker thread
+function WorkPool:_update(taskname, task, upval)
+   assert(torch.type(taskname) == 'string')
+   local _ = require 'moses'
+   assert(_.contains({'storeKeyValue','storeKeysValues','require','execute'}, taskname))
+   assert(torch.type(task) == 'table' or torch.type(task) == 'function')
+
+   -- tell the workers to read their individual queue
+   for i=1,self.nThread do
+      self.queue:write({'readWorkerQueue'})
+   end
+
+   -- write to individual worker queues
+   for i=1,self.nThread do
+      if upval then
+         self.queues[i]:writeup({taskname, task})
+      else
+         self.queues[i]:write({taskname, task})
+      end
+   end
+
+   -- TODO use ipc.mutex:barrier(nThread+1)
+   -- barrier: make sure that every worker has completed the task by reading their queue
+   for i=1,self.nThread do
+      assert(self.queues[i]:read()[1] == taskname)
+   end
+
+   -- finally, tell them to read the main queue
+   for i=1,self.nThread do
+      self.queues[i]:write({'readMainQueue'})
+   end
+end
+
+function WorkPool:update(taskname, task)
+   return self:_update(taskname, task, false)
+end
+
+function WorkPool:updateup(taskname, task)
+   return self:_update(taskname, task, true)
+end
+
+function WorkPool:write(taskname, task)
+   assert(torch.type(taskname) == 'string')
+   assert(taskname ~= 'storeKeyValue' and taskname ~= 'storeKeysValues')
+   self.queue:write({taskname, task})
+end
+
+function WorkPool:writeup(taskname, task)
+   assert(torch.type(taskname) == 'string')
+   assert(taskname ~= 'storeKeyValue' and taskname ~= 'storeKeysValues')
+   self.queue:writeup({taskname, task})
+end
+
+function WorkPool:read()
+   local res = self.queue:read()
+   assert(torch.type(res) == 'table')
+   assert(torch.type(res[1]) == 'string')
+   return unpack(res)
+end
+
diff --git a/contrib/lua-torch/decisiontree/_env.lua b/contrib/lua-torch/decisiontree/_env.lua
new file mode 100644
index 000000000..a92770152
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/_env.lua
@@ -0,0 +1,5 @@
+
+-- https://github.com/torch/torch7/issues/525
+
+local dl = {}
+return dl
\ No newline at end of file diff --git a/contrib/lua-torch/decisiontree/benchmark.lua b/contrib/lua-torch/decisiontree/benchmark.lua new file mode 100644 index 000000000..2b6a03dc6 --- /dev/null +++ b/contrib/lua-torch/decisiontree/benchmark.lua @@ -0,0 +1,171 @@ +local dt = require "decisiontree._env" + +local bm = {} +function bm.CartTrainer(opt) + local timer = torch.Timer() + local trainSet, validSet = dt.getSparseDummyData(opt) + print(string.format("CartTrainer: sparse dataset create: %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real)) + + local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes) + local treeState = dt.GiniState(trainSet:getExampleIds()) + timer:reset() + local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds) + print(string.format("CartTrainer: train single-thread : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real)) + + timer:reset() + cartTrainer:featureParallel(opt.nThread) + print(string.format("CartTrainer: setup feature-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real)) + timer:reset() + local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds) + print(string.format("CartTrainer: train feature-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real)) +end + +function bm.GradientBoostState(opt) + local trainSet, validSet = dt.getSparseDummyData(opt) + + trainSet:initScore() + + local treeState = dt.GradientBoostState(trainSet:getExampleIds(), nn.LogitBoostCriterion(false)) + + local timer = torch.Timer() -- first step also calls SparseTensor:buildIndex() + treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3) + print(string.format("GradientBoostState: findBestSplit (first) : %f sec", timer:time().real)) + + timer:reset() + treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3) + print(string.format("GradientBoostState: findBestSplit (second) : %f sec", timer:time().real)) + +end + +local function file_exists(name) + local f=io.open(name,"r") + if f~=nil then io.close(f) return true else return false end +end + +function bm.GradientBoostTrainer(opt) + local trainSet, validSet + if file_exists("/tmp/train.bin") and file_exists("/tmp/valid.bin") then + trainSet = torch.load("/tmp/train.bin") + validSet = torch.load("/tmp/valid.bin") + else + if opt.sparse then + trainSet, validSet = dt.getSparseDummyData(opt) + else + trainSet, validSet = dt.getDenseDummyData(opt) + end + torch.save("/tmp/train.bin", trainSet) + torch.save("/tmp/valid.bin", validSet) + end + + local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes) + opt.lossFunction = nn.LogitBoostCriterion(false) + opt.treeTrainer = cartTrainer + local forestTrainer = dt.GradientBoostTrainer(opt) + + local timer = torch.Timer() + local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds, validSet) + local time = timer:time().real + print(string.format("GradientBoostTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time)) + + cartTrainer:featureParallel(opt.nThread) + timer:reset() + local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds, validSet) + local time = timer:time().real + print(string.format("GradientBoostTrainer: train feature-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time)) +end + +function bm.RandomForestTrainer(opt) + local trainSet, validSet = 
dt.getSparseDummyData(opt) + + local forestTrainer = dt.RandomForestTrainer(opt) + local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds) + + local timer = torch.Timer() + local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds) + local time = timer:time().real + print(string.format("RandomForestTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time)) + + timer:reset() + forestTrainer:treeParallel(opt.nThread) + print(string.format("RandomForestTrainer: setup tree-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real)) + + timer:reset() + local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds) + local time = timer:time().real + print(string.format("RandomForestTrainer: train tree-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time)) +end + +function bm.DFD(opt) + local _ = require 'moses' + local opt = _.clone(opt) + opt.nExample = 200 + local trainSet, validSet = dt.getDenseDummyData(opt) + + local forestTrainer = dt.RandomForestTrainer(opt) + forestTrainer:treeParallel(opt.nThread) + local timer = torch.Timer() + local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds) + local time = timer:time().real + print(string.format("DFD: train random forest in parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time)) + + + -- benchmark nn.DFD + local input = trainSet.input:sub(1,opt.batchsize) + local dfd = nn.DFD(decisionForest) + dfd:forward(input) + timer:reset() + for i=1,opt.nloop do + dfd:forward(input) + end + print(string.format("DFD: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/timer:time().real, timer:time().real)) +end + +function bm.Sparse2Dense(opt) + local _ = require 'moses' + local opt = _.clone(opt) + opt.nExample = opt.batchsize + local trainSet = dt.getSparseDummyData(opt) + + local input = {{},{}} + for i=1,opt.batchsize do + input[1][i] = trainSet.input[i].keys + input[2][i] = trainSet.input[i].values + end + assert(#input[1] == opt.batchsize) + + -- benchmark nn.Sparse2Dense + local s2d = nn.Sparse2Dense(torch.LongTensor():range(1,opt.nFeature)) + s2d:forward(input) + local timer = torch.Timer() + for i=1,opt.nloop do + s2d:forward(input) + end + print(string.format("Sparse2Dense: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/timer:time().real, timer:time().real)) +end + +function dt.benchmark(benchmarks, opt2) + local opt = { + nExample=10000, nCluster=2, nFeature=1000, overlap=0, nValid=100, -- getSparseDummyData + nTree=20, featureBaggingSize=-1, sparse=true, -- GradientBoostTrainer and RandomForestTrainer + nThread=2, shrinkage=0.1, downsampleRatio=0.1, evalFreq=5, earlyStop=0, -- GradientBoostTrainer + activeRatio=0.5, -- RandomForestTrainer + batchsize=32, nloop=10 + } + + local _ = require 'moses' + benchmarks = benchmarks or _.keys(bm) + assert(torch.type(benchmarks) == 'table') + for i,benchmark in ipairs(benchmarks) do + local opt1 = _.clone(opt) + for key, value in pairs(opt2 or {}) do + opt1[key] = value + end + opt1.nActive = opt1.nActive or torch.round(opt1.nFeature/10) + opt1.maxLeafNodes = opt1.maxLeafNodes or (opt1.nExample/10) + opt1.minLeafSize = opt1.minLeafSize or (opt1.nExample/100) + + assert(torch.type(benchmark) == 'string', benchmark) + assert(bm[benchmark], benchmark) + bm[benchmark](opt1) + end +end diff --git a/contrib/lua-torch/decisiontree/doc/benchmark.md 
b/contrib/lua-torch/decisiontree/doc/benchmark.md
new file mode 100644
index 000000000..cb8f905d6
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/doc/benchmark.md
@@ -0,0 +1,291 @@
+# Benchmarks
+
+This file outlines the roadmap (and corresponding benchmarks) of optimizations and refactorings over time.
+
+## Baseline
+
+The baseline implementation is very slow.
+We converted the Twitter decision tree library (used internally) from Java to Lua.
+The objective was to replicate the GBDT and Random Forest implementations as is (more or less).
+The Java library is very good and reasonably fast. The same code in Lua is slow.
+The point of this Lua baseline was not to obtain the same computational performance as the Java library.
+Instead, we wanted the training and inference algorithms of the Lua lib to match those of the Java lib.
+As such, the training/validation error of the baseline Lua lib should match that of the Java lib.
+The unit tests seem to validate this claim as both training/validation set performance is unit tested.
+We also used the conversion exercise as a way to learn about decision tree implementation (our background is deep learning).
+That being said, the baseline performance is terrible:
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark()"
+CartTrainer: sparse dataset create: 2963.192386 samples/sec; 0.337479 sec
+CartTrainer: train single-thread : 14.165438 samples/sec; 70.594361 sec
+CartTrainer: setup feature-parallel : 5.129034 samples/sec; 194.968478 sec
+CartTrainer: train feature-parallel : 9.736592 samples/sec; 102.705344 sec
+```
+
+The original Java lib had approximately 43 classes.
+The baseline has about 24.
+This reduction is due to obvious merging of classes, but also to conversions of classes to functions.
+The next patches continue this process of reducing the number of classes.
+
+## Patch 1 (complete):
+
+This patch further reduces the number of classes, but adds the DataSet class.
+The code is much simpler to read. Examples are batched.
+
+ * [x] examples are batched in dt.DataSet: {input, target, score}
+ * [x] deprecate dt.LabeledExample
+ * [x] lists of examples are replaced with torch.LongTensors of exampleIds
+ * [x] merge TreeBrancher into TreeState
+ * [x] merge BestSplitFinder and SplitStateUpdater into TreeState
+ * [x] TreeState subclasses: GradientBoostState and GiniState
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark()"
+CartTrainer: sparse dataset create: 3597.392294 samples/sec; 0.277984 sec
+CartTrainer: train single-thread : 35.763255 samples/sec; 27.961663 sec
+CartTrainer: setup feature-parallel : 36759.250495 samples/sec; 0.027220 sec
+CartTrainer: train feature-parallel : 72.523658 samples/sec; 13.788606 sec
+```
+
+The setup time for feature-parallelization is most improved.
+The run-time for feature-parallel is also about half that of single-thread.
+Since it is using 2 threads, that means the parallelization is working quite well.
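+
+These numbers come from running `dt.benchmark()` with its default options.
+A subset of the benchmarks can also be run with overridden options, as in this sketch (option names follow the defaults table in `benchmark.lua`):
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark({'CartTrainer'}, {nExample=1000, nThread=4})"
+```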
+
+We also added benchmarks for the `RandomForestTrainer` and `GradientBoostTrainer`:
+
+```
+GradientBoostTrainer: train single-thread : 599.895105 samples/sec; 0.083348 sec/tree, 1.666958 sec
+GradientBoostTrainer: train feature-parallel : 974.235273 samples/sec; 0.051322 sec/tree, 1.026446 sec
+RandomForestTrainer: train single-thread : 134.781044 samples/sec; 0.370972 sec/tree, 7.419441 sec
+RandomForestTrainer: setup tree-parallel : 73341.097064 samples/sec; 0.013649 sec
+RandomForestTrainer: train tree-parallel : 262.975891 samples/sec; 0.190131 sec/tree, 3.802630 sec
+```
+
+Looks good.
+
+## Patch 2 (complete):
+
+ * [x] dt.LossFunction -> nn.Criterion (LogitBoost is done, missing MSE)
+ * [x] use SparseTensor:buildIndex() to accelerate TreeState:findBestSplit()
+ * [x] benchmarks use 10000 instead of 1000 examples
+
+The benchmarks indicate good improvements. Most improvements were made possible by the use of `buildIndex`:
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark()"
+GradientBoostState: findBestSplit (first) : 11.415645 sec
+GradientBoostState: findBestSplit (second) : 11.246336 sec
+CartTrainer: sparse dataset create: 3284.803629 samples/sec; 3.044327 sec
+CartTrainer: train single-thread : 239.544758 samples/sec; 41.745858 sec
+CartTrainer: setup feature-parallel : 10996.443063 samples/sec; 0.909390 sec
+CartTrainer: train feature-parallel : 473.888592 samples/sec; 21.102011 sec
+RandomForestTrainer: train single-thread : 892.985186 samples/sec; 0.559920 sec/tree, 11.198394 sec
+RandomForestTrainer: setup tree-parallel : 176806.252266 samples/sec; 0.056569 sec
+RandomForestTrainer: train tree-parallel : 1377.849291 samples/sec; 0.362884 sec/tree, 7.257688 sec
+GradientBoostTrainer: train single-thread : 2685.485128 samples/sec; 0.186186 sec/tree, 3.723722 sec
+GradientBoostTrainer: train feature-parallel : 3712.313215 samples/sec; 0.134687 sec/tree, 2.693738 sec
+```
+
+The main bottleneck now is in serializing the SparseTensor hash maps. We temporarily overcame this bottleneck by
+deleting indexes when calling `CartTrainer:featureParallel()` and `RandomForestTrainer:treeParallel()`.
+In this way, the indexes are recreated for each thread. Ideally, we would use a C hash map such that a pointer
+could be serialized instead, but `tds.Hash` does not serialize well. For now, we use Lua tables instead.
+
+This is the benchmark for `GradientBoostTrainer` on a large dataset of dense inputs:
+
+```
+th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})"
+GradientBoostTrainer: train single-thread : 152.463989 samples/sec; 131.178517 sec/tree, 655.892584 sec
+GradientBoostTrainer: train feature-parallel : 224.288488 samples/sec; 89.170872 sec/tree, 445.854358 sec
+[tw-mbp-nleonard decisiontree]$ th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8,nThread=4})"
+GradientBoostTrainer: train single-thread : 163.836896 samples/sec; 122.072625 sec/tree, 610.363126 sec
+GradientBoostTrainer: train feature-parallel : 407.981442 samples/sec; 49.021838 sec/tree, 245.109188 sec
+```
+
+## Patch 3 :
+
+Optimize GBDT for large datasets consisting of dense inputs.
The benchmarks: + +``` +th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})" +GradientBoostTrainer: train single-thread : 547.553407 samples/sec; 36.526117 sec/tree, 182.630587 sec +GradientBoostTrainer: train feature-parallel : 792.964678 samples/sec; 25.221804 sec/tree, 126.109022 sec +[tw-mbp-nleonard decisiontree]$ th -e "dt = require 'decisiontree'; dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8,nThread=4})" +GradientBoostTrainer: train single-thread : 555.793759 samples/sec; 35.984571 sec/tree, 179.922855 sec +GradientBoostTrainer: train feature-parallel : 1289.977846 samples/sec; 15.504142 sec/tree, 77.520711 sec +``` + +For 1, 2 and 4 threads, the speedups of patch 3 over patch 2 are respectively: 3.39, 3.53, and 3.18. +For this patch, the multi-threading speedup of 2 and 4 threads over a single thread are respectively: 1.42 and 2.33. +Improvements over the previous patch were obtained by optimizing two aspects: + + 1. Optimizing `TreeState.findBestFeatureSplit` for dense datasets (for example: `if dense, then ...`); + 2. Removing `assert` clauses in `GradientBoostState.update`. The `update` method is called for every (example, feature), making it a major bottleneck. + +Converting the `update` to C could lead to further optimizations. + +This patch also improves the benchmark on sparse datasets: +``` +$ th -e "dt = require 'decisiontree'; dt.benchmark()" +RandomForestTrainer: train single-thread : 1121.311196 samples/sec; 0.445907 sec/tree, 8.918131 sec +RandomForestTrainer: setup tree-parallel : 168773.323354 samples/sec; 0.059256 sec +RandomForestTrainer: train tree-parallel : 1701.280938 samples/sec; 0.293896 sec/tree, 5.877924 sec +GradientBoostState: findBestSplit (first) : 8.250646 sec +GradientBoostState: findBestSplit (second) : 7.952077 sec +GradientBoostTrainer: train single-thread : 3355.248596 samples/sec; 0.149020 sec/tree, 2.980405 sec +GradientBoostTrainer: train feature-parallel : 4399.133369 samples/sec; 0.113659 sec/tree, 2.273175 sec +CartTrainer: sparse dataset create: 3428.105601 samples/sec; 2.917069 sec +CartTrainer: train single-thread : 282.172416 samples/sec; 35.439331 sec +CartTrainer: setup feature-parallel : 9455.440801 samples/sec; 1.057598 sec +CartTrainer: train feature-parallel : 594.054049 samples/sec; 16.833491 sec +DFD: train random forest in parallel : 346.831378 samples/sec; 0.288325 sec/tree, 5.766491 sec +DFD: updateOutput : 831.105546 samples/sec; 0.038509 sec +``` + +## Patch 4 : + +This patch improves `nn.DFD` from + +``` +th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})" +DFD: train random forest in parallel : 10.527251 samples/sec; 0.037997 sec/tree, 18.998313 sec +DFD: updateOutput : 32.442950 samples/sec; 9.863472 sec +``` + +to + +``` +th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})" +DFD: train random forest in parallel : 10.839547 samples/sec; 0.036902 sec/tree, 18.450956 sec +DFD: updateOutput : 359.158353 samples/sec; 0.890975 sec +Sparse2Dense: updateOutput : 15395.648952 samples/sec; 0.020791 sec +``` + +That is a 10x speedup for `nn.DFD`. 
+ +The patch also adds a benchmark for `nn.Sparse2Dense`: + +``` +th -e "dt = require 'decisiontree'; dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1})" +Sparse2Dense: updateOutput : 17158.126406 samples/sec; 0.018653 sec +``` + +Indeed, `nn.Sparse2Dense` is not the bottleneck; `nn.DFD` is. + +## Patch 5 : + +This patch improves `nn.DFD` inference from + +``` +for i in `seq 3`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=100})"; done +DFD: train random forest in parallel : 8.452295 samples/sec; 0.047324 sec/tree, 23.662212 sec +DFD: updateOutput : 176.617872 samples/sec; 9.059109 sec +DFD: train random forest in parallel : 8.350019 samples/sec; 0.047904 sec/tree, 23.952042 sec +DFD: updateOutput : 183.508204 samples/sec; 8.718962 sec +DFD: train random forest in parallel : 8.525779 samples/sec; 0.046917 sec/tree, 23.458266 sec +DFD: updateOutput : 178.877077 samples/sec; 8.944692 sec +``` + +to + +``` +for i in `seq 3`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=100})"; done +DFD: train random forest in parallel : 8.434502 samples/sec; 0.047424 sec/tree, 23.712129 sec +DFD: updateOutput : 6479.597179 samples/sec; 0.246933 sec +DFD: train random forest in parallel : 8.334543 samples/sec; 0.047993 sec/tree, 23.996518 sec +DFD: updateOutput : 6663.641184 samples/sec; 0.240114 sec +DFD: train random forest in parallel : 8.353265 samples/sec; 0.047885 sec/tree, 23.942735 sec +DFD: updateOutput : 6882.607456 samples/sec; 0.232475 sec +``` + +That is a 37x speedup for `nn.DFD`. + +## Patch 6: + +This patch improves `nn.DFD` from the previous result to + +``` +for i in `seq 5`; do th -e "dt = require 'decisiontree'; dt.benchmark({'DFD'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,batchsize=16,nActive=1200,nFeature=1300,nloop=10000})"; done +DFD: train random forest in parallel : 8.353504 samples/sec; 0.047884 sec/tree, 23.942050 sec +DFD: updateOutput : 91967.342339 samples/sec; 1.739753 sec +DFD: train random forest in parallel : 8.528141 samples/sec; 0.046904 sec/tree, 23.451770 sec +DFD: updateOutput : 91405.321702 samples/sec; 1.750451 sec +DFD: train random forest in parallel : 8.184562 samples/sec; 0.048872 sec/tree, 24.436250 sec +DFD: updateOutput : 91623.388867 samples/sec; 1.746284 sec +DFD: train random forest in parallel : 8.779561 samples/sec; 0.045560 sec/tree, 22.780182 sec +DFD: updateOutput : 93914.242852 samples/sec; 1.703686 sec +DFD: train random forest in parallel : 8.636201 samples/sec; 0.046317 sec/tree, 23.158330 sec +DFD: updateOutput : 94092.241963 samples/sec; 1.700465 sec +``` + +That is another 13.8x speedup. 
+
+## Patch 7:
+
+This patch improves `nn.Sparse2Dense` computation from
+
+```
+for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
+Sparse2Dense: updateOutput : 1103.570777 samples/sec; 28.996786 sec
+Sparse2Dense: updateOutput : 1092.064331 samples/sec; 29.302309 sec
+Sparse2Dense: updateOutput : 1036.963572 samples/sec; 30.859334 sec
+```
+
+to
+
+```
+for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
+Sparse2Dense: updateOutput : 62995.834470 samples/sec; 0.507978 sec
+Sparse2Dense: updateOutput : 62471.568253 samples/sec; 0.512242 sec
+Sparse2Dense: updateOutput : 62965.099331 samples/sec; 0.508226 sec
+```
+
+This represents a speedup of about 57x.
+
+## Patch 8:
+
+This patch improves `nn.Sparse2Dense` from the previous result to
+
+```
+for i in `seq 3`; do th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'Sparse2Dense'}, {nTree=500,maxLeafNodes=8,minLeafSize=1,nFeature=1500,nActive=1300,nloop=1000})"; done
+Sparse2Dense: updateOutput : 124268.079914 samples/sec; 0.257515 sec
+Sparse2Dense: updateOutput : 114750.039542 samples/sec; 0.278873 sec
+Sparse2Dense: updateOutput : 122863.314766 samples/sec; 0.260458 sec
+```
+
+which corresponds to another 1.95x speedup.
+
+## Patch 9:
+
+This patch moves the core of training GBDTs, which used to be a big bottleneck, to C. It also
+performs small optimizations across the board (faster scoring, faster branching, ...) that provide a
+little more performance.
+
+The original commit had this performance:
+
+```
+th -e "dt = require 'decisiontree'; torch.setdefaulttensortype('torch.FloatTensor'); dt.benchmark({'GradientBoostTrainer'}, {nExample=100000, sparse=false, nFeature=836, nTree=5, downsampleRatio=1, minLeafSize=1000, maxLeafNodes=8})"
+GradientBoostTrainer: train single-thread : 500.414666 samples/sec; 39.966854 sec/tree, 199.834271 sec
+GradientBoostTrainer: train feature-parallel : 1227.228044 samples/sec; 16.296890 sec/tree, 81.484448 sec (4 threads)
+GradientBoostTrainer: train feature-parallel : 1385.926280 samples/sec; 14.430782 sec/tree, 72.153910 sec (8 threads)
+```
+
+and the new version has
+
+```
+GradientBoostTrainer: train single-thread : 15285.644631 samples/sec; 1.308417 sec/tree, 6.542086 sec
+GradientBoostTrainer: train feature-parallel : 43170.435932 samples/sec; 0.463280 sec/tree, 2.316400 sec (4 threads)
+GradientBoostTrainer: train feature-parallel : 50062.681239 samples/sec; 0.399499 sec/tree, 1.997496 sec (8 threads)
+```
+
+That represents a speedup of about 30.5x over the baseline for 1 thread and 36.1x for 8 threads.
+Note that the performance doesn't increase much as we increase the number of threads, since we use
+feature parallelism and the number of features evaluated is small (29 in this case) due to bagging.
+If we disable bagging, then we have the following result with 8 threads and the new code:
+
+```
+GradientBoostTrainer: train single-thread : 590.823965 samples/sec; 33.851030 sec/tree, 169.255152 sec
+GradientBoostTrainer: train feature-parallel : 3232.188576 samples/sec; 6.187758 sec/tree, 30.938789 sec
+```
+
+So processing 836 features now is much faster than processing 29 before.
diff --git a/contrib/lua-torch/decisiontree/error.h b/contrib/lua-torch/decisiontree/error.h new file mode 100644 index 000000000..18df3c939 --- /dev/null +++ b/contrib/lua-torch/decisiontree/error.h @@ -0,0 +1,24 @@ +#ifndef _ERROR_H_ +#define _ERROR_H_ + +#include "luaT.h" +#include <string.h> + +static inline int _lua_error(lua_State *L, int ret, const char* file, int line) { + int pos_ret = ret >= 0 ? ret : -ret; + return luaL_error(L, "ERROR: (%s, %d): (%d, %s)\n", file, line, pos_ret, strerror(pos_ret)); +} + +static inline int _lua_error_str(lua_State *L, const char *str, const char* file, int line) { + return luaL_error(L, "ERROR: (%s, %d): (%s)\n", file, line, str); +} + +static inline int _lua_error_str_str(lua_State *L, const char *str, const char* file, int line, const char *extra) { + return luaL_error(L, "ERROR: (%s, %d): (%s: %s)\n", file, line, str, extra); +} + +#define LUA_HANDLE_ERROR(L, ret) _lua_error(L, ret, __FILE__, __LINE__) +#define LUA_HANDLE_ERROR_STR(L, str) _lua_error_str(L, str, __FILE__, __LINE__) +#define LUA_HANDLE_ERROR_STR_STR(L, str, extra) _lua_error_str_str(L, str, __FILE__, __LINE__, extra) + +#endif diff --git a/contrib/lua-torch/decisiontree/generic/CartTree.c b/contrib/lua-torch/decisiontree/generic/CartTree.c new file mode 100644 index 000000000..eb29fcf02 --- /dev/null +++ b/contrib/lua-torch/decisiontree/generic/CartTree.c @@ -0,0 +1,88 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/CartTree.c" +#else + +static int nn_(tree_fast_score)(lua_State *L) { + THTensor *input = luaT_checkudata(L, 1, torch_Tensor); + THTensor *score = luaT_checkudata(L, 3, torch_Tensor); + long n_samples = THTensor_(size)(input, 0); + long n_features = THTensor_(size)(input, 1); + THTensor_(resize1d)(score, n_samples); + real *input_data = THTensor_(data)(input); + real *score_data = THTensor_(data)(score); + + lua_pushstring(L, "leftChild"); + const int left_child_string = 4; + lua_pushstring(L, "rightChild"); + const int right_child_string = 5; + lua_pushstring(L, "score"); + const int score_string = 6; + lua_pushstring(L, "splitFeatureId"); + const int id_string = 7; + lua_pushstring(L, "splitFeatureValue"); + const int value_string = 8; + + const int original_top = lua_gettop(L); + for (long i = 0; i < n_samples; i++) { + int node = 2; + while (1) { + int current_top = lua_gettop(L); + lua_pushvalue(L, left_child_string); + lua_rawget(L, node); + lua_pushvalue(L, right_child_string); + lua_rawget(L, node); + if (lua_isnil(L, -2) && lua_isnil(L, -1)) { + lua_pushvalue(L, score_string); + lua_rawget(L, node); + score_data[i] = lua_tonumber(L, -1); + break; + } + if (lua_isnil(L, -2)) { + // go to right + node = current_top + 2; + continue; + } + if (lua_isnil(L, -1)) { + // go to left + node = current_top + 1; + continue; + } + lua_pushvalue(L, id_string); + lua_rawget(L, node); + lua_pushvalue(L, value_string); + lua_rawget(L, node); + long feature_id = lua_tointeger(L, -2); + real feature_value = lua_tonumber(L, -1); + + real current_value = input_data[i * n_features + (feature_id-1)]; + if (current_value < feature_value) { + // go to left + node = current_top + 1; + } + else { + // go to right + node = current_top + 2; + } + } + lua_pop(L, lua_gettop(L) - original_top); + } + + lua_pop(L, 5); + + lua_pushvalue(L, 3); + return 1; +} + +static const struct luaL_Reg nn_(CT__) [] = { + {"CartTreeFastScore", nn_(tree_fast_score)}, + {NULL, NULL} +}; + +static void nn_(CT_init)(lua_State *L) +{ + luaT_pushmetatable(L, torch_Tensor); + 
luaT_registeratname(L, nn_(CT__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/contrib/lua-torch/decisiontree/generic/DFD.c b/contrib/lua-torch/decisiontree/generic/DFD.c new file mode 100644 index 000000000..599c4d794 --- /dev/null +++ b/contrib/lua-torch/decisiontree/generic/DFD.c @@ -0,0 +1,157 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/DFD.c" +#else + +static int nn_(DFD_computeOutput)(lua_State *L) { + THLongTensor *outputkeys = luaT_checkudata(L, 1, "torch.LongTensor"); + THTensor *outputvalues = luaT_checkudata(L, 2, torch_Tensor); + THLongTensor *root_ids = luaT_checkudata(L, 3, "torch.LongTensor"); + THLongTensor *left_child = luaT_checkudata(L, 4, "torch.LongTensor"); + THLongTensor *right_child = luaT_checkudata(L, 5, "torch.LongTensor"); + THLongTensor *split_feature_id = luaT_checkudata(L, 6, "torch.LongTensor"); + THTensor *split_feature_value = luaT_checkudata(L, 7, torch_Tensor); + THTensor *input = luaT_checkudata(L, 8, torch_Tensor); + char only_last_node = lua_toboolean(L, 9); + + // gets some important sizes from the input + long batch_size = THTensor_(size)(input, 0); + long input_size = THTensor_(size)(input, 1); + long roots_size = THLongTensor_size(root_ids, 0); + long depth = THLongTensor_size(outputkeys, 1); + + // keeps track of the number of nodes traversed in the trees by each sample. + // each traversed node maps to an output feature having a value of 1 + long outputsize[batch_size]; + for (long i = 0; i < batch_size; i++) + outputsize[i] = 0; + + // gets direct pointers to the memory of each tensor for efficiency + long *root_ids_data = THLongTensor_data(root_ids); + long *left_child_data = THLongTensor_data(left_child); + long *right_child_data = THLongTensor_data(right_child); + real *split_feature_value_data = THTensor_(data)(split_feature_value); + long *split_feature_id_data = THLongTensor_data(split_feature_id); + long *outputkeys_data = THLongTensor_data(outputkeys); + real *input_data = THTensor_(data)(input); + + // for each sample in the batch + for (long sample_index = 0; sample_index < batch_size; sample_index++) { + // gets pointers to the direct memory associated with each sample for efficiency + const long outputkeys_offset = sample_index * depth; + const long input_offset = sample_index * input_size; + long *local_outputkeys_data = &outputkeys_data[outputkeys_offset]; + real *local_input_data = &input_data[input_offset]; + + // for each tree in the forest + for (long i = 0; i < roots_size; i++) { + int root = 1; + long node_id = root_ids_data[i]; + + // traverses the whole tree keeping track of which nodes were seen + while (1) { + if (root) { + // root nodes aren't added to output because they are always traversed + root = 0; + } + else if (!only_last_node) { + // updates the outputsize for all samples traversing this node; and + // set the traversed node as a feature in output for exampleIds + long output_index = outputsize[sample_index]; + // updates the outputsize for all samples traversing this node + outputsize[sample_index]++; + // sets the traversed node as a feature in output for exampleIds + local_outputkeys_data[output_index] = node_id; + } + + // gets the left and right nodes. 
values of -1 represent missing node + long left_id = left_child_data[node_id-1]; + long right_id = right_child_data[node_id-1]; + + if (left_id <= 0 && right_id <= 0) { + if (only_last_node) { + long output_index = outputsize[sample_index]; + outputsize[sample_index]++; + local_outputkeys_data[output_index] = node_id; + } + // if no children, stops + break; + } + else if (left_id <= 0) { + // if no left child, traverses right node + node_id = right_id; + } + else if (right_id <= 0) { + // if no right child, traverses left node + node_id = left_id; + } + else { + // if both left and right children, finds the direction for this sample + // first get the reference from the node + real split_value = split_feature_value_data[node_id-1]; + long split_id = split_feature_id_data[node_id-1]-1; + + // then gets the value of the sample + real node_value = local_input_data[split_id]; + // and branchs + if (node_value < split_value) + node_id = left_id; + else + node_id = right_id; + } + } + } + } + + // now that we know which nodes were traverse for each sample, we can create the sparse output + // with 1 entry pair for each sample + THTensor *input_feature = THTensor_(new)(); + THLongTensor *indices = THLongTensor_new(); + + // pushes the return table with 2 children tables + lua_newtable(L); + lua_pushinteger(L, 1); + lua_newtable(L); + lua_pushinteger(L, 2); + lua_newtable(L); + + // for each sample... + for (long i = 0; i < batch_size; i++) { + long j = outputsize[i]; + // selects the tensor lines from the dense output + THLongTensor_select(indices, outputkeys, 0, i); + THTensor_(select)(input_feature, outputvalues, 0, i); + + // narrows the keys to actual number of nodes traversed and saves to the output + lua_pushinteger(L, i+1); + luaT_pushudata(L, THLongTensor_newNarrow(indices, 0, 0, j), "torch.LongTensor"); + lua_settable(L, -5); + + // and narrows the values + lua_pushinteger(L, i+1); + luaT_pushudata(L, THTensor_(newNarrow)(input_feature, 0, 0, j), torch_Tensor); + lua_settable(L, -3); + } + + // pushes the two parts of the output into the output table + lua_settable(L, -5); + lua_settable(L, -3); + + THLongTensor_free(indices); + THTensor_(free)(input_feature); + + return 1; +} + +static const struct luaL_Reg nn_(DFD__) [] = { + {"DFD_computeOutput", nn_(DFD_computeOutput)}, + {NULL, NULL} +}; + +static void nn_(DFD_init)(lua_State *L) +{ + luaT_pushmetatable(L, torch_Tensor); + luaT_registeratname(L, nn_(DFD__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/contrib/lua-torch/decisiontree/generic/GBDT.c b/contrib/lua-torch/decisiontree/generic/GBDT.c new file mode 100644 index 000000000..31f5b025d --- /dev/null +++ b/contrib/lua-torch/decisiontree/generic/GBDT.c @@ -0,0 +1,392 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/GBDT.c" +#else + +#include "GBDT_internal.h" +#include "GBDT_internal.c" + +// note that each one of the functions to find the best split is a subset of the next. 
+
+// first we have one that can only evaluate a single feature, using the logic in lua to control the
+// features
+// then we have one that can go over a shard of features, following the feature parallelism
+// introduced by the lua logic
+// and finally we have one that performs the feature parallelism itself in the special case of
+// dense tensors
+// these functions are provided for completeness and to allow testing in case the logic is changed
+
+// finds the best split for a given node and feature
+static int nn_(gb_findBestFeatureSplit)(lua_State *L) {
+  THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor");
+  const int dataset_index = 2;
+  if (!lua_isnumber(L, 3))
+    return LUA_HANDLE_ERROR_STR(L, "third argument should be an integer");
+  long feature_id = lua_tointeger(L, 3);
+  if (!lua_isnumber(L, 4))
+    return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer");
+  long minLeafSize = lua_tointeger(L, 4);
+  // Since minLeafSize == 1 corresponds to each sample in its own leaf, any value below it doesn't
+  // make sense
+  if (minLeafSize < 1)
+    minLeafSize = 1;
+  THTensor *grad = luaT_checkudata(L, 5, torch_Tensor);
+  THTensor *hess = luaT_checkudata(L, 6, torch_Tensor);
+
+  if (!THLongTensor_isContiguous(exampleIds))
+    return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous");
+  if (!THTensor_(isContiguous)(grad))
+    return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous");
+  if (!THTensor_(isContiguous)(hess))
+    return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous");
+
+  // initializes the static data
+  nn_(GBInitialization) initialization_data;
+  nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index);
+
+  // initializes the dynamic data
+  GBRunData run_data;
+  gb_create_run_data(&run_data, minLeafSize);
+
+  // finds the best state possible for the split
+  nn_(GBBestState) bs;
+  nn_(gb_find_best_feature_split)(L, &initialization_data, &bs, feature_id, &run_data);
+
+  lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index);
+
+  // fills the table with the best split found and the lua logic above will do everything else
+  // if no state was found, returns nil
+  if (bs.valid_state == 0) {
+    lua_pop(L, 1);
+    lua_pushnil(L);
+  }
+  else {
+    nn_(gb_internal_split_info)(L, &bs, initialization_data.splitInfo_index);
+  }
+
+  gb_destroy_run_data(&run_data);
+
+  return 1;
+}
+
+// finds the best split for a given node and shard of features
+// this is more efficient than calling the previous one multiple times
+static int nn_(gb_findBestSplit)(lua_State *L) {
+  THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor");
+  const int dataset_index = 2;
+  THLongTensor *feature_ids = luaT_checkudata(L, 3, "torch.LongTensor");
+  if (!lua_isnumber(L, 4))
+    return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer");
+  long minLeafSize = lua_tointeger(L, 4);
+  // Since minLeafSize == 1 corresponds to each sample in its own leaf, any value below it doesn't
+  // make sense
+  if (minLeafSize < 1)
+    minLeafSize = 1;
+  if (!lua_isnumber(L, 5))
+    return LUA_HANDLE_ERROR_STR(L, "fifth argument should be an integer");
+  long shardId = lua_tointeger(L, 5);
+  if (!lua_isnumber(L, 6))
+    return LUA_HANDLE_ERROR_STR(L, "sixth argument should be an integer");
+  long nShard = lua_tointeger(L, 6);
+  THTensor *grad = luaT_checkudata(L, 7, torch_Tensor);
+  THTensor *hess = luaT_checkudata(L, 8, torch_Tensor);
+
+  if (!THLongTensor_isContiguous(exampleIds))
+    return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous");
contiguous"); + if (!THTensor_(isContiguous)(grad)) + return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous"); + if (!THTensor_(isContiguous)(hess)) + return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous"); + + // initializes the static data + nn_(GBInitialization) initialization_data; + nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index); + + // initializes the dynamic data + GBRunData run_data; + gb_create_run_data(&run_data, minLeafSize); + + // initializes to evaluate all the features in this shard + nn_(GBBestState) global_bs; + global_bs.valid_state = 0; + long n_features = THLongTensor_size(feature_ids, 0); + if (!THLongTensor_isContiguous(feature_ids)) + return LUA_HANDLE_ERROR_STR(L, "feature_ids must be contiguous"); + long *feature_ids_data = THLongTensor_data(feature_ids); + + // for every feature + for (long i = 0; i < n_features; i++) { + long feature_id = feature_ids_data[i]; + // if we are responsible for it + if (nShard <= 1 || (feature_id % nShard) + 1 == shardId) { + // finds the best state possible for the split + nn_(GBBestState) bs; + nn_(gb_find_best_feature_split)(L, &initialization_data, &bs, feature_id, &run_data); + + // if it's valid and better than one we found before, saves it + if (bs.valid_state) { + if (global_bs.valid_state == 0 || bs.gain < global_bs.gain) { + global_bs = bs; + } + } + } + } + + lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index); + + // fills the table we the best split found and the lua logic above will do everything else + // if no state was found, returns nil + if (global_bs.valid_state == 0) { + lua_pop(L, 1); + lua_pushnil(L); + } + else { + nn_(gb_internal_split_info)(L, &global_bs, initialization_data.splitInfo_index); + } + + gb_destroy_run_data(&run_data); + + return 1; +} + +// all the info we have to apss to the slave threads so that they can do their jobs +// note that we do not pass the lua state since it isn't required. 
we perform direct C parallelism +// instead of using lua's parallelism like with the previous version +typedef struct { + nn_(GBInitialization) *initialization_data; + GBRunData *run_data; + long *index; + nn_(GBBestState) *global_bs; + long n_features; + long *feature_ids_data; + pthread_mutex_t *mutex; + THLongTensor *exampleIds; + THTensor *input; + THLongTensor **sorted_ids_per_feature; +} nn_(ThreadInfo); + +// loops over all the features in parallel and finds the best global split +static void* nn_(thread_worker)(void *arg) { + nn_(ThreadInfo) *info = (nn_(ThreadInfo) *)arg; + + while (1) { + pthread_mutex_lock(info->mutex); + long index = (*info->index); + (*info->index)++; + pthread_mutex_unlock(info->mutex); + + if (index >= info->n_features) + break; + + // performs part of steps (1) and (2) of gb_find_best_feature_split without having to access the + // lua state using pre-loaded data + long feature_id = info->feature_ids_data[index]; + THLongTensor *exampleIdsWithFeature_ret = info->exampleIds; + THLongTensor *featureExampleIds = info->sorted_ids_per_feature[index]; + nn_(GBInitialization) *initialization_data = info->initialization_data; + GBRunData *run_data = info->run_data; + + // performs steps (3) and (4) of gb_find_best_feature_split since (1) and (2) were already + // performed before + nn_(GBBestState) bs; + nn_(gb_internal_create)(initialization_data->grad, initialization_data->hess, + exampleIdsWithFeature_ret, &bs.state); + nn_(gb_internal_get_best_split_special)(&bs, featureExampleIds, run_data->exampleMap, + info->input, run_data->minLeafSize, feature_id); + + // saves to the global state if it's better + if (bs.valid_state) { + pthread_mutex_lock(info->mutex); + if (info->global_bs->valid_state == 0 || bs.gain < info->global_bs->gain) { + (*info->global_bs) = bs; + } + pthread_mutex_unlock(info->mutex); + } + } + + return NULL; +} + +// finds the global best split by doing feature parallelism directly in C +static int nn_(gb_findBestSplitFP)(lua_State *L) { + THLongTensor *exampleIds = luaT_checkudata(L, 1, "torch.LongTensor"); + const int dataset_index = 2; + THLongTensor *feature_ids = luaT_checkudata(L, 3, "torch.LongTensor"); + if (!lua_isnumber(L, 4)) + return LUA_HANDLE_ERROR_STR(L, "fourth argument should be an integer"); + long minLeafSize = lua_tointeger(L, 4); + THTensor *grad = luaT_checkudata(L, 5, torch_Tensor); + THTensor *hess = luaT_checkudata(L, 6, torch_Tensor); + if (!lua_isnumber(L, 7)) + return LUA_HANDLE_ERROR_STR(L, "seventh argument should be an integer"); + long nThread = lua_tointeger(L, 7); + + if (!THLongTensor_isContiguous(exampleIds)) + return LUA_HANDLE_ERROR_STR(L, "exampleIds has to be contiguous"); + if (!THTensor_(isContiguous)(grad)) + return LUA_HANDLE_ERROR_STR(L, "grad has to be contiguous"); + if (!THTensor_(isContiguous)(hess)) + return LUA_HANDLE_ERROR_STR(L, "hessian has to be contiguous"); + + pthread_mutex_t mutex; + pthread_mutex_init(&mutex, NULL); + + // initializes the static data + nn_(GBInitialization) initialization_data; + nn_(gb_initialize)(L, &initialization_data, exampleIds, grad, hess, dataset_index); + + // initializes the dynamic data + GBRunData run_data; + gb_create_run_data(&run_data, minLeafSize); + + // initializes to evaluate all the features + nn_(GBBestState) global_bs; + global_bs.valid_state = 0; + long n_features = THLongTensor_size(feature_ids, 0); + if (!THLongTensor_isContiguous(feature_ids)) + return LUA_HANDLE_ERROR_STR(L, "feature_ids must be contiguous"); + long *feature_ids_data = 
THLongTensor_data(feature_ids);
+
+  THTensor *input = luaT_checkudata(L, initialization_data.input_index, torch_Tensor);
+
+  // performs step (1) of gb_find_best_feature_split so that we don't have to pass the lua state
+  THLongTensor *sorted_ids_per_feature[n_features];
+  for (long i = 0; i < n_features; i++) {
+    long feature_id = feature_ids_data[i];
+    lua_pushvalue(L, initialization_data.getSortedFeature_index);
+    lua_pushvalue(L, initialization_data.dataset_index);
+    lua_pushinteger(L, feature_id);
+    lua_call(L, 2, 1);
+
+    THLongTensor *featureExampleIds = luaT_checkudata(L, -1, "torch.LongTensor");
+    sorted_ids_per_feature[i] = featureExampleIds;
+  }
+
+  // performs step (2) of gb_find_best_feature_split since it's the same for all features when the
+  // data is dense
+  long exampleIds_size = THLongTensor_size(initialization_data.exampleIds, 0);
+  long *exampleIds_data = THLongTensor_data(initialization_data.exampleIds);
+
+  int ret;
+  kh_resize(long, run_data.exampleMap, exampleIds_size*8);
+  for (long i = 0; i < exampleIds_size; i++)
+    kh_put(long, run_data.exampleMap, exampleIds_data[i], &ret);
+
+  // saves the info for the threads
+  long index = 0;
+  nn_(ThreadInfo) info;
+  info.initialization_data = &initialization_data;
+  info.run_data = &run_data;
+  info.index = &index;
+  info.global_bs = &global_bs;
+  info.n_features = n_features;
+  info.feature_ids_data = feature_ids_data;
+  info.mutex = &mutex;
+  info.exampleIds = exampleIds;
+  info.input = input;
+  info.sorted_ids_per_feature = sorted_ids_per_feature;
+
+  pthread_t threads[nThread];
+
+  // let the threads run like crazy over the features to find the minimum
+  for (long i = 0; i < nThread; i++) {
+    int ret = pthread_create(&threads[i], NULL, nn_(thread_worker), &info);
+    if (ret)
+      return LUA_HANDLE_ERROR_STR(L, "failed to create thread");
+  }
+
+  for (long i = 0; i < nThread; i++) {
+    int ret = pthread_join(threads[i], NULL);
+    if (ret)
+      return LUA_HANDLE_ERROR_STR(L, "failed to join thread");
+  }
+
+  lua_pop(L, lua_gettop(L) - initialization_data.splitInfo_index);
+
+  // fills the table with the best split found and the lua logic above will do everything else
+  // if no state was found, returns nil
+  if (global_bs.valid_state == 0) {
+    lua_pop(L, 1);
+    lua_pushnil(L);
+  }
+  else {
+    nn_(gb_internal_split_info)(L, &global_bs, initialization_data.splitInfo_index);
+  }
+
+  gb_destroy_run_data(&run_data);
+  pthread_mutex_destroy(&mutex);
+
+  return 1;
+}
+
+// performs an efficient branching of the current examples based on the split info provided
+static int nn_(gb_branch)(lua_State *L) {
+  if (!lua_istable(L, 1))
+    return LUA_HANDLE_ERROR_STR(L, "first argument must be a table");
+  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+  THLongTensor *exampleIds = luaT_checkudata(L, 3, "torch.LongTensor");
+
+  // gets direct access to the dataset
+  long n_exampleIds = THLongTensor_size(exampleIds, 0);
+  long *exampleIds_data = THLongTensor_data(exampleIds);
+  long n_features = THTensor_(size)(input, 1);
+  real *input_data = THTensor_(data)(input);
+
+  // creates the tensors to be returned
+  luaT_pushudata(L, THLongTensor_new(), "torch.LongTensor");
+  luaT_pushudata(L, THLongTensor_new(), "torch.LongTensor");
+  THLongTensor *leftExampleIds = luaT_checkudata(L, 4, "torch.LongTensor");
+  THLongTensor *rightExampleIds = luaT_checkudata(L, 5, "torch.LongTensor");
+  THLongTensor_resize1d(leftExampleIds, n_exampleIds);
+
+  // gets direct access to the examples
+  THLongTensor *splitExampleIds = leftExampleIds;
+  long
*splitExampleIds_data = THLongTensor_data(splitExampleIds); + + // gets the split info + lua_pushstring(L, "splitId"); + lua_rawget(L, 1); + const long splitId = lua_tointeger(L, -1); + lua_pushstring(L, "splitValue"); + lua_rawget(L, 1); + const real splitValue = lua_tonumber(L, -1); + lua_pop(L, 2); + + long leftIdx = 0, rightIdx = 0; + + // goes over all the samples dividing them into the two sides + for (long i = 0; i < n_exampleIds; i++) { + long exampleId = exampleIds_data[i]; + real val = input_data[(exampleId-1) * n_features + (splitId - 1)]; + if (val <= splitValue) { + leftIdx++; + splitExampleIds_data[leftIdx-1] = exampleId; + } + else { + rightIdx++; + splitExampleIds_data[n_exampleIds - rightIdx + 1 - 1] = exampleId; + } + } + + // once done, the resulting tensors are just splits of the sample base. this is more efficient + // than having 2 tensors since we didn't know where the split would happen (how much to each + // side), but we knew that the sum would be constant + THLongTensor_narrow(rightExampleIds, splitExampleIds, 0, n_exampleIds-rightIdx+1-1, rightIdx); + THLongTensor_narrow(leftExampleIds, splitExampleIds, 0, 0, leftIdx); + return 2; +} + +static const struct luaL_Reg nn_(GBDT__) [] = { + {"GBDT_findBestFeatureSplit", nn_(gb_findBestFeatureSplit)}, + {"GBDT_findBestSplit", nn_(gb_findBestSplit)}, + {"GBDT_findBestSplitFP", nn_(gb_findBestSplitFP)}, + {"GBDT_branch", nn_(gb_branch)}, + {NULL, NULL} +}; + +static void nn_(GBDT_init)(lua_State *L) +{ + luaT_pushmetatable(L, torch_Tensor); + luaT_registeratname(L, nn_(GBDT__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/contrib/lua-torch/decisiontree/generic/GBDT_internal.c b/contrib/lua-torch/decisiontree/generic/GBDT_internal.c new file mode 100644 index 000000000..739aabf25 --- /dev/null +++ b/contrib/lua-torch/decisiontree/generic/GBDT_internal.c @@ -0,0 +1,312 @@ +// initializes the optimization structure based on the arguments provided, either filling directly +// or making calls to lua to load some kind of data +static void nn_(gb_initialize)(lua_State *L, nn_(GBInitialization) *initialization_data, + THLongTensor *exampleIds, THTensor *grad, THTensor *hess, int dataset_index) { + initialization_data->dataset_index = dataset_index; + initialization_data->exampleIds = exampleIds; + initialization_data->grad = grad; + initialization_data->hess = hess; + + lua_newtable(L); + initialization_data->splitInfo_index = lua_gettop(L); + + lua_pushstring(L, "input"); + lua_gettable(L, dataset_index); + initialization_data->input_index = lua_gettop(L); + + lua_pushstring(L, "getSortedFeature"); + lua_gettable(L, dataset_index); + initialization_data->getSortedFeature_index = lua_gettop(L); +} + +// initializes a state that will be passed to the optimizer +static void nn_(gb_internal_create)(THTensor *grad, THTensor *hessian, + THLongTensor *exampleIds, nn_(GBState)* s) { + long *exampleIds_data = THLongTensor_data(exampleIds); + long n_examples = THLongTensor_size(exampleIds, 0); + accreal leftGradientSum = 0; + accreal leftHessianSum = 0; + + real *grad_data = THTensor_(data)(grad); + real *hessian_data = THTensor_(data)(hessian); + + // only sums the relevant gradients and hessians + for (long i = 0; i < n_examples; i++) { + long exampleId = exampleIds_data[i]-1; + leftGradientSum += grad_data[exampleId]; + leftHessianSum += hessian_data[exampleId]; + } + + // we move data from the left branch to the right branch + s->rightGradientSum = 0; + s->rightHessianSum = 1; + s->nExampleInRightBranch = 0; + s->leftGradientSum 
= leftGradientSum; + s->leftHessianSum = leftHessianSum + 1; + s->nExampleInLeftBranch = n_examples; + + // stores the loss in parent for efficiency + real lossInParent = computeGradientBoostLoss(s->leftGradientSum + s->rightGradientSum, + s->leftHessianSum + s->rightHessianSum); + s->lossInParent = lossInParent; + + // caches the direct pointers to the data for efficiency + s->grad_data = grad_data; + s->hessian_data = hessian_data; +} + +// computes the gain obtained by performing the split +static real nn_(computeSplitGain)(nn_(GBState) *s) { + real lossInLeftBranch = computeGradientBoostLoss(s->leftGradientSum, s->leftHessianSum); + real lossInRightBranch = computeGradientBoostLoss(s->rightGradientSum, s->rightHessianSum); + return lossInLeftBranch + lossInRightBranch - s->lossInParent; +} + +// uses the state information to build the table required by the lua library about the best split +static void nn_(gb_internal_split_info)(lua_State *L, nn_(GBBestState) *bs, int res) { + long feature_id = bs->feature_id; + real feature_value = bs->feature_value; + real gain = bs->gain; + nn_(GBState) *s = &bs->state; + lua_pushstring(L, "splitGain"); + lua_pushnumber(L, gain); + lua_rawset(L, res); + lua_pushstring(L, "splitId"); + lua_pushinteger(L, feature_id); + lua_rawset(L, res); + lua_pushstring(L, "splitValue"); + lua_pushnumber(L, feature_value); + lua_rawset(L, res); + lua_pushstring(L, "leftChildSize"); + lua_pushinteger(L, s->nExampleInLeftBranch); + lua_rawset(L, res); + lua_pushstring(L, "rightChildSize"); + lua_pushinteger(L, s->nExampleInRightBranch); + lua_rawset(L, res); + lua_pushstring(L, "leftGradient"); + lua_pushnumber(L, s->leftGradientSum); + lua_rawset(L, res); + lua_pushstring(L, "rightGradient"); + lua_pushnumber(L, s->rightGradientSum); + lua_rawset(L, res); + lua_pushstring(L, "leftHessian"); + lua_pushnumber(L, s->leftHessianSum); + lua_rawset(L, res); + lua_pushstring(L, "rightHessian"); + lua_pushnumber(L, s->rightHessianSum); + lua_rawset(L, res); +} + +// core of the computation, where we loop over all the relevant samples looking for the best split +// we can find +static void nn_(gb_internal_get_best_split)(lua_State *L, nn_(GBBestState) *bs, + THLongTensor *featureExampleIds, khash_t(long)* exampleMap, int input_table_index, + long minLeafSize, long feature_id) { + nn_(GBState) current_state; + nn_(GBState) best_state; + current_state = bs->state; + + real best_gain = INFINITY; + real best_value = 0; + + // if the data is dense, pre-loads direct access to it + THTensor *input = NULL; + real *input_data = NULL; + long n_features = 0; + if (lua_istable(L, input_table_index)) { + } + else { + input = luaT_checkudata(L, input_table_index, torch_Tensor); + input_data = THTensor_(data)(input); + n_features = THTensor_(size)(input, 1); + } + + long stride = featureExampleIds->stride[0]; + long *featureExampleIds_data = THLongTensor_data(featureExampleIds); + + khiter_t k; + + real previousSplitValue = 0; + // for each example with the given feature and from large to small value... 
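+  // (scan invariant: gb_internal_create starts with every example in the left
+  //  branch; each step of the loop below moves one example to the right, so the
+  //  running gradient/hessian sums score each candidate threshold in O(1))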
+ for (long i = THLongTensor_size(featureExampleIds, 0)-1; i >= 0; i--) { + long exampleId = featureExampleIds_data[i * stride]; + + // checks if the sample is in the list of ones that have to be evaluated by this node + k = kh_get(long, exampleMap, exampleId); + if (k != kh_end(exampleMap)) { + long exampleIdx = exampleId; + + // gets the split value, depending on whether the input is sparse or dense + real splitValue; + if (input_data) { + splitValue = input_data[(exampleId-1) * n_features + feature_id-1]; + } + else { + lua_pushinteger(L, exampleId); + lua_gettable(L, input_table_index); + lua_pushinteger(L, feature_id); + lua_gettable(L, -2); + splitValue = lua_tonumber(L, -1); + lua_pop(L, 2); + } + + // performs one update of the state, moving a sample from the left branch to the right + real gradient = current_state.grad_data[exampleIdx-1]; + real hessian = current_state.hessian_data[exampleIdx-1]; + current_state.leftGradientSum -= gradient; + current_state.rightGradientSum += gradient; + current_state.leftHessianSum -= hessian; + current_state.rightHessianSum += hessian; + current_state.nExampleInLeftBranch--; + current_state.nExampleInRightBranch++; + + // since we remove from the left, once this becomes true, it stays true forever + // hence we stop the loop + if (current_state.nExampleInLeftBranch < minLeafSize) + break; + + if (current_state.nExampleInRightBranch >= minLeafSize) { + // if the values are equal between the steps, it doesn't make sense to evaluate the score + // since we won't be able to separate the two + if (previousSplitValue != splitValue) { + // computes the gain **without including the parent** since it doesn't change as we move + // examples between branches + real lossInLeftBranch = computeGradientBoostLoss(current_state.leftGradientSum, current_state.leftHessianSum); + real lossInRightBranch = computeGradientBoostLoss(current_state.rightGradientSum, current_state.rightHessianSum); + real current_gain = lossInLeftBranch + lossInRightBranch; + if (current_gain < best_gain) { + best_gain = current_gain; + best_value = splitValue; + best_state = current_state; + } + } + } + previousSplitValue = splitValue; + } + } + + // if there is a valid gain, then marks the state as valid and fills the meta-info + if (!isfinite(best_gain)) { + bs->valid_state = 0; + } + else { + bs->valid_state = 1; + bs->state = best_state; + bs->feature_id = feature_id; + bs->gain = nn_(computeSplitGain)(&bs->state); + bs->feature_value = best_value; + } +} + +// exactly like the previous version, but direct access to the data for efficiency. 
it also doesn't +// rely on the lua state in the particular case of dense data, so we can evaluate this without using +// the lua state +static void nn_(gb_internal_get_best_split_special)(nn_(GBBestState) *bs, + THLongTensor *featureExampleIds, khash_t(long)* exampleMap, THTensor *input, long minLeafSize, + long feature_id) { + nn_(GBState) current_state; + nn_(GBState) best_state; + current_state = bs->state; + + real best_gain = INFINITY; + real best_value = 0; + + real *input_data = NULL; + long n_features = 0; + input_data = THTensor_(data)(input); + n_features = THTensor_(size)(input, 1); + + long stride = featureExampleIds->stride[0]; + long *featureExampleIds_data = THLongTensor_data(featureExampleIds); + + khiter_t k; + + real previousSplitValue = 0; + for (long i = THLongTensor_size(featureExampleIds, 0)-1; i >= 0; i--) { + long exampleId = featureExampleIds_data[i * stride]; + + k = kh_get(long, exampleMap, exampleId); + if (k != kh_end(exampleMap)) { + long exampleIdx = exampleId; + + // THIS is the main part that changes. seems crazy to have a special case just for this, but + // since there are a **lot** of samples to be evaluated, the "if" in the previous case can + // become expensive + real splitValue; + splitValue = input_data[(exampleId-1) * n_features + feature_id-1]; + + real gradient = current_state.grad_data[exampleIdx-1]; + real hessian = current_state.hessian_data[exampleIdx-1]; + current_state.leftGradientSum -= gradient; + current_state.rightGradientSum += gradient; + current_state.leftHessianSum -= hessian; + current_state.rightHessianSum += hessian; + current_state.nExampleInLeftBranch--; + current_state.nExampleInRightBranch++; + + // since we remove from the left, once this becomes true, it stays true forever + // hence we stop the loop + if (current_state.nExampleInLeftBranch < minLeafSize) + break; + + // This will always fail in the first pass since minLeafSize >= 1 and nExampleInRightBranch + // starts at 0 + if (current_state.nExampleInRightBranch >= minLeafSize) { + if (previousSplitValue != splitValue) { + real lossInLeftBranch = computeGradientBoostLoss(current_state.leftGradientSum, current_state.leftHessianSum); + real lossInRightBranch = computeGradientBoostLoss(current_state.rightGradientSum, current_state.rightHessianSum); + real current_gain = lossInLeftBranch + lossInRightBranch; + if (current_gain < best_gain) { + best_gain = current_gain; + best_value = splitValue; + best_state = current_state; + } + } + } + previousSplitValue = splitValue; + } + } + + if (!isfinite(best_gain)) { + bs->valid_state = 0; + } + else { + bs->valid_state = 1; + bs->state = best_state; + bs->feature_id = feature_id; + bs->gain = nn_(computeSplitGain)(&bs->state); + bs->feature_value = best_value; + } +} + +// core of the computation to find the split for a given feature and is divided in 4 steps +static void nn_(gb_find_best_feature_split)(lua_State *L, + nn_(GBInitialization) *initialization_data, nn_(GBBestState) *bs, long feature_id, + GBRunData *run_data) { + + // 1) loads the examples in the dataset ordered by their feature value + lua_pushvalue(L, initialization_data->getSortedFeature_index); + lua_pushvalue(L, initialization_data->dataset_index); + lua_pushinteger(L, feature_id); + lua_call(L, 2, 1); + + THLongTensor *featureExampleIds = luaT_checkudata(L, -1, "torch.LongTensor"); + + // 2) processes the data to find the intersection between the examples in the dataset and the + // examples the current node has to evaluate + THLongTensor 
*exampleIdsWithFeature_ret = gb_internal_prepare(L, initialization_data->exampleIds, + run_data->exampleIdsWithFeature_cache, initialization_data->input_index, feature_id, + run_data->exampleMap); + if (!exampleIdsWithFeature_ret) { + bs->valid_state = 0; + return; + } + + // 3) creates a new state to be used by the optimizer + nn_(gb_internal_create)(initialization_data->grad, initialization_data->hess, + exampleIdsWithFeature_ret, &bs->state); + + // 4) optimize away! + nn_(gb_internal_get_best_split)(L, bs, featureExampleIds, run_data->exampleMap, + initialization_data->input_index, run_data->minLeafSize, feature_id); +} diff --git a/contrib/lua-torch/decisiontree/generic/GBDT_internal.h b/contrib/lua-torch/decisiontree/generic/GBDT_internal.h new file mode 100644 index 000000000..7119365cf --- /dev/null +++ b/contrib/lua-torch/decisiontree/generic/GBDT_internal.h @@ -0,0 +1,34 @@ +// representation of a state used while searching for the best split +typedef struct { + real leftGradientSum, rightGradientSum; + real leftHessianSum, rightHessianSum; + real lossInParent; + long nExampleInLeftBranch, nExampleInRightBranch; + real *grad_data, *hessian_data; +} nn_(GBState); + +// representation for the best state found for a given feature +typedef struct { + nn_(GBState) state; + real gain; + long feature_id; + real feature_value; + int valid_state; +} nn_(GBBestState); + +// full data that must be initialized before calling the optimizer +typedef struct { + // *_index represent positions on the lua stack + int dataset_index; + int splitInfo_index; + int input_index; + // position of the dataset's function to return the samples ordered for a given feature + int getSortedFeature_index; + + // samples that this node has to evaluate + THLongTensor *exampleIds; + + // cached gradient and hessian for all data + THTensor *grad; + THTensor *hess; +} nn_(GBInitialization); diff --git a/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c b/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c new file mode 100644 index 000000000..f2ea1ef09 --- /dev/null +++ b/contrib/lua-torch/decisiontree/generic/LogitBoostCriterion.c @@ -0,0 +1,90 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogitBoostCriterion.c" +#else + +#define EPS 1e-12 + +static int nn_(LogitBoostCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 1, torch_Tensor); + THTensor *target = luaT_checkudata(L, 2, torch_Tensor); + THTensor *output = luaT_checkudata(L, 3, torch_Tensor); + int sizeAverage = lua_toboolean(L, 4); + + if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) { + luaL_error(L, "inconsistent input and target size"); + } + THTensor_(resize1d)(output, 1); + + real sum = 0; + + TH_TENSOR_APPLY2(real, input, real, target, + real x = *input_data; + real y = *target_data; + // math.log(1 + math.exp(target[i] <= 0 and input[i] or -input[i])) + sum += log(1 + exp(y <= 0 ? 
x : -x));
+  );
+
+  if (sizeAverage)
+    sum /= THTensor_(nElement)(input);
+
+  THTensor_(set1d)(output, 0, sum);
+  return 0;
+}
+
+static int nn_(LogitBoostCriterion_updateGradInput)(lua_State *L)
+{
+  THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *target = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *gradInput = luaT_checkudata(L, 3, torch_Tensor);
+
+  if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) {
+    luaL_error(L, "inconsistent input and target size");
+  }
+  THTensor_(resizeAs)(gradInput, input);
+
+  TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+    real x = *input_data;
+    real y = *target_data;
+    real p = (x >= 0) ? (1 / (1 + exp(-x))) : (1 - 1 / (1 + exp(x)));
+    *gradInput_data = (y <= 0) ? p : (p - 1);
+  );
+
+  return 0;
+}
+
+static int nn_(LogitBoostCriterion_updateHessInput)(lua_State *L)
+{
+  THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *target = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *hessInput = luaT_checkudata(L, 3, torch_Tensor);
+
+  if (THTensor_(nElement)(input) != THTensor_(nElement)(target)) {
+    luaL_error(L, "inconsistent input and target size");
+  }
+  THTensor_(resizeAs)(hessInput, input);
+
+  TH_TENSOR_APPLY3(real, hessInput, real, input, real, target,
+    real x = *input_data;
+    real p = (x >= 0) ? (1 / (1 + exp(-x))) : (1 - 1 / (1 + exp(x)));
+    *hessInput_data = p * (1.0 - p);
+  );
+
+  return 0;
+}
+
+static const struct luaL_Reg nn_(LogitBoostCriterion__) [] = {
+  {"LogitBoostCriterion_updateOutput", nn_(LogitBoostCriterion_updateOutput)},
+  {"LogitBoostCriterion_updateGradInput", nn_(LogitBoostCriterion_updateGradInput)},
+  {"LogitBoostCriterion_updateHessInput", nn_(LogitBoostCriterion_updateHessInput)},
+  {NULL, NULL}
+};
+
+static void nn_(LogitBoostCriterion_init)(lua_State *L)
+{
+  luaT_pushmetatable(L, torch_Tensor);
+  luaT_registeratname(L, nn_(LogitBoostCriterion__), "nn");
+  lua_pop(L,1);
+}
+
+#endif
diff --git a/contrib/lua-torch/decisiontree/generic/S2D.c b/contrib/lua-torch/decisiontree/generic/S2D.c
new file mode 100644
index 000000000..2392ee7c8
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/generic/S2D.c
@@ -0,0 +1,90 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/S2D.c"
+#else
+
+static int nn_(S2D_computeOutput)(lua_State *L) {
+  THTensor *output = luaT_checkudata(L, 1, torch_Tensor);
+  const int keys_index = 2;
+  const int values_index = 3;
+  const int masks_index = 4;
+
+  if (!lua_istable(L, keys_index))
+    return LUA_HANDLE_ERROR_STR(L, "expected position 2 to be a table");
+  if (!lua_istable(L, values_index))
+    return LUA_HANDLE_ERROR_STR(L, "expected position 3 to be a table");
+  if (!lua_istable(L, masks_index))
+    return LUA_HANDLE_ERROR_STR(L, "expected position 4 to be a table");
+
+
+  THLongTensor *features = luaT_checkudata(L, 5, "torch.LongTensor");
+
+  const int original_top = lua_gettop(L);
+
+  long outputsize = THLongTensor_size(features, 0);
+  long batch_size = lua_objlen(L, keys_index);
+
+  // initializes output
+  THTensor_(resize2d)(output, batch_size, outputsize);
+  THTensor_(zero)(output);
+  real *output_data = THTensor_(data)(output);
+
+  // iterates over samples
+  lua_pushnil(L);
+  const int local_top = lua_gettop(L);
+  while (lua_next(L, keys_index) != 0) {
+    // gets data corresponding to the current sample
+    long i = lua_tointeger(L, -2)-1;
+    real *current_output_data = &output_data[i * outputsize];
+    THLongTensor *keys = luaT_checkudata(L, -1, "torch.LongTensor");
+    lua_rawgeti(L, values_index, i+1);
+    THTensor *values =
luaT_checkudata(L, -1, torch_Tensor); + lua_rawgeti(L, masks_index, i+1); + THByteTensor *mask = luaT_checkudata(L, -1, "torch.ByteTensor"); + + long n_keys = THLongTensor_size(keys, 0); + long n_values = THTensor_(size)(values, 0); + + // quick safety check + if (n_keys != n_values) + return LUA_HANDLE_ERROR_STR(L, "keys and values have to have the same size"); + + // gets the direct memory pointers + long *keys_data = THLongTensor_data(keys); + real *values_data = THTensor_(data)(values); + unsigned char *mask_data = THByteTensor_data(mask); + + // for each value in the sparse input... + for (long j = 0; j < n_keys; j++) { + // loads the value and key + real current_value = values_data[j]; + long current_key = keys_data[j]; + unsigned char current_mask = mask_data[j]; + + // if the feature is present in the map + if (current_mask) + // saves in the given position + current_output_data[current_key-1] = current_value; + } + // cleans up the trash we create by iterating over keys to avoid it from overflowing + lua_pop(L, lua_gettop(L) - local_top); + } + + // cleans up the trash we added to the stack + lua_pop(L, lua_gettop(L) - original_top); + + return 0; +} + +static const struct luaL_Reg nn_(S2D__) [] = { + {"S2D_computeOutput", nn_(S2D_computeOutput)}, + {NULL, NULL} +}; + +static void nn_(S2D_init)(lua_State *L) +{ + luaT_pushmetatable(L, torch_Tensor); + luaT_registeratname(L, nn_(S2D__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/contrib/lua-torch/decisiontree/hash_map.c b/contrib/lua-torch/decisiontree/hash_map.c new file mode 100644 index 000000000..2c679e207 --- /dev/null +++ b/contrib/lua-torch/decisiontree/hash_map.c @@ -0,0 +1,445 @@ +#include "utils.h" +#include "hash_map.h" +#include "internal_hash_map.h" +#include <pthread.h> + +hash_map_t hash_map_init(void) { + return kh_init(long); +} + +void hash_map_destroy(hash_map_t h_) { + internal_hash_map_t h = (internal_hash_map_t) h_; + kh_destroy(long, h); +} + +void hash_map_clear(hash_map_t h_) { + internal_hash_map_t h = (internal_hash_map_t) h_; + kh_clear(long, h); +} + +int hash_map_put(hash_map_t h_, long key, long val) { + internal_hash_map_t h = (internal_hash_map_t) h_; + int ret; + khiter_t k = kh_put(long, h, key, &ret); + ret = (ret >= 0); + if (ret) + kh_value(h, k) = val; + return ret; +} + +int hash_map_put_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_) { + long *keys = THLongTensor_data(keys_); + long *vals = THLongTensor_data(vals_); + long size = get_tensor_size(keys_, Long); + for (long i = 0; i < size; i++) + if (!hash_map_put(h_, keys[i], vals[i])) + return 0; + return 1; +} + +int hash_map_fill(hash_map_t h_, long key, long *counter) { + internal_hash_map_t h = (internal_hash_map_t) h_; + khiter_t k = kh_get(long, h, key); + if (k == kh_end(h)) + return hash_map_put(h_, key, ++(*counter)); + return 1; +} + +int hash_map_fill_tensor(hash_map_t h_, THLongTensor *keys_, long *counter) { + long *keys = THLongTensor_data(keys_); + long size = get_tensor_size(keys_, Long); + for (long i = 0; i < size; i++) + if (!hash_map_fill(h_, keys[i], counter)) + return 0; + return 1; +} + +int hash_map_get(hash_map_t h_, long key, long* val) { + internal_hash_map_t h = (internal_hash_map_t) h_; + khiter_t k = kh_get(long, h, key); + if (k == kh_end(h)) + return 0; + *val = kh_value(h, k); + return 1; +} + +void hash_map_get_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_, THByteTensor *mask_) { + long *keys = THLongTensor_data(keys_); + long *vals = THLongTensor_data(vals_);; + 
unsigned char *mask = THByteTensor_data(mask_); + long size = get_tensor_size(keys_, Long); + for (long i = 0; i < size; i++) + mask[i] = hash_map_get(h_, keys[i], &vals[i]); +} + +void hash_map_del(hash_map_t h_, long key) { + internal_hash_map_t h = (internal_hash_map_t) h_; + khiter_t k = kh_get(long, h, key); + if (k != kh_end(h)) + kh_del(long, h, k); +} + +void hash_map_del_tensor(hash_map_t h_, THLongTensor *keys_) { + long *keys = THLongTensor_data(keys_); + long size = get_tensor_size(keys_, Long); + for (long i = 0; i < size; i++) + hash_map_del(h_, keys[i]); +} + +size_t hash_map_size(hash_map_t h_) { + internal_hash_map_t h = (internal_hash_map_t) h_; + return kh_size(h); +} + +void hash_map_to_tensor(hash_map_t h_, THLongTensor *keys_, THLongTensor *vals_) { + internal_hash_map_t h = (internal_hash_map_t) h_; + long *keys = THLongTensor_data(keys_); + long *vals = THLongTensor_data(vals_); + long key, val, i = 0; + kh_foreach(h, key, val, { + keys[i] = key; + vals[i] = val; + i++; + }); +} + +static void autolock(hash_map_lua_t *h) { + if (h->autolock) { + pthread_mutex_lock(&h->mutex); + } +} + +static void autounlock(hash_map_lua_t *h) { + if (h->autolock) { + pthread_mutex_unlock(&h->mutex); + } +} + +int hash_map_autolock_on_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + h->autolock = 1; + return 0; +} + +int hash_map_autolock_off_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + h->autolock = 0; + return 0; +} + +int hash_map_init_lua(lua_State *L) { + hash_map_lua_t **hp = (hash_map_lua_t**)lua_newuserdata(L, sizeof(hash_map_lua_t*)); + *hp = (hash_map_lua_t*)malloc(sizeof(hash_map_lua_t)); + hash_map_lua_t *h = *hp; + h->refcount = 1; + h->counter = 0; + h->autolock = 0; + h->h = hash_map_init(); + + pthread_mutexattr_t mutex_attr; + pthread_mutexattr_init(&mutex_attr); + pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&h->mutex, &mutex_attr); + + luaL_getmetatable(L, "dt.HashMap"); + lua_setmetatable(L, -2); + return 1; +} + +int hash_map_gc_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + if (THAtomicDecrementRef(&h->refcount)) { + pthread_mutex_destroy(&h->mutex); + hash_map_destroy(h->h); + free(h); + } + return 0; +} + +int hash_map_retain_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + THAtomicIncrementRef(&h->refcount); + return 0; +} + +int hash_map_metatablename_lua(lua_State *L) { + lua_pushstring(L, "dt.HashMap"); + return 1; +} + +int hash_map_clear_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + autolock(h); + hash_map_clear(h->h); + autounlock(h); + return 0; +} + +int hash_map_put_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + int ret; +#if LUA_VERSION_NUM <= 501 +#define lua_isinteger lua_isnumber +#endif + if (lua_isinteger(L, 2)) { + if (!lua_isinteger(L, 3)) + return LUA_HANDLE_ERROR_STR(L, "second parameter is not a number"); + long key = lua_tointeger(L, 2); + long val = lua_tointeger(L, 3); + autolock(h); + ret = hash_map_put(h->h, key, val); + autounlock(h); + } + else { + THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor"); + THLongTensor *vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor"); + check_tensor(L, keys, THLongTensor); + check_tensor(L, vals, THLongTensor); + check_tensors(L, keys, vals); + autolock(h); + ret = hash_map_put_tensor(h->h, keys, 
vals); + autounlock(h); + } + if (!ret) + return LUA_HANDLE_ERROR_STR(L, "failed to put into hash map"); + return 0; +} + +int hash_map_fill_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + int ret; + if (lua_isinteger(L, 2)) { + long key = lua_tointeger(L, 2); + autolock(h); + ret = hash_map_fill(h->h, key, &h->counter); + autounlock(h); + } + else { + THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor"); + check_tensor(L, keys, THLongTensor); + autolock(h); + ret = hash_map_fill_tensor(h->h, keys, &h->counter); + autounlock(h); + } + if (!ret) + return LUA_HANDLE_ERROR_STR(L, "failed to fill into hash map"); + return 0; +} + +int hash_map_adjust_counter_lua(lua_State *L) { + hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1); + internal_hash_map_t h = (internal_hash_map_t) h_->h; + + long val; + kh_foreach_value(h, val, { + if (val >= h_->counter) + h_->counter = val; + }); + return 0; +} + +int hash_map_set_counter_lua(lua_State *L) { + hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1); + h_->counter = lua_tointeger(L, 2); + return 0; +} + +int hash_map_get_counter_lua(lua_State *L) { + hash_map_lua_t *h_ = *(hash_map_lua_t**)lua_touserdata(L, 1); + lua_pushinteger(L, h_->counter); + return 1; +} + +static int hash_map_get_tensor_lua(lua_State *L, hash_map_lua_t *h, int inplace) { + THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor"); + check_tensor(L, keys, THLongTensor); + THLongTensor *vals = inplace ? keys : NULL; + THByteTensor *mask = NULL; + + int maskIdx = inplace ? 3 : 4; + + if (!inplace) { + if (lua_gettop(L) < 3) { + vals = THLongTensor_new(); + } else { + vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor"); + check_tensor(L, vals, THLongTensor); + } + } + + if (lua_gettop(L) < maskIdx) { + mask = THByteTensor_new(); + } else { + mask = (THByteTensor *)luaT_checkudata(L, maskIdx, "torch.ByteTensor"); + check_tensor(L, mask, THByteTensor); + } + + int n_dim = THLongTensor_nDimension(keys); + THLongStorage *st = THLongStorage_newWithSize1(n_dim); + for (int i = 0; i < n_dim; i++) { + THLongStorage_set(st, i, THLongTensor_size(keys, i)); + } + THByteTensor_resize(mask, st, NULL); + if (!inplace) THLongTensor_resize(vals, st, NULL); + THLongStorage_free(st); + + autolock(h); + hash_map_get_tensor(h->h, keys, vals, mask); + autounlock(h); + + if (!inplace && lua_gettop(L) < 3) + luaT_pushudata(L, vals, "torch.LongTensor"); + if (lua_gettop(L) < maskIdx) + luaT_pushudata(L, mask, "torch.ByteTensor"); + + return 2; +} + +static int hash_map_get_table_lua(lua_State *L, hash_map_lua_t *h, int inplace) { + const int kidx = 2; + const int vidx = inplace ? 2 : 3; + const int midx = inplace ? 3 : 4; + const int narg = lua_gettop(L); + + if (inplace) { + if (narg < 3) { + LUA_HANDLE_ERROR_STR(L, "HashMap.getInplace requires two arguments."); + } + } else { + if (narg < 4) { + LUA_HANDLE_ERROR_STR(L, "HashMap.get requires three arguments."); + } + } + + int count = push_table_contents(L, kidx); + verify_push_table_contents(L, vidx, count); + verify_push_table_contents(L, midx, count); + + THLongTensor *keys; + THLongTensor *vals; + THByteTensor *mask; + for (int i = count - 1; i >= 0; i--) { + int maskIdx = i - count; + int valIdx = maskIdx - count; + int keyIdx = inplace ? 
valIdx : (valIdx - count); + + keys = (THLongTensor *)luaT_checkudata(L, keyIdx, "torch.LongTensor"); + check_tensor(L, keys, THLongTensor); + if (inplace) { + vals = keys; + } else { + vals = (THLongTensor *)luaT_checkudata(L, valIdx, "torch.LongTensor"); + } + mask = (THByteTensor *)luaT_checkudata(L, maskIdx, "torch.ByteTensor"); + + int n_dim = THLongTensor_nDimension(keys); + THLongStorage *st = THLongStorage_newWithSize1(n_dim); + for (int i = 0; i < n_dim; i++) { + THLongStorage_set(st, i, THLongTensor_size(keys, i)); + } + THByteTensor_resize(mask, st, NULL); + THLongTensor_resize(vals, st, NULL); + THLongStorage_free(st); + + autolock(h); + hash_map_get_tensor(h->h, keys, vals, mask); + autounlock(h); + } + lua_pop(L, (narg - 1) * count); + return 2; +} + +int hash_map_get_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + if (lua_isinteger(L, 2)) { + long key = lua_tointeger(L, 2); + long val; + autolock(h); + int ret = hash_map_get(h->h, key, &val); + autounlock(h); + if (ret) { + lua_pushinteger(L, val); + lua_pushinteger(L, 1); + } + else { + lua_pushinteger(L, 0); + lua_pushinteger(L, 0); + } + } else if (lua_istable(L, 2)) { + return hash_map_get_table_lua(L, h, 0); + } else { + return hash_map_get_tensor_lua(L, h, 0); + } + return 2; +} + +int hash_map_get_inplace_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + if (lua_isinteger(L, 2)) { + LUA_HANDLE_ERROR_STR(L, "HashMap.getInplace does not support integer arguments."); + } else if (lua_istable(L, 2)) { + return hash_map_get_table_lua(L, h, 1); + } else { + return hash_map_get_tensor_lua(L, h, 1); + } + return 2; +} + +int hash_map_del_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + if (lua_isinteger(L, 2)) { + long key = lua_tointeger(L, 2); + autolock(h); + hash_map_del(h->h, key); + autounlock(h); + } + else { + THLongTensor *keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor"); + autolock(h); + hash_map_del_tensor(h->h, keys); + autounlock(h); + } + return 0; +} + +int hash_map_size_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + long size = hash_map_size(h->h); + lua_pushinteger(L, size); + return 1; +} + +int hash_map_to_tensor_lua(lua_State *L) { + hash_map_lua_t *h = *(hash_map_lua_t**)lua_touserdata(L, 1); + THLongTensor *keys, *vals; + + if (lua_gettop(L) < 2) { + keys = THLongTensor_new(); + } + else { + keys = (THLongTensor *)luaT_checkudata(L, 2, "torch.LongTensor"); + check_tensor(L, keys, THLongTensor); + } + + if (lua_gettop(L) < 3) { + vals = THLongTensor_new(); + } + else { + vals = (THLongTensor *)luaT_checkudata(L, 3, "torch.LongTensor"); + check_tensor(L, vals, THLongTensor); + } + + size_t size = hash_map_size(h->h); + THLongTensor_resize1d(keys, size); + THLongTensor_resize1d(vals, size); + + autolock(h); + hash_map_to_tensor(h->h, keys, vals); + autounlock(h); + + if (lua_gettop(L) < 2) + luaT_pushudata(L, keys, "torch.LongTensor"); + if (lua_gettop(L) < 3) + luaT_pushudata(L, vals, "torch.LongTensor"); + return 2; +} diff --git a/contrib/lua-torch/decisiontree/hash_map.h b/contrib/lua-torch/decisiontree/hash_map.h new file mode 100644 index 000000000..5b215e4ca --- /dev/null +++ b/contrib/lua-torch/decisiontree/hash_map.h @@ -0,0 +1,36 @@ +#include "luaT.h" +#include "TH.h" + +typedef void* hash_map_t; + +hash_map_t hash_map_init(void); +void hash_map_destroy(hash_map_t); +void hash_map_clear(hash_map_t); +int hash_map_put(hash_map_t, long key, 
long val); +int hash_map_put_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_); +int hash_map_fill(hash_map_t, long key, long *counter); +int hash_map_fill_tensor(hash_map_t, THLongTensor *keys_, long *counter); +int hash_map_get(hash_map_t, long key, long *val); +void hash_map_get_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_, THByteTensor *mask_); +void hash_map_del(hash_map_t, long key); +void hash_map_del_tensor(hash_map_t, THLongTensor *keys_); +size_t hash_map_size(hash_map_t); +void hash_map_to_tensor(hash_map_t, THLongTensor *keys_, THLongTensor *vals_); + +int hash_map_autolock_on_lua(lua_State *L); +int hash_map_autolock_off_lua(lua_State *L); +int hash_map_init_lua(lua_State *L); +int hash_map_gc_lua(lua_State *L); +int hash_map_retain_lua(lua_State *L); +int hash_map_metatablename_lua(lua_State *L); +int hash_map_clear_lua(lua_State *L); +int hash_map_put_lua(lua_State *L); +int hash_map_fill_lua(lua_State *L); +int hash_map_adjust_counter_lua(lua_State *L); +int hash_map_set_counter_lua(lua_State *L); +int hash_map_get_counter_lua(lua_State *L); +int hash_map_get_lua(lua_State *L); +int hash_map_get_inplace_lua(lua_State *L); +int hash_map_del_lua(lua_State *L); +int hash_map_size_lua(lua_State *L); +int hash_map_to_tensor_lua(lua_State *L); diff --git a/contrib/lua-torch/decisiontree/init.c b/contrib/lua-torch/decisiontree/init.c new file mode 100644 index 000000000..276241e8e --- /dev/null +++ b/contrib/lua-torch/decisiontree/init.c @@ -0,0 +1,77 @@ +#include "TH.h" +#include "luaT.h" + +#ifdef _OPENMP +#include "omp.h" +#endif + +#include "error.h" +#include "hash_map.h" + +#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) +#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor) +#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) + +#include "generic/LogitBoostCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/DFD.c" +#include "THGenerateFloatTypes.h" + +#include "generic/S2D.c" +#include "THGenerateFloatTypes.h" + +#include "generic/CartTree.c" +#include "THGenerateFloatTypes.h" + +#include "GBDT_common.h" +#include "generic/GBDT.c" +#include "THGenerateFloatTypes.h" + +static const struct luaL_Reg decisiontree_hash_map_routines[] = { + {"__gc", hash_map_gc_lua}, + {"retain", hash_map_retain_lua}, + {"metatablename", hash_map_metatablename_lua}, + {"clear", hash_map_clear_lua}, + {"put", hash_map_put_lua}, + {"fill", hash_map_fill_lua}, + {"adjustCounter", hash_map_adjust_counter_lua}, + {"getCounter", hash_map_get_counter_lua}, + {"setCounter", hash_map_set_counter_lua}, + {"get", hash_map_get_lua}, + {"getInplace", hash_map_get_inplace_lua}, + {"del", hash_map_del_lua}, + {"size", hash_map_size_lua}, + {"safe", hash_map_autolock_on_lua}, + {"unsafe", hash_map_autolock_off_lua}, + {"toTensors", hash_map_to_tensor_lua}, + {"new", hash_map_init_lua}, + {NULL, NULL} +}; + +DLL_EXPORT int luaopen_libdecisiontree(lua_State *L) +{ + // HashMap + luaL_newmetatable(L, "dt.HashMap"); + lua_pushstring(L, "__index"); + lua_pushvalue(L, -2); + lua_settable(L, -3); + luaT_setfuncs(L, decisiontree_hash_map_routines, 0); + + nn_FloatLogitBoostCriterion_init(L); + nn_DoubleLogitBoostCriterion_init(L); + + nn_FloatDFD_init(L); + nn_DoubleDFD_init(L); + + nn_FloatS2D_init(L); + nn_DoubleS2D_init(L); + + nn_FloatCT_init(L); + nn_DoubleCT_init(L); + + nn_FloatGBDT_init(L); + nn_DoubleGBDT_init(L); + + return 1; +} diff --git a/contrib/lua-torch/decisiontree/init.lua b/contrib/lua-torch/decisiontree/init.lua new file mode 100644 index 
000000000..26f790b60 --- /dev/null +++ b/contrib/lua-torch/decisiontree/init.lua @@ -0,0 +1,70 @@ +require 'paths' +--require 'xlua' +require 'string' +require 'os' +--require 'sys' +require 'nn' + +-- these actually return local variables but we will re-require them +-- when needed. This is just to make sure they are loaded. +require 'moses' + +unpack = unpack or table.unpack + +local dt = require 'decisiontree._env' + +-- c lib: +require "paths" +paths.require 'libdecisiontree' + +dt.HashMap = torch.getmetatable("dt.HashMap").new + +dt.EPSILON = 1e-6 + +-- experimental Tensor-like container +require 'decisiontree.SparseTensor' + +-- functions +require 'decisiontree.math' +require 'decisiontree.utils' + +-- for multi-threading +--require 'decisiontree.WorkPool' + +-- abstract classes +require 'decisiontree.DecisionTree' +require 'decisiontree.DecisionForest' +require 'decisiontree.DecisionForestTrainer' +require 'decisiontree.TreeState' + +-- for CaRTree inference +require 'decisiontree.CartNode' +require 'decisiontree.CartTree' + +-- Criterions (extended with updateHessInput and backward2) +require 'decisiontree.MSECriterion' +require 'decisiontree.LogitBoostCriterion' + +-- Used by both RandomForestTrainer and GradientBoostTrainer +require 'decisiontree.CartTrainer' + +-- Used by CartTrainer +require 'decisiontree.DataSet' + +-- Random Forest Training +require 'decisiontree.RandomForestTrainer' +require 'decisiontree.GiniState' -- TreeState subclass + +-- Gradient Boosted Decision Tree Training +require 'decisiontree.GradientBoostTrainer' +require 'decisiontree.GradientBoostState' -- TreeState subclass + +-- unit tests and benchmarks +--require 'decisiontree.test' +--require 'decisiontree.benchmark' + +-- nn.Module +require 'decisiontree.DFD' +require 'decisiontree.Sparse2Dense' + +return dt diff --git a/contrib/lua-torch/decisiontree/internal_hash_map.h b/contrib/lua-torch/decisiontree/internal_hash_map.h new file mode 100644 index 000000000..bc8c523ef --- /dev/null +++ b/contrib/lua-torch/decisiontree/internal_hash_map.h @@ -0,0 +1,13 @@ +#include "khash.h" +#include <pthread.h> + +KHASH_MAP_INIT_INT64(long, long) +typedef khash_t(long)* internal_hash_map_t; + +typedef struct { + hash_map_t h; + int refcount; + pthread_mutex_t mutex; + int autolock; + long counter; +} hash_map_lua_t; diff --git a/contrib/lua-torch/decisiontree/khash.h b/contrib/lua-torch/decisiontree/khash.h new file mode 100644 index 000000000..20e994063 --- /dev/null +++ b/contrib/lua-torch/decisiontree/khash.h @@ -0,0 +1,627 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2013-05-02 (0.2.8): + + * Use quadratic probing. When the capacity is power of 2, stepping function + i*(i+1)/2 guarantees to traverse each bucket. It is better than double + hashing on cache performance and is more robust than linear probing. + + In theory, double hashing should be more robust than quadratic probing. + However, my implementation is probably not for large hash tables, because + the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. + + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + + 2011-12-29 (0.2.7): + + * Minor code clean up; no actual effect. + + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. + + 2009-09-26 (0.2.4): + + * Improve portability + + 2008-09-19 (0.2.3): + + * Corrected the example + * Improved interfaces + + 2008-09-11 (0.2.2): + + * Improved speed a little in kh_put() + + 2008-09-10 (0.2.1): + + * Added kh_clear() + * Fixed a compiling error + + 2008-09-02 (0.2.0): + + * Changed to token concatenation which increases flexibility. + + 2008-08-31 (0.1.2): + + * Fixed a bug in kh_get(), which has not been tested previously. + + 2008-08-31 (0.1.1): + + * Added destructor +*/ + + +#ifndef __AC_KHASH_H +#define __AC_KHASH_H + +/*! + @header + + Generic hash table library. 
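+
+  A minimal sketch of the 64-bit map variant this package instantiates via
+  KHASH_MAP_INIT_INT64(long, long) in internal_hash_map.h; like the example
+  above, it lives inside this comment and is not compiled:
+
+#include "khash.h"
+KHASH_MAP_INIT_INT64(long, long)
+int main() {
+  int ret;
+  khash_t(long) *h = kh_init(long);
+  khiter_t k = kh_put(long, h, 42, &ret); // insert key 42; ret is 0 iff the key was already present
+  kh_value(h, k) = 7;                     // associate value 7 with key 42
+  k = kh_get(long, h, 42);                // look the key up again
+  if (k != kh_end(h)) kh_del(long, h, k); // found it; delete the entry
+  kh_destroy(long, h);
+  return 0;
+}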
+ */ + +#define AC_VERSION_KHASH_H "0.2.8" + +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +/* compiler specific configuration */ + +#if UINT_MAX == 0xffffffffu +typedef unsigned int khint32_t; +#elif ULONG_MAX == 0xffffffffu +typedef unsigned long khint32_t; +#endif + +#if ULONG_MAX == ULLONG_MAX +typedef unsigned long khint64_t; +#else +typedef unsigned long long khint64_t; +#endif + +#ifndef kh_inline +#ifdef _MSC_VER +#define kh_inline __inline +#else +#define kh_inline inline +#endif +#endif /* kh_inline */ + +#ifndef klib_unused +#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__ ((__unused__)) +#else +#define klib_unused +#endif +#endif /* klib_unused */ + +typedef khint32_t khint_t; +typedef khint_t khiter_t; + +#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) +#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) +#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) +#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) +#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) +#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) +#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) + +#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#ifndef kcalloc +#define kcalloc(N,Z) calloc(N,Z) +#endif +#ifndef kmalloc +#define kmalloc(Z) malloc(Z) +#endif +#ifndef krealloc +#define krealloc(P,Z) realloc(P,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + +static const double __ac_HASH_UPPER = 0.77; + +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct kh_##name##_s { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; + +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t *kh_init_##name(void); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khint_t x); + +#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + kfree((void *)h->keys); kfree(h->flags); \ + kfree((void *)h->vals); \ + kfree(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khint_t k, i, last, mask, step = 0; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + (++step)) & mask; \ + if (i == last) return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i)? 
h->n_buckets : i; \ + } else return 0; \ + } \ + SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ + khint32_t *new_flags = 0; \ + khint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) return -1; \ + memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { kfree(new_flags); return -1; } \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) { kfree(new_flags); return -1; } \ + h->vals = new_vals; \ + } \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isdel_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t k, i, step = 0; \ + k = __hash_func(key); \ + i = k & new_mask; \ + while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ + __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + kfree(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + return 0; \ + } \ + SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ + *ret = -1; return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ + *ret = -1; return h->n_buckets; \ + } \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + { \ + khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + last = i; \ + while (!__ac_isempty(h->flags, i) && 
(__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + (++step)) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ + { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +/* --- BEGIN OF HASH FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [khint32_t] + @return The hash value [khint_t] + */ +#define kh_int_hash_func(key) (khint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [khint64_t] + @return The hash value [khint_t] + */ +#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +static kh_inline khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +static kh_inline khint_t __ac_Wang_hash(khint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*! + @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name() + +/*! @function + @abstract Destroy a hash table. 
+ @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Reset a hash table without deallocating memory. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! @function + @abstract Insert a key to the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @param r Extra return code: -1 if the operation failed; + 0 if the key is present in the hash table; + 1 if the bucket is empty (never used); 2 if the element in + the bucket has been deleted [int*] + @return Iterator to the inserted element [khint_t] + */ +#define kh_put(name, h, k, r) kh_put_##name(h, k, r) + +/*! @function + @abstract Retrieve a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] + */ +#define kh_get(name, h, k) kh_get_##name(h, k) + +/*! @function + @abstract Remove a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Iterator to the element to be deleted [khint_t] + */ +#define kh_del(name, h, k) kh_del_##name(h, k) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get value given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->vals[x]) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) ((h)->vals[x]) + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The end iterator [khint_t] + */ +#define kh_end(h) ((h)->n_buckets) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->size) + +/*! @function + @abstract Get the number of buckets in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of buckets in the hash table [khint_t] + */ +#define kh_n_buckets(h) ((h)->n_buckets) + +/*! 
@function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! @function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/* More convenient interfaces */ + +/*! @function + @abstract Instantiate a hash set containing integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash set containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash set containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*!
@function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + +#endif /* __AC_KHASH_H */ diff --git a/contrib/lua-torch/decisiontree/math.lua b/contrib/lua-torch/decisiontree/math.lua new file mode 100644 index 000000000..eb71b31ed --- /dev/null +++ b/contrib/lua-torch/decisiontree/math.lua @@ -0,0 +1,84 @@ +local dt = require "decisiontree._env" + +local PSEUDOCOUNT = 1.0 +local MIN_LOGISTIC = 1E-8 +local MAX_LOGISTIC = 1.0 - MIN_LOGISTIC + +-- Create counts of possible results (last column of each row is the result) +function dt.uniquecounts(counts, inputset, nclass) + counts = counts or inputset.input.new() + nclass = nclass or inputset.target:max() + counts:resize(nclass):zero() + + inputset.target:apply(function(c) counts[c] = counts[c] + 1 end) + return counts +end + +-- Entropy is the sum of -p(x)log(p(x)) across all the different possible results +local counts, logprobs +function dt.entropy(inputset, nclass) + local dt = require 'decisiontree' + counts = dt.uniquecounts(counts, inputset, nclass) + -- convert counts to categorical probabilities + counts:add(0.0000001) -- prevent NaN + counts:div(counts:sum()) + + logprobs = logprobs or counts.new() + logprobs:resize(counts:size()) + logprobs:log(counts):div(math.log(2)) -- log2(x) + + counts:cmul(logprobs) + + return -counts:sum() +end + +-- Compute and return the probability of positive label. +function dt.probabilityPositive(nPositive, nTotal) + return (nPositive + PSEUDOCOUNT) / (nTotal + 2.0 * PSEUDOCOUNT); +end + +-- Ref. https://en.wikipedia.org/wiki/Logit +-- Calculates logit of the probability. +-- Logit represents the log-odds. Probabilities transformed to logit 'space' can be combined linearly. +function dt.logit(p) + assert(p >= 0.0 and p <= 1.0, "Expecting probability for arg 1") + local truncatedP = math.max(MIN_LOGISTIC, math.min(MAX_LOGISTIC, p)) + return math.log(truncatedP / (1.0 - truncatedP)) +end + +function dt.logistic(x) + return (x >= 0) and (1 / (1 + math.exp(-x))) or (1 - 1 / (1 + math.exp(x))) +end + +function dt.computeGradientBoostLoss(gradient, hessian) + return -gradient * gradient / hessian +end + +function dt.computeNewtonScore(gradient, hessian) + return -0.5 * gradient / hessian; +end + +-- Calculates the logit score for a Node in a Decision Tree based on the probability of a positive label. +-- params: number of positive examples and total number of examples. +function dt.calculateLogitScore(nPositive, nTotal) + local dt = require 'decisiontree' + return dt.logit(dt.probabilityPositive(nPositive, nTotal)) +end + +-- Compute and return the Gini impurity score based on an input contingency table. 
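+-- A worked example with illustrative numbers: a split sending 40 examples left
+-- (10 of them positive) and 60 right (45 positive) yields
+--   pLeft = 10/40 = 0.25, leftGini = 0.25 * 0.75 = 0.1875
+--   pRight = 45/60 = 0.75, rightGini = 0.75 * 0.25 = 0.1875
+-- so the size-weighted impurity below is (40*0.1875 + 60*0.1875)/100 = 0.1875.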
+function dt.computeGini(leftCount, positiveLeftCount, rightCount, positiveRightCount) + assert(torch.type(leftCount) == 'number', 'Expecting total number examples falling into leftBranch.') + assert(torch.type(positiveLeftCount) == 'number', 'Expecting total number of positive examples falling into left branch.') + assert(torch.type(rightCount) == 'number', 'Expecting total number of examples falling into the right branch.') + assert(torch.type(positiveRightCount) == 'number', 'Expecting total number of positive examples falling into the right branch.') + + local total = leftCount + rightCount + + local pPositiveLeft = leftCount == 0 and 0 or (positiveLeftCount / leftCount) + local leftGini = pPositiveLeft * (1.0 - pPositiveLeft) + + local pPositiveRight = rightCount == 0 and 0 or (positiveRightCount / rightCount) + local rightGini = pPositiveRight * (1.0 - pPositiveRight) + + return (leftCount * leftGini + rightCount * rightGini) / total +end
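+
+-- Usage sketch (kept as a comment; not exercised by test.lua): away from the
+-- MIN_LOGISTIC/MAX_LOGISTIC clamping bounds, dt.logistic inverts dt.logit:
+--   local p = 0.9
+--   local x = dt.logit(p)            -- log(0.9/0.1), about 2.1972
+--   assert(math.abs(dt.logistic(x) - p) < dt.EPSILON)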
\ No newline at end of file diff --git a/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec b/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec new file mode 100644 index 000000000..d5d916275 --- /dev/null +++ b/contrib/lua-torch/decisiontree/rocks/decisiontree-scm-1.rockspec @@ -0,0 +1,40 @@ +package = "decisiontree" +version = "scm-1" + +source = { + url = "git://github.com/Twitter/decisiontree", + tag = "master" +} + +description = { + summary = "Decision trees for Torch by Twitter", + detailed = [[ + Classification and regression trees (CART). + Gradient boosted decision trees (GBDT). + ]], + homepage = "https://github.com/Twitter/decisiontree", + license = "BSD" +} + +dependencies = { + "torch >= 7.0", + "moses >= 1.3.1", + "xlua >= 1.0", + "image >= 1.0", + "luafilesystem >= 1.6.2", + "sys >= 1.1", + "paths >= 1.0", + "ipc >= 1.0", + "nn >= 1.0" +} + +build = { + type = "command", + build_command = [[ +cmake -E make_directory build; +cd build; +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)" -DCMAKE_C_FLAGS=-fPIC -DCMAKE_CXX_FLAGS=-fPIC; +$(MAKE) + ]], + install_command = "cd build && $(MAKE) install" +} diff --git a/contrib/lua-torch/decisiontree/test.lua b/contrib/lua-torch/decisiontree/test.lua new file mode 100644 index 000000000..80510a4f6 --- /dev/null +++ b/contrib/lua-torch/decisiontree/test.lua @@ -0,0 +1,817 @@ +local dt = require "decisiontree._env" + +local dttest = {} +local nloop = 50 +local epsilon = 0.000001 +local mytester + +--e.g. usage: th -e "dt = require 'decisiontree'; dt.test()" + +-- test 99% accuracy +local function testAccuracy(cartTree, name, dataset, minacc) + assert(torch.isTypeOf(dataset, 'dt.DataSet')) + minacc = minacc or 0.99 + local output = torch.Tensor(dataset:size()) + local target, input = dataset.target, dataset.input + + for i=1,dataset:size() do + local stack = {} + local score = cartTree:score(input[i], stack) + output[i] = score >= 0 and 1 or 0 + + if dt.VERBOSE and torch.type(cartTree) == 'dt.CartTree' and target[i] ~= output[i] then + print(cartTree:stackToString(stack, input[i])) + print(i, score, target[i], output[i]) + end + end + + local accuracy = torch.eq(target, output):float():mean() + mytester:assert(accuracy >= minacc, name .. ": insufficient accuracy: " .. accuracy .. " < " ..
minacc) +end + +function dttest.SparseTensor() + local keys = torch.LongTensor{1,5,6,10} + local values = torch.randn(keys:size(1)) + local st = torch.SparseTensor(keys, values) + + mytester:assert(st[1] == values[1]) + mytester:assert(st[5] == values[2]) + mytester:assert(st[6] == values[3]) + mytester:assert(st[10] == values[4]) + + mytester:assert(st[2] == nil) + + st:buildIndex() + + mytester:assert(st[1] == values[1]) + mytester:assert(st[5] == values[2]) + mytester:assert(st[6] == values[3]) + mytester:assert(st[10] == values[4]) + + mytester:assert(st[2] == nil) + + -- test empty sparse tensor + + local est = torch.SparseTensor() +end + +function dttest.GiniState() + local featureId = 2 + local minLeafSize = 0 + + local input = torch.Tensor({{0,1,0},{0,2,0},{0,3,0}}) + local target = torch.Tensor({1, 1, 1}) + local dataset = dt.DataSet(input, target, 3) + + local splitInfo1 = {_id=1} + local splitInfo2 = {_id=2, leftChildSize = 1, rightChildSize = 2, splitGain = 0} + local splitInfo3 = {_id=3, leftChildSize = 2, rightChildSize = 1, splitGain = -1} + + local exampleIds = torch.LongTensor{1,2,3} + + local treeState = dt.GiniState(exampleIds) + + function treeState.computeSplitInfo(self, splitFeatureId, splitFeatureValue) + if splitFeatureId == featureId and splitFeatureValue == 2 then + return splitInfo2 + elseif splitFeatureId == featureId and splitFeatureValue == 3 then + return splitInfo3 + else + error("Unhandled computeSplitInfo call "..splitFeatureId.." "..splitFeatureValue) + end + end + + local splitInfo = treeState:findBestFeatureSplit(dataset, featureId, minLeafSize) + mytester:assert(splitInfo._id == splitInfo3._id) +end + +function dttest.CartTree() + + local splitFeatureId = 100 + local splitFeatureValue = 1.0 + + local function getBinaryCartTreeRootNode() + + local leftNodeScore = 0.2 + local rightNodeScore = 0.4 + + local rootNode = dt.CartNode() + rootNode.nodeId = 0 + rootNode.score = 0.5 + rootNode.splitFeatureId = splitFeatureId + rootNode.splitFeatureValue = splitFeatureValue + + local leftChild = dt.CartNode() + leftChild.score = leftNodeScore + leftChild.nodeId = 1 + + local rightChild = dt.CartNode() + rightChild.score = rightNodeScore + rightChild.nodeId = 2 + + rootNode.leftChild = leftChild + rootNode.rightChild = rightChild + + return rootNode + end + + local function testScoreCartTreeBranchLeftIfMissing() + local rootNode = getBinaryCartTreeRootNode() + + local cartTree = dt.CartTree(rootNode) + + local continuousFeatures = torch.SparseTensor() + + local score, nodeId = cartTree:score(continuousFeatures) + + mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon) + mytester:assert(rootNode.leftChild.nodeId == nodeId) + end + + local function testBranchRightWithFeature() + local rootNode = getBinaryCartTreeRootNode() + + local cartTree = dt.CartTree(rootNode) + + local continuousFeatures = torch.zeros(100) + continuousFeatures[splitFeatureId] = splitFeatureValue + + local score, nodeId = cartTree:score(continuousFeatures) + + mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon) + mytester:assert(rootNode.rightChild.nodeId == nodeId) + end + + local function testMissingRightNode() + local rootNode = getBinaryCartTreeRootNode() + + rootNode.rightChild = nil + + local cartTree = dt.CartTree(rootNode) + + local continuousFeatures = torch.Tensor() + + local score, nodeId = cartTree:score(continuousFeatures) + + mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon) + mytester:assert(rootNode.leftChild.nodeId ==
nodeId) + end + + local function testMissingLeftNode() + local rootNode = getBinaryCartTreeRootNode() + + rootNode.leftChild = nil + + local cartTree = dt.CartTree(rootNode) + + local continuousFeatures = torch.Tensor() + + local score, nodeId = cartTree:score(continuousFeatures) + + mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon) + mytester:assert(rootNode.rightChild.nodeId == nodeId) + end + + local function testMissingAllChildren() + local rootNode = getBinaryCartTreeRootNode() + + rootNode.leftChild = nil + rootNode.rightChild = nil + + local cartTree = dt.CartTree(rootNode) + + local continuousFeatures = torch.Tensor() + + local score, nodeId = cartTree:score(continuousFeatures) + + mytester:assert(math.abs(rootNode.score - score) < epsilon) + mytester:assert(rootNode.nodeId == nodeId) + end + + local function testScoreCartTreeBranchRandomlyRight() + local rootNode = getBinaryCartTreeRootNode(); + + -- Force Branch Right + local cartTree = dt.CartTree(rootNode, function() return false end); + + local continuousFeatures = torch.SparseTensor() + + local score, nodeId = cartTree:score(continuousFeatures) + + mytester:assert(math.abs(rootNode.rightChild.score - score) < epsilon) + mytester:assert(rootNode.rightChild.nodeId == nodeId) + end + + local function testScoreCartTreeBranchRandomlyLeft() + local rootNode = getBinaryCartTreeRootNode(); + + -- Force Branch Left + local cartTree = dt.CartTree(rootNode, function() return true end); + + local continuousFeatures = torch.SparseTensor() + + local score, nodeId = cartTree:score(continuousFeatures) + + mytester:assert(math.abs(rootNode.leftChild.score - score) < epsilon) + mytester:assert(rootNode.leftChild.nodeId == nodeId) + end + + testScoreCartTreeBranchLeftIfMissing() + testBranchRightWithFeature() + testMissingRightNode() + testMissingLeftNode() + testMissingAllChildren() + testScoreCartTreeBranchRandomlyRight() + testScoreCartTreeBranchRandomlyLeft() + +end + +function dttest.TreeState_branch() + local _ = require 'moses' + local binFeatureId = 1 + local featureId = 2 + + local input = { + torch.SparseTensor(torch.LongTensor{binFeatureId},torch.Tensor{1}), + torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{1,1}), + torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{0,2}), + torch.SparseTensor(torch.LongTensor{binFeatureId,featureId},torch.Tensor{0,3}) + } + local target = torch.LongTensor(4):fill(1) + + local dataset = dt.DataSet(input, target) + + local treeState = dt.TreeState(torch.LongTensor():range(1,4)) + local splitInfo = {splitId = binFeatureId, splitValue = 1} + + local function testBranchBinaryFeature() + splitInfo = {splitId = binFeatureId, splitValue = 1} + local leftBranch, rightBranch = treeState:branch(splitInfo, dataset) + mytester:assert(leftBranch ~= nil and rightBranch ~= nil) + + mytester:assert(2 == leftBranch:size()) + mytester:assert(leftBranch:contains(3)) + mytester:assert(leftBranch:contains(4)) + + mytester:assert(2 == rightBranch:size()) + mytester:assert(rightBranch:contains(1)) + mytester:assert(rightBranch:contains(2)) + end + + local function testBranchContinuousFeature() + local splitValue = 2 + splitInfo = {splitId = featureId, splitValue = splitValue} + + local leftBranch, rightBranch = treeState:branch(splitInfo, dataset) + mytester:assert(leftBranch ~= nil and rightBranch ~= nil) + + mytester:assert(1 == leftBranch:size()) + mytester:assert(leftBranch:contains(2)) + + mytester:assert(2 == rightBranch:size()) + 
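+      -- featureId values 2 and 3 are >= splitValue, so examples 3 and 4 land
+      -- in the right branch; example 1, which lacks featureId entirely, was
+      -- dropped from both branches (hence 1 + 2 of the 4 examples survive):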
mytester:assert(rightBranch:contains(3)) + mytester:assert(rightBranch:contains(4)) + end + + testBranchBinaryFeature() + testBranchContinuousFeature() + +end + +function dttest.DecisionForest() + -- Create a test decision forest; each tree has only a single node, and returns score == score of its root node. + + local function createCartTreeWithSingleNode(score) + local cartNode = dt.CartNode() + cartNode.score = score + return dt.CartTree(cartNode) + end + + local function getTestDecisionForest() + local cartTrees = { + createCartTreeWithSingleNode(1), + createCartTreeWithSingleNode(2), + createCartTreeWithSingleNode(3) + } + local weight = torch.Tensor{10,20,30} + local bias = 0.5 + + return dt.DecisionForest(cartTrees, weight, bias) + end + + local function testScoreDecisionForest() + local df = getTestDecisionForest() + local continuousFeatures = torch.SparseTensor() + + local expectedResult = 1.0 * 10.0 + 2.0 * 20.0 + 3.0 * 30.0 + 0.5; + local result = df:score(continuousFeatures) + + mytester:assert(math.abs(expectedResult - result) < epsilon) + end + + testScoreDecisionForest() +end + +function dttest.CartTrainer() + local minLeafSize, maxLeafNodes = 1, 1000 + local nExample = 100 + + -- 1. dense dataset + local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample) + + -- assert that the dataset is valid + for clusterId, exampleIds in ipairs(clusterExamples) do + local exampleIdx = torch.LongTensor(exampleIds) + local input = inputs:index(1,exampleIdx) + local target = targets:index(1,exampleIdx) + assert(input:std(1):mean() < 0.05) + end + + local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes) + local treeState = dt.GiniState(trainSet:getExampleIds()) + local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds) + + mytester:assert(nleaf == nExample) -- for dense inputs, minLeafSize = 1 and maxLeafNodes = inf, this is true + testAccuracy(cartTree, "dense train single-thread first", trainSet, 0.99) + testAccuracy(cartTree, "dense valid single-thread first", validSet, 0.7) -- they don't generalize very well... + + local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds) + testAccuracy(cartTree, "dense single-thread second", trainSet) + + -- test feature parallelization + local nThread = 2 + cartTrainer:featureParallel(nThread) + local treeState = dt.GiniState(trainSet:getExampleIds()) + local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds) + testAccuracy(cartTree, "dense feature-parallel", trainSet) + + -- 2.
sparse-dense dataset + local trainSet, validSet, clusterExamples, inputs, targets = dt.getSparseDummyData(nExample, nil, 10, nil, nil, 10) + + -- assert that the dataset is valid + for clusterId, exampleIds in ipairs(clusterExamples) do + local input = torch.Tensor(#exampleIds, 10):zero() + for i, exampleId in ipairs(exampleIds) do + input[i]:indexCopy(1, inputs[exampleId].keys, inputs[exampleId].values) + end + assert(input:std(1):mean() < 0.05) + end + + local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes) + local treeState = dt.GiniState(trainSet:getExampleIds()) + local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds) + + mytester:assert(nleaf == nExample) -- for dense inputs, minLeafSize = 1 and maxLeafNodes = inf, this is true + testAccuracy(cartTree, "sparse-dense train single-thread first", trainSet, 0.99) + + local shuffle = torch.LongTensor():randperm(10) + for i, input in ipairs(inputs) do + input.keys = input.keys:index(1, shuffle) + input.values = input.values:index(1, shuffle) + input._map = nil + end + testAccuracy(cartTree, "sparse-dense shuffled keys train single-thread first", trainSet, 0.99) + testAccuracy(cartTree, "sparse-dense valid single-thread first", validSet, 0.8) + + -- 3. sparse dataset + local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9) + + local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNodes) + local treeState = dt.GiniState(trainSet:getExampleIds()) + local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds) + cartTree.branchleft = function() return true end + + mytester:assert(nleaf < nExample) -- sparse inputs drop examples with missing features, so there are fewer leaves than examples + testAccuracy(cartTree, "sparse train single-thread first", trainSet, 0.9) -- the TreeBrancher drops examples with missing features, making it difficult to overfit + testAccuracy(cartTree, "sparse valid single-thread first", validSet, 0.8) +end + +function dttest.GradientBoostTrainer() + local nExample = 100 + local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9) + + local maxLeafNode, minLeafSize = nExample/2, nExample/10 + local loss = nn.LogitBoostCriterion(false) + + local cartTrainer = dt.CartTrainer(trainSet, minLeafSize, maxLeafNode) + + local opt = { + lossFunction=loss, + treeTrainer=cartTrainer, + shrinkage=0.1, + downsampleRatio=6, + featureBaggingSize=-1, + nTree=14, + evalFreq=8, + earlyStop=0 -- no early-stopping + } + + -- test single-thread + local trainer = dt.GradientBoostTrainer(opt) + local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet) + + mytester:assert(#decisionForest.trees == opt.nTree) + testAccuracy(decisionForest, "sparse train single-thread first", trainSet, 0.98) + testAccuracy(decisionForest, "sparse valid single-thread first", validSet, 0.95) + + -- test stateless + local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet) + + mytester:assert(#decisionForest.trees == opt.nTree) + testAccuracy(decisionForest, "sparse train single-thread second", trainSet, 0.98) + testAccuracy(decisionForest, "sparse valid single-thread second", validSet, 0.95) + + -- test feature-parallel + local nThread = 2 + cartTrainer:featureParallel(nThread) + + local trainer = dt.GradientBoostTrainer(opt) + local decisionForest = trainer:train(trainSet, trainSet.featureIds, validSet) + + mytester:assert(#decisionForest.trees == opt.nTree) + testAccuracy(decisionForest, "sparse train feature-parallel first", trainSet, 0.98) +
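+   -- (featureParallel() shards the candidate features across nThread workers;
+   -- each worker scores its shard via findBestSplit with a shardId/nShard pair,
+   -- as exercised in dttest.WorkPool below, so the accuracy thresholds here
+   -- match the single-thread expectations)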
testAccuracy(decisionForest, "sparse valid feature-parallel first", validSet, 0.95) + +end + +function dttest.RandomForestTrainer() + local nExample = 100 + local trainSet, validSet = dt.getSparseDummyData(nExample, 2, 10, nil, nil, 9) + + local opt = { + activeRatio=0.5, + featureBaggingSize=5, + nTree=14, + maxLeafNodes=nExample/2, + minLeafSize=nExample/10, + } + + local trainer = dt.RandomForestTrainer(opt) + + local decisionForest = trainer:train(trainSet, trainSet.featureIds) + mytester:assert(#decisionForest.trees == opt.nTree) + testAccuracy(decisionForest, "sparse train single-thread first", trainSet, 0.98) + testAccuracy(decisionForest, "sparse valid single-thread first", validSet, 0.95) + + -- test stateless + local decisionForest = trainer:train(trainSet, trainSet.featureIds) + + mytester:assert(#decisionForest.trees == opt.nTree) + testAccuracy(decisionForest, "sparse train single-thread second", trainSet, 0.98) + testAccuracy(decisionForest, "sparse valid single-thread second", validSet, 0.95) + + -- test tree-parallel + local nThread = 2 + trainer:treeParallel(nThread) + + local trainer = dt.RandomForestTrainer(opt) + local decisionForest = trainer:train(trainSet, trainSet.featureIds) + + mytester:assert(#decisionForest.trees == opt.nTree) + testAccuracy(decisionForest, "sparse train tree-parallel first", trainSet, 0.98) + testAccuracy(decisionForest, "sparse valid tree-parallel first", validSet, 0.95) +end + +function dttest.WorkPool() + local nThread = 2 + local wp = dt.WorkPool(nThread) + + -- 1. some easy tests + local store = {key='nick',value=7} + wp:update('storeKeyValue', store) + + wp:update('require', {libname='decisiontree', varname='dt'}) + + local bias = 2 + local obj = nn.MSECriterion() + wp:update('require', {libname='decisiontree', varname='dt'}) + wp:writeup('execute', function(store) return bias + obj:updateOutput(torch.Tensor{1},torch.Tensor{1}) + store.nick end) + + local taskname, res = wp:read() + mytester:assert(taskname == 'execute') + mytester:assert(res == 9) + + -- 2. 
trying to reproduce a difficult error + local trainSet, validSet = dt.getSparseDummyData() + + -- setup worker store (each worker will have its own copy) + local store = { + trainSet=trainSet, + minLeafSize=2 + } + wp:update('storeKeysValues', store) + + -- arguments/upvalues + local treeState = dt.GiniState(trainSet:getExampleIds()) + local shardId = 1 + local nShard = nThread + local featureIds = trainSet.featureIds + + local task = function(store, args) + assert(store.trainSet) + assert(store.minLeafSize) + + local bestSplit = args.treeState:findBestSplit(store.trainSet, args.featureIds, store.minLeafSize, args.shardId, args.nShard) + return bestSplit + end + local args = {treeState=treeState,featureIds=featureIds,shardId=shardId,nShard=nShard} + wp:writeup("execute", {func=task,args=args}) + + local taskname, bestSplit = wp:read() + mytester:assert(taskname == 'execute') + mytester:assert(torch.type(bestSplit) == 'table') + + -- closure + local task = function(store) + assert(store.trainSet) + assert(store.minLeafSize) + + local bestSplit = treeState:findBestSplit(store.trainSet, featureIds, store.minLeafSize, shardId, nShard) + return bestSplit + end + wp:writeup("execute", task) + + local taskname, bestSplit = wp:read() + mytester:assert(taskname == 'execute') + mytester:assert(torch.type(bestSplit) == 'table') + + local task = function(store, args) + assert(store.trainSet) + assert(torch.isTypeOf(treeState, 'dt.TreeState'), torch.type(treeState)) + + local bestSplit = treeState:findBestSplit(store.trainSet, featureIds, store.minLeafSize, shardId, nShard) + return bestSplit + end + local args = {featureIds=featureIds,shardId=shardId,nShard=nShard} + wp:writeup("execute", {func=task,args=args}) + + + local taskname, bestSplit = wp:read() + mytester:assert(taskname == 'execute') + mytester:assert(torch.type(bestSplit) == 'table') + + wp:terminate() +end + +function dttest.Sparse2Dense() + local batchsize = 4 + local minFeatureId, maxFeatureId = 10, 100 + local input = {{},{}} + for i=1,batchsize do + local inputsize = math.random(5,10) + input[1][i] = torch.LongTensor(inputsize):random(minFeatureId,maxFeatureId) + input[2][i] = torch.Tensor(inputsize):uniform(0,1) + end + local s2d = nn.Sparse2Dense(torch.LongTensor():range(minFeatureId,maxFeatureId)) + -- test 2d forward + local output = s2d:forward(input) + local output2 = torch.Tensor(batchsize, maxFeatureId-minFeatureId+1):zero() + local featureMap = {} + local j = 0 + for i=minFeatureId,maxFeatureId do + j = j + 1 + featureMap[i] = j + end + for i=1,batchsize do + local keys, values = input[1][i], input[2][i] + for j=1,keys:size(1) do + output2[{i,featureMap[keys[j] ]}] = values[j] + end + end + mytester:assertTensorEq(output, output2, 0.000001) + -- test 1d forward + local input = {input[1][batchsize], input[2][batchsize]} + local output = s2d:forward(input) + mytester:assertTensorEq(output, output2[batchsize], 0.000001) +end + +function dttest.Sparse2DenseDouble() + local batchsize = 4 + local minFeatureId, maxFeatureId = 10, 100 + local input = {{},{}} + for i=1,batchsize do + local inputsize = math.random(5,10) + input[1][i] = torch.LongTensor(inputsize):random(minFeatureId,maxFeatureId) + input[2][i] = torch.Tensor(inputsize):uniform(0,1):double() + end + local s2d = nn.Sparse2Dense(torch.LongTensor():range(minFeatureId,maxFeatureId)) + s2d:double() + -- test 2d forward + local output = s2d:forward(input) + local output2 = torch.Tensor(batchsize, maxFeatureId-minFeatureId+1):zero():double() + local featureMap = {} + local j = 
0 + for i=minFeatureId,maxFeatureId do + j = j + 1 + featureMap[i] = j + end + for i=1,batchsize do + local keys, values = input[1][i], input[2][i] + for j=1,keys:size(1) do + output2[{i,featureMap[keys[j] ]}] = values[j] + end + end + mytester:assertTensorEq(output, output2, 0.000001) + -- test 1d forward + local input = {input[1][batchsize], input[2][batchsize]} + local output = s2d:forward(input) + mytester:assertTensorEq(output, output2[batchsize], 0.000001) +end + +function dttest.LogitBoostCriterion() + local input = torch.randn(10) + local target = torch.LongTensor(10):random(0,1):type(torch.type(input)) + + local lb = nn.LogitBoostCriterion(false) + local loss = lb:updateOutput(input, target) + + local loss2 = 0 + for i=1,10 do + loss2 = loss2 + math.log(1 + math.exp(target[i] <= 0 and input[i] or -input[i])) + end + mytester:assert(math.abs(loss - loss2) < 0.00001) + + local gradInput = lb:updateGradInput(input, target) + local gradInput2 = gradInput:clone():zero() + for i=1,10 do + local p = dt.logistic(input[i]) + gradInput2[i] = (target[i] <= 0) and p or (p - 1) + end + mytester:assertTensorEq(gradInput, gradInput2, 0.000001) + + local hessInput = lb:updateHessInput(input, target) + local hessInput2 = hessInput:clone():zero() + for i=1,10 do + local p = dt.logistic(input[i]) + hessInput2[i] = p * (1.0 - p) + end + mytester:assertTensorEq(hessInput, hessInput2, 0.000001) +end + +function dttest.DFD() + local nExample = 100 + local batchsize = 4 + local inputsize = 10 + + -- train Random Forest + local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample, nil, inputsize) + local opt = { + activeRatio=0.5, + featureBaggingSize=5, + nTree=4, + maxLeafNodes=nExample/2, + minLeafSize=nExample/10, + } + local trainer = dt.RandomForestTrainer(opt) + local df = trainer:train(trainSet, trainSet.featureIds) + mytester:assert(#df.trees == opt.nTree) + + local dfd = nn.DFD(df) + dfd = nn.DFD(dfd:getReconstructionInfo()) + local dfd2 = nn.DFD(dfd:getReconstructionInfo(), true) + local input = validSet.input:sub(1,batchsize) + local output = dfd:forward(input) + local output2 = dfd2:forward(input) + + local _ = require 'moses' + + local function hasKey(keys,key) + local found = false + keys:apply(function(x) + if x == key then + found = true + end + end) + return found + end + + for i=1,batchsize do + local nodes = {} + local keys = output[1][i] + local keys2 = output2[1][i] + for j,tree in ipairs(df.trees) do + local stack = {} + tree:score(input[i], stack) + mytester:assert(hasKey(keys2, stack[#stack]._nodeId)) + + for k,node in ipairs(stack) do + if k > 1 then + assert(node._nodeId) + mytester:assert(hasKey(keys, node._nodeId), string.format("missing key=%d in %s", node._nodeId, tostring(keys))) + table.insert(nodes, node._nodeId) + end + end + end + mytester:assert(#nodes == keys:size(1)) + mytester:assert(#df.trees == keys2:size(1)) + end +end + +function dttest.DFDDouble() + local nExample = 100 + local batchsize = 4 + local inputsize = 10 + + -- train Random Forest + local trainSet, validSet, clusterExamples, inputs, targets = dt.getDenseDummyData(nExample, nil, inputsize) + local opt = { + activeRatio=0.5, + featureBaggingSize=5, + nTree=4, + maxLeafNodes=nExample/2, + minLeafSize=nExample/10, + } + local trainer = dt.RandomForestTrainer(opt) + local df = trainer:train(trainSet, trainSet.featureIds) + mytester:assert(#df.trees == opt.nTree) + + local dfd = nn.DFD(df) + dfd:double() + dfd = nn.DFD(dfd:getReconstructionInfo()) + local dfd2 = 
nn.DFD(dfd:getReconstructionInfo(), true) + local input = validSet.input:sub(1,batchsize):double() + local output = dfd:forward(input) + local output2 = dfd2:forward(input) + + local _ = require 'moses' + + local function hasKey(keys,key) + local found = false + keys:apply(function(x) + if x == key then + found = true + end + end) + return found + end + + for i=1,batchsize do + local nodes = {} + local keys = output[1][i] + local keys2 = output2[1][i] + for j,tree in ipairs(df.trees) do + local stack = {} + tree:score(input[i], stack) + mytester:assert(hasKey(keys2, stack[#stack]._nodeId)) + + for k,node in ipairs(stack) do + if k > 1 then + assert(node._nodeId) + mytester:assert(hasKey(keys, node._nodeId), string.format("missing key=%d in %s", node._nodeId, tostring(keys))) + table.insert(nodes, node._nodeId) + end + end + end + mytester:assert(#nodes == keys:size(1)) + mytester:assert(#df.trees == keys2:size(1)) + end +end + +function dttest.uniquecounts() -- DEPRECATED + local target = torch.LongTensor(100):random(1,3) + local input = torch.Tensor() + local inputset = {input=input, target=target} + + local counts = dt.uniquecounts(nil, inputset, 3) + + mytester:assert(counts:sum() == 100) + mytester:assert(counts:nElement() == 3) + + local res = torch.Tensor(3):zero() + target:apply(function(t) res[t] = res[t] + 1 end) + + mytester:assertTensorEq(counts, res) +end + +function dttest.entropy() -- DEPRECATED + -- 2 clusters with a bit overlap between classes: + local input = torch.Tensor(100,2) + input:narrow(1,1,50):normal(-1,.01) + input:narrow(1,51,50):normal(2,.01) + + local target = torch.LongTensor(100):fill(3) + target:narrow(1,1,45):fill(1) + target:narrow(1,56,45):fill(2) + + local inputset = {input=input, target=target} + + -- test entropy() + local fullent = dt.entropy(inputset) + + local halfset = {input=input:narrow(1,1,50), target=target:narrow(1,1,50)} + local halfent = dt.entropy(halfset) + + local perfectset = {input=input:narrow(1,56,45), target=target:narrow(1,56,45)} + local perfectent = dt.entropy(perfectset) + + mytester:assert(fullent > halfent) + mytester:assert(halfent > perfectent) + mytester:assert(perfectent < 0.0000001 and perfectent >= 0) +end + +function dt.test(tests) + math.randomseed(os.time()) + mytester = torch.Tester() + mytester:add(dttest) + mytester:run(tests) +end diff --git a/contrib/lua-torch/decisiontree/utils.h b/contrib/lua-torch/decisiontree/utils.h new file mode 100644 index 000000000..8a0196a58 --- /dev/null +++ b/contrib/lua-torch/decisiontree/utils.h @@ -0,0 +1,45 @@ +#include "error.h" + +#define check_tensors(L, a, b) \ + do { \ + if ((a)->nDimension != (b)->nDimension) \ + return LUA_HANDLE_ERROR_STR((L), "different tensor dimensions"); \ + for (int __local__var = 0; __local__var < (a)->nDimension; __local__var++) \ + if ((a)->size[__local__var] != (b)->size[__local__var]) \ + return LUA_HANDLE_ERROR_STR((L), "different tensor sizes"); \ + } while (0) + +#define check_tensor(L, t, type) \ + do { \ + if (!type##_isContiguous(t)) \ + return LUA_HANDLE_ERROR_STR((L), "tensor should be contiguous"); \ + } while (0) + +#define get_tensor_size(t, type) \ + (TH##type##Tensor_nElement(t)) + +#define get_tensor(L, idx, type) \ + (TH##type##Tensor *)luaT_checkudata(L, idx, "torch." 
#type "Tensor") + +static int push_table_contents(lua_State *L, int arg) +{ + int size = 0; + while(1) { + lua_checkstack(L, 1); + lua_rawgeti(L, arg, size + 1); + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + break; + } + size++; + } + return size; +} + +#define verify_push_table_contents(L, idx, count) do { \ + int __tmp_count = push_table_contents(L, idx); \ + if (__tmp_count != count) { \ + lua_pop(L, __tmp_count); \ + LUA_HANDLE_ERROR_STR(L, "Table sizes do not match"); \ + } \ + } while(0) diff --git a/contrib/lua-torch/decisiontree/utils.lua b/contrib/lua-torch/decisiontree/utils.lua new file mode 100644 index 000000000..c32c3d08b --- /dev/null +++ b/contrib/lua-torch/decisiontree/utils.lua @@ -0,0 +1,125 @@ +local dt = require "decisiontree._env" + +-- returns a buffer table local to a thread (no serialized) +function dt.getBufferTable(name) + local dt = require 'decisiontree' + assert(torch.type(name) == 'string') + dt.buffer = dt.buffer or {} + dt.buffer[name] = dt.buffer[name] or {} + return dt.buffer[name] +end + +function dt.getSparseDummyData(nExample, nCluster, nFeature, overlap, nValid, nActive) + local dt = require 'decisiontree' + if torch.type(nExample) == 'table' then + local opt = nExample + nExample = opt.nExample + nCluster = opt.nCluster + nFeature = opt.nFeature + overlap = opt.overlap + nValid = opt.nValid + nActive = opt.nActive + end + nExample = nExample or 100 -- training set size + nCluster = nCluster or 10 + assert(nCluster >= 2) + nFeature = math.max(2, nFeature or 10) + overlap = overlap or 0 + nValid = nValid or nExample/10 -- validation set size + nActive = nActive or math.max(2, nFeature / 2) + + -- sample nCluster centers + local clusterCenter = torch.rand(nCluster, nFeature) + local clusterLabel = torch.LongTensor(nCluster) + local clusterExamples = {} + for i=1,nCluster do + clusterCenter[i]:add(i) + clusterLabel[i] = i % 2 + clusterExamples[i] = {} + end + + local sparseCenter = torch.Tensor() + + local shuffle = torch.LongTensor() + + -- build dataset in pseudo-dense format + local inputs = {} + local targets = torch.Tensor(nExample+nValid) + for i=1,nExample+nValid do + local clusterIdx = torch.random(1,nCluster) + table.insert(clusterExamples[clusterIdx], i) + + shuffle:randperm(nFeature) + local keys = torch.LongTensor(nActive):copy(shuffle:narrow(1,1,nActive)) + sparseCenter:index(clusterCenter[clusterIdx], 1, keys) + local stdiv = i <= nExample and 100 or 1000 + local values = torch.randn(nActive):div(stdiv):add(sparseCenter) + + table.insert(inputs, torch.SparseTensor(keys, values)) + + local label = clusterLabel[clusterIdx] + if math.random() < overlap then + targets[i] = label == 1 and 0 or 1 + else + targets[i] = label + end + end + + local _ = require 'moses' + local validSet = dt.DataSet(_.slice(inputs, nExample+1, nExample+nValid), targets:narrow(1,nExample+1,nValid)) + local trainSet = dt.DataSet(_.slice(inputs, 1, nExample), targets:narrow(1,1,nExample)) + + return trainSet, validSet, clusterExamples, inputs, targets +end + +function dt.getDenseDummyData(nExample, nCluster, nFeature, overlap, nValid) + local dt = require 'decisiontree' + if torch.type(nExample) == 'table' then + local opt = nExample + nExample = opt.nExample + nCluster = opt.nCluster + nFeature = opt.nFeature + overlap = opt.overlap + nValid = opt.nValid + end + nExample = nExample or 100 -- training set size + nCluster = nCluster or 10 + assert(nCluster >= 2) + nFeature = math.max(2, nFeature or 10) + overlap = overlap or 0 + nValid = nValid or nExample/10 -- validation 
set size + + -- sample nCluster centers + local clusterCenter = torch.rand(nCluster, nFeature) + local clusterLabel = torch.LongTensor(nCluster) + local clusterExamples = {} + for i=1,nCluster do + clusterCenter[i]:add(i) + clusterLabel[i] = i % 2 + clusterExamples[i] = {} + end + + -- build dataset in pseudo-dense format + local inputs = torch.Tensor(nExample+nValid, nFeature) + local targets = torch.Tensor(nExample+nValid) + for i=1,nExample+nValid do + local clusterIdx = torch.random(1,nCluster) + table.insert(clusterExamples[clusterIdx], i) + + local stdiv = i <= nExample and 100 or 1000 + inputs[i]:normal():div(stdiv):add(clusterCenter[clusterIdx]) + + local label = clusterLabel[clusterIdx] + if math.random() < overlap then + targets[i] = label == 1 and 0 or 1 + else + targets[i] = label + end + end + + local _ = require 'moses' + local validSet = dt.DataSet(inputs:narrow(1,nExample+1,nValid), targets:narrow(1,nExample+1,nValid)) + local trainSet = dt.DataSet(inputs:narrow(1,1,nExample), targets:narrow(1,1,nExample)) + + return trainSet, validSet, clusterExamples, inputs, targets +end |
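For orientation, the pieces above compose as follows. This is a minimal sketch assembled from dttest.RandomForestTrainer and testAccuracy in test.lua; the option values are illustrative, and the scalar HashMap put(key, val) signature is an assumption read off the hash_map_put declaration in hash_map.h.

local dt = require 'decisiontree'

-- synthetic dense data: 100 training plus 10 validation examples (utils.lua)
local trainSet, validSet = dt.getDenseDummyData(100)

-- train a small random forest with the same options as the test suite
local trainer = dt.RandomForestTrainer{
   activeRatio=0.5, featureBaggingSize=5, nTree=14,
   maxLeafNodes=50, minLeafSize=10,
}
local forest = trainer:train(trainSet, trainSet.featureIds)

-- as in testAccuracy, a non-negative score predicts the positive class
local score = forest:score(validSet.input[1])
print(score >= 0 and 1 or 0)

-- the C-backed hash map registered in init.c; scalar get returns value plus found flag
local h = dt.HashMap()
h:put(100, 7)                 -- put(key, val) assumed from the C declaration
local val, found = h:get(100) -- per hash_map_get_lua: (val, 1) on hit, (0, 0) on miss
assert(found == 1 and val == 7)
print(h:size())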