local dt = require 'decisiontree._env'

local GradientBoostState, parent = torch.class("dt.GradientBoostState", "dt.TreeState", dt)

-- Tree state for gradient boosting: in addition to the exampleIds assigned to
-- this node, it holds the per-example gradients and hessians of the loss.
function GradientBoostState:__init(exampleIds, gradInput, hessInput)
   parent.__init(self, exampleIds)
   self.gradInput = gradInput
   self.hessInput = hessInput
end

-- Newton score of this node, computed from the gradient and hessian sums
-- over the examples it contains.
function GradientBoostState:score(dataset)
   local dt = require 'decisiontree'
   local gradInput = self.gradInput:index(1, self.exampleIds)
   local hessInput = self.hessInput:index(1, self.exampleIds)
   return dt.computeNewtonScore(gradInput:sum(), hessInput:sum())
end

-- Calls _branch and wraps the left and right exampleIds into two new TreeStates.
function GradientBoostState:branch(splitInfo, dataset)
   local leftExampleIds, rightExampleIds = self:_branch(splitInfo, dataset)
   return self.new(leftExampleIds, self.gradInput, self.hessInput),
          self.new(rightExampleIds, self.gradInput, self.hessInput)
end

-- Partitions self given a splitInfo table, producing a pair of exampleIds
-- corresponding to the left and right subtrees.
function GradientBoostState:_branch(splitInfo, dataset)
   local input = dataset.input
   -- if the input is dense, we can use the optimized version
   if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
      return input.nn.GBDT_branch(splitInfo, input, self.exampleIds)
   end
   return parent._branch(self, splitInfo, dataset)
end

-- Each of the following methods is a superset of the previous one. All but the
-- first can be commented out to fall back to the Lua implementation, which
-- still relies on the optimized core.

-- THIS ONE CANNOT BE COMMENTED OUT
function GradientBoostState:findBestFeatureSplit(dataset, featureId, minLeafSize)
   return self.hessInput.nn.GBDT_findBestFeatureSplit(
      self.exampleIds, dataset, featureId, minLeafSize, self.gradInput, self.hessInput)
end

-- Finds the best split of the examples in this state among featureIds.
function GradientBoostState:findBestSplit(dataset, featureIds, minLeafSize, shardId, nShard)
   return self.hessInput.nn.GBDT_findBestSplit(
      self.exampleIds, dataset, featureIds, minLeafSize, shardId, nShard, self.gradInput, self.hessInput)
end

-- Finds the best split like the previous method, but with feature parallelism.
-- Note that the optimization is only applied if the input is dense; for sparse
-- input this method returns nil.
function GradientBoostState:findBestSplitFP(dataset, featureIds, minLeafSize, nThread)
   local input = dataset.input
   if torch.isTensor(input) and input.isContiguous and input:isContiguous() and input:nDimension() == 2 then
      return self.hessInput.nn.GBDT_findBestSplitFP(
         self.exampleIds, dataset, featureIds, minLeafSize, self.gradInput, self.hessInput, nThread)
   end
end
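
-- Usage sketch (illustrative only; the variable names, tensor shapes, and the
-- availability of a precomputed `dataset` are assumptions, not part of this
-- module). Given per-example gradients and hessians of the loss, a root state
-- covering all examples could be built, scored, and split roughly like this:
--
--   local dt = require 'decisiontree'
--   local gradInput = torch.Tensor(nExample)        -- dL/dy per example (assumed precomputed)
--   local hessInput = torch.Tensor(nExample)        -- d2L/dy2 per example (assumed precomputed)
--   local exampleIds = torch.range(1, nExample):long()
--   local state = dt.GradientBoostState(exampleIds, gradInput, hessInput)
--   local rootScore = state:score(dataset)          -- Newton score from summed grad/hess
--   local splitInfo = state:findBestSplit(dataset, featureIds, minLeafSize, 1, 1)
--   if splitInfo then
--      local left, right = state:branch(splitInfo, dataset)
--   end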