123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
-- Package table that the class declared below is registered into.
local dt = require 'decisiontree._env'

-- Declares dt.GradientBoostState as a torch class derived from dt.TreeState.
-- `parent` is the second value returned by torch.class; the methods below use
-- it for explicit calls into the dt.TreeState implementation.
local GradientBoostState, parent = torch.class('dt.GradientBoostState', 'dt.TreeState', dt)
-
--- Builds a tree state that also carries the gradient and hessian inputs.
-- @param exampleIds forwarded unchanged to the dt.TreeState constructor
-- @param gradInput  stored as self.gradInput
-- @param hessInput  stored as self.hessInput
function GradientBoostState:__init(exampleIds, gradInput, hessInput)
   -- run the base-class initialisation before attaching our own fields
   parent.__init(self, exampleIds)
   self.gradInput, self.hessInput = gradInput, hessInput
end
-
--- Scores this state from its gradient and hessian sums.
-- The entries of self.gradInput and self.hessInput selected by self.exampleIds
-- (along dimension 1) are summed, and both sums are handed to
-- dt.computeNewtonScore, whose result is returned.
-- @param dataset not used by this implementation
function GradientBoostState:score(dataset)
   -- required here rather than reusing the file-level `dt`
   local dt = require 'decisiontree'
   local ids = self.exampleIds
   local gradSum = self.gradInput:index(1, ids):sum()
   local hessSum = self.hessInput:index(1, ids):sum()
   return dt.computeNewtonScore(gradSum, hessSum)
end
-
--- Splits this state into a left and a right child state.
-- Delegates the partitioning of example ids to self:_branch, then wraps each
-- side in a new state that shares this state's gradInput and hessInput.
-- @param splitInfo passed through to _branch
-- @param dataset   passed through to _branch
-- @return the left child state, then the right child state
function GradientBoostState:branch(splitInfo, dataset)
   local leftIds, rightIds = self:_branch(splitInfo, dataset)
   local grad, hess = self.gradInput, self.hessInput
   local leftState = self.new(leftIds, grad, hess)
   return leftState, self.new(rightIds, grad, hess)
end
-
--- Partitions this state's examples according to splitInfo.
-- Produces the pair of exampleIds belonging to the left and right subtrees.
-- @param splitInfo table describing the split
-- @param dataset   object whose `input` field holds the examples
function GradientBoostState:_branch(splitInfo, dataset)
   local input = dataset.input
   -- "dense" here means: a contiguous, 2-dimensional tensor
   local isDense = torch.isTensor(input)
      and input.isContiguous
      and input:isContiguous()
      and input:nDimension() == 2
   if not isDense then
      -- anything else goes through the dt.TreeState implementation
      return parent._branch(self, splitInfo, dataset)
   end
   -- dense input can use the optimized routine
   return input.nn.GBDT_branch(splitInfo, input, self.exampleIds)
end
-
-- Each of the following methods is a superset of the one before it. Any of them can be
-- commented out to fall back to the Lua version, which then uses only the optimized core routine.
-
-- THIS ONE CANNOT BE COMMENTED OUT
--- Finds the best split on a single feature using the optimized routine.
-- Forwards to GBDT_findBestFeatureSplit, looked up through self.hessInput.nn,
-- passing this state's exampleIds, gradInput and hessInput along with the
-- arguments below.
-- @param dataset     passed through to the routine
-- @param featureId   passed through to the routine
-- @param minLeafSize passed through to the routine
-- @return the first value returned by the routine
function GradientBoostState:findBestFeatureSplit(dataset, featureId, minLeafSize)
   local backend = self.hessInput.nn
   -- the outer parentheses keep only the first returned value
   return (backend.GBDT_findBestFeatureSplit(
      self.exampleIds, dataset, featureId, minLeafSize, self.gradInput, self.hessInput
   ))
end
-
--- Finds the best split of this state's examples among featureIds.
-- Forwards to GBDT_findBestSplit, looked up through self.hessInput.nn, passing
-- this state's exampleIds, gradInput and hessInput along with the arguments
-- below.
-- @param dataset     passed through to the routine
-- @param featureIds  candidate features
-- @param minLeafSize passed through to the routine
-- @param shardId     passed through to the routine
-- @param nShard      passed through to the routine
-- @return the first value returned by the routine
function GradientBoostState:findBestSplit(dataset, featureIds, minLeafSize, shardId, nShard)
   local backend = self.hessInput.nn
   -- the outer parentheses keep only the first returned value
   return (backend.GBDT_findBestSplit(
      self.exampleIds, dataset, featureIds, minLeafSize, shardId, nShard,
      self.gradInput, self.hessInput
   ))
end
-
--- Finds the best split like findBestSplit, but with feature parallelism.
-- The optimized routine is only used when dataset.input is dense, i.e. a
-- contiguous 2-dimensional tensor; for any other input this method returns
-- nothing.
-- @param dataset     object whose `input` field holds the examples
-- @param featureIds  candidate features
-- @param minLeafSize passed through to the routine
-- @param nThread     passed through to the routine
-- @return the first value returned by GBDT_findBestSplitFP, or nothing when
--         the input is not dense
function GradientBoostState:findBestSplitFP(dataset, featureIds, minLeafSize, nThread)
   local input = dataset.input
   local isDense = torch.isTensor(input)
      and input.isContiguous
      and input:isContiguous()
      and input:nDimension() == 2
   if not isDense then
      -- no optimized path for this input: return no value at all
      return
   end
   -- the outer parentheses keep only the first returned value
   return (self.hessInput.nn.GBDT_findBestSplitFP(
      self.exampleIds, dataset, featureIds, minLeafSize,
      self.gradInput, self.hessInput, nThread
   ))
end
|