diff options
Diffstat (limited to 'contrib/lua-torch/decisiontree/benchmark.lua')
-rw-r--r-- | contrib/lua-torch/decisiontree/benchmark.lua | 171 |
1 files changed, 171 insertions, 0 deletions
-- contrib/lua-torch/decisiontree/benchmark.lua
--
-- Timing benchmarks for the decisiontree package. Each entry of `bm` takes an
-- options table, runs one component and prints its timings to stdout.
-- `dt.benchmark` at the bottom of the file is the public entry point.
local dt = require "decisiontree._env"

local bm = {}

-- Times sparse dummy-data creation, single-threaded CART training, the
-- feature-parallel setup and feature-parallel CART training.
-- Reads opt.nExample, opt.minLeafSize, opt.maxLeafNodes and opt.nThread;
-- opt is also forwarded to dt.getSparseDummyData.
function bm.CartTrainer(opt)
   local timer = torch.Timer()
   local trainSet = dt.getSparseDummyData(opt)
   -- read the clock once so the rate and the elapsed time printed on the
   -- same line are derived from the same measurement
   local elapsed = timer:time().real
   print(string.format("CartTrainer: sparse dataset create: %f samples/sec; %f sec", opt.nExample/elapsed, elapsed))

   local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
   local treeState = dt.GiniState(trainSet:getExampleIds())
   timer:reset()
   cartTrainer:train(treeState, trainSet.featureIds)
   elapsed = timer:time().real
   print(string.format("CartTrainer: train single-thread : %f samples/sec; %f sec", opt.nExample/elapsed, elapsed))

   timer:reset()
   cartTrainer:featureParallel(opt.nThread)
   elapsed = timer:time().real
   print(string.format("CartTrainer: setup feature-parallel : %f samples/sec; %f sec", opt.nExample/elapsed, elapsed))

   timer:reset()
   cartTrainer:train(treeState, trainSet.featureIds)
   elapsed = timer:time().real
   print(string.format("CartTrainer: train feature-parallel : %f samples/sec; %f sec", opt.nExample/elapsed, elapsed))
end

-- Times two consecutive GradientBoostState:findBestSplit calls on sparse
-- dummy data. opt is forwarded to dt.getSparseDummyData.
function bm.GradientBoostState(opt)
   local trainSet = dt.getSparseDummyData(opt)

   trainSet:initScore()

   local treeState = dt.GradientBoostState(trainSet:getExampleIds(), nn.LogitBoostCriterion(false))

   local timer = torch.Timer() -- first step also calls SparseTensor:buildIndex()
   treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
   print(string.format("GradientBoostState: findBestSplit (first) : %f sec", timer:time().real))

   timer:reset()
   treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
   print(string.format("GradientBoostState: findBestSplit (second) : %f sec", timer:time().real))
end

-- Returns true when `name` can be opened for reading, false otherwise.
local function file_exists(name)
   local f = io.open(name, "r")
   if f == nil then
      return false
   end
   f:close()
   return true
end

-- Times gradient-boosted forest training, first single-threaded and then
-- with a feature-parallel CART trainer.
-- Reads opt.sparse, opt.minLeafSize, opt.maxLeafNodes, opt.nThread,
-- opt.nExample and opt.nTree; writes opt.lossFunction and opt.treeTrainer
-- before handing opt to dt.GradientBoostTrainer.
function bm.GradientBoostTrainer(opt)
   local trainPath, validPath = "/tmp/train.bin", "/tmp/valid.bin"
   local trainSet, validSet
   -- NOTE(review): the cached files are not keyed by opt, so a dataset saved
   -- by an earlier run with different options (e.g. sparse vs dense, or a
   -- different nExample) is reused as-is. Delete the files to regenerate.
   if file_exists(trainPath) and file_exists(validPath) then
      trainSet = torch.load(trainPath)
      validSet = torch.load(validPath)
   else
      if opt.sparse then
         trainSet, validSet = dt.getSparseDummyData(opt)
      else
         trainSet, validSet = dt.getDenseDummyData(opt)
      end
      torch.save(trainPath, trainSet)
      torch.save(validPath, validSet)
   end

   local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
   opt.lossFunction = nn.LogitBoostCriterion(false)
   opt.treeTrainer = cartTrainer
   local forestTrainer = dt.GradientBoostTrainer(opt)

   local timer = torch.Timer()
   forestTrainer:train(trainSet, trainSet.featureIds, validSet)
   local elapsed = timer:time().real
   print(string.format("GradientBoostTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/elapsed, elapsed/opt.nTree, elapsed))

   -- the setup call happens before the timer reset, so it is not timed
   cartTrainer:featureParallel(opt.nThread)
   timer:reset()
   forestTrainer:train(trainSet, trainSet.featureIds, validSet)
   elapsed = timer:time().real
   print(string.format("GradientBoostTrainer: train feature-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/elapsed, elapsed/opt.nTree, elapsed))
end

-- Times random-forest training single-threaded, the tree-parallel setup and
-- tree-parallel training. Reads opt.nExample, opt.nTree and opt.nThread;
-- opt is also forwarded to dt.getSparseDummyData and dt.RandomForestTrainer.
function bm.RandomForestTrainer(opt)
   local trainSet = dt.getSparseDummyData(opt)

   local forestTrainer = dt.RandomForestTrainer(opt)
   -- untimed first run: executed before the timer is created
   forestTrainer:train(trainSet, trainSet.featureIds)

   local timer = torch.Timer()
   forestTrainer:train(trainSet, trainSet.featureIds)
   local elapsed = timer:time().real
   print(string.format("RandomForestTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/elapsed, elapsed/opt.nTree, elapsed))

   timer:reset()
   forestTrainer:treeParallel(opt.nThread)
   elapsed = timer:time().real
   print(string.format("RandomForestTrainer: setup tree-parallel : %f samples/sec; %f sec", opt.nExample/elapsed, elapsed))

   timer:reset()
   forestTrainer:train(trainSet, trainSet.featureIds)
   elapsed = timer:time().real
   print(string.format("RandomForestTrainer: train tree-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/elapsed, elapsed/opt.nTree, elapsed))
end

-- Trains a tree-parallel random forest on 200 dense dummy examples, then
-- times opt.nloop forward passes of nn.DFD over the first opt.batchsize rows.
-- Works on a clone of opt, so the caller's table is left untouched.
function bm.DFD(opt)
   local _ = require 'moses'
   opt = _.clone(opt)
   opt.nExample = 200
   local trainSet = dt.getDenseDummyData(opt)

   local forestTrainer = dt.RandomForestTrainer(opt)
   forestTrainer:treeParallel(opt.nThread)
   local timer = torch.Timer()
   local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
   local elapsed = timer:time().real
   print(string.format("DFD: train random forest in parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/elapsed, elapsed/opt.nTree, elapsed))

   -- benchmark nn.DFD
   local input = trainSet.input:sub(1, opt.batchsize)
   local dfd = nn.DFD(decisionForest)
   dfd:forward(input) -- one untimed forward pass before the timed loop
   timer:reset()
   for i = 1, opt.nloop do
      dfd:forward(input)
   end
   elapsed = timer:time().real
   print(string.format("DFD: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/elapsed, elapsed))
end

-- Times opt.nloop forward passes of nn.Sparse2Dense over one batch of
-- opt.batchsize sparse dummy examples. Works on a clone of opt.
function bm.Sparse2Dense(opt)
   local _ = require 'moses'
   opt = _.clone(opt)
   opt.nExample = opt.batchsize
   local trainSet = dt.getSparseDummyData(opt)

   -- input is a pair of parallel lists: {keys per example, values per example}
   local keys, values = {}, {}
   for i = 1, opt.batchsize do
      keys[i] = trainSet.input[i].keys
      values[i] = trainSet.input[i].values
   end
   local input = {keys, values}
   assert(#input[1] == opt.batchsize)

   -- benchmark nn.Sparse2Dense
   local s2d = nn.Sparse2Dense(torch.LongTensor():range(1, opt.nFeature))
   s2d:forward(input) -- one untimed forward pass before the timed loop
   local timer = torch.Timer()
   for i = 1, opt.nloop do
      s2d:forward(input)
   end
   local elapsed = timer:time().real
   print(string.format("Sparse2Dense: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/elapsed, elapsed))
end

-- Runs the named benchmarks.
-- benchmarks : optional list of keys of `bm` (e.g. {"CartTrainer", "DFD"});
--              defaults to every benchmark, in sorted name order.
-- opt2       : optional table of overrides merged over the default options.
-- Raises an assertion error if `benchmarks` is not a table, or if an entry
-- is not a string or does not name a known benchmark.
function dt.benchmark(benchmarks, opt2)
   local opt = {
      nExample=10000, nCluster=2, nFeature=1000, overlap=0, nValid=100, -- getSparseDummyData
      nTree=20, featureBaggingSize=-1, sparse=true, -- GradientBoostTrainer and RandomForestTrainer
      nThread=2, shrinkage=0.1, downsampleRatio=0.1, evalFreq=5, earlyStop=0, -- GradientBoostTrainer
      activeRatio=0.5, -- RandomForestTrainer
      batchsize=32, nloop=10
   }

   local _ = require 'moses'
   if not benchmarks then
      benchmarks = _.keys(bm)
      -- key enumeration order is unspecified; sort so that the default run
      -- order is the same from one invocation to the next
      table.sort(benchmarks)
   end
   assert(torch.type(benchmarks) == 'table')
   for i, benchmark in ipairs(benchmarks) do
      -- each benchmark gets its own copy because some of them write to opt
      local opt1 = _.clone(opt)
      for key, value in pairs(opt2 or {}) do
         opt1[key] = value
      end
      -- derived defaults, applied after the overrides so they follow them
      opt1.nActive = opt1.nActive or torch.round(opt1.nFeature/10)
      opt1.maxLeafNodes = opt1.maxLeafNodes or (opt1.nExample/10)
      opt1.minLeafSize = opt1.minLeafSize or (opt1.nExample/100)

      assert(torch.type(benchmark) == 'string', benchmark)
      assert(bm[benchmark], benchmark)
      bm[benchmark](opt1)
   end
end