summaryrefslogtreecommitdiffstats
path: root/contrib/lua-torch/decisiontree/benchmark.lua
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/lua-torch/decisiontree/benchmark.lua')
-rw-r--r--contrib/lua-torch/decisiontree/benchmark.lua171
1 files changed, 171 insertions, 0 deletions
diff --git a/contrib/lua-torch/decisiontree/benchmark.lua b/contrib/lua-torch/decisiontree/benchmark.lua
new file mode 100644
index 000000000..2b6a03dc6
--- /dev/null
+++ b/contrib/lua-torch/decisiontree/benchmark.lua
@@ -0,0 +1,171 @@
+local dt = require "decisiontree._env"
+
+local bm = {}
+function bm.CartTrainer(opt)
+ local timer = torch.Timer()
+ local trainSet, validSet = dt.getSparseDummyData(opt)
+ print(string.format("CartTrainer: sparse dataset create: %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+
+ local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
+ local treeState = dt.GiniState(trainSet:getExampleIds())
+ timer:reset()
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+ print(string.format("CartTrainer: train single-thread : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+
+ timer:reset()
+ cartTrainer:featureParallel(opt.nThread)
+ print(string.format("CartTrainer: setup feature-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+ timer:reset()
+ local cartTree, nleaf = cartTrainer:train(treeState, trainSet.featureIds)
+ print(string.format("CartTrainer: train feature-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+end
+
+function bm.GradientBoostState(opt)
+ local trainSet, validSet = dt.getSparseDummyData(opt)
+
+ trainSet:initScore()
+
+ local treeState = dt.GradientBoostState(trainSet:getExampleIds(), nn.LogitBoostCriterion(false))
+
+ local timer = torch.Timer() -- first step also calls SparseTensor:buildIndex()
+ treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
+ print(string.format("GradientBoostState: findBestSplit (first) : %f sec", timer:time().real))
+
+ timer:reset()
+ treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
+ print(string.format("GradientBoostState: findBestSplit (second) : %f sec", timer:time().real))
+
+end
+
+local function file_exists(name)
+ local f=io.open(name,"r")
+ if f~=nil then io.close(f) return true else return false end
+end
+
+function bm.GradientBoostTrainer(opt)
+ local trainSet, validSet
+ if file_exists("/tmp/train.bin") and file_exists("/tmp/valid.bin") then
+ trainSet = torch.load("/tmp/train.bin")
+ validSet = torch.load("/tmp/valid.bin")
+ else
+ if opt.sparse then
+ trainSet, validSet = dt.getSparseDummyData(opt)
+ else
+ trainSet, validSet = dt.getDenseDummyData(opt)
+ end
+ torch.save("/tmp/train.bin", trainSet)
+ torch.save("/tmp/valid.bin", validSet)
+ end
+
+ local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
+ opt.lossFunction = nn.LogitBoostCriterion(false)
+ opt.treeTrainer = cartTrainer
+ local forestTrainer = dt.GradientBoostTrainer(opt)
+
+ local timer = torch.Timer()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds, validSet)
+ local time = timer:time().real
+ print(string.format("GradientBoostTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+
+ cartTrainer:featureParallel(opt.nThread)
+ timer:reset()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds, validSet)
+ local time = timer:time().real
+ print(string.format("GradientBoostTrainer: train feature-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+end
+
+function bm.RandomForestTrainer(opt)
+ local trainSet, validSet = dt.getSparseDummyData(opt)
+
+ local forestTrainer = dt.RandomForestTrainer(opt)
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
+
+ local timer = torch.Timer()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
+ local time = timer:time().real
+ print(string.format("RandomForestTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+
+ timer:reset()
+ forestTrainer:treeParallel(opt.nThread)
+ print(string.format("RandomForestTrainer: setup tree-parallel : %f samples/sec; %f sec", opt.nExample/timer:time().real, timer:time().real))
+
+ timer:reset()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
+ local time = timer:time().real
+ print(string.format("RandomForestTrainer: train tree-parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+end
+
+function bm.DFD(opt)
+ local _ = require 'moses'
+ local opt = _.clone(opt)
+ opt.nExample = 200
+ local trainSet, validSet = dt.getDenseDummyData(opt)
+
+ local forestTrainer = dt.RandomForestTrainer(opt)
+ forestTrainer:treeParallel(opt.nThread)
+ local timer = torch.Timer()
+ local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
+ local time = timer:time().real
+ print(string.format("DFD: train random forest in parallel : %f samples/sec; %f sec/tree, %f sec", opt.nExample/time, time/opt.nTree, time))
+
+
+ -- benchmark nn.DFD
+ local input = trainSet.input:sub(1,opt.batchsize)
+ local dfd = nn.DFD(decisionForest)
+ dfd:forward(input)
+ timer:reset()
+ for i=1,opt.nloop do
+ dfd:forward(input)
+ end
+ print(string.format("DFD: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/timer:time().real, timer:time().real))
+end
+
+function bm.Sparse2Dense(opt)
+ local _ = require 'moses'
+ local opt = _.clone(opt)
+ opt.nExample = opt.batchsize
+ local trainSet = dt.getSparseDummyData(opt)
+
+ local input = {{},{}}
+ for i=1,opt.batchsize do
+ input[1][i] = trainSet.input[i].keys
+ input[2][i] = trainSet.input[i].values
+ end
+ assert(#input[1] == opt.batchsize)
+
+ -- benchmark nn.Sparse2Dense
+ local s2d = nn.Sparse2Dense(torch.LongTensor():range(1,opt.nFeature))
+ s2d:forward(input)
+ local timer = torch.Timer()
+ for i=1,opt.nloop do
+ s2d:forward(input)
+ end
+ print(string.format("Sparse2Dense: updateOutput : %f samples/sec; %f sec", opt.nloop*opt.batchsize/timer:time().real, timer:time().real))
+end
+
+function dt.benchmark(benchmarks, opt2)
+ local opt = {
+ nExample=10000, nCluster=2, nFeature=1000, overlap=0, nValid=100, -- getSparseDummyData
+ nTree=20, featureBaggingSize=-1, sparse=true, -- GradientBoostTrainer and RandomForestTrainer
+ nThread=2, shrinkage=0.1, downsampleRatio=0.1, evalFreq=5, earlyStop=0, -- GradientBoostTrainer
+ activeRatio=0.5, -- RandomForestTrainer
+ batchsize=32, nloop=10
+ }
+
+ local _ = require 'moses'
+ benchmarks = benchmarks or _.keys(bm)
+ assert(torch.type(benchmarks) == 'table')
+ for i,benchmark in ipairs(benchmarks) do
+ local opt1 = _.clone(opt)
+ for key, value in pairs(opt2 or {}) do
+ opt1[key] = value
+ end
+ opt1.nActive = opt1.nActive or torch.round(opt1.nFeature/10)
+ opt1.maxLeafNodes = opt1.maxLeafNodes or (opt1.nExample/10)
+ opt1.minLeafSize = opt1.minLeafSize or (opt1.nExample/100)
+
+ assert(torch.type(benchmark) == 'string', benchmark)
+ assert(bm[benchmark], benchmark)
+ bm[benchmark](opt1)
+ end
+end