1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
-- Shared decisiontree package table; the public dt.benchmark entry point is
-- attached to it further down in this file.
local dt = require "decisiontree._env"
-- Registry of benchmark functions keyed by benchmark name. Each entry is
-- called with a single options table.
local bm = {}
--- Benchmark dt.CartTrainer on sparse dummy data.
-- Prints four timing lines: dataset creation, single-threaded training,
-- feature-parallel setup and feature-parallel training.
-- @param opt options table. Fields read here: nExample, minLeafSize,
--   maxLeafNodes, nThread. The whole table is also handed to
--   dt.getSparseDummyData.
function bm.CartTrainer(opt)
   local timer = torch.Timer()

   -- Read the timer exactly once per report so that the printed rate and the
   -- printed duration are derived from the same measurement.
   local function report(fmt)
      local elapsed = timer:time().real
      print(string.format(fmt, opt.nExample/elapsed, elapsed))
   end

   -- Only the training set is needed; any further return values are dropped.
   local trainSet = dt.getSparseDummyData(opt)
   report("CartTrainer: sparse dataset create: %f samples/sec; %f sec")

   local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
   local treeState = dt.GiniState(trainSet:getExampleIds())

   timer:reset()
   cartTrainer:train(treeState, trainSet.featureIds)
   report("CartTrainer: train single-thread : %f samples/sec; %f sec")

   timer:reset()
   cartTrainer:featureParallel(opt.nThread)
   report("CartTrainer: setup feature-parallel : %f samples/sec; %f sec")

   -- The same treeState object is reused for the second training run.
   timer:reset()
   cartTrainer:train(treeState, trainSet.featureIds)
   report("CartTrainer: train feature-parallel : %f samples/sec; %f sec")
end
--- Benchmark two back-to-back findBestSplit calls on a dt.GradientBoostState.
-- Prints the wall-clock duration of each call separately.
-- @param opt options table, passed through to dt.getSparseDummyData.
function bm.GradientBoostState(opt)
   local trainSet = dt.getSparseDummyData(opt)
   trainSet:initScore()

   local exampleIds = trainSet:getExampleIds()
   local treeState = dt.GradientBoostState(exampleIds, nn.LogitBoostCriterion(false))

   -- Runs one split search and prints how long the given timer has been running.
   local function timedSplit(fmt, timer)
      treeState:findBestSplit(trainSet, trainSet.featureIds, 10, 1, 3)
      print(string.format(fmt, timer:time().real))
   end

   -- Per the original author's note, the first call also triggers
   -- SparseTensor:buildIndex(), which is why the two calls are timed apart.
   local timer = torch.Timer()
   timedSplit("GradientBoostState: findBestSplit (first) : %f sec", timer)
   timer:reset()
   timedSplit("GradientBoostState: findBestSplit (second) : %f sec", timer)
end
--- Report whether a file can be opened for reading.
-- @param name path to probe
-- @return true when io.open(name, "r") yields a handle (which is closed
--   again before returning), false otherwise
local function file_exists(name)
   local handle = io.open(name, "r")
   if handle == nil then
      return false
   end
   handle:close()
   return true
end
--- Benchmark dt.GradientBoostTrainer, first single-threaded and then with the
-- underlying CartTrainer switched to feature-parallel mode.
-- Datasets are cached on disk under /tmp between runs.
-- @param opt options table. Fields read here: sparse, minLeafSize,
--   maxLeafNodes, nExample, nTree, nThread. Fields written here: lossFunction
--   and treeTrainer (the table is mutated before being given to
--   dt.GradientBoostTrainer).
function bm.GradientBoostTrainer(opt)
   local trainPath, validPath = "/tmp/train.bin", "/tmp/valid.bin"
   local trainSet, validSet

   -- NOTE(review): the cached files are reused whenever both exist, without
   -- checking that they were generated with the current opt.
   if not (file_exists(trainPath) and file_exists(validPath)) then
      if opt.sparse then
         trainSet, validSet = dt.getSparseDummyData(opt)
      else
         trainSet, validSet = dt.getDenseDummyData(opt)
      end
      torch.save(trainPath, trainSet)
      torch.save(validPath, validSet)
   else
      trainSet = torch.load(trainPath)
      validSet = torch.load(validPath)
   end

   local cartTrainer = dt.CartTrainer(trainSet, opt.minLeafSize, opt.maxLeafNodes)
   opt.lossFunction = nn.LogitBoostCriterion(false)
   opt.treeTrainer = cartTrainer
   local forestTrainer = dt.GradientBoostTrainer(opt)

   -- Prints throughput, per-tree time and total time for one training run.
   local function report(fmt, elapsed)
      print(string.format(fmt, opt.nExample/elapsed, elapsed/opt.nTree, elapsed))
   end

   local timer = torch.Timer()
   forestTrainer:train(trainSet, trainSet.featureIds, validSet)
   report("GradientBoostTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", timer:time().real)

   -- Feature-parallel setup is deliberately outside the timed region.
   cartTrainer:featureParallel(opt.nThread)
   timer:reset()
   forestTrainer:train(trainSet, trainSet.featureIds, validSet)
   report("GradientBoostTrainer: train feature-parallel : %f samples/sec; %f sec/tree, %f sec", timer:time().real)
end
--- Benchmark dt.RandomForestTrainer, single-threaded and then tree-parallel.
-- Prints three timing lines: single-thread training, tree-parallel setup and
-- tree-parallel training.
-- @param opt options table. Fields read here: nExample, nTree, nThread. The
--   whole table is also handed to dt.getSparseDummyData and
--   dt.RandomForestTrainer.
function bm.RandomForestTrainer(opt)
   -- Only the training set is needed; any further return values are dropped.
   local trainSet = dt.getSparseDummyData(opt)
   local forestTrainer = dt.RandomForestTrainer(opt)

   -- Prints throughput, per-tree time and total time for one training run.
   local function reportTrain(fmt, elapsed)
      print(string.format(fmt, opt.nExample/elapsed, elapsed/opt.nTree, elapsed))
   end

   -- One untimed training run precedes the measured one.
   -- NOTE(review): presumably a warm-up pass; confirm intent.
   forestTrainer:train(trainSet, trainSet.featureIds)

   local timer = torch.Timer()
   forestTrainer:train(trainSet, trainSet.featureIds)
   reportTrain("RandomForestTrainer: train single-thread : %f samples/sec; %f sec/tree, %f sec", timer:time().real)

   timer:reset()
   forestTrainer:treeParallel(opt.nThread)
   -- Read the timer once so the rate and the duration in this line agree.
   local setupTime = timer:time().real
   print(string.format("RandomForestTrainer: setup tree-parallel : %f samples/sec; %f sec", opt.nExample/setupTime, setupTime))

   timer:reset()
   forestTrainer:train(trainSet, trainSet.featureIds)
   reportTrain("RandomForestTrainer: train tree-parallel : %f samples/sec; %f sec/tree, %f sec", timer:time().real)
end
--- Benchmark nn.DFD on top of a random forest trained in tree-parallel mode.
-- Prints the forest training time and then the throughput of repeated
-- dfd:forward calls on one batch.
-- @param opt options table. It is cloned, so the caller's table is not
--   modified. Fields read here: nThread, nTree, batchsize, nloop. nExample is
--   overridden to 200 on the clone.
function bm.DFD(opt)
   local moses = require 'moses'
   local cfg = moses.clone(opt)
   cfg.nExample = 200

   -- Only the training set is needed; any further return values are dropped.
   local trainSet = dt.getDenseDummyData(cfg)
   local forestTrainer = dt.RandomForestTrainer(cfg)
   forestTrainer:treeParallel(cfg.nThread)

   local timer = torch.Timer()
   local decisionForest = forestTrainer:train(trainSet, trainSet.featureIds)
   local trainTime = timer:time().real
   print(string.format("DFD: train random forest in parallel : %f samples/sec; %f sec/tree, %f sec", cfg.nExample/trainTime, trainTime/cfg.nTree, trainTime))

   -- benchmark nn.DFD
   -- NOTE(review): presumably requires batchsize <= 200 because the batch is
   -- sliced from the 200-example training input; confirm.
   local input = trainSet.input:sub(1, cfg.batchsize)
   local dfd = nn.DFD(decisionForest)
   dfd:forward(input) -- one untimed call before the measured loop
   timer:reset()
   for _ = 1, cfg.nloop do
      dfd:forward(input)
   end
   -- Read the timer once so the rate and the duration in this line agree.
   local forwardTime = timer:time().real
   print(string.format("DFD: updateOutput : %f samples/sec; %f sec", cfg.nloop*cfg.batchsize/forwardTime, forwardTime))
end
--- Benchmark nn.Sparse2Dense forward passes on one batch of sparse examples.
-- @param opt options table. It is cloned, so the caller's table is not
--   modified. Fields read here: batchsize, nFeature, nloop. nExample is
--   overridden to batchsize on the clone.
function bm.Sparse2Dense(opt)
   local moses = require 'moses'
   local cfg = moses.clone(opt)
   cfg.nExample = cfg.batchsize

   local trainSet = dt.getSparseDummyData(cfg)

   -- Build the module input as { list of key fields, list of value fields },
   -- one entry per example in the batch.
   local keys, values = {}, {}
   for i = 1, cfg.batchsize do
      local example = trainSet.input[i]
      keys[i] = example.keys
      values[i] = example.values
   end
   local input = {keys, values}
   assert(#keys == cfg.batchsize)

   -- benchmark nn.Sparse2Dense
   local s2d = nn.Sparse2Dense(torch.LongTensor():range(1, cfg.nFeature))
   s2d:forward(input) -- one untimed call before the measured loop
   local timer = torch.Timer()
   for _ = 1, cfg.nloop do
      s2d:forward(input)
   end
   -- Read the timer once so the rate and the duration in this line agree.
   local elapsed = timer:time().real
   print(string.format("Sparse2Dense: updateOutput : %f samples/sec; %f sec", cfg.nloop*cfg.batchsize/elapsed, elapsed))
end
--- Run one or more of the benchmarks registered in `bm`.
-- Every benchmark receives its own fresh options table: a clone of the
-- built-in defaults, overlaid with opt2, with nActive, maxLeafNodes and
-- minLeafSize derived when they are not supplied.
-- @param benchmarks optional list of benchmark names (keys of `bm`). When
--   omitted, every registered benchmark runs, in sorted name order.
-- @param opt2 optional table of option overrides applied on top of the
--   defaults.
function dt.benchmark(benchmarks, opt2)
   local moses = require 'moses'
   local defaults = {
      nExample=10000, nCluster=2, nFeature=1000, overlap=0, nValid=100, -- getSparseDummyData
      nTree=20, featureBaggingSize=-1, sparse=true, -- GradientBoostTrainer and RandomForestTrainer
      nThread=2, shrinkage=0.1, downsampleRatio=0.1, evalFreq=5, earlyStop=0, -- GradientBoostTrainer
      activeRatio=0.5, -- RandomForestTrainer
      batchsize=32, nloop=10
   }
   local overrides = opt2 or {}

   if not benchmarks then
      benchmarks = moses.keys(bm)
      -- Key enumeration order is unspecified; sort so that successive runs
      -- execute the benchmarks in the same order.
      table.sort(benchmarks)
   end
   assert(torch.type(benchmarks) == 'table', "benchmarks must be a table of benchmark names")

   for _, name in ipairs(benchmarks) do
      assert(torch.type(name) == 'string', "benchmark name must be a string, got " .. torch.type(name))
      assert(bm[name], "unknown benchmark: " .. name)

      -- Fresh option table per benchmark, since benchmarks may modify it.
      local runOpt = moses.clone(defaults)
      for key, value in pairs(overrides) do
         runOpt[key] = value
      end
      runOpt.nActive = runOpt.nActive or torch.round(runOpt.nFeature/10)
      runOpt.maxLeafNodes = runOpt.maxLeafNodes or (runOpt.nExample/10)
      runOpt.minLeafSize = runOpt.minLeafSize or (runOpt.nExample/100)

      bm[name](runOpt)
   end
end
|