Use of hex.ModelMetricsRegression in project h2o-3 by h2oai.
The class DRFTest, method testStochasticDRFEquivalent.
@Test
public void testStochasticDRFEquivalent() {
Frame tfr = null, vfr = null;
DRFModel drf = null;
Scope.enter();
try {
tfr = parse_test_file("./smalldata/junit/cars.csv");
for (String s : new String[] { "name" }) {
tfr.remove(s).remove();
}
DKV.put(tfr);
DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
parms._train = tfr._key;
//regression
parms._response_column = "cylinders";
parms._seed = 234;
parms._min_rows = 2;
parms._max_depth = 5;
parms._ntrees = 5;
parms._mtries = 3;
parms._sample_rate = 0.5f;
// Build a first model; all remaining models should be equal
drf = new DRF(parms).trainModel().get();
ModelMetricsRegression mm = (ModelMetricsRegression) drf._output._training_metrics;
assertEquals(0.12358322821934015, mm.mse(), 1e-4);
} finally {
if (tfr != null)
tfr.remove();
if (vfr != null)
vfr.remove();
if (drf != null)
drf.delete();
Scope.exit();
}
}
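The pattern in this test recurs throughout the snippets below: train a model, then cast _output._training_metrics (or _output._validation_metrics) to ModelMetricsRegression to read the MSE and related statistics. A minimal sketch of that access pattern, assembled only from calls that appear in these tests and assuming the same imports; the helper name logRegressionMetrics is hypothetical:
static void logRegressionMetrics(DRFModel drf, Frame fr) {
  // training metrics are attached to the model's output; cast to the regression flavor
  ModelMetricsRegression train = (ModelMetricsRegression) drf._output._training_metrics;
  Log.info("training MSE: " + train.mse());
  // after scoring a frame, the matching metrics object can be looked up in the DKV
  Frame preds = drf.score(fr);
  try {
    ModelMetricsRegression scored = (ModelMetricsRegression) hex.ModelMetrics.getFromDKV(drf, fr);
    Log.info("mean residual deviance: " + scored._mean_residual_deviance);
  } finally {
    preds.delete();
  }
}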
Use of hex.ModelMetricsRegression in project h2o-3 by h2oai.
The class DeepLearningGradientCheck, method gradientCheck.
@Test
public void gradientCheck() {
Frame tfr = null;
DeepLearningModel dl = null;
try {
tfr = parse_test_file("smalldata/glm_test/cancar_logIn.csv");
for (String s : new String[] { "Merit", "Class" }) {
Vec f = tfr.vec(s).toCategoricalVec();
tfr.remove(s).remove();
tfr.add(s, f);
}
DKV.put(tfr);
tfr.add("Binary", tfr.anyVec().makeZero());
new MRTask() {
public void map(Chunk[] c) {
for (int i = 0; i < c[0]._len; ++i) if (c[0].at8(i) == 1)
c[1].set(i, 1);
}
}.doAll(tfr.vecs(new String[] { "Class", "Binary" }));
Vec cv = tfr.vec("Binary").toCategoricalVec();
tfr.remove("Binary").remove();
tfr.add("Binary", cv);
DKV.put(tfr);
Random rng = new Random(0xDECAF);
int count = 0;
int failedcount = 0;
double maxRelErr = 0;
double meanRelErr = 0;
for (DistributionFamily dist : new DistributionFamily[] {
    DistributionFamily.gaussian, DistributionFamily.laplace, DistributionFamily.quantile, DistributionFamily.huber,
    // DistributionFamily.modified_huber,
    DistributionFamily.gamma, DistributionFamily.poisson, DistributionFamily.AUTO, DistributionFamily.tweedie,
    DistributionFamily.multinomial, DistributionFamily.bernoulli }) {
for (DeepLearningParameters.Activation act : new DeepLearningParameters.Activation[] {
    // DeepLearningParameters.Activation.ExpRectifier,
    DeepLearningParameters.Activation.Tanh, DeepLearningParameters.Activation.Rectifier }) {
for (String response : new String[] {
    "Binary", // binary classification
    "Class", // multi-class
    "Cost" // regression
}) {
for (boolean adaptive : new boolean[] { true, false }) {
for (int miniBatchSize : new int[] { 1 }) {
if (response.equals("Class")) {
if (dist != DistributionFamily.multinomial && dist != DistributionFamily.AUTO)
continue;
} else if (response.equals("Binary")) {
if (dist != DistributionFamily.modified_huber && dist != DistributionFamily.bernoulli && dist != DistributionFamily.AUTO)
continue;
} else {
if (dist == DistributionFamily.multinomial || dist == DistributionFamily.modified_huber || dist == DistributionFamily.bernoulli)
continue;
}
DeepLearningParameters parms = new DeepLearningParameters();
parms._huber_alpha = rng.nextDouble() + 0.1;
parms._tweedie_power = 1.01 + rng.nextDouble() * 0.9;
parms._quantile_alpha = 0.05 + rng.nextDouble() * 0.9;
parms._train = tfr._key;
//converge to a reasonable model to avoid too large gradients
parms._epochs = 100;
parms._l1 = 1e-3;
parms._l2 = 1e-3;
parms._force_load_balance = false;
parms._hidden = new int[] { 10, 10, 10 };
//otherwise we introduce small bprop errors
parms._fast_mode = false;
parms._response_column = response;
parms._distribution = dist;
parms._max_w2 = 10;
parms._seed = 0xaaabbb;
parms._activation = act;
parms._adaptive_rate = adaptive;
parms._rate = 1e-4;
parms._momentum_start = 0.9;
parms._momentum_stable = 0.99;
parms._mini_batch_size = miniBatchSize;
// DeepLearningModelInfo.gradientCheck = null;
//tell it what gradient to collect
DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(0, 0, 0);
// Build a first model; all remaining models should be equal
DeepLearning job = new DeepLearning(parms);
try {
dl = job.trainModel().get();
boolean classification = response.equals("Class") || response.equals("Binary");
if (!classification) {
Frame p = dl.score(tfr);
hex.ModelMetrics mm = hex.ModelMetrics.getFromDKV(dl, tfr);
double resdev = ((ModelMetricsRegression) mm)._mean_residual_deviance;
Log.info("Mean residual deviance: " + resdev);
p.delete();
}
//golden version
DeepLearningModelInfo modelInfo = IcedUtils.deepCopy(dl.model_info());
// Log.info(modelInfo.toStringAll());
long before = dl.model_info().checksum_impl();
float meanLoss = 0;
// loop over every row in the dataset and check the back-propagated gradients against finite differences
for (int rId = 0; rId < tfr.numRows(); rId += 1 /*miniBatchSize*/) {
// start from scratch - with a clean model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
final DataInfo di = dl.model_info().data_info();
// populate miniBatch (consecutive rows)
final DataInfo.Row[] rowsMiniBatch = new DataInfo.Row[miniBatchSize];
for (int i = 0; i < rowsMiniBatch.length; ++i) {
if (0 <= rId + i && rId + i < tfr.numRows()) {
rowsMiniBatch[i] = new FrameTask.ExtractDenseRow(di, rId + i).doAll(di._adaptedFrame)._row;
}
}
// loss at weight
long cs = dl.model_info().checksum_impl();
double loss = dl.meanLoss(rowsMiniBatch);
assert (cs == before);
assert (before == dl.model_info().checksum_impl());
meanLoss += loss;
for (int layer = 0; layer <= parms._hidden.length; ++layer) {
int rows = dl.model_info().get_weights(layer).rows();
assert (dl.model_info().get_biases(layer).size() == rows);
for (int row = 0; row < rows; ++row) {
//check bias
if (true) {
// start from scratch - with a clean model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
// do one forward propagation pass (and fill the mini-batch gradients -> set training=true)
Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(dl.model_info());
double[] responses = new double[miniBatchSize];
double[] offsets = new double[miniBatchSize];
int n = 0;
for (DataInfo.Row myRow : rowsMiniBatch) {
if (myRow == null)
continue;
((Neurons.Input) neurons[0]).setInput(-1, myRow.numIds, myRow.numVals, myRow.nBins, myRow.binIds, n);
responses[n] = myRow.response(0);
offsets[n] = myRow.offset;
n++;
}
DeepLearningTask.fpropMiniBatch(-1 /*seed doesn't matter*/, neurons, dl.model_info(), null, true /*training*/, responses, offsets, n);
// check that we didn't change the model's weights/biases
long after = dl.model_info().checksum_impl();
assert (after == before);
// record the gradient since gradientChecking is enabled
//tell it what gradient to collect
DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(layer, row, -1);
//update the weights and biases
DeepLearningTask.bpropMiniBatch(neurons, n);
assert (before != dl.model_info().checksum_impl());
// reset the model back to the trained model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
assert (before == dl.model_info().checksum_impl());
double bpropGradient = DeepLearningModelInfo.gradientCheck.gradient;
// FIXME: re-enable this once the loss is computed from the de-standardized prediction/response
// double actualResponse=myRow.response[0];
// double predResponseLinkSpace = neurons[neurons.length-1]._a.get(0);
// if (di._normRespMul != null) {
// bpropGradient /= di._normRespMul[0]; //no shift for gradient
// actualResponse = (actualResponse / di._normRespMul[0] + di._normRespSub[0]);
// predResponseLinkSpace = (predResponseLinkSpace / di._normRespMul[0] + di._normRespSub[0]);
// }
// bpropGradient *= new Distribution(parms._distribution).gradient(actualResponse, predResponseLinkSpace);
final double bias = dl.model_info().get_biases(layer).get(row);
//don't make the weight deltas too small, or the float weights "won't notice"
double eps = 1e-4 * Math.abs(bias);
if (eps == 0)
eps = 1e-6;
// loss at bias + eps
dl.model_info().get_biases(layer).set(row, bias + eps);
double up = dl.meanLoss(rowsMiniBatch);
// loss at bias - eps
dl.model_info().get_biases(layer).set(row, bias - eps);
double down = dl.meanLoss(rowsMiniBatch);
if (Math.abs(up - down) / Math.abs(up + down) < 1e-8) {
//relative change in loss function is too small -> skip
continue;
}
double gradient = ((up - down) / (2. * eps));
double relError = 2 * Math.abs(bpropGradient - gradient) / (Math.abs(gradient) + Math.abs(bpropGradient));
count++;
// if either gradient is tiny, check if both are tiny
if (Math.abs(gradient) < 1e-7 || Math.abs(bpropGradient) < 1e-7) {
//all good
if (Math.abs(bpropGradient - gradient) < 1e-7)
continue;
}
meanRelErr += relError;
if (relError > MAX_TOLERANCE) {
Log.info("\nDistribution: " + dl._parms._distribution);
Log.info("\nRow: " + rId);
Log.info("bias (layer " + layer + ", row " + row + "): " + bias + " +/- " + eps);
Log.info("loss: " + loss);
Log.info("losses up/down: " + up + " / " + down);
Log.info("=> Finite differences gradient: " + gradient);
Log.info("=> Back-propagation gradient : " + bpropGradient);
Log.info("=> Relative error : " + PrettyPrint.formatPct(relError));
failedcount++;
}
}
int cols = dl.model_info().get_weights(layer).cols();
for (int col = 0; col < cols; ++col) {
if (rng.nextFloat() >= SAMPLE_RATE)
continue;
// start from scratch - with a clean model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
// do one forward propagation pass (and fill the mini-batch gradients -> set training=true)
Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(dl.model_info());
double[] responses = new double[miniBatchSize];
double[] offsets = new double[miniBatchSize];
int n = 0;
for (DataInfo.Row myRow : rowsMiniBatch) {
if (myRow == null)
continue;
((Neurons.Input) neurons[0]).setInput(-1, myRow.numIds, myRow.numVals, myRow.nBins, myRow.binIds, n);
responses[n] = myRow.response(0);
offsets[n] = myRow.offset;
n++;
}
DeepLearningTask.fpropMiniBatch(-1 /*seed doesn't matter*/, neurons, dl.model_info(), null, true /*training*/, responses, offsets, n);
// check that we didn't change the model's weights/biases
long after = dl.model_info().checksum_impl();
assert (after == before);
// record the gradient since gradientChecking is enabled
//tell it what gradient to collect
DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(layer, row, col);
//update the weights
DeepLearningTask.bpropMiniBatch(neurons, n);
assert (before != dl.model_info().checksum_impl());
// reset the model back to the trained model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
assert (before == dl.model_info().checksum_impl());
double bpropGradient = DeepLearningModelInfo.gradientCheck.gradient;
// FIXME: re-enable this once the loss is computed from the de-standardized prediction/response
// double actualResponse=myRow.response[0];
// double predResponseLinkSpace = neurons[neurons.length-1]._a.get(0);
// if (di._normRespMul != null) {
// bpropGradient /= di._normRespMul[0]; //no shift for gradient
// actualResponse = (actualResponse / di._normRespMul[0] + di._normRespSub[0]);
// predResponseLinkSpace = (predResponseLinkSpace / di._normRespMul[0] + di._normRespSub[0]);
// }
// bpropGradient *= new Distribution(parms._distribution).gradient(actualResponse, predResponseLinkSpace);
final float weight = dl.model_info().get_weights(layer).get(row, col);
//don't make the weight deltas too small, or the float weights "won't notice"
double eps = 1e-4 * Math.abs(weight);
if (eps == 0)
eps = 1e-6;
// loss at weight + eps
dl.model_info().get_weights(layer).set(row, col, (float) (weight + eps));
double up = dl.meanLoss(rowsMiniBatch);
// loss at weight - eps
dl.model_info().get_weights(layer).set(row, col, (float) (weight - eps));
double down = dl.meanLoss(rowsMiniBatch);
if (Math.abs(up - down) / Math.abs(up + down) < 1e-8) {
//relative change in loss function is too small -> skip
continue;
}
double gradient = ((up - down) / (2. * eps));
double relError = 2 * Math.abs(bpropGradient - gradient) / (Math.abs(gradient) + Math.abs(bpropGradient));
count++;
// if either gradient is tiny, check if both are tiny
if (Math.abs(gradient) < 1e-7 || Math.abs(bpropGradient) < 1e-7) {
//all good
if (Math.abs(bpropGradient - gradient) < 1e-7)
continue;
}
meanRelErr += relError;
if (relError > MAX_TOLERANCE) {
Log.info("\nDistribution: " + dl._parms._distribution);
Log.info("\nRow: " + rId);
Log.info("weight (layer " + layer + ", row " + row + ", col " + col + "): " + weight + " +/- " + eps);
Log.info("loss: " + loss);
Log.info("losses up/down: " + up + " / " + down);
Log.info("=> Finite differences gradient: " + gradient);
Log.info("=> Back-propagation gradient : " + bpropGradient);
Log.info("=> Relative error : " + PrettyPrint.formatPct(relError));
failedcount++;
}
// Assert.assertTrue(failedcount==0);
maxRelErr = Math.max(maxRelErr, relError);
assert (!Double.isNaN(maxRelErr));
}
}
}
}
meanLoss /= tfr.numRows();
Log.info("Mean loss: " + meanLoss);
// // FIXME: re-enable this
// if (parms._l1 == 0 && parms._l2 == 0) {
// assert(Math.abs(meanLoss-resdev)/Math.abs(resdev) < 1e-5);
// }
} catch (RuntimeException ex) {
dl = DKV.getGet(job.dest());
if (dl != null)
Assert.assertTrue(dl.model_info().isUnstable());
else
Assert.assertTrue(job.isStopped());
} finally {
if (dl != null)
dl.delete();
}
}
}
}
}
}
Log.info("Number of tests: " + count);
Log.info("Number of failed tests: " + failedcount);
Log.info("Mean. relative error: " + meanRelErr / count);
Log.info("Max. relative error: " + PrettyPrint.formatPct(maxRelErr));
Assert.assertTrue("Error too large: " + maxRelErr + " >= " + MAX_TOLERANCE, maxRelErr < MAX_TOLERANCE);
Assert.assertTrue("Failed count too large: " + failedcount + " > " + MAX_FAILED_COUNT, failedcount <= MAX_FAILED_COUNT);
} finally {
if (tfr != null)
tfr.remove();
}
}
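The core of gradientCheck is the same arithmetic for every bias and weight: perturb one parameter by ±eps, recompute the mean loss, form the central difference, and compare it to the back-propagated gradient with a symmetric relative error. A sketch of just that arithmetic, extracted from the loops above; relativeError is a hypothetical helper, not part of the test:
// eps is chosen relative to the parameter, with a floor so the float weights "notice" the change:
// double eps = 1e-4 * Math.abs(w); if (eps == 0) eps = 1e-6;
static double relativeError(double bpropGradient, double lossUp, double lossDown, double eps) {
  // central finite difference: (L(w + eps) - L(w - eps)) / (2 * eps)
  double fdGradient = (lossUp - lossDown) / (2. * eps);
  // symmetric relative error between the analytical and numerical gradients
  return 2 * Math.abs(bpropGradient - fdGradient) / (Math.abs(fdGradient) + Math.abs(bpropGradient));
}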
Use of hex.ModelMetricsRegression in project h2o-3 by h2oai.
The class DRFTest, method testColSamplingPerTree.
@Test
public void testColSamplingPerTree() {
Frame tfr = null;
Key[] ksplits = new Key[0];
try {
tfr = parse_test_file("./smalldata/gbm_test/ecology_model.csv");
SplitFrame sf = new SplitFrame(tfr, new double[] { 0.5, 0.5 }, new Key[] { Key.make("train.hex"), Key.make("test.hex") });
// Invoke the job
sf.exec().get();
ksplits = sf._destination_frames;
DRFModel drf = null;
float[] sample_rates = new float[] { 0.2f, 0.4f, 0.6f, 0.8f, 1.0f };
float[] col_sample_rates = new float[] { 0.4f, 0.6f, 0.8f, 1.0f };
float[] col_sample_rates_per_tree = new float[] { 0.4f, 0.6f, 0.8f, 1.0f };
Map<Double, Triple<Float>> hm = new TreeMap<>();
for (float sample_rate : sample_rates) {
for (float col_sample_rate : col_sample_rates) {
for (float col_sample_rate_per_tree : col_sample_rates_per_tree) {
Scope.enter();
try {
DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
parms._train = ksplits[0];
parms._valid = ksplits[1];
//regression
parms._response_column = "Angaus";
parms._seed = 12345;
parms._min_rows = 1;
parms._max_depth = 15;
parms._ntrees = 2;
parms._mtries = Math.max(1, (int) (col_sample_rate * (tfr.numCols() - 1)));
parms._col_sample_rate_per_tree = col_sample_rate_per_tree;
parms._sample_rate = sample_rate;
// Build a first model; all remaining models should be equal
DRF job = new DRF(parms);
drf = job.trainModel().get();
// too slow, but passes (now)
// // Build a POJO, validate same results
// Frame pred = drf.score(tfr);
// Assert.assertTrue(drf.testJavaScoring(tfr,pred,1e-15));
// pred.remove();
ModelMetricsRegression mm = (ModelMetricsRegression) drf._output._validation_metrics;
hm.put(mm.mse(), new Triple<>(sample_rate, col_sample_rate, col_sample_rate_per_tree));
} finally {
if (drf != null)
drf.delete();
Scope.exit();
}
}
}
}
Iterator<Map.Entry<Double, Triple<Float>>> it;
Triple<Float> last = null;
// iterate over results (min to max MSE) - best to worst
for (it = hm.entrySet().iterator(); it.hasNext(); ) {
Map.Entry<Double, Triple<Float>> n = it.next();
Log.info("MSE: " + n.getKey() + ", row sample: " + n.getValue().v1 + ", col sample: " + n.getValue().v2 + ", col sample per tree: " + n.getValue().v3);
last = n.getValue();
}
// worst validation MSE should belong to the most overfit case (1.0, 1.0, 1.0)
// Assert.assertTrue(last.v1==sample_rates[sample_rates.length-1]);
// Assert.assertTrue(last.v2==col_sample_rates[col_sample_rates.length-1]);
// Assert.assertTrue(last.v3==col_sample_rates_per_tree[col_sample_rates_per_tree.length-1]);
} finally {
if (tfr != null)
tfr.remove();
for (Key k : ksplits) if (k != null)
k.remove();
}
}
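Note how the test maps a fractional column sample rate onto DRF's _mtries parameter: the number of columns tried per split is the rate times the number of predictors (all columns minus the response), floored at one. As a standalone sketch, with mtriesForRate as a hypothetical helper name:
static int mtriesForRate(float colSampleRate, int numCols) {
  // numCols - 1 predictors (one column is the response); always try at least one column per split
  return Math.max(1, (int) (colSampleRate * (numCols - 1)));
}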
Use of hex.ModelMetricsRegression in project h2o-3 by h2oai.
The class DRFTest, method testAutoRebalance.
@Ignore
@Test
public void testAutoRebalance() {
//First pass to warm up
boolean warmUp = true;
if (warmUp) {
int[] warmUpChunks = { 1, 2, 3, 4, 5 };
for (int chunk : warmUpChunks) {
Frame tfr = null;
Scope.enter();
try {
// Load data, hack frames
tfr = parse_test_file("/Users/ludirehak/Downloads/train.csv.zip");
DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
parms._train = tfr._key;
parms._response_column = "Sales";
parms._nbins = 1000;
parms._ntrees = 10;
parms._max_depth = 20;
parms._mtries = -1;
parms._min_rows = 10;
parms._seed = 1234;
// parms._rebalance_me = true;
// parms._nchunks = 22;
// Build a first model; all remaining models should be equal
DRF job = new DRF(parms);
DRFModel drf = job.trainModel().get();
drf.delete();
} finally {
if (tfr != null)
tfr.remove();
}
Scope.exit();
}
}
int[] max_depths = { 2, 5, 10, 15, 20 };
int[] chunks = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 };
boolean[] rebalanceMes = { true };
int[] ntrees = { 10 };
int totalLength = chunks.length * max_depths.length * rebalanceMes.length * ntrees.length;
double[] executionTimes = new double[totalLength];
int[] outputchunks = new int[totalLength];
int[] outputdepths = new int[totalLength];
boolean[] outputrebalanceme = new boolean[totalLength];
int[] outputntrees = new int[totalLength];
int c = 0;
for (int max_depth : max_depths) {
for (int ntree : ntrees) {
for (boolean rebalanceMe : rebalanceMes) {
for (int chunk : chunks) {
long startTime = System.currentTimeMillis();
Scope.enter();
// Load data, hack frames
Frame tfr = parse_test_file("/Users/ludirehak/Downloads/train.csv.zip");
DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
parms._train = tfr._key;
parms._response_column = "Sales";
parms._nbins = 1000;
parms._mtries = -1;
parms._min_rows = 10;
parms._seed = 1234;
parms._ntrees = ntree;
parms._max_depth = max_depth;
// parms._rebalance_me = rebalanceMe;
// parms._nchunks = chunk;
// Build a first model
DRF job = new DRF(parms);
DRFModel drf = job.trainModel().get();
assertEquals(drf._output._ntrees, parms._ntrees);
ModelMetricsRegression mm = (ModelMetricsRegression) drf._output._training_metrics;
int actualChunk = job.train().anyVec().nChunks();
drf.delete();
tfr.remove();
Scope.exit();
executionTimes[c] = (System.currentTimeMillis() - startTime) / 1000d;
if (!rebalanceMe)
assert actualChunk == 22;
outputchunks[c] = actualChunk;
outputdepths[c] = max_depth;
outputrebalanceme[c] = rebalanceMe;
outputntrees[c] = drf._output._ntrees;
Log.info("Iteration " + (c + 1) + " out of " + executionTimes.length);
Log.info(" DEPTH: " + outputdepths[c] + " NTREES: " + outputntrees[c] + " CHUNKS: " + outputchunks[c] + " EXECUTION TIME: " + executionTimes[c] + " Rebalanced: " + rebalanceMe + " WarmedUp: " + warmUp);
c++;
}
}
}
}
String fileName = "/Users/ludirehak/Desktop/DRFTestRebalance3.txt";
// legend('topright', legend= c('max_depth',unique(max_depth)),col = 0:length(unique(max_depth)),pch=1);
try {
FileWriter fileWriter = new FileWriter(fileName);
BufferedWriter bufferedWriter = new BufferedWriter(fileWriter);
bufferedWriter.write("max_depth,ntrees,nbins,min_rows,chunks,execution_time,rebalanceMe,warmUp");
bufferedWriter.newLine();
for (int i = 0; i < executionTimes.length; i++) {
bufferedWriter.write(outputdepths[i] + "," + outputntrees[i] + "," + 1000 + "," + 10 + "," + outputchunks[i] + "," + executionTimes[i] + "," + (outputrebalanceme[i] ? 1 : 0) + "," + (warmUp ? 1 : 0));
bufferedWriter.newLine();
}
bufferedWriter.close();
} catch (Exception e) {
Log.info("Fail");
}
}