Use of hex.DataInfo in project h2o-3 by h2oai.
The class ComputationState, method applyStrongRulesMultinomial_old.
/**
* Apply strong rules to filter out expected inactive (with zero coefficient) predictors.
*
* @return indices of expected active predictors.
*/
protected int applyStrongRulesMultinomial_old(double lambdaNew, double lambdaOld) {
  int P = _dinfo.fullN();
  int N = P + 1;
  int selected = 0;
  _activeBC = _bc;
  _activeData = _dinfo;
  if (!_allIn) {
    if (_activeDataMultinomial == null)
      _activeDataMultinomial = new DataInfo[_nclasses];
    final double rhs = _alpha * (2 * lambdaNew - lambdaOld);
    int[] oldActiveCols = _activeData._activeCols == null ? new int[0] : _activeData.activeCols();
    int[] cols = MemoryManager.malloc4(N * _nclasses);
    int j = 0;
    for (int c = 0; c < _nclasses; ++c) {
      int start = selected;
      for (int i = 0; i < P; ++i) {
        if (j < oldActiveCols.length && i == oldActiveCols[j]) {
          cols[selected++] = i;
          ++j;
        } else if (_ginfo._gradient[c * N + i] > rhs || _ginfo._gradient[c * N + i] < -rhs) {
          cols[selected++] = i;
        }
      }
      // intercept
      cols[selected++] = P;
      _activeDataMultinomial[c] = _dinfo.filterExpandedColumns(Arrays.copyOfRange(cols, start, selected));
      for (int i = start; i < selected; ++i) cols[i] += c * N;
    }
    _allIn = selected == cols.length;
  }
  return selected;
}
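The filter above is the sequential strong rule for the elastic-net penalty: a predictor stays (or becomes) active when it was active at the previous lambda, or when the magnitude of its gradient component exceeds the threshold alpha * (2 * lambdaNew - lambdaOld); the intercept is always kept. A minimal standalone sketch of that criterion, assuming plain arrays (gradient, oldActive) in place of the ComputationState fields:

static int[] strongRuleFilter(double[] gradient, boolean[] oldActive,
                              double alpha, double lambdaNew, double lambdaOld) {
  // sequential strong-rule threshold, as in the method above
  final double rhs = alpha * (2 * lambdaNew - lambdaOld);
  java.util.List<Integer> active = new java.util.ArrayList<>();
  for (int i = 0; i < gradient.length; ++i) {
    // keep predictors that were already active or whose gradient magnitude exceeds the threshold
    if (oldActive[i] || Math.abs(gradient[i]) > rhs)
      active.add(i);
  }
  return active.stream().mapToInt(Integer::intValue).toArray();
}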
Use of hex.DataInfo in project h2o-3 by h2oai.
The class MakeGLMModelHandler, method oneHot.
public static Frame oneHot(Frame fr, String[] interactions, boolean useAll, boolean standardize, final boolean interactionsOnly, final boolean skipMissing) {
  final DataInfo dinfo = new DataInfo(fr, null, 1, useAll, standardize ? TransformType.STANDARDIZE : TransformType.NONE, TransformType.NONE, skipMissing, false, false, false, false, false, interactions);
  Frame res;
  if (interactionsOnly) {
    if (null == dinfo._interactionVecs)
      throw new IllegalArgumentException("no interactions");
    int noutputs = 0;
    final int[] colIds = new int[dinfo._interactionVecs.length];
    final int[] offsetIds = new int[dinfo._interactionVecs.length];
    int idx = 0;
    String[] coefNames = dinfo.coefNames();
    for (int i : dinfo._interactionVecs) noutputs += (offsetIds[idx++] = ((InteractionWrappedVec) dinfo._adaptedFrame.vec(i)).expandedLength());
    String[] names = new String[noutputs];
    int offset = idx = 0;
    int namesIdx = 0;
    for (int i = 0; i < dinfo._adaptedFrame.numCols(); ++i) {
      Vec v = dinfo._adaptedFrame.vec(i);
      if (v instanceof InteractionWrappedVec) {
        // interaction vec found: copy its expanded coefficient names into names
        colIds[idx] = offset;
        for (int nid = 0; nid < offsetIds[idx]; ++nid) names[namesIdx++] = coefNames[offset++];
        idx++;
        // no more interaction vecs left
        if (idx > dinfo._interactionVecs.length)
          break;
      } else {
        if (v.isCategorical())
          offset += v.domain().length - (useAll ? 0 : 1);
        else
          offset++;
      }
    }
    res = new MRTask() {
      @Override
      public void map(Chunk[] cs, NewChunk[] ncs) {
        DataInfo.Row r = dinfo.newDenseRow();
        for (int i = 0; i < cs[0]._len; ++i) {
          r = dinfo.extractDenseRow(cs, i, r);
          if (skipMissing && r.isBad())
            continue;
          int newChkIdx = 0;
          for (int idx = 0; idx < colIds.length; ++idx) {
            int startOffset = colIds[idx];
            for (int start = startOffset; start < (startOffset + offsetIds[idx]); ++start) ncs[newChkIdx++].addNum(r.get(start));
          }
        }
      }
    }.doAll(noutputs, Vec.T_NUM, dinfo._adaptedFrame).outputFrame(Key.make(), names, null);
  } else {
    byte[] types = new byte[dinfo.fullN()];
    Arrays.fill(types, Vec.T_NUM);
    res = new MRTask() {
      @Override
      public void map(Chunk[] cs, NewChunk[] ncs) {
        DataInfo.Row r = dinfo.newDenseRow();
        for (int i = 0; i < cs[0]._len; ++i) {
          r = dinfo.extractDenseRow(cs, i, r);
          if (skipMissing && r.isBad())
            continue;
          for (int n = 0; n < ncs.length; ++n) ncs[n].addNum(r.get(n));
        }
      }
    }.doAll(types, dinfo._adaptedFrame.vecs()).outputFrame(Key.make("OneHot" + Key.make().toString()), dinfo.coefNames(), null);
  }
  dinfo.dropInteractions();
  dinfo.remove();
  return res;
}
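For a single row, the bookkeeping above reduces to ordinary one-hot expansion: a categorical column with k levels contributes k indicator columns (or k - 1 when the first level is dropped, i.e. useAll is false), and a numeric column contributes one column. A minimal sketch of that expansion, assuming a hypothetical domains array (null for numeric columns) rather than a DataInfo:

static double[] expandRow(double[] row, String[][] domains, boolean useAllLevels) {
  // width of the expanded row: one slot per numeric column, one per kept categorical level
  int width = 0;
  for (int i = 0; i < row.length; ++i)
    width += domains[i] == null ? 1 : domains[i].length - (useAllLevels ? 0 : 1);
  double[] out = new double[width];
  int offset = 0;
  for (int i = 0; i < row.length; ++i) {
    if (domains[i] == null) {
      out[offset++] = row[i]; // numeric column is copied as-is
    } else {
      int level = (int) row[i] - (useAllLevels ? 0 : 1);
      if (level >= 0)
        out[offset + level] = 1; // a dropped first level maps to all zeros
      offset += domains[i].length - (useAllLevels ? 0 : 1);
    }
  }
  return out;
}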
Use of hex.DataInfo in project h2o-3 by h2oai.
The class GLRMCategoricalTest, method testExpandCatsProstate.
@Test
public void testExpandCatsProstate() throws InterruptedException, ExecutionException {
  double[][] prostate = ard(ard(0, 71, 1, 0, 0, 4.8, 14.0, 7), ard(1, 70, 1, 1, 0, 8.4, 21.8, 5), ard(0, 73, 1, 3, 0, 10.0, 27.4, 6), ard(1, 68, 1, 0, 0, 6.7, 16.7, 6));
  double[][] pros_expandR = ard(ard(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 71, 4.8, 14.0, 7), ard(0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 70, 8.4, 21.8, 5), ard(0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 73, 10.0, 27.4, 6), ard(1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 68, 6.7, 16.7, 6));
  String[] pros_cols = new String[] { "Capsule", "Age", "Race", "Dpros", "Dcaps", "PSA", "Vol", "Gleason" };
  String[][] pros_domains = new String[][] { new String[] { "No", "Yes" }, null, new String[] { "Other", "White", "Black" }, new String[] { "None", "UniLeft", "UniRight", "Bilobar" }, new String[] { "No", "Yes" }, null, null, null };
  // Categoricals: CAPSULE, RACE, DPROS, DCAPS
  final int[] cats = new int[] { 1, 3, 4, 5 };
  Frame fr = null;
  try {
    Scope.enter();
    fr = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
    for (int i = 0; i < cats.length; i++) Scope.track(fr.replace(cats[i], fr.vec(cats[i]).toCategoricalVec()));
    fr.remove("ID").remove();
    DKV.put(fr._key, fr);
    DataInfo dinfo = new DataInfo(fr, null, 0, true, DataInfo.TransformType.NONE, DataInfo.TransformType.NONE,
        false, false, false, /* weights */ false, /* offset */ false, /* fold */ false);
    Log.info("Original matrix:\n" + colFormat(pros_cols, "%8.7s") + ArrayUtils.pprint(prostate));
    double[][] pros_perm = ArrayUtils.permuteCols(prostate, dinfo._permutation);
    Log.info("Permuted matrix:\n" + colFormat(pros_cols, "%8.7s", dinfo._permutation) + ArrayUtils.pprint(pros_perm));
    double[][] pros_exp = GLRM.expandCats(pros_perm, dinfo);
    Log.info("Expanded matrix:\n" + colExpFormat(pros_cols, pros_domains, "%8.7s", dinfo._permutation) + ArrayUtils.pprint(pros_exp));
    Assert.assertArrayEquals(pros_expandR, pros_exp);
  } finally {
    if (fr != null)
      fr.delete();
    Scope.exit();
  }
}
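As a quick sanity check on pros_expandR, the expanded width is the sum of the categorical domain sizes plus the number of numeric columns: Capsule (2) + Race (3) + Dpros (4) + Dcaps (2) indicator columns plus the 4 numeric columns Age, PSA, Vol and Gleason, which gives the 15 entries seen in each expanded row (the categoricals are permuted to the front by dinfo._permutation).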
Use of hex.DataInfo in project h2o-3 by h2oai.
The class DeepLearningGradientCheck, method gradientCheck.
@Test
public void gradientCheck() {
Frame tfr = null;
DeepLearningModel dl = null;
try {
tfr = parse_test_file("smalldata/glm_test/cancar_logIn.csv");
for (String s : new String[] { "Merit", "Class" }) {
Vec f = tfr.vec(s).toCategoricalVec();
tfr.remove(s).remove();
tfr.add(s, f);
}
DKV.put(tfr);
tfr.add("Binary", tfr.anyVec().makeZero());
new MRTask() {
public void map(Chunk[] c) {
for (int i = 0; i < c[0]._len; ++i) if (c[0].at8(i) == 1)
c[1].set(i, 1);
}
}.doAll(tfr.vecs(new String[] { "Class", "Binary" }));
Vec cv = tfr.vec("Binary").toCategoricalVec();
tfr.remove("Binary").remove();
tfr.add("Binary", cv);
DKV.put(tfr);
Random rng = new Random(0xDECAF);
int count = 0;
int failedcount = 0;
double maxRelErr = 0;
double meanRelErr = 0;
for (DistributionFamily dist : new DistributionFamily[] { DistributionFamily.gaussian, DistributionFamily.laplace, DistributionFamily.quantile, DistributionFamily.huber, // DistributionFamily.modified_huber,
DistributionFamily.gamma, DistributionFamily.poisson, DistributionFamily.AUTO, DistributionFamily.tweedie, DistributionFamily.multinomial, DistributionFamily.bernoulli }) {
for (DeepLearningParameters.Activation act : new DeepLearningParameters.Activation[] { // DeepLearningParameters.Activation.ExpRectifier,
DeepLearningParameters.Activation.Tanh, DeepLearningParameters.Activation.Rectifier }) {
for (String response : new String[] { //binary classification
"Binary", //multi-class
"Class", //regression
"Cost" }) {
for (boolean adaptive : new boolean[] { true, false }) {
for (int miniBatchSize : new int[] { 1 }) {
if (response.equals("Class")) {
if (dist != DistributionFamily.multinomial && dist != DistributionFamily.AUTO)
continue;
} else if (response.equals("Binary")) {
if (dist != DistributionFamily.modified_huber && dist != DistributionFamily.bernoulli && dist != DistributionFamily.AUTO)
continue;
} else {
if (dist == DistributionFamily.multinomial || dist == DistributionFamily.modified_huber || dist == DistributionFamily.bernoulli)
continue;
}
DeepLearningParameters parms = new DeepLearningParameters();
parms._huber_alpha = rng.nextDouble() + 0.1;
parms._tweedie_power = 1.01 + rng.nextDouble() * 0.9;
parms._quantile_alpha = 0.05 + rng.nextDouble() * 0.9;
parms._train = tfr._key;
//converge to a reasonable model to avoid too large gradients
parms._epochs = 100;
parms._l1 = 1e-3;
parms._l2 = 1e-3;
parms._force_load_balance = false;
parms._hidden = new int[] { 10, 10, 10 };
//otherwise we introduce small bprop errors
parms._fast_mode = false;
parms._response_column = response;
parms._distribution = dist;
parms._max_w2 = 10;
parms._seed = 0xaaabbb;
parms._activation = act;
parms._adaptive_rate = adaptive;
parms._rate = 1e-4;
parms._momentum_start = 0.9;
parms._momentum_stable = 0.99;
parms._mini_batch_size = miniBatchSize;
// DeepLearningModelInfo.gradientCheck = null;
//tell it what gradient to collect
DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(0, 0, 0);
// Build a first model; all remaining models should be equal
DeepLearning job = new DeepLearning(parms);
try {
dl = job.trainModel().get();
boolean classification = response.equals("Class") || response.equals("Binary");
if (!classification) {
Frame p = dl.score(tfr);
hex.ModelMetrics mm = hex.ModelMetrics.getFromDKV(dl, tfr);
double resdev = ((ModelMetricsRegression) mm)._mean_residual_deviance;
Log.info("Mean residual deviance: " + resdev);
p.delete();
}
//golden version
DeepLearningModelInfo modelInfo = IcedUtils.deepCopy(dl.model_info());
// Log.info(modelInfo.toStringAll());
long before = dl.model_info().checksum_impl();
float meanLoss = 0;
// loop over every row in the dataset and check the gradients computed at that row
for (int rId = 0; rId < tfr.numRows(); rId += 1) /*miniBatchSize*/
{
// start from scratch - with a clean model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
final DataInfo di = dl.model_info().data_info();
// populate miniBatch (consecutive rows)
final DataInfo.Row[] rowsMiniBatch = new DataInfo.Row[miniBatchSize];
for (int i = 0; i < rowsMiniBatch.length; ++i) {
if (0 <= rId + i && rId + i < tfr.numRows()) {
rowsMiniBatch[i] = new FrameTask.ExtractDenseRow(di, rId + i).doAll(di._adaptedFrame)._row;
}
}
// loss at weight
long cs = dl.model_info().checksum_impl();
double loss = dl.meanLoss(rowsMiniBatch);
assert (cs == before);
assert (before == dl.model_info().checksum_impl());
meanLoss += loss;
for (int layer = 0; layer <= parms._hidden.length; ++layer) {
int rows = dl.model_info().get_weights(layer).rows();
assert (dl.model_info().get_biases(layer).size() == rows);
for (int row = 0; row < rows; ++row) {
//check bias
if (true) {
// start from scratch - with a clean model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
// do one forward propagation pass (and fill the mini-batch gradients -> set training=true)
Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(dl.model_info());
double[] responses = new double[miniBatchSize];
double[] offsets = new double[miniBatchSize];
int n = 0;
for (DataInfo.Row myRow : rowsMiniBatch) {
if (myRow == null)
continue;
((Neurons.Input) neurons[0]).setInput(-1, myRow.numIds, myRow.numVals, myRow.nBins, myRow.binIds, n);
responses[n] = myRow.response(0);
offsets[n] = myRow.offset;
n++;
}
DeepLearningTask.fpropMiniBatch(-1, /*seed doesn't matter*/
neurons, dl.model_info(), null, true, /*training*/
responses, offsets, n);
// check that we didn't change the model's weights/biases
long after = dl.model_info().checksum_impl();
assert (after == before);
// record the gradient since gradientChecking is enabled
//tell it what gradient to collect
DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(layer, row, -1);
//update the weights and biases
DeepLearningTask.bpropMiniBatch(neurons, n);
assert (before != dl.model_info().checksum_impl());
// reset the model back to the trained model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
assert (before == dl.model_info().checksum_impl());
double bpropGradient = DeepLearningModelInfo.gradientCheck.gradient;
// FIXME: re-enable this once the loss is computed from the de-standardized prediction/response
// double actualResponse=myRow.response[0];
// double predResponseLinkSpace = neurons[neurons.length-1]._a.get(0);
// if (di._normRespMul != null) {
// bpropGradient /= di._normRespMul[0]; //no shift for gradient
// actualResponse = (actualResponse / di._normRespMul[0] + di._normRespSub[0]);
// predResponseLinkSpace = (predResponseLinkSpace / di._normRespMul[0] + di._normRespSub[0]);
// }
// bpropGradient *= new Distribution(parms._distribution).gradient(actualResponse, predResponseLinkSpace);
final double bias = dl.model_info().get_biases(layer).get(row);
//don't make the bias delta too small, or the stored values "won't notice"
double eps = 1e-4 * Math.abs(bias);
if (eps == 0)
eps = 1e-6;
// loss at bias + eps
dl.model_info().get_biases(layer).set(row, bias + eps);
double up = dl.meanLoss(rowsMiniBatch);
// loss at bias - eps
dl.model_info().get_biases(layer).set(row, bias - eps);
double down = dl.meanLoss(rowsMiniBatch);
if (Math.abs(up - down) / Math.abs(up + down) < 1e-8) {
//relative change in loss function is too small -> skip
continue;
}
double gradient = ((up - down) / (2. * eps));
double relError = 2 * Math.abs(bpropGradient - gradient) / (Math.abs(gradient) + Math.abs(bpropGradient));
count++;
// if either gradient is tiny, check if both are tiny
if (Math.abs(gradient) < 1e-7 || Math.abs(bpropGradient) < 1e-7) {
//all good
if (Math.abs(bpropGradient - gradient) < 1e-7)
continue;
}
meanRelErr += relError;
if (relError > MAX_TOLERANCE) {
Log.info("\nDistribution: " + dl._parms._distribution);
Log.info("\nRow: " + rId);
Log.info("bias (layer " + layer + ", row " + row + "): " + bias + " +/- " + eps);
Log.info("loss: " + loss);
Log.info("losses up/down: " + up + " / " + down);
Log.info("=> Finite differences gradient: " + gradient);
Log.info("=> Back-propagation gradient : " + bpropGradient);
Log.info("=> Relative error : " + PrettyPrint.formatPct(relError));
failedcount++;
}
}
int cols = dl.model_info().get_weights(layer).cols();
for (int col = 0; col < cols; ++col) {
if (rng.nextFloat() >= SAMPLE_RATE)
continue;
// start from scratch - with a clean model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
// do one forward propagation pass (and fill the mini-batch gradients -> set training=true)
Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(dl.model_info());
double[] responses = new double[miniBatchSize];
double[] offsets = new double[miniBatchSize];
int n = 0;
for (DataInfo.Row myRow : rowsMiniBatch) {
if (myRow == null)
continue;
((Neurons.Input) neurons[0]).setInput(-1, myRow.numIds, myRow.numVals, myRow.nBins, myRow.binIds, n);
responses[n] = myRow.response(0);
offsets[n] = myRow.offset;
n++;
}
DeepLearningTask.fpropMiniBatch(-1, /*seed doesn't matter*/
neurons, dl.model_info(), null, true, /*training*/
responses, offsets, n);
// check that we didn't change the model's weights/biases
long after = dl.model_info().checksum_impl();
assert (after == before);
// record the gradient since gradientChecking is enabled
//tell it what gradient to collect
DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(layer, row, col);
//update the weights
DeepLearningTask.bpropMiniBatch(neurons, n);
assert (before != dl.model_info().checksum_impl());
// reset the model back to the trained model
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
assert (before == dl.model_info().checksum_impl());
double bpropGradient = DeepLearningModelInfo.gradientCheck.gradient;
// FIXME: re-enable this once the loss is computed from the de-standardized prediction/response
// double actualResponse=myRow.response[0];
// double predResponseLinkSpace = neurons[neurons.length-1]._a.get(0);
// if (di._normRespMul != null) {
// bpropGradient /= di._normRespMul[0]; //no shift for gradient
// actualResponse = (actualResponse / di._normRespMul[0] + di._normRespSub[0]);
// predResponseLinkSpace = (predResponseLinkSpace / di._normRespMul[0] + di._normRespSub[0]);
// }
// bpropGradient *= new Distribution(parms._distribution).gradient(actualResponse, predResponseLinkSpace);
final float weight = dl.model_info().get_weights(layer).get(row, col);
//don't make the weight deltas too small, or the float weights "won't notice"
double eps = 1e-4 * Math.abs(weight);
if (eps == 0)
eps = 1e-6;
// loss at weight + eps
dl.model_info().get_weights(layer).set(row, col, (float) (weight + eps));
double up = dl.meanLoss(rowsMiniBatch);
// loss at weight - eps
dl.model_info().get_weights(layer).set(row, col, (float) (weight - eps));
double down = dl.meanLoss(rowsMiniBatch);
if (Math.abs(up - down) / Math.abs(up + down) < 1e-8) {
//relative change in loss function is too small -> skip
continue;
}
double gradient = ((up - down) / (2. * eps));
double relError = 2 * Math.abs(bpropGradient - gradient) / (Math.abs(gradient) + Math.abs(bpropGradient));
count++;
// if either gradient is tiny, check if both are tiny
if (Math.abs(gradient) < 1e-7 || Math.abs(bpropGradient) < 1e-7) {
//all good
if (Math.abs(bpropGradient - gradient) < 1e-7)
continue;
}
meanRelErr += relError;
if (relError > MAX_TOLERANCE) {
Log.info("\nDistribution: " + dl._parms._distribution);
Log.info("\nRow: " + rId);
Log.info("weight (layer " + layer + ", row " + row + ", col " + col + "): " + weight + " +/- " + eps);
Log.info("loss: " + loss);
Log.info("losses up/down: " + up + " / " + down);
Log.info("=> Finite differences gradient: " + gradient);
Log.info("=> Back-propagation gradient : " + bpropGradient);
Log.info("=> Relative error : " + PrettyPrint.formatPct(relError));
failedcount++;
}
// Assert.assertTrue(failedcount==0);
maxRelErr = Math.max(maxRelErr, relError);
assert (!Double.isNaN(maxRelErr));
}
}
}
}
meanLoss /= tfr.numRows();
Log.info("Mean loss: " + meanLoss);
// // FIXME: re-enable this
// if (parms._l1 == 0 && parms._l2 == 0) {
// assert(Math.abs(meanLoss-resdev)/Math.abs(resdev) < 1e-5);
// }
} catch (RuntimeException ex) {
dl = DKV.getGet(job.dest());
if (dl != null)
Assert.assertTrue(dl.model_info().isUnstable());
else
Assert.assertTrue(job.isStopped());
} finally {
if (dl != null)
dl.delete();
}
}
}
}
}
}
Log.info("Number of tests: " + count);
Log.info("Number of failed tests: " + failedcount);
Log.info("Mean. relative error: " + meanRelErr / count);
Log.info("Max. relative error: " + PrettyPrint.formatPct(maxRelErr));
Assert.assertTrue("Error too large: " + maxRelErr + " >= " + MAX_TOLERANCE, maxRelErr < MAX_TOLERANCE);
Assert.assertTrue("Failed count too large: " + failedcount + " > " + MAX_FAILED_COUNT, failedcount <= MAX_FAILED_COUNT);
} finally {
if (tfr != null)
tfr.remove();
}
}
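The core of the test is a central finite-difference estimate of the gradient at each perturbed weight or bias, compared to the back-propagated gradient with a symmetric relative error. A standalone sketch of that comparison, assuming a hypothetical lossAt function standing in for dl.meanLoss on the perturbed model:

static boolean gradientAgrees(java.util.function.DoubleUnaryOperator lossAt,
                              double param, double bpropGradient, double tolerance) {
  // perturbation scaled to the parameter, with a floor for zero-valued parameters
  double eps = 1e-4 * Math.abs(param);
  if (eps == 0)
    eps = 1e-6;
  double up = lossAt.applyAsDouble(param + eps);   // loss at param + eps
  double down = lossAt.applyAsDouble(param - eps); // loss at param - eps
  double fdGradient = (up - down) / (2. * eps);    // central finite difference
  // symmetric relative error between finite-difference and back-prop gradients
  double relError = 2 * Math.abs(bpropGradient - fdGradient) / (Math.abs(fdGradient) + Math.abs(bpropGradient));
  return relError <= tolerance;
}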
Use of hex.DataInfo in project h2o-3 by h2oai.
The class DeepLearningTask, method makeNeurons.
// Helper
private static Neurons[] makeNeurons(final DeepLearningModelInfo minfo, boolean training) {
  DataInfo dinfo = minfo.data_info();
  final DeepLearningParameters params = minfo.get_params();
  final int[] h = params._hidden;
  // input + hidden + output
  Neurons[] neurons = new Neurons[h.length + 2];
  // input
  neurons[0] = new Neurons.Input(params, minfo.units[0], dinfo);
  // hidden
  for (int i = 0; i < h.length + (params._autoencoder ? 1 : 0); i++) {
    int n = params._autoencoder && i == h.length ? minfo.units[0] : h[i];
    switch (params._activation) {
      case Tanh:
        neurons[i + 1] = new Neurons.Tanh(n);
        break;
      case TanhWithDropout:
        neurons[i + 1] = params._autoencoder && i == h.length ? new Neurons.Tanh(n) : new Neurons.TanhDropout(n);
        break;
      case Rectifier:
        neurons[i + 1] = new Neurons.Rectifier(n);
        break;
      case RectifierWithDropout:
        neurons[i + 1] = params._autoencoder && i == h.length ? new Neurons.Rectifier(n) : new Neurons.RectifierDropout(n);
        break;
      case Maxout:
        neurons[i + 1] = new Neurons.Maxout(params, (short) 2, n);
        break;
      case MaxoutWithDropout:
        neurons[i + 1] = params._autoencoder && i == h.length ? new Neurons.Maxout(params, (short) 2, n) : new Neurons.MaxoutDropout(params, (short) 2, n);
        break;
      case ExpRectifier:
        neurons[i + 1] = new Neurons.ExpRectifier(n);
        break;
      case ExpRectifierWithDropout:
        neurons[i + 1] = params._autoencoder && i == h.length ? new Neurons.ExpRectifier(n) : new Neurons.ExpRectifierDropout(n);
        break;
    }
  }
  if (!params._autoencoder) {
    if (minfo._classification && minfo.get_params()._distribution != DistributionFamily.modified_huber)
      neurons[neurons.length - 1] = new Neurons.Softmax(minfo.units[minfo.units.length - 1]);
    else
      neurons[neurons.length - 1] = new Neurons.Linear();
  }
  // copy parameters from NN, and set previous/input layer links
  for (int i = 0; i < neurons.length; i++) {
    neurons[i].init(neurons, i, params, minfo, training);
    neurons[i]._input = neurons[0];
  }
  // for (Neurons n : neurons) Log.info(n.toString());
  return neurons;
}
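For example, with params._hidden = { 200, 200 }, Tanh activation, a classification response (other than modified_huber) and no autoencoder, the resulting array is [Input, Tanh(200), Tanh(200), Softmax], so its length is always hidden.length + 2; in the autoencoder case the final slot instead holds an extra hidden-type layer sized to the input (minfo.units[0]).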