Usage of hex.genmodel.algos.glrm.GlrmLoss in project h2o-3 by h2oai: method initLoss of class GLRM.
/**
 * Validate all Loss-related parameters, and fill in the `_lossFunc` array.
 * One loss function is assigned per training-frame column: the per-column
 * overrides from `_loss_by_col` (positioned via `_loss_by_col_idx`, or applied
 * to all columns when no indices are given) when provided, otherwise
 * `_multi_loss` for categorical columns and `_loss` for everything else.
 * Validation problems are reported through error(...) rather than thrown.
 */
private void initLoss() {
// Number of per-column loss overrides, and number of explicit column indices for them.
int num_loss_by_cols = _parms._loss_by_col == null ? 0 : _parms._loss_by_col.length;
int num_loss_by_cols_idx = _parms._loss_by_col_idx == null ? 0 : _parms._loss_by_col_idx.length;
// First validate the parameters that do not require access to the training frame
if (_parms._period <= 0)
error("_period", "_period must be a positive integer");
if (!_parms._loss.isForNumeric())
error("_loss", _parms._loss + " is not a numeric loss function");
if (!_parms._multi_loss.isForCategorical())
error("_multi_loss", _parms._multi_loss + " is not a multivariate loss function");
// Indices (when given) must pair 1:1 with the override loss functions.
if (num_loss_by_cols != num_loss_by_cols_idx && num_loss_by_cols_idx > 0)
error("_loss_by_col", "Sizes of arrays _loss_by_col and _loss_by_col_idx must be the same");
// The remaining checks need the training frame; bail out if it is absent.
if (_train == null)
return;
_binaryColumnIndices = new ArrayList<Integer>();
// Initialize the default loss functions for each column
// Note: right now for binary columns `.isCategorical()` returns true. It has the undesired consequence that
// such variables will get categorical loss function, and will get expanded into 2 columns.
_lossFunc = new GlrmLoss[_ncolA];
for (int i = 0; i < _ncolA; i++) {
Vec vi = _train.vec(i);
_lossFunc[i] = vi.isCategorical() ? _parms._multi_loss : _parms._loss;
}
// grab original frame column names before change
String[] origColumnNames = _parms.train().names();
ArrayList<String> newColumnNames = new ArrayList<String>(Arrays.asList(_train._names));
// If _loss_by_col is provided, then override loss functions on the specified columns
if (num_loss_by_cols > 0) {
if (num_loss_by_cols_idx == 0) {
// No indices given: the overrides must cover every original column, in order.
if (num_loss_by_cols == origColumnNames.length)
assignLossByCol(num_loss_by_cols, newColumnNames, origColumnNames);
else
error("_loss_by_col", "Number of override loss functions should be the same as the " + "number of columns in the input frame; or otherwise an explicit _loss_by_col_idx should be " + "provided.");
}
if (num_loss_by_cols_idx == num_loss_by_cols)
assignLossByCol(num_loss_by_cols, newColumnNames, origColumnNames);
// Otherwise we have already reported an error at the start of this method
}
// Check that all loss functions correspond to their actual type
for (int i = 0; i < _ncolA; i++) {
Vec vi = _train.vec(i);
GlrmLoss lossi = _lossFunc[i];
if (vi.isNumeric()) {
// numeric columns
if (!vi.isBinary()) {
// non-binary numeric columns
if (!lossi.isForNumeric())
error("_loss_by_col", "Loss function " + lossi + " cannot be applied to numeric column " + i);
} else {
// binary numeric columns: both binary and numeric losses are acceptable
if (!lossi.isForBinary() && !lossi.isForNumeric()) {
error("_loss_by_col", "Loss function " + lossi + " cannot be applied to binary column " + i);
}
}
} else if (vi.isCategorical()) {
// categorical columns
if (vi.isBinary()) {
// categorical binary columns
if (!lossi.isForBinary() && !lossi.isForCategorical())
error("_loss_by_col", "Loss function " + lossi + " cannot be applied to binary column " + i);
else if (lossi.isForBinary())
// collect column indices storing binary columns with binary loss function.
_binaryColumnIndices.add(i);
} else {
// categorical non-binary columns
if (!lossi.isForCategorical())
error("_loss_by_col", "Loss function " + lossi + " cannot be applied to categorical column " + i);
}
}
// Set loss-specific parameters (currently only Periodic needs one).
// NOTE(review): GlrmLoss appears to be an enum, so setParameters likely mutates state
// shared by every column using Periodic loss — confirm this sharing is intended.
if (lossi == GlrmLoss.Periodic)
lossi.setParameters(_parms._period);
}
}
Usage of hex.genmodel.algos.glrm.GlrmLoss in project h2o-3 by h2oai: method writeModelData of class GlrmMojoWriter.
/**
 * Serializes the GLRM model into the MOJO: hyper-parameters as key/value entries,
 * the per-column loss functions as a text file, and the archetypes (Y matrix) as
 * a binary blob of doubles.
 */
@Override
protected void writeModelData() throws IOException {
    // Model hyper-parameters.
    writekv("initialization", model._parms._init);
    writekv("regularizationX", model._parms._regularization_x);
    writekv("regularizationY", model._parms._regularization_y);
    writekv("gammaX", model._parms._gamma_x);
    writekv("gammaY", model._parms._gamma_y);
    writekv("ncolX", model._parms._k);
    // DataInfo mapping between the original and the adapted frame.
    writekv("cols_permutation", model._output._permutation);
    writekv("num_categories", model._output._ncats);
    writekv("num_numeric", model._output._nnums);
    writekv("norm_sub", model._output._normSub);
    writekv("norm_mul", model._output._normMul);
    // One loss-function name per column of A, stored as a text file.
    GlrmLoss[] losses = model._output._lossFunc;
    writekv("ncolA", losses.length);
    startWritingTextFile("losses");
    for (GlrmLoss lossFunction : losses) {
        writeln(lossFunction.toString());
    }
    finishWritingTextFile();
    // Archetypes: Y matrix dimensions, then the values row-major as big-endian doubles
    // (ByteBuffer's default byte order).
    GLRM.Archetypes archetypes = model._output._archetypes_raw;
    writekv("ncolY", archetypes.nfeatures());
    writekv("nrowY", archetypes.rank());
    writekv("num_levels_per_category", archetypes._numLevels);
    int numDoubles = archetypes.rank() * archetypes.nfeatures();
    ByteBuffer buffer = ByteBuffer.wrap(new byte[numDoubles * 8]);
    for (double[] row : archetypes.getY(false)) {
        for (double value : row) {
            buffer.putDouble(value);
        }
    }
    writeblob("archetypes", buffer.array());
}
Usage of hex.genmodel.algos.glrm.GlrmLoss in project h2o-3 by h2oai: method testLosses of class GLRMCategoricalTest.
/**
 * Trains GLRM on the prostate dataset for every combination of numeric and
 * categorical loss function, with randomly drawn regularizers and gammas
 * (deterministic via a fixed seed), and checks that scoring completes.
 */
@Test
public void testLosses() throws InterruptedException, ExecutionException {
    final long baseSeed = 0xDECAF;
    Random rand = new Random(baseSeed);
    Frame prostate = null;
    // Columns converted to categoricals: CAPSULE, RACE, DPROS, DCAPS
    final int[] categoricalCols = { 1, 3, 4, 5 };
    final GlrmRegularizer[] regularizers = { GlrmRegularizer.Quadratic, GlrmRegularizer.L1, GlrmRegularizer.NonNegative, GlrmRegularizer.OneSparse, GlrmRegularizer.UnitOneSparse, GlrmRegularizer.Simplex };
    final GlrmLoss[] numericLosses = { GlrmLoss.Quadratic, GlrmLoss.Absolute, GlrmLoss.Huber, GlrmLoss.Poisson };
    final GlrmLoss[] categoricalLosses = { GlrmLoss.Categorical, GlrmLoss.Ordinal };
    Scope.enter();
    try {
        prostate = parse_test_file(Key.make("prostate.hex"), "smalldata/logreg/prostate.csv");
        for (int col : categoricalCols)
            Scope.track(prostate.replace(col, prostate.vec(col).toCategoricalVec()));
        prostate.remove("ID").remove();
        DKV.put(prostate._key, prostate);
        for (GlrmLoss numericLoss : numericLosses) {
            for (GlrmLoss categoricalLoss : categoricalLosses) {
                GLRMModel glrm = null;
                try {
                    Scope.enter();
                    // Draw the per-run seed first so the RNG sequence matches run order.
                    long runSeed = rand.nextLong();
                    Log.info("GLRM using seed = " + runSeed);
                    GLRMParameters params = new GLRMParameters();
                    params._train = prostate._key;
                    params._transform = DataInfo.TransformType.NONE;
                    params._k = 5;
                    params._loss = numericLoss;
                    params._multi_loss = categoricalLoss;
                    params._init = GlrmInitialization.SVD;
                    params._regularization_x = regularizers[rand.nextInt(regularizers.length)];
                    params._regularization_y = regularizers[rand.nextInt(regularizers.length)];
                    params._gamma_x = Math.abs(rand.nextDouble());
                    params._gamma_y = Math.abs(rand.nextDouble());
                    params._recover_svd = false;
                    params._seed = runSeed;
                    params._verbose = false;
                    params._max_iterations = 500;
                    glrm = new GLRM(params).trainModel().get();
                    Log.info("Iteration " + glrm._output._iterations + ": Objective value = " + glrm._output._objective);
                    glrm.score(prostate).delete();
                    ModelMetricsGLRM metrics = (ModelMetricsGLRM) ModelMetrics.getFromDKV(glrm, prostate);
                    Log.info("Numeric Sum of Squared Error = " + metrics._numerr + "\tCategorical Misclassification Error = " + metrics._caterr);
                } finally {
                    if (glrm != null)
                        glrm.delete();
                    Scope.exit();
                }
            }
        }
    } finally {
        if (prostate != null)
            prostate.delete();
        Scope.exit();
    }
}
Usage of hex.genmodel.algos.glrm.GlrmLoss in project h2o-3 by h2oai: method checkLossbyCol of class GLRMTest.
/**
 * Verifies that the loss functions recorded in the model match the ones requested
 * via {@code _loss_by_col}/{@code _loss_by_col_idx}: an overridden column must carry
 * its override, every other column the default ({@code _multi_loss} for the first
 * {@code _ncats} categorical columns of the adapted frame, {@code _loss} otherwise).
 *
 * <p>Bug fix: the previous implementation sorted the adapted indices with
 * {@code Arrays.sort} and then used the {@code Arrays.binarySearch} position to index
 * {@code parms._loss_by_col}. Sorting breaks the pairing between adapted index and
 * override, so whenever the adapted order permuted the entries (e.g. a categorical
 * override column moved to the front of the adapted frame) the wrong loss function
 * was compared. The arrays are now kept parallel and searched linearly instead.
 *
 * @param parms the GLRM parameters the model was trained with
 * @param model the trained model whose {@code _lossFunc} array is checked
 */
public static void checkLossbyCol(GLRMParameters parms, GLRMModel model) {
    int ncats = model._output._ncats;
    GlrmLoss[] actual = model._output._lossFunc;
    assert ncats >= 0 && ncats <= actual.length;
    // Nothing to verify unless explicit per-column overrides were supplied.
    if (null == parms._loss_by_col || null == parms._loss_by_col_idx)
        return;
    Assert.assertEquals(parms._loss_by_col.length, parms._loss_by_col_idx.length);
    // Map original to adapted frame column indices, keeping loss_idx_adapt[i]
    // paired with parms._loss_by_col[i] (do NOT sort — that breaks the pairing).
    int[] loss_idx_adapt = new int[parms._loss_by_col_idx.length];
    for (int i = 0; i < parms._loss_by_col_idx.length; i++) {
        int idx_adapt = -1;  // stays -1 if the original index is not in the permutation
        for (int j = 0; j < model._output._permutation.length; j++) {
            if (model._output._permutation[j] == parms._loss_by_col_idx[i]) {
                idx_adapt = j;
                break;
            }
        }
        loss_idx_adapt[i] = idx_adapt;
    }
    // For each adapted column, expect the paired override if one targets it,
    // otherwise the type-appropriate default loss.
    for (int i = 0; i < actual.length; i++) {
        GlrmLoss expected = i < ncats ? parms._multi_loss : parms._loss;
        for (int k = 0; k < loss_idx_adapt.length; k++) {
            if (loss_idx_adapt[k] == i) {
                expected = parms._loss_by_col[k];
                break;
            }
        }
        Assert.assertEquals(expected, actual[i]);
    }
}
Aggregations