use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.
the class DeepLearningModel method scoreDeepFeatures.
/**
 * Runs the network forward on each row of {@code frame} and returns the activations of one
 * hidden layer as new columns named {@code DF.L<layer+1>.C<unit+1>}.
 *
 * NOTE(review): this excerpt appears to have lost lines (closing braces, the bodies of some
 * {@code if} statements, the tail of the fpropMiniBatch call, the call that launches the task).
 * The comments below describe only what is visible.
 *
 * @param frame frame to score
 * @param layer zero-based index of the hidden layer to extract; must be in [0, _hidden.length - 1]
 * @param job   job polled for stop requests; the null checks below show it may be null
 * @return the columns of the adapted frame from index {@code _output._names.length} to its last column
 * @throws H2OIllegalArgumentException if {@code layer} is out of range
 * @throws IllegalArgumentException if no Vec can be obtained from the frame (no columns)
 */
public Frame scoreDeepFeatures(Frame frame, final int layer, final Job job) {
// Reject a layer index outside [0, number of hidden layers - 1].
if (layer < 0 || layer >= model_info().get_params()._hidden.length)
throw new H2OIllegalArgumentException("hidden layer (index) to extract must be between " + 0 + " and " + (model_info().get_params()._hidden.length - 1), "");
// Number of values read per row by the scoring task below.
final int len = _output.nfeatures();
if (isSupervised()) {
// Look up the response column by name; -1 means the frame does not contain it.
int ridx = frame.find(_output.responseName());
if (ridx != -1) {
// drop the response for scoring!
// NOTE(review): only the re-wrapping of the frame is visible here; the statement that
// actually removes column ridx is not in this excerpt - confirm against the full source.
frame = new Frame(frame);
// Work on a separate Frame object so columns can be appended without touching the caller's frame object.
Frame adaptFrm = new Frame(frame);
//create new features, will be dense
// One output column per unit of the selected hidden layer.
final int features = model_info().get_params()._hidden[layer];
// Any existing Vec serves as the template for the new output Vecs; null means there are no columns.
Vec v = adaptFrm.anyVec();
Vec[] vecs = v != null ? v.makeZeros(features) : null;
if (vecs == null)
throw new IllegalArgumentException("Cannot create deep features from a frame with no columns.");
adaptTestForTrain(adaptFrm, true, false);
// Append the output Vecs after the adapted input columns; names are 1-based for both layer and unit.
for (int j = 0; j < features; ++j) {
adaptFrm.add("DF.L" + (layer + 1) + ".C" + (j + 1), vecs[j]);
// mb is the mini-batch slot used for setInput and for reading activations; n is not used in the visible code.
final int mb = 0;
final int n = 1;
new MRTask() {
public void map(Chunk[] chks) {
// NOTE(review): the statement guarded by this cancellation check (presumably an early return) is missing from the excerpt.
if (isCancelled() || job != null && job.stop_requested())
// Scratch buffer holding one row of input values.
double[] tmp = new double[len];
final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
for (int row = 0; row < chks[0]._len; row++) {
// Copy the first len chunk values of this row into the scratch buffer.
for (int i = 0; i < len; i++) tmp[i] = chks[i].atd(row);
//FIXME: No weights yet
((Neurons.Input) neurons[0]).setInput(-1, tmp, mb);
// NOTE(review): the remaining arguments and closing parenthesis of this call are truncated in the excerpt.
DeepLearningTask.fpropMiniBatch(-1, neurons, model_info, null, false, null, null, /*no offset*/
//extract the layer-th hidden feature
// neurons[0] is used as the input layer above, so hidden layer `layer` is read at index layer + 1.
double[] out = neurons[layer + 1]._a[mb].raw();
// Output columns start at chunk index _output._names.length.
for (int c = 0; c < features; c++) chks[_output._names.length + c].set(row, out[c]);
// NOTE(review): the statement guarded by this null check is missing from the excerpt.
if (job != null)
// Return just the output columns
int x = _output._names.length, y = adaptFrm.numCols();
Frame ret = adaptFrm.extractFrame(x, y);
return ret;
use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.
the class SharedTree method init.
/** Initialize the ModelBuilder, validating all arguments and preparing the
* training frame. This call is expected to be overridden in the subclasses
* and each subclass will start with "super.init();". This call is made
* by the front-end whenever the GUI is clicked, and needs to be fast;
* heavy-weight prep needs to wait for the trainModel() call.
* Validate the requested ntrees; precompute actual ntrees. Validate
* the number of classes to predict on; validate a checkpoint.
*
* Problems are reported through error(field, message) and warn(field, message)
* rather than thrown, so several can be collected in one pass.
*
* NOTE(review): this excerpt appears to have lost lines (closing braces, part of
* the checkpoint comparison on the deepEquals line); comments below describe only
* what is visible.
*
* @param expensive not referenced in the visible portion of this method */
public void init(boolean expensive) {
if (H2O.ARGS.client && _parms._build_tree_one_node)
error("_build_tree_one_node", "Cannot run on a single node in client mode.");
// NOTE(review): this condition accepts 0 although the message says "greater than 0";
// a stricter "<= 0" check on _min_rows appears further down.
if (_parms._min_rows < 0)
error("_min_rows", "Requested min_rows must be greater than 0");
if (_parms._categorical_encoding == Model.Parameters.CategoricalEncodingScheme.OneHotInternal) {
error("_categorical_encoding", "Cannot use OneHotInternal categorical encoding for tree methods.");
// NOTE(review): this condition accepts 0 although the message says the range starts at 1 - confirm which is intended.
if (_parms._ntrees < 0 || _parms._ntrees > MAX_NTREES)
error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES);
// Total trees in final model
_ntrees = _parms._ntrees;
if (_parms.hasCheckpoint()) {
// Asking to continue from checkpoint?
Value cv = DKV.get(_parms._checkpoint);
if (cv != null) {
// Look for prior model
M checkpointModel = cv.get();
// The new run must match the checkpointed model in problem type, column names and categorical domains.
try {
if (isClassifier() != checkpointModel._output.isClassifier())
throw new IllegalArgumentException("Response type must be the same as for the checkpointed model.");
if (!Arrays.equals(_train.names(), checkpointModel._output._names)) {
throw new IllegalArgumentException("The columns of the training data must be the same as for the checkpointed model");
// NOTE(review): the first argument of deepEquals is missing in this excerpt.
if (!Arrays.deepEquals(, checkpointModel._output._domains)) {
throw new IllegalArgumentException("Categorical factor levels of the training data must be the same as for the checkpointed model");
// NOTE(review): the visible throws above use IllegalArgumentException while this handler names
// H2OIllegalArgumentException; whether it catches them depends on the class hierarchy, which is not visible here.
} catch (H2OIllegalArgumentException e) {
error(e.values.get("argument").toString(), e.values.get("value").toString());
// NOTE(review): the condition requires ntrees >= checkpoint trees + 1, while the message says
// "higher than" that same value - the wording and the check differ by one.
if (_parms._ntrees < checkpointModel._output._ntrees + 1)
error("_ntrees", "If checkpoint is specified then requested ntrees must be higher than " + (checkpointModel._output._ntrees + 1));
// Compute number of trees to build for this checkpoint
// Needed trees
_ntrees = _parms._ntrees - checkpointModel._output._ntrees;
// Bin counts must lie in the open interval (1, 65536).
if (_parms._nbins <= 1)
error("_nbins", "nbins must be > 1.");
if (_parms._nbins >= 1 << 16)
error("_nbins", "nbins must be < " + (1 << 16));
if (_parms._nbins_cats <= 1)
error("_nbins_cats", "nbins_cats must be > 1.");
if (_parms._nbins_cats >= 1 << 16)
error("_nbins_cats", "nbins_cats must be < " + (1 << 16));
if (_parms._nbins_top_level < _parms._nbins)
error("_nbins_top_level", "nbins_top_level must be >= nbins (" + _parms._nbins + ").");
if (_parms._nbins_top_level >= 1 << 16)
error("_nbins_top_level", "nbins_top_level must be < " + (1 << 16));
if (_parms._max_depth <= 0)
error("_max_depth", "_max_depth must be > 0.");
if (_parms._min_rows <= 0)
error("_min_rows", "_min_rows must be > 0.");
// Any value other than Double.MAX_VALUE means the caller set _r2_stopping; only warn, do not fail.
if (_parms._r2_stopping != Double.MAX_VALUE)
warn("_r2_stopping", "_r2_stopping is no longer supported - please use stopping_rounds, stopping_metric and stopping_tolerance instead.");
if (_parms._score_tree_interval < 0)
error("_score_tree_interval", "_score_tree_interval must be >= 0.");
if (_parms._sample_rate_per_class != null) {
warn("_sample_rate", "_sample_rate is ignored if _sample_rate_per_class is specified.");
if (_parms._sample_rate_per_class.length != nclasses())
error("_sample_rate_per_class", "_sample_rate_per_class must have " + nclasses() + " values (one per class).");
// Each per-class rate must be in (0, 1]; the negated form also rejects NaN.
for (int i = 0; i < _parms._sample_rate_per_class.length; ++i) {
if (!(0.0 < _parms._sample_rate_per_class[i] && _parms._sample_rate_per_class[i] <= 1.0))
error("_sample_rate_per_class", "sample_rate_per_class for class " + response().domain()[i] + " should be in interval ]0,1] but it is " + _parms._sample_rate_per_class[i] + ".");
if (!(0.0 < _parms._sample_rate && _parms._sample_rate <= 1.0))
error("_sample_rate", "sample_rate should be in interval ]0,1] but it is " + _parms._sample_rate + ".");
if (_parms._min_split_improvement < 0)
error("_min_split_improvement", "min_split_improvement must be >= 0, but is " + _parms._min_split_improvement + ".");
if (!(0.0 < _parms._col_sample_rate_per_tree && _parms._col_sample_rate_per_tree <= 1.0))
error("_col_sample_rate_per_tree", "col_sample_rate_per_tree should be in interval ]0,1] but it is " + _parms._col_sample_rate_per_tree + ".");
// Accepted range is (0, 2]: zero itself is rejected.
if (!(0. < _parms._col_sample_rate_change_per_level && _parms._col_sample_rate_change_per_level <= 2))
error("_col_sample_rate_change_per_level", "col_sample_rate_change_per_level must be between 0 and 2");
if (_train != null) {
// Weighted row count: rows times the mean of the weights column, or plain row count when there is no weights column.
double sumWeights = _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
if (// Need at least 2*min_rows weighted rows to split even once
sumWeights < 2 * _parms._min_rows)
error("_min_rows", "The dataset size is too small to split for min_rows=" + _parms._min_rows + ": must have at least " + 2 * _parms._min_rows + " (weighted) rows, but have only " + sumWeights + ".");
// Column count excluding one column (presumably the response - confirm) and the special columns.
if (_train != null)
_ncols = _train.numCols() - 1 - numSpecialCols();
use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.
the class Tabulate method execImpl.
/**
 * Validates the inputs, prepares the predictor and response columns, runs a CoOccurrence
 * task over them (with the weight column when one is found) and stores the two resulting
 * tables in {@code _count_table} and {@code _response_table}.
 *
 * NOTE(review): this excerpt appears to have lost lines (closing braces, the call that
 * executes each Interaction) and its second-to-last statement carries garbled trailing tokens.
 * The comments below describe only what is visible.
 *
 * @return the Tabulate instance read from the CoOccurrence task's {@code _sp} field
 * @throws H2OIllegalArgumentException if the dataset, predictor column or response column is
 *         missing, if a bin count is below 1, or if the weight check fails
 */
public Tabulate execImpl() {
if (_dataset == null)
throw new H2OIllegalArgumentException("Dataset not found");
if (_nbins_predictor < 1)
throw new H2OIllegalArgumentException("Number of bins for predictor must be >= 1");
if (_nbins_response < 1)
throw new H2OIllegalArgumentException("Number of bins for response must be >= 1");
Vec x = _dataset.vec(_predictor);
if (x == null)
throw new H2OIllegalArgumentException("Predictor column " + _predictor + " not found");
// More levels than bins: configure an Interaction on the predictor capped at nbins - 1 factors
// and use the first Vec of its result.
// NOTE(review): the statement that runs the Interaction before its result is read is not visible here.
if (x.cardinality() > _nbins_predictor) {
Interaction in = new Interaction();
in._source_frame = _dataset._key;
in._factor_columns = new String[] { _predictor };
in._max_factors = _nbins_predictor - 1;
x = in._job._result.get().anyVec();
// Integer column whose value range fits within the bin count: convert it to categorical.
} else if (x.isInt() && (x.max() - x.min() + 1) <= _nbins_predictor) {
x = x.toCategoricalVec();
Vec y = _dataset.vec(_response);
if (y == null)
throw new H2OIllegalArgumentException("Response column " + _response + " not found");
// Same preparation as for the predictor, applied to the response with its own bin count.
if (y.cardinality() > _nbins_response) {
Interaction in = new Interaction();
in._source_frame = _dataset._key;
in._factor_columns = new String[] { _response };
in._max_factors = _nbins_response - 1;
y = in._job._result.get().anyVec();
} else if (y.isInt() && (y.max() - y.min() + 1) <= _nbins_response) {
y = y.toCategoricalVec();
if (y != null && y.cardinality() > 2)
Log.warn("Response column has more than two factor levels - mean response depends on lexicographic order of factors!");
//can be null
Vec w = _dataset.vec(_weight);
// NOTE(review): with '&&' this throws only when the weights are both non-numeric and have a
// negative minimum; the message ("numeric with values >= 0") suggests '||' was intended - confirm.
if (w != null && (!w.isNumeric() && w.min() < 0))
throw new H2OIllegalArgumentException("Observation weights must be numeric with values >= 0");
// Slot 0 holds the predictor's key and stats, slot 1 the response's.
if (x != null) {
_vecs[0] = x._key;
_stats[0] = new Stats(x);
if (y != null) {
_vecs[1] = y._key;
_stats[1] = new Stats(y);
// Pass the weight Vec as a third input only when it exists.
Tabulate sp = w != null ? new CoOccurrence(this).doAll(x, y, w)._sp : new CoOccurrence(this).doAll(x, y)._sp;
_count_table = sp.tabulationTwoDimTable();
// NOTE(review): the tokens after the first ';' on the next line look like remnants of two truncated statements.
_response_table = sp.responseCharTwoDimTable();, false));, false));
return sp;
use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.
the class VecUtils method categoricalToInt.
/**
 * Create a new {@link Vec} of numeric values from a categorical {@link Vec}.
 *
 * If the first value in the domain of the src Vec is a stringified int,
 * then it will use those ints. Otherwise, it will use the raw enumeration level mapping.
 * If the domain is stringified ints, then all of the domain must be able to be parsed as
 * an int. If it cannot be parsed as such, a NumberFormatException will be caught and
 * rethrown as an H2OIllegalArgumentException that declares the illegal domain value.
 * Otherwise, the src Vec is copied to a new Vec whose domain is null.
 *
 * The magic of this method should be eliminated. It should just use enumeration level
 * maps. If the user wants domains to be used, call categoricalDomainsToNumeric().
 * PUBDEV-2209
 *
 * @param src a categorical {@link Vec}
 * @return a numeric {@link Vec}
 */
// Converts a categorical Vec to a numeric one; an integer Vec without a domain is simply copied.
// NOTE(review): this excerpt appears to have lost lines (closing braces, the parse attempt inside
// the try, the catch body, the call that launches the task); comments describe only what is visible.
public static Vec categoricalToInt(final Vec src) {
// Integer column with a null or empty domain: nothing to map, return a numeric copy.
if (src.isInt() && (src.domain() == null || src.domain().length == 0))
return copyOver(src, Vec.T_NUM, null);
if (!src.isCategorical())
throw new IllegalArgumentException("categoricalToInt conversion only works on categorical columns.");
// check if the 1st lvl of the domain can be parsed as int
boolean useDomain = false;
// Numeric copy of src created with a null domain; it is the value returned at the end.
Vec newVec = copyOver(src, Vec.T_NUM, null);
// NOTE(review): the parse of the first domain level that would raise NumberFormatException is not visible in this try.
try {
useDomain = true;
} catch (NumberFormatException e) {
// makeCopy and return...
if (useDomain) {
new MRTask() {
public void map(Chunk c) {
// For every non-NA row, treat the stored value as an index into src's domain and
// overwrite it with that domain string parsed as an int.
for (int i = 0; i < c._len; ++i) if (!c.isNA(i))
c.set(i, Integer.parseInt(src.domain()[(int) c.at8(i)]));
return newVec;
use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.
the class VecUtils method UUIDToStringVec.
/**
 * Create a new {@link Vec} of string values from a UUID {@link Vec}.
 *
 * Each string is the standard hexadecimal representation of a UUID.
 *
 * @param src a UUID {@link Vec}
 * @return a string {@link Vec}
 */
// Builds a string Vec from a UUID Vec by formatting each row with PrettyPrint.UUID.
// Throws H2OIllegalArgumentException when src is not a UUID column.
// NOTE(review): this excerpt appears to have lost lines (closing braces); comments describe only what is visible.
public static Vec UUIDToStringVec(Vec src) {
if (!src.isUUID())
throw new H2OIllegalArgumentException("UUIDToStringVec() conversion only works on UUID columns");
// The task writes into a NewChunk; its output frame is declared with a single T_STR column.
Vec res = new MRTask() {
public void map(Chunk chk, NewChunk newChk) {
if (chk instanceof C0DChunk) {
// all NAs
// Emit one NA per input row.
for (int i = 0; i < chk._len; i++) newChk.addNA();
} else {
for (int i = 0; i < chk._len; i++) {
// at16l / at16h supply the two values passed to PrettyPrint.UUID for this row.
// NOTE(review): no output is visible here for rows where chk.isNA(i) is true, unlike the
// branch above which emits one value per row - confirm an else branch was not lost.
if (!chk.isNA(i))
newChk.addStr(PrettyPrint.UUID(chk.at16l(i), chk.at16h(i)));
}.doAll(Vec.T_STR, src).outputFrame().anyVec();
assert res != null;
return res;