Search in sources :

Example 21 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

the class DeepLearningModel method scoreDeepFeatures.

/**
 * Extracts the activations of one hidden layer ("deep features") for every row of a frame.
 *
 * For supervised models the response column, if present, is removed from a shallow copy of
 * the input frame before scoring. The frame is then adapted with {@code adaptTestForTrain},
 * one zero-initialised column per hidden unit is appended (named
 * {@code DF.L<layer+1>.C<unit+1>}), and an {@code MRTask} forward-propagates each row and
 * writes the selected layer's activations into those columns.
 *
 * @param frame the data to score; must contain at least one column
 * @param layer zero-based index into the configured {@code _hidden} layers
 * @param job   optional job; when non-null, a stop request makes each chunk return early and
 *              progress is bumped by one per processed chunk
 * @return a frame holding only the appended deep-feature columns
 * @throws H2OIllegalArgumentException if {@code layer} is not a valid hidden layer index
 * @throws IllegalArgumentException    if {@code frame} has no columns
 */
public Frame scoreDeepFeatures(Frame frame, final int layer, final Job job) {
    if (layer < 0 || layer >= model_info().get_params()._hidden.length)
        throw new H2OIllegalArgumentException("hidden layer (index) to extract must be between " + 0 + " and " + (model_info().get_params()._hidden.length - 1), "");
    final int len = _output.nfeatures();
    if (isSupervised()) {
        int ridx = frame.find(_output.responseName());
        if (ridx != -1) {
            // drop the response for scoring (on a copy, so the caller's frame is untouched)
            frame = new Frame(frame);
            frame.remove(ridx);
        }
    }
    Frame adaptFrm = new Frame(frame);
    // create new features, will be dense
    final int features = model_info().get_params()._hidden[layer];
    Vec v = adaptFrm.anyVec();
    Vec[] vecs = v != null ? v.makeZeros(features) : null;
    if (vecs == null)
        throw new IllegalArgumentException("Cannot create deep features from a frame with no columns.");
    // The output vecs above are created before entering the scope; everything from here on
    // runs inside it, and the scope is closed in the finally block so that a failure while
    // adapting or scoring cannot leave it open.
    Scope.enter();
    try {
        adaptTestForTrain(adaptFrm, true, false);
        for (int j = 0; j < features; ++j) {
            adaptFrm.add("DF.L" + (layer + 1) + ".C" + (j + 1), vecs[j]);
        }
        // single-row mini-batch: slot 0, batch size 1
        final int mb = 0;
        final int n = 1;
        new MRTask() {

            @Override
            public void map(Chunk[] chks) {
                if (isCancelled() || job != null && job.stop_requested())
                    return;
                double[] tmp = new double[len];
                final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
                for (int row = 0; row < chks[0]._len; row++) {
                    for (int i = 0; i < len; i++) tmp[i] = chks[i].atd(row);
                    //FIXME: No weights yet
                    ((Neurons.Input) neurons[0]).setInput(-1, tmp, mb);
                    DeepLearningTask.fpropMiniBatch(-1, neurons, model_info, null, false, null, null /*no offset*/, n);
                    // extract the layer-th hidden feature (neurons[0] is the input layer)
                    double[] out = neurons[layer + 1]._a[mb].raw();
                    // the appended output columns start right after the model's own columns
                    for (int c = 0; c < features; c++) chks[_output._names.length + c].set(row, out[c]);
                }
                if (job != null)
                    job.update(1);
            }
        }.doAll(adaptFrm);
        // Return just the output columns
        int x = _output._names.length, y = adaptFrm.numCols();
        return adaptFrm.extractFrame(x, y);
    } finally {
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 22 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

the class SharedTree method init.

/** Initialize the ModelBuilder, validating all arguments and preparing the
   *  training frame.  This call is expected to be overridden in the subclasses
   *  and each subclass will start with "super.init();".  This call is made
   *  by the front-end whenever the GUI is clicked, and needs to be fast;
   *  heavy-weight prep needs to wait for the trainModel() call.
   *
   *  Validate the requested ntrees; precompute actual ntrees.  Validate
   *  the number of classes to predict on; validate a checkpoint.
   *
   *  Problems are reported through error(...) / warn(...) rather than thrown,
   *  except for the checkpoint compatibility checks, which throw
   *  IllegalArgumentException directly.
   *
   *  @param expensive forwarded unchanged to super.init(expensive)
   */
@Override
public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node)
        error("_build_tree_one_node", "Cannot run on a single node in client mode.");
    // NOTE(review): a negative min_rows is reported again by the "_min_rows must be > 0" check below.
    if (_parms._min_rows < 0)
        error("_min_rows", "Requested min_rows must be greater than 0");
    if (_parms._categorical_encoding == Model.Parameters.CategoricalEncodingScheme.OneHotInternal) {
        error("_categorical_encoding", "Cannot use OneHotInternal categorical encoding for tree methods.");
    }
    if (_parms._ntrees < 0 || _parms._ntrees > MAX_NTREES)
        error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES);
    // Total trees in final model
    _ntrees = _parms._ntrees;
    if (_parms.hasCheckpoint()) {
        // Asking to continue from checkpoint?
        Value cv = DKV.get(_parms._checkpoint);
        if (cv != null) {
            // Look for prior model
            M checkpointModel = cv.get();
            try {
                _parms.validateWithCheckpoint(checkpointModel._parms);
                if (isClassifier() != checkpointModel._output.isClassifier())
                    throw new IllegalArgumentException("Response type must be the same as for the checkpointed model.");
                if (!Arrays.equals(_train.names(), checkpointModel._output._names)) {
                    throw new IllegalArgumentException("The columns of the training data must be the same as for the checkpointed model");
                }
                if (!Arrays.deepEquals(_train.domains(), checkpointModel._output._domains)) {
                    throw new IllegalArgumentException("Categorical factor levels of the training data must be the same as for the checkpointed model");
                }
            } catch (H2OIllegalArgumentException e) {
                // Parameter mismatches against the checkpoint become ordinary validation errors.
                error(e.values.get("argument").toString(), e.values.get("value").toString());
            }
            // NOTE(review): the condition requires ntrees >= checkpoint ntrees + 1, while the
            // message reads "higher than (checkpoint ntrees + 1)" - confirm intended wording.
            if (_parms._ntrees < checkpointModel._output._ntrees + 1)
                error("_ntrees", "If checkpoint is specified then requested ntrees must be higher than " + (checkpointModel._output._ntrees + 1));
            // Compute number of trees to build for this checkpoint
            // Needed trees
            _ntrees = _parms._ntrees - checkpointModel._output._ntrees;
        }
    }
    if (_parms._nbins <= 1)
        error("_nbins", "nbins must be > 1.");
    if (_parms._nbins >= 1 << 16)
        error("_nbins", "nbins must be < " + (1 << 16));
    if (_parms._nbins_cats <= 1)
        error("_nbins_cats", "nbins_cats must be > 1.");
    if (_parms._nbins_cats >= 1 << 16)
        error("_nbins_cats", "nbins_cats must be < " + (1 << 16));
    if (_parms._nbins_top_level < _parms._nbins)
        error("_nbins_top_level", "nbins_top_level must be >= nbins (" + _parms._nbins + ").");
    if (_parms._nbins_top_level >= 1 << 16)
        error("_nbins_top_level", "nbins_top_level must be < " + (1 << 16));
    if (_parms._max_depth <= 0)
        error("_max_depth", "_max_depth must be > 0.");
    if (_parms._min_rows <= 0)
        error("_min_rows", "_min_rows must be > 0.");
    if (_parms._r2_stopping != Double.MAX_VALUE)
        warn("_r2_stopping", "_r2_stopping is no longer supported - please use stopping_rounds, stopping_metric and stopping_tolerance instead.");
    if (_parms._score_tree_interval < 0)
        error("_score_tree_interval", "_score_tree_interval must be >= 0.");
    if (_parms._sample_rate_per_class != null) {
        warn("_sample_rate", "_sample_rate is ignored if _sample_rate_per_class is specified.");
        if (_parms._sample_rate_per_class.length != nclasses())
            error("_sample_rate_per_class", "_sample_rate_per_class must have " + nclasses() + " values (one per class).");
        // Class labels are only needed for the error text. The rate array may be longer than
        // the response domain (that mismatch is reported just above), or there may be no
        // domain at all, so look labels up defensively and fall back to the class index
        // instead of failing with an index/null error in the middle of validation.
        final String[] classLabels = response() == null ? null : response().domain();
        for (int i = 0; i < _parms._sample_rate_per_class.length; ++i) {
            if (!(0.0 < _parms._sample_rate_per_class[i] && _parms._sample_rate_per_class[i] <= 1.0)) {
                final String label = classLabels != null && i < classLabels.length ? classLabels[i] : String.valueOf(i);
                error("_sample_rate_per_class", "sample_rate_per_class for class " + label + " should be in interval ]0,1] but it is " + _parms._sample_rate_per_class[i] + ".");
            }
        }
    }
    if (!(0.0 < _parms._sample_rate && _parms._sample_rate <= 1.0))
        error("_sample_rate", "sample_rate should be in interval ]0,1] but it is " + _parms._sample_rate + ".");
    if (_parms._min_split_improvement < 0)
        error("_min_split_improvement", "min_split_improvement must be >= 0, but is " + _parms._min_split_improvement + ".");
    if (!(0.0 < _parms._col_sample_rate_per_tree && _parms._col_sample_rate_per_tree <= 1.0))
        error("_col_sample_rate_per_tree", "col_sample_rate_per_tree should be in interval ]0,1] but it is " + _parms._col_sample_rate_per_tree + ".");
    if (!(0. < _parms._col_sample_rate_change_per_level && _parms._col_sample_rate_change_per_level <= 2))
        error("_col_sample_rate_change_per_level", "col_sample_rate_change_per_level must be between 0 and 2");
    if (_train != null) {
        // Row count scaled by the mean weight when a weights column is in use.
        double sumWeights = _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
        // Need at least 2*min_rows weighted rows to split even once
        if (sumWeights < 2 * _parms._min_rows)
            error("_min_rows", "The dataset size is too small to split for min_rows=" + _parms._min_rows + ": must have at least " + 2 * _parms._min_rows + " (weighted) rows, but have only " + sumWeights + ".");
    }
    if (_train != null)
        _ncols = _train.numCols() - 1 - numSpecialCols();
}
Also used : H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) H2OModelBuilderIllegalArgumentException(water.exceptions.H2OModelBuilderIllegalArgumentException)

Example 23 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

the class Tabulate method execImpl.

/**
 * Builds the co-occurrence (count) table and the response table for the configured
 * predictor and response columns of the dataset.
 *
 * A column whose cardinality exceeds its bin limit is reduced through an {@code Interaction}
 * with {@code _max_factors = nbins - 1}; otherwise an integer column whose value range fits
 * within the bin limit is converted to a categorical Vec. The optional weight column must be
 * numeric and non-negative.
 *
 * @return the {@code Tabulate} instance produced by the {@code CoOccurrence} task
 * @throws H2OIllegalArgumentException if the dataset or a required column is missing, a bin
 *         count is below 1, or the weight column is non-numeric or contains negative values
 */
public Tabulate execImpl() {
    if (_dataset == null)
        throw new H2OIllegalArgumentException("Dataset not found");
    if (_nbins_predictor < 1)
        throw new H2OIllegalArgumentException("Number of bins for predictor must be >= 1");
    if (_nbins_response < 1)
        throw new H2OIllegalArgumentException("Number of bins for response must be >= 1");
    Vec x = _dataset.vec(_predictor);
    if (x == null)
        throw new H2OIllegalArgumentException("Predictor column " + _predictor + " not found");
    if (x.cardinality() > _nbins_predictor) {
        // Too many levels: let Interaction cap the number of factors.
        Interaction in = new Interaction();
        in._source_frame = _dataset._key;
        in._factor_columns = new String[] { _predictor };
        in._max_factors = _nbins_predictor - 1;
        in.execImpl(null);
        x = in._job._result.get().anyVec();
    } else if (x.isInt() && (x.max() - x.min() + 1) <= _nbins_predictor) {
        // Small integer range: treat each distinct value as its own level.
        x = x.toCategoricalVec();
    }
    Vec y = _dataset.vec(_response);
    if (y == null)
        throw new H2OIllegalArgumentException("Response column " + _response + " not found");
    if (y.cardinality() > _nbins_response) {
        Interaction in = new Interaction();
        in._source_frame = _dataset._key;
        in._factor_columns = new String[] { _response };
        in._max_factors = _nbins_response - 1;
        in.execImpl(null);
        y = in._job._result.get().anyVec();
    } else if (y.isInt() && (y.max() - y.min() + 1) <= _nbins_response) {
        y = y.toCategoricalVec();
    }
    if (y != null && y.cardinality() > 2)
        Log.warn("Response column has more than two factor levels - mean response depends on lexicographic order of factors!");
    //can be null
    Vec w = _dataset.vec(_weight);
    // Reject the weights if EITHER condition fails. The previous '&&' only rejected columns
    // that were simultaneously non-numeric and negative, so negative numeric weights passed.
    if (w != null && (!w.isNumeric() || w.min() < 0))
        throw new H2OIllegalArgumentException("Observation weights must be numeric with values >= 0");
    if (x != null) {
        _vecs[0] = x._key;
        _stats[0] = new Stats(x);
    }
    if (y != null) {
        _vecs[1] = y._key;
        _stats[1] = new Stats(y);
    }
    Tabulate sp = w != null ? new CoOccurrence(this).doAll(x, y, w)._sp : new CoOccurrence(this).doAll(x, y)._sp;
    _count_table = sp.tabulationTwoDimTable();
    _response_table = sp.responseCharTwoDimTable();
    Log.info(_count_table.toString(2, false));
    Log.info(_response_table.toString(2, false));
    return sp;
}
Also used : Vec(water.fvec.Vec) Interaction(hex.Interaction) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 24 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

the class VecUtils method categoricalToInt.

/**
   * Create a new {@link Vec} of numeric values from a categorical {@link Vec}.
   *
   * If the first value in the domain of the src Vec is a stringified int, then the
   * domain values themselves are used as the numeric values; in that case every domain
   * value must be parseable as an int, otherwise an {@link H2OIllegalArgumentException}
   * naming the offending domain value is thrown before any new Vec is created.
   * If the first domain value is not an int, the raw enumeration level mapping is used:
   * the source is copied to a new numeric Vec whose domain is null.
   *
   * An integer Vec with a null or empty domain is simply copied to a numeric Vec.
   *
   * The magic of this method should be eliminated. It should just use enumeration level
   * maps. If the user wants domains to be used, call categoricalDomainsToNumeric().
   * PUBDEV-2209
   *
   * @param src a categorical {@link Vec}
   * @return a numeric {@link Vec}
   * @throws IllegalArgumentException if src is not categorical
   * @throws H2OIllegalArgumentException if the domain starts with an int but contains a
   *         value that cannot be parsed as an int
   */
public static Vec categoricalToInt(final Vec src) {
    if (src.isInt() && (src.domain() == null || src.domain().length == 0))
        return copyOver(src, Vec.T_NUM, null);
    if (!src.isCategorical())
        throw new IllegalArgumentException("categoricalToInt conversion only works on categorical columns.");
    final String[] domain = src.domain();
    // Check if the 1st level of the domain can be parsed as int; that alone decides whether
    // the domain values or the raw level indices become the numeric values.
    int[] parsed = null;
    if (domain != null && domain.length > 0) {
        try {
            Integer.parseInt(domain[0]);
            parsed = new int[domain.length];
        } catch (NumberFormatException ignored) {
            // Not a numeric domain: fall through and return the plain copy of the level indices.
        }
    }
    if (parsed != null) {
        // Parse every level once, up front, instead of re-parsing the string for every row.
        // Failing here (before copyOver) also means no half-converted Vec is left behind.
        for (int lvl = 0; lvl < domain.length; lvl++) {
            try {
                parsed[lvl] = Integer.parseInt(domain[lvl]);
            } catch (NumberFormatException e) {
                // The message carries the offending value so the caller can see what failed.
                throw new H2OIllegalArgumentException("categoricalToInt conversion failed: domain value '" + domain[lvl] + "' cannot be parsed as an int.");
            }
        }
    }
    final Vec newVec = copyOver(src, Vec.T_NUM, null);
    if (parsed != null) {
        final int[] levelValues = parsed;
        new MRTask() {

            @Override
            public void map(Chunk c) {
                // Replace each non-missing level index with the int its domain string denotes.
                for (int i = 0; i < c._len; ++i) if (!c.isNA(i))
                    c.set(i, levelValues[(int) c.at8(i)]);
            }
        }.doAll(newVec);
    }
    return newVec;
}
Also used : Vec(water.fvec.Vec) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) C0DChunk(water.fvec.C0DChunk) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 25 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

the class VecUtils method UUIDToStringVec.

/**
   * Convert a UUID {@link Vec} into a new string {@link Vec}.
   *
   * Each non-missing UUID is rendered through {@code PrettyPrint.UUID}; missing values stay
   * missing in the result.
   *
   * @param src a UUID {@link Vec}
   * @return a string {@link Vec}
   */
public static Vec UUIDToStringVec(Vec src) {
    if (!src.isUUID())
        throw new H2OIllegalArgumentException("UUIDToStringVec() conversion only works on UUID columns");
    Vec res = new MRTask() {

        @Override
        public void map(Chunk chk, NewChunk newChk) {
            // A C0DChunk is treated as all NAs, so its rows are never inspected individually.
            final boolean allMissing = chk instanceof C0DChunk;
            for (int row = 0; row < chk._len; row++) {
                if (allMissing || chk.isNA(row)) {
                    newChk.addNA();
                } else {
                    newChk.addStr(PrettyPrint.UUID(chk.at16l(row), chk.at16h(row)));
                }
            }
        }
    }.doAll(Vec.T_STR, src).outputFrame().anyVec();
    assert res != null;
    return res;
}
Also used : C0DChunk(water.fvec.C0DChunk) Vec(water.fvec.Vec) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) C0DChunk(water.fvec.C0DChunk) NewChunk(water.fvec.NewChunk)

Aggregations

H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)43 Frame (water.fvec.Frame)16 Key (water.Key)6 H2OKeyNotFoundArgumentException (water.exceptions.H2OKeyNotFoundArgumentException)6 Vec (water.fvec.Vec)6 Field (java.lang.reflect.Field)3 Chunk (water.fvec.Chunk)3 NewChunk (water.fvec.NewChunk)3 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)2 GLMModel (hex.glm.GLMModel)2 DRFModel (hex.tree.drf.DRFModel)2 Method (java.lang.reflect.Method)2 ArrayList (java.util.ArrayList)2 Test (org.junit.Test)2 Iced (water.Iced)2 FrameV3 (water.api.schemas3.FrameV3)2 JobV3 (water.api.schemas3.JobV3)2 KeyV3 (water.api.schemas3.KeyV3)2 ByteVec (water.fvec.ByteVec)2 C0DChunk (water.fvec.C0DChunk)2