Search in sources:

Example 1 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

In the class StackedEnsemble, the method addModelPredictionsToLevelOneFrame.

/**
 * Append a base model's prediction column to the level-one frame used to train
 * the metalearner. Binomial classifiers contribute their class-1 probability
 * column; regression models contribute the "predict" column. Multinomial,
 * autoencoder, and unsupervised models are rejected.
 *
 * @param aModel             the base model whose predictions are being added
 * @param aModelsPredictions the model's predictions frame
 * @param levelOneFrame      the frame being assembled; receives one new column
 * @throws H2OIllegalArgumentException for unsupported model categories
 */
public static void addModelPredictionsToLevelOneFrame(Model aModel, Frame aModelsPredictions, Frame levelOneFrame) {
    final String columnName = aModel._key.toString();
    if (aModel._output.isBinomialClassifier()) {
        // Column index 2 is the class-1 probability. We select by index because
        // GLM names its probability columns differently from the other algos.
        levelOneFrame.add(columnName, aModelsPredictions.vec(2));
        return;
    }
    if (aModel._output.isClassifier())
        throw new H2OIllegalArgumentException("Don't yet know how to stack multinomial classifiers: " + aModel._key);
    if (aModel._output.isAutoencoder())
        throw new H2OIllegalArgumentException("Don't yet know how to stack autoencoders: " + aModel._key);
    if (!aModel._output.isSupervised())
        throw new H2OIllegalArgumentException("Don't yet know how to stack unsupervised models: " + aModel._key);
    // Regression: the single prediction column is always named "predict".
    levelOneFrame.add(columnName, aModelsPredictions.vec("predict"));
}
Also used : Vec(water.fvec.Vec) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 2 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

In the class StackedEnsembleModel, the method distributionFamily.

/**
 * Determine the DistributionFamily a base model was trained with, so the
 * ensemble can compare/inherit it. DRF is special-cased; otherwise the family
 * is found reflectively via a "_family" hyperparameter (GLM) or, failing that,
 * a "_dist" field on the model.
 *
 * @param aModel the base model to inspect
 * @return the model's distribution family
 * @throws H2OIllegalArgumentException if the family cannot be determined
 */
private DistributionFamily distributionFamily(Model aModel) {
    // NOTE(review): the DRF branch below is a brace-less if/else-if/else chain
    // nested inside the outer `if (aModel instanceof DRFModel)`; the else arms
    // bind to the inner `if`, which appears to be the intended parse.
    // TODO: hack alert: In DRF, _parms._distribution is always set to multinomial.  Yay.
    if (aModel instanceof DRFModel)
        if (aModel._output.isBinomialClassifier())
            return DistributionFamily.bernoulli;
        else if (aModel._output.isClassifier())
            throw new H2OIllegalArgumentException("Don't know how to set the distribution for a multinomial Random Forest classifier.");
        else
            return DistributionFamily.gaussian;
    try {
        Field familyField = ReflectionUtils.findNamedField(aModel._parms, "_family");
        // _dist is only searched when there is no _family field; the ternary
        // deliberately yields null otherwise so exactly one path is taken below.
        Field distributionField = (familyField != null ? null : ReflectionUtils.findNamedField(aModel, "_dist"));
        if (null != familyField) {
            // GLM only, for now
            GLMModel.GLMParameters.Family thisFamily = (GLMModel.GLMParameters.Family) familyField.get(aModel._parms);
            if (thisFamily == GLMModel.GLMParameters.Family.binomial) {
                // Family.binomial maps to DistributionFamily.bernoulli (the names differ).
                return DistributionFamily.bernoulli;
            }
            try {
                // All other families are assumed to share their constant name with
                // DistributionFamily; valueOf throws if that assumption fails.
                return Enum.valueOf(DistributionFamily.class, thisFamily.toString());
            } catch (IllegalArgumentException e) {
                throw new H2OIllegalArgumentException("Don't know how to find the right DistributionFamily for Family: " + thisFamily);
            }
        }
        if (null != distributionField) {
            // Prefer the materialized Distribution; fall back to the _distribution
            // hyperparameter when the field is still null.
            Distribution distribution = ((Distribution) distributionField.get(aModel));
            DistributionFamily distributionFamily;
            if (null != distribution)
                distributionFamily = distribution.distribution;
            else
                distributionFamily = aModel._parms._distribution;
            // NOTE: If the algo does smart guessing of the distribution family we need to duplicate the logic here.
            if (distributionFamily == DistributionFamily.AUTO) {
                if (aModel._output.isBinomialClassifier())
                    distributionFamily = DistributionFamily.bernoulli;
                else if (aModel._output.isClassifier())
                    throw new H2OIllegalArgumentException("Don't know how to determine the distribution for a multinomial classifier.");
                else
                    distributionFamily = DistributionFamily.gaussian;
            }
            return distributionFamily;
        }
        throw new H2OIllegalArgumentException("Don't know how to stack models that have neither a distribution hyperparameter nor a family hyperparameter.");
    } catch (Exception e) {
        // NOTE(review): this catch also swallows the H2OIllegalArgumentExceptions
        // thrown above and re-wraps reflection failures; only e.toString() survives,
        // so the original stack trace is lost — consider passing the cause if the
        // exception type supports it.
        throw new H2OIllegalArgumentException(e.toString(), e.toString());
    }
}
Also used : Field(java.lang.reflect.Field) DRFModel(hex.tree.drf.DRFModel) GLMModel(hex.glm.GLMModel) DistributionFamily(hex.genmodel.utils.DistributionFamily) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) DistributionFamily(hex.genmodel.utils.DistributionFamily) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 3 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

In the class StackedEnsembleModel, the method checkAndInheritModelProperties.

/**
 * Validate the base models and inherit shared properties from them.
 * <p>
 * The first base model found in the DKV seeds the ensemble's properties
 * (supervision, model category, distribution, column lists, response column,
 * nfolds); every subsequent base model is checked for consistency against
 * those inherited values.
 *
 * @throws H2OIllegalArgumentException if no base models were specified or
 *         found, or if the base models are inconsistent with each other or
 *         with the ensemble's own parameters.
 */
public void checkAndInheritModelProperties() {
    if (null == _parms._base_models || 0 == _parms._base_models.length)
        throw new H2OIllegalArgumentException("When creating a StackedEnsemble you must specify one or more models; found 0.");
    Model aModel = null;
    boolean beenHere = false;
    trainingFrameChecksum = _parms.train().checksum();
    for (Key<Model> k : _parms._base_models) {
        aModel = DKV.getGet(k);
        if (null == aModel) {
            // Missing models are skipped, not fatal; we fail below only if NONE were found.
            Log.warn("Failed to find base model; skipping: " + k);
            continue;
        }
        if (beenHere) {
            // check that the base models are all consistent with the first one
            if (_output._isSupervised ^ aModel.isSupervised())
                throw new H2OIllegalArgumentException("Base models are inconsistent: there is a mix of supervised and unsupervised models: " + Arrays.toString(_parms._base_models));
            if (modelCategory != aModel._output.getModelCategory())
                throw new H2OIllegalArgumentException("Base models are inconsistent: there is a mix of different categories of models: " + Arrays.toString(_parms._base_models));
            Frame aTrainingFrame = aModel._parms.train();
            if (trainingFrameChecksum != aTrainingFrame.checksum())
                throw new H2OIllegalArgumentException("Base models are inconsistent: they use different training frames.  Found checksums: " + trainingFrameChecksum + " and: " + aTrainingFrame.checksum() + ".");
            NonBlockingHashSet<String> aNames = new NonBlockingHashSet<>();
            aNames.addAll(Arrays.asList(aModel._output._names));
            if (!aNames.equals(this.names))
                throw new H2OIllegalArgumentException("Base models are inconsistent: they use different column lists.  Found: " + this.names + " and: " + aNames + ".");
            NonBlockingHashSet<String> anIgnoredColumns = new NonBlockingHashSet<>();
            if (null != aModel._parms._ignored_columns)
                anIgnoredColumns.addAll(Arrays.asList(aModel._parms._ignored_columns));
            if (!anIgnoredColumns.equals(this.ignoredColumns))
                // BUGFIX: print the set (readable contents) instead of the raw
                // String[] reference, which rendered as "[Ljava.lang.String;@...".
                throw new H2OIllegalArgumentException("Base models are inconsistent: they use different ignored_column lists.  Found: " + this.ignoredColumns + " and: " + anIgnoredColumns + ".");
            if (!responseColumn.equals(aModel._parms._response_column))
                throw new H2OIllegalArgumentException("Base models are inconsistent: they use different response columns.  Found: " + responseColumn + " and: " + aModel._parms._response_column + ".");
            if (_output._domains.length != aModel._output._domains.length)
                throw new H2OIllegalArgumentException("Base models are inconsistent: there is a mix of different numbers of domains (categorical levels): " + Arrays.toString(_parms._base_models));
            if (nfolds != aModel._parms._nfolds)
                throw new H2OIllegalArgumentException("Base models are inconsistent: they use different values for nfolds.");
            // TODO: loosen this iff _parms._valid or if we add a separate holdout dataset for the ensemble
            if (aModel._parms._nfolds < 2)
                throw new H2OIllegalArgumentException("Base model does not use cross-validation: " + aModel._parms._nfolds);
            // TODO: loosen this iff it's consistent, like if we have a _fold_column
            if (aModel._parms._fold_assignment != Modulo)
                // BUGFIX: report the offending fold_assignment value, not nfolds.
                throw new H2OIllegalArgumentException("Base model does not use Modulo for cross-validation: " + aModel._parms._fold_assignment);
            if (!aModel._parms._keep_cross_validation_predictions)
                // BUGFIX: identify the offending model; nfolds was irrelevant here.
                throw new H2OIllegalArgumentException("Base model does not keep cross-validation predictions: " + aModel._key);
            // Hack alert: DRF only does Bernoulli and Gaussian, so only compare _domains.length above.
            if (!(aModel instanceof DRFModel) && distributionFamily(aModel) != distributionFamily(this))
                Log.warn("Base models are inconsistent; they use different distributions: " + distributionFamily(this) + " and: " + distributionFamily(aModel) + ". Is this intentional?");
        // TODO: If we're set to DistributionFamily.AUTO then GLM might auto-conform the response column
        // giving us inconsistencies.
        } else {
            // !beenHere: this is the first base_model; inherit its properties.
            _output._isSupervised = aModel.isSupervised();
            this.modelCategory = aModel._output.getModelCategory();
            this._dist = new Distribution(distributionFamily(aModel));
            _output._domains = Arrays.copyOf(aModel._output._domains, aModel._output._domains.length);
            // TODO: set _parms._train to aModel._parms.train()
            _output._names = aModel._output._names;
            this.names = new NonBlockingHashSet<>();
            this.names.addAll(Arrays.asList(aModel._output._names));
            this.ignoredColumns = new NonBlockingHashSet<>();
            if (null != aModel._parms._ignored_columns)
                this.ignoredColumns.addAll(Arrays.asList(aModel._parms._ignored_columns));
            // If the ensemble specified its own ignored_columns, it must be
            // consistent with the base_models:
            if (null != this._parms._ignored_columns) {
                NonBlockingHashSet<String> ensembleIgnoredColumns = new NonBlockingHashSet<>();
                ensembleIgnoredColumns.addAll(Arrays.asList(this._parms._ignored_columns));
                if (!ensembleIgnoredColumns.equals(this.ignoredColumns))
                    throw new H2OIllegalArgumentException("A StackedEnsemble takes its ignored_columns list from the base models.  An inconsistent list of ignored_columns was specified for the ensemble model.");
            }
            responseColumn = aModel._parms._response_column;
            if (!responseColumn.equals(_parms._response_column))
                throw new H2OIllegalArgumentException("StackedModel response_column must match the response_column of each base model.  Found: " + responseColumn + " and: " + _parms._response_column);
            nfolds = aModel._parms._nfolds;
            _parms._distribution = aModel._parms._distribution;
            beenHere = true;
        }
    }
    if (null == aModel)
        throw new H2OIllegalArgumentException("When creating a StackedEnsemble you must specify one or more models; " + _parms._base_models.length + " were specified but none of those were found: " + Arrays.toString(_parms._base_models));
}
Also used : Frame(water.fvec.Frame) DRFModel(hex.tree.drf.DRFModel) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) GLMModel(hex.glm.GLMModel) DRFModel(hex.tree.drf.DRFModel) NonBlockingHashSet(water.nbhm.NonBlockingHashSet)

Example 4 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

In the class DeepLearning, the method makeDataInfo.

/**
   * Helper to create the DataInfo object from training/validation frames and the DL parameters
   * @param train Training frame
   * @param valid Validation frame
   * @param parms Model parameters
   * @param nClasses Number of response levels (1: regression, >=2: classification)
   * @return DataInfo
   */
static DataInfo makeDataInfo(Frame train, Frame valid, DeepLearningParameters parms, int nClasses) {
    // Probe the link function with an arbitrary value: identity links satisfy link(x) == x.
    double probe = 0.782347234;
    boolean identityLink = new Distribution(parms).link(probe) == probe;

    // Autoencoders have no response column; supervised DL has exactly one.
    final int nResponses = parms._autoencoder ? 0 : 1;
    // Auto-encoders always use all factor levels.
    final boolean useAllFactorLevels = parms._autoencoder || parms._use_all_factor_levels;

    // Predictor transform depends on standardization and model type.
    final DataInfo.TransformType predictorTransform;
    if (!parms._standardize)
        predictorTransform = DataInfo.TransformType.NONE;
    else if (parms._autoencoder)
        predictorTransform = DataInfo.TransformType.NORMALIZE;
    else if (parms._sparse)
        predictorTransform = DataInfo.TransformType.DESCALE;
    else
        predictorTransform = DataInfo.TransformType.STANDARDIZE;

    // Response transform: only standardize a numeric response under an identity link.
    final DataInfo.TransformType responseTransform;
    if (!parms._standardize || train.lastVec().isCategorical())
        responseTransform = DataInfo.TransformType.NONE;
    else
        responseTransform = identityLink ? DataInfo.TransformType.STANDARDIZE : DataInfo.TransformType.NONE;

    final boolean skipMissing = parms._missing_values_handling == DeepLearningParameters.MissingValuesHandling.Skip;

    DataInfo dinfo = new DataInfo(train, valid,
        nResponses,
        useAllFactorLevels,
        predictorTransform,
        responseTransform,
        skipMissing,
        // do not replace NAs in numeric cols with mean
        false,
        // always add a bucket for missing values
        true,
        // observation weights / offset / fold column presence
        parms._weights_column != null, parms._offset_column != null, parms._fold_column != null);

    // Checks and adjustments:
    // 1) observation weights (adjust mean/sigmas for predictors and response)
    // 2) NAs (check that there's enough rows left)
    GLMTask.YMUTask ymt = new GLMTask.YMUTask(dinfo, nClasses, !parms._autoencoder && nClasses == 1, parms._missing_values_handling == MissingValuesHandling.Skip, !parms._autoencoder).doAll(dinfo._adaptedFrame);
    if (ymt.wsum() == 0 && parms._missing_values_handling == DeepLearningParameters.MissingValuesHandling.Skip)
        throw new H2OIllegalArgumentException("No rows left in the dataset after filtering out rows with missing values. Ignore columns with many NAs or set missing_values_handling to 'MeanImputation'.");
    if (parms._weights_column != null && parms._offset_column != null) {
        Log.warn("Combination of offset and weights can lead to slight differences because Rollupstats aren't weighted - need to re-calculate weighted mean/sigma of the response including offset terms.");
    }
    if (parms._weights_column != null && parms._offset_column == null) /*FIXME: offset not yet implemented*/
    {
        dinfo.updateWeightedSigmaAndMean(ymt.predictorSDs(), ymt.predictorMeans());
        if (nClasses == 1)
            dinfo.updateWeightedSigmaAndMeanForResponse(ymt.responseSDs(), ymt.responseMeans());
    }
    return dinfo;
}
Also used : GLMTask(hex.glm.GLMTask) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 5 with H2OIllegalArgumentException

use of water.exceptions.H2OIllegalArgumentException in project h2o-3 by h2oai.

In the class DeepLearningModel, the method scoreAutoEncoder.

/**
   * Score auto-encoded reconstruction (on-the-fly, without allocating the reconstruction as done in Frame score(Frame fr))
   * @param frame Original data (can contain response, will be ignored)
   * @param destination_key Frame Id for output
   * @param reconstruction_error_per_feature whether to return the squared error per feature
   * @return Frame containing one Vec with reconstruction error (MSE) of each reconstructed row, caller is responsible for deletion
   */
public Frame scoreAutoEncoder(Frame frame, Key destination_key, final boolean reconstruction_error_per_feature) {
    if (!get_params()._autoencoder)
        throw new H2OIllegalArgumentException("Only for AutoEncoder Deep Learning model.", "");
    final int len = _output._names.length;
    // Work on a copy so adaptTestForTrain() doesn't mutate the caller's frame.
    Frame adaptFrm = new Frame(frame);
    adaptTestForTrain(adaptFrm, true, false);
    // One output column per (expanded) input feature, or a single aggregate MSE column.
    final int outputcols = reconstruction_error_per_feature ? model_info.data_info.fullN() : 1;
    Frame mse = new MRTask() {

        @Override
        public void map(Chunk[] chks, NewChunk[] mse) {
            // NOTE(review): this `mse` parameter shadows the outer `mse` Frame.
            double[] tmp = new double[len];
            double[] out = new double[outputcols];
            // Fresh per-map() neuron state wired to the trained model weights.
            final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
            for (int row = 0; row < chks[0]._len; row++) {
                // Copy the row into tmp, score it, and emit the error value(s).
                for (int i = 0; i < len; i++) tmp[i] = chks[i].atd(row);
                score_autoencoder(tmp, out, neurons, false, /*reconstruction*/
                reconstruction_error_per_feature);
                for (int i = 0; i < outputcols; ++i) mse[i].addNum(out[i]);
            }
        }
    }.doAll(outputcols, Vec.T_NUM, adaptFrm).outputFrame();
    String[] names;
    if (reconstruction_error_per_feature) {
        // Per-feature mode: one "reconstr_<coef>.SE" column per expanded coefficient.
        String[] coefnames = model_info().data_info().coefNames();
        assert (outputcols == coefnames.length);
        names = new String[outputcols];
        for (int i = 0; i < names.length; ++i) {
            names[i] = "reconstr_" + coefnames[i] + ".SE";
        }
    } else {
        names = new String[] { "Reconstruction.MSE" };
    }
    // Publish the result frame under the caller-supplied key; caller owns deletion.
    Frame res = new Frame(destination_key, names, mse.vecs());
    DKV.put(res);
    addModelMetrics(new ModelMetricsAutoEncoder(this, frame, res.numRows(), res.vecs()[0].mean()));
    return res;
}
Also used : Frame(water.fvec.Frame) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Aggregations

H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)43 Frame (water.fvec.Frame)16 Key (water.Key)6 H2OKeyNotFoundArgumentException (water.exceptions.H2OKeyNotFoundArgumentException)6 Vec (water.fvec.Vec)6 Field (java.lang.reflect.Field)3 Chunk (water.fvec.Chunk)3 NewChunk (water.fvec.NewChunk)3 DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters)2 GLMModel (hex.glm.GLMModel)2 DRFModel (hex.tree.drf.DRFModel)2 Method (java.lang.reflect.Method)2 ArrayList (java.util.ArrayList)2 Test (org.junit.Test)2 Iced (water.Iced)2 FrameV3 (water.api.schemas3.FrameV3)2 JobV3 (water.api.schemas3.JobV3)2 KeyV3 (water.api.schemas3.KeyV3)2 ByteVec (water.fvec.ByteVec)2 C0DChunk (water.fvec.C0DChunk)2