Examples with DataInfo - hex.FrameTask.DataInfo

Example 6 with DataInfo

use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.

the class NaiveBayes method execImpl.

/**
 * Runs the Naive Bayes job: prepares the input frame, wraps it in a
 * DataInfo, runs an NBTask over the adapted frame and builds the model
 * from the task's result.
 */
@Override
protected void execImpl() {
    // Captured before any work so it can be handed to start_training() below.
    final long startMillis = System.currentTimeMillis();

    final Frame prepared = DataInfo.prepareFrame(
            source, response, ignored_cols, false, true, /*drop const*/
            drop_na_cols);
    final DataInfo dataInfo = new DataInfo(
            prepared, 1, false, true,
            DataInfo.TransformType.NONE, DataInfo.TransformType.NONE);

    final NBTask nbTask = new NBTask(this, dataInfo).doAll(dataInfo._adaptedFrame);
    final NBModel model = buildModel(dataInfo, nbTask, laplace, min_std_dev);

    // Training bookkeeping: started at the captured timestamp, stopped immediately after the build.
    model.start_training(startMillis);
    model.stop_training();

    // Lock/unlock pair on the model, keyed by this job.
    model.delete_and_lock(self());
    model.unlock(self());
}

Also used : DataInfo(hex.FrameTask.DataInfo)

Example 7 with DataInfo

use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.

the class PCA method execImpl.

/**
 * Runs the PCA job: filters unusable columns out of the selected frame,
 * computes the Gram task over the remaining columns, builds the PCA model
 * and finally copies the job state into the stored model's parameters.
 *
 * @throws IllegalArgumentException if fewer than two columns remain after filtering
 */
@Override
protected void execImpl() {
    final Frame frame = selectFrame(source);
    final Vec[] columns = frame.vecs();

    // Collect the indices of columns that are constant (min == max) or whose
    // NA count exceeds 20% of the column length.
    // NOTE(review): an earlier variant of this filter also rejected columns with a
    // non-null domain(); that extra condition is currently disabled.
    final ArrayList<Integer> rejected = new ArrayList<Integer>();
    for (int idx = 0; idx < columns.length; idx++) {
        final Vec column = columns[idx];
        if (column.min() == column.max() || column.naCnt() > column.length() * 0.2) {
            rejected.add(idx);
        }
    }

    if (!rejected.isEmpty()) {
        // Frame.remove takes a primitive index array, so unbox the collected indices.
        final int[] rejectedIdx = new int[rejected.size()];
        int pos = 0;
        for (Integer colIdx : rejected) {
            rejectedIdx[pos++] = colIdx;
        }
        frame.remove(rejectedIdx);
    }

    if (frame.numCols() < 2) {
        throw new IllegalArgumentException("Need more than one column to run PCA");
    }

    final DataInfo.TransformType transform;
    if (standardize) {
        transform = DataInfo.TransformType.STANDARDIZE;
    } else {
        transform = DataInfo.TransformType.NONE;
    }
    final DataInfo dataInfo = new DataInfo(frame, 0, false, false, transform);

    final GramTask gramTask = new GramTask(self(), dataInfo, false, false).doAll(dataInfo._adaptedFrame);
    final PCAModel model = buildModel(dataInfo, gramTask);
    model.delete_and_lock(self());
    model.unlock(self());

    // Close/remove job
    remove();

    // Read the job's state after removal and write it into the stored model's parameters.
    final JobState jobState = UKV.<Job>get(self()).state;
    new TAtomic<PCAModel>() {

        @Override
        public PCAModel atomic(PCAModel stored) {
            if (stored == null) {
                return null;
            }
            stored.get_params().state = jobState;
            return stored;
        }
    }.invoke(dest());
}

Also used : DataInfo(hex.FrameTask.DataInfo) Frame(water.fvec.Frame) ArrayList(java.util.ArrayList) Vec(water.fvec.Vec) GramTask(hex.gram.Gram.GramTask)

Example 8 with DataInfo

use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.

the class PCAScore method execImpl.

/**
 * Scores the source frame against the PCA model and writes the result
 * to destination_key as num_pc columns named PC0, PC1, ...
 */
@Override
protected void execImpl() {
    // Note: Source data MUST contain all features (matched by name) used to build PCA model!
    // If additional columns exist in source, they are automatically ignored in scoring

    // Lock an empty placeholder frame under the destination key; the real output
    // frame produced at the end of this method is what gets unlocked.
    new Frame(destination_key, new String[0], new Vec[0]).delete_and_lock(self());

    final Frame adapted = model.adapt(source, true)[0];
    final int featureCount = model._names.length;
    final DataInfo dataInfo = new DataInfo(
            adapted, 0, false, false,
            model.normSub, model.normMul,
            DataInfo.TransformType.STANDARDIZE, null, null);

    final PCAScoreTask scoreTask = new PCAScoreTask(this, dataInfo, featureCount, num_pc, model.eigVec);
    scoreTask.doAll(num_pc, dataInfo._adaptedFrame);

    // One output column per principal component. Every entry of the domains array
    // stays null, which is the default value of a freshly allocated array element.
    final String[][] columnDomains = new String[num_pc][];
    final String[] columnNames = new String[num_pc];
    for (int pc = 0; pc < num_pc; pc++) {
        columnNames[pc] = "PC" + pc;
    }

    scoreTask.outputFrame(destination_key, columnNames, columnDomains).unlock(self());
}

Also used : DataInfo(hex.FrameTask.DataInfo) Frame(water.fvec.Frame) Vec(water.fvec.Vec) RString(water.util.RString)

Example 9 with DataInfo

use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.

the class DeepLearning method buildModel.

/**
 * Train a Deep Learning model; assumes that all members are populated.
 * <p>
 * If {@code checkpoint == null}, a new model is created via {@code initModel()}.
 * Otherwise the checkpointed model is loaded, this job's settings are validated
 * against it (source, response, validation must match; ignored_cols and
 * classification are silently aligned with the checkpoint), and training
 * continues from the checkpoint with the user-modifiable parameters applied.
 *
 * @throws IllegalArgumentException      if the checkpoint cannot be found, or source,
 *                                       response or validation differ from the checkpoint
 * @throws UnsupportedOperationException if n_folds is non-zero on a checkpoint restart
 */
private void buildModel() {
    DeepLearningModel model = null;
    if (checkpoint == null) {
        model = initModel();
        model.start_training(null);
    } else {
        final DeepLearningModel prior = UKV.get(checkpoint);
        if (prior == null) {
            throw new IllegalArgumentException("Checkpoint not found.");
        }
        Log.info("Resuming from checkpoint.");

        if (n_folds != 0) {
            throw new UnsupportedOperationException("n_folds must be 0: Cross-validation is not supported during checkpoint restarts.");
        }
        //remove existing cross-validation keys after checkpoint restart
        ((ValidatedJob) prior.job()).xval_models = null;

        // Source must be present; if the checkpoint recorded a source, the key bytes must match.
        final boolean sourceMismatch = source == null
                || (prior.model_info().get_params().source != null
                    && !Arrays.equals(source._key._kb, prior.model_info().get_params().source._key._kb));
        if (sourceMismatch) {
            throw new IllegalArgumentException("source must be the same as for the checkpointed model.");
        }

        // The autoencoder flag is always taken from the checkpoint; the response
        // column is only compared when not running as an autoencoder.
        autoencoder = prior.model_info().get_params().autoencoder;
        if (!autoencoder) {
            final boolean responseMatches = response != null
                    && source.names()[source.find(response)].equals(prior.responseName());
            if (!responseMatches) {
                throw new IllegalArgumentException("response must be the same as for the checkpointed model.");
            }
        }

        // A non-empty difference in either direction means the two ignored_cols sets differ.
        if (Utils.difference(ignored_cols, prior.model_info().get_params().ignored_cols).length != 0
                || Utils.difference(prior.model_info().get_params().ignored_cols, ignored_cols).length != 0) {
            ignored_cols = prior.model_info().get_params().ignored_cols;
            Log.warn("Automatically re-using ignored_cols from the checkpointed model.");
        }

        // First operand is true exactly when one side has a validation set and the other does not.
        final boolean validationPresenceDiffers = (validation == null) == (prior._validationKey != null);
        if (validationPresenceDiffers
                || (validation != null && validation._key != null && prior._validationKey != null
                    && !Arrays.equals(validation._key._kb, prior._validationKey._kb))) {
            throw new IllegalArgumentException("validation must be the same as for the checkpointed model.");
        }

        if (classification != prior.model_info().get_params().classification) {
            // Flip the flag first, then report the mode that is now in effect.
            classification = !classification;
            Log.warn("Automatically switching to " + (classification ? "classification" : "regression") + " (same as the checkpointed model).");
        }

        //add new epochs to existing model
        epochs += prior.epoch_counter;
        Log.info("Adding " + String.format("%.3f", prior.epoch_counter) + " epochs from the checkpointed model.");

        try {
            final DataInfo dataInfo = prepareDataInfo();
            model = new DeepLearningModel(prior, destination_key, job_key, dataInfo);
            model.write_lock(self());
            model.start_training(prior);
            assert (state == JobState.RUNNING);

            // Copy user-requested values of the modifiable parameters from this job
            // onto the parameters object of the resumed model, via reflection.
            final DeepLearning resumedParams = model.model_info().get_params();
            final Object requested = this;
            for (Field target : resumedParams.getClass().getDeclaredFields()) {
                final String fieldName = target.getName();
                if (!Utils.contains(cp_modifiable, fieldName)) {
                    continue;
                }
                if (!expert_mode && Utils.contains(expert_options, fieldName)) {
                    continue;
                }
                for (Field candidate : requested.getClass().getDeclaredFields()) {
                    if (!target.equals(candidate)) {
                        continue;
                    }
                    try {
                        final Object newValue = candidate.get(requested);
                        final Object oldValue = target.get(resumedParams);
                        //if both parameters are null, we don't need to do anything
                        if (oldValue == null && newValue == null) {
                            continue;
                        }
                        // Values are compared through their string form; equal strings mean no change.
                        if (newValue != null && oldValue != null && oldValue.toString().equals(newValue.toString())) {
                            continue;
                        }
                        Log.info("Applying user-requested modification of '" + fieldName + "': " + oldValue + " -> " + newValue);
                        target.set(resumedParams, newValue);
                    } catch (IllegalAccessException e) {
                        e.printStackTrace();
                    }
                }
            }

            if (resumedParams.n_folds != 0) {
                Log.warn("Disabling cross-validation: Not supported when resuming training from a checkpoint.");
                resumedParams.n_folds = 0;
            }
            model.update(self());
        } finally {
            // model is still null if construction itself failed; nothing to unlock then.
            if (model != null) {
                model.unlock(self());
            }
        }
    }
    trainModel(model);
    model.stop_training();
}

Also used : DataInfo(hex.FrameTask.DataInfo) Field(java.lang.reflect.Field)

Example 10 with DataInfo

use of hex.FrameTask.DataInfo in project h2o-2 by h2oai.

the class DeepLearning method initModel.

/**
 * Create an initial Deep Learning model, typically to be trained by trainModel(model).
 * <p>
 * Data is locked for the duration of the call and unlocked again on every
 * exit path, including when parameter checking or model construction throws.
 *
 * @return Randomly initialized model
 */
public final DeepLearningModel initModel() {
    try {
        lock_data();
        checkParams();
        final DataInfo dataInfo = prepareDataInfo();

        //convention from DataInfo: response is the last Vec
        final Vec responseVec = dataInfo._adaptedFrame.lastVec();

        // The relative class distribution is only computed for classification; it stays null otherwise.
        float[] classPriors = null;
        if (classification) {
            classPriors = new MRUtils.ClassDist(responseVec).doAll(responseVec).rel_dist();
        }

        final DeepLearningModel fresh = new DeepLearningModel(
                dest(), self(), source._key, dataInfo, (DeepLearning) this.clone(), classPriors);
        fresh.model_info().initializeMembers();
        return fresh;
    } finally {
        unlock_data();
    }
}

Also used : DataInfo(hex.FrameTask.DataInfo) Vec(water.fvec.Vec)

Aggregations

DataInfo (hex.FrameTask.DataInfo): 12, Frame (water.fvec.Frame): 5, Vec (water.fvec.Vec): 5, GramTask (hex.gram.Gram.GramTask): 3, Test (org.junit.Test): 3, RString (water.util.RString): 2, TransformType (hex.FrameTask.DataInfo.TransformType): 1, Source (hex.glm.GLM2.Source): 1, GLMIterationTask (hex.glm.GLMTask.GLMIterationTask): 1, File (java.io.File): 1, Field (java.lang.reflect.Field): 1, ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1, Futures (water.Futures): 1, NFSFileVec (water.fvec.NFSFileVec): 1, RebalanceDataSet (water.fvec.RebalanceDataSet): 1, MRUtils.sampleFrame (water.util.MRUtils.sampleFrame): 1