Search in sources :

Example 6 with RebalanceDataSet

use of water.fvec.RebalanceDataSet in project h2o-2 by h2oai.

the class MatrixTest method testTranspose.

/**
 * Verifies {@code DMatrix.transpose}: after dropping the "RACE" column and rebalancing the
 * parsed prostate frame into 64 chunks, every cell (i, j) of the rebalanced frame must equal
 * cell (j, i) of the transposed frame within a tolerance of 1e-4.
 *
 * <p>All frames and vecs created here are removed in a {@code finally} block so that a failed
 * assertion does not leave keys behind.
 */
@Test
public void testTranspose() {
    Futures fs = new Futures();
    Key parsed = Key.make("prostate_parsed");
    Frame source = getFrameForFile(parsed, "smalldata/glm_test/prostate_cat_replaced.csv");
    Frame fr = null;
    Frame tr = null;
    try {
        source.remove("RACE").remove(fs);
        Key k = Key.make("rebalanced");
        H2O.submitTask(new RebalanceDataSet(source, k, 64)).join();
        // The source frame is not used past this point; the rebalanced result is read from DKV under k.
        source.delete();
        source = null;
        fr = DKV.get(k).get();
        tr = DMatrix.transpose(fr);
        tr.reloadVecs();
        for (int i = 0; i < fr.numRows(); ++i) {
            for (int j = 0; j < fr.numCols(); ++j) {
                assertEquals(fr.vec(j).at(i), tr.vec(i).at(j), 1e-4);
            }
        }
    } finally {
        // Clean up whatever was created, regardless of where the test stopped.
        if (source != null) {
            source.delete();
        }
        if (fr != null) {
            fr.delete();
        }
        if (tr != null) {
            for (Vec v : tr.vecs()) {
                v.remove(fs);
            }
        }
        fs.blockForPending();
    }
//    checkLeakedKeys();
}
Also used : GLMModel(hex.glm.GLMModel) Frame(water.fvec.Frame) RebalanceDataSet(water.fvec.RebalanceDataSet) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Test(org.junit.Test)

Example 7 with RebalanceDataSet

use of water.fvec.RebalanceDataSet in project h2o-3 by h2oai.

the class DeepLearningScoreTest method testPubDev928.

/**
 * Load simple dataset, rebalance to a number of chunks &gt; number of rows, and run deep learning.
 *
 * <p>The whole scenario runs inside a single try/finally so that the parsed frame, the
 * rebalanced frame and the model are deleted even when parsing, rebalancing or the
 * zero-length-chunk assertion fails.
 */
@Test
public void testPubDev928() {
    Key rebalancedKey = Key.make("rebalanced");
    Frame fr = null;
    Frame rebalanced = null;
    DeepLearningModel dlModel = null;
    try {
        // Create rebalanced dataset with one more chunk than there are rows
        NFSFileVec nfs = TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv");
        fr = ParseDataset.parse(Key.make(), nfs._key);
        RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, (int) (fr.numRows() + 1));
        H2O.submitTask(rb);
        rb.join();
        rebalanced = DKV.get(rebalancedKey).get();
        // Assert that there is at least one 0-len chunk
        assertZeroLengthChunk("Rebalanced dataset should contain at least one 0-len chunk!", rebalanced.anyVec());

        // Launch Deep Learning
        DeepLearningParameters dlParams = new DeepLearningParameters();
        dlParams._train = rebalancedKey;
        dlParams._epochs = 5;
        dlParams._response_column = "CAPSULE";
        dlModel = new DeepLearning(dlParams).trainModel().get();
    } finally {
        // Each resource may be missing if the test stopped before creating it.
        if (fr != null) {
            fr.delete();
        }
        if (rebalanced != null) {
            rebalanced.delete();
        }
        if (dlModel != null) {
            dlModel.delete();
        }
    }
}
Also used : Frame(water.fvec.Frame) RebalanceDataSet(water.fvec.RebalanceDataSet) NFSFileVec(water.fvec.NFSFileVec) DeepLearningParameters(hex.deeplearning.DeepLearningModel.DeepLearningParameters) Key(water.Key) Test(org.junit.Test)

Example 8 with RebalanceDataSet

use of water.fvec.RebalanceDataSet in project h2o-3 by h2oai.

the class DeepLearning method rebalance.

/**
 * Rebalances a frame into the number of chunks returned by {@code desiredChunks} when
 * {@code _force_load_balance} or {@code _reproducible} is set; otherwise the frame is returned
 * unchanged.
 *
 * @param original_fr frame to rebalance; {@code null} is returned as-is
 * @param local       forwarded to {@code desiredChunks}
 * @param name        prefix of the key for the rebalanced frame; its last (at most) five
 *                    characters are used as the dataset label in progress and log messages
 * @return {@code original_fr} when no rebalancing is performed, otherwise the rebalanced frame
 *         read back from DKV and registered with {@code Scope.track}
 */
@Override
protected Frame rebalance(final Frame original_fr, boolean local, final String name) {
    if (original_fr == null)
        return null;
    if (_parms._force_load_balance || _parms._reproducible) {
        //this is called before the parameters are sanitized, so force_load_balance might be user-disabled -> so must check reproducible flag as well
        int original_chunks = original_fr.anyVec().nChunks();
        // Clamp the start index so that names shorter than five characters do not throw
        // StringIndexOutOfBoundsException; longer names yield the same suffix as before.
        final String label = name.substring(Math.max(0, name.length() - 5));
        _job.update(0, "Load balancing " + label + " data...");
        int chunks = desiredChunks(original_fr, local);
        if (!_parms._reproducible) {
            if (original_chunks >= chunks) {
                if (!_parms._quiet_mode)
                    Log.info("Dataset already contains " + original_chunks + " chunks. No need to rebalance.");
                return original_fr;
            }
        } else {
            //reproducible, set chunks to 1
            assert chunks == 1;
            if (!_parms._quiet_mode)
                Log.warn("Reproducibility enforced - using only 1 thread - can be slow.");
            if (original_chunks == 1)
                return original_fr;
        }
        if (!_parms._quiet_mode)
            Log.info("Rebalancing " + label + " dataset into " + chunks + " chunks.");
        Key newKey = Key.make(name + ".chks" + chunks);
        RebalanceDataSet rb = new RebalanceDataSet(original_fr, newKey, chunks);
        H2O.submitTask(rb).join();
        Frame rebalanced_fr = DKV.get(newKey).get();
        Scope.track(rebalanced_fr);
        return rebalanced_fr;
    }
    return original_fr;
}
Also used : MRUtils.sampleFrame(water.util.MRUtils.sampleFrame) Frame(water.fvec.Frame) RebalanceDataSet(water.fvec.RebalanceDataSet) PrettyPrint(water.util.PrettyPrint)

Example 9 with RebalanceDataSet

use of water.fvec.RebalanceDataSet in project h2o-2 by h2oai.

the class LoadDatasets method reBalanceFrames.

/**
 * Rebalances every frame found in the global key snapshot whose key does not already contain
 * "balanced". The result is stored under the original key name with ".rebalanced" appended.
 *
 * <p>The number of splits is the smaller of the frame's row count and
 * {@code 4 * H2O.NUMCPUS * H2O.CLOUD.size()}. A failure on one frame is logged and does not stop
 * the remaining frames from being processed.
 */
public void reBalanceFrames() {
    final Key[] keySet = H2O.KeySnapshot.globalSnapshot().keys();
    for (Key key : keySet) {
        final Value val = DKV.get(key);
        if (val == null || !val.isFrame()) {
            continue;
        }
        final Frame fr = val.get();
        final String frameName = fr._key.toString();
        if (frameName.contains("balanced")) {
            continue;
        }
        // Take the minimum in long arithmetic BEFORE narrowing: casting the row count to int
        // first would wrap around for frames with more than Integer.MAX_VALUE rows.
        final long parallelism = Math.min(4L * H2O.NUMCPUS * H2O.CLOUD.size(), Integer.MAX_VALUE);
        final int splits = (int) Math.min(fr.numRows(), parallelism);
        final String name = frameName + ".rebalanced";
        Log.info("Load balancing frame under key '" + frameName + "' into " + splits + " splits.");
        try {
            final Key frHexBalanced = Key.make(name);
            new RebalanceDataSet(fr, frHexBalanced, splits).invoke();
        } catch (Exception ex) {
            // Best effort: keep going with the other frames, but log the exception type and the
            // affected frame (getMessage() alone may be null and carries no context).
            Log.err("Failed to rebalance frame '" + frameName + "': " + ex);
        }
    }
}
Also used : Frame(water.fvec.Frame) CreateFrame(hex.CreateFrame) RebalanceDataSet(water.fvec.RebalanceDataSet)

Example 10 with RebalanceDataSet

use of water.fvec.RebalanceDataSet in project h2o-2 by h2oai.

the class ReBalance method serve.

/**
 * Rebalances {@code source} into {@code chunks} chunks and stores the result under
 * {@code after}; when {@code after} is unset, a key named after the source key with
 * ".balanced" appended is created.
 *
 * @return a "done" response on success, or an error response wrapping whatever was thrown
 *         inside the guarded section
 * @throws IllegalArgumentException if no source frame was supplied (raised before the guarded
 *         section, so it is not converted into an error response)
 */
@Override
public RequestBuilders.Response serve() {
    if (source == null) {
        throw new IllegalArgumentException("Missing frame to rebalance!");
    }
    try {
        final long rowCount = source.numRows();
        if (chunks > rowCount) {
            throw new IllegalArgumentException("Cannot create more than " + rowCount + " chunks.");
        }
        if (after == null) {
            after = Key.make(source._key.toString() + ".balanced");
        }
        // Submit the rebalance task and block until it has finished.
        H2O.submitTask(new RebalanceDataSet(source, after, chunks)).join();
        return RequestBuilders.Response.done(this);
    } catch (Throwable failure) {
        return RequestBuilders.Response.error(failure);
    }
}
Also used : RebalanceDataSet(water.fvec.RebalanceDataSet)

Aggregations

RebalanceDataSet (water.fvec.RebalanceDataSet): 16, Frame (water.fvec.Frame): 14, Test (org.junit.Test): 11, SplitFrame (hex.SplitFrame): 3, NFSFileVec (water.fvec.NFSFileVec): 3, CreateFrame (hex.CreateFrame): 2, DataInfo (hex.FrameTask.DataInfo): 1, Aggregator (hex.aggregator.Aggregator): 1, AggregatorModel (hex.aggregator.AggregatorModel): 1, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 1, DRFModel (hex.drf.DRF.DRFModel): 1, GLMModel (hex.glm.GLMModel): 1, GramTask (hex.gram.Gram.GramTask): 1, File (java.io.File): 1, Key (water.Key): 1, MRUtils.sampleFrame (water.util.MRUtils.sampleFrame): 1, PrettyPrint (water.util.PrettyPrint): 1