use of water.fvec.RebalanceDataSet in project h2o-2 by h2oai.
the class MatrixTest method testTranspose.
/**
 * Loads the prostate test data, drops the "RACE" column, rebalances the frame
 * (requesting 64 from RebalanceDataSet), transposes it with DMatrix.transpose
 * and checks every cell of the transpose against the source frame.
 *
 * Cleanup runs in a finally block so that a failing assertion does not leave
 * frames or vecs behind.
 */
@Test
public void testTranspose() {
  final String path = "smalldata/glm_test/prostate_cat_replaced.csv";
  Futures fs = new Futures();
  Frame fr = getFrameForFile(Key.make("prostate_parsed"), path);
  Frame tr = null;
  try {
    fr.remove("RACE").remove(fs);
    Key k = Key.make("rebalanced");
    H2O.submitTask(new RebalanceDataSet(fr, k, 64)).join();
    // The parsed frame is not needed once the rebalanced copy exists; null the
    // reference so the finally block does not delete it a second time.
    fr.delete();
    fr = null;
    fr = DKV.get(k).get();
    tr = DMatrix.transpose(fr);
    tr.reloadVecs();
    // Cell (i, j) of the source must equal cell (j, i) of the transpose.
    for (int i = 0; i < fr.numRows(); ++i) {
      for (int j = 0; j < fr.numCols(); ++j) {
        assertEquals(fr.vec(j).at(i), tr.vec(i).at(j), 1e-4);
      }
    }
  } finally {
    if (fr != null) {
      fr.delete();
    }
    if (tr != null) {
      for (Vec v : tr.vecs()) {
        v.remove(fs);
      }
    }
    // Also waits for the asynchronous removal of the "RACE" vec started above.
    fs.blockForPending();
  }
  // checkLeakedKeys();
}
use of water.fvec.RebalanceDataSet in project h2o-3 by h2oai.
the class DeepLearningScoreTest method testPubDev928.
/**
 * Load simple dataset, rebalance to a number of chunks > number of rows, and run deep learning.
 *
 * The rebalance target is {@code numRows + 1}, and the test asserts that the
 * rebalanced frame contains at least one zero-length chunk before training.
 * All set-up happens inside the try block so that a failure in parsing,
 * rebalancing or the zero-length-chunk assertion still releases whatever was
 * created up to that point.
 */
@Test
public void testPubDev928() {
  Key rebalancedKey = Key.make("rebalanced");
  Frame fr = null;
  Frame rebalanced = null;
  DeepLearningModel dlModel = null;
  try {
    // Create rebalanced dataset
    NFSFileVec nfs = TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv");
    fr = ParseDataset.parse(Key.make(), nfs._key);
    RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, (int) (fr.numRows() + 1));
    H2O.submitTask(rb);
    rb.join();
    rebalanced = DKV.get(rebalancedKey).get();

    // Assert that there is at least one 0-len chunk
    assertZeroLengthChunk("Rebalanced dataset should contain at least one 0-len chunk!", rebalanced.anyVec());

    // Launch Deep Learning
    DeepLearningParameters dlParams = new DeepLearningParameters();
    dlParams._train = rebalancedKey;
    dlParams._epochs = 5;
    dlParams._response_column = "CAPSULE";
    dlModel = new DeepLearning(dlParams).trainModel().get();
  } finally {
    if (fr != null) {
      fr.delete();
    }
    if (rebalanced != null) {
      rebalanced.delete();
    }
    if (dlModel != null) {
      dlModel.delete();
    }
  }
}
use of water.fvec.RebalanceDataSet in project h2o-3 by h2oai.
the class DeepLearning method rebalance.
/**
 * Rebalances a frame into the number of chunks returned by
 * {@code desiredChunks(original_fr, local)} when load balancing is forced or
 * reproducibility is requested.
 *
 * @param original_fr frame to rebalance; {@code null} is returned unchanged
 * @param local       passed through to {@code desiredChunks}
 * @param name        base name for the rebalanced frame's key; its last (up to)
 *                    five characters are used as the data-set label in
 *                    progress and log messages
 * @return the original frame when no rebalancing is needed or enabled,
 *         otherwise the newly created rebalanced frame (tracked by Scope)
 */
@Override
protected Frame rebalance(final Frame original_fr, boolean local, final String name) {
  if (original_fr == null)
    return null;
  if (_parms._force_load_balance || _parms._reproducible) {
    //this is called before the parameters are sanitized, so force_load_balance might be user-disabled -> so must check reproducible flag as well
    int original_chunks = original_fr.anyVec().nChunks();
    // Clamp the start index: a name shorter than five characters must not
    // raise StringIndexOutOfBoundsException just to build a log label.
    final String label = name.substring(Math.max(0, name.length() - 5));
    _job.update(0, "Load balancing " + label + " data...");
    int chunks = desiredChunks(original_fr, local);
    if (!_parms._reproducible) {
      if (original_chunks >= chunks) {
        if (!_parms._quiet_mode)
          Log.info("Dataset already contains " + original_chunks + " chunks. No need to rebalance.");
        return original_fr;
      }
    } else {
      //reproducible, set chunks to 1
      assert chunks == 1;
      if (!_parms._quiet_mode)
        Log.warn("Reproducibility enforced - using only 1 thread - can be slow.");
      if (original_chunks == 1)
        return original_fr;
    }
    if (!_parms._quiet_mode)
      Log.info("Rebalancing " + label + " dataset into " + chunks + " chunks.");
    Key newKey = Key.make(name + ".chks" + chunks);
    RebalanceDataSet rb = new RebalanceDataSet(original_fr, newKey, chunks);
    H2O.submitTask(rb).join();
    Frame rebalanced_fr = DKV.get(newKey).get();
    Scope.track(rebalanced_fr);
    return rebalanced_fr;
  }
  return original_fr;
}
use of water.fvec.RebalanceDataSet in project h2o-2 by h2oai.
the class LoadDatasets method reBalanceFrames.
/**
 * Walks a global snapshot of the keys and, for every frame whose key does not
 * already contain "balanced", creates a rebalanced copy under
 * {@code <key>.rebalanced}.
 *
 * The split count is the smaller of the frame's row count and
 * {@code 4 * H2O.NUMCPUS * H2O.CLOUD.size()}. Failures for one frame are
 * logged and do not stop processing of the remaining frames.
 */
public void reBalanceFrames() {
  final Key[] keySet = H2O.KeySnapshot.globalSnapshot().keys();
  for (Key key : keySet) {
    final Value val = DKV.get(key);
    if (val == null || !val.isFrame())
      continue;
    final Frame fr = val.get();
    final String frameName = fr._key.toString();
    if (frameName.contains("balanced"))
      continue;
    // Take the minimum in long arithmetic and narrow afterwards: casting
    // numRows() to int first would wrap for frames with more than
    // Integer.MAX_VALUE rows and produce a bogus split count.
    final int splits = (int) Math.min(fr.numRows(), 4L * H2O.NUMCPUS * H2O.CLOUD.size());
    final String name = frameName + ".rebalanced";
    Log.info("Load balancing frame under key '" + frameName + "' into " + splits + " splits.");
    try {
      final Key frHexBalanced = Key.make(name);
      new RebalanceDataSet(fr, frHexBalanced, splits).invoke();
    } catch (Exception ex) {
      // Best effort: keep going with the other frames, but log the exception
      // itself (getMessage() alone may be null and drops the stack trace).
      Log.err("Failed to rebalance frame under key '" + frameName + "'", ex);
    }
  }
}
use of water.fvec.RebalanceDataSet in project h2o-2 by h2oai.
the class ReBalance method serve.
/**
 * Rebalances {@code source} into {@code chunks} chunks and stores the result
 * under {@code after}, defaulting that key to {@code <source key>.balanced}.
 *
 * A missing source frame is reported by throwing; every other failure,
 * including a chunk count larger than the row count, is returned as an error
 * response.
 *
 * @return a "done" response on success, an error response otherwise
 * @throws IllegalArgumentException if {@code source} is {@code null}
 */
@Override
public RequestBuilders.Response serve() {
  if (source == null) {
    throw new IllegalArgumentException("Missing frame to rebalance!");
  }
  try {
    final long rowCount = source.numRows();
    if (chunks > rowCount) {
      throw new IllegalArgumentException("Cannot create more than " + rowCount + " chunks.");
    }
    if (after == null) {
      after = Key.make(source._key.toString() + ".balanced");
    }
    // Submit the rebalance task and block until it has finished.
    H2O.submitTask(new RebalanceDataSet(source, after, chunks)).join();
    return RequestBuilders.Response.done(this);
  } catch (Throwable failure) {
    return RequestBuilders.Response.error(failure);
  }
}
Aggregations