use of water.Futures in project h2o-2 by h2oai.
the class FillNAsWithMeanDemo03 method frame_001.
/**
 * Cookbook demo: parses {@code iris_withNA.csv}, computes the mean of every column, runs the
 * {@code FillNasWithMean} map/reduce task over the parsed frame, and publishes the task's output
 * frame in the DKV under {@code <parsed-key>_nas_replaced_with_mean}.
 *
 * <p>Both the parsed frame and the output frame are deleted in a {@code finally} block so a
 * failure part-way through does not leave them behind in the key/value store.
 */
@Test
public void frame_001() {
  String fileName = "./cookbookData/iris_withNA.csv";
  File file = new File(fileName);
  Key fkey = NFSFileVec.make(file);
  Key okey = Key.make("iris.hex");
  Frame outputFrame = null;
  try {
    // The parse result is stored under okey; it is fetched back from the DKV below.
    ParseDataset2.parse(okey, new Key[] { fkey });
    Frame f = DKV.get(okey).get();
    int len = f.numCols();
    Vec[] vv = f.vecs();
    double[] arrayofMeans = new double[len];
    for (int i = 0; i < len; i++) {
      arrayofMeans[i] = vv[i].mean();
    }
    // map reduce call
    FillNasWithMean lr1 = new FillNasWithMean(arrayofMeans).doAll(len, f);
    Key fk = Key.make(f._key.toString() + "_nas_replaced_with_mean");
    Futures fs = new Futures();
    // new frame
    outputFrame = lr1.outputFrame(fk, f.names(), f.domains(), fs);
    fs.blockForPending();
    // puts the new frame in the KV store
    DKV.put(fk, outputFrame, fs);
    fs.blockForPending();
    Log.info(" new output frame : " + outputFrame);
    //logThisH2OInstanceWebBrowserAddress();
    //sleepForever();
  } finally {
    // Always clean up, even when parsing or the map/reduce task throws.
    Frame.delete(okey);
    if (outputFrame != null) {
      outputFrame.delete();
    }
  }
}
use of water.Futures in project h2o-3 by h2oai.
the class DeepWaterTask method setupLocal.
/**
 * Transfer ownership from global (shared) model to local model which will be worked on.
 *
 * <p>After taking over the model, this method builds two parallel lists (training data and
 * training labels) from {@code _fr}: first {@code (int) _useFraction} full passes over all rows,
 * then randomly drawn rows until the list holds at least {@code _useFraction * numRows} entries
 * and its size is a multiple of the mini-batch size. Rows whose weight is 0 are skipped. When
 * {@code _shuffle} is set, both lists are shuffled with the same re-seeded RNG so that entry
 * {@code k} of the data list still corresponds to entry {@code k} of the label list. Finally a
 * problem-type specific iterator feeds batches to the backend: each batch is first scored (to
 * detect NaN predictions) and then submitted as a {@code NativeTrainTask}.
 *
 * <p>For image/text problems a row whose data string is {@code null} is skipped entirely, i.e.
 * neither a data entry nor a label is recorded for it, which keeps the two lists aligned.
 *
 * @throws UnsupportedOperationException if the backend produces NaN predictions
 */
@Override
protected void setupLocal() {
  // long start = System.currentTimeMillis();
  assert (_localmodel == null);
  _localmodel = _sharedmodel;
  _sharedmodel = null;
  _localmodel.set_processed_local(0);
  final int weightIdx = _fr.find(_localmodel.get_params()._weights_column);
  final int respIdx = _fr.find(_localmodel.get_params()._response_column);
  final int batchSize = _localmodel.get_params()._mini_batch_size;
  // long nativetime = 0;
  DeepWaterIterator iter = null;
  long seed = 0xDECAF + 0xD00D * _localmodel.get_processed_global();
  Random rng = RandomUtils.getRNG(seed);
  if (_fr.numRows() > Integer.MAX_VALUE) {
    throw H2O.unimpl("Need to implement batching into int-sized chunks.");
  }
  int len = (int) _fr.numRows();
  int j = 0;
  Futures fs = new Futures();
  ArrayList trainLabels = new ArrayList<>();
  ArrayList trainData = new ArrayList<>();
  try {
    // Binary data (Images/Documents/etc.)
    if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.image || _localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.text) {
      //must be the first column //FIXME
      int dataIdx = 0;
      Log.debug("Using column " + _fr.name(dataIdx) + " for " + ((_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.image) ? "path to image data" : ((_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.text) ? "text data" : "path to arbitrary bytes")));
      // full passes over the data
      BufferedString bs = new BufferedString();
      // Example: train_samples_per_iteration = 4700, and train.numRows()=1000 -> _useFraction = 4.7 -> fullpasses = 4
      int fullpasses = (int) _useFraction;
      while (j++ < fullpasses) {
        for (int i = 0; i < _fr.numRows(); ++i) {
          double weight = weightIdx == -1 ? 1 : _fr.vec(weightIdx).at(i);
          if (weight == 0)
            continue;
          BufferedString file = _fr.vec(dataIdx).atStr(bs, i);
          // No data for this row: skip its label too, otherwise data and labels get out of step.
          if (file == null)
            continue;
          trainData.add(file.toString());
          float response = (float) _fr.vec(respIdx).at(i);
          trainLabels.add(response);
        }
      }
      // fractional passes // 0.7
      while (trainData.size() < _useFraction * len || trainData.size() % batchSize != 0) {
        assert (_shuffle);
        int i = rng.nextInt(len);
        double weight = weightIdx == -1 ? 1 : _fr.vec(weightIdx).at(i);
        if (weight == 0)
          continue;
        BufferedString file = _fr.vec(dataIdx).atStr(bs, i);
        // No data for this row: draw another one and do not record a label for it.
        if (file == null)
          continue;
        trainData.add(file.toString());
        float response = (float) _fr.vec(respIdx).at(i);
        trainLabels.add(response);
      }
    } else // Numeric data (H2O Frame full with numeric columns)
    if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.dataset) {
      // Response normalization: (response - sub) / mul, identity when no normalization is set.
      double mul = _localmodel._dataInfo._normRespMul != null ? _localmodel._dataInfo._normRespMul[0] : 1;
      double sub = _localmodel._dataInfo._normRespSub != null ? _localmodel._dataInfo._normRespSub[0] : 0;
      // full passes over the data
      int fullpasses = (int) _useFraction;
      while (j++ < fullpasses) {
        for (int i = 0; i < _fr.numRows(); ++i) {
          double weight = weightIdx == -1 ? 1 : _fr.vec(weightIdx).at(i);
          if (weight == 0)
            continue;
          float response = (float) ((_fr.vec(respIdx).at(i) - sub) / mul);
          // For numeric data the "data" entry is the row index.
          trainData.add(i);
          trainLabels.add(response);
        }
      }
      // fractional passes
      while (trainData.size() < _useFraction * len || trainData.size() % batchSize != 0) {
        int i = rng.nextInt(len);
        double weight = weightIdx == -1 ? 1 : _fr.vec(weightIdx).at(i);
        if (weight == 0)
          continue;
        float response = (float) ((_fr.vec(respIdx).at(i) - sub) / mul);
        trainData.add(i);
        trainLabels.add(response);
      }
    }
    // shuffle the (global) list
    if (_shuffle) {
      // Re-seeding before each shuffle applies the same permutation to both (equally sized) lists.
      rng.setSeed(seed);
      Collections.shuffle(trainLabels, rng);
      rng.setSeed(seed);
      Collections.shuffle(trainData, rng);
    }
    if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.image) {
      iter = new DeepWaterImageIterator(trainData, trainLabels, _localmodel._meanData, batchSize, _localmodel._width, _localmodel._height, _localmodel._channels, _localmodel.get_params()._cache_data);
    } else if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.dataset) {
      assert (_localmodel._dataInfo != null);
      iter = new DeepWaterDatasetIterator(trainData, trainLabels, _localmodel._dataInfo, batchSize, _localmodel.get_params()._cache_data);
    } else if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.text) {
      iter = new DeepWaterTextIterator(trainData, trainLabels, batchSize, 56, /*FIXME*/
          _localmodel.get_params()._cache_data);
    }
    NativeTrainTask ntt;
    while (iter.Next(fs) && !_job.isStopping()) {
      // if (ntt != null) nativetime += ntt._timeInMillis;
      long n = _localmodel.get_processed_total();
      // if(!_localmodel.get_params()._quiet_mode)
      // Log.info("Trained " + n + " samples. Training on " + Arrays.toString(((DeepWaterImageIterator)iter).getFiles()));
      _localmodel._backend.setParameter(_localmodel._model, "learning_rate", _localmodel.get_params().learningRate((double) n));
      _localmodel._backend.setParameter(_localmodel._model, "momentum", _localmodel.get_params().momentum((double) n));
      //fork off GPU work, but let the iterator.Next() wait on completion before swapping again
      //System.err.println("data: " + Arrays.toString(iter.getData()));
      float[] preds = _localmodel._backend.predict(_localmodel._model, iter.getData());
      // A NaN anywhere in the predictions makes the sum NaN: treat the model as unstable.
      if (Float.isNaN(ArrayUtils.sum(preds))) {
        Log.err(DeepWaterModel.unstable_msg);
        throw new UnsupportedOperationException(DeepWaterModel.unstable_msg);
      }
      // System.err.println("pred: " + Arrays.toString(preds));
      ntt = new NativeTrainTask(_localmodel._backend, _localmodel._model, iter.getData(), iter.getLabel());
      fs.add(H2O.submitTask(ntt));
      _localmodel.add_processed_local(iter._batch_size);
    }
    fs.blockForPending();
    // nativetime += ntt._timeInMillis;
  } catch (IOException e) {
    //gracefully continue if we can't find files etc.
    e.printStackTrace();
  }
  // long end = System.currentTimeMillis();
  // if (!_localmodel.get_params()._quiet_mode) {
  // Log.info("Time for one iteration: " + PrettyPrint.msecs(end - start, true));
  // Log.info("Time for native training : " + PrettyPrint.msecs(nativetime, true));
  // }
}
use of water.Futures in project h2o-3 by h2oai.
the class RemoveAllHandler method remove.
// called through reflection by RequestServer
/**
 * Removes every job, Rapids session and key known to the cluster, then cleans up the backing
 * store.
 *
 * @param version API version (not used by this method)
 * @param u the request schema; returned unchanged
 * @return the same schema instance that was passed in
 */
@SuppressWarnings("unused")
public RemoveAllV3 remove(int version, RemoveAllV3 u) {
  Log.info("Removing all objects");
  final Futures pending = new Futures();

  // Cancel and remove leftover running jobs
  // NOTE(review): the result of stop_requested() is discarded here; confirm whether a call
  // that actually requests the stop was intended.
  for (Job job : Job.jobs()) {
    job.stop_requested();
    job.remove(pending);
  }

  // Wipe out any and all session info
  if (RapidsHandler.SESSIONS != null) {
    for (String sessionId : RapidsHandler.SESSIONS.keySet()) {
      RapidsHandler.SESSIONS.get(sessionId).endQuietly(null);
    }
    RapidsHandler.SESSIONS.clear();
  }

  // Wait for the job removals before wiping the stores underneath them.
  pending.blockForPending();

  // Bulk brainless key removal. Completely wipes all Keys without regard.
  MRTask wipeAllKeys = new MRTask(H2O.MIN_HI_PRIORITY) {
    @Override
    public void setupLocal() {
      H2O.raw_clear();
      water.fvec.Vec.ESPC.clear();
    }
  };
  wipeAllKeys.doAllNodes();

  // Wipe the backing store without regard as well
  H2O.getPM().getIce().cleanUp();
  Log.info("Finished removing objects");
  return u;
}
use of water.Futures in project h2o-2 by h2oai.
the class CoxPH method execImpl.
/**
 * Fits the Cox proportional hazards model by iterating Newton-style updates of the coefficient
 * vector, then stores the model in the DKV under {@code dest()}.
 *
 * <p>Each iteration runs a {@code CoxPHTask} over the adapted frame with the current
 * coefficients and computes the log-likelihood. If the log-likelihood improved, the model
 * statistics are refreshed, convergence is checked against {@code lre_min}, and a new step is
 * computed from {@code model.var_coef} and {@code model.gradient}. Otherwise the previous step
 * is halved and retried from the last accepted coefficients. Iteration stops when converged,
 * when the computed step contains a NaN or infinite component, or after {@code iter_max}
 * iterations.
 */
@Override
protected void execImpl() {
  final DataInfo dinfo = model.data_info;
  final int n_offsets = (model.parameters.offset_columns == null) ? 0 : model.parameters.offset_columns.length;
  final int n_coef = dinfo.fullN() - n_offsets;
  final double[] step = MemoryManager.malloc8d(n_coef);
  final double[] oldCoef = MemoryManager.malloc8d(n_coef);
  final double[] newCoef = MemoryManager.malloc8d(n_coef);
  Arrays.fill(step, Double.NaN);
  Arrays.fill(oldCoef, Double.NaN);
  for (int j = 0; j < n_coef; ++j) {
    newCoef[j] = init;
  }
  double oldLoglik = -Double.MAX_VALUE;
  final int n_time = (int) (model.max_time - model.min_time + 1);
  final boolean has_start_column = (model.parameters.start_column != null);
  final boolean has_weights_column = (model.parameters.weights_column != null);
  for (int i = 0; i <= iter_max; ++i) {
    model.iter = i;
    final CoxPHTask coxMR = new CoxPHTask(self(), dinfo, newCoef, model.min_time, n_time, n_offsets, has_start_column, has_weights_column).doAll(dinfo._adaptedFrame);
    final double newLoglik = model.calcLoglik(coxMR);
    if (newLoglik > oldLoglik) {
      if (i == 0) {
        model.calcCounts(coxMR);
      }
      model.calcModelStats(newCoef, newLoglik);
      model.calcCumhaz_0(coxMR);
      // Log relative error of the log-likelihood; absolute error when newLoglik is exactly 0.
      if (newLoglik == 0) {
        model.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
      } else {
        model.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
      }
      if (model.lre >= lre_min) {
        break;
      }
      // step = -(var_coef * gradient)
      Arrays.fill(step, 0);
      for (int j = 0; j < n_coef; ++j) {
        for (int k = 0; k < n_coef; ++k) {
          step[j] -= model.var_coef[j][k] * model.gradient[k];
        }
      }
      // A non-finite step cannot produce usable coefficients: stop iterating and keep the
      // statistics computed for the current (last accepted) coefficients.
      boolean stepIsFinite = true;
      for (int j = 0; j < n_coef; ++j) {
        if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) {
          stepIsFinite = false;
          break;
        }
      }
      if (!stepIsFinite) {
        break;
      }
      oldLoglik = newLoglik;
      System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
    } else {
      // No improvement: halve the step and retry from the last accepted coefficients.
      for (int j = 0; j < n_coef; ++j) {
        step[j] /= 2;
      }
    }
    for (int j = 0; j < n_coef; ++j) {
      newCoef[j] = oldCoef[j] - step[j];
    }
  }
  final Futures fs = new Futures();
  DKV.put(dest(), model, fs);
  fs.blockForPending();
}
use of water.Futures in project h2o-2 by h2oai.
the class SparseTest method makeChunk.
/**
 * Builds a single chunk holding the given values and returns it.
 *
 * <p>Each value is appended to a {@code NewChunk}: NaN becomes an NA, a value that survives a
 * round trip through {@code long} is added as an integer (mantissa with exponent 0), and
 * anything else is added as a double. The chunk and its {@code AppendableVec} are then closed
 * and chunk 0 of the resulting vec is returned.
 *
 * @param vals values to store, in row order
 * @return chunk 0 of the newly created vec
 */
protected Chunk makeChunk(double[] vals) {
  Key key = Vec.newKey();
  AppendableVec av = new AppendableVec(key);
  NewChunk nv = new NewChunk(av, 0);
  for (double d : vals) {
    if (Double.isNaN(d)) {
      nv.addNA();
    } else if ((long) d == d) {
      nv.addNum((long) d, 0);
    } else {
      nv.addNum(d);
    }
  }
  nv.close(0, null);
  Futures fs = new Futures();
  Vec vec = av.close(fs);
  fs.blockForPending();
  return vec.chunkForChunkIdx(0);
}
Aggregations