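All of the examples below share the same basic idiom: create a water.Futures, hand it to asynchronous operations (DKV puts, job or key removals, vector closes, forked tasks), and call blockForPending() once to wait for everything that was enqueued. A minimal sketch of that idiom, assuming an already-built Frame (the helper name and key are hypothetical, not from the h2o sources):

import water.DKV;
import water.Futures;
import water.Key;
import water.fvec.Frame;

// Minimal sketch of the Futures pattern shared by the examples below.
// `frame` is assumed to be an existing, fully built Frame.
static void putAndWait(Key destKey, Frame frame) {
    Futures fs = new Futures();      // collects pending asynchronous work
    DKV.put(destKey, frame, fs);     // enqueue the put into the distributed KV store
    fs.blockForPending();            // block until all enqueued work has completed
}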

Example 1 with Futures

Use of water.Futures in project h2o-2 by h2oai.

In the class FillNAsWithMeanDemo03, method frame_001.

@Test
public void frame_001() {
    // String fileName = "/Users/nidhimehta/h2o/smalldata/iris/iris.csv";
    //String fileName = "/Users/nidhimehta/Desktop/data/covtype/covtrain_tit";
    //String fileName = "/Users/nidhimehta/Desktop/iris_withNA.csv";
    String fileName = "./cookbookData/iris_withNA.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);                           // file-backed Vec over the raw CSV bytes
    Key okey = Key.make("iris.hex");                            // destination key for the parsed frame
    Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });   // parse the CSV into the KV store under okey
    Frame f = DKV.get(okey).get();                              // the same parsed frame, fetched back from the KV store
    int len = f.numCols();
    Vec[] vv = f.vecs();
    double[] arrayofMeans = new double[len];
    for (int i = 0; i < len; i++) arrayofMeans[i] = vv[i].mean();
    // map reduce call
    FillNasWithMean lr1 = new FillNasWithMean(arrayofMeans).doAll(len, f);
    Key fk = Key.make(f._key.toString() + "_nas_replaced_with_mean");
    Futures fs = new Futures();
    //new frame
    Frame outputFrame = lr1.outputFrame(fk, f.names(), f.domains(), fs);
    fs.blockForPending();
    //puts the new frame in the KV store
    DKV.put(fk, outputFrame, fs);
    fs.blockForPending();
    Log.info(" new output frame        : " + outputFrame);
    //logThisH2OInstanceWebBrowserAddress();
    //sleepForever();
    Frame.delete(okey);
    outputFrame.delete();
}
Also used : Frame (water.fvec.Frame), Futures (water.Futures), NFSFileVec (water.fvec.NFSFileVec), Vec (water.fvec.Vec), File (java.io.File), Key (water.Key), Test (org.junit.Test)

Example 2 with Futures

Use of water.Futures in project h2o-3 by h2oai.

In the class DeepWaterTask, method setupLocal.

/**
   * Transfer ownership from global (shared) model to local model which will be worked on
   */
@Override
protected void setupLocal() {
    //    long start = System.currentTimeMillis();
    assert (_localmodel == null);
    _localmodel = _sharedmodel;
    _sharedmodel = null;
    _localmodel.set_processed_local(0);
    final int weightIdx = _fr.find(_localmodel.get_params()._weights_column);
    final int respIdx = _fr.find(_localmodel.get_params()._response_column);
    final int batchSize = _localmodel.get_params()._mini_batch_size;
    //    long nativetime = 0;
    DeepWaterIterator iter = null;
    long seed = 0xDECAF + 0xD00D * _localmodel.get_processed_global();
    Random rng = RandomUtils.getRNG(seed);
    if (_fr.numRows() > Integer.MAX_VALUE) {
        throw H2O.unimpl("Need to implement batching into int-sized chunks.");
    }
    int len = (int) _fr.numRows();
    int j = 0;
    Futures fs = new Futures();
    // Raw list types are intentional: trainData holds image paths / text (String) or row indices (Integer)
    // depending on the problem type, and trainLabels holds the Float responses.
    ArrayList trainLabels = new ArrayList<>();
    ArrayList trainData = new ArrayList<>();
    try {
        // Binary data (Images/Documents/etc.)
        if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.image || _localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.text) {
            //must be the first column //FIXME
            int dataIdx = 0;
            Log.debug("Using column " + _fr.name(dataIdx) + " for " + ((_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.image) ? "path to image data" : ((_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.text) ? "text data" : "path to arbitrary bytes")));
            // full passes over the data
            BufferedString bs = new BufferedString();
            // Example: train_samples_per_iteration = 4700, and train.numRows()=1000 -> _useFraction = 4.7 -> fullpasses = 4
            int fullpasses = (int) _useFraction;
            while (j++ < fullpasses) {
                for (int i = 0; i < _fr.numRows(); ++i) {
                    double weight = weightIdx == -1 ? 1 : _fr.vec(weightIdx).at(i);
                    if (weight == 0)
                        continue;
                    BufferedString file = _fr.vec(dataIdx).atStr(bs, i);
                    if (file != null)
                        trainData.add(file.toString());
                    float response = (float) _fr.vec(respIdx).at(i);
                    trainLabels.add(response);
                }
            }
            // fractional passes // 0.7
            while (trainData.size() < _useFraction * len || trainData.size() % batchSize != 0) {
                assert (_shuffle);
                int i = rng.nextInt(len);
                double weight = weightIdx == -1 ? 1 : _fr.vec(weightIdx).at(i);
                if (weight == 0)
                    continue;
                BufferedString file = _fr.vec(dataIdx).atStr(bs, i);
                if (file != null)
                    trainData.add(file.toString());
                float response = (float) _fr.vec(respIdx).at(i);
                trainLabels.add(response);
            }
        } else if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.dataset) {
            // Numeric data (an H2O Frame with numeric columns)
            double mul = _localmodel._dataInfo._normRespMul != null ? _localmodel._dataInfo._normRespMul[0] : 1;
            double sub = _localmodel._dataInfo._normRespSub != null ? _localmodel._dataInfo._normRespSub[0] : 0;
            // full passes over the data
            int fullpasses = (int) _useFraction;
            while (j++ < fullpasses) {
                for (int i = 0; i < _fr.numRows(); ++i) {
                    double weight = weightIdx == -1 ? 1 : _fr.vec(weightIdx).at(i);
                    if (weight == 0)
                        continue;
                    float response = (float) ((_fr.vec(respIdx).at(i) - sub) / mul);
                    trainData.add(i);
                    trainLabels.add(response);
                }
            }
            // fractional passes
            while (trainData.size() < _useFraction * len || trainData.size() % batchSize != 0) {
                int i = rng.nextInt(len);
                double weight = weightIdx == -1 ? 1 : _fr.vec(weightIdx).at(i);
                if (weight == 0)
                    continue;
                float response = (float) ((_fr.vec(respIdx).at(i) - sub) / mul);
                trainData.add(i);
                trainLabels.add(response);
            }
        }
        // shuffle the (global) list
        if (_shuffle) {
            rng.setSeed(seed);
            Collections.shuffle(trainLabels, rng);
            rng.setSeed(seed);
            Collections.shuffle(trainData, rng);
        }
        if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.image) {
            iter = new DeepWaterImageIterator(trainData, trainLabels, _localmodel._meanData, batchSize, _localmodel._width, _localmodel._height, _localmodel._channels, _localmodel.get_params()._cache_data);
        } else if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.dataset) {
            assert (_localmodel._dataInfo != null);
            iter = new DeepWaterDatasetIterator(trainData, trainLabels, _localmodel._dataInfo, batchSize, _localmodel.get_params()._cache_data);
        } else if (_localmodel.get_params()._problem_type == DeepWaterParameters.ProblemType.text) {
            iter = new DeepWaterTextIterator(trainData, trainLabels, batchSize, 56, /*FIXME*/ _localmodel.get_params()._cache_data);
        }
        NativeTrainTask ntt;
        while (iter.Next(fs) && !_job.isStopping()) {
            //        if (ntt != null) nativetime += ntt._timeInMillis;
            long n = _localmodel.get_processed_total();
            //        if(!_localmodel.get_params()._quiet_mode)
            //          Log.info("Trained " + n + " samples. Training on " + Arrays.toString(((DeepWaterImageIterator)iter).getFiles()));
            _localmodel._backend.setParameter(_localmodel._model, "learning_rate", _localmodel.get_params().learningRate((double) n));
            _localmodel._backend.setParameter(_localmodel._model, "momentum", _localmodel.get_params().momentum((double) n));
            //fork off GPU work, but let the iterator.Next() wait on completion before swapping again
            //System.err.println("data: " + Arrays.toString(iter.getData()));
            float[] preds = _localmodel._backend.predict(_localmodel._model, iter.getData());
            if (Float.isNaN(ArrayUtils.sum(preds))) {
                Log.err(DeepWaterModel.unstable_msg);
                throw new UnsupportedOperationException(DeepWaterModel.unstable_msg);
            }
            //        System.err.println("pred: " + Arrays.toString(preds));
            ntt = new NativeTrainTask(_localmodel._backend, _localmodel._model, iter.getData(), iter.getLabel());
            fs.add(H2O.submitTask(ntt));
            _localmodel.add_processed_local(iter._batch_size);
        }
        fs.blockForPending();
    //      nativetime += ntt._timeInMillis;
    } catch (IOException e) {
        //gracefully continue if we can't find files etc.
        e.printStackTrace();
    }
//    long end = System.currentTimeMillis();
//    if (!_localmodel.get_params()._quiet_mode) {
//      Log.info("Time for one iteration: " + PrettyPrint.msecs(end - start, true));
//      Log.info("Time for native training : " + PrettyPrint.msecs(nativetime, true));
//    }
}
Also used : Futures (water.Futures), ArrayList (java.util.ArrayList), IOException (java.io.IOException), Random (java.util.Random), BufferedString (water.parser.BufferedString)
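
The training loop above forks the native training work with fs.add(H2O.submitTask(ntt)); the iterator's Next(fs) waits on that work before swapping in the next batch, and the final fs.blockForPending() catches anything still outstanding. A reduced sketch of the fork-and-wait part, assuming a hypothetical list of H2O counted-completer tasks (names are illustrative, not from the h2o sources):

import java.util.List;
import water.Futures;
import water.H2O;

// Hedged sketch: fork several background tasks into one Futures, then wait once.
// `tasks` is a hypothetical list of tasks; H2O.submitTask forks each one.
static void forkAndWait(List<H2O.H2OCountedCompleter> tasks) {
    Futures fs = new Futures();
    for (H2O.H2OCountedCompleter task : tasks)
        fs.add(H2O.submitTask(task));   // fork the task and keep its handle in the Futures
    fs.blockForPending();               // single wait for all forked tasks to finish
}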

Example 3 with Futures

Use of water.Futures in project h2o-3 by h2oai.

In the class RemoveAllHandler, method remove.

// called through reflection by RequestServer
@SuppressWarnings("unused")
public RemoveAllV3 remove(int version, RemoveAllV3 u) {
    Log.info("Removing all objects");
    Futures fs = new Futures();
    // Cancel and remove leftover running jobs
    for (Job j : Job.jobs()) {
        j.stop_requested();
        j.remove(fs);
    }
    // Wipe out any and all session info
    if (RapidsHandler.SESSIONS != null) {
        for (String k : RapidsHandler.SESSIONS.keySet()) (RapidsHandler.SESSIONS.get(k)).endQuietly(null);
        RapidsHandler.SESSIONS.clear();
    }
    fs.blockForPending();
    // Bulk brainless key removal.  Completely wipes all Keys without regard.
    new MRTask(H2O.MIN_HI_PRIORITY) {

        @Override
        public void setupLocal() {
            H2O.raw_clear();
            water.fvec.Vec.ESPC.clear();
        }
    }.doAllNodes();
    // Wipe the backing store without regard as well
    H2O.getPM().getIce().cleanUp();
    Log.info("Finished removing objects");
    return u;
}
Also used : Futures (water.Futures), MRTask (water.MRTask), Job (water.Job)
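
RemoveAllHandler queues every job removal into the same Futures and blocks only once before wiping the store. The same batching works for plain key removals; a small sketch, assuming the DKV.remove(Key, Futures) overload exists (the key array is hypothetical):

import water.DKV;
import water.Futures;
import water.Key;

// Hedged sketch: batch several removals into one Futures instead of blocking per key.
// `keys` is a hypothetical array of keys to delete; DKV.remove(Key, Futures) is assumed.
static void removeAll(Key[] keys) {
    Futures fs = new Futures();
    for (Key k : keys)
        DKV.remove(k, fs);    // enqueue each removal
    fs.blockForPending();     // wait once for all of them
}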

Example 4 with Futures

Use of water.Futures in project h2o-2 by h2oai.

In the class CoxPH, method execImpl.

@Override
protected void execImpl() {
    final DataInfo dinfo = model.data_info;
    final int n_offsets = (model.parameters.offset_columns == null) ? 0 : model.parameters.offset_columns.length;
    final int n_coef = dinfo.fullN() - n_offsets;
    final double[] step = MemoryManager.malloc8d(n_coef);
    final double[] oldCoef = MemoryManager.malloc8d(n_coef);
    final double[] newCoef = MemoryManager.malloc8d(n_coef);
    Arrays.fill(step, Double.NaN);
    Arrays.fill(oldCoef, Double.NaN);
    for (int j = 0; j < n_coef; ++j) newCoef[j] = init;
    double oldLoglik = -Double.MAX_VALUE;
    final int n_time = (int) (model.max_time - model.min_time + 1);
    final boolean has_start_column = (model.parameters.start_column != null);
    final boolean has_weights_column = (model.parameters.weights_column != null);
    for (int i = 0; i <= iter_max; ++i) {
        model.iter = i;
        final CoxPHTask coxMR = new CoxPHTask(self(), dinfo, newCoef, model.min_time, n_time, n_offsets, has_start_column, has_weights_column).doAll(dinfo._adaptedFrame);
        final double newLoglik = model.calcLoglik(coxMR);
        if (newLoglik > oldLoglik) {
            if (i == 0)
                model.calcCounts(coxMR);
            model.calcModelStats(newCoef, newLoglik);
            model.calcCumhaz_0(coxMR);
            if (newLoglik == 0)
                model.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
            else
                model.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
            if (model.lre >= lre_min)
                break;
            Arrays.fill(step, 0);
            for (int j = 0; j < n_coef; ++j) for (int k = 0; k < n_coef; ++k) step[j] -= model.var_coef[j][k] * model.gradient[k];
            for (int j = 0; j < n_coef; ++j) if (Double.isNaN(step[j]) || Double.isInfinite(step[j]))
                break;
            oldLoglik = newLoglik;
            System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
        } else {
            for (int j = 0; j < n_coef; ++j) step[j] /= 2;
        }
        for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j];
    }
    final Futures fs = new Futures();
    DKV.put(dest(), model, fs);
    fs.blockForPending();
}
Also used : DataInfo (hex.FrameTask.DataInfo), Futures (water.Futures)

Example 5 with Futures

Use of water.Futures in project h2o-2 by h2oai.

In the class SparseTest, method makeChunk.

protected Chunk makeChunk(double[] vals) {
    int nzs = 0;
    int[] nonzeros = new int[vals.length];
    int j = 0;
    // record the indices of the nonzero values (computed here but not used later in this method)
    for (double d : vals) {
        if (d != 0) nonzeros[nzs++] = j;
        j++;
    }
    Key key = Vec.newKey();
    AppendableVec av = new AppendableVec(key);
    NewChunk nv = new NewChunk(av, 0);
    for (double d : vals) {
        if (Double.isNaN(d))
            nv.addNA();
        else if ((long) d == d)
            nv.addNum((long) d, 0);
        else
            nv.addNum(d);
    }
    nv.close(0, null);
    Futures fs = new Futures();
    Vec vec = av.close(fs);
    fs.blockForPending();
    return vec.chunkForChunkIdx(0);
}
Also used : Futures (water.Futures), Key (water.Key)

Aggregations

Futures (water.Futures): 34
Vec (water.fvec.Vec): 11
Key (water.Key): 10
Frame (water.fvec.Frame): 7
AppendableVec (water.fvec.AppendableVec): 4
NewChunk (water.fvec.NewChunk): 4
ValFrame (water.rapids.vals.ValFrame): 4
ArrayList (java.util.ArrayList): 2
Random (java.util.Random): 2
Test (org.junit.Test): 2
MRTask (water.MRTask): 2
DataInfo (hex.FrameTask.DataInfo): 1
DMatrix (hex.la.DMatrix): 1
File (java.io.File): 1
IOException (java.io.IOException): 1
Job (water.Job): 1
FrameKeyV3 (water.api.schemas3.KeyV3.FrameKeyV3): 1
ByteVec (water.fvec.ByteVec): 1
NFSFileVec (water.fvec.NFSFileVec): 1
BufferedString (water.parser.BufferedString): 1