Search in sources :

Example 1 with HeartBeat

use of water.HeartBeat in project h2o-3 by h2oai.

The method nodes of the class ClusterService.

/**
 * Replies with a single {@code NodesInfo} message holding one {@code NodeInfo} entry for every
 * member returned by {@code H2O.CLOUD.members()}, then completes the stream.
 *
 * <p>Each entry is filled from the member's most recent {@code HeartBeat}. The per-process and
 * system CPU percentages are always reported as {@code NaN}. Fork/join queue and thread counts
 * are appended only for priority levels whose queue count is non-negative.
 *
 * <p>Any {@code Throwable} raised while the reply is assembled or sent is handed to
 * {@code GrpcUtils.sendError} instead of propagating to the caller.
 *
 * @param request          empty request message; not read by this method
 * @param responseObserver observer that receives the assembled reply (or the error)
 */
@Override
public void nodes(Empty request, StreamObserver<NodesInfo> responseObserver) {
    try {
        NodesInfo.Builder reply = NodesInfo.newBuilder();
        for (H2ONode member : H2O.CLOUD.members()) {
            HeartBeat hb = member._heartbeat;
            NodeInfo.Builder info = NodeInfo.newBuilder()
                    // identity and health
                    .setAddress(member.getIpPortString())
                    .setIsHealthy(member.isHealthy())
                    .setPid(hb._pid)
                    // CPU figures; the two percentages are deliberately filled with NaN
                    .setNumCpus(hb._num_cpus)
                    .setNumCpusAllowed(hb._cpus_allowed)
                    .setSysLoad(hb._system_load_average)
                    .setMyCpuPercentage(Float.NaN)
                    .setSysCpuPercentage(Float.NaN)
                    .setGflops(hb._gflops)
                    // memory figures
                    .setMemoryBandwidth(hb._membw)
                    .setMemoryData(hb.get_kv_mem())
                    .setMemoryJava(hb.get_pojo_mem())
                    .setMemoryFree(hb.get_free_mem())
                    // keys, threads and open handles
                    .setNumKeys(hb._keys)
                    .setNumThreads(hb._nthreads)
                    .setNumActiveRpcs(hb._rpcs)
                    .setNumOpenTcps(hb._tcps_active)
                    .setNumOpenFds(hb._process_num_open_fds);

            int populatedLevels = 0;
            for (int priority = 0; priority <= H2O.MAX_PRIORITY; priority++) {
                short queued = hb._fjqueue[priority];
                short threads = hb._fjthrds[priority];
                // A value of -1 must appear in both arrays or in neither.
                assert (queued == -1) == (threads == -1) : "fjqueue and fjthreads are out of sync at priority " + priority;
                if (queued < 0) {
                    // Negative queue count: this level contributes nothing to the reply.
                    continue;
                }
                info.addFjQueueCount(queued);
                info.addFjThreadCount(threads);
                populatedLevels++;
            }
            // Exactly 8 populated priority levels are expected per node; with assertions enabled
            // this flags a change in that count, which client code may depend on.
            assert populatedLevels == 8 : "Unexpected number of priority queues";
            reply.addNode(info);
        }
        responseObserver.onNext(reply.build());
        responseObserver.onCompleted();
    } catch (Throwable failure) {
        GrpcUtils.sendError(failure, responseObserver, NodesInfo.class);
    }
}
Also used : HeartBeat(water.HeartBeat) H2ONode(water.H2ONode) PrettyPrint(water.util.PrettyPrint)

Example 2 with HeartBeat

use of water.HeartBeat in project h2o-3 by h2oai.

The method checkMemoryFootPrint of the class PCA.

/**
 * Compares estimated Gram-matrix memory requirements against the free memory reported by this
 * node's heartbeat.
 *
 * <p>Two estimates are computed when the PCA method is {@code GramSVD} or {@code Power}: one
 * from the value of {@code numColsExp(_train, true)} and one from the number of rows. For any
 * other method both estimates are 1. If both estimates exceed the free memory, an error is
 * registered on {@code _train}. If the first estimate alone exceeds it, {@code _wideDataset}
 * is set to {@code true}.
 */
@Override
protected void checkMemoryFootPrint() {
    // TODO: expose memory information on the H2O object so the heartbeat is not needed here.
    // NOTE: H2O.NUMCPUS was noted as the proper way to obtain the CPU count; this estimate
    // reads the heartbeat's _cpus_allowed instead.
    final HeartBeat heartbeat = H2O.SELF._heartbeat;
    final double numCols = hex.util.LinearAlgebraUtils.numColsExp(_train, true);
    final double numRows = _train.numRows();
    final boolean gramBased = _parms._pca_method == PCAParameters.Method.GramSVD
            || _parms._pca_method == PCAParameters.Method.Power;

    // Default footprint of 1 applies to methods that are not Gram-based.
    long gramBytes = 1;
    long wideGramBytes = 1;
    if (gramBased) {
        // One gram per core, 8 bytes per double, scaled by log2 of the chunk count.
        gramBytes = (long) (heartbeat._cpus_allowed * numCols * numCols * 8
                * Math.log((double) _train.lastVec().nChunks()) / Math.log(2.));
        // Same estimate with the row count in place of the column count.
        wideGramBytes = (long) (heartbeat._cpus_allowed * numRows * numRows * 8
                * Math.log((double) _train.lastVec().nChunks()) / Math.log(2.));
    }

    final long freeBytes = heartbeat.get_free_mem();
    final boolean gramTooBig = gramBytes > freeBytes;
    if (gramTooBig && wideGramBytes > freeBytes) {
        // Neither variant fits: report it against the training frame.
        error("_train", "Gram matrices (one per thread) won't fit in the driver node's memory (" + PrettyPrint.bytes(gramBytes) + " > " + PrettyPrint.bytes(freeBytes) + ") - try reducing the number of columns and/or the number of categorical factors.");
    }
    if (gramTooBig) {
        // Column-based gram does not fit: mark the dataset as wide so the more
        // memory-efficient variant is chosen.
        _wideDataset = true;
    }
}
Also used : HeartBeat(water.HeartBeat)

Aggregations

HeartBeat (water.HeartBeat)2 H2ONode (water.H2ONode)1 PrettyPrint (water.util.PrettyPrint)1