Use of water.HeartBeat in project h2o-3 by h2oai.
Class ClusterService, method nodes.
/**
 * Replies with a single {@link NodesInfo} message containing one {@link NodeInfo}
 * entry per member of {@code H2O.CLOUD}, then completes the stream.
 *
 * <p>Each entry is filled from the member's latest {@link HeartBeat}. The two CPU
 * percentage fields are always reported as {@code Float.NaN}. Fork/join queue and
 * thread counts are appended only for priorities whose queue count is non-negative.
 *
 * <p>Anything thrown while building or sending the reply (including assertion
 * failures) is handed to {@code GrpcUtils.sendError} instead of propagating.
 *
 * @param request          empty request message; not read
 * @param responseObserver observer that receives the reply or the error
 */
@Override
public void nodes(Empty request, StreamObserver<NodesInfo> responseObserver) {
    try {
        NodesInfo.Builder cloudInfo = NodesInfo.newBuilder();
        for (H2ONode member : H2O.CLOUD.members()) {
            HeartBeat hb = member._heartbeat;

            NodeInfo.Builder info = NodeInfo.newBuilder()
                    .setAddress(member.getIpPortString())
                    .setIsHealthy(member.isHealthy())
                    .setPid(hb._pid)
                    .setNumCpus(hb._num_cpus)
                    .setNumCpusAllowed(hb._cpus_allowed)
                    .setSysLoad(hb._system_load_average)
                    .setMyCpuPercentage(Float.NaN)
                    .setSysCpuPercentage(Float.NaN)
                    .setGflops(hb._gflops)
                    .setMemoryBandwidth(hb._membw)
                    .setMemoryData(hb.get_kv_mem())
                    .setMemoryJava(hb.get_pojo_mem())
                    .setMemoryFree(hb.get_free_mem())
                    .setNumKeys(hb._keys)
                    .setNumThreads(hb._nthreads)
                    .setNumActiveRpcs(hb._rpcs)
                    .setNumOpenTcps(hb._tcps_active)
                    .setNumOpenFds(hb._process_num_open_fds);

            // Copy the per-priority fork/join statistics, skipping negative queue counts.
            int reportedPriorities = 0;
            for (int priority = 0; priority <= H2O.MAX_PRIORITY; priority++) {
                short queued = hb._fjqueue[priority];
                short threads = hb._fjthrds[priority];
                // Both arrays must mark a priority with -1 together, or not at all.
                assert (queued == -1) == (threads == -1) : "fjqueue and fjthreads are out of sync at priority " + priority;
                if (queued < 0) {
                    continue;
                }
                info.addFjQueueCount(queued);
                info.addFjThreadCount(threads);
                reportedPriorities++;
            }

            // Guards against the number of priority queues being changed
            // surreptitiously without verifying that client code still works.
            assert reportedPriorities == 8 : "Unexpected number of priority queues";

            cloudInfo.addNode(info);
        }
        responseObserver.onNext(cloudInfo.build());
        responseObserver.onCompleted();
    } catch (Throwable failure) {
        GrpcUtils.sendError(failure, responseObserver, NodesInfo.class);
    }
}
Use of water.HeartBeat in project h2o-3 by h2oai.
Class PCA, method checkMemoryFootPrint.
/**
 * Estimates the memory needed for the Gram matrices used by the GramSVD and Power
 * PCA methods and compares it with the free memory reported by this node's heartbeat.
 *
 * <p>Two estimates are computed, both in bytes (8 bytes per double, one Gram per
 * allowed CPU): one over {@code p}, the value returned by
 * {@code LinearAlgebraUtils.numColsExp(_train, true)}, and one over {@code r}, the
 * number of rows. For any other PCA method both estimates are fixed at 1.
 *
 * <p>The estimates are scaled by {@code log2(nChunks)} of the last vector. That
 * factor is 0 for a single chunk (and negative infinity for zero chunks), which
 * would zero out the estimate and silently disable the check, so a factor of 1 is
 * used whenever the chunk count is 1 or less.
 *
 * <p>Outcome:
 * <ul>
 *   <li>both estimates exceed free memory: a {@code _train} error is registered;</li>
 *   <li>the column-based estimate exceeds free memory: {@code _wideDataset} is set
 *       to {@code true} (this also happens in the error case above).</li>
 * </ul>
 */
@Override
protected void checkMemoryFootPrint() {
    // todo: Add to H2O object memory information so we don't have to use heartbeat.
    HeartBeat hb = H2O.SELF._heartbeat;
    double p = hex.util.LinearAlgebraUtils.numColsExp(_train, true);
    double r = _train.numRows();
    boolean useGramSVD = _parms._pca_method == PCAParameters.Method.GramSVD;
    boolean usePower = _parms._pca_method == PCAParameters.Method.Power;

    long mem_usage = 1;
    long mem_usage_w = 1;
    if (useGramSVD || usePower) {
        double chunkCount = (double) _train.lastVec().nChunks();
        // One Gram of doubles per allowed core.
        double gramBytes = hb._cpus_allowed * p * p * 8;
        double gramBytesWide = hb._cpus_allowed * r * r * 8;
        if (chunkCount <= 1) {
            // log2(1) == 0 would reduce the estimate to nothing; count one Gram set instead.
            mem_usage = (long) gramBytes;
            mem_usage_w = (long) gramBytesWide;
        } else {
            double lnChunks = Math.log(chunkCount);
            double ln2 = Math.log(2.);
            // Same left-to-right evaluation order as the original expression.
            mem_usage = (long) (gramBytes * lnChunks / ln2);
            mem_usage_w = (long) (gramBytesWide * lnChunks / ln2);
        }
    }

    long max_mem = hb.get_free_mem();
    if ((mem_usage > max_mem) && (mem_usage_w > max_mem)) {
        // Neither the column-based nor the row-based Gram fits.
        String msg = "Gram matrices (one per thread) won't fit in the driver node's memory (" + PrettyPrint.bytes(mem_usage) + " > " + PrettyPrint.bytes(max_mem) + ") - try reducing the number of columns and/or the number of categorical factors.";
        error("_train", msg);
    }
    if (mem_usage > max_mem) {
        // choose the most memory efficient one
        // set to true if wide dataset is detected
        _wideDataset = true;
    }
}
Aggregations