Use of io.mantisrx.common.network.WorkerEndpoint in the Mantis project by Netflix.
From the class MasterClientWrapper, method getAllNonJobMasterEndpoints:
/**
 * Builds a {@link WorkerEndpoint} for every started worker outside stage 0 and publishes the
 * job's worker count.
 *
 * <p>Stage 0 is skipped entirely; judging by the method name it is the job-master stage
 * (NOTE(review): confirm against the scheduler). For the remaining stages the declared worker
 * counts are summed, whatever the state of the individual workers, and emitted on
 * {@code numWorkersSubject} as a {@link JobNumWorkers}. Only hosts whose state is
 * {@code MantisJobState.Started} produce an endpoint.
 *
 * @param jobId             id of the job the assignments belong to; used for logging and in the
 *                          emitted {@link JobNumWorkers}
 * @param workerAssignments worker assignments keyed by stage number
 * @return endpoints of all started workers in stages other than stage 0, possibly empty
 */
private List<Endpoint> getAllNonJobMasterEndpoints(final String jobId, final Map<Integer, WorkerAssignments> workerAssignments) {
    List<Endpoint> endpoints = new ArrayList<>();
    int totalWorkers = 0;
    for (Map.Entry<Integer, WorkerAssignments> workerAssignment : workerAssignments.entrySet()) {
        final Integer stageNum = workerAssignment.getKey();
        // skip workers for stage 0
        if (stageNum == 0) {
            continue;
        }
        final WorkerAssignments assignments = workerAssignment.getValue();
        logger.info("job {} Creating endpoints conx from {} worker assignments for stage {}", jobId, assignments.getHosts().size(), stageNum);
        if (logger.isDebugEnabled()) {
            logger.debug("stage {} hosts: {}", stageNum, assignments.getHosts());
        }
        totalWorkers += assignments.getNumWorkers();
        for (WorkerHost host : assignments.getHosts().values()) {
            // Only workers that have reached the Started state get an endpoint.
            if (host.getState() != MantisJobState.Started) {
                continue;
            }
            final int workerIndex = host.getWorkerIndex();
            logger.info("job " + jobId + ": creating new endpoint for worker number=" + host.getWorkerNumber() + ", index=" + workerIndex + ", host:port=" + host.getHost() + ":" + host.getPort().get(0));
            Endpoint ep = new WorkerEndpoint(
                    getWrappedHost(host.getHost(), host.getWorkerNumber()),
                    // the first entry of the port list is used as the endpoint's port
                    host.getPort().get(0),
                    stageNum,
                    host.getMetricsPort(),
                    workerIndex,
                    host.getWorkerNumber(),
                    // completed callback
                    new Action0() {
                        @Override
                        public void call() {
                            logger.info("job " + jobId + " WorkerIndex " + workerIndex + " completed");
                        }
                    },
                    // error callback
                    new Action1<Throwable>() {
                        @Override
                        public void call(Throwable t1) {
                            // Pass the throwable along so the cause of the failure is not lost.
                            logger.warn("job " + jobId + " WorkerIndex " + workerIndex + " failed", t1);
                        }
                    });
            endpoints.add(ep);
        }
    }
    numWorkersSubject.onNext(new JobNumWorkers(jobId, totalWorkers));
    return endpoints;
}
Use of io.mantisrx.common.network.WorkerEndpoint in the Mantis project by Netflix.
From the class MetricsClientImpl, method handleEndpointConnect:
/**
 * Opens a metrics connection to the worker described by an endpoint change.
 *
 * <p>The endpoint must be a {@link WorkerEndpoint}, since the metrics port is read from that
 * type; any other endpoint is logged and ignored. The connection is created through
 * {@code workerConnectionFunc} and registered in {@code workerConnections} under the name
 * derived from host and metrics port. {@code nowClosed} is checked both before and after the
 * registration (presumably because the client may be closed concurrently); in either case the
 * new connection is closed, is not left in {@code workerConnections}, and an empty observable
 * is returned.
 *
 * @param ec the endpoint change to connect to
 * @return the observable returned by the worker connection's {@code call()}, or an empty
 *     observable when the endpoint is not a {@link WorkerEndpoint} or the client is closed
 */
private Observable<T> handleEndpointConnect(EndpointChange ec) {
    logger.info("Opening connection to metrics sink at " + ec.toString());
    final String unwrappedHost = MasterClientWrapper.getUnwrappedHost(ec.getEndpoint().getHost());
    final int metricsPort;
    if (ec.getEndpoint() instanceof WorkerEndpoint) {
        metricsPort = ((WorkerEndpoint) ec.getEndpoint()).getMetricPort();
    } else {
        logger.error("endpoint received on Endpoint connect is not a WorkerEndpoint {}, no metrics port to connect to", ec.getEndpoint());
        return Observable.empty();
    }
    WorkerConnection<T> workerConnection = workerConnectionFunc.call(unwrappedHost, metricsPort, new Action1<Boolean>() {
        @Override
        public void call(Boolean flag) {
            updateWorkerConx(flag);
        }
    }, new Action1<Boolean>() {
        @Override
        public void call(Boolean flag) {
            updateWorkerDataReceivingStatus(flag);
        }
    }, dataRecvTimeoutSecs);
    if (nowClosed.get()) {
        // check if closed before adding
        try {
            workerConnection.close();
        } catch (Exception e) {
            logger.warn("Error closing worker metrics connection " + workerConnection.getName() + " - " + e.getMessage(), e);
        }
        return Observable.empty();
    }
    workerConnections.put(toWorkerConnName(unwrappedHost, metricsPort), workerConnection);
    if (nowClosed.get()) {
        // Closed after the connection was registered: undo the registration and bail out.
        // The removal and the return must happen even when close() throws; otherwise the
        // connection would stay registered and call() would run on a connection we just
        // tried to close.
        try {
            workerConnection.close();
        } catch (Exception e) {
            logger.warn("Error closing worker metrics connection - " + e.getMessage(), e);
        } finally {
            workerConnections.remove(toWorkerConnName(unwrappedHost, metricsPort));
        }
        return Observable.empty();
    }
    return workerConnection.call();
}
Use of io.mantisrx.common.network.WorkerEndpoint in the Mantis project by Netflix.
From the class MetricsClientImpl, method handleEndpointClose:
/**
 * Tears down the metrics connection that belongs to an endpoint which went away.
 *
 * <p>The metrics port is read from the endpoint, which therefore has to be a
 * {@link WorkerEndpoint}; any other endpoint type is logged and ignored. If a connection is
 * registered in {@code workerConnections} for the host/metrics-port pair it is removed and
 * closed; a failure while closing is logged and not propagated.
 *
 * @param ec the endpoint change describing the endpoint being closed
 * @return always an empty observable
 */
private Observable<T> handleEndpointClose(EndpointChange ec) {
    logger.info("Closed connection to metrics sink at " + ec.toString());
    final String unwrappedHost = MasterClientWrapper.getUnwrappedHost(ec.getEndpoint().getHost());

    // Guard: without a WorkerEndpoint there is no metrics port to look the connection up by.
    if (!(ec.getEndpoint() instanceof WorkerEndpoint)) {
        logger.warn("endpoint received on Endpoint close is not a WorkerEndpoint {}, worker endpoint required for metrics port", ec.getEndpoint());
        return Observable.empty();
    }
    final int metricsPort = ((WorkerEndpoint) ec.getEndpoint()).getMetricPort();

    final String connectionKey = toWorkerConnName(unwrappedHost, metricsPort);
    final WorkerConnection<T> staleConnection = workerConnections.remove(connectionKey);
    if (staleConnection == null) {
        // Nothing was registered for this worker.
        return Observable.empty();
    }

    try {
        staleConnection.close();
    } catch (Exception e) {
        // shouldn't happen
        logger.error("Unexpected exception on closing worker metrics connection: " + e.getMessage(), e);
    }
    return Observable.empty();
}
Use of io.mantisrx.common.network.WorkerEndpoint in the Mantis project by Netflix.
From the class MetricsClientImplTest, method testMetricConnections:
/**
 * Checks that {@link MetricsClientImpl} connects to the metrics port of a
 * {@link WorkerEndpoint} and delivers the data served there.
 *
 * <p>A test SSE server is started with a resource-usage metric payload. The endpoint stream
 * emits one change per second: every tenth emission (starting with the first) is a plain
 * {@link Endpoint}, all others are {@link WorkerEndpoint}s pointing at the test server's
 * metrics port. The test passes only if at least one event equal to the served payload is
 * received within 30 seconds; the server is stopped whether or not that happens.
 */
@Test
public void testMetricConnections() throws InterruptedException, UnsupportedEncodingException, JsonProcessingException {
    final String jobId = "test-job-1";
    final String testResUsageMetricData = generateMetricJson(MetricStringConstants.RESOURCE_USAGE_METRIC_GROUP);
    final int metricsPort = TestSseServerFactory.newServerWithInitialData(testResUsageMetricData);
    try {
        final AtomicInteger i = new AtomicInteger(0);
        final Observable<EndpointChange> workerMetricLocationStream = Observable.interval(1, TimeUnit.SECONDS, Schedulers.io()).map(new Func1<Long, EndpointChange>() {
            @Override
            public EndpointChange call(Long aLong) {
                logger.info("emitting endpointChange");
                if (i.getAndIncrement() % 10 == 0) {
                    // plain Endpoint: carries no metrics port
                    return new EndpointChange(EndpointChange.Type.add, new Endpoint("localhost", 31002));
                } else {
                    return new EndpointChange(EndpointChange.Type.add, new WorkerEndpoint("localhost", 31002, 1, metricsPort, 0, 1));
                }
            }
        });
        MetricsClientImpl<MantisServerSentEvent> metricsClient = new MetricsClientImpl<>(jobId, new SseWorkerConnectionFunction(true, new Action1<Throwable>() {
            @Override
            public void call(Throwable throwable) {
                logger.error("Metric connection error: " + throwable.getMessage());
                try {
                    Thread.sleep(500);
                } catch (InterruptedException ie) {
                    logger.error("Interrupted waiting for retrying connection");
                    // Restore the interrupt flag so the calling thread can still observe it.
                    Thread.currentThread().interrupt();
                }
            }
        }, new SinkParameters.Builder().withParameter("name", MetricStringConstants.RESOURCE_USAGE_METRIC_GROUP).build()), new JobWorkerMetricsLocator() {
            @Override
            public Observable<EndpointChange> locateWorkerMetricsForJob(String jobId) {
                return workerMetricLocationStream;
            }
        }, Observable.just(1), new Observer<WorkerConnectionsStatus>() {
            @Override
            public void onCompleted() {
                logger.info("got onCompleted in WorkerConnStatus obs");
            }

            @Override
            public void onError(Throwable e) {
                logger.info("got onError in WorkerConnStatus obs");
            }

            @Override
            public void onNext(WorkerConnectionsStatus workerConnectionsStatus) {
                logger.info("got WorkerConnStatus {}", workerConnectionsStatus);
            }
        }, 60);
        final CountDownLatch latch = new CountDownLatch(1);
        final Observable<Observable<MantisServerSentEvent>> results = metricsClient.getResults();
        Observable.merge(results).doOnNext(new Action1<MantisServerSentEvent>() {
            @Override
            public void call(MantisServerSentEvent event) {
                logger.info("got event {}", event.getEventAsString());
                // A mismatch throws before countDown(), so the latch is only released by a
                // matching event.
                assertEquals(testResUsageMetricData, event.getEventAsString());
                latch.countDown();
            }
        }).doOnError(new Action1<Throwable>() {
            @Override
            public void call(Throwable throwable) {
                logger.error("got error {}", throwable.getMessage(), throwable);
            }
        }).doOnCompleted(new Action0() {
            @Override
            public void call() {
                logger.info("onComplete");
            }
        }).subscribe();
        // await() returns false on timeout; without checking it the test could never fail.
        assertEquals(true, latch.await(30, TimeUnit.SECONDS));
    } finally {
        // Stop the test server even when the assertion above fails or setup throws.
        TestSseServerFactory.stopAllRunning();
    }
}
Aggregations