use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.
the class Http2NetworkClient method sendAndPoll.
/**
 * Sends the given requests over HTTP/2 stream channels and then polls the response queue for ready responses.
 * <p>
 * The method works in three steps:
 * <ol>
 *   <li>For every correlation id in {@code requestsToDrop} that is still tracked as in flight, a {@link ResponseInfo}
 *   with {@link NetworkClientErrorCode#TimeoutError} is added to the result (the stream itself is not reset here).</li>
 *   <li>For every request in {@code requestsToSend}, a stream channel is acquired asynchronously from the pool that
 *   belongs to the request's host/port and the request is written to it. A failed acquire, a failed write, or
 *   (if configured) a write that took too long is reported by putting a {@link ResponseInfo} with
 *   {@link NetworkClientErrorCode#NetworkError} on the response queue.</li>
 *   <li>The response queue is polled with {@code pollTimeoutMs} and the in-flight bookkeeping of every returned
 *   response is cleared.</li>
 * </ol>
 * @param requestsToSend the requests to send out.
 * @param requestsToDrop the correlation ids of in-flight requests that the caller no longer waits for.
 * @param pollTimeoutMs the timeout handed to the response queue poll.
 * @return the responses that are ready, including the timeout responses generated for dropped requests.
 */
@Override
public List<ResponseInfo> sendAndPoll(List<RequestInfo> requestsToSend, Set<Integer> requestsToDrop, int pollTimeoutMs) {
  List<ResponseInfo> readyResponseInfos = new ArrayList<>();
  if (!requestsToDrop.isEmpty()) {
    logger.warn("Number of requestsToDrop: {}", requestsToDrop.size());
    http2ClientMetrics.http2RequestsToDropCount.inc(requestsToDrop.size());
    for (int correlationId : requestsToDrop) {
      Channel streamChannel = correlationIdInFlightToChannelMap.remove(correlationId);
      if (streamChannel != null) {
        logger.warn("Drop request on streamChannel: {}", streamChannel);
        // Drop request just generates a ResponseInfo with TimeoutError to router.
        // The stream is still transmitting, but router will ignore ResponseInfo with the same correlationId.
        // We need stream reset to cancel the stream in transmitting.
        RequestInfo requestInfo = streamChannel.attr(Http2NetworkClient.REQUEST_INFO).get();
        if (requestInfo != null) {
          readyResponseInfos.add(new ResponseInfo(requestInfo, NetworkClientErrorCode.TimeoutError, null));
        }
      }
    }
  }
  long sendStartTime = System.currentTimeMillis();
  // Send request
  http2ClientMetrics.http2ClientSendRate.mark(requestsToSend.size());
  for (RequestInfo requestInfo : requestsToSend) {
    long streamInitiateTime = System.currentTimeMillis();
    // Time the request spent between its creation and being handed to this client.
    long waitingTime = streamInitiateTime - requestInfo.getRequestCreateTime();
    http2ClientMetrics.requestToNetworkClientLatencyMs.update(waitingTime);
    this.pools.get(InetSocketAddress.createUnresolved(requestInfo.getHost(), requestInfo.getPort().getPort()))
        .acquire()
        .addListener((GenericFutureListener<Future<Channel>>) future -> {
          if (future.isSuccess()) {
            http2ClientMetrics.http2StreamAcquireTime.update(System.currentTimeMillis() - streamInitiateTime);
            long streamAcquiredTime = System.currentTimeMillis();
            Channel streamChannel = future.getNow();
            // Track the stream so that a later drop request or response can find it by correlation id.
            correlationIdInFlightToChannelMap.put(requestInfo.getRequest().getCorrelationId(), streamChannel);
            streamChannel.attr(REQUEST_INFO).set(requestInfo);
            if (!streamChannel.isWritable() || !streamChannel.parent().isWritable()) {
              http2ClientMetrics.http2StreamNotWritableCount.inc();
              logger.debug("Stream {} {} not writable. BytesBeforeWritable {} {}", streamChannel.hashCode(),
                  streamChannel, streamChannel.bytesBeforeWritable(), streamChannel.parent().bytesBeforeWritable());
            }
            streamChannel.writeAndFlush(requestInfo.getRequest()).addListener(new ChannelFutureListener() {
              @Override
              public void operationComplete(ChannelFuture future) throws Exception {
                if (future.isSuccess()) {
                  long writeAndFlushUsedTime = System.currentTimeMillis() - streamAcquiredTime;
                  http2ClientMetrics.http2StreamWriteAndFlushTime.update(writeAndFlushUsedTime);
                  requestInfo.setStreamSendTime(System.currentTimeMillis());
                  if (writeAndFlushUsedTime > http2ClientConfig.http2WriteAndFlushTimeoutMs) {
                    // The threshold checked (and printed) is http2WriteAndFlushTimeoutMs, so name it correctly.
                    logger.debug(
                        "WriteAndFlush exceeds http2WriteAndFlushTimeoutMs {}ms, used time: {}ms, stream channel {}",
                        http2ClientConfig.http2WriteAndFlushTimeoutMs, writeAndFlushUsedTime, streamChannel);
                    if (http2ClientConfig.http2DropRequestOnWriteAndFlushTimeout) {
                      RequestInfo requestInfoFromChannelAttr = releaseAndCloseStreamChannel(streamChannel);
                      if (requestInfoFromChannelAttr != null) {
                        http2ClientResponseHandler.getResponseInfoQueue()
                            .put(new ResponseInfo(requestInfo, NetworkClientErrorCode.NetworkError, null));
                      }
                    }
                  }
                } else {
                  http2ClientMetrics.http2StreamWriteAndFlushErrorCount.inc();
                  logger.warn("Stream {} {} writeAndFlush fail. Cause: {}", streamChannel.hashCode(), streamChannel,
                      future.cause().toString());
                  RequestInfo requestInfoFromChannelAttr = releaseAndCloseStreamChannel(streamChannel);
                  if (requestInfoFromChannelAttr != null) {
                    http2ClientResponseHandler.getResponseInfoQueue()
                        .put(new ResponseInfo(requestInfoFromChannelAttr, NetworkClientErrorCode.NetworkError, null));
                  }
                }
              }
            });
          } else {
            logger.error("Couldn't acquire stream channel to {}:{} . Cause:", requestInfo.getHost(),
                requestInfo.getPort().getPort(), future.cause());
            // The request never reached a channel, so release it here and report the failure to the caller.
            requestInfo.getRequest().release();
            http2ClientResponseHandler.getResponseInfoQueue()
                .put(new ResponseInfo(requestInfo, NetworkClientErrorCode.NetworkError, null));
          }
        });
  }
  http2ClientMetrics.http2ClientSendTime.update(System.currentTimeMillis() - sendStartTime);
  http2ClientResponseHandler.getResponseInfoQueue().poll(readyResponseInfos, pollTimeoutMs);
  for (ResponseInfo responseInfo : readyResponseInfos) {
    correlationIdInFlightToChannelMap.remove(responseInfo.getRequestInfo().getRequest().getCorrelationId());
  }
  // NOTE(review): this marks the *send* rate meter with the number of responses that were received, so the send
  // rate also counts responses. Confirm whether a dedicated receive-rate meter was intended here.
  http2ClientMetrics.http2ClientSendRate.mark(readyResponseInfos.size());
  http2ClientMetrics.http2ClientSendAndPollTime.update(System.currentTimeMillis() - sendStartTime);
  return readyResponseInfos;
}
use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.
the class Http2NetworkClient method warmUpConnections.
/**
 * Warms up HTTP/2 connections to the given data nodes by acquiring (and immediately releasing) stream channels.
 * <p>
 * For each data node, {@code http2MinConnectionPerPort * connectionWarmUpPercentagePerDataNode / 100} acquisitions
 * are started asynchronously. The calling thread then waits until every acquisition has completed, until
 * {@code timeForWarmUp} milliseconds have elapsed, or until it is interrupted, whichever comes first.
 * @param dataNodeIds the data nodes to warm up connections to.
 * @param connectionWarmUpPercentagePerDataNode percentage of the configured minimum connections per port to warm up.
 * @param timeForWarmUp the maximum time, in milliseconds, to wait for the warm up to finish.
 * @param responseInfoList the list to which a {@link ResponseInfo} with
 *                         {@link NetworkClientErrorCode#NetworkError} is added for every failed acquisition.
 * @return the number of acquisitions that had succeeded when this method returned.
 */
@Override
public int warmUpConnections(List<DataNodeId> dataNodeIds, int connectionWarmUpPercentagePerDataNode, long timeForWarmUp, List<ResponseInfo> responseInfoList) {
  long startTime = System.currentTimeMillis();
  AtomicInteger successCount = new AtomicInteger();
  AtomicInteger failCount = new AtomicInteger();
  int warmUpConnectionPerPort =
      http2ClientConfig.http2MinConnectionPerPort * connectionWarmUpPercentagePerDataNode / 100;
  int expectedConnections = dataNodeIds.size() * warmUpConnectionPerPort;
  for (DataNodeId dataNodeId : dataNodeIds) {
    for (int i = 0; i < warmUpConnectionPerPort; i++) {
      this.pools.get(InetSocketAddress.createUnresolved(dataNodeId.getHostname(), dataNodeId.getHttp2Port()))
          .acquire()
          .addListener((GenericFutureListener<Future<Channel>>) future -> {
            if (future.isSuccess()) {
              Channel streamChannel = future.getNow();
              releaseAndCloseStreamChannel(streamChannel);
              successCount.incrementAndGet();
            } else {
              // Listeners run asynchronously and may complete concurrently, while the caller hands in a plain List,
              // so guard the mutation.
              synchronized (responseInfoList) {
                responseInfoList.add(new ResponseInfo(null, NetworkClientErrorCode.NetworkError, null, dataNodeId));
              }
              logger.error("Couldn't acquire stream channel to {}:{} . Cause: {}.", dataNodeId.getHostname(),
                  dataNodeId.getHttp2Port(), future.cause());
              // Count the failure only after it has been recorded, so that a waiter that observes the final count
              // also observes the corresponding entry in responseInfoList.
              failCount.incrementAndGet();
            }
          });
    }
  }
  while (true) {
    long elapsedMs = System.currentTimeMillis() - startTime;
    if (elapsedMs >= timeForWarmUp || successCount.get() + failCount.get() == expectedConnections) {
      break;
    }
    try {
      // Never sleep past the warm up deadline.
      Thread.sleep(Math.min(300, timeForWarmUp - elapsedMs));
    } catch (InterruptedException e) {
      // Stop waiting, but keep the interrupt visible to the caller instead of swallowing it.
      Thread.currentThread().interrupt();
      break;
    }
  }
  logger.info("HTTP2 connection warm up done. Tried: {}, Succeeded: {}, Failed: {}, Time elapsed: {} ms",
      expectedConnections, successCount, failCount, System.currentTimeMillis() - startTime);
  return successCount.get();
}
use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.
the class NonBlockingRouterTest method testResponseWithNullRequestInfo.
/**
 * Verifies that a response with a {@code null} request info (returned by the network client when a request timed out
 * in the pending queue) still reaches the response handler and marks the associated data node as timed out.
 * @throws Exception
 */
@Test
public void testResponseWithNullRequestInfo() throws Exception {
  NonBlockingRouter routerUnderTest = null;
  try {
    RouterConfig config = new RouterConfig(new VerifiableProperties(getNonBlockingRouterProperties("DC1")));
    routerMetrics = new NonBlockingRouterMetrics(mockClusterMap, config);

    // A network client whose warm up reports one connection and whose close() does nothing.
    NetworkClient stubbedClient = Mockito.mock(NetworkClient.class);
    Mockito.when(stubbedClient.warmUpConnections(anyList(), anyInt(), anyLong(), anyList())).thenReturn(1);
    doNothing().when(stubbedClient).close();

    // The only response the stubbed client ever returns: a network error with no request info, tied to a data node.
    MockDataNodeId nodeUnderTest = (MockDataNodeId) mockClusterMap.getDataNodeIds().get(0);
    List<ResponseInfo> cannedResponses = new ArrayList<>();
    cannedResponses.add(new ResponseInfo(null, NetworkClientErrorCode.NetworkError, null, nodeUnderTest));

    // By default, there are 1 operation controller and 1 background deleter thread. Waiting for 3 invocations of
    // sendAndPoll() guarantees that at least one thread has finished a full onResponse() round, i.e. the state of the
    // test node has been updated in ResponseHandler.
    CountDownLatch sendAndPollCalls = new CountDownLatch(3);
    doAnswer(invocation -> {
      sendAndPollCalls.countDown();
      return cannedResponses;
    }).when(stubbedClient).sendAndPoll(anyList(), anySet(), anyInt());

    NetworkClientFactory clientFactory = Mockito.mock(NetworkClientFactory.class);
    Mockito.when(clientFactory.getNetworkClient()).thenReturn(stubbedClient);

    routerUnderTest =
        new NonBlockingRouter(config, routerMetrics, clientFactory, new LoggingNotificationSystem(), mockClusterMap,
            kms, cryptoService, cryptoJobHandler, accountService, mockTime, MockClusterMap.DEFAULT_PARTITION_CLASS);

    boolean latchReleased = sendAndPollCalls.await(10, TimeUnit.SECONDS);
    assertTrue("Invocation latch didn't count to 0 within 10 seconds", latchReleased);
    // verify the test node is considered timeout
    assertTrue("The node should be considered timeout", nodeUnderTest.isTimedOut());
  } finally {
    if (routerUnderTest != null) {
      routerUnderTest.close();
    }
  }
}
use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.
the class NonBlockingRouterTestBase method testFailureDetectorNotification.
/**
 * Checks that the failure detector is notified correctly for every response, independent of the order in which
 * successful and failed responses arrive.
 * @param opHelper the {@link OperationHelper}
 * @param networkClient the {@link SocketNetworkClient}
 * @param failedReplicaIds the list that will contain all the replicas for which failure was notified.
 * @param blobId the id of the blob to get/delete. For puts, this will be null.
 * @param successfulResponseCount the AtomicInteger that will contain the count of replicas for which success was
 *                                notified.
 * @param invalidResponse the AtomicBoolean that will contain whether an unexpected failure was notified.
 * @param indexToFail if not -1, the (zero-based) index of the request whose response is to be simulated as failed.
 *                    For example, if index is 0, then the first response will be failed.
 *                    If the index is -1, no responses will be failed, and successful responses will be returned to
 *                    the operation managers.
 */
protected void testFailureDetectorNotification(OperationHelper opHelper, SocketNetworkClient networkClient, List<ReplicaId> failedReplicaIds, BlobId blobId, AtomicInteger successfulResponseCount, AtomicBoolean invalidResponse, int indexToFail) throws Exception {
  // Reset all the state shared with the caller before starting.
  failedReplicaIds.clear();
  successfulResponseCount.set(0);
  invalidResponse.set(false);
  mockSelectorState.set(MockSelectorState.Good);

  FutureResult operationFuture = opHelper.submitOperation(blobId);
  int parallelism = opHelper.requestParallelism;
  List<RequestInfo> collectedRequests = new ArrayList<>();
  Set<Integer> droppedIds = new HashSet<>();

  // Keep polling the operation manager until it has produced one request per parallel slot.
  long requestWaitStartMs = SystemTime.getInstance().milliseconds();
  while (collectedRequests.size() < parallelism) {
    if (requestWaitStartMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
      Assert.fail("Waited too long for requests.");
    }
    opHelper.pollOpManager(collectedRequests, droppedIds);
  }

  ReplicaId replicaToFail = null;
  if (indexToFail != -1) {
    replicaToFail = collectedRequests.get(indexToFail).getReplicaId();
  }

  for (RequestInfo pending : collectedRequests) {
    boolean simulateFailure = replicaToFail != null && replicaToFail.equals(pending.getReplicaId());
    ResponseInfo outcome;
    if (simulateFailure) {
      // Fabricate a network error for the chosen replica instead of sending its request.
      outcome = new ResponseInfo(pending, NetworkClientErrorCode.NetworkError, null);
      pending.getRequest().release();
    } else {
      // Send the request once, then keep polling (with nothing more to send) until a response shows up.
      List<RequestInfo> toSend = new ArrayList<>();
      toSend.add(pending);
      List<ResponseInfo> received;
      long responseWaitStartMs = SystemTime.getInstance().milliseconds();
      do {
        if (responseWaitStartMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
          Assert.fail("Waited too long for the response.");
        }
        received = networkClient.sendAndPoll(toSend, Collections.emptySet(), 10);
        toSend.clear();
      } while (received.isEmpty());
      outcome = received.get(0);
    }
    opHelper.handleResponse(outcome);
    outcome.release();
  }

  // Poll once again so that the operation gets a chance to complete.
  collectedRequests.clear();
  if (testEncryption) {
    opHelper.awaitOpCompletionOrTimeOut(operationFuture);
  } else {
    opHelper.pollOpManager(collectedRequests, droppedIds);
  }
  operationFuture.get(AWAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS);
  Assert.assertEquals(0, droppedIds.size());

  if (indexToFail == -1) {
    Assert.assertEquals("Successful notification should have arrived for replicas that were up",
        opHelper.requestParallelism, successfulResponseCount.get());
    Assert.assertEquals("Failure detector should not have been notified", 0, failedReplicaIds.size());
  } else {
    Assert.assertEquals("Failure detector should have been notified", 1, failedReplicaIds.size());
    Assert.assertEquals("Failed notification should have arrived for the failed replica", replicaToFail,
        failedReplicaIds.get(0));
    Assert.assertEquals("Successful notification should have arrived for replicas that were up",
        opHelper.requestParallelism - 1, successfulResponseCount.get());
  }
  // In both scenarios nothing but the expected success/failure notifications may have been seen.
  Assert.assertFalse("There should be no notifications of any other kind", invalidResponse.get());
}
use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.
the class GetBlobOperationTest method testRequestTimeoutAndBlobNotFoundLocalTimeout.
/**
 * Covers the scenario in which 2 local replicas time out while the one remaining local replica and all the remote
 * replicas answer with Blob_Not_Found.
 * @throws Exception
 */
@Test
public void testRequestTimeoutAndBlobNotFoundLocalTimeout() throws Exception {
  // Only meaningful when the adaptive operation tracker is in use.
  String adaptiveTrackerName = AdaptiveOperationTracker.class.getSimpleName();
  assumeTrue(operationTrackerType.equals(adaptiveTrackerName));

  doPut();
  GetBlobOperation getBlobOp = createOperation(routerConfig, null);
  AdaptiveOperationTracker adaptiveTracker = (AdaptiveOperationTracker) getBlobOp.getFirstChunkOperationTrackerInUse();
  correlationIdToGetOperation.clear();
  for (MockServer mockServer : mockServerLayout.getMockServers()) {
    mockServer.setServerErrorForAllRequests(ServerErrorCode.Blob_Not_Found);
  }

  // Issue the first round of requests, then advance the clock past the request timeout before any response is handled.
  getBlobOp.poll(requestRegistrationCallback);
  time.sleep(routerConfig.routerRequestTimeoutMs + 1);

  // The request should have response from one local replica and all remote replicas.
  while (!getBlobOp.isOperationComplete()) {
    getBlobOp.poll(requestRegistrationCallback);
    List<ResponseInfo> polledResponses = sendAndWaitForResponses(requestRegistrationCallback.getRequestsToSend());
    for (ResponseInfo polled : polledResponses) {
      // Only deserialize a GetResponse when the response carries no network level error.
      GetResponse parsed = null;
      if (polled.getError() == null) {
        parsed = GetResponse.readFrom(new NettyByteBufDataInputStream(polled.content()), mockClusterMap);
      }
      getBlobOp.handleResponse(polled, parsed);
      polled.release();
    }
  }

  // error code should be OperationTimedOut because it precedes BlobDoesNotExist
  RouterException failure = (RouterException) getBlobOp.getOperationException();
  Assert.assertEquals(RouterErrorCode.OperationTimedOut, failure.getErrorCode());

  Histogram localHistogram =
      adaptiveTracker.getLatencyHistogram(RouterTestHelpers.getAnyReplica(blobId, true, localDcName));
  Histogram remoteHistogram =
      adaptiveTracker.getLatencyHistogram(RouterTestHelpers.getAnyReplica(blobId, false, localDcName));
  // the count of data points in local colo Histogram should be 1, because first 2 request timed out
  Assert.assertEquals("The number of data points in local colo latency histogram is not expected", 1,
      localHistogram.getCount());
  // the count of data points in cross colo Histogram should be 6 because all remote replicas respond with proper error code
  Assert.assertEquals("The number of data points in cross colo latency histogram is not expected", 6,
      remoteHistogram.getCount());
}
Aggregations