Example 11 with ResponseInfo

use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.

the class Http2NetworkClient method sendAndPoll.

@Override
public List<ResponseInfo> sendAndPoll(List<RequestInfo> requestsToSend, Set<Integer> requestsToDrop, int pollTimeoutMs) {
    List<ResponseInfo> readyResponseInfos = new ArrayList<>();
    if (requestsToDrop.size() != 0) {
        logger.warn("Number of requestsToDrop: {}", requestsToDrop.size());
        http2ClientMetrics.http2RequestsToDropCount.inc(requestsToDrop.size());
        for (int correlationId : requestsToDrop) {
            Channel streamChannel = correlationIdInFlightToChannelMap.remove(correlationId);
            if (streamChannel != null) {
                logger.warn("Drop request on streamChannel: {}", streamChannel);
                // Dropping a request just generates a ResponseInfo with TimeoutError for the router.
                // The stream may still be transmitting, but the router will ignore a later ResponseInfo with the same correlationId.
                // A stream reset would be needed to actually cancel the in-flight transmission.
                RequestInfo requestInfo = streamChannel.attr(Http2NetworkClient.REQUEST_INFO).get();
                if (requestInfo != null) {
                    readyResponseInfos.add(new ResponseInfo(requestInfo, NetworkClientErrorCode.TimeoutError, null));
                }
            }
        }
    }
    long sendStartTime = System.currentTimeMillis();
    // Send request
    http2ClientMetrics.http2ClientSendRate.mark(requestsToSend.size());
    for (RequestInfo requestInfo : requestsToSend) {
        long streamInitiateTime = System.currentTimeMillis();
        long waitingTime = streamInitiateTime - requestInfo.getRequestCreateTime();
        http2ClientMetrics.requestToNetworkClientLatencyMs.update(waitingTime);
        this.pools.get(InetSocketAddress.createUnresolved(requestInfo.getHost(), requestInfo.getPort().getPort())).acquire().addListener((GenericFutureListener<Future<Channel>>) future -> {
            if (future.isSuccess()) {
                http2ClientMetrics.http2StreamAcquireTime.update(System.currentTimeMillis() - streamInitiateTime);
                long streamAcquiredTime = System.currentTimeMillis();
                Channel streamChannel = future.getNow();
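                // Track the in-flight request by correlation id and stash the RequestInfo on the stream channel for later lookup.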
                correlationIdInFlightToChannelMap.put(requestInfo.getRequest().getCorrelationId(), streamChannel);
                streamChannel.attr(REQUEST_INFO).set(requestInfo);
                if (!streamChannel.isWritable() || !streamChannel.parent().isWritable()) {
                    http2ClientMetrics.http2StreamNotWritableCount.inc();
                    logger.debug("Stream {} {} not writable. BytesBeforeWritable {} {}", streamChannel.hashCode(), streamChannel, streamChannel.bytesBeforeWritable(), streamChannel.parent().bytesBeforeWritable());
                }
                streamChannel.writeAndFlush(requestInfo.getRequest()).addListener(new ChannelFutureListener() {

                    @Override
                    public void operationComplete(ChannelFuture future) throws Exception {
                        if (future.isSuccess()) {
                            long writeAndFlushUsedTime = System.currentTimeMillis() - streamAcquiredTime;
                            http2ClientMetrics.http2StreamWriteAndFlushTime.update(writeAndFlushUsedTime);
                            requestInfo.setStreamSendTime(System.currentTimeMillis());
                            if (writeAndFlushUsedTime > http2ClientConfig.http2WriteAndFlushTimeoutMs) {
                                logger.debug("WriteAndFlush exceeds http2RequestTimeoutMs {}ms, used time: {}ms, stream channel {}", http2ClientConfig.http2WriteAndFlushTimeoutMs, writeAndFlushUsedTime, streamChannel);
                                if (http2ClientConfig.http2DropRequestOnWriteAndFlushTimeout) {
                                    RequestInfo requestInfoFromChannelAttr = releaseAndCloseStreamChannel(streamChannel);
                                    if (requestInfoFromChannelAttr != null) {
                                        http2ClientResponseHandler.getResponseInfoQueue().put(new ResponseInfo(requestInfo, NetworkClientErrorCode.NetworkError, null));
                                    }
                                }
                            }
                        } else {
                            http2ClientMetrics.http2StreamWriteAndFlushErrorCount.inc();
                            logger.warn("Stream {} {} writeAndFlush fail. Cause: {}", streamChannel.hashCode(), streamChannel, future.cause().toString());
                            RequestInfo requestInfoFromChannelAttr = releaseAndCloseStreamChannel(streamChannel);
                            if (requestInfoFromChannelAttr != null) {
                                http2ClientResponseHandler.getResponseInfoQueue().put(new ResponseInfo(requestInfoFromChannelAttr, NetworkClientErrorCode.NetworkError, null));
                            }
                        }
                    }
                });
            } else {
                logger.error("Couldn't acquire stream channel to {}:{} . Cause:", requestInfo.getHost(), requestInfo.getPort().getPort(), future.cause());
                requestInfo.getRequest().release();
                http2ClientResponseHandler.getResponseInfoQueue().put(new ResponseInfo(requestInfo, NetworkClientErrorCode.NetworkError, null));
            }
        });
    }
    http2ClientMetrics.http2ClientSendTime.update(System.currentTimeMillis() - sendStartTime);
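    // Drain any responses that have arrived, waiting up to pollTimeoutMs, then forget their in-flight correlation ids.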
    http2ClientResponseHandler.getResponseInfoQueue().poll(readyResponseInfos, pollTimeoutMs);
    for (ResponseInfo responseInfo : readyResponseInfos) {
        correlationIdInFlightToChannelMap.remove(responseInfo.getRequestInfo().getRequest().getCorrelationId());
    }
    http2ClientMetrics.http2ClientSendRate.mark(readyResponseInfos.size());
    http2ClientMetrics.http2ClientSendAndPollTime.update(System.currentTimeMillis() - sendStartTime);
    return readyResponseInfos;
}
Also used : ResponseInfo(com.github.ambry.network.ResponseInfo) AttributeKey(io.netty.util.AttributeKey) Http2ClientConfig(com.github.ambry.config.Http2ClientConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) LoggerFactory(org.slf4j.LoggerFactory) ArrayList(java.util.ArrayList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NetworkClientErrorCode(com.github.ambry.network.NetworkClientErrorCode) Http2StreamFrameToHttpObjectCodec(io.netty.handler.codec.http2.Http2StreamFrameToHttpObjectCodec) ChannelFutureListener(io.netty.channel.ChannelFutureListener) Map(java.util.Map) Http2Utils(com.github.ambry.network.http2.Http2Utils) EventLoopGroup(io.netty.channel.EventLoopGroup) Logger(org.slf4j.Logger) SSLFactory(com.github.ambry.commons.SSLFactory) ChannelInitializer(io.netty.channel.ChannelInitializer) NetworkClient(com.github.ambry.network.NetworkClient) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) GenericFutureListener(io.netty.util.concurrent.GenericFutureListener) Set(java.util.Set) InetSocketAddress(java.net.InetSocketAddress) RequestInfo(com.github.ambry.network.RequestInfo) ChannelFuture(io.netty.channel.ChannelFuture) Channel(io.netty.channel.Channel) List(java.util.List) ChannelPool(io.netty.channel.pool.ChannelPool) ChannelPoolMap(io.netty.channel.pool.ChannelPoolMap) Future(io.netty.util.concurrent.Future) HttpObjectAggregator(io.netty.handler.codec.http.HttpObjectAggregator)
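For orientation, sendAndPoll is non-blocking: requests are handed off to Netty stream channels and responses are picked up from the response handler's queue on subsequent calls. Below is a minimal sketch of a caller loop; it is illustrative only and assumes a "running" flag and a caller-supplied handleResponse method, neither of which is part of the ambry sources.

// Illustrative caller loop around sendAndPoll (not from the ambry sources).
// Assumptions: "client" is an initialized Http2NetworkClient, "running" is a caller-owned flag,
// and handleResponse(ResponseInfo) is a caller-supplied callback.
List<RequestInfo> requestsToSend = new ArrayList<>();
Set<Integer> requestsToDrop = new HashSet<>();
while (running) {
    List<ResponseInfo> responses = client.sendAndPoll(requestsToSend, requestsToDrop, 50);
    requestsToSend.clear();
    requestsToDrop.clear();
    for (ResponseInfo responseInfo : responses) {
        handleResponse(responseInfo);
        // ResponseInfo wraps Netty buffers; release it once handled, as the tests below do.
        responseInfo.release();
    }
}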

Example 12 with ResponseInfo

use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.

the class Http2NetworkClient method warmUpConnections.

@Override
public int warmUpConnections(List<DataNodeId> dataNodeIds, int connectionWarmUpPercentagePerDataNode, long timeForWarmUp, List<ResponseInfo> responseInfoList) {
    long startTime = System.currentTimeMillis();
    AtomicInteger successCount = new AtomicInteger();
    AtomicInteger failCount = new AtomicInteger();
    int warmUpConnectionPerPort = http2ClientConfig.http2MinConnectionPerPort * connectionWarmUpPercentagePerDataNode / 100;
    int expectedConnections = dataNodeIds.size() * warmUpConnectionPerPort;
    for (DataNodeId dataNodeId : dataNodeIds) {
        for (int i = 0; i < warmUpConnectionPerPort; i++) {
            this.pools.get(InetSocketAddress.createUnresolved(dataNodeId.getHostname(), dataNodeId.getHttp2Port())).acquire().addListener((GenericFutureListener<Future<Channel>>) future -> {
                if (future.isSuccess()) {
                    Channel streamChannel = future.getNow();
                    releaseAndCloseStreamChannel(streamChannel);
                    successCount.incrementAndGet();
                } else {
                    failCount.incrementAndGet();
                    responseInfoList.add(new ResponseInfo(null, NetworkClientErrorCode.NetworkError, null, dataNodeId));
                    logger.error("Couldn't acquire stream channel to {}:{} . Cause: {}.", dataNodeId.getHostname(), dataNodeId.getHttp2Port(), future.cause());
                }
            });
        }
    }
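    // Wait up to timeForWarmUp ms for all acquisition attempts to complete before reporting the result.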
    while (System.currentTimeMillis() - startTime < timeForWarmUp) {
        if (successCount.get() + failCount.get() == expectedConnections) {
            break;
        } else {
            try {
                Thread.sleep(300);
            } catch (InterruptedException e) {
                break;
            }
        }
    }
    logger.info("HTTP2 connection warm up done. Tried: {}, Succeeded: {}, Failed: {}, Time elapsed: {} ms", expectedConnections, successCount, failCount, System.currentTimeMillis() - startTime);
    return successCount.get();
}
Also used : ResponseInfo(com.github.ambry.network.ResponseInfo) AttributeKey(io.netty.util.AttributeKey) Http2ClientConfig(com.github.ambry.config.Http2ClientConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) LoggerFactory(org.slf4j.LoggerFactory) ArrayList(java.util.ArrayList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NetworkClientErrorCode(com.github.ambry.network.NetworkClientErrorCode) Http2StreamFrameToHttpObjectCodec(io.netty.handler.codec.http2.Http2StreamFrameToHttpObjectCodec) ChannelFutureListener(io.netty.channel.ChannelFutureListener) Map(java.util.Map) Http2Utils(com.github.ambry.network.http2.Http2Utils) EventLoopGroup(io.netty.channel.EventLoopGroup) Logger(org.slf4j.Logger) SSLFactory(com.github.ambry.commons.SSLFactory) ChannelInitializer(io.netty.channel.ChannelInitializer) NetworkClient(com.github.ambry.network.NetworkClient) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) GenericFutureListener(io.netty.util.concurrent.GenericFutureListener) Set(java.util.Set) InetSocketAddress(java.net.InetSocketAddress) RequestInfo(com.github.ambry.network.RequestInfo) ChannelFuture(io.netty.channel.ChannelFuture) Channel(io.netty.channel.Channel) List(java.util.List) ChannelPool(io.netty.channel.pool.ChannelPool) ChannelPoolMap(io.netty.channel.pool.ChannelPoolMap) Future(io.netty.util.concurrent.Future) HttpObjectAggregator(io.netty.handler.codec.http.HttpObjectAggregator)
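A minimal sketch of how warm-up might be invoked at startup is shown below; it is illustrative only, and the node list, percentage, and timeout values are assumptions rather than values taken from the ambry sources.

// Illustrative warm-up call (not from the ambry sources).
List<DataNodeId> dataNodeIds = clusterMap.getDataNodeIds();
List<ResponseInfo> warmUpFailures = new ArrayList<>();
// Ask for 100% of http2MinConnectionPerPort streams per node and wait at most 5 seconds.
int warmedUp = networkClient.warmUpConnections(dataNodeIds, 100, 5000, warmUpFailures);
logger.info("Warmed up {} HTTP/2 connections, {} acquisitions failed", warmedUp, warmUpFailures.size());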

Example 13 with ResponseInfo

use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.

the class NonBlockingRouterTest method testResponseWithNullRequestInfo.

/**
 * Test the case where a request times out in the pending queue and the network client returns a response with a null
 * requestInfo so that the node is marked down via the response handler.
 * @throws Exception
 */
@Test
public void testResponseWithNullRequestInfo() throws Exception {
    NonBlockingRouter testRouter = null;
    try {
        Properties props = getNonBlockingRouterProperties("DC1");
        VerifiableProperties verifiableProperties = new VerifiableProperties((props));
        RouterConfig routerConfig = new RouterConfig(verifiableProperties);
        routerMetrics = new NonBlockingRouterMetrics(mockClusterMap, routerConfig);
        NetworkClient mockNetworkClient = Mockito.mock(NetworkClient.class);
        Mockito.when(mockNetworkClient.warmUpConnections(anyList(), anyInt(), anyLong(), anyList())).thenReturn(1);
        doNothing().when(mockNetworkClient).close();
        List<ResponseInfo> responseInfoList = new ArrayList<>();
        MockDataNodeId testDataNode = (MockDataNodeId) mockClusterMap.getDataNodeIds().get(0);
        responseInfoList.add(new ResponseInfo(null, NetworkClientErrorCode.NetworkError, null, testDataNode));
        // By default, there is 1 operation controller and 1 background deleter thread. We set the CountDownLatch to 3 so
        // that at least one thread has completed calling onResponse() and the test node's state has been updated in the
        // ResponseHandler.
        CountDownLatch invocationLatch = new CountDownLatch(3);
        doAnswer(invocation -> {
            invocationLatch.countDown();
            return responseInfoList;
        }).when(mockNetworkClient).sendAndPoll(anyList(), anySet(), anyInt());
        NetworkClientFactory networkClientFactory = Mockito.mock(NetworkClientFactory.class);
        Mockito.when(networkClientFactory.getNetworkClient()).thenReturn(mockNetworkClient);
        testRouter = new NonBlockingRouter(routerConfig, routerMetrics, networkClientFactory, new LoggingNotificationSystem(), mockClusterMap, kms, cryptoService, cryptoJobHandler, accountService, mockTime, MockClusterMap.DEFAULT_PARTITION_CLASS);
        assertTrue("Invocation latch didn't count to 0 within 10 seconds", invocationLatch.await(10, TimeUnit.SECONDS));
        // verify the test node is considered timed out
        assertTrue("The node should be considered timed out", testDataNode.isTimedOut());
    } finally {
        if (testRouter != null) {
            testRouter.close();
        }
    }
}
Also used : ResponseInfo(com.github.ambry.network.ResponseInfo) VerifiableProperties(com.github.ambry.config.VerifiableProperties) NetworkClientFactory(com.github.ambry.network.NetworkClientFactory) ArrayList(java.util.ArrayList) BlobProperties(com.github.ambry.messageformat.BlobProperties) Properties(java.util.Properties) CountDownLatch(java.util.concurrent.CountDownLatch) RouterConfig(com.github.ambry.config.RouterConfig) SocketNetworkClient(com.github.ambry.network.SocketNetworkClient) NetworkClient(com.github.ambry.network.NetworkClient) LoggingNotificationSystem(com.github.ambry.commons.LoggingNotificationSystem) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) Test(org.junit.Test)
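The pattern exercised by this test is that a ResponseInfo built without a RequestInfo carries the DataNodeId explicitly, so the failure can still be attributed to a node. The sketch below shows how a consumer might branch on the two shapes; it is illustrative only: markNodeDown and handleResponse are hypothetical, and getDataNode() is assumed to be the accessor for the node passed to the 4-argument constructor.

// Illustrative consumer of a mixed response list (not from the ambry sources).
for (ResponseInfo responseInfo : responseInfoList) {
    if (responseInfo.getRequestInfo() == null) {
        // No originating request: attribute the NetworkError directly to the unreachable node.
        markNodeDown(responseInfo.getDataNode());
    } else {
        handleResponse(responseInfo);
    }
    responseInfo.release();
}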

Example 14 with ResponseInfo

use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.

the class NonBlockingRouterTestBase method testFailureDetectorNotification.

/**
 * Test that failure detector is correctly notified for all responses regardless of the order in which successful
 * and failed responses arrive.
 * @param opHelper the {@link OperationHelper}
 * @param networkClient the {@link SocketNetworkClient}
 * @param failedReplicaIds the list that will contain all the replicas for which failure was notified.
 * @param blobId the id of the blob to get/delete. For puts, this will be null.
 * @param successfulResponseCount the AtomicInteger that will contain the count of replicas for which success was
 *                                notified.
 * @param invalidResponse the AtomicBoolean that will contain whether an unexpected failure was notified.
 * @param indexToFail if non-negative, the index of the response for which failure is to be simulated.
 *                    For example, if index is 0, then the first response will be failed.
 *                    If the index is -1, no responses will be failed, and successful responses will be returned to
 *                    the operation managers.
 */
protected void testFailureDetectorNotification(OperationHelper opHelper, SocketNetworkClient networkClient, List<ReplicaId> failedReplicaIds, BlobId blobId, AtomicInteger successfulResponseCount, AtomicBoolean invalidResponse, int indexToFail) throws Exception {
    failedReplicaIds.clear();
    successfulResponseCount.set(0);
    invalidResponse.set(false);
    mockSelectorState.set(MockSelectorState.Good);
    FutureResult futureResult = opHelper.submitOperation(blobId);
    int requestParallelism = opHelper.requestParallelism;
    List<RequestInfo> allRequests = new ArrayList<>();
    Set<Integer> allDropped = new HashSet<>();
    long loopStartTimeMs = SystemTime.getInstance().milliseconds();
    while (allRequests.size() < requestParallelism) {
        if (loopStartTimeMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
            Assert.fail("Waited too long for requests.");
        }
        opHelper.pollOpManager(allRequests, allDropped);
    }
    ReplicaId replicaIdToFail = indexToFail == -1 ? null : allRequests.get(indexToFail).getReplicaId();
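    // Respond to every request: fabricate a NetworkError response for the replica chosen to fail, and route the rest
    // through the real network client.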
    for (RequestInfo requestInfo : allRequests) {
        ResponseInfo responseInfo;
        if (replicaIdToFail != null && replicaIdToFail.equals(requestInfo.getReplicaId())) {
            responseInfo = new ResponseInfo(requestInfo, NetworkClientErrorCode.NetworkError, null);
            requestInfo.getRequest().release();
        } else {
            List<RequestInfo> requestInfoListToSend = new ArrayList<>();
            requestInfoListToSend.add(requestInfo);
            List<ResponseInfo> responseInfoList;
            loopStartTimeMs = SystemTime.getInstance().milliseconds();
            do {
                if (loopStartTimeMs + AWAIT_TIMEOUT_MS < SystemTime.getInstance().milliseconds()) {
                    Assert.fail("Waited too long for the response.");
                }
                responseInfoList = networkClient.sendAndPoll(requestInfoListToSend, Collections.emptySet(), 10);
                requestInfoListToSend.clear();
            } while (responseInfoList.size() == 0);
            responseInfo = responseInfoList.get(0);
        }
        opHelper.handleResponse(responseInfo);
        responseInfo.release();
    }
    // Poll once again so that the operation gets a chance to complete.
    allRequests.clear();
    if (testEncryption) {
        opHelper.awaitOpCompletionOrTimeOut(futureResult);
    } else {
        opHelper.pollOpManager(allRequests, allDropped);
    }
    futureResult.get(AWAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS);
    Assert.assertEquals(0, allDropped.size());
    if (indexToFail == -1) {
        Assert.assertEquals("Successful notification should have arrived for replicas that were up", opHelper.requestParallelism, successfulResponseCount.get());
        Assert.assertEquals("Failure detector should not have been notified", 0, failedReplicaIds.size());
        Assert.assertFalse("There should be no notifications of any other kind", invalidResponse.get());
    } else {
        Assert.assertEquals("Failure detector should have been notified", 1, failedReplicaIds.size());
        Assert.assertEquals("Failed notification should have arrived for the failed replica", replicaIdToFail, failedReplicaIds.get(0));
        Assert.assertEquals("Successful notification should have arrived for replicas that were up", opHelper.requestParallelism - 1, successfulResponseCount.get());
        Assert.assertFalse("There should be no notifications of any other kind", invalidResponse.get());
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ResponseInfo(com.github.ambry.network.ResponseInfo) ArrayList(java.util.ArrayList) RequestInfo(com.github.ambry.network.RequestInfo) ReplicaId(com.github.ambry.clustermap.ReplicaId) HashSet(java.util.HashSet)

Example 15 with ResponseInfo

use of com.github.ambry.network.ResponseInfo in project ambry by linkedin.

the class GetBlobOperationTest method testRequestTimeoutAndBlobNotFoundLocalTimeout.

/**
 * Test the case where 2 local replicas time out. The remaining local replica and all remote replicas respond
 * with Blob_Not_Found.
 * @throws Exception
 */
@Test
public void testRequestTimeoutAndBlobNotFoundLocalTimeout() throws Exception {
    assumeTrue(operationTrackerType.equals(AdaptiveOperationTracker.class.getSimpleName()));
    doPut();
    GetBlobOperation op = createOperation(routerConfig, null);
    AdaptiveOperationTracker tracker = (AdaptiveOperationTracker) op.getFirstChunkOperationTrackerInUse();
    correlationIdToGetOperation.clear();
    for (MockServer server : mockServerLayout.getMockServers()) {
        server.setServerErrorForAllRequests(ServerErrorCode.Blob_Not_Found);
    }
    op.poll(requestRegistrationCallback);
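    // Advance the mock time past routerRequestTimeoutMs so the in-flight local-replica requests are treated as timed out.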
    time.sleep(routerConfig.routerRequestTimeoutMs + 1);
    // The operation should receive responses from the remaining local replica and all remote replicas.
    while (!op.isOperationComplete()) {
        op.poll(requestRegistrationCallback);
        List<ResponseInfo> responses = sendAndWaitForResponses(requestRegistrationCallback.getRequestsToSend());
        for (ResponseInfo responseInfo : responses) {
            GetResponse getResponse = responseInfo.getError() == null ? GetResponse.readFrom(new NettyByteBufDataInputStream(responseInfo.content()), mockClusterMap) : null;
            op.handleResponse(responseInfo, getResponse);
            responseInfo.release();
        }
    }
    RouterException routerException = (RouterException) op.getOperationException();
    // error code should be OperationTimedOut because it precedes BlobDoesNotExist
    Assert.assertEquals(RouterErrorCode.OperationTimedOut, routerException.getErrorCode());
    Histogram localColoTracker = tracker.getLatencyHistogram(RouterTestHelpers.getAnyReplica(blobId, true, localDcName));
    Histogram crossColoTracker = tracker.getLatencyHistogram(RouterTestHelpers.getAnyReplica(blobId, false, localDcName));
    // the count of data points in the local colo histogram should be 1, because the first 2 requests timed out
    Assert.assertEquals("The number of data points in local colo latency histogram is not expected", 1, localColoTracker.getCount());
    // the count of data points in the cross colo histogram should be 6 because all remote replicas respond with a proper error code
    Assert.assertEquals("The number of data points in cross colo latency histogram is not expected", 6, crossColoTracker.getCount());
}
Also used : ResponseInfo(com.github.ambry.network.ResponseInfo) NettyByteBufDataInputStream(com.github.ambry.utils.NettyByteBufDataInputStream) Histogram(com.codahale.metrics.Histogram) GetResponse(com.github.ambry.protocol.GetResponse) PutManagerTest(com.github.ambry.router.PutManagerTest) Test(org.junit.Test)

Aggregations

ResponseInfo (com.github.ambry.network.ResponseInfo): 44
RequestInfo (com.github.ambry.network.RequestInfo): 33
ArrayList (java.util.ArrayList): 25
Test (org.junit.Test): 18
GetResponse (com.github.ambry.protocol.GetResponse): 17
NettyByteBufDataInputStream (com.github.ambry.utils.NettyByteBufDataInputStream): 17
PartitionRequestInfo (com.github.ambry.protocol.PartitionRequestInfo): 12
BlobProperties (com.github.ambry.messageformat.BlobProperties): 9
PutResponse (com.github.ambry.protocol.PutResponse): 9
DataInputStream (java.io.DataInputStream): 9
BlobId (com.github.ambry.commons.BlobId): 8
LoggingNotificationSystem (com.github.ambry.commons.LoggingNotificationSystem): 6
PartitionResponseInfo (com.github.ambry.protocol.PartitionResponseInfo): 6
ReplicaId (com.github.ambry.clustermap.ReplicaId): 5
VerifiableProperties (com.github.ambry.config.VerifiableProperties): 5
AdminRequest (com.github.ambry.protocol.AdminRequest): 5
BlobStoreControlAdminRequest (com.github.ambry.protocol.BlobStoreControlAdminRequest): 5
CatchupStatusAdminRequest (com.github.ambry.protocol.CatchupStatusAdminRequest): 5
CatchupStatusAdminResponse (com.github.ambry.protocol.CatchupStatusAdminResponse): 5
ReplicationControlAdminRequest (com.github.ambry.protocol.ReplicationControlAdminRequest): 5