Example 1 with ConnectedChannel

Use of com.github.ambry.network.ConnectedChannel in project ambry by linkedin.

The class ReplicaThread, method replicate.

/**
 * Replicates from the given replicas.
 * @param replicasToReplicate lists of {@link RemoteReplicaInfo}, grouped by data node
 */
void replicate(List<List<RemoteReplicaInfo>> replicasToReplicate) {
    // shuffle the nodes
    Collections.shuffle(replicasToReplicate);
    for (List<RemoteReplicaInfo> replicasToReplicatePerNode : replicasToReplicate) {
        if (!running) {
            break;
        }
        DataNodeId remoteNode = replicasToReplicatePerNode.get(0).getReplicaId().getDataNodeId();
        logger.trace("Remote node: {} Thread name: {} Remote replicas: {}", remoteNode, threadName, replicasToReplicatePerNode);
        Timer.Context context = null;
        Timer.Context portTypeBasedContext = null;
        if (replicatingFromRemoteColo) {
            context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            }
        } else {
            context = replicationMetrics.intraColoReplicationLatency.time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
            }
        }
        ConnectedChannel connectedChannel = null;
        long checkoutConnectionTimeInMs = -1;
        long exchangeMetadataTimeInMs = -1;
        long fixMissingStoreKeysTimeInMs = -1;
        long replicationStartTimeInMs = SystemTime.getInstance().milliseconds();
        long startTimeInMs = replicationStartTimeInMs;
        List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<RemoteReplicaInfo>();
        for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
            ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
            if (!replicationDisabledPartitions.contains(replicaId.getPartitionId()) && !replicaId.isDown()) {
                activeReplicasPerNode.add(remoteReplicaInfo);
            }
        }
        if (activeReplicasPerNode.size() > 0) {
            try {
                connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                checkoutConnectionTimeInMs = SystemTime.getInstance().milliseconds() - startTimeInMs;
                startTimeInMs = SystemTime.getInstance().milliseconds();
                List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, activeReplicasPerNode);
                exchangeMetadataTimeInMs = SystemTime.getInstance().milliseconds() - startTimeInMs;
                startTimeInMs = SystemTime.getInstance().milliseconds();
                fixMissingStoreKeys(connectedChannel, activeReplicasPerNode, exchangeMetadataResponseList);
                fixMissingStoreKeysTimeInMs = SystemTime.getInstance().milliseconds() - startTimeInMs;
            } catch (Throwable e) {
                if (checkoutConnectionTimeInMs == -1) {
                    // throwable happened in checkout connection phase
                    checkoutConnectionTimeInMs = SystemTime.getInstance().milliseconds() - startTimeInMs;
                    responseHandler.onEvent(activeReplicasPerNode.get(0).getReplicaId(), e);
                } else if (exchangeMetadataTimeInMs == -1) {
                    // throwable happened in exchange metadata phase
                    exchangeMetadataTimeInMs = SystemTime.getInstance().milliseconds() - startTimeInMs;
                } else if (fixMissingStoreKeysTimeInMs == -1) {
                    // throwable happened in fix missing store keys phase
                    fixMissingStoreKeysTimeInMs = SystemTime.getInstance().milliseconds() - startTimeInMs;
                }
                logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Active " + "remote replicas: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing store key " + "time {}", remoteNode, threadName, replicasToReplicatePerNode, activeReplicasPerNode, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
                replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
                if (connectedChannel != null) {
                    connectionPool.destroyConnection(connectedChannel);
                    connectedChannel = null;
                }
            } finally {
                long totalReplicationTime = SystemTime.getInstance().milliseconds() - replicationStartTimeInMs;
                replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
                if (connectedChannel != null) {
                    connectionPool.checkInConnection(connectedChannel);
                }
                context.stop();
                portTypeBasedContext.stop();
            }
        }
    }
}
Also used : Timer(com.codahale.metrics.Timer) ArrayList(java.util.ArrayList) ConnectedChannel(com.github.ambry.network.ConnectedChannel) DataNodeId(com.github.ambry.clustermap.DataNodeId) ReplicaId(com.github.ambry.clustermap.ReplicaId)
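
A recurring shape in this example is the pool checkout discipline: check a channel out, use it for the whole exchange, destroy it on any error so a possibly broken socket is never reused, and only check it back in when it is still healthy. The sketch below distills that pattern; the Pool and Channel interfaces are hypothetical stand-ins defined only so the snippet compiles on its own, not ambry's real com.github.ambry.network API.

import java.io.IOException;

// Minimal stand-ins for ambry's ConnectionPool and ConnectedChannel, defined here
// only so this sketch is self-contained; the real interfaces differ in detail.
interface Channel { void send(Object request) throws IOException; }
interface Pool {
    Channel checkOut(String host, int port, long timeoutMs) throws IOException;
    void checkIn(Channel channel);
    void destroy(Channel channel);
}

public class CheckoutPattern {
    // On success the channel is returned to the pool for reuse; on any failure it
    // is destroyed so the pool never hands out a broken socket again.
    static void withConnection(Pool pool, String host, int port, Object request) {
        Channel channel = null;
        try {
            channel = pool.checkOut(host, port, 1000);
            channel.send(request);
        } catch (IOException e) {
            if (channel != null) {
                pool.destroy(channel);
                channel = null;
            }
        } finally {
            if (channel != null) {
                pool.checkIn(channel);
            }
        }
    }
}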

Example 2 with ConnectedChannel

Use of com.github.ambry.network.ConnectedChannel in project ambry by linkedin.

The class ServerReadPerformance, method main.

public static void main(String[] args) {
    ConnectionPool connectionPool = null;
    FileWriter writer = null;
    try {
        OptionParser parser = new OptionParser();
        ArgumentAcceptingOptionSpec<String> logToReadOpt = parser.accepts("logToRead", "The log that needs to be replayed for traffic").withRequiredArg().describedAs("log_to_read").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> hardwareLayoutOpt = parser.accepts("hardwareLayout", "The path of the hardware layout file").withRequiredArg().describedAs("hardware_layout").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> partitionLayoutOpt = parser.accepts("partitionLayout", "The path of the partition layout file").withRequiredArg().describedAs("partition_layout").ofType(String.class);
        ArgumentAcceptingOptionSpec<Integer> readsPerSecondOpt = parser.accepts("readsPerSecond", "The rate at which reads need to be performed").withRequiredArg().describedAs("The number of reads per second").ofType(Integer.class).defaultsTo(1000);
        ArgumentAcceptingOptionSpec<Long> measurementIntervalOpt = parser.accepts("measurementInterval", "The interval in second to report performance result").withOptionalArg().describedAs("The CPU time spent for getting blobs, not wall time").ofType(Long.class).defaultsTo(300L);
        ArgumentAcceptingOptionSpec<Boolean> verboseLoggingOpt = parser.accepts("enableVerboseLogging", "Enables verbose logging").withOptionalArg().describedAs("Enable verbose logging").ofType(Boolean.class).defaultsTo(false);
        ArgumentAcceptingOptionSpec<String> sslEnabledDatacentersOpt = parser.accepts("sslEnabledDatacenters", "Datacenters to which ssl should be enabled").withOptionalArg().describedAs("Comma separated list").ofType(String.class).defaultsTo("");
        ArgumentAcceptingOptionSpec<String> sslKeystorePathOpt = parser.accepts("sslKeystorePath", "SSL key store path").withOptionalArg().describedAs("The file path of SSL key store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslKeystoreTypeOpt = parser.accepts("sslKeystoreType", "SSL key store type").withOptionalArg().describedAs("The type of SSL key store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslTruststorePathOpt = parser.accepts("sslTruststorePath", "SSL trust store path").withOptionalArg().describedAs("The file path of SSL trust store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslKeystorePasswordOpt = parser.accepts("sslKeystorePassword", "SSL key store password").withOptionalArg().describedAs("The password of SSL key store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslKeyPasswordOpt = parser.accepts("sslKeyPassword", "SSL key password").withOptionalArg().describedAs("The password of SSL private key").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslTruststorePasswordOpt = parser.accepts("sslTruststorePassword", "SSL trust store password").withOptionalArg().describedAs("The password of SSL trust store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslCipherSuitesOpt = parser.accepts("sslCipherSuites", "SSL enabled cipher suites").withOptionalArg().describedAs("Comma separated list").defaultsTo("TLS_RSA_WITH_AES_128_CBC_SHA").ofType(String.class);
        OptionSet options = parser.parse(args);
        ArrayList<OptionSpec> listOpt = new ArrayList<>();
        listOpt.add(logToReadOpt);
        listOpt.add(hardwareLayoutOpt);
        listOpt.add(partitionLayoutOpt);
        ToolUtils.ensureOrExit(listOpt, options, parser);
        long measurementIntervalNs = options.valueOf(measurementIntervalOpt) * SystemTime.NsPerSec;
        ToolUtils.validateSSLOptions(options, parser, sslEnabledDatacentersOpt, sslKeystorePathOpt, sslKeystoreTypeOpt, sslTruststorePathOpt, sslKeystorePasswordOpt, sslKeyPasswordOpt, sslTruststorePasswordOpt);
        String sslEnabledDatacenters = options.valueOf(sslEnabledDatacentersOpt);
        Properties sslProperties;
        if (sslEnabledDatacenters.length() != 0) {
            sslProperties = ToolUtils.createSSLProperties(sslEnabledDatacenters, options.valueOf(sslKeystorePathOpt), options.valueOf(sslKeystoreTypeOpt), options.valueOf(sslKeystorePasswordOpt), options.valueOf(sslKeyPasswordOpt), options.valueOf(sslTruststorePathOpt), options.valueOf(sslTruststorePasswordOpt), options.valueOf(sslCipherSuitesOpt));
        } else {
            sslProperties = new Properties();
        }
        ToolUtils.addClusterMapProperties(sslProperties);
        String logToRead = options.valueOf(logToReadOpt);
        int readsPerSecond = options.valueOf(readsPerSecondOpt);
        boolean enableVerboseLogging = options.has(verboseLoggingOpt);
        if (enableVerboseLogging) {
            System.out.println("Enabled verbose logging");
        }
        File logFile = new File(System.getProperty("user.dir"), "readperfresult");
        writer = new FileWriter(logFile);
        String hardwareLayoutPath = options.valueOf(hardwareLayoutOpt);
        String partitionLayoutPath = options.valueOf(partitionLayoutOpt);
        ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(sslProperties));
        ClusterMap map = ((ClusterAgentsFactory) Utils.getObj(clusterMapConfig.clusterMapClusterAgentsFactory, clusterMapConfig, hardwareLayoutPath, partitionLayoutPath)).getClusterMap();
        final AtomicLong totalTimeTaken = new AtomicLong(0);
        final AtomicLong totalReads = new AtomicLong(0);
        final AtomicBoolean shutdown = new AtomicBoolean(false);
        // attach shutdown handler to catch control-c
        Runtime.getRuntime().addShutdownHook(new Thread() {

            public void run() {
                try {
                    System.out.println("Shutdown invoked");
                    shutdown.set(true);
                    String message = "Total reads : " + totalReads.get() + "  Total time taken : " + totalTimeTaken.get() + " Nano Seconds  Average time taken per read " + ((double) totalTimeTaken.get()) / SystemTime.NsPerSec / totalReads.get() + " Seconds";
                    System.out.println(message);
                } catch (Exception e) {
                    System.out.println("Error while shutting down " + e);
                }
            }
        });
        final BufferedReader br = new BufferedReader(new FileReader(logToRead));
        Throttler throttler = new Throttler(readsPerSecond, 100, true, SystemTime.getInstance());
        String line;
        ConnectedChannel channel = null;
        ConnectionPoolConfig connectionPoolConfig = new ConnectionPoolConfig(new VerifiableProperties(new Properties()));
        VerifiableProperties vProps = new VerifiableProperties(sslProperties);
        SSLConfig sslConfig = new SSLConfig(vProps);
        clusterMapConfig = new ClusterMapConfig(vProps);
        connectionPool = new BlockingChannelConnectionPool(connectionPoolConfig, sslConfig, clusterMapConfig, new MetricRegistry());
        long totalNumberOfGetBlobs = 0;
        long totalLatencyForGetBlobs = 0;
        ArrayList<Long> latenciesForGetBlobs = new ArrayList<Long>();
        long maxLatencyForGetBlobs = 0;
        long minLatencyForGetBlobs = Long.MAX_VALUE;
        while ((line = br.readLine()) != null) {
            String[] id = line.split("-");
            BlobData blobData = null;
            BlobId blobId = new BlobId(id[1], map);
            ArrayList<BlobId> blobIds = new ArrayList<BlobId>();
            blobIds.add(blobId);
            for (ReplicaId replicaId : blobId.getPartition().getReplicaIds()) {
                long startTimeGetBlob = 0;
                ArrayList<PartitionRequestInfo> partitionRequestInfoList = new ArrayList<PartitionRequestInfo>();
                try {
                    partitionRequestInfoList.clear();
                    PartitionRequestInfo partitionRequestInfo = new PartitionRequestInfo(blobId.getPartition(), blobIds);
                    partitionRequestInfoList.add(partitionRequestInfo);
                    GetRequest getRequest = new GetRequest(1, "getperf", MessageFormatFlags.Blob, partitionRequestInfoList, GetOption.None);
                    Port port = replicaId.getDataNodeId().getPortToConnectTo();
                    channel = connectionPool.checkOutConnection(replicaId.getDataNodeId().getHostname(), port, 10000);
                    startTimeGetBlob = SystemTime.getInstance().nanoseconds();
                    channel.send(getRequest);
                    DataInputStream receiveStream = channel.receive().getInputStream();
                    GetResponse getResponse = GetResponse.readFrom(receiveStream, map);
                    blobData = MessageFormatRecord.deserializeBlob(getResponse.getInputStream());
                    long sizeRead = 0;
                    byte[] outputBuffer = new byte[(int) blobData.getSize()];
                    ByteBufferOutputStream streamOut = new ByteBufferOutputStream(ByteBuffer.wrap(outputBuffer));
                    ByteBuf buffer = blobData.content();
                    try {
                        buffer.readBytes(streamOut, (int) blobData.getSize());
                    } finally {
                        buffer.release();
                    }
                    long latencyPerBlob = SystemTime.getInstance().nanoseconds() - startTimeGetBlob;
                    totalTimeTaken.addAndGet(latencyPerBlob);
                    latenciesForGetBlobs.add(latencyPerBlob);
                    totalReads.incrementAndGet();
                    totalNumberOfGetBlobs++;
                    totalLatencyForGetBlobs += latencyPerBlob;
                    if (enableVerboseLogging) {
                        System.out.println("Time taken to get blob id " + blobId + " in ms " + latencyPerBlob / SystemTime.NsPerMs);
                    }
                    if (latencyPerBlob > maxLatencyForGetBlobs) {
                        maxLatencyForGetBlobs = latencyPerBlob;
                    }
                    if (latencyPerBlob < minLatencyForGetBlobs) {
                        minLatencyForGetBlobs = latencyPerBlob;
                    }
                    if (totalLatencyForGetBlobs >= measurementIntervalNs) {
                        Collections.sort(latenciesForGetBlobs);
                        int index99 = (int) (latenciesForGetBlobs.size() * 0.99) - 1;
                        int index95 = (int) (latenciesForGetBlobs.size() * 0.95) - 1;
                        String message = totalNumberOfGetBlobs + "," + (double) latenciesForGetBlobs.get(index99) / SystemTime.NsPerSec + "," + (double) latenciesForGetBlobs.get(index95) / SystemTime.NsPerSec + "," + ((double) totalLatencyForGetBlobs / SystemTime.NsPerSec / totalNumberOfGetBlobs);
                        System.out.println(message);
                        writer.write(message + "\n");
                        totalLatencyForGetBlobs = 0;
                        latenciesForGetBlobs.clear();
                        totalNumberOfGetBlobs = 0;
                        maxLatencyForGetBlobs = 0;
                        minLatencyForGetBlobs = Long.MAX_VALUE;
                    }
                    partitionRequestInfoList.clear();
                    partitionRequestInfo = new PartitionRequestInfo(blobId.getPartition(), blobIds);
                    partitionRequestInfoList.add(partitionRequestInfo);
                    GetRequest getRequestProperties = new GetRequest(1, "getperf", MessageFormatFlags.BlobProperties, partitionRequestInfoList, GetOption.None);
                    long startTimeGetBlobProperties = SystemTime.getInstance().nanoseconds();
                    channel.send(getRequestProperties);
                    DataInputStream receivePropertyStream = channel.receive().getInputStream();
                    GetResponse getResponseProperty = GetResponse.readFrom(receivePropertyStream, map);
                    BlobProperties blobProperties = MessageFormatRecord.deserializeBlobProperties(getResponseProperty.getInputStream());
                    long endTimeGetBlobProperties = SystemTime.getInstance().nanoseconds() - startTimeGetBlobProperties;
                    partitionRequestInfoList.clear();
                    partitionRequestInfo = new PartitionRequestInfo(blobId.getPartition(), blobIds);
                    partitionRequestInfoList.add(partitionRequestInfo);
                    GetRequest getRequestUserMetadata = new GetRequest(1, "getperf", MessageFormatFlags.BlobUserMetadata, partitionRequestInfoList, GetOption.None);
                    long startTimeGetBlobUserMetadata = SystemTime.getInstance().nanoseconds();
                    channel.send(getRequestUserMetadata);
                    DataInputStream receiveUserMetadataStream = channel.receive().getInputStream();
                    GetResponse getResponseUserMetadata = GetResponse.readFrom(receiveUserMetadataStream, map);
                    ByteBuffer userMetadata = MessageFormatRecord.deserializeUserMetadata(getResponseUserMetadata.getInputStream());
                    long endTimeGetBlobUserMetadata = SystemTime.getInstance().nanoseconds() - startTimeGetBlobUserMetadata;
                    // delete the blob
                    DeleteRequest deleteRequest = new DeleteRequest(0, "perf", blobId, System.currentTimeMillis());
                    channel.send(deleteRequest);
                    DeleteResponse deleteResponse = DeleteResponse.readFrom(channel.receive().getInputStream());
                    if (deleteResponse.getError() != ServerErrorCode.No_Error) {
                        throw new UnexpectedException("error " + deleteResponse.getError());
                    }
                    throttler.maybeThrottle(1);
                } finally {
                    if (channel != null) {
                        connectionPool.checkInConnection(channel);
                        channel = null;
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("Error in server read performance " + e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                System.out.println("Error when closing writer");
            }
        }
        if (connectionPool != null) {
            connectionPool.shutdown();
        }
    }
}
Also used : ClusterMap(com.github.ambry.clustermap.ClusterMap) Port(com.github.ambry.network.Port) ArrayList(java.util.ArrayList) GetRequest(com.github.ambry.protocol.GetRequest) BlobData(com.github.ambry.messageformat.BlobData) ClusterAgentsFactory(com.github.ambry.clustermap.ClusterAgentsFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Throttler(com.github.ambry.utils.Throttler) UnexpectedException(java.rmi.UnexpectedException) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) ConnectedChannel(com.github.ambry.network.ConnectedChannel) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) BlockingChannelConnectionPool(com.github.ambry.network.BlockingChannelConnectionPool) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) DeleteResponse(com.github.ambry.protocol.DeleteResponse) AtomicLong(java.util.concurrent.atomic.AtomicLong) File(java.io.File) BlockingChannelConnectionPool(com.github.ambry.network.BlockingChannelConnectionPool) ConnectionPool(com.github.ambry.network.ConnectionPool) OptionSpec(joptsimple.OptionSpec) ArgumentAcceptingOptionSpec(joptsimple.ArgumentAcceptingOptionSpec) ConnectionPoolConfig(com.github.ambry.config.ConnectionPoolConfig) FileWriter(java.io.FileWriter) BlobProperties(com.github.ambry.messageformat.BlobProperties) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ByteBuf(io.netty.buffer.ByteBuf) OptionParser(joptsimple.OptionParser) FileReader(java.io.FileReader) SSLConfig(com.github.ambry.config.SSLConfig) PartitionRequestInfo(com.github.ambry.protocol.PartitionRequestInfo) DataInputStream(java.io.DataInputStream) GetResponse(com.github.ambry.protocol.GetResponse) ByteBuffer(java.nio.ByteBuffer) UnexpectedException(java.rmi.UnexpectedException) ReplicaId(com.github.ambry.clustermap.ReplicaId) AtomicLong(java.util.concurrent.atomic.AtomicLong) ByteBufferOutputStream(com.github.ambry.utils.ByteBufferOutputStream) BlobProperties(com.github.ambry.messageformat.BlobProperties) BufferedReader(java.io.BufferedReader) OptionSet(joptsimple.OptionSet) BlobId(com.github.ambry.commons.BlobId) DeleteRequest(com.github.ambry.protocol.DeleteRequest)
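
The measurement-interval block in this example sorts the collected latencies and prints the p99, p95, and mean in seconds. Here is a minimal, self-contained sketch of that computation, assuming nanosecond inputs; unlike the original, it clamps the percentile indices so small samples do not index out of bounds.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class LatencyReport {

    // Sorts the latencies and reports "count,p99,p95,mean", all in seconds,
    // mirroring the line Example 2 writes once per measurement interval.
    static String summarize(List<Long> latenciesNs) {
        List<Long> sorted = new ArrayList<>(latenciesNs);
        Collections.sort(sorted);
        double nsPerSec = 1_000_000_000.0;
        // Clamp to index 0 so a sample of fewer than ~100 points stays valid.
        int index99 = Math.max((int) (sorted.size() * 0.99) - 1, 0);
        int index95 = Math.max((int) (sorted.size() * 0.95) - 1, 0);
        double mean = sorted.stream().mapToLong(Long::longValue).average().orElse(0) / nsPerSec;
        return sorted.size() + "," + sorted.get(index99) / nsPerSec + ","
            + sorted.get(index95) / nsPerSec + "," + mean;
    }

    public static void main(String[] args) {
        System.out.println(summarize(List.of(1_000_000L, 2_000_000L, 50_000_000L)));
    }
}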

Example 3 with ConnectedChannel

Use of com.github.ambry.network.ConnectedChannel in project ambry by linkedin.

The class ReplicaThread, method replicate.

/**
 * Does replication for replicas grouped by {@link DataNodeId}.
 * A replication cycle between two replicas involves the following steps:
 *    1. Exchange metadata: fetch the metadata of blobs added to the remote replica since the last synchronization
 *       point and filter out the ones already present in the local store.
 *    2. Fetch missing blobs: fetch the missing blobs by issuing a GET request to the remote replica and write them
 *       to the local store.
 *
 *  During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
 *  from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
 *  leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
 *  standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader<->leader
 *  exchanges and intra-dc replication.
 *
 *  Here is a table listing what is exchanged between local and remote replicas based on their roles
 *  (leader/standby) when {@link ReplicationModelType} is LEADER_BASED.
 *
 *              |   Local Leader    |     Local Standby   |   Remote Leader   |  Remote Standby
 *            -------------------------------------------------------------------------------------
 *     Leader:  |        ---        |  metadata and data  | metadata and data |   metadata only
 *     Standby: | metadata and data |  metadata and data  | metadata only     |   metadata only
 */
public void replicate() {
    boolean allCaughtUp = true;
    Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
    logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
    for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
        DataNodeId remoteNode = entry.getKey();
        if (!running) {
            break;
        }
        List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
        Timer.Context context = null;
        Timer.Context portTypeBasedContext = null;
        if (replicatingFromRemoteColo) {
            context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            }
        } else {
            context = replicationMetrics.intraColoReplicationLatency.time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
            }
        }
        ConnectedChannel connectedChannel = null;
        long checkoutConnectionTimeInMs = -1;
        long exchangeMetadataTimeInMs = -1;
        long fixMissingStoreKeysTimeInMs = -1;
        long replicationStartTimeInMs = time.milliseconds();
        long startTimeInMs = replicationStartTimeInMs;
        // Get a list of active replicas that need to be included in this replication cycle
        List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
        List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
        for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
            ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
            boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
            if (replicaId.isDown() || inBackoff || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
                logger.debug("Skipping replication on replica {} because one of following conditions is true: remote replica is down " + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.", replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff, remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE, replicationDisabledPartitions.contains(replicaId.getPartitionId()));
                continue;
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // check if all missing keys for standby replicas from previous replication cycle are now obtained
                // via leader replica. If we still have missing keys, don't include them in current replication cycle
                // to avoid sending duplicate metadata requests since their token wouldn't have advanced.
                processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
                if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
                    standbyReplicasWithNoProgress.add(remoteReplicaInfo);
                    continue;
                }
            }
            activeReplicasPerNode.add(remoteReplicaInfo);
        }
        logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
        // use a variable to track the current replica list to replicate (for logging purposes)
        List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
        try {
            if (activeReplicasPerNode.size() > 0) {
                allCaughtUp = false;
                // if maxReplicaCountPerRequest > 0, split remote replicas on same node into multiple lists; otherwise there is
                // no limit.
                List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0 ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest) : Collections.singletonList(activeReplicasPerNode);
                startTimeInMs = time.milliseconds();
                connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                // we checkout ConnectedChannel once and replicate remote replicas in batch via same ConnectedChannel
                for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
                    exchangeMetadataTimeInMs = -1;
                    fixMissingStoreKeysTimeInMs = -1;
                    currentReplicaList = replicaSubList;
                    logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
                    startTimeInMs = time.milliseconds();
                    List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, replicaSubList);
                    exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
                    if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                        // If leader based replication is enabled and we are replicating from remote colo, fetch the missing blobs
                        // only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
                        // Non-leader replica pairs (standby <-> leaders, leader <-> standby, standby <-> standby) will get their
                        // missing blobs from their leader pair exchanges and intra-dc replication.
                        List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
                        List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
                        getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList, exchangeMetadataResponseListForLeaderReplicas);
                        replicaSubList = leaderReplicaList;
                        exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
                    }
                    if (replicaSubList.size() > 0) {
                        startTimeInMs = time.milliseconds();
                        fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
                        fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                }
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // Get a list of blocked standby replicas whose missing keys haven't arrived for long time.
                // Use case: In leader-based cross colo replication, standby replicas don't send GET requests for missing keys
                // found in metadata exchange and expect them to come via leader <-> leader replication.
                // This is a safety condition to ensure that standby replicas are not stuck waiting for the keys to come from leader
                // by fetching the missing keys themselves.
                // TODO: As an improvement, we could first fetch missing blobs from the local leader/other replicas within the local datacenter.
                // TODO: If the local fetch returns Blob_Not_Found, we could then fetch the blob from replicas in the remote datacenter.
                // This will involve co-ordination between replica threads containing replicas of same partition.
                List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress = getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
                if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
                    allCaughtUp = false;
                    currentReplicaList = standbyReplicasTimedOutOnNoProgress;
                    if (connectedChannel == null) {
                        checkoutConnectionTimeInMs = -1;
                        startTimeInMs = time.milliseconds();
                        connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), standbyReplicasTimedOutOnNoProgress.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                        checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                    List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas = standbyReplicasTimedOutOnNoProgress.stream().map(remoteReplicaInfo -> new ExchangeMetadataResponse(remoteReplicaInfo.getExchangeMetadataResponse())).collect(Collectors.toList());
                    // Convert (and cache) the remote keys being fetched, since the StoreKeyConverter would have cleared
                    // these keys from its cache while replicating with other replicas before the timeout occurred for these standby replicas.
                    List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream().map(ExchangeMetadataResponse::getMissingStoreKeys).flatMap(Collection::stream).collect(Collectors.toList());
                    convertStoreKeys(storeKeysToConvert);
                    exchangeMetadataTimeInMs = 0;
                    fixMissingStoreKeysTimeInMs = -1;
                    logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} timed out on no progress", currentReplicaList);
                    startTimeInMs = time.milliseconds();
                    fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress, exchangeMetadataResponseListForBlockedReplicas, true);
                    fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                }
            }
        } catch (Throwable e) {
            if (checkoutConnectionTimeInMs == -1) {
                // throwable happened in checkout connection phase
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
            } else if (exchangeMetadataTimeInMs == -1) {
                // throwable happened in exchange metadata phase
                exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
            } else if (fixMissingStoreKeysTimeInMs == -1) {
                // throwable happened in fix missing store keys phase
                fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
            }
            logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active " + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing " + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
            replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
            if (connectedChannel != null) {
                connectionPool.destroyConnection(connectedChannel);
                connectedChannel = null;
            }
        } finally {
            long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
            replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
            if (connectedChannel != null) {
                connectionPool.checkInConnection(connectedChannel);
            }
            context.stop();
            portTypeBasedContext.stop();
        }
    }
    long sleepDurationMs = 0;
    if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
        sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
        idleCount.inc();
    } else if (threadThrottleDurationMs > 0) {
        sleepDurationMs = threadThrottleDurationMs;
        throttleCount.inc();
    }
    if (sleepDurationMs > 0) {
        try {
            long currentTime = time.milliseconds();
            time.sleep(sleepDurationMs);
            logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
        } catch (InterruptedException e) {
            logger.error("Received interrupted exception during throttling", e);
        }
    }
}
Also used : GetOption(com.github.ambry.protocol.GetOption) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) DataNodeId(com.github.ambry.clustermap.DataNodeId) LoggerFactory(org.slf4j.LoggerFactory) MessageFormatWriteSet(com.github.ambry.messageformat.MessageFormatWriteSet) StoreErrorCodes(com.github.ambry.store.StoreErrorCodes) GetResponse(com.github.ambry.protocol.GetResponse) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Counter(com.codahale.metrics.Counter) ReplicaMetadataRequest(com.github.ambry.protocol.ReplicaMetadataRequest) GetRequest(com.github.ambry.protocol.GetRequest) ReplicationConfig(com.github.ambry.config.ReplicationConfig) NotificationSystem(com.github.ambry.notification.NotificationSystem) ReplicaSyncUpManager(com.github.ambry.clustermap.ReplicaSyncUpManager) PartitionResponseInfo(com.github.ambry.protocol.PartitionResponseInfo) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) Collectors(java.util.stream.Collectors) ConnectedChannel(com.github.ambry.network.ConnectedChannel) ReplicaMetadataRequestInfo(com.github.ambry.protocol.ReplicaMetadataRequestInfo) CountDownLatch(java.util.concurrent.CountDownLatch) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) MessageFormatFlags(com.github.ambry.messageformat.MessageFormatFlags) UpdateType(com.github.ambry.notification.UpdateType) Timer(com.codahale.metrics.Timer) MessageSievingInputStream(com.github.ambry.messageformat.MessageSievingInputStream) PartitionId(com.github.ambry.clustermap.PartitionId) BlobId(com.github.ambry.commons.BlobId) ResponseHandler(com.github.ambry.commons.ResponseHandler) PartitionRequestInfo(com.github.ambry.protocol.PartitionRequestInfo) BlobReplicaSourceType(com.github.ambry.notification.BlobReplicaSourceType) ServerErrorCode(com.github.ambry.server.ServerErrorCode) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) NettyByteBufDataInputStream(com.github.ambry.utils.NettyByteBufDataInputStream) HashSet(java.util.HashSet) Transformer(com.github.ambry.store.Transformer) ChannelOutput(com.github.ambry.network.ChannelOutput) StoreException(com.github.ambry.store.StoreException) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) CloudDataNode(com.github.ambry.clustermap.CloudDataNode) Time(com.github.ambry.utils.Time) ReplicaState(com.github.ambry.clustermap.ReplicaState) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ConnectionPool(com.github.ambry.network.ConnectionPool) ClusterMap(com.github.ambry.clustermap.ClusterMap) IOException(java.io.IOException) Condition(java.util.concurrent.locks.Condition) MessageInfo(com.github.ambry.store.MessageInfo) ReplicaId(com.github.ambry.clustermap.ReplicaId) BlobStore(com.github.ambry.store.BlobStore) Collections(java.util.Collections) ArrayList(java.util.ArrayList) ConnectedChannel(com.github.ambry.network.ConnectedChannel) StoreKey(com.github.ambry.store.StoreKey) ReplicaId(com.github.ambry.clustermap.ReplicaId) Timer(com.codahale.metrics.Timer) List(java.util.List) ArrayList(java.util.ArrayList) DataNodeId(com.github.ambry.clustermap.DataNodeId) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap)
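
Example 3 splits the active replicas into sublists of at most maxReplicaCountPerRequest via Utils.partitionList before replicating them in batches over one checked-out channel. A stand-alone sketch of that batching is below; treat it as an assumption about the helper's behavior rather than ambry's actual implementation.

import java.util.ArrayList;
import java.util.List;

public class PartitionList {

    // Splits a list into consecutive sublists of at most batchSize elements,
    // the batching shape Example 3 relies on for metadata/GET request sizing.
    static <T> List<List<T>> partition(List<T> input, int batchSize) {
        List<List<T>> batches = new ArrayList<>();
        for (int i = 0; i < input.size(); i += batchSize) {
            batches.add(input.subList(i, Math.min(i + batchSize, input.size())));
        }
        return batches;
    }

    public static void main(String[] args) {
        System.out.println(partition(List.of(1, 2, 3, 4, 5), 2)); // [[1, 2], [3, 4], [5]]
    }
}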

Example 4 with ConnectedChannel

Use of com.github.ambry.network.ConnectedChannel in project ambry by linkedin.

The class ServerTestUtil, method undeleteCornerCasesTest.

static void undeleteCornerCasesTest(MockCluster cluster, PortType portType, SSLConfig clientSSLConfig1, SSLConfig clientSSLConfig2, SSLConfig clientSSLConfig3, SSLSocketFactory clientSSLSocketFactory1, SSLSocketFactory clientSSLSocketFactory2, SSLSocketFactory clientSSLSocketFactory3, MockNotificationSystem notificationSystem, Properties routerProps, boolean testEncryption) {
    MockClusterMap clusterMap = cluster.getClusterMap();
    byte[] userMetadata = new byte[1000];
    byte[] data = new byte[31870];
    byte[] encryptionKey = new byte[100];
    short accountId = Utils.getRandomShort(TestUtils.RANDOM);
    short containerId = Utils.getRandomShort(TestUtils.RANDOM);
    BlobProperties properties = new BlobProperties(31870, "serviceid1", accountId, containerId, testEncryption, cluster.time.milliseconds());
    TestUtils.RANDOM.nextBytes(userMetadata);
    TestUtils.RANDOM.nextBytes(data);
    if (testEncryption) {
        TestUtils.RANDOM.nextBytes(encryptionKey);
    }
    short blobIdVersion = CommonTestUtils.getCurrentBlobIdVersion();
    Map<String, List<DataNodeId>> dataNodesPerDC = clusterMap.getDataNodes().stream().collect(Collectors.groupingBy(DataNodeId::getDatacenterName));
    Map<String, Pair<SSLConfig, SSLSocketFactory>> sslSettingPerDC = new HashMap<>();
    sslSettingPerDC.put("DC1", new Pair<>(clientSSLConfig1, clientSSLSocketFactory1));
    sslSettingPerDC.put("DC2", new Pair<>(clientSSLConfig2, clientSSLSocketFactory2));
    sslSettingPerDC.put("DC3", new Pair<>(clientSSLConfig3, clientSSLSocketFactory3));
    List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(MockClusterMap.DEFAULT_PARTITION_CLASS);
    DataNodeId dataNodeId = dataNodesPerDC.get("DC1").get(0);
    Router router = null;
    try {
        Properties routerProperties = getRouterProps("DC1");
        routerProperties.putAll(routerProps);
        VerifiableProperties routerVerifiableProps = new VerifiableProperties(routerProperties);
        AccountService accountService = new InMemAccountService(false, true);
        router = new NonBlockingRouterFactory(routerVerifiableProps, clusterMap, new MockNotificationSystem(clusterMap), getSSLFactoryIfRequired(routerVerifiableProps), accountService).getRouter();
        // channels to all datanodes
        List<ConnectedChannel> channels = new ArrayList<>();
        for (Map.Entry<String, List<DataNodeId>> entry : dataNodesPerDC.entrySet()) {
            Pair<SSLConfig, SSLSocketFactory> pair = sslSettingPerDC.get(entry.getKey());
            for (DataNodeId node : entry.getValue()) {
                ConnectedChannel connectedChannel = getBlockingChannelBasedOnPortType(portType, node, pair.getSecond(), pair.getFirst());
                connectedChannel.connect();
                channels.add(connectedChannel);
            }
        }
        // ////////////////////////////////////////////////////
        // Corner case 1: When only one datacenter has delete
        // ////////////////////////////////////////////////////
        BlobId blobId1 = new BlobId(blobIdVersion, BlobId.BlobIdType.NATIVE, clusterMap.getLocalDatacenterId(), properties.getAccountId(), properties.getContainerId(), partitionIds.get(0), false, BlobId.BlobDataType.DATACHUNK);
        ConnectedChannel channel = getBlockingChannelBasedOnPortType(portType, dataNodeId, clientSSLSocketFactory1, clientSSLConfig1);
        channel.connect();
        PutRequest putRequest = new PutRequest(1, "client1", blobId1, properties, ByteBuffer.wrap(userMetadata), Unpooled.wrappedBuffer(data), properties.getBlobSize(), BlobType.DataBlob, testEncryption ? ByteBuffer.wrap(encryptionKey) : null);
        DataInputStream putResponseStream = channel.sendAndReceive(putRequest).getInputStream();
        PutResponse response = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response.getError());
        notificationSystem.awaitBlobCreations(blobId1.toString());
        // Now stop replication for this partition.
        PartitionId partitionId = blobId1.getPartition();
        controlReplicationForPartition(channels, partitionId, false);
        // Now send the delete to two data nodes in the same DC
        List<DataNodeId> toBeDeleteDataNodes = dataNodesPerDC.values().stream().findFirst().get();
        Pair<SSLConfig, SSLSocketFactory> pair = sslSettingPerDC.get(toBeDeleteDataNodes.get(0).getDatacenterName());
        ConnectedChannel channel1 = getBlockingChannelBasedOnPortType(portType, toBeDeleteDataNodes.get(0), pair.getSecond(), pair.getFirst());
        channel1.connect();
        ConnectedChannel channel2 = getBlockingChannelBasedOnPortType(portType, toBeDeleteDataNodes.get(1), pair.getSecond(), pair.getFirst());
        channel2.connect();
        DeleteRequest deleteRequest1 = new DeleteRequest(1, "deleteClient", blobId1, System.currentTimeMillis());
        DataInputStream stream = channel1.sendAndReceive(deleteRequest1).getInputStream();
        DeleteResponse deleteResponse = DeleteResponse.readFrom(stream);
        releaseNettyBufUnderneathStream(stream);
        assertEquals(ServerErrorCode.No_Error, deleteResponse.getError());
        DeleteRequest deleteRequest2 = new DeleteRequest(1, "deleteClient", blobId1, deleteRequest1.getDeletionTimeInMs());
        stream = channel2.sendAndReceive(deleteRequest2).getInputStream();
        deleteResponse = DeleteResponse.readFrom(stream);
        releaseNettyBufUnderneathStream(stream);
        assertEquals(ServerErrorCode.No_Error, deleteResponse.getError());
        // Now send the undelete operation through the router; it should fail with a BlobNotDeleted error.
        Future<Void> future = router.undeleteBlob(blobId1.toString(), "service");
        try {
            future.get();
            fail("Undelete blob " + blobId1.toString() + " should fail");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof RouterException);
            assertEquals(RouterErrorCode.BlobNotDeleted, ((RouterException) e.getCause()).getErrorCode());
        }
        // Now check whether data node 1 or data node 2 has an undelete. If so, the undelete will replicate;
        // if not, the delete will replicate.
        List<PartitionRequestInfo> partitionRequestInfoList = getPartitionRequestInfoListFromBlobId(blobId1);
        boolean hasUndelete = false;
        for (ConnectedChannel connectedChannel : new ConnectedChannel[] { channel1, channel2 }) {
            GetRequest getRequest = new GetRequest(1, "clientId1", MessageFormatFlags.BlobProperties, partitionRequestInfoList, GetOption.Include_All);
            stream = connectedChannel.sendAndReceive(getRequest).getInputStream();
            GetResponse getResponse = GetResponse.readFrom(stream, clusterMap);
            assertEquals(ServerErrorCode.No_Error, getResponse.getPartitionResponseInfoList().get(0).getErrorCode());
            MessageFormatRecord.deserializeBlobProperties(getResponse.getInputStream());
            hasUndelete = getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).getLifeVersion() == (short) 1;
            if (hasUndelete) {
                break;
            }
        }
        releaseNettyBufUnderneathStream(stream);
        // Now restart the replication
        controlReplicationForPartition(channels, partitionId, true);
        if (hasUndelete) {
            notificationSystem.awaitBlobUndeletes(blobId1.toString());
        } else {
            notificationSystem.awaitBlobDeletions(blobId1.toString());
        }
        for (ConnectedChannel connectedChannel : channels) {
            GetRequest getRequest = new GetRequest(1, "clientId1", MessageFormatFlags.BlobProperties, partitionRequestInfoList, GetOption.Include_All);
            stream = connectedChannel.sendAndReceive(getRequest).getInputStream();
            GetResponse getResponse = GetResponse.readFrom(stream, clusterMap);
            releaseNettyBufUnderneathStream(stream);
            assertEquals(ServerErrorCode.No_Error, getResponse.getPartitionResponseInfoList().get(0).getErrorCode());
            MessageFormatRecord.deserializeBlobProperties(getResponse.getInputStream());
            if (hasUndelete) {
                assertEquals((short) 1, getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).getLifeVersion());
                assertTrue(getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).isUndeleted());
                assertFalse(getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).isDeleted());
            } else {
                assertEquals((short) 0, getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).getLifeVersion());
                assertTrue(getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).isDeleted());
                assertFalse(getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).isUndeleted());
            }
        }
        // ///////////////////////////////////////////////////////////
        // Corner case 2: two data nodes have different life versions
        // //////////////////////////////////////////////////////////
        BlobId blobId2 = new BlobId(blobIdVersion, BlobId.BlobIdType.NATIVE, clusterMap.getLocalDatacenterId(), properties.getAccountId(), properties.getContainerId(), partitionIds.get(0), false, BlobId.BlobDataType.DATACHUNK);
        putRequest = new PutRequest(1, "client1", blobId2, properties, ByteBuffer.wrap(userMetadata), Unpooled.wrappedBuffer(data), properties.getBlobSize(), BlobType.DataBlob, testEncryption ? ByteBuffer.wrap(encryptionKey) : null);
        putResponseStream = channel.sendAndReceive(putRequest).getInputStream();
        response = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response.getError());
        notificationSystem.awaitBlobCreations(blobId2.toString());
        // Now delete this blob on all servers.
        DeleteRequest deleteRequest = new DeleteRequest(1, "deleteClient", blobId2, System.currentTimeMillis());
        stream = channel.sendAndReceive(deleteRequest).getInputStream();
        deleteResponse = DeleteResponse.readFrom(stream);
        releaseNettyBufUnderneathStream(stream);
        assertEquals(ServerErrorCode.No_Error, deleteResponse.getError());
        notificationSystem.awaitBlobDeletions(blobId2.toString());
        // Now stop the replication
        partitionId = blobId2.getPartition();
        controlReplicationForPartition(channels, partitionId, false);
        // Now send the undelete to two data nodes in the same DC and then send delete
        UndeleteRequest undeleteRequest = new UndeleteRequest(1, "undeleteClient", blobId2, System.currentTimeMillis());
        stream = channel1.sendAndReceive(undeleteRequest).getInputStream();
        UndeleteResponse undeleteResponse = UndeleteResponse.readFrom(stream);
        releaseNettyBufUnderneathStream(stream);
        assertEquals(ServerErrorCode.No_Error, undeleteResponse.getError());
        assertEquals((short) 1, undeleteResponse.getLifeVersion());
        undeleteRequest = new UndeleteRequest(1, "undeleteClient", blobId2, undeleteRequest.getOperationTimeMs());
        stream = channel2.sendAndReceive(undeleteRequest).getInputStream();
        undeleteResponse = UndeleteResponse.readFrom(stream);
        releaseNettyBufUnderneathStream(stream);
        assertEquals(ServerErrorCode.No_Error, undeleteResponse.getError());
        assertEquals((short) 1, undeleteResponse.getLifeVersion());
        deleteRequest1 = new DeleteRequest(1, "deleteClient", blobId2, System.currentTimeMillis());
        stream = channel1.sendAndReceive(deleteRequest1).getInputStream();
        deleteResponse = DeleteResponse.readFrom(stream);
        releaseNettyBufUnderneathStream(stream);
        assertEquals(ServerErrorCode.No_Error, deleteResponse.getError());
        deleteRequest2 = new DeleteRequest(1, "deleteClient", blobId2, deleteRequest1.getDeletionTimeInMs());
        stream = channel2.sendAndReceive(deleteRequest2).getInputStream();
        deleteResponse = DeleteResponse.readFrom(stream);
        releaseNettyBufUnderneathStream(stream);
        assertEquals(ServerErrorCode.No_Error, deleteResponse.getError());
        // Now send the undelete operation through the router; it should fail with a LifeVersionConflict error.
        future = router.undeleteBlob(blobId2.toString(), "service");
        try {
            future.get();
            fail("Undelete blob " + blobId2.toString() + " should fail");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof RouterException);
            assertEquals(RouterErrorCode.LifeVersionConflict, ((RouterException) e.getCause()).getErrorCode());
        }
        // Now restart the replication
        controlReplicationForPartition(channels, partitionId, true);
        notificationSystem.awaitBlobUndeletes(blobId2.toString());
        // Now that replication has resumed, the undelete at lifeVersion 2 will eventually be replicated to all servers.
        partitionRequestInfoList = getPartitionRequestInfoListFromBlobId(blobId2);
        for (ConnectedChannel connectedChannel : channels) {
            // Even if the notificationSystem acknowledged the undelete, it might have been triggered by the undelete at lifeVersion 1.
            // So check in a loop with a time out.
            long deadline = System.currentTimeMillis() + TimeUnit.SECONDS.toMillis(10);
            while (true) {
                GetRequest getRequest = new GetRequest(1, "clientId1", MessageFormatFlags.BlobProperties, partitionRequestInfoList, GetOption.Include_All);
                stream = connectedChannel.sendAndReceive(getRequest).getInputStream();
                GetResponse getResponse = GetResponse.readFrom(stream, clusterMap);
                assertEquals(ServerErrorCode.No_Error, getResponse.getPartitionResponseInfoList().get(0).getErrorCode());
                MessageFormatRecord.deserializeBlobProperties(getResponse.getInputStream());
                if (getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).getLifeVersion() == 2) {
                    assertTrue(getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).isUndeleted());
                    assertFalse(getResponse.getPartitionResponseInfoList().get(0).getMessageInfoList().get(0).isDeleted());
                    break;
                } else {
                    Thread.sleep(1000);
                    if (System.currentTimeMillis() > deadline) {
                        throw new TimeoutException("Fail to get blob " + blobId2 + " at lifeversion 2 at " + connectedChannel.getRemoteHost());
                    }
                }
            }
        }
        releaseNettyBufUnderneathStream(stream);
        for (ConnectedChannel connectedChannel : channels) {
            connectedChannel.disconnect();
        }
        channel1.disconnect();
        channel2.disconnect();
        channel.disconnect();
    } catch (Exception e) {
        e.printStackTrace();
        fail();
    } finally {
        if (router != null) {
            try {
                router.close();
            } catch (Exception e) {
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) UndeleteRequest(com.github.ambry.protocol.UndeleteRequest) UndeleteResponse(com.github.ambry.protocol.UndeleteResponse) InMemAccountService(com.github.ambry.account.InMemAccountService) GetRequest(com.github.ambry.protocol.GetRequest) ArrayList(java.util.ArrayList) List(java.util.List) SSLSocketFactory(javax.net.ssl.SSLSocketFactory) TimeoutException(java.util.concurrent.TimeoutException) VerifiableProperties(com.github.ambry.config.VerifiableProperties) Router(com.github.ambry.router.Router) ConnectedChannel(com.github.ambry.network.ConnectedChannel) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) DeleteResponse(com.github.ambry.protocol.DeleteResponse) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) InMemAccountService(com.github.ambry.account.InMemAccountService) AccountService(com.github.ambry.account.AccountService) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) BlobProperties(com.github.ambry.messageformat.BlobProperties) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) PutResponse(com.github.ambry.protocol.PutResponse) ExecutionException(java.util.concurrent.ExecutionException) Pair(com.github.ambry.utils.Pair) NonBlockingRouterFactory(com.github.ambry.router.NonBlockingRouterFactory) SSLConfig(com.github.ambry.config.SSLConfig) RouterException(com.github.ambry.router.RouterException) PutRequest(com.github.ambry.protocol.PutRequest) NettyByteBufDataInputStream(com.github.ambry.utils.NettyByteBufDataInputStream) DataInputStream(java.io.DataInputStream) PartitionRequestInfo(com.github.ambry.protocol.PartitionRequestInfo) GetResponse(com.github.ambry.protocol.GetResponse) IOException(java.io.IOException) MessageFormatException(com.github.ambry.messageformat.MessageFormatException) RouterException(com.github.ambry.router.RouterException) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) BlobProperties(com.github.ambry.messageformat.BlobProperties) BlobId(com.github.ambry.commons.BlobId) DeleteRequest(com.github.ambry.protocol.DeleteRequest)
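
The last verification loop in this example is a generic poll-until-deadline wait: re-issue the GET, sleep, and give up with a TimeoutException once the deadline passes. The sketch below extracts that pattern into a reusable helper; the helper name and signature are illustrative, not part of ambry.

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;

public class AwaitCondition {

    // Polls the condition with a fixed sleep between attempts and fails with a
    // TimeoutException after timeoutMs, the same shape as Example 4's retry loop.
    static void await(BooleanSupplier condition, long timeoutMs, long pollMs)
            throws InterruptedException, TimeoutException {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (!condition.getAsBoolean()) {
            if (System.currentTimeMillis() > deadline) {
                throw new TimeoutException("Condition not met within " + timeoutMs + " ms");
            }
            Thread.sleep(pollMs);
        }
    }

    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        await(() -> System.currentTimeMillis() - start > 50, TimeUnit.SECONDS.toMillis(10), 10);
        System.out.println("condition met");
    }
}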

Example 5 with ConnectedChannel

use of com.github.ambry.network.ConnectedChannel in project ambry by linkedin.

the class ServerTestUtil method endToEndReplicationWithMultiNodeSinglePartitionTest.
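The test below is long, but almost every step is the same ConnectedChannel round trip: connect, sendAndReceive a request, deserialize the response from the returned input stream, release the pooled Netty buffer behind that stream, and assert No_Error. A minimal sketch of one such round trip, assuming a PutRequest `putRequest` already built and a plain-text `port` (passing null for the SSL socket factory and SSL config is an assumption that only holds for plain-text ports):

ConnectedChannel channel = getBlockingChannelBasedOnPortType(port, "localhost", null, null);
channel.connect();
try {
    DataInputStream responseStream = channel.sendAndReceive(putRequest).getInputStream();
    PutResponse response = PutResponse.readFrom(responseStream);
    // The stream is backed by a pooled Netty buffer; release it once the response is parsed.
    releaseNettyBufUnderneathStream(responseStream);
    assertEquals(ServerErrorCode.No_Error, response.getError());
} finally {
    channel.disconnect();
}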

static void endToEndReplicationWithMultiNodeSinglePartitionTest(String routerDatacenter, int interestedDataNodePortNumber, Port dataNode1Port, Port dataNode2Port, Port dataNode3Port, MockCluster cluster, SSLConfig clientSSLConfig1, SSLSocketFactory clientSSLSocketFactory1, MockNotificationSystem notificationSystem, Properties routerProps, boolean testEncryption) {
    // interestedDataNodePortNumber is used to locate the data node and hence has to be the plain-text port
    try {
        // The header size of a LogSegment. This shouldn't be here since it breaks the interface of Log, but we use
        // this number to satisfy the test cases.
        // It also means we only have one log segment for this partition. If more operations are put into the
        // partition and it exceeds the log segment capacity, this number will have to be increased.
        int expectedTokenSize = 18;
        MockClusterMap clusterMap = cluster.getClusterMap();
        BlobIdFactory blobIdFactory = new BlobIdFactory(clusterMap);
        ArrayList<BlobProperties> propertyList = new ArrayList<>();
        ArrayList<BlobId> blobIdList = new ArrayList<>();
        ArrayList<byte[]> dataList = new ArrayList<>();
        ArrayList<byte[]> encryptionKeyList = new ArrayList<>();
        byte[] usermetadata = new byte[1000];
        TestUtils.RANDOM.nextBytes(usermetadata);
        PartitionId partition = clusterMap.getWritablePartitionIds(MockClusterMap.DEFAULT_PARTITION_CLASS).get(0);
        for (int i = 0; i < 11; i++) {
            short accountId = Utils.getRandomShort(TestUtils.RANDOM);
            short containerId = Utils.getRandomShort(TestUtils.RANDOM);
            propertyList.add(new BlobProperties(1000, "serviceid1", null, null, false, TestUtils.TTL_SECS, cluster.time.milliseconds(), accountId, containerId, testEncryption, null, null, null));
            blobIdList.add(new BlobId(CommonTestUtils.getCurrentBlobIdVersion(), BlobId.BlobIdType.NATIVE, clusterMap.getLocalDatacenterId(), accountId, containerId, partition, false, BlobId.BlobDataType.DATACHUNK));
            dataList.add(TestUtils.getRandomBytes(1000));
            if (testEncryption) {
                encryptionKeyList.add(TestUtils.getRandomBytes(128));
            } else {
                encryptionKeyList.add(null);
            }
        }
        // put blob 1
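        // PutRequest args (inferred from this call): correlation id, client id, blob id, blob properties,
        // user metadata, blob data, blob size, blob type, encryption key (null when not encrypted)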
        PutRequest putRequest = new PutRequest(1, "client1", blobIdList.get(0), propertyList.get(0), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(0)), propertyList.get(0).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(0) != null ? ByteBuffer.wrap(encryptionKeyList.get(0)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(0), blobIdList.get(0), encryptionKeyList.get(0) != null ? ByteBuffer.wrap(encryptionKeyList.get(0)) : null, ByteBuffer.wrap(usermetadata), dataList.get(0));
        ConnectedChannel channel1 = getBlockingChannelBasedOnPortType(dataNode1Port, "localhost", clientSSLSocketFactory1, clientSSLConfig1);
        ConnectedChannel channel2 = getBlockingChannelBasedOnPortType(dataNode2Port, "localhost", clientSSLSocketFactory1, clientSSLConfig1);
        ConnectedChannel channel3 = getBlockingChannelBasedOnPortType(dataNode3Port, "localhost", clientSSLSocketFactory1, clientSSLConfig1);
        channel1.connect();
        channel2.connect();
        channel3.connect();
        DataInputStream putResponseStream = channel1.sendAndReceive(putRequest).getInputStream();
        PutResponse response = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response.getError());
        // put blob 2
        PutRequest putRequest2 = new PutRequest(1, "client1", blobIdList.get(1), propertyList.get(1), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(1)), propertyList.get(1).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(1) != null ? ByteBuffer.wrap(encryptionKeyList.get(1)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(1), blobIdList.get(1), encryptionKeyList.get(1) != null ? ByteBuffer.wrap(encryptionKeyList.get(1)) : null, ByteBuffer.wrap(usermetadata), dataList.get(1));
        putResponseStream = channel2.sendAndReceive(putRequest2).getInputStream();
        PutResponse response2 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response2.getError());
        // put blob 3
        PutRequest putRequest3 = new PutRequest(1, "client1", blobIdList.get(2), propertyList.get(2), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(2)), propertyList.get(2).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(2) != null ? ByteBuffer.wrap(encryptionKeyList.get(2)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(2), blobIdList.get(2), encryptionKeyList.get(2) != null ? ByteBuffer.wrap(encryptionKeyList.get(2)) : null, ByteBuffer.wrap(usermetadata), dataList.get(2));
        putResponseStream = channel3.sendAndReceive(putRequest3).getInputStream();
        PutResponse response3 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response3.getError());
        // put blob 4
        putRequest = new PutRequest(1, "client1", blobIdList.get(3), propertyList.get(3), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(3)), propertyList.get(3).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(3) != null ? ByteBuffer.wrap(encryptionKeyList.get(3)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(3), blobIdList.get(3), encryptionKeyList.get(3) != null ? ByteBuffer.wrap(encryptionKeyList.get(3)) : null, ByteBuffer.wrap(usermetadata), dataList.get(3));
        putResponseStream = channel1.sendAndReceive(putRequest).getInputStream();
        response = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response.getError());
        // put blob 5
        putRequest2 = new PutRequest(1, "client1", blobIdList.get(4), propertyList.get(4), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(4)), propertyList.get(4).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(4) != null ? ByteBuffer.wrap(encryptionKeyList.get(4)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(4), blobIdList.get(4), encryptionKeyList.get(4) != null ? ByteBuffer.wrap(encryptionKeyList.get(4)) : null, ByteBuffer.wrap(usermetadata), dataList.get(4));
        putResponseStream = channel2.sendAndReceive(putRequest2).getInputStream();
        response2 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response2.getError());
        // put blob 6
        putRequest3 = new PutRequest(1, "client1", blobIdList.get(5), propertyList.get(5), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(5)), propertyList.get(5).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(5) != null ? ByteBuffer.wrap(encryptionKeyList.get(5)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(5), blobIdList.get(5), encryptionKeyList.get(5) != null ? ByteBuffer.wrap(encryptionKeyList.get(5)) : null, ByteBuffer.wrap(usermetadata), dataList.get(5));
        putResponseStream = channel3.sendAndReceive(putRequest3).getInputStream();
        response3 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response3.getError());
        // wait till replication can complete
        notificationSystem.awaitBlobCreations(blobIdList.get(0).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(1).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(2).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(3).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(4).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(5).getID());
        checkTtlUpdateStatus(channel3, clusterMap, blobIdFactory, blobIdList.get(5), dataList.get(5), false, getExpiryTimeMs(propertyList.get(5)));
        updateBlobTtl(channel3, blobIdList.get(5), cluster.time.milliseconds());
        expectedTokenSize += getUpdateRecordSize(blobIdList.get(5), SubRecord.Type.TTL_UPDATE);
        checkTtlUpdateStatus(channel3, clusterMap, blobIdFactory, blobIdList.get(5), dataList.get(5), true, Utils.Infinite_Time);
        notificationSystem.awaitBlobUpdates(blobIdList.get(5).getID(), UpdateType.TTL_UPDATE);
        // get blob properties
        ArrayList<BlobId> ids = new ArrayList<BlobId>();
        MockPartitionId mockPartitionId = (MockPartitionId) clusterMap.getWritablePartitionIds(MockClusterMap.DEFAULT_PARTITION_CLASS).get(0);
        ids.add(blobIdList.get(2));
        ArrayList<PartitionRequestInfo> partitionRequestInfoList = new ArrayList<PartitionRequestInfo>();
        PartitionRequestInfo partitionRequestInfo = new PartitionRequestInfo(mockPartitionId, ids);
        partitionRequestInfoList.add(partitionRequestInfo);
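        // GetRequest args (inferred from this call): correlation id, client id, message format flags,
        // partition request infos, get option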
        GetRequest getRequest1 = new GetRequest(1, "clientid2", MessageFormatFlags.BlobProperties, partitionRequestInfoList, GetOption.None);
        DataInputStream stream = channel2.sendAndReceive(getRequest1).getInputStream();
        GetResponse resp1 = GetResponse.readFrom(stream, clusterMap);
        assertEquals(ServerErrorCode.No_Error, resp1.getError());
        assertEquals(ServerErrorCode.No_Error, resp1.getPartitionResponseInfoList().get(0).getErrorCode());
        try {
            BlobProperties propertyOutput = MessageFormatRecord.deserializeBlobProperties(resp1.getInputStream());
            assertEquals(1000, propertyOutput.getBlobSize());
            assertEquals("serviceid1", propertyOutput.getServiceId());
            assertEquals("AccountId mismatch", propertyList.get(2).getAccountId(), propertyOutput.getAccountId());
            assertEquals("ContainerId mismatch", propertyList.get(2).getContainerId(), propertyOutput.getContainerId());
            assertEquals("IsEncrypted mismatch", propertyList.get(2).isEncrypted(), propertyOutput.isEncrypted());
        } catch (MessageFormatException e) {
            fail();
        }
        // get user metadata
        ids.clear();
        ids.add(blobIdList.get(1));
        GetRequest getRequest2 = new GetRequest(1, "clientid2", MessageFormatFlags.BlobUserMetadata, partitionRequestInfoList, GetOption.None);
        stream = channel1.sendAndReceive(getRequest2).getInputStream();
        GetResponse resp2 = GetResponse.readFrom(stream, clusterMap);
        assertEquals(ServerErrorCode.No_Error, resp2.getError());
        assertEquals(ServerErrorCode.No_Error, resp2.getPartitionResponseInfoList().get(0).getErrorCode());
        try {
            ByteBuffer userMetadataOutput = MessageFormatRecord.deserializeUserMetadata(resp2.getInputStream());
            assertArrayEquals(usermetadata, userMetadataOutput.array());
            if (testEncryption) {
                assertNotNull("MessageMetadata should not have been null", resp2.getPartitionResponseInfoList().get(0).getMessageMetadataList().get(0));
                assertArrayEquals("EncryptionKey mismatch", encryptionKeyList.get(1), resp2.getPartitionResponseInfoList().get(0).getMessageMetadataList().get(0).getEncryptionKey().array());
            } else {
                assertNull("MessageMetadata should have been null", resp2.getPartitionResponseInfoList().get(0).getMessageMetadataList().get(0));
            }
        } catch (MessageFormatException e) {
            fail();
        }
        releaseNettyBufUnderneathStream(stream);
        // get blob
        ids.clear();
        ids.add(blobIdList.get(0));
        GetRequest getRequest3 = new GetRequest(1, "clientid2", MessageFormatFlags.Blob, partitionRequestInfoList, GetOption.None);
        stream = channel3.sendAndReceive(getRequest3).getInputStream();
        GetResponse resp3 = GetResponse.readFrom(stream, clusterMap);
        try {
            BlobData blobData = MessageFormatRecord.deserializeBlob(resp3.getInputStream());
            byte[] blobout = getBlobDataAndRelease(blobData);
            assertArrayEquals(dataList.get(0), blobout);
            if (testEncryption) {
                assertNotNull("MessageMetadata should not have been null", resp3.getPartitionResponseInfoList().get(0).getMessageMetadataList().get(0));
                assertArrayEquals("EncryptionKey mismatch", encryptionKeyList.get(0), resp3.getPartitionResponseInfoList().get(0).getMessageMetadataList().get(0).getEncryptionKey().array());
            } else {
                assertNull("MessageMetadata should have been null", resp3.getPartitionResponseInfoList().get(0).getMessageMetadataList().get(0));
            }
        } catch (MessageFormatException e) {
            fail();
        }
        releaseNettyBufUnderneathStream(stream);
        // get blob all
        ids.clear();
        ids.add(blobIdList.get(0));
        GetRequest getRequest4 = new GetRequest(1, "clientid2", MessageFormatFlags.All, partitionRequestInfoList, GetOption.None);
        stream = channel1.sendAndReceive(getRequest4).getInputStream();
        GetResponse resp4 = GetResponse.readFrom(stream, clusterMap);
        try {
            BlobAll blobAll = MessageFormatRecord.deserializeBlobAll(resp4.getInputStream(), blobIdFactory);
            byte[] blobout = getBlobDataAndRelease(blobAll.getBlobData());
            assertArrayEquals(dataList.get(0), blobout);
            if (testEncryption) {
                assertNotNull("MessageMetadata should not have been null", blobAll.getBlobEncryptionKey());
                assertArrayEquals("EncryptionKey mismatch", encryptionKeyList.get(0), blobAll.getBlobEncryptionKey().array());
            } else {
                assertNull("MessageMetadata should have been null", blobAll.getBlobEncryptionKey());
            }
        } catch (MessageFormatException e) {
            fail();
        }
        releaseNettyBufUnderneathStream(stream);
        if (!testEncryption) {
            // get blob data
            // Use router to get the blob
            Properties routerProperties = getRouterProps(routerDatacenter);
            routerProperties.putAll(routerProps);
            VerifiableProperties routerVerifiableProperties = new VerifiableProperties(routerProperties);
            AccountService accountService = new InMemAccountService(false, true);
            Router router = new NonBlockingRouterFactory(routerVerifiableProperties, clusterMap, notificationSystem, getSSLFactoryIfRequired(routerVerifiableProperties), accountService).getRouter();
            checkBlobId(router, blobIdList.get(0), dataList.get(0));
            checkBlobId(router, blobIdList.get(1), dataList.get(1));
            checkBlobId(router, blobIdList.get(2), dataList.get(2));
            checkBlobId(router, blobIdList.get(3), dataList.get(3));
            checkBlobId(router, blobIdList.get(4), dataList.get(4));
            checkBlobId(router, blobIdList.get(5), dataList.get(5));
            router.close();
        }
        // fetch blob that does not exist
        // get blob properties
        ids = new ArrayList<BlobId>();
        mockPartitionId = (MockPartitionId) clusterMap.getWritablePartitionIds(MockClusterMap.DEFAULT_PARTITION_CLASS).get(0);
        ids.add(new BlobId(CommonTestUtils.getCurrentBlobIdVersion(), BlobId.BlobIdType.NATIVE, clusterMap.getLocalDatacenterId(), propertyList.get(0).getAccountId(), propertyList.get(0).getContainerId(), mockPartitionId, false, BlobId.BlobDataType.DATACHUNK));
        partitionRequestInfoList.clear();
        partitionRequestInfo = new PartitionRequestInfo(mockPartitionId, ids);
        partitionRequestInfoList.add(partitionRequestInfo);
        GetRequest getRequest5 = new GetRequest(1, "clientid2", MessageFormatFlags.BlobProperties, partitionRequestInfoList, GetOption.None);
        stream = channel3.sendAndReceive(getRequest5).getInputStream();
        GetResponse resp5 = GetResponse.readFrom(stream, clusterMap);
        assertEquals(ServerErrorCode.No_Error, resp5.getError());
        assertEquals(ServerErrorCode.Blob_Not_Found, resp5.getPartitionResponseInfoList().get(0).getErrorCode());
        releaseNettyBufUnderneathStream(stream);
        // delete a blob and ensure it is propagated
        DeleteRequest deleteRequest = new DeleteRequest(1, "reptest", blobIdList.get(0), System.currentTimeMillis());
        expectedTokenSize += getUpdateRecordSize(blobIdList.get(0), SubRecord.Type.DELETE);
        DataInputStream deleteResponseStream = channel1.sendAndReceive(deleteRequest).getInputStream();
        DeleteResponse deleteResponse = DeleteResponse.readFrom(deleteResponseStream);
        releaseNettyBufUnderneathStream(deleteResponseStream);
        assertEquals(ServerErrorCode.No_Error, deleteResponse.getError());
        notificationSystem.awaitBlobDeletions(blobIdList.get(0).getID());
        ids = new ArrayList<BlobId>();
        ids.add(blobIdList.get(0));
        partitionRequestInfoList.clear();
        partitionRequestInfo = new PartitionRequestInfo(partition, ids);
        partitionRequestInfoList.add(partitionRequestInfo);
        GetRequest getRequest6 = new GetRequest(1, "clientid2", MessageFormatFlags.Blob, partitionRequestInfoList, GetOption.None);
        stream = channel3.sendAndReceive(getRequest6).getInputStream();
        GetResponse resp6 = GetResponse.readFrom(stream, clusterMap);
        assertEquals(ServerErrorCode.No_Error, resp6.getError());
        assertEquals(ServerErrorCode.Blob_Deleted, resp6.getPartitionResponseInfoList().get(0).getErrorCode());
        releaseNettyBufUnderneathStream(stream);
        // get the data node to inspect replication tokens on
        DataNodeId dataNodeId = clusterMap.getDataNodeId("localhost", interestedDataNodePortNumber);
        checkReplicaTokens(clusterMap, dataNodeId, expectedTokenSize - getUpdateRecordSize(blobIdList.get(0), SubRecord.Type.DELETE), "0");
        // Shut down server 1
        cluster.getServers().get(0).shutdown();
        cluster.getServers().get(0).awaitShutdown();
        // Add more data to server 2 and server 3. Recover server 1 and ensure it is completely replicated
        // put blob 7
        putRequest2 = new PutRequest(1, "client1", blobIdList.get(6), propertyList.get(6), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(6)), propertyList.get(6).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(6) != null ? ByteBuffer.wrap(encryptionKeyList.get(6)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(6), blobIdList.get(6), encryptionKeyList.get(6) != null ? ByteBuffer.wrap(encryptionKeyList.get(6)) : null, ByteBuffer.wrap(usermetadata), dataList.get(6));
        putResponseStream = channel2.sendAndReceive(putRequest2).getInputStream();
        response2 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response2.getError());
        // put blob 8
        putRequest3 = new PutRequest(1, "client1", blobIdList.get(7), propertyList.get(7), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(7)), propertyList.get(7).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(7) != null ? ByteBuffer.wrap(encryptionKeyList.get(7)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(7), blobIdList.get(7), encryptionKeyList.get(7) != null ? ByteBuffer.wrap(encryptionKeyList.get(7)) : null, ByteBuffer.wrap(usermetadata), dataList.get(7));
        putResponseStream = channel3.sendAndReceive(putRequest3).getInputStream();
        response3 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response3.getError());
        // put blob 9
        putRequest2 = new PutRequest(1, "client1", blobIdList.get(8), propertyList.get(8), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(8)), propertyList.get(8).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(8) != null ? ByteBuffer.wrap(encryptionKeyList.get(8)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(8), blobIdList.get(8), encryptionKeyList.get(8) != null ? ByteBuffer.wrap(encryptionKeyList.get(8)) : null, ByteBuffer.wrap(usermetadata), dataList.get(8));
        putResponseStream = channel2.sendAndReceive(putRequest2).getInputStream();
        response2 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response2.getError());
        // put blob 10
        putRequest3 = new PutRequest(1, "client1", blobIdList.get(9), propertyList.get(9), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(9)), propertyList.get(9).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(9) != null ? ByteBuffer.wrap(encryptionKeyList.get(9)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(9), blobIdList.get(9), encryptionKeyList.get(9) != null ? ByteBuffer.wrap(encryptionKeyList.get(9)) : null, ByteBuffer.wrap(usermetadata), dataList.get(9));
        putResponseStream = channel3.sendAndReceive(putRequest3).getInputStream();
        response3 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response3.getError());
        // put blob 11
        putRequest2 = new PutRequest(1, "client1", blobIdList.get(10), propertyList.get(10), ByteBuffer.wrap(usermetadata), Unpooled.wrappedBuffer(dataList.get(10)), propertyList.get(10).getBlobSize(), BlobType.DataBlob, encryptionKeyList.get(10) != null ? ByteBuffer.wrap(encryptionKeyList.get(10)) : null);
        expectedTokenSize += getPutRecordSize(propertyList.get(10), blobIdList.get(10), encryptionKeyList.get(10) != null ? ByteBuffer.wrap(encryptionKeyList.get(10)) : null, ByteBuffer.wrap(usermetadata), dataList.get(10));
        putResponseStream = channel2.sendAndReceive(putRequest2).getInputStream();
        response2 = PutResponse.readFrom(putResponseStream);
        releaseNettyBufUnderneathStream(putResponseStream);
        assertEquals(ServerErrorCode.No_Error, response2.getError());
        checkTtlUpdateStatus(channel2, clusterMap, blobIdFactory, blobIdList.get(10), dataList.get(10), false, getExpiryTimeMs(propertyList.get(10)));
        updateBlobTtl(channel2, blobIdList.get(10), cluster.time.milliseconds());
        expectedTokenSize += getUpdateRecordSize(blobIdList.get(10), SubRecord.Type.TTL_UPDATE);
        checkTtlUpdateStatus(channel2, clusterMap, blobIdFactory, blobIdList.get(10), dataList.get(10), true, Utils.Infinite_Time);
        cluster.reinitServer(0);
        // wait for server to recover
        notificationSystem.awaitBlobCreations(blobIdList.get(6).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(7).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(8).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(9).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(10).getID());
        notificationSystem.awaitBlobUpdates(blobIdList.get(10).getID(), UpdateType.TTL_UPDATE);
        channel1.disconnect();
        channel1.connect();
        // get blob
        try {
            checkBlobContent(clusterMap, blobIdList.get(1), channel1, dataList.get(1), encryptionKeyList.get(1));
            checkBlobContent(clusterMap, blobIdList.get(2), channel1, dataList.get(2), encryptionKeyList.get(2));
            checkBlobContent(clusterMap, blobIdList.get(3), channel1, dataList.get(3), encryptionKeyList.get(3));
            checkBlobContent(clusterMap, blobIdList.get(4), channel1, dataList.get(4), encryptionKeyList.get(4));
            checkBlobContent(clusterMap, blobIdList.get(5), channel1, dataList.get(5), encryptionKeyList.get(5));
            checkBlobContent(clusterMap, blobIdList.get(6), channel1, dataList.get(6), encryptionKeyList.get(6));
            checkBlobContent(clusterMap, blobIdList.get(7), channel1, dataList.get(7), encryptionKeyList.get(7));
            checkBlobContent(clusterMap, blobIdList.get(8), channel1, dataList.get(8), encryptionKeyList.get(8));
            checkBlobContent(clusterMap, blobIdList.get(9), channel1, dataList.get(9), encryptionKeyList.get(9));
            checkBlobContent(clusterMap, blobIdList.get(10), channel1, dataList.get(10), encryptionKeyList.get(10));
        } catch (MessageFormatException e) {
            fail();
        }
        // check that the ttl update went through
        checkTtlUpdateStatus(channel1, clusterMap, blobIdFactory, blobIdList.get(10), dataList.get(10), true, Utils.Infinite_Time);
        // Shut down server 1, remove all its data from every mount path, then recover server 1 and ensure the node is rebuilt
        cluster.getServers().get(0).shutdown();
        cluster.getServers().get(0).awaitShutdown();
        File mountFile = new File(clusterMap.getReplicaIds(dataNodeId).get(0).getMountPath());
        for (File toDelete : Objects.requireNonNull(mountFile.listFiles())) {
            deleteFolderContent(toDelete, true);
        }
        notificationSystem.decrementCreatedReplica(blobIdList.get(1).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementCreatedReplica(blobIdList.get(2).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementCreatedReplica(blobIdList.get(3).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementCreatedReplica(blobIdList.get(4).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementCreatedReplica(blobIdList.get(5).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementUpdatedReplica(blobIdList.get(5).getID(), dataNodeId.getHostname(), dataNodeId.getPort(), UpdateType.TTL_UPDATE);
        notificationSystem.decrementCreatedReplica(blobIdList.get(6).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementCreatedReplica(blobIdList.get(7).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementCreatedReplica(blobIdList.get(8).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementCreatedReplica(blobIdList.get(9).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementCreatedReplica(blobIdList.get(10).getID(), dataNodeId.getHostname(), dataNodeId.getPort());
        notificationSystem.decrementUpdatedReplica(blobIdList.get(10).getID(), dataNodeId.getHostname(), dataNodeId.getPort(), UpdateType.TTL_UPDATE);
        cluster.reinitServer(0);
        notificationSystem.awaitBlobCreations(blobIdList.get(1).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(2).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(3).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(4).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(5).getID());
        notificationSystem.awaitBlobUpdates(blobIdList.get(5).getID(), UpdateType.TTL_UPDATE);
        notificationSystem.awaitBlobCreations(blobIdList.get(6).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(7).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(8).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(9).getID());
        notificationSystem.awaitBlobCreations(blobIdList.get(10).getID());
        notificationSystem.awaitBlobUpdates(blobIdList.get(10).getID(), UpdateType.TTL_UPDATE);
        channel1.disconnect();
        channel1.connect();
        // get blob
        try {
            checkBlobContent(clusterMap, blobIdList.get(1), channel1, dataList.get(1), encryptionKeyList.get(1));
            checkBlobContent(clusterMap, blobIdList.get(2), channel1, dataList.get(2), encryptionKeyList.get(2));
            checkBlobContent(clusterMap, blobIdList.get(3), channel1, dataList.get(3), encryptionKeyList.get(3));
            checkBlobContent(clusterMap, blobIdList.get(4), channel1, dataList.get(4), encryptionKeyList.get(4));
            checkBlobContent(clusterMap, blobIdList.get(5), channel1, dataList.get(5), encryptionKeyList.get(5));
            checkBlobContent(clusterMap, blobIdList.get(6), channel1, dataList.get(6), encryptionKeyList.get(6));
            checkBlobContent(clusterMap, blobIdList.get(7), channel1, dataList.get(7), encryptionKeyList.get(7));
            checkBlobContent(clusterMap, blobIdList.get(8), channel1, dataList.get(8), encryptionKeyList.get(8));
            checkBlobContent(clusterMap, blobIdList.get(9), channel1, dataList.get(9), encryptionKeyList.get(9));
            checkBlobContent(clusterMap, blobIdList.get(10), channel1, dataList.get(10), encryptionKeyList.get(10));
        } catch (MessageFormatException e) {
            fail();
        }
        // check that the ttl updates are present
        checkTtlUpdateStatus(channel1, clusterMap, blobIdFactory, blobIdList.get(5), dataList.get(5), true, Utils.Infinite_Time);
        checkTtlUpdateStatus(channel1, clusterMap, blobIdFactory, blobIdList.get(10), dataList.get(10), true, Utils.Infinite_Time);
        channel1.disconnect();
        channel2.disconnect();
        channel3.disconnect();
    } catch (Exception e) {
        e.printStackTrace();
        fail("Unexpected exception: " + e);
    }
}
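The eleven put operations in this test repeat the same four steps: build a PutRequest, sendAndReceive it, read the PutResponse, then release the stream and assert success. A hedged sketch of how that block could be collapsed into a helper; putBlobAndVerify is an illustrative name, not a method that exists in ServerTestUtil, and callers would still accumulate expectedTokenSize via getPutRecordSize as the test does inline:

// Illustrative helper: performs one put over the given channel and asserts it succeeded.
private static void putBlobAndVerify(ConnectedChannel channel, BlobId blobId, BlobProperties properties,
        byte[] usermetadata, byte[] data, byte[] encryptionKey) throws IOException {
    PutRequest putRequest = new PutRequest(1, "client1", blobId, properties, ByteBuffer.wrap(usermetadata),
            Unpooled.wrappedBuffer(data), properties.getBlobSize(), BlobType.DataBlob,
            encryptionKey != null ? ByteBuffer.wrap(encryptionKey) : null);
    DataInputStream putResponseStream = channel.sendAndReceive(putRequest).getInputStream();
    PutResponse response = PutResponse.readFrom(putResponseStream);
    releaseNettyBufUnderneathStream(putResponseStream);
    assertEquals(ServerErrorCode.No_Error, response.getError());
}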
Also used : AccountService(com.github.ambry.account.AccountService), InMemAccountService(com.github.ambry.account.InMemAccountService), DataNodeId(com.github.ambry.clustermap.DataNodeId), MockClusterMap(com.github.ambry.clustermap.MockClusterMap), MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId), MockPartitionId(com.github.ambry.clustermap.MockPartitionId), PartitionId(com.github.ambry.clustermap.PartitionId), BlobId(com.github.ambry.commons.BlobId), BlobIdFactory(com.github.ambry.commons.BlobIdFactory), VerifiableProperties(com.github.ambry.config.VerifiableProperties), BlobAll(com.github.ambry.messageformat.BlobAll), BlobData(com.github.ambry.messageformat.BlobData), BlobProperties(com.github.ambry.messageformat.BlobProperties), MessageFormatException(com.github.ambry.messageformat.MessageFormatException), ConnectedChannel(com.github.ambry.network.ConnectedChannel), DeleteRequest(com.github.ambry.protocol.DeleteRequest), DeleteResponse(com.github.ambry.protocol.DeleteResponse), GetRequest(com.github.ambry.protocol.GetRequest), GetResponse(com.github.ambry.protocol.GetResponse), PartitionRequestInfo(com.github.ambry.protocol.PartitionRequestInfo), PutRequest(com.github.ambry.protocol.PutRequest), PutResponse(com.github.ambry.protocol.PutResponse), NonBlockingRouterFactory(com.github.ambry.router.NonBlockingRouterFactory), Router(com.github.ambry.router.Router), RouterException(com.github.ambry.router.RouterException), NettyByteBufDataInputStream(com.github.ambry.utils.NettyByteBufDataInputStream), DataInputStream(java.io.DataInputStream), File(java.io.File), IOException(java.io.IOException), ByteBuffer(java.nio.ByteBuffer), ArrayList(java.util.ArrayList), Properties(java.util.Properties), ExecutionException(java.util.concurrent.ExecutionException), TimeoutException(java.util.concurrent.TimeoutException)

Aggregations

ConnectedChannel (com.github.ambry.network.ConnectedChannel): 20 usages
BlobId (com.github.ambry.commons.BlobId): 16 usages
BlobProperties (com.github.ambry.messageformat.BlobProperties): 14 usages
VerifiableProperties (com.github.ambry.config.VerifiableProperties): 12 usages
GetRequest (com.github.ambry.protocol.GetRequest): 12 usages
GetResponse (com.github.ambry.protocol.GetResponse): 12 usages
PartitionRequestInfo (com.github.ambry.protocol.PartitionRequestInfo): 12 usages
ArrayList (java.util.ArrayList): 12 usages
Port (com.github.ambry.network.Port): 11 usages
Properties (java.util.Properties): 10 usages
DataInputStream (java.io.DataInputStream): 9 usages
PartitionId (com.github.ambry.clustermap.PartitionId): 8 usages
MessageFormatException (com.github.ambry.messageformat.MessageFormatException): 8 usages
DataNodeId (com.github.ambry.clustermap.DataNodeId): 7 usages
MockClusterMap (com.github.ambry.clustermap.MockClusterMap): 7 usages
ReplicaId (com.github.ambry.clustermap.ReplicaId): 7 usages
PutRequest (com.github.ambry.protocol.PutRequest): 7 usages
PutResponse (com.github.ambry.protocol.PutResponse): 7 usages
NettyByteBufDataInputStream (com.github.ambry.utils.NettyByteBufDataInputStream): 7 usages
MetricRegistry (com.codahale.metrics.MetricRegistry): 6 usages