Example 76 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

The class StorageManagerTest, method diskSpaceAllocatorTest.

/**
 * Test that stores on a disk are inaccessible if the {@link DiskSpaceAllocator} fails to start.
 * @throws Exception
 */
@Test
public void diskSpaceAllocatorTest() throws Exception {
    generateConfigs(true, false);
    MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> replicas = clusterMap.getReplicaIds(dataNode);
    List<String> mountPaths = dataNode.getMountPaths();
    Map<String, List<ReplicaId>> replicasByMountPath = new HashMap<>();
    for (ReplicaId replica : replicas) {
        replicasByMountPath.computeIfAbsent(replica.getMountPath(), key -> new ArrayList<>()).add(replica);
    }
    // Startup/shutdown one more time to verify the restart scenario.
    for (int i = 0; i < 2; i++) {
        metricRegistry = new MetricRegistry();
        StorageManager storageManager = createStorageManager(dataNode, metricRegistry, null);
        storageManager.start();
        assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
        checkStoreAccessibility(replicas, null, storageManager);
        Map<String, Counter> counters = metricRegistry.getCounters();
        assertEquals(0, getCounterValue(counters, DiskSpaceAllocator.class.getName(), "DiskSpaceAllocatorInitFailureCount"));
        assertEquals(0, getCounterValue(counters, DiskManager.class.getName(), "TotalStoreStartFailures"));
        assertEquals(0, getCounterValue(counters, DiskManager.class.getName(), "DiskMountPathFailures"));
        for (String mountPath : dataNode.getMountPaths()) {
            List<ReplicaId> replicasOnDisk = replicasByMountPath.get(mountPath);
            DiskSpaceAllocatorTest.ExpectedState expectedState = new DiskSpaceAllocatorTest.ExpectedState();
            // There should be 1 unallocated segment per replica on a mount path (each replica can have 2 segments) and the
            // swap segments.
            expectedState.addSwapSeg(storeConfig.storeSegmentSizeInBytes, 1);
            for (ReplicaId replica : replicasOnDisk) {
                expectedState.addStoreSeg(replica.getPartitionId().toPathString(), storeConfig.storeSegmentSizeInBytes, 1);
            }
            DiskSpaceAllocatorTest.verifyPoolState(new File(mountPath, diskManagerConfig.diskManagerReserveFileDirName), expectedState);
        }
        shutdownAndAssertStoresInaccessible(storageManager, replicas);
        assertEquals(0, getCounterValue(counters, DiskManager.class.getName(), "TotalStoreShutdownFailures"));
    }
    // Induce an initializePool failure by:
    // 1. deleting a file size directory
    // 2. instantiating the DiskManagers (this will not fail because the directory just won't be inventoried)
    // 3. creating a regular file with the same name as the file size directory
    // 4. starting the DiskManagers (this should cause the DiskSpaceAllocator to fail to initialize when it sees the
    // file where the directory should be created).
    metricRegistry = new MetricRegistry();
    String diskToFail = mountPaths.get(RANDOM.nextInt(mountPaths.size()));
    File reservePoolDir = new File(diskToFail, diskManagerConfig.diskManagerReserveFileDirName);
    File storeReserveDir = new File(reservePoolDir, DiskSpaceAllocator.STORE_DIR_PREFIX + replicasByMountPath.get(diskToFail).get(0).getPartitionId().toPathString());
    File fileSizeDir = new File(storeReserveDir, DiskSpaceAllocator.generateFileSizeDirName(storeConfig.storeSegmentSizeInBytes));
    Utils.deleteFileOrDirectory(fileSizeDir);
    StorageManager storageManager = createStorageManager(dataNode, metricRegistry, null);
    assertTrue("File creation should have succeeded", fileSizeDir.createNewFile());
    storageManager.start();
    assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
    checkStoreAccessibility(replicas, diskToFail, storageManager);
    Map<String, Counter> counters = metricRegistry.getCounters();
    shutdownAndAssertStoresInaccessible(storageManager, replicas);
    assertEquals(0, getCounterValue(counters, DiskManager.class.getName(), "TotalStoreShutdownFailures"));
}
Also used : DiskId(com.github.ambry.clustermap.DiskId) Arrays(java.util.Arrays) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) DataNodeId(com.github.ambry.clustermap.DataNodeId) Random(java.util.Random) ByteBuffer(java.nio.ByteBuffer) MockHelixManagerFactory(com.github.ambry.clustermap.MockHelixManagerFactory) JSONObject(org.json.JSONObject) TestUtils(com.github.ambry.utils.TestUtils) Map(java.util.Map) After(org.junit.After) Counter(com.codahale.metrics.Counter) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) AccountStatsStore(com.github.ambry.accountstats.AccountStatsStore) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) StatsSnapshot(com.github.ambry.server.StatsSnapshot) Callback(com.github.ambry.commons.Callback) TransitionErrorCode(com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode) InMemAccountService(com.github.ambry.account.InMemAccountService) PartitionId(com.github.ambry.clustermap.PartitionId) HashMap(java.util.HashMap) HardwareState(com.github.ambry.clustermap.HardwareState) AmbryStatsReport(com.github.ambry.server.AmbryStatsReport) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TestUtils(com.github.ambry.clustermap.TestUtils) SystemTime(com.github.ambry.utils.SystemTime) Before(org.junit.Before) BlobStoreTest(com.github.ambry.store.BlobStoreTest) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) ReplicaState(com.github.ambry.clustermap.ReplicaState) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StoreConfig(com.github.ambry.config.StoreConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) Pair(com.github.ambry.utils.Pair) HelixParticipant(com.github.ambry.clustermap.HelixParticipant) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IOException(java.io.IOException) Test(org.junit.Test) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) HelixAdmin(org.apache.helix.HelixAdmin) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap)
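
The four-step failure injection in the comment above hinges on one filesystem fact: a regular file occupying a directory's name makes directory creation fail. A minimal, self-contained sketch of that trick (the path and directory name here are hypothetical, not Ambry's actual reserve-pool layout):

import java.io.File;
import java.io.IOException;

public class DirBlockedByFileDemo {
    public static void main(String[] args) throws IOException {
        // Stand-in for a file size directory such as reserve_size_<segmentSize>.
        File fileSizeDir = new File(System.getProperty("java.io.tmpdir"), "reserve_size_1024");
        if (fileSizeDir.exists() && !fileSizeDir.delete()) {
            throw new IOException("could not clear " + fileSizeDir);
        }
        // Step 3 above: create a regular file with the directory's name.
        if (!fileSizeDir.createNewFile()) {
            throw new IOException("could not create blocking file " + fileSizeDir);
        }
        // Step 4: any attempt to (re)create the directory now fails, which is
        // what makes the pool initialization fail when the managers start.
        System.out.println("mkdirs succeeded? " + fileSizeDir.mkdirs()); // prints false
        fileSizeDir.delete(); // clean up the blocking file
    }
}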

Example 77 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

The class StorageManagerTest, method residualDirDeletionTest.

/**
 * Test that the residual directory associated with a removed replica is deleted correctly during the OFFLINE -> DROPPED transition.
 * @throws Exception
 */
@Test
public void residualDirDeletionTest() throws Exception {
    MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> replicas = clusterMap.getReplicaIds(localNode);
    MockClusterParticipant mockHelixParticipant = Mockito.spy(new MockClusterParticipant());
    doNothing().when(mockHelixParticipant).setPartitionDisabledState(anyString(), anyBoolean());
    // create an extra store dir at one of the mount paths
    String mountPath = replicas.get(0).getMountPath();
    String extraPartitionName = "1000";
    File extraStoreDir = new File(mountPath, extraPartitionName);
    assertTrue("Can't create an extra store dir", extraStoreDir.mkdir());
    StorageManager storageManager = createStorageManager(localNode, metricRegistry, Collections.singletonList(mockHelixParticipant));
    storageManager.start();
    // failure case: IOException when deleting store dir
    File invalidDir = new File(extraStoreDir.getAbsolutePath(), "invalidDir");
    invalidDir.deleteOnExit();
    assertTrue("Couldn't create dir within store dir", invalidDir.mkdir());
    assertTrue("Could not make unreadable", invalidDir.setReadable(false));
    try {
        mockHelixParticipant.onPartitionBecomeDroppedFromOffline(extraPartitionName);
        fail("should fail because there is IOException when deleting store dir");
    } catch (StateTransitionException e) {
        assertEquals("Error code is not expected", ReplicaOperationFailure, e.getErrorCode());
    }
    assertTrue("Could not make readable", invalidDir.setReadable(true));
    // trigger OFFLINE -> DROPPED transition on extra partition. Storage manager should delete residual store dir.
    mockHelixParticipant.onPartitionBecomeDroppedFromOffline(extraPartitionName);
    verify(mockHelixParticipant).setPartitionDisabledState(extraPartitionName, false);
    assertFalse("Extra store dir should not exist", extraStoreDir.exists());
    shutdownAndAssertStoresInaccessible(storageManager, replicas);
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) File(java.io.File) ReplicaId(com.github.ambry.clustermap.ReplicaId) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)
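
The failure case in this test relies on POSIX permission semantics: a directory that cannot be read cannot be listed, so a recursive delete throws before it can empty the tree. A hedged sketch of that mechanism, using a simplified stand-in for a recursive delete such as Utils.deleteFileOrDirectory (setReadable(false) is typically a no-op on Windows):

import java.io.File;
import java.io.IOException;

public class UnreadableDirDemo {
    // Simplified recursive delete; the real helper in Ambry differs in detail.
    static void deleteRecursively(File f) throws IOException {
        File[] children = f.listFiles();
        if (f.isDirectory() && children == null) {
            // listFiles() returns null when the directory cannot be read.
            throw new IOException("cannot list " + f);
        }
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }
        if (!f.delete()) {
            throw new IOException("cannot delete " + f);
        }
    }

    public static void main(String[] args) throws IOException {
        File store = new File(System.getProperty("java.io.tmpdir"), "extraStore");
        File inner = new File(store, "invalidDir");
        if (!inner.mkdirs()) {
            throw new IOException("setup failed for " + inner);
        }
        inner.setReadable(false); // may be a no-op on Windows
        try {
            deleteRecursively(store); // throws: invalidDir cannot be listed
        } catch (IOException expected) {
            System.out.println("failed as expected: " + expected.getMessage());
        } finally {
            inner.setReadable(true);
            if (store.exists()) {
                deleteRecursively(store); // cleanup now succeeds
            }
        }
    }
}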

Example 78 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

The class ServerReadPerformance, method main.

public static void main(String[] args) {
    ConnectionPool connectionPool = null;
    FileWriter writer = null;
    try {
        OptionParser parser = new OptionParser();
        ArgumentAcceptingOptionSpec<String> logToReadOpt = parser.accepts("logToRead", "The log that needs to be replayed for traffic").withRequiredArg().describedAs("log_to_read").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> hardwareLayoutOpt = parser.accepts("hardwareLayout", "The path of the hardware layout file").withRequiredArg().describedAs("hardware_layout").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> partitionLayoutOpt = parser.accepts("partitionLayout", "The path of the partition layout file").withRequiredArg().describedAs("partition_layout").ofType(String.class);
        ArgumentAcceptingOptionSpec<Integer> readsPerSecondOpt = parser.accepts("readsPerSecond", "The rate at which reads need to be performed").withRequiredArg().describedAs("The number of reads per second").ofType(Integer.class).defaultsTo(1000);
        ArgumentAcceptingOptionSpec<Long> measurementIntervalOpt = parser.accepts("measurementInterval", "The interval in second to report performance result").withOptionalArg().describedAs("The CPU time spent for getting blobs, not wall time").ofType(Long.class).defaultsTo(300L);
        ArgumentAcceptingOptionSpec<Boolean> verboseLoggingOpt = parser.accepts("enableVerboseLogging", "Enables verbose logging").withOptionalArg().describedAs("Enable verbose logging").ofType(Boolean.class).defaultsTo(false);
        ArgumentAcceptingOptionSpec<String> sslEnabledDatacentersOpt = parser.accepts("sslEnabledDatacenters", "Datacenters to which ssl should be enabled").withOptionalArg().describedAs("Comma separated list").ofType(String.class).defaultsTo("");
        ArgumentAcceptingOptionSpec<String> sslKeystorePathOpt = parser.accepts("sslKeystorePath", "SSL key store path").withOptionalArg().describedAs("The file path of SSL key store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslKeystoreTypeOpt = parser.accepts("sslKeystoreType", "SSL key store type").withOptionalArg().describedAs("The type of SSL key store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslTruststorePathOpt = parser.accepts("sslTruststorePath", "SSL trust store path").withOptionalArg().describedAs("The file path of SSL trust store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslKeystorePasswordOpt = parser.accepts("sslKeystorePassword", "SSL key store password").withOptionalArg().describedAs("The password of SSL key store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslKeyPasswordOpt = parser.accepts("sslKeyPassword", "SSL key password").withOptionalArg().describedAs("The password of SSL private key").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslTruststorePasswordOpt = parser.accepts("sslTruststorePassword", "SSL trust store password").withOptionalArg().describedAs("The password of SSL trust store").defaultsTo("").ofType(String.class);
        ArgumentAcceptingOptionSpec<String> sslCipherSuitesOpt = parser.accepts("sslCipherSuites", "SSL enabled cipher suites").withOptionalArg().describedAs("Comma separated list").defaultsTo("TLS_RSA_WITH_AES_128_CBC_SHA").ofType(String.class);
        OptionSet options = parser.parse(args);
        ArrayList<OptionSpec> listOpt = new ArrayList<>();
        listOpt.add(logToReadOpt);
        listOpt.add(hardwareLayoutOpt);
        listOpt.add(partitionLayoutOpt);
        ToolUtils.ensureOrExit(listOpt, options, parser);
        long measurementIntervalNs = options.valueOf(measurementIntervalOpt) * SystemTime.NsPerSec;
        ToolUtils.validateSSLOptions(options, parser, sslEnabledDatacentersOpt, sslKeystorePathOpt, sslKeystoreTypeOpt, sslTruststorePathOpt, sslKeystorePasswordOpt, sslKeyPasswordOpt, sslTruststorePasswordOpt);
        String sslEnabledDatacenters = options.valueOf(sslEnabledDatacentersOpt);
        Properties sslProperties;
        if (sslEnabledDatacenters.length() != 0) {
            sslProperties = ToolUtils.createSSLProperties(sslEnabledDatacenters, options.valueOf(sslKeystorePathOpt), options.valueOf(sslKeystoreTypeOpt), options.valueOf(sslKeystorePasswordOpt), options.valueOf(sslKeyPasswordOpt), options.valueOf(sslTruststorePathOpt), options.valueOf(sslTruststorePasswordOpt), options.valueOf(sslCipherSuitesOpt));
        } else {
            sslProperties = new Properties();
        }
        ToolUtils.addClusterMapProperties(sslProperties);
        String logToRead = options.valueOf(logToReadOpt);
        int readsPerSecond = options.valueOf(readsPerSecondOpt);
        boolean enableVerboseLogging = options.has(verboseLoggingOpt);
        if (enableVerboseLogging) {
            System.out.println("Enabled verbose logging");
        }
        File logFile = new File(System.getProperty("user.dir"), "readperfresult");
        writer = new FileWriter(logFile);
        String hardwareLayoutPath = options.valueOf(hardwareLayoutOpt);
        String partitionLayoutPath = options.valueOf(partitionLayoutOpt);
        ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(sslProperties));
        ClusterMap map = ((ClusterAgentsFactory) Utils.getObj(clusterMapConfig.clusterMapClusterAgentsFactory, clusterMapConfig, hardwareLayoutPath, partitionLayoutPath)).getClusterMap();
        final AtomicLong totalTimeTaken = new AtomicLong(0);
        final AtomicLong totalReads = new AtomicLong(0);
        final AtomicBoolean shutdown = new AtomicBoolean(false);
        // attach shutdown handler to catch control-c
        Runtime.getRuntime().addShutdownHook(new Thread() {

            public void run() {
                try {
                    System.out.println("Shutdown invoked");
                    shutdown.set(true);
                    String message = "Total reads : " + totalReads.get() + "  Total time taken : " + totalTimeTaken.get() + " Nano Seconds  Average time taken per read " + ((double) totalTimeTaken.get()) / SystemTime.NsPerSec / totalReads.get() + " Seconds";
                    System.out.println(message);
                } catch (Exception e) {
                    System.out.println("Error while shutting down " + e);
                }
            }
        });
        final BufferedReader br = new BufferedReader(new FileReader(logToRead));
        Throttler throttler = new Throttler(readsPerSecond, 100, true, SystemTime.getInstance());
        String line;
        ConnectedChannel channel = null;
        ConnectionPoolConfig connectionPoolConfig = new ConnectionPoolConfig(new VerifiableProperties(new Properties()));
        VerifiableProperties vProps = new VerifiableProperties(sslProperties);
        SSLConfig sslConfig = new SSLConfig(vProps);
        clusterMapConfig = new ClusterMapConfig(vProps);
        connectionPool = new BlockingChannelConnectionPool(connectionPoolConfig, sslConfig, clusterMapConfig, new MetricRegistry());
        long totalNumberOfGetBlobs = 0;
        long totalLatencyForGetBlobs = 0;
        ArrayList<Long> latenciesForGetBlobs = new ArrayList<Long>();
        long maxLatencyForGetBlobs = 0;
        long minLatencyForGetBlobs = Long.MAX_VALUE;
        while ((line = br.readLine()) != null) {
            String[] id = line.split("-");
            BlobData blobData = null;
            BlobId blobId = new BlobId(id[1], map);
            ArrayList<BlobId> blobIds = new ArrayList<BlobId>();
            blobIds.add(blobId);
            for (ReplicaId replicaId : blobId.getPartition().getReplicaIds()) {
                long startTimeGetBlob = 0;
                ArrayList<PartitionRequestInfo> partitionRequestInfoList = new ArrayList<PartitionRequestInfo>();
                try {
                    partitionRequestInfoList.clear();
                    PartitionRequestInfo partitionRequestInfo = new PartitionRequestInfo(blobId.getPartition(), blobIds);
                    partitionRequestInfoList.add(partitionRequestInfo);
                    GetRequest getRequest = new GetRequest(1, "getperf", MessageFormatFlags.Blob, partitionRequestInfoList, GetOption.None);
                    Port port = replicaId.getDataNodeId().getPortToConnectTo();
                    channel = connectionPool.checkOutConnection(replicaId.getDataNodeId().getHostname(), port, 10000);
                    startTimeGetBlob = SystemTime.getInstance().nanoseconds();
                    channel.send(getRequest);
                    DataInputStream receiveStream = channel.receive().getInputStream();
                    GetResponse getResponse = GetResponse.readFrom(receiveStream, map);
                    blobData = MessageFormatRecord.deserializeBlob(getResponse.getInputStream());
                    long sizeRead = 0;
                    byte[] outputBuffer = new byte[(int) blobData.getSize()];
                    ByteBufferOutputStream streamOut = new ByteBufferOutputStream(ByteBuffer.wrap(outputBuffer));
                    ByteBuf buffer = blobData.content();
                    try {
                        buffer.readBytes(streamOut, (int) blobData.getSize());
                    } finally {
                        buffer.release();
                    }
                    long latencyPerBlob = SystemTime.getInstance().nanoseconds() - startTimeGetBlob;
                    totalTimeTaken.addAndGet(latencyPerBlob);
                    latenciesForGetBlobs.add(latencyPerBlob);
                    totalReads.incrementAndGet();
                    totalNumberOfGetBlobs++;
                    totalLatencyForGetBlobs += latencyPerBlob;
                    if (enableVerboseLogging) {
                        System.out.println("Time taken to get blob id " + blobId + " in ms " + latencyPerBlob / SystemTime.NsPerMs);
                    }
                    if (latencyPerBlob > maxLatencyForGetBlobs) {
                        maxLatencyForGetBlobs = latencyPerBlob;
                    }
                    if (latencyPerBlob < minLatencyForGetBlobs) {
                        minLatencyForGetBlobs = latencyPerBlob;
                    }
                    if (totalLatencyForGetBlobs >= measurementIntervalNs) {
                        Collections.sort(latenciesForGetBlobs);
                        int index99 = (int) (latenciesForGetBlobs.size() * 0.99) - 1;
                        int index95 = (int) (latenciesForGetBlobs.size() * 0.95) - 1;
                        String message = totalNumberOfGetBlobs + "," + (double) latenciesForGetBlobs.get(index99) / SystemTime.NsPerSec + "," + (double) latenciesForGetBlobs.get(index95) / SystemTime.NsPerSec + "," + ((double) totalLatencyForGetBlobs / SystemTime.NsPerSec / totalNumberOfGetBlobs);
                        System.out.println(message);
                        writer.write(message + "\n");
                        totalLatencyForGetBlobs = 0;
                        latenciesForGetBlobs.clear();
                        totalNumberOfGetBlobs = 0;
                        maxLatencyForGetBlobs = 0;
                        minLatencyForGetBlobs = Long.MAX_VALUE;
                    }
                    partitionRequestInfoList.clear();
                    partitionRequestInfo = new PartitionRequestInfo(blobId.getPartition(), blobIds);
                    partitionRequestInfoList.add(partitionRequestInfo);
                    GetRequest getRequestProperties = new GetRequest(1, "getperf", MessageFormatFlags.BlobProperties, partitionRequestInfoList, GetOption.None);
                    long startTimeGetBlobProperties = SystemTime.getInstance().nanoseconds();
                    channel.send(getRequestProperties);
                    DataInputStream receivePropertyStream = channel.receive().getInputStream();
                    GetResponse getResponseProperty = GetResponse.readFrom(receivePropertyStream, map);
                    BlobProperties blobProperties = MessageFormatRecord.deserializeBlobProperties(getResponseProperty.getInputStream());
                    long endTimeGetBlobProperties = SystemTime.getInstance().nanoseconds() - startTimeGetBlobProperties;
                    partitionRequestInfoList.clear();
                    partitionRequestInfo = new PartitionRequestInfo(blobId.getPartition(), blobIds);
                    partitionRequestInfoList.add(partitionRequestInfo);
                    GetRequest getRequestUserMetadata = new GetRequest(1, "getperf", MessageFormatFlags.BlobUserMetadata, partitionRequestInfoList, GetOption.None);
                    long startTimeGetBlobUserMetadata = SystemTime.getInstance().nanoseconds();
                    channel.send(getRequestUserMetadata);
                    DataInputStream receiveUserMetadataStream = channel.receive().getInputStream();
                    GetResponse getResponseUserMetadata = GetResponse.readFrom(receiveUserMetadataStream, map);
                    ByteBuffer userMetadata = MessageFormatRecord.deserializeUserMetadata(getResponseUserMetadata.getInputStream());
                    long endTimeGetBlobUserMetadata = SystemTime.getInstance().nanoseconds() - startTimeGetBlobUserMetadata;
                    // delete the blob
                    DeleteRequest deleteRequest = new DeleteRequest(0, "perf", blobId, System.currentTimeMillis());
                    channel.send(deleteRequest);
                    DeleteResponse deleteResponse = DeleteResponse.readFrom(channel.receive().getInputStream());
                    if (deleteResponse.getError() != ServerErrorCode.No_Error) {
                        throw new UnexpectedException("error " + deleteResponse.getError());
                    }
                    throttler.maybeThrottle(1);
                } finally {
                    if (channel != null) {
                        connectionPool.checkInConnection(channel);
                        channel = null;
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("Error in server read performance " + e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                System.out.println("Error when closing writer");
            }
        }
        if (connectionPool != null) {
            connectionPool.shutdown();
        }
    }
}
Also used : ClusterMap(com.github.ambry.clustermap.ClusterMap) Port(com.github.ambry.network.Port) ArrayList(java.util.ArrayList) GetRequest(com.github.ambry.protocol.GetRequest) BlobData(com.github.ambry.messageformat.BlobData) ClusterAgentsFactory(com.github.ambry.clustermap.ClusterAgentsFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Throttler(com.github.ambry.utils.Throttler) UnexpectedException(java.rmi.UnexpectedException) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) ConnectedChannel(com.github.ambry.network.ConnectedChannel) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) BlockingChannelConnectionPool(com.github.ambry.network.BlockingChannelConnectionPool) DeleteResponse(com.github.ambry.protocol.DeleteResponse) AtomicLong(java.util.concurrent.atomic.AtomicLong) File(java.io.File) ConnectionPool(com.github.ambry.network.ConnectionPool) OptionSpec(joptsimple.OptionSpec) ArgumentAcceptingOptionSpec(joptsimple.ArgumentAcceptingOptionSpec) ConnectionPoolConfig(com.github.ambry.config.ConnectionPoolConfig) FileWriter(java.io.FileWriter) BlobProperties(com.github.ambry.messageformat.BlobProperties) Properties(java.util.Properties) ByteBuf(io.netty.buffer.ByteBuf) OptionParser(joptsimple.OptionParser) FileReader(java.io.FileReader) SSLConfig(com.github.ambry.config.SSLConfig) PartitionRequestInfo(com.github.ambry.protocol.PartitionRequestInfo) DataInputStream(java.io.DataInputStream) GetResponse(com.github.ambry.protocol.GetResponse) ByteBuffer(java.nio.ByteBuffer) ReplicaId(com.github.ambry.clustermap.ReplicaId) ByteBufferOutputStream(com.github.ambry.utils.ByteBufferOutputStream) BufferedReader(java.io.BufferedReader) OptionSet(joptsimple.OptionSet) BlobId(com.github.ambry.commons.BlobId) DeleteRequest(com.github.ambry.protocol.DeleteRequest)
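
The reporting block in the loop above computes p99 and p95 as (int) (size * percentile) - 1 over a sorted list; note that the - 1 underflows to a negative index for very small samples. A small sketch of the same nearest-rank computation with a clamp (class and helper names are illustrative, not part of the tool):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public final class LatencyReport {
    // Nearest-rank percentile over a sorted list of nanosecond latencies.
    static double percentileSeconds(List<Long> sortedNanos, double p) {
        int index = Math.max(0, (int) (sortedNanos.size() * p) - 1);
        return sortedNanos.get(index) / (double) 1_000_000_000L;
    }

    public static void main(String[] args) {
        List<Long> latencies = new ArrayList<>(
            Arrays.asList(12_000_000L, 9_000_000L, 40_000_000L, 7_000_000L));
        Collections.sort(latencies);
        System.out.printf("p95=%.6fs p99=%.6fs%n",
            percentileSeconds(latencies, 0.95), percentileSeconds(latencies, 0.99));
    }
}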

Example 79 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

The class DiskReformatter, method reformat.

/**
 * Uses {@link StoreCopier} to convert all the partitions on the given disk (D).
 * 1. Copies one partition on D to a scratch space
 * 2. Using {@link StoreCopier}, performs copies of all other partitions on D using D as a staging area. When a
 * partition is completely copied and verified, the original is replaced by the copy.
 * 3. Copies the partition in the scratch space back onto D.
 * 4. Deletes the folder in the scratch space
 * @param diskMountPath the mount path of the disk to reformat
 * @param scratch the scratch space to use
 * @throws Exception
 */
public void reformat(String diskMountPath, File scratch) throws Exception {
    if (!scratch.exists()) {
        throw new IllegalArgumentException("Scratch space " + scratch + " does not exist");
    }
    List<ReplicaId> replicasOnDisk = new ArrayList<>();
    // populate the replicas on disk
    List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(dataNodeId);
    for (ReplicaId replicaId : replicaIds) {
        if (replicaId.getDiskId().getMountPath().equals(diskMountPath)) {
            replicasOnDisk.add(replicaId);
        }
    }
    if (replicasOnDisk.size() == 0) {
        throw new IllegalArgumentException("There are no replicas on " + diskMountPath + " of " + dataNodeId);
    }
    replicasOnDisk.sort(Comparator.comparingLong(ReplicaId::getCapacityInBytes));
    logger.info("Found {} on {}", replicasOnDisk, diskMountPath);
    // move the last replica id (the largest one) to scratch space
    ReplicaId toMove = replicasOnDisk.get(replicasOnDisk.size() - 1);
    String partIdString = toMove.getPartitionId().toString();
    File scratchSrc = new File(toMove.getReplicaPath());
    File scratchTmp = new File(scratch, partIdString + RELOCATION_IN_PROGRESS_SUFFIX);
    File scratchTgt = new File(scratch, partIdString + RELOCATED_DIR_NAME_SUFFIX);
    if (scratchTmp.exists()) {
        throw new IllegalStateException(scratchTmp + " already exists");
    }
    if (scratchTgt.exists()) {
        throw new IllegalStateException(scratchTgt + " already exists");
    }
    ensureNotInUse(scratchSrc, toMove.getCapacityInBytes());
    logger.info("Moving {} to {}", scratchSrc, scratchTgt);
    FileUtils.moveDirectory(scratchSrc, scratchTmp);
    if (!scratchTmp.renameTo(scratchTgt)) {
        throw new IllegalStateException("Could not rename " + scratchTmp + " to " + scratchTgt);
    }
    // reformat each store, except the one moved, one by one
    for (int i = 0; i < replicasOnDisk.size() - 1; i++) {
        ReplicaId replicaId = replicasOnDisk.get(i);
        partIdString = replicaId.getPartitionId().toString();
        File src = new File(replicaId.getReplicaPath());
        File tgt = new File(replicaId.getMountPath(), partIdString + UNDER_REFORMAT_DIR_NAME_SUFFIX);
        logger.info("Copying {} to {}", src, tgt);
        copy(partIdString, src, tgt, replicaId.getCapacityInBytes());
        logger.info("Deleting {}", src);
        Utils.deleteFileOrDirectory(src);
        logger.info("Renaming {} to {}", tgt, src);
        if (!tgt.renameTo(src)) {
            throw new IllegalStateException("Could not rename " + tgt + " to " + src);
        }
        logger.info("Done reformatting {}", replicaId);
    }
    // reformat the moved store
    logger.info("Copying {} to {}", scratchTgt, scratchSrc);
    copy(toMove.getPartitionId().toString(), scratchTgt, scratchSrc, toMove.getCapacityInBytes());
    logger.info("Deleting {}", scratchTgt);
    Utils.deleteFileOrDirectory(scratchTgt);
    logger.info("Done reformatting {}", toMove);
    logger.info("Done reformatting disk {}", diskMountPath);
}
Also used : ArrayList(java.util.ArrayList) File(java.io.File) ReplicaId(com.github.ambry.clustermap.ReplicaId)
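
The moves in reformat() lean on a two-phase pattern: relocate under an in-progress name first, then do a cheap rename to the final name, so a crash never leaves a directory that looks complete but isn't. A condensed sketch of that pattern (the suffix strings and method name here are illustrative; FileUtils.moveDirectory is the same commons-io helper the method uses):

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;

public class TwoPhaseMove {
    public static void relocate(File src, File scratch, String partIdString) throws IOException {
        File inProgress = new File(scratch, partIdString + "_relocating"); // phase-1 marker
        File done = new File(scratch, partIdString + "_relocated");        // exists only when complete
        if (inProgress.exists() || done.exists()) {
            throw new IllegalStateException("a previous relocation left state behind");
        }
        // Phase 1: the potentially slow, interruptible move happens under the temp name.
        FileUtils.moveDirectory(src, inProgress);
        // Phase 2: a cheap rename marks the move as complete.
        if (!inProgress.renameTo(done)) {
            throw new IllegalStateException("could not rename " + inProgress + " to " + done);
        }
    }
}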

Example 80 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

The class StorageManagerTest, method updateInstanceConfigFailureTest.

/**
 * Test failure cases when updating InstanceConfig in Helix for both Offline-To-Bootstrap and Inactive-To-Offline.
 */
@Test
public void updateInstanceConfigFailureTest() throws Exception {
    generateConfigs(true, true);
    MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
    MockClusterParticipant mockHelixParticipant = new MockClusterParticipant();
    StorageManager storageManager = createStorageManager(localNode, metricRegistry, Collections.singletonList(mockHelixParticipant));
    storageManager.start();
    // create a new partition and get its replica on local node
    PartitionId newPartition = clusterMap.createNewPartition(Collections.singletonList(localNode));
    // override return value of updateDataNodeInfoInCluster() to mock update InstanceConfig failure
    mockHelixParticipant.updateNodeInfoReturnVal = false;
    try {
        mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
        fail("should fail because updating InstanceConfig didn't succeed during Offline-To-Bootstrap");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StateTransitionException.TransitionErrorCode.HelixUpdateFailure, e.getErrorCode());
    }
    try {
        mockHelixParticipant.onPartitionBecomeOfflineFromInactive(localReplicas.get(0).getPartitionId().toPathString());
        fail("should fail because updating InstanceConfig didn't succeed during Inactive-To-Offline");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StateTransitionException.TransitionErrorCode.HelixUpdateFailure, e.getErrorCode());
    }
    mockHelixParticipant.updateNodeInfoReturnVal = null;
    // mock InstanceConfig not found error (note that MockHelixAdmin is empty by default, so no InstanceConfig is present)
    newPartition = clusterMap.createNewPartition(Collections.singletonList(localNode));
    try {
        mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
        fail("should fail because InstanceConfig is not found during Offline-To-Bootstrap");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StateTransitionException.TransitionErrorCode.HelixUpdateFailure, e.getErrorCode());
    }
    try {
        mockHelixParticipant.onPartitionBecomeOfflineFromInactive(localReplicas.get(1).getPartitionId().toPathString());
        fail("should fail because InstanceConfig is not found during Inactive-To-Offline");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StateTransitionException.TransitionErrorCode.HelixUpdateFailure, e.getErrorCode());
    }
    shutdownAndAssertStoresInaccessible(storageManager, localReplicas);
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ReplicaId(com.github.ambry.clustermap.ReplicaId) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)
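
The four try/fail/catch blocks above all check the same thing: the transition throws a StateTransitionException carrying an expected error code. A hedged sketch of a reusable JUnit 4 helper for that pattern (the class, interface, and method names are illustrative, not part of the test):

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import com.github.ambry.clustermap.StateTransitionException;
import com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode;

final class TransitionAsserts {
    interface ThrowingRunnable {
        void run() throws Exception;
    }

    // Runs the transition and asserts that it fails with the expected error code;
    // any other exception propagates, just as in the original tests.
    static void assertTransitionFails(ThrowingRunnable transition, TransitionErrorCode expected) throws Exception {
        try {
            transition.run();
            fail("expected StateTransitionException with " + expected);
        } catch (StateTransitionException e) {
            assertEquals("Error code doesn't match", expected, e.getErrorCode());
        }
    }
}

With such a helper, each block collapses to a single call, e.g. assertTransitionFails(() -> mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString()), TransitionErrorCode.HelixUpdateFailure).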

Aggregations

ReplicaId (com.github.ambry.clustermap.ReplicaId) 147
Test (org.junit.Test) 83
PartitionId (com.github.ambry.clustermap.PartitionId) 68
MockPartitionId (com.github.ambry.clustermap.MockPartitionId) 60
MockReplicaId (com.github.ambry.clustermap.MockReplicaId) 57
ArrayList (java.util.ArrayList) 55
MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId) 43
DataNodeId (com.github.ambry.clustermap.DataNodeId) 32
MockClusterMap (com.github.ambry.clustermap.MockClusterMap) 31
MetricRegistry (com.codahale.metrics.MetricRegistry) 29
HashMap (java.util.HashMap) 28
HashSet (java.util.HashSet) 25
ClusterMapConfig (com.github.ambry.config.ClusterMapConfig) 24
VerifiableProperties (com.github.ambry.config.VerifiableProperties) 24
BlobStoreTest (com.github.ambry.store.BlobStoreTest) 24
File (java.io.File) 24
List (java.util.List) 21
Map (java.util.Map) 21
Port (com.github.ambry.network.Port) 20
Properties (java.util.Properties) 20