Example 21 with StoreConfig

Use of com.github.ambry.config.StoreConfig in project ambry by LinkedIn.

The class DumpDataTool, method compareIndexEntriesToLogContent.

/**
 * Compares every entry in an index file with those in the log, checking that each blob referenced by the index
 * can be successfully deserialized from the log.
 * @param indexFile the file that represents the index segment.
 * @param checkLogEndOffsetMatch if {@code true}, checks that the end offset of the log matches the end offset of the
 *                               index.
 * @throws Exception if the index segment or the backing log cannot be read.
 */
private void compareIndexEntriesToLogContent(File indexFile, boolean checkLogEndOffsetMatch) throws Exception {
    if (!indexFile.exists()) {
        throw new IllegalArgumentException("File does not exist " + indexFile);
    }
    final Timer.Context context = metrics.compareIndexFileToLogTimeMs.time();
    try {
        logger.info("Dumping index {}", indexFile.getAbsolutePath());
        StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
        StoreConfig config = new StoreConfig(new VerifiableProperties(new Properties()));
        MetricRegistry metricRegistry = new MetricRegistry();
        StoreMetrics storeMetrics = new StoreMetrics(metricRegistry);
        IndexSegment segment = new IndexSegment(indexFile, false, storeKeyFactory, config, storeMetrics, new Journal(indexFile.getParent(), 0, 0), time);
        Offset startOffset = segment.getStartOffset();
        TreeMap<Long, Long> coveredRanges = new TreeMap<>();
        String logFileName = segment.getLogSegmentName().toFilename();
        File logFile = new File(indexFile.getParent(), logFileName);
        if (!logFile.exists()) {
            throw new IllegalStateException("Log file does not exist " + logFile);
        }
        RandomAccessFile randomAccessFile = new RandomAccessFile(logFile, "r");
        long logFileSize = randomAccessFile.getChannel().size();
        List<MessageInfo> entries = new ArrayList<>();
        segment.getEntriesSince(null, new FindEntriesCondition(Long.MAX_VALUE), entries, new AtomicLong(0), false);
        for (MessageInfo entry : entries) {
            StoreKey key = entry.getStoreKey();
            IndexValue value = segment.find(key).last();
            boolean isDeleted = value.isFlagSet(IndexValue.Flags.Delete_Index);
            if (value.getOffset().getOffset() < logFileSize) {
                boolean success = readFromLogAndVerify(randomAccessFile, key.getID(), value, coveredRanges);
                if (success) {
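                    // For delete records, also locate and sanity-check the original put record that the delete points back to.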
                    if (isDeleted) {
                        long originalOffset = value.getOriginalMessageOffset();
                        if (originalOffset != -1) {
                            if (!coveredRanges.containsKey(originalOffset)) {
                                if (startOffset.getOffset() > originalOffset) {
                                    logger.trace("Put Record at {} with delete msg offset {} ignored because it is prior to startOffset {}", originalOffset, value.getOffset(), startOffset);
                                } else {
                                    try {
                                        DumpDataHelper.LogBlobRecordInfo logBlobRecordInfo = DumpDataHelper.readSingleRecordFromLog(randomAccessFile, originalOffset, clusterMap, currentTimeInMs, metrics);
                                        coveredRanges.put(originalOffset, originalOffset + logBlobRecordInfo.totalRecordSize);
                                        logger.trace("PUT Record {} with start offset {} and end offset {} for a delete msg {} at offset {} ", logBlobRecordInfo.blobId, originalOffset, (originalOffset + logBlobRecordInfo.totalRecordSize), key.getID(), value.getOffset());
                                        if (!logBlobRecordInfo.blobId.getID().equals(key.getID())) {
                                            logger.error("BlobId value mismatch between delete record {} and put record {}", key.getID(), logBlobRecordInfo.blobId.getID());
                                        }
                                    } catch (IllegalArgumentException e) {
                                        metrics.logDeserializationError.inc();
                                        logger.error("Illegal arg exception thrown at  {}, while reading blob starting at offset {} with exception: ", randomAccessFile.getChannel().position(), originalOffset, e);
                                    } catch (MessageFormatException e) {
                                        metrics.logDeserializationError.inc();
                                        logger.error("MessageFormat exception thrown at  {} while reading blob starting at offset {} with exception: ", randomAccessFile.getChannel().position(), originalOffset, e);
                                    } catch (EOFException e) {
                                        metrics.endOfFileOnDumpLogError.inc();
                                        logger.error("EOFException thrown at {} ", randomAccessFile.getChannel().position(), e);
                                    } catch (Exception e) {
                                        metrics.unknownErrorOnDumpIndex.inc();
                                        logger.error("Unknown exception thrown {} ", e.getMessage(), e);
                                    }
                                }
                            }
                        }
                    }
                } else {
                    metrics.indexToLogBlobRecordComparisonFailure.inc();
                    logger.error("Failed for key {} with value {} ", key, value);
                }
            } else {
                logger.trace("Blob's {} offset {} is outside of log size {}, with a diff of {}", key, value.getOffset().getOffset(), logFileSize, (value.getOffset().getOffset() - logFileSize));
            }
        }
        throttler.maybeThrottle(entries.size());
        long indexEndOffset = segment.getEndOffset().getOffset();
        if (checkLogEndOffsetMatch && indexEndOffset != randomAccessFile.length()) {
            metrics.indexLogEndOffsetMisMatchError.inc();
            logger.error("Log end offset {} and index end offset {} do not match", randomAccessFile.length(), indexEndOffset);
        }
        logRangesNotCovered(coveredRanges, indexEndOffset);
    } finally {
        context.stop();
    }
}
Also used : ArrayList(java.util.ArrayList) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) EOFException(java.io.EOFException) MessageFormatException(com.github.ambry.messageformat.MessageFormatException) MetricRegistry(com.codahale.metrics.MetricRegistry) TreeMap(java.util.TreeMap) IOException(java.io.IOException) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) AtomicLong(java.util.concurrent.atomic.AtomicLong) Timer(com.codahale.metrics.Timer) RandomAccessFile(java.io.RandomAccessFile) StoreConfig(com.github.ambry.config.StoreConfig) File(java.io.File)
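
For reference, a minimal sketch of the all-defaults configuration pattern used at the top of this example: an empty Properties object means every store setting falls back to its declared default. The class name is hypothetical.

import java.util.Properties;

import com.github.ambry.config.StoreConfig;
import com.github.ambry.config.VerifiableProperties;

// Hypothetical class name; illustrates the default-config pattern from Example 21.
public class DefaultStoreConfigSketch {
    public static void main(String[] args) {
        // An empty Properties object leaves every "store.*" setting at its default.
        StoreConfig config = new StoreConfig(new VerifiableProperties(new Properties()));
        // storeKeyFactory is one of the public config fields (see Example 22).
        System.out.println("Default store key factory: " + config.storeKeyFactory);
    }
}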

Example 22 with StoreConfig

Use of com.github.ambry.config.StoreConfig in project ambry by LinkedIn.

The class StoreCopier, method main.

public static void main(String[] args) throws Exception {
    VerifiableProperties properties = ToolUtils.getVerifiableProperties(args);
    CopierConfig config = new CopierConfig(properties);
    StoreConfig storeConfig = new StoreConfig(properties);
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(properties);
    ClusterAgentsFactory clusterAgentsFactory = Utils.getObj(clusterMapConfig.clusterMapClusterAgentsFactory, clusterMapConfig, config.hardwareLayoutFilePath, config.partitionLayoutFilePath);
    try (ClusterMap clusterMap = clusterAgentsFactory.getClusterMap()) {
        StoreKeyFactory storeKeyFactory = Utils.getObj(storeConfig.storeKeyFactory, clusterMap);
        File srcDir = new File(config.srcStoreDirPath);
        File tgtDir = new File(config.tgtStoreDirPath);
        StoreMetrics metrics = new StoreMetrics(clusterMap.getMetricRegistry());
        DiskSpaceAllocator diskSpaceAllocator = new DiskSpaceAllocator(false, null, 0, new StorageManagerMetrics(clusterMap.getMetricRegistry()));
        try (StoreCopier storeCopier = new StoreCopier("src", srcDir, tgtDir, config.storeCapacity, config.fetchSizeInBytes, storeConfig, metrics, storeKeyFactory, new DiskIOScheduler(null), diskSpaceAllocator, Collections.emptyList(), SystemTime.getInstance())) {
            storeCopier.copy(new StoreFindTokenFactory(storeKeyFactory).getNewFindToken());
        }
    }
}
Also used : ClusterMap(com.github.ambry.clustermap.ClusterMap) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) StoreConfig(com.github.ambry.config.StoreConfig) ClusterAgentsFactory(com.github.ambry.clustermap.ClusterAgentsFactory) File(java.io.File)
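
Because the copier resolves its key factory from storeConfig.storeKeyFactory, the factory can be swapped by overriding the corresponding property before the config is built. A minimal sketch of that pattern follows; the class name is hypothetical, and the property key "store.key.factory" is an assumption based on StoreConfig's naming convention, so verify it against the StoreConfig source.

import java.util.Properties;

import com.github.ambry.config.StoreConfig;
import com.github.ambry.config.VerifiableProperties;

// Hypothetical class name; sketches overriding the store key factory.
public class KeyFactoryOverrideSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        // "store.key.factory" is assumed to be the key backing StoreConfig.storeKeyFactory.
        props.setProperty("store.key.factory", "com.github.ambry.commons.BlobIdFactory");
        StoreConfig storeConfig = new StoreConfig(new VerifiableProperties(props));
        System.out.println("Configured key factory: " + storeConfig.storeKeyFactory);
    }
}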

Example 23 with StoreConfig

Use of com.github.ambry.config.StoreConfig in project ambry by LinkedIn.

The class MockReplicationManager, method getReplicationManager.

/**
 * Static construction helper
 * @param verifiableProperties the {@link VerifiableProperties} to use for config.
 * @param storageManager the {@link StorageManager} to use.
 * @param clusterMap the {@link ClusterMap} to use.
 * @param dataNodeId the {@link DataNodeId} to use.
 * @param storeKeyConverterFactory the {@link StoreKeyConverterFactory} to use.
 * @return an instance of {@link MockReplicationManager}
 * @throws ReplicationException
 */
public static MockReplicationManager getReplicationManager(VerifiableProperties verifiableProperties, StorageManager storageManager, ClusterMap clusterMap, DataNodeId dataNodeId, StoreKeyConverterFactory storeKeyConverterFactory) throws ReplicationException {
    ReplicationConfig replicationConfig = new ReplicationConfig(verifiableProperties);
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    return new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, dataNodeId, storeKeyConverterFactory, null);
}
Also used : ReplicationConfig(com.github.ambry.config.ReplicationConfig) StoreConfig(com.github.ambry.config.StoreConfig) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig)
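
All three configs here are derived from one shared VerifiableProperties instance, so a single Properties object carries the replication, cluster-map, and store settings together. A minimal sketch of that pattern, with a hypothetical class name; the cluster-map properties mirror the ones set in Example 24 and are assumed to be the required minimum.

import java.util.Properties;

import com.github.ambry.config.ClusterMapConfig;
import com.github.ambry.config.ReplicationConfig;
import com.github.ambry.config.StoreConfig;
import com.github.ambry.config.VerifiableProperties;

// Hypothetical class name; one properties source, three typed config views.
public class SharedPropertiesSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        // ClusterMapConfig typically requires these three (see Example 24);
        // the exact required set may differ by Ambry release.
        props.setProperty("clustermap.cluster.name", "test-cluster");
        props.setProperty("clustermap.datacenter.name", "DC1");
        props.setProperty("clustermap.host.name", "localhost");
        VerifiableProperties verifiableProperties = new VerifiableProperties(props);
        ReplicationConfig replicationConfig = new ReplicationConfig(verifiableProperties);
        ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
        StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    }
}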

Example 24 with StoreConfig

Use of com.github.ambry.config.StoreConfig in project ambry by LinkedIn.

The class BlobStoreTest, method storeErrorTriggerDisableReplicaTest.

/**
 * Test that replica is correctly disabled when store is shut down due to disk I/O error.
 * @throws Exception
 */
@Test
public void storeErrorTriggerDisableReplicaTest() throws Exception {
    final String RESOURCE_NAME = "0";
    final String CLUSTER_NAME = "BlobStoreTest";
    // setup testing environment
    store.shutdown();
    List<TestUtils.ZkInfo> zkInfoList = new ArrayList<>();
    zkInfoList.add(new TestUtils.ZkInfo(null, "DC1", (byte) 0, 2199, false));
    JSONObject zkJson = constructZkLayoutJSON(zkInfoList);
    properties.setProperty("clustermap.cluster.name", CLUSTER_NAME);
    properties.setProperty("clustermap.datacenter.name", "DC1");
    properties.setProperty("clustermap.host.name", "localhost");
    properties.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
    properties.setProperty("store.io.error.count.to.trigger.shutdown", "1");
    properties.setProperty("store.replica.status.delegate.enable", "true");
    properties.setProperty("store.set.local.partition.state.enabled", "true");
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
    AtomicReference<InstanceConfig> instanceConfig = new AtomicReference<>(new InstanceConfig("localhost"));
    instanceConfig.get().setPort("2222");
    Map<String, List<String>> listMap = new HashMap<>();
    listMap.put(storeId, null);
    ZNRecord znRecord = new ZNRecord("localhost");
    znRecord.setListFields(listMap);
    IdealState idealState = new IdealState(znRecord);
    idealState.setRebalanceMode(IdealState.RebalanceMode.SEMI_AUTO);
    // mock helix related components
    HelixAdmin mockHelixAdmin = mock(HelixAdmin.class);
    when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).then(invocation -> instanceConfig.get());
    when(mockHelixAdmin.getResourcesInCluster(eq(CLUSTER_NAME))).thenReturn(Collections.singletonList(RESOURCE_NAME));
    when(mockHelixAdmin.getResourceIdealState(eq(CLUSTER_NAME), eq(RESOURCE_NAME))).thenReturn(idealState);
    when(mockHelixAdmin.setInstanceConfig(any(), any(), any())).then(invocation -> {
        instanceConfig.set(invocation.getArgument(2));
        return true;
    });
    HelixManager mockHelixManager = mock(HelixManager.class);
    when(mockHelixManager.getClusterManagmentTool()).thenReturn(mockHelixAdmin);
    HelixFactory mockHelixFactory = new HelixFactory() {

        @Override
        public HelixManager getZKHelixManager(String clusterName, String instanceName, InstanceType instanceType, String zkAddr) {
            return mockHelixManager;
        }
    };
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockParticipant = new MockHelixParticipant(clusterMapConfig, mockHelixFactory);
    mockParticipant.overrideDisableReplicaMethod = false;
    ReplicaStatusDelegate replicaStatusDelegate = new ReplicaStatusDelegate(mockParticipant);
    BlobStore testStore = createBlobStore(getMockAmbryReplica(clusterMapConfig, tempDirStr), new StoreConfig(new VerifiableProperties(properties)), Collections.singletonList(replicaStatusDelegate));
    testStore.start();
    assertTrue("Store should start successfully", testStore.isStarted());
    // create corrupted write set
    MessageInfo corruptedInfo = new MessageInfo(getUniqueId(), PUT_RECORD_SIZE, Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), Utils.Infinite_Time);
    MessageWriteSet corruptedWriteSet = new MockMessageWriteSet(Collections.singletonList(corruptedInfo), Collections.singletonList(ByteBuffer.allocate(PUT_RECORD_SIZE)), new StoreException(StoreException.IO_ERROR_STR, StoreErrorCodes.IOError));
    // 1. mock failure case
    when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).thenReturn(null);
    // trigger store exception when calling store.put()
    try {
        testStore.put(corruptedWriteSet);
        fail("should throw exception");
    } catch (StoreException e) {
        assertEquals("Mismatch in error code", StoreErrorCodes.IOError, e.getErrorCode());
    }
    assertNull("Disabled partition list should be null as disabling replica didn't succeed", instanceConfig.get().getDisabledPartitions(RESOURCE_NAME));
    // 2. mock success case
    when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).then(invocation -> instanceConfig.get());
    testStore.start();
    assertTrue("Store should start successfully", testStore.isStarted());
    try {
        testStore.put(corruptedWriteSet);
        fail("should throw exception");
    } catch (StoreException e) {
        assertEquals("Mismatch in error code", StoreErrorCodes.IOError, e.getErrorCode());
    }
    assertEquals("Disabled partition name is not expected", storeId, instanceConfig.get().getDisabledPartitions(RESOURCE_NAME).get(0));
    // verify "DISABLED" list in InstanceConfig has correct partition id.
    assertEquals("Disabled replica list is not expected", Collections.singletonList(storeId), getDisabledReplicas(instanceConfig.get()));
    // 3. mock disk is replaced case, restart should succeed
    testStore.start();
    assertNull("Disabled partition list should be null as restart will enable same replica", instanceConfig.get().getDisabledPartitions(RESOURCE_NAME));
    assertTrue("Disabled replica list should be empty", getDisabledReplicas(instanceConfig.get()).isEmpty());
    testStore.shutdown();
    reloadStore();
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HelixAdmin(org.apache.helix.HelixAdmin) IdealState(org.apache.helix.model.IdealState) TestUtils(com.github.ambry.utils.TestUtils) TestUtils(com.github.ambry.clustermap.TestUtils) ReplicaStatusDelegate(com.github.ambry.clustermap.ReplicaStatusDelegate) InstanceConfig(org.apache.helix.model.InstanceConfig) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) List(java.util.List) InstanceType(org.apache.helix.InstanceType) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) HelixManager(org.apache.helix.HelixManager) HelixFactory(com.github.ambry.clustermap.HelixFactory) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) AtomicReference(java.util.concurrent.atomic.AtomicReference) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) JSONObject(org.json.JSONObject) StoreConfig(com.github.ambry.config.StoreConfig) Test(org.junit.Test)
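
The store-side behavior in this test hinges on three properties. A minimal sketch isolating just the StoreConfig portion; the class name is hypothetical, and the property keys are taken verbatim from the test above.

import java.util.Properties;

import com.github.ambry.config.StoreConfig;
import com.github.ambry.config.VerifiableProperties;

// Hypothetical class name; sketches the error-shutdown config from Example 24.
public class ErrorShutdownConfigSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        // Shut the store down after a single disk I/O error.
        props.setProperty("store.io.error.count.to.trigger.shutdown", "1");
        // Report replica status changes through a ReplicaStatusDelegate.
        props.setProperty("store.replica.status.delegate.enable", "true");
        props.setProperty("store.set.local.partition.state.enabled", "true");
        StoreConfig storeConfig = new StoreConfig(new VerifiableProperties(props));
    }
}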

Example 25 with StoreConfig

Use of com.github.ambry.config.StoreConfig in project ambry by LinkedIn.

The class BlobStoreTest, method catchStoreExceptionAndVerifyErrorCode.

// helpers
// general
/**
 * Verifies that a store method call surfaces a store exception and that the store handles it correctly. It also
 * verifies that, when the exception is genuinely caused by a disk I/O error, the store shutdown process skips any
 * disk flush and leaves no clean-shutdown file in the directory.
 * @param methodCaller the method caller to invoke store methods to trigger store exception
 * @throws StoreException
 */
private void catchStoreExceptionAndVerifyErrorCode(StoreMethodCaller methodCaller) throws StoreException {
    properties.put("store.io.error.count.to.trigger.shutdown", "1");
    ReplicaStatusDelegate mockDelegate = mock(ReplicaStatusDelegate.class);
    MockBlobStore mockBlobStore = new MockBlobStore(getMockReplicaId(tempDirStr), new StoreConfig(new VerifiableProperties(properties)), Collections.singletonList(mockDelegate), new StoreMetrics(new MetricRegistry()));
    // First, verify that a normal shutdown will create a clean shutdown file in the store directory.
    mockBlobStore.start();
    mockBlobStore.shutdown();
    File shutdownFile = new File(tempDir, PersistentIndex.CLEAN_SHUTDOWN_FILENAME);
    assertTrue("Clean shutdown file should exist", shutdownFile.exists());
    mockBlobStore.start();
    // Second, verify that store won't be shut down if Unknown_Error occurred.
    StoreException storeExceptionInIndex = new StoreException("Mock Unknown error", StoreErrorCodes.Unknown_Error);
    mockBlobStore.setPersistentIndex(storeExceptionInIndex);
    try {
        methodCaller.invoke(mockBlobStore);
        fail("should fail");
    } catch (StoreException e) {
        assertEquals("Mismatch in StoreErrorCode", StoreErrorCodes.Unknown_Error, e.getErrorCode());
    }
    assertTrue("Store should not be shut down", mockBlobStore.isStarted());
    assertEquals("Mismatch in store io error count", 0, mockBlobStore.getErrorCount().get());
    // Third, verify that store will be shut down if IOError occurred (disk I/O error)
    storeExceptionInIndex = new StoreException("Mock disk I/O error", StoreErrorCodes.IOError);
    mockBlobStore.setPersistentIndex(storeExceptionInIndex);
    try {
        methodCaller.invoke(mockBlobStore);
        fail("should fail");
    } catch (StoreException e) {
        assertEquals("Mismatch in StoreErrorCode", StoreErrorCodes.IOError, e.getErrorCode());
    }
    assertFalse("Store should be shutdown after error count exceeded threshold", mockBlobStore.isStarted());
    // In the end, verify that store shutdown would skip any disk flush operation if it is triggered by a real disk I/O error.
    assertFalse("When encountering disk I/O error, clean shutdown file shouldn't exist", shutdownFile.exists());
}
Also used : ReplicaStatusDelegate(com.github.ambry.clustermap.ReplicaStatusDelegate) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) StoreConfig(com.github.ambry.config.StoreConfig) File(java.io.File)

Aggregations

StoreConfig (com.github.ambry.config.StoreConfig)60 VerifiableProperties (com.github.ambry.config.VerifiableProperties)50 MetricRegistry (com.codahale.metrics.MetricRegistry)34 Test (org.junit.Test)29 File (java.io.File)18 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)17 ArrayList (java.util.ArrayList)15 Properties (java.util.Properties)15 ClusterMap (com.github.ambry.clustermap.ClusterMap)10 BlobIdFactory (com.github.ambry.commons.BlobIdFactory)9 InMemAccountService (com.github.ambry.account.InMemAccountService)8 DataNodeId (com.github.ambry.clustermap.DataNodeId)8 CountDownLatch (java.util.concurrent.CountDownLatch)8 MockTime (com.github.ambry.utils.MockTime)7 HashSet (java.util.HashSet)7 ClusterAgentsFactory (com.github.ambry.clustermap.ClusterAgentsFactory)6 ReplicaId (com.github.ambry.clustermap.ReplicaId)6 ReplicaStatusDelegate (com.github.ambry.clustermap.ReplicaStatusDelegate)6 DiskManagerConfig (com.github.ambry.config.DiskManagerConfig)6 ReplicationConfig (com.github.ambry.config.ReplicationConfig)6