Use of com.github.ambry.config.StoreConfig in project ambry by linkedin.
The class DumpDataTool, method compareIndexEntriesToLogContent.
/**
 * Compares every entry in an index file with those in the log, and checks that each blob referenced by the
 * index can be successfully deserialized from the log.
 * @param indexFile the file that represents the index segment.
 * @param checkLogEndOffsetMatch if {@code true}, checks that the end offset of the log matches the end offset of
 *        the index.
 * @throws Exception if the index segment or the log cannot be read.
 */
private void compareIndexEntriesToLogContent(File indexFile, boolean checkLogEndOffsetMatch) throws Exception {
  if (!indexFile.exists()) {
    throw new IllegalArgumentException("File does not exist " + indexFile);
  }
  final Timer.Context context = metrics.compareIndexFileToLogTimeMs.time();
  try {
    logger.info("Dumping index {}", indexFile.getAbsolutePath());
    StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
    StoreConfig config = new StoreConfig(new VerifiableProperties(new Properties()));
    MetricRegistry metricRegistry = new MetricRegistry();
    StoreMetrics storeMetrics = new StoreMetrics(metricRegistry);
    IndexSegment segment = new IndexSegment(indexFile, false, storeKeyFactory, config, storeMetrics,
        new Journal(indexFile.getParent(), 0, 0), time);
    Offset startOffset = segment.getStartOffset();
    TreeMap<Long, Long> coveredRanges = new TreeMap<>();
    String logFileName = segment.getLogSegmentName().toFilename();
    File logFile = new File(indexFile.getParent(), logFileName);
    if (!logFile.exists()) {
      throw new IllegalStateException("Log file does not exist " + logFile);
    }
    RandomAccessFile randomAccessFile = new RandomAccessFile(logFile, "r");
    long logFileSize = randomAccessFile.getChannel().size();
    List<MessageInfo> entries = new ArrayList<>();
    segment.getEntriesSince(null, new FindEntriesCondition(Long.MAX_VALUE), entries, new AtomicLong(0), false);
    for (MessageInfo entry : entries) {
      StoreKey key = entry.getStoreKey();
      IndexValue value = segment.find(key).last();
      boolean isDeleted = value.isFlagSet(IndexValue.Flags.Delete_Index);
      if (value.getOffset().getOffset() < logFileSize) {
        boolean success = readFromLogAndVerify(randomAccessFile, key.getID(), value, coveredRanges);
        if (success) {
          if (isDeleted) {
            long originalOffset = value.getOriginalMessageOffset();
            if (originalOffset != -1) {
              if (!coveredRanges.containsKey(originalOffset)) {
                if (startOffset.getOffset() > originalOffset) {
                  logger.trace("Put Record at {} with delete msg offset {} ignored because it is prior to startOffset {}",
                      originalOffset, value.getOffset(), startOffset);
                } else {
                  try {
                    DumpDataHelper.LogBlobRecordInfo logBlobRecordInfo =
                        DumpDataHelper.readSingleRecordFromLog(randomAccessFile, originalOffset, clusterMap,
                            currentTimeInMs, metrics);
                    coveredRanges.put(originalOffset, originalOffset + logBlobRecordInfo.totalRecordSize);
                    logger.trace("PUT Record {} with start offset {} and end offset {} for a delete msg {} at offset {} ",
                        logBlobRecordInfo.blobId, originalOffset, (originalOffset + logBlobRecordInfo.totalRecordSize),
                        key.getID(), value.getOffset());
                    if (!logBlobRecordInfo.blobId.getID().equals(key.getID())) {
                      logger.error("BlobId value mismatch between delete record {} and put record {}", key.getID(),
                          logBlobRecordInfo.blobId.getID());
                    }
                  } catch (IllegalArgumentException e) {
                    metrics.logDeserializationError.inc();
                    logger.error("Illegal arg exception thrown at {}, while reading blob starting at offset {} with exception: ",
                        randomAccessFile.getChannel().position(), originalOffset, e);
                  } catch (MessageFormatException e) {
                    metrics.logDeserializationError.inc();
                    logger.error("MessageFormat exception thrown at {} while reading blob starting at offset {} with exception: ",
                        randomAccessFile.getChannel().position(), originalOffset, e);
                  } catch (EOFException e) {
                    metrics.endOfFileOnDumpLogError.inc();
                    logger.error("EOFException thrown at {} ", randomAccessFile.getChannel().position(), e);
                  } catch (Exception e) {
                    metrics.unknownErrorOnDumpIndex.inc();
                    logger.error("Unknown exception thrown {} ", e.getMessage(), e);
                  }
                }
              }
            }
          }
        } else {
          metrics.indexToLogBlobRecordComparisonFailure.inc();
          logger.error("Failed for key {} with value {} ", key, value);
        }
      } else {
        logger.trace("Blob's {} offset {} is outside of log size {}, with a diff of {}", key,
            value.getOffset().getOffset(), logFileSize, (value.getOffset().getOffset() - logFileSize));
      }
    }
    throttler.maybeThrottle(entries.size());
    long indexEndOffset = segment.getEndOffset().getOffset();
    if (checkLogEndOffsetMatch && indexEndOffset != randomAccessFile.length()) {
      metrics.indexLogEndOffsetMisMatchError.inc();
      logger.error("Log end offset {} and index end offset {} do not match", randomAccessFile.length(), indexEndOffset);
    }
    logRangesNotCovered(coveredRanges, indexEndOffset);
  } finally {
    context.stop();
  }
}
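Note that the tool above builds its StoreConfig from an empty Properties object, so every store.* setting falls back to its compiled-in default. A minimal sketch of the same construction pattern with an explicit override; the property key shown is an assumption chosen for illustration, not something taken from the snippet above:

// Sketch only: "store.index.max.number.of.inmem.elements" is assumed to be a
// valid StoreConfig key; any store.* key that StoreConfig understands works the same way.
Properties props = new Properties();
props.setProperty("store.index.max.number.of.inmem.elements", "10000");
StoreConfig config = new StoreConfig(new VerifiableProperties(props));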
Use of com.github.ambry.config.StoreConfig in project ambry by linkedin.
The class StoreCopier, method main.
public static void main(String[] args) throws Exception {
  VerifiableProperties properties = ToolUtils.getVerifiableProperties(args);
  CopierConfig config = new CopierConfig(properties);
  StoreConfig storeConfig = new StoreConfig(properties);
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(properties);
  ClusterAgentsFactory clusterAgentsFactory = Utils.getObj(clusterMapConfig.clusterMapClusterAgentsFactory,
      clusterMapConfig, config.hardwareLayoutFilePath, config.partitionLayoutFilePath);
  try (ClusterMap clusterMap = clusterAgentsFactory.getClusterMap()) {
    StoreKeyFactory storeKeyFactory = Utils.getObj(storeConfig.storeKeyFactory, clusterMap);
    File srcDir = new File(config.srcStoreDirPath);
    File tgtDir = new File(config.tgtStoreDirPath);
    StoreMetrics metrics = new StoreMetrics(clusterMap.getMetricRegistry());
    DiskSpaceAllocator diskSpaceAllocator =
        new DiskSpaceAllocator(false, null, 0, new StorageManagerMetrics(clusterMap.getMetricRegistry()));
    try (StoreCopier storeCopier = new StoreCopier("src", srcDir, tgtDir, config.storeCapacity,
        config.fetchSizeInBytes, storeConfig, metrics, storeKeyFactory, new DiskIOScheduler(null),
        diskSpaceAllocator, Collections.emptyList(), SystemTime.getInstance())) {
      storeCopier.copy(new StoreFindTokenFactory(storeKeyFactory).getNewFindToken());
    }
  }
}
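The copy above starts from a brand-new find token, i.e. from the very beginning of the source store. A sketch of resuming from a previously persisted token instead, assuming the FindTokenFactory#getFindToken(DataInputStream) deserialization method and a hypothetical token file; this is not part of the tool above:

// Hypothetical resume path: "saved.token" and its on-disk format are assumptions.
try (DataInputStream tokenStream = new DataInputStream(new FileInputStream(new File("saved.token")))) {
  FindToken resumeToken = new StoreFindTokenFactory(storeKeyFactory).getFindToken(tokenStream);
  storeCopier.copy(resumeToken);
}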
Use of com.github.ambry.config.StoreConfig in project ambry by linkedin.
The class MockReplicationManager, method getReplicationManager.
/**
 * Static construction helper.
 * @param verifiableProperties the {@link VerifiableProperties} to use for config.
 * @param storageManager the {@link StorageManager} to use.
 * @param clusterMap the {@link ClusterMap} to use.
 * @param dataNodeId the {@link DataNodeId} to use.
 * @param storeKeyConverterFactory the {@link StoreKeyConverterFactory} to use.
 * @return an instance of {@link MockReplicationManager}.
 * @throws ReplicationException if the {@link MockReplicationManager} could not be constructed.
 */
public static MockReplicationManager getReplicationManager(VerifiableProperties verifiableProperties,
    StorageManager storageManager, ClusterMap clusterMap, DataNodeId dataNodeId,
    StoreKeyConverterFactory storeKeyConverterFactory) throws ReplicationException {
  ReplicationConfig replicationConfig = new ReplicationConfig(verifiableProperties);
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  StoreConfig storeConfig = new StoreConfig(verifiableProperties);
  return new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap,
      dataNodeId, storeKeyConverterFactory, null);
}
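A sketch of how a test might call this helper; everything besides verifiableProperties is assumed to come from the surrounding test fixture (mocks or test doubles):

// All arguments besides verifiableProperties are assumed test fixtures.
MockReplicationManager replicationManager =
    MockReplicationManager.getReplicationManager(verifiableProperties, storageManager, clusterMap, dataNodeId,
        storeKeyConverterFactory);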
Use of com.github.ambry.config.StoreConfig in project ambry by linkedin.
The class BlobStoreTest, method storeErrorTriggerDisableReplicaTest.
/**
 * Tests that a replica is correctly disabled when the store is shut down due to a disk I/O error.
 * @throws Exception
 */
@Test
public void storeErrorTriggerDisableReplicaTest() throws Exception {
  final String RESOURCE_NAME = "0";
  final String CLUSTER_NAME = "BlobStoreTest";
  // setup testing environment
  store.shutdown();
  List<TestUtils.ZkInfo> zkInfoList = new ArrayList<>();
  zkInfoList.add(new TestUtils.ZkInfo(null, "DC1", (byte) 0, 2199, false));
  JSONObject zkJson = constructZkLayoutJSON(zkInfoList);
  properties.setProperty("clustermap.cluster.name", CLUSTER_NAME);
  properties.setProperty("clustermap.datacenter.name", "DC1");
  properties.setProperty("clustermap.host.name", "localhost");
  properties.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
  properties.setProperty("store.io.error.count.to.trigger.shutdown", "1");
  properties.setProperty("store.replica.status.delegate.enable", "true");
  properties.setProperty("store.set.local.partition.state.enabled", "true");
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
  AtomicReference<InstanceConfig> instanceConfig = new AtomicReference<>(new InstanceConfig("localhost"));
  instanceConfig.get().setPort("2222");
  Map<String, List<String>> listMap = new HashMap<>();
  listMap.put(storeId, null);
  ZNRecord znRecord = new ZNRecord("localhost");
  znRecord.setListFields(listMap);
  IdealState idealState = new IdealState(znRecord);
  idealState.setRebalanceMode(IdealState.RebalanceMode.SEMI_AUTO);
  // mock helix related components
  HelixAdmin mockHelixAdmin = mock(HelixAdmin.class);
  when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).then(invocation -> instanceConfig.get());
  when(mockHelixAdmin.getResourcesInCluster(eq(CLUSTER_NAME))).thenReturn(Collections.singletonList(RESOURCE_NAME));
  when(mockHelixAdmin.getResourceIdealState(eq(CLUSTER_NAME), eq(RESOURCE_NAME))).thenReturn(idealState);
  when(mockHelixAdmin.setInstanceConfig(any(), any(), any())).then(invocation -> {
    instanceConfig.set(invocation.getArgument(2));
    return true;
  });
  HelixManager mockHelixManager = mock(HelixManager.class);
  when(mockHelixManager.getClusterManagmentTool()).thenReturn(mockHelixAdmin);
  HelixFactory mockHelixFactory = new HelixFactory() {
    @Override
    public HelixManager getZKHelixManager(String clusterName, String instanceName, InstanceType instanceType,
        String zkAddr) {
      return mockHelixManager;
    }
  };
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant mockParticipant = new MockHelixParticipant(clusterMapConfig, mockHelixFactory);
  mockParticipant.overrideDisableReplicaMethod = false;
  ReplicaStatusDelegate replicaStatusDelegate = new ReplicaStatusDelegate(mockParticipant);
  BlobStore testStore = createBlobStore(getMockAmbryReplica(clusterMapConfig, tempDirStr),
      new StoreConfig(new VerifiableProperties(properties)), Collections.singletonList(replicaStatusDelegate));
  testStore.start();
  assertTrue("Store should start successfully", testStore.isStarted());
  // create corrupted write set
  MessageInfo corruptedInfo = new MessageInfo(getUniqueId(), PUT_RECORD_SIZE, Utils.getRandomShort(TestUtils.RANDOM),
      Utils.getRandomShort(TestUtils.RANDOM), Utils.Infinite_Time);
  MessageWriteSet corruptedWriteSet = new MockMessageWriteSet(Collections.singletonList(corruptedInfo),
      Collections.singletonList(ByteBuffer.allocate(PUT_RECORD_SIZE)),
      new StoreException(StoreException.IO_ERROR_STR, StoreErrorCodes.IOError));
  // 1. mock failure case
  when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).thenReturn(null);
  // trigger store exception when calling store.put()
  try {
    testStore.put(corruptedWriteSet);
    fail("should throw exception");
  } catch (StoreException e) {
    assertEquals("Mismatch in error code", StoreErrorCodes.IOError, e.getErrorCode());
  }
  assertNull("Disabled partition list should be null as disabling replica didn't succeed",
      instanceConfig.get().getDisabledPartitions(RESOURCE_NAME));
  // 2. mock success case
  when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).then(invocation -> instanceConfig.get());
  testStore.start();
  assertTrue("Store should start successfully", testStore.isStarted());
  try {
    testStore.put(corruptedWriteSet);
    fail("should throw exception");
  } catch (StoreException e) {
    assertEquals("Mismatch in error code", StoreErrorCodes.IOError, e.getErrorCode());
  }
  assertEquals("Disabled partition name is not expected", storeId,
      instanceConfig.get().getDisabledPartitions(RESOURCE_NAME).get(0));
  // verify "DISABLED" list in InstanceConfig has correct partition id.
  assertEquals("Disabled replica list is not expected", Collections.singletonList(storeId),
      getDisabledReplicas(instanceConfig.get()));
  // 3. mock disk is replaced case, restart should succeed
  testStore.start();
  assertNull("Disabled partition list should be null as restart will enable same replica",
      instanceConfig.get().getDisabledPartitions(RESOURCE_NAME));
  assertTrue("Disabled replica list should be empty", getDisabledReplicas(instanceConfig.get()).isEmpty());
  testStore.shutdown();
  reloadStore();
}
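The three store.* properties set at the top of the test drive the behavior under verification. A minimal sketch of how such keys surface on the resulting StoreConfig; the field names below are assumptions based on Ambry's convention of camel-casing property keys, not taken from the test:

StoreConfig sc = new StoreConfig(new VerifiableProperties(properties));
// Field names are assumed from the property keys set above.
assertEquals(1, sc.storeIoErrorCountToTriggerShutdown);
assertTrue(sc.storeReplicaStatusDelegateEnable);
assertTrue(sc.storeSetLocalPartitionStateEnabled);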
Use of com.github.ambry.config.StoreConfig in project ambry by linkedin.
The class BlobStoreTest, method catchStoreExceptionAndVerifyErrorCode.
// helpers
// general
/**
 * Verifies that store methods capture store exceptions and handle them correctly. This method also verifies that,
 * if the exception is really caused by a disk I/O error, the store shutdown process skips any disk flush operation
 * and no clean shutdown file exists in the directory.
 * @param methodCaller the method caller to invoke store methods to trigger a store exception.
 * @throws StoreException
 */
private void catchStoreExceptionAndVerifyErrorCode(StoreMethodCaller methodCaller) throws StoreException {
  properties.put("store.io.error.count.to.trigger.shutdown", "1");
  ReplicaStatusDelegate mockDelegate = mock(ReplicaStatusDelegate.class);
  MockBlobStore mockBlobStore = new MockBlobStore(getMockReplicaId(tempDirStr),
      new StoreConfig(new VerifiableProperties(properties)), Collections.singletonList(mockDelegate),
      new StoreMetrics(new MetricRegistry()));
  // First, verify that a normal shutdown will create a clean shutdown file in the store directory.
  mockBlobStore.start();
  mockBlobStore.shutdown();
  File shutdownFile = new File(tempDir, PersistentIndex.CLEAN_SHUTDOWN_FILENAME);
  assertTrue("Clean shutdown file should exist", shutdownFile.exists());
  mockBlobStore.start();
  // Second, verify that store won't be shut down if Unknown_Error occurred.
  StoreException storeExceptionInIndex = new StoreException("Mock Unknown error", StoreErrorCodes.Unknown_Error);
  mockBlobStore.setPersistentIndex(storeExceptionInIndex);
  try {
    methodCaller.invoke(mockBlobStore);
    fail("should fail");
  } catch (StoreException e) {
    assertEquals("Mismatch in StoreErrorCode", StoreErrorCodes.Unknown_Error, e.getErrorCode());
  }
  assertTrue("Store should not be shut down", mockBlobStore.isStarted());
  assertEquals("Mismatch in store io error count", 0, mockBlobStore.getErrorCount().get());
  // Third, verify that store will be shut down if IOError occurred (disk I/O error).
  storeExceptionInIndex = new StoreException("Mock disk I/O error", StoreErrorCodes.IOError);
  mockBlobStore.setPersistentIndex(storeExceptionInIndex);
  try {
    methodCaller.invoke(mockBlobStore);
    fail("should fail");
  } catch (StoreException e) {
    assertEquals("Mismatch in StoreErrorCode", StoreErrorCodes.IOError, e.getErrorCode());
  }
  assertFalse("Store should be shutdown after error count exceeded threshold", mockBlobStore.isStarted());
  // In the end, verify that store shutdown would skip any disk flush operation if it is triggered by a real disk I/O error.
  assertFalse("When encountering disk I/O error, clean shutdown file shouldn't exist", shutdownFile.exists());
}
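A sketch of invoking this helper with a lambda as the StoreMethodCaller; the Store#get signature used here is an assumption about the store interface, and the key is a placeholder generated by the test's own getUniqueId():

// Hypothetical caller: exercises the helper through a simple get().
catchStoreExceptionAndVerifyErrorCode(
    store -> store.get(Collections.singletonList(getUniqueId()), EnumSet.noneOf(StoreGetOptions.class)));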