Use of org.apache.kafka.streams.errors.ProcessorStateException in project kafka by apache.
The class ProcessorStateManager, method initializeStoreOffsetsFromCheckpoint.
// package-private for test only
void initializeStoreOffsetsFromCheckpoint(final boolean storeDirIsEmpty) {
    try {
        final Map<TopicPartition, Long> loadedCheckpoints = checkpointFile.read();

        log.trace("Loaded offsets from the checkpoint file: {}", loadedCheckpoints);

        for (final StateStoreMetadata store : stores.values()) {
            if (store.corrupted) {
                log.error("Tried to initialize store offsets for corrupted store {}", store);
                throw new IllegalStateException("Should not initialize offsets for a corrupted task");
            }

            if (store.changelogPartition == null) {
                log.info("State store {} is not logged and hence would not be restored", store.stateStore.name());
            } else if (!store.stateStore.persistent()) {
                log.info("Initializing to the starting offset for changelog {} of in-memory state store {}",
                    store.changelogPartition, store.stateStore.name());
            } else if (store.offset() == null) {
                if (loadedCheckpoints.containsKey(store.changelogPartition)) {
                    final Long offset = changelogOffsetFromCheckpointedOffset(loadedCheckpoints.remove(store.changelogPartition));
                    store.setOffset(offset);

                    log.debug("State store {} initialized from checkpoint with offset {} at changelog {}",
                        store.stateStore.name(), store.offset, store.changelogPartition);
                } else {
                    // no checkpointed offset was found for this persistent store; under EOS with a
                    // non-empty state directory we need to treat it as a task-corrupted exception
                    if (eosEnabled && !storeDirIsEmpty) {
                        log.warn("State store {} did not find checkpoint offsets while stores are not empty, " +
                            "since under EOS it has the risk of getting uncommitted data in stores we have to " +
                            "treat it as a task corruption error and wipe out the local state of task {} " +
                            "before re-bootstrapping", store.stateStore.name(), taskId);

                        throw new TaskCorruptedException(Collections.singleton(taskId));
                    } else {
                        log.info("State store {} did not find checkpoint offset, hence would " +
                            "default to the starting offset at changelog {}",
                            store.stateStore.name(), store.changelogPartition);
                    }
                }
            } else {
                loadedCheckpoints.remove(store.changelogPartition);
                log.debug("Skipping re-initialization of offset from checkpoint for recycled store {}",
                    store.stateStore.name());
            }
        }

        if (!loadedCheckpoints.isEmpty()) {
            log.warn("Some loaded checkpoint offsets cannot find their corresponding state stores: {}", loadedCheckpoints);
        }

        if (eosEnabled) {
            checkpointFile.delete();
        }
    } catch (final TaskCorruptedException e) {
        throw e;
    } catch (final IOException | RuntimeException e) {
        // both an IOException and a RuntimeException (e.g. from number parsing) can be thrown here
        throw new ProcessorStateException(
            format("%sError loading and deleting checkpoint file when creating the state manager", logPrefix), e);
    }
}
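The core of this method is the per-store, three-way decision: resume from a checkpointed offset, treat the task as corrupted, or bootstrap from the start of the changelog. The standalone sketch below re-creates just that decision tree; CheckpointDecisionSketch, CheckpointDecision, and decide are illustrative names invented for the sketch, not part of Kafka's API:

import java.util.Optional;

// Illustrative only: a self-contained re-creation of the checkpoint decision tree above.
public class CheckpointDecisionSketch {

    enum CheckpointDecision { RESTORE_FROM_CHECKPOINT, TASK_CORRUPTED, RESTORE_FROM_BEGINNING }

    static CheckpointDecision decide(final Optional<Long> checkpointedOffset,
                                     final boolean eosEnabled,
                                     final boolean storeDirIsEmpty) {
        if (checkpointedOffset.isPresent()) {
            // a checkpointed offset exists: resume restoration from it
            return CheckpointDecision.RESTORE_FROM_CHECKPOINT;
        } else if (eosEnabled && !storeDirIsEmpty) {
            // no checkpoint but local state exists: under EOS the local data may be
            // uncommitted, so the task must be wiped and re-bootstrapped
            return CheckpointDecision.TASK_CORRUPTED;
        } else {
            // no checkpoint and no uncommitted-data risk: restore from the beginning
            return CheckpointDecision.RESTORE_FROM_BEGINNING;
        }
    }

    public static void main(final String[] args) {
        System.out.println(decide(Optional.of(42L), true, false));  // RESTORE_FROM_CHECKPOINT
        System.out.println(decide(Optional.empty(), true, false));  // TASK_CORRUPTED
        System.out.println(decide(Optional.empty(), false, true));  // RESTORE_FROM_BEGINNING
    }
}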
Use of org.apache.kafka.streams.errors.ProcessorStateException in project kafka by apache.
The class ProcessorStateManager, method restore.
// used by the changelog reader only
void restore(final StateStoreMetadata storeMetadata, final List<ConsumerRecord<byte[], byte[]>> restoreRecords) {
    if (!stores.containsValue(storeMetadata)) {
        throw new IllegalStateException("Restoring " + storeMetadata + " which is not registered in this state manager, " +
            "this should not happen.");
    }

    if (!restoreRecords.isEmpty()) {
        // restore states from changelog records and update the snapshot offset as the batch end record's offset
        final Long batchEndOffset = restoreRecords.get(restoreRecords.size() - 1).offset();
        final RecordBatchingStateRestoreCallback restoreCallback = adapt(storeMetadata.restoreCallback);
        final List<ConsumerRecord<byte[], byte[]>> convertedRecords = restoreRecords.stream()
            .map(storeMetadata.recordConverter::convert)
            .collect(Collectors.toList());

        try {
            restoreCallback.restoreBatch(convertedRecords);
        } catch (final RuntimeException e) {
            throw new ProcessorStateException(
                format("%sException caught while trying to restore state from %s", logPrefix, storeMetadata.changelogPartition), e);
        }

        storeMetadata.setOffset(batchEndOffset);
    }
}
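For illustration, here is a minimal class in the shape of the batch-restore hook invoked above: it applies a batch of changelog records to an in-memory map and remembers the batch-end offset, mirroring the setOffset(batchEndOffset) bookkeeping. RestoreBatchSketch and its map-backed store are assumptions for the sketch, not Kafka's internal RecordBatchingStateRestoreCallback:

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.kafka.clients.consumer.ConsumerRecord;

// Illustrative only: restores changelog records into an in-memory map.
public class RestoreBatchSketch {

    private final Map<String, String> store = new HashMap<>();
    private Long restoredOffset = null;

    void restoreBatch(final List<ConsumerRecord<byte[], byte[]>> records) {
        for (final ConsumerRecord<byte[], byte[]> record : records) {
            store.put(new String(record.key(), StandardCharsets.UTF_8),
                new String(record.value(), StandardCharsets.UTF_8));
        }
        // like storeMetadata.setOffset(batchEndOffset): record the last applied offset
        restoredOffset = records.get(records.size() - 1).offset();
    }

    public static void main(final String[] args) {
        final RestoreBatchSketch sketch = new RestoreBatchSketch();
        sketch.restoreBatch(List.of(new ConsumerRecord<>("changelog", 0, 5L,
            "key".getBytes(StandardCharsets.UTF_8), "value".getBytes(StandardCharsets.UTF_8))));
        System.out.println(sketch.store + " restored up to offset " + sketch.restoredOffset);
    }
}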
Use of org.apache.kafka.streams.errors.ProcessorStateException in project kafka by apache.
The class StateDirectory, method getOrCreateDirectoryForTask.
/**
 * Get or create the directory for the provided {@link TaskId}.
 * @return directory for the {@link TaskId}
 * @throws ProcessorStateException if the task directory does not exist and could not be created
 */
public File getOrCreateDirectoryForTask(final TaskId taskId) {
    final File taskParentDir = getTaskDirectoryParentName(taskId);
    final File taskDir = new File(taskParentDir, StateManagerUtil.toTaskDirString(taskId));
    if (hasPersistentStores) {
        if (!taskDir.exists()) {
            synchronized (taskDirCreationLock) {
                // re-check existence under the lock: without it, two threads could pass the outer
                // exists() check, one creates the directory,
                // and the blocking one fails when trying to create it after it's unblocked
                if (!taskParentDir.exists() && !taskParentDir.mkdir()) {
                    throw new ProcessorStateException(
                        String.format("Parent [%s] of task directory [%s] doesn't exist and couldn't be created",
                            taskParentDir.getPath(), taskDir.getPath()));
                }
                if (!taskDir.exists() && !taskDir.mkdir()) {
                    throw new ProcessorStateException(
                        String.format("task directory [%s] doesn't exist and couldn't be created", taskDir.getPath()));
                }
            }
        } else if (!taskDir.isDirectory()) {
            throw new ProcessorStateException(
                String.format("state directory [%s] can't be created as there is an existing file with the same name",
                    taskDir.getPath()));
        }
    }
    return taskDir;
}
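This method is an instance of double-checked directory creation: an unsynchronized fast path, then a re-check under a lock before calling mkdir(). The sketch below shows the same pattern in isolation, with a plain RuntimeException standing in for ProcessorStateException; all names are illustrative:

import java.io.File;

// Illustrative only: double-checked, thread-safe directory creation.
public class DirCreationSketch {

    private static final Object CREATION_LOCK = new Object();

    static File getOrCreateDir(final File parent, final String name) {
        final File dir = new File(parent, name);
        if (!dir.exists()) {
            synchronized (CREATION_LOCK) {
                // re-check under the lock: another thread may have created the
                // directories while this one was blocked
                if (!parent.exists() && !parent.mkdir()) {
                    throw new RuntimeException("Parent [" + parent.getPath() + "] couldn't be created");
                }
                if (!dir.exists() && !dir.mkdir()) {
                    throw new RuntimeException("Directory [" + dir.getPath() + "] couldn't be created");
                }
            }
        } else if (!dir.isDirectory()) {
            // a regular file with the same name blocks directory creation
            throw new RuntimeException("[" + dir.getPath() + "] exists but is not a directory");
        }
        return dir;
    }
}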
Use of org.apache.kafka.streams.errors.ProcessorStateException in project kafka by apache.
The class StateManagerUtil, method closeStateManager.
/**
 * @throws ProcessorStateException if there is an error while closing the state manager
 */
static void closeStateManager(final Logger log,
                              final String logPrefix,
                              final boolean closeClean,
                              final boolean eosEnabled,
                              final ProcessorStateManager stateMgr,
                              final StateDirectory stateDirectory,
                              final TaskType taskType) {
    // if EOS is enabled, wipe out the whole state store for unclean close since it is now invalid
    final boolean wipeStateStore = !closeClean && eosEnabled;

    final TaskId id = stateMgr.taskId();
    log.trace("Closing state manager for {} task {}", taskType, id);

    final AtomicReference<ProcessorStateException> firstException = new AtomicReference<>(null);
    try {
        if (stateDirectory.lock(id)) {
            try {
                stateMgr.close();
            } catch (final ProcessorStateException e) {
                firstException.compareAndSet(null, e);
            } finally {
                try {
                    if (wipeStateStore) {
                        log.debug("Wiping state stores for {} task {}", taskType, id);
                        // we can just delete the whole dir of the task, including the state store images and
                        // the checkpoint files, and then we write an empty checkpoint file indicating that the
                        // previous close is graceful and we just need to re-bootstrap the restoration from the beginning
                        Utils.delete(stateMgr.baseDir());
                    }
                } finally {
                    stateDirectory.unlock(id);
                }
            }
        }
    } catch (final IOException e) {
        final ProcessorStateException exception = new ProcessorStateException(
            String.format("%sFatal error while trying to close the state manager for task %s", logPrefix, id), e);
        firstException.compareAndSet(null, exception);
    }

    final ProcessorStateException exception = firstException.get();
    if (exception != null) {
        throw exception;
    }
}
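The AtomicReference here implements a "remember the first exception, finish all cleanup, then rethrow" pattern: compareAndSet(null, e) only succeeds for the first failure, so later cleanup steps still run but cannot overwrite the original cause. A minimal sketch of that pattern, with hypothetical closeA/closeB steps standing in for stateMgr.close() and the directory cleanup:

import java.util.concurrent.atomic.AtomicReference;

// Illustrative only: run every cleanup step, then rethrow the first failure.
public class FirstExceptionSketch {

    static void closeAll(final Runnable closeA, final Runnable closeB) {
        final AtomicReference<RuntimeException> firstException = new AtomicReference<>(null);
        try {
            closeA.run();
        } catch (final RuntimeException e) {
            firstException.compareAndSet(null, e); // keeps only the first failure
        }
        try {
            closeB.run(); // later steps still run even if an earlier one failed
        } catch (final RuntimeException e) {
            firstException.compareAndSet(null, e); // no-op if a failure was already recorded
        }
        final RuntimeException exception = firstException.get();
        if (exception != null) {
            throw exception;
        }
    }
}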
Use of org.apache.kafka.streams.errors.ProcessorStateException in project kafka by apache.
The class ProcessorStateManagerTest, method shouldThrowOnFailureToWritePositionCheckpointFile.
@Test
public void shouldThrowOnFailureToWritePositionCheckpointFile() throws IOException {
    final ProcessorStateManager stateMgr = getStateManager(Task.TaskType.ACTIVE);

    final CommitCallback persistentCheckpoint = mock(CommitCallback.class);
    persistentCheckpoint.onCommit();
    final IOException ioException = new IOException("asdf");
    expectLastCall().andThrow(ioException);
    replay(persistentCheckpoint);

    stateMgr.registerStore(persistentStore, persistentStore.stateRestoreCallback, persistentCheckpoint);

    final ProcessorStateException processorStateException = assertThrows(
        ProcessorStateException.class,
        stateMgr::checkpoint
    );

    assertThat(
        processorStateException.getMessage(),
        containsString("process-state-manager-test Exception caught while trying to checkpoint store," +
            " changelog partition test-application-My-Topology-persistentStore-changelog-1")
    );

    assertThat(processorStateException.getCause(), is(ioException));
}