Use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.
The method verifyAndMergeCheckpoints of the class SeekableStreamSupervisor.
/**
* Reconciles the checkpoints stored in {@code taskGroup} with the checkpoints reported by its tasks and with the
* offsets persisted in the metadata store.
*
* This method does two things -
* 1. Makes sure the checkpoints information in the taskGroup is consistent with that of the tasks, if not kill
* inconsistent tasks.
* 2. truncates the checkpoints in the taskGroup corresponding to which segments have been published, so that any newly
* created tasks for the taskGroup start indexing from after the latest published sequences.
*
* Outline of the steps performed:
* - Requests checkpoints from every task of the group asynchronously and waits up to {@code futureTimeoutInSeconds}
*   seconds for the combined result. Tasks whose request failed are killed and removed from the group; tasks that
*   report no checkpoints are ignored.
* - Reads the datasource metadata and uses its partition offsets only when the stored stream equals the stream of
*   {@code ioConfig}.
* - Picks the first task (in sorted order) that has a checkpoint matching the stored offsets and replaces
*   {@code taskGroup.checkpointSequences} with that task's checkpoints from the matching sequence id onwards.
*   Every other task must agree with the resulting group checkpoints, otherwise it is added to the kill list.
* - If every task of the group is to be killed, or the group has no tasks and no pending-completion groups, the
*   group is removed from {@code activelyReadingTaskGroups} and its partitions are reset to the not-set marker.
*
* @param taskGroup the task group whose checkpoints are verified and updated in place
* @throws RuntimeException wrapping any exception raised while waiting for the checkpoint futures
* @throws IAE if the stored datasource metadata does not pass {@code checkSourceMetadataMatch}
*/
private void verifyAndMergeCheckpoints(final TaskGroup taskGroup) {
final int groupId = taskGroup.groupId;
// (taskId, checkpoints) pairs of the tasks that returned a non-empty checkpoint map
final List<Pair<String, TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>>>> taskSequences = new ArrayList<>();
// futures and taskIds are filled in lock-step, so index i of one corresponds to index i of the other
final List<ListenableFuture<TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>>>> futures = new ArrayList<>();
final List<String> taskIds = new ArrayList<>();
for (String taskId : taskGroup.taskIds()) {
final ListenableFuture<TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>>> checkpointsFuture = taskClient.getCheckpointsAsync(taskId, true);
futures.add(checkpointsFuture);
taskIds.add(taskId);
}
try {
// successfulAsList keeps the positions of the input futures; a null entry is treated below as a failed request
List<TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>>> futuresResult = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
for (int i = 0; i < futuresResult.size(); i++) {
final TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>> checkpoints = futuresResult.get(i);
final String taskId = taskIds.get(i);
if (checkpoints == null) {
try {
// re-get the individual future to surface the exception that made it fail;
// if this call does not throw, the task is neither killed nor added to taskSequences
futures.get(i).get();
} catch (Exception e) {
stateManager.recordThrowableEvent(e);
log.error(e, "Problem while getting checkpoints for task [%s], killing the task", taskId);
killTask(taskId, "Exception[%s] while getting checkpoints", e.getClass());
taskGroup.tasks.remove(taskId);
}
} else if (checkpoints.isEmpty()) {
log.warn("Ignoring task [%s], as probably it is not started running yet", taskId);
} else {
taskSequences.add(new Pair<>(taskId, checkpoints));
}
}
} catch (Exception e) {
// covers failures of the combined wait itself (for example a timeout); rethrown unchecked with the cause kept
throw new RuntimeException(e);
}
final DataSourceMetadata rawDataSourceMetadata = indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(dataSource);
// a null result is allowed here; only a non-null value that fails the match check is rejected
if (rawDataSourceMetadata != null && !checkSourceMetadataMatch(rawDataSourceMetadata)) {
throw new IAE("Datasource metadata instance does not match required, found instance of [%s]", rawDataSourceMetadata.getClass());
}
@SuppressWarnings("unchecked") final SeekableStreamDataSourceMetadata<PartitionIdType, SequenceOffsetType> latestDataSourceMetadata = (SeekableStreamDataSourceMetadata<PartitionIdType, SequenceOffsetType>) rawDataSourceMetadata;
// stored offsets are used only when they exist and were recorded for the stream configured in ioConfig
final boolean hasValidOffsetsFromDb = latestDataSourceMetadata != null && latestDataSourceMetadata.getSeekableStreamSequenceNumbers() != null && ioConfig.getStream().equals(latestDataSourceMetadata.getSeekableStreamSequenceNumbers().getStream());
final Map<PartitionIdType, SequenceOffsetType> latestOffsetsFromDb;
if (hasValidOffsetsFromDb) {
latestOffsetsFromDb = latestDataSourceMetadata.getSeekableStreamSequenceNumbers().getPartitionSequenceNumberMap();
} else {
latestOffsetsFromDb = null;
}
// order tasks of this taskGroup in descending order of the first key (sequenceId) of their checkpoint maps
taskSequences.sort((o1, o2) -> o2.rhs.firstKey().compareTo(o1.rhs.firstKey()));
final Set<String> tasksToKill = new HashSet<>();
// -1 means "no consistent sequence id found yet"; it is set at most once through compareAndSet(-1, ...)
final AtomicInteger earliestConsistentSequenceId = new AtomicInteger(-1);
int taskIndex = 0;
while (taskIndex < taskSequences.size()) {
TreeMap<Integer, Map<PartitionIdType, SequenceOffsetType>> taskCheckpoints = taskSequences.get(taskIndex).rhs;
String taskId = taskSequences.get(taskIndex).lhs;
if (earliestConsistentSequenceId.get() == -1) {
// No reference task yet. This task becomes the reference when either
// (a) one of its checkpoints has every partition offset equal to the stored offset (a partition without a
//     stored offset, or a missing stored map, compares against the checkpoint's own value and so matches);
//     the key of the first such checkpoint encountered is stored in earliestConsistentSequenceId, or
// (b) the group has pending-completion task groups, in which case the task's first checkpoint key is stored.
// On success the group's checkpoints are replaced by this task's checkpoints from the stored key onwards.
if (taskCheckpoints.entrySet().stream().anyMatch(sequenceCheckpoint -> sequenceCheckpoint.getValue().entrySet().stream().allMatch(partitionOffset -> {
OrderedSequenceNumber<SequenceOffsetType> sequence = makeSequenceNumber(partitionOffset.getValue());
OrderedSequenceNumber<SequenceOffsetType> latestOffset = makeSequenceNumber(latestOffsetsFromDb == null ? partitionOffset.getValue() : latestOffsetsFromDb.getOrDefault(partitionOffset.getKey(), partitionOffset.getValue()));
return sequence.compareTo(latestOffset) == 0;
}) && earliestConsistentSequenceId.compareAndSet(-1, sequenceCheckpoint.getKey())) || (pendingCompletionTaskGroups.getOrDefault(groupId, new CopyOnWriteArrayList<>()).size() > 0 && earliestConsistentSequenceId.compareAndSet(-1, taskCheckpoints.firstKey()))) {
final SortedMap<Integer, Map<PartitionIdType, SequenceOffsetType>> latestCheckpoints = new TreeMap<>(taskCheckpoints.tailMap(earliestConsistentSequenceId.get()));
log.info("Setting taskGroup sequences to [%s] for group [%d]", latestCheckpoints, groupId);
taskGroup.checkpointSequences.clear();
taskGroup.checkpointSequences.putAll(latestCheckpoints);
} else {
log.debug("Adding task [%s] to kill list, checkpoints[%s], latestoffsets from DB [%s]", taskId, taskCheckpoints, latestOffsetsFromDb);
tasksToKill.add(taskId);
}
} else {
// check consistency with taskGroup sequences: the task must contain the group's first checkpoint with an equal
// value, and must have the same number of checkpoints from that key onwards as the group has
if (taskCheckpoints.get(taskGroup.checkpointSequences.firstKey()) == null || !(taskCheckpoints.get(taskGroup.checkpointSequences.firstKey()).equals(taskGroup.checkpointSequences.firstEntry().getValue())) || taskCheckpoints.tailMap(taskGroup.checkpointSequences.firstKey()).size() != taskGroup.checkpointSequences.size()) {
log.debug("Adding task [%s] to kill list, checkpoints[%s], taskgroup checkpoints [%s]", taskId, taskCheckpoints, taskGroup.checkpointSequences);
tasksToKill.add(taskId);
}
}
taskIndex++;
}
if ((tasksToKill.size() > 0 && tasksToKill.size() == taskGroup.tasks.size()) || (taskGroup.tasks.size() == 0 && pendingCompletionTaskGroups.getOrDefault(groupId, new CopyOnWriteArrayList<>()).size() == 0)) {
// killing all tasks or no task left in the group ?
// clear state about the taskgroup so that get latest sequence information is fetched from metadata store
log.warn("Clearing task group [%d] information as no valid tasks left the group", groupId);
activelyReadingTaskGroups.remove(groupId);
for (PartitionIdType partitionId : taskGroup.startingSequences.keySet()) {
partitionOffsets.put(partitionId, getNotSetMarker());
}
}
// kill every task marked as inconsistent and drop it from the group's task map
taskSequences.stream().filter(taskIdSequences -> tasksToKill.contains(taskIdSequences.lhs)).forEach(sequenceCheckpoint -> {
killTask(sequenceCheckpoint.lhs, "Killing task [%s], as its checkpoints [%s] are not consistent with group checkpoints[%s] or latest " + "persisted sequences in metadata store [%s]", sequenceCheckpoint.lhs, sequenceCheckpoint.rhs, taskGroup.checkpointSequences, latestOffsetsFromDb);
taskGroup.tasks.remove(sequenceCheckpoint.lhs);
});
}
Use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.
The method testTransactionalAnnounceRetryAndSuccess of the class IndexerSQLMetadataStorageCoordinatorTest.
/**
 * Checks that a transactional segment announcement is retried after the metadata update reports
 * {@code TRY_AGAIN}, and that the retried announcement succeeds.
 *
 * The coordinator under test answers {@code TRY_AGAIN} on the first metadata-update attempt after each reset of
 * its attempt counter and delegates to the parent implementation on later attempts. Two segments are announced,
 * so four metadata-update calls are expected in total.
 */
@Test
public void testTransactionalAnnounceRetryAndSuccess() throws IOException {
  final AtomicLong attemptsSinceReset = new AtomicLong();
  final IndexerSQLMetadataStorageCoordinator retryingCoordinator = new IndexerSQLMetadataStorageCoordinator(
      mapper,
      derbyConnectorRule.metadataTablesConfigSupplier().get(),
      derbyConnector
  ) {
    @Override
    protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle(
        Handle handle,
        String dataSource,
        DataSourceMetadata startMetadata,
        DataSourceMetadata endMetadata
    ) throws IOException {
      // Count every invocation, including the ones that are told to retry.
      metadataUpdateCounter.getAndIncrement();
      final boolean isFirstAttempt = attemptsSinceReset.getAndIncrement() == 0;
      if (isFirstAttempt) {
        return DataStoreMetadataUpdateResult.TRY_AGAIN;
      }
      return super.updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata);
    }
  };

  // First segment: metadata goes from null to {foo=bar}.
  final SegmentPublishResult firstPublish = retryingCoordinator.announceHistoricalSegments(
      ImmutableSet.of(defaultSegment),
      ImmutableSet.of(),
      new ObjectMetadata(null),
      new ObjectMetadata(ImmutableMap.of("foo", "bar"))
  );
  Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment)), firstPublish);
  final byte[] expectedFirstPayload = mapper.writeValueAsString(defaultSegment).getBytes(StandardCharsets.UTF_8);
  final byte[] storedFirstPayload = derbyConnector.lookup(
      derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(),
      "id",
      "payload",
      defaultSegment.getId().toString()
  );
  Assert.assertArrayEquals(expectedFirstPayload, storedFirstPayload);

  // Make the next metadata update fail once again.
  attemptsSinceReset.set(0);

  // Second segment: metadata goes from {foo=bar} to {foo=baz}.
  final SegmentPublishResult secondPublish = retryingCoordinator.announceHistoricalSegments(
      ImmutableSet.of(defaultSegment2),
      ImmutableSet.of(),
      new ObjectMetadata(ImmutableMap.of("foo", "bar")),
      new ObjectMetadata(ImmutableMap.of("foo", "baz"))
  );
  Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment2)), secondPublish);
  final byte[] expectedSecondPayload = mapper.writeValueAsString(defaultSegment2).getBytes(StandardCharsets.UTF_8);
  final byte[] storedSecondPayload = derbyConnector.lookup(
      derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(),
      "id",
      "payload",
      defaultSegment2.getId().toString()
  );
  Assert.assertArrayEquals(expectedSecondPayload, storedSecondPayload);

  // The stored metadata must be the end state of the second announcement.
  Assert.assertEquals(
      new ObjectMetadata(ImmutableMap.of("foo", "baz")),
      retryingCoordinator.retrieveDataSourceMetadata("fooDataSource")
  );

  // One rejected attempt plus one successful attempt per announcement.
  Assert.assertEquals(4, metadataUpdateCounter.get());
}
Aggregations