Use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.
The class IndexerSQLMetadataStorageCoordinator, method updateDataSourceMetadataWithHandle.
/**
* Compare-and-swap dataSource metadata in a transaction. This will only modify dataSource metadata if the existing
* metadata matches startMetadata when this function is called (per
* {@link DataSourceMetadata#matches(DataSourceMetadata)}). This method is idempotent in that if the stored metadata
* already equals the merged newCommitMetadata, it will return SUCCESS.
*
* @param handle database handle
* @param dataSource druid dataSource
* @param startMetadata dataSource metadata pre-insert must match this startMetadata according to
* {@link DataSourceMetadata#matches(DataSourceMetadata)}
* @param endMetadata dataSource metadata post-insert will have this endMetadata merged in with
* {@link DataSourceMetadata#plus(DataSourceMetadata)}
*
* @return SUCCESS if dataSource metadata was updated from matching startMetadata to matching endMetadata, FAILURE or
* TRY_AGAIN if it definitely was not updated. This guarantee is meant to help
* {@link #announceHistoricalSegments(Set, Set, DataSourceMetadata, DataSourceMetadata)}
* achieve its own guarantee.
*
* @throws RuntimeException if state is unknown after this call
*/
protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle(
    final Handle handle,
    final String dataSource,
    final DataSourceMetadata startMetadata,
    final DataSourceMetadata endMetadata
) throws IOException {
Preconditions.checkNotNull(dataSource, "dataSource");
Preconditions.checkNotNull(startMetadata, "startMetadata");
Preconditions.checkNotNull(endMetadata, "endMetadata");
final byte[] oldCommitMetadataBytesFromDb = retrieveDataSourceMetadataWithHandleAsBytes(handle, dataSource);
final String oldCommitMetadataSha1FromDb;
final DataSourceMetadata oldCommitMetadataFromDb;
if (oldCommitMetadataBytesFromDb == null) {
oldCommitMetadataSha1FromDb = null;
oldCommitMetadataFromDb = null;
} else {
oldCommitMetadataSha1FromDb = BaseEncoding.base16().encode(Hashing.sha1().hashBytes(oldCommitMetadataBytesFromDb).asBytes());
oldCommitMetadataFromDb = jsonMapper.readValue(oldCommitMetadataBytesFromDb, DataSourceMetadata.class);
}
final boolean startMetadataMatchesExisting;
if (oldCommitMetadataFromDb == null) {
startMetadataMatchesExisting = startMetadata.isValidStart();
} else {
// Checking against the last committed metadata.
// Converting the last one into start metadata for checking since only the same type of metadata can be matched.
// Even though kafka/kinesis indexing services use different sequenceNumber types for representing
// start and end sequenceNumbers, the below conversion is fine because the new start sequenceNumbers are supposed
// to be the same as the end sequenceNumbers of the last commit.
startMetadataMatchesExisting = startMetadata.asStartMetadata().matches(oldCommitMetadataFromDb.asStartMetadata());
}
if (!startMetadataMatchesExisting) {
// Not in the desired start state.
log.error("Not updating metadata, existing state[%s] in metadata store doesn't match to the new start state[%s].", oldCommitMetadataFromDb, startMetadata);
return DataStoreMetadataUpdateResult.FAILURE;
}
// Only endOffsets should be stored in metadata store
final DataSourceMetadata newCommitMetadata = oldCommitMetadataFromDb == null ? endMetadata : oldCommitMetadataFromDb.plus(endMetadata);
final byte[] newCommitMetadataBytes = jsonMapper.writeValueAsBytes(newCommitMetadata);
final String newCommitMetadataSha1 = BaseEncoding.base16().encode(Hashing.sha1().hashBytes(newCommitMetadataBytes).asBytes());
final DataStoreMetadataUpdateResult retVal;
if (oldCommitMetadataBytesFromDb == null) {
// SELECT -> INSERT can fail due to races; callers must be prepared to retry.
final int numRows = handle.createStatement(StringUtils.format("INSERT INTO %s (dataSource, created_date, commit_metadata_payload, commit_metadata_sha1) " + "VALUES (:dataSource, :created_date, :commit_metadata_payload, :commit_metadata_sha1)", dbTables.getDataSourceTable())).bind("dataSource", dataSource).bind("created_date", DateTimes.nowUtc().toString()).bind("commit_metadata_payload", newCommitMetadataBytes).bind("commit_metadata_sha1", newCommitMetadataSha1).execute();
retVal = numRows == 1 ? DataStoreMetadataUpdateResult.SUCCESS : DataStoreMetadataUpdateResult.TRY_AGAIN;
} else {
// Expecting a particular old metadata; use the SHA1 in a compare-and-swap UPDATE
final int numRows = handle.createStatement(StringUtils.format("UPDATE %s SET " + "commit_metadata_payload = :new_commit_metadata_payload, " + "commit_metadata_sha1 = :new_commit_metadata_sha1 " + "WHERE dataSource = :dataSource AND commit_metadata_sha1 = :old_commit_metadata_sha1", dbTables.getDataSourceTable())).bind("dataSource", dataSource).bind("old_commit_metadata_sha1", oldCommitMetadataSha1FromDb).bind("new_commit_metadata_payload", newCommitMetadataBytes).bind("new_commit_metadata_sha1", newCommitMetadataSha1).execute();
retVal = numRows == 1 ? DataStoreMetadataUpdateResult.SUCCESS : DataStoreMetadataUpdateResult.TRY_AGAIN;
}
if (retVal == DataStoreMetadataUpdateResult.SUCCESS) {
log.info("Updated metadata from[%s] to[%s].", oldCommitMetadataFromDb, newCommitMetadata);
} else {
log.info("Not updating metadata, compare-and-swap failure.");
}
return retVal;
}
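The SUCCESS / FAILURE / TRY_AGAIN contract above lets a caller tell a definite start-state mismatch apart from a lost compare-and-swap race. Below is a minimal caller sketch, assumed to run inside the coordinator's transaction callback; it is not the actual announceHistoricalSegments logic, and RetryTransactionException is used here only to signal that the surrounding transaction may be retried.
// Hedged sketch: react to each CAS outcome (assumed to run inside the coordinator class, within a transaction).
final DataStoreMetadataUpdateResult result =
    updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata);
switch (result) {
  case SUCCESS:
    // Metadata moved from startMetadata to the merged endMetadata; segments can be committed in the same transaction.
    break;
  case FAILURE:
    // Stored metadata did not match startMetadata; retrying with the same startMetadata cannot succeed.
    throw new ISE("Inconsistent metadata state for dataSource[%s]", dataSource);
  case TRY_AGAIN:
  default:
    // The INSERT/UPDATE lost a race with a concurrent writer; the whole transaction can simply be retried.
    throw new RetryTransactionException("Aborting transaction!");
}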
Use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.
The class SeekableStreamSupervisor, method resetInternal.
@VisibleForTesting
public void resetInternal(DataSourceMetadata dataSourceMetadata) {
if (dataSourceMetadata == null) {
// Reset everything
boolean result = indexerMetadataStorageCoordinator.deleteDataSourceMetadata(dataSource);
log.info("Reset dataSource[%s] - dataSource metadata entry deleted? [%s]", dataSource, result);
activelyReadingTaskGroups.values().forEach(group -> killTasksInGroup(group, "DataSourceMetadata is not found while reset"));
activelyReadingTaskGroups.clear();
partitionGroups.clear();
partitionOffsets.clear();
} else {
if (!checkSourceMetadataMatch(dataSourceMetadata)) {
throw new IAE("Datasource metadata instance does not match required, found instance of [%s]", dataSourceMetadata.getClass());
}
log.info("Reset dataSource[%s] with metadata[%s]", dataSource, dataSourceMetadata);
// Reset only the partitions in dataSourceMetadata if it has not been reset yet
@SuppressWarnings("unchecked") final SeekableStreamDataSourceMetadata<PartitionIdType, SequenceOffsetType> resetMetadata = (SeekableStreamDataSourceMetadata<PartitionIdType, SequenceOffsetType>) dataSourceMetadata;
if (resetMetadata.getSeekableStreamSequenceNumbers().getStream().equals(ioConfig.getStream())) {
// metadata can be null
final DataSourceMetadata metadata = indexerMetadataStorageCoordinator.retrieveDataSourceMetadata(dataSource);
if (metadata != null && !checkSourceMetadataMatch(metadata)) {
throw new IAE("Datasource metadata instance does not match required, found instance of [%s]", metadata.getClass());
}
@SuppressWarnings("unchecked") final SeekableStreamDataSourceMetadata<PartitionIdType, SequenceOffsetType> currentMetadata = (SeekableStreamDataSourceMetadata<PartitionIdType, SequenceOffsetType>) metadata;
// defend against consecutive reset requests from replicas
// as well as the case where the metadata store does not have an entry for the reset partitions
boolean doReset = false;
for (Entry<PartitionIdType, SequenceOffsetType> resetPartitionOffset : resetMetadata.getSeekableStreamSequenceNumbers().getPartitionSequenceNumberMap().entrySet()) {
final SequenceOffsetType partitionOffsetInMetadataStore = currentMetadata == null ? null : currentMetadata.getSeekableStreamSequenceNumbers().getPartitionSequenceNumberMap().get(resetPartitionOffset.getKey());
final TaskGroup partitionTaskGroup = activelyReadingTaskGroups.get(getTaskGroupIdForPartition(resetPartitionOffset.getKey()));
final boolean isSameOffset = partitionTaskGroup != null && partitionTaskGroup.startingSequences.get(resetPartitionOffset.getKey()).equals(resetPartitionOffset.getValue());
if (partitionOffsetInMetadataStore != null || isSameOffset) {
doReset = true;
break;
}
}
if (!doReset) {
log.info("Ignoring duplicate reset request [%s]", dataSourceMetadata);
return;
}
boolean metadataUpdateSuccess;
if (currentMetadata == null) {
metadataUpdateSuccess = true;
} else {
final DataSourceMetadata newMetadata = currentMetadata.minus(resetMetadata);
try {
metadataUpdateSuccess = indexerMetadataStorageCoordinator.resetDataSourceMetadata(dataSource, newMetadata);
} catch (IOException e) {
log.error("Resetting DataSourceMetadata failed [%s]", e.getMessage());
throw new RuntimeException(e);
}
}
if (metadataUpdateSuccess) {
resetMetadata.getSeekableStreamSequenceNumbers().getPartitionSequenceNumberMap().keySet().forEach(partition -> {
final int groupId = getTaskGroupIdForPartition(partition);
killTaskGroupForPartitions(ImmutableSet.of(partition), "DataSourceMetadata is updated while reset");
activelyReadingTaskGroups.remove(groupId);
// killTaskGroupForPartitions() cleans up partitionGroups.
// Add the removed groups back.
partitionGroups.computeIfAbsent(groupId, k -> new HashSet<>());
partitionOffsets.put(partition, getNotSetMarker());
});
} else {
throw new ISE("Unable to reset metadata");
}
} else {
log.warn("Reset metadata stream [%s] and supervisor's stream name [%s] do not match", resetMetadata.getSeekableStreamSequenceNumbers().getStream(), ioConfig.getStream());
}
}
}
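The doReset check above only proceeds when at least one partition named in the reset request is still known, either as a committed offset in the metadata store or as the starting offset of an actively reading task group; otherwise the request is treated as a duplicate. A standalone sketch of that decision, using plain maps in place of the Druid metadata types (the class and method names here are illustrative, not Druid APIs):
import java.util.Map;

final class ResetCheckSketch {
  // Returns true if any requested partition still has a stored offset, or matches a task group's starting offset.
  static <P, S> boolean shouldReset(
      Map<P, S> requestedResetOffsets,   // partitions the caller wants to reset
      Map<P, S> storedOffsets,           // offsets currently committed in the metadata store (may be empty)
      Map<P, S> activeStartingOffsets    // starting offsets of actively reading task groups (may be empty)
  ) {
    for (Map.Entry<P, S> entry : requestedResetOffsets.entrySet()) {
      final S stored = storedOffsets.get(entry.getKey());
      final S activeStart = activeStartingOffsets.get(entry.getKey());
      if (stored != null || entry.getValue().equals(activeStart)) {
        return true; // something to reset: committed state exists or a task is reading from this offset
      }
    }
    return false; // nothing known about these partitions; treat as a duplicate/stale reset request
  }
}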
Use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.
The class AppenderatorDriverRealtimeIndexTaskTest, method makeToolboxFactory.
private void makeToolboxFactory(final File directory) {
taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null));
publishedSegments = new CopyOnWriteArrayList<>();
ObjectMapper mapper = new DefaultObjectMapper();
mapper.registerSubtypes(LinearShardSpec.class);
mapper.registerSubtypes(NumberedShardSpec.class);
IndexerSQLMetadataStorageCoordinator mdc = new IndexerSQLMetadataStorageCoordinator(mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnectorRule.getConnector()) {
@Override
public Set<DataSegment> announceHistoricalSegments(Set<DataSegment> segments) throws IOException {
Set<DataSegment> result = super.announceHistoricalSegments(segments);
Assert.assertFalse("Segment latch not initialized, did you forget to call expectPublishSegments?", segmentLatch == null);
publishedSegments.addAll(result);
segments.forEach(s -> segmentLatch.countDown());
return result;
}
@Override
public SegmentPublishResult announceHistoricalSegments(Set<DataSegment> segments, Set<DataSegment> segmentsToDrop, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata) throws IOException {
SegmentPublishResult result = super.announceHistoricalSegments(segments, segmentsToDrop, startMetadata, endMetadata);
Assert.assertFalse("Segment latch not initialized, did you forget to call expectPublishSegments?", segmentLatch == null);
publishedSegments.addAll(result.getSegments());
result.getSegments().forEach(s -> segmentLatch.countDown());
return result;
}
};
taskLockbox = new TaskLockbox(taskStorage, mdc);
final TaskConfig taskConfig = new TaskConfig(directory.getPath(), null, null, 50000, null, true, null, null, null, false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
final TaskActionToolbox taskActionToolbox = new TaskActionToolbox(taskLockbox, taskStorage, mdc, EMITTER, EasyMock.createMock(SupervisorManager.class));
final TaskActionClientFactory taskActionClientFactory = new LocalTaskActionClientFactory(taskStorage, taskActionToolbox, new TaskAuditLogConfig(false));
final QueryRunnerFactoryConglomerate conglomerate = new DefaultQueryRunnerFactoryConglomerate(ImmutableMap.of(TimeseriesQuery.class, new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), (query, future) -> {
// do nothing
})));
handOffCallbacks = new ConcurrentHashMap<>();
final SegmentHandoffNotifierFactory handoffNotifierFactory = dataSource -> new SegmentHandoffNotifier() {
@Override
public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable) {
handOffCallbacks.put(descriptor, new Pair<>(exec, handOffRunnable));
handoffLatch.countDown();
return true;
}
@Override
public void start() {
// Noop
}
@Override
public void close() {
// Noop
}
};
final TestUtils testUtils = new TestUtils();
taskToolboxFactory = new TaskToolboxFactory(
    taskConfig,
    new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false),
    taskActionClientFactory,
    EMITTER,
    new TestDataSegmentPusher(),
    new TestDataSegmentKiller(),
    null, // DataSegmentMover
    null, // DataSegmentArchiver
    new TestDataSegmentAnnouncer(),
    EasyMock.createNiceMock(DataSegmentServerAnnouncer.class),
    handoffNotifierFactory,
    () -> conglomerate,
    DirectQueryProcessingPool.INSTANCE, // queryExecutorService
    NoopJoinableFactory.INSTANCE,
    () -> EasyMock.createMock(MonitorScheduler.class),
    new SegmentCacheManagerFactory(testUtils.getTestObjectMapper()),
    testUtils.getTestObjectMapper(),
    testUtils.getTestIndexIO(),
    MapCache.create(1024),
    new CacheConfig(),
    new CachePopulatorStats(),
    testUtils.getTestIndexMergerV9(),
    EasyMock.createNiceMock(DruidNodeAnnouncer.class),
    EasyMock.createNiceMock(DruidNode.class),
    new LookupNodeService("tier"),
    new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0),
    new SingleFileTaskReportFileWriter(reportsFile),
    null,
    AuthTestUtils.TEST_AUTHORIZER_MAPPER,
    new NoopChatHandlerProvider(),
    testUtils.getRowIngestionMetersFactory(),
    new TestAppenderatorsManager(),
    new NoopIndexingServiceClient(),
    null,
    null,
    null
);
}
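The overridden announceHistoricalSegments methods above record whatever the task publishes and count down segmentLatch, so a test can block until publishing has happened. A hedged sketch of how a test might drive that, assuming hypothetical expectPublishedSegments/awaitPublishedSegments helpers rather than the real test's plumbing:
// Hypothetical helpers; segmentLatch and publishedSegments are the fields used by the overrides above.
// Assumed imports: java.util.concurrent.CountDownLatch, java.util.concurrent.TimeUnit, com.google.common.collect.ImmutableList.
void expectPublishedSegments(int count) {
  segmentLatch = new CountDownLatch(count);
}

List<DataSegment> awaitPublishedSegments() throws InterruptedException {
  Assert.assertTrue(
      "Timed out waiting for segments to be published",
      segmentLatch.await(60, TimeUnit.SECONDS)
  );
  return ImmutableList.copyOf(publishedSegments);
}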
Use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.
The class DerivativeDataSourceManager, method updateDerivatives.
private void updateDerivatives() {
List<Pair<String, DerivativeDataSourceMetadata>> derivativesInDatabase = connector.retryWithHandle(
    handle -> handle
        .createQuery(StringUtils.format("SELECT DISTINCT dataSource,commit_metadata_payload FROM %1$s", dbTables.get().getDataSourceTable()))
        .map((int index, ResultSet r, StatementContext ctx) -> {
String datasourceName = r.getString("dataSource");
DataSourceMetadata payload = JacksonUtils.readValue(objectMapper, r.getBytes("commit_metadata_payload"), DataSourceMetadata.class);
if (!(payload instanceof DerivativeDataSourceMetadata)) {
return null;
}
DerivativeDataSourceMetadata metadata = (DerivativeDataSourceMetadata) payload;
return new Pair<>(datasourceName, metadata);
}).list());
List<DerivativeDataSource> derivativeDataSources = derivativesInDatabase.parallelStream().filter(data -> data != null).map(derivatives -> {
String name = derivatives.lhs;
DerivativeDataSourceMetadata metadata = derivatives.rhs;
String baseDataSource = metadata.getBaseDataSource();
long avgSizePerGranularity = getAvgSizePerGranularity(name);
log.info("find derivatives: {bases=%s, derivative=%s, dimensions=%s, metrics=%s, avgSize=%s}", baseDataSource, name, metadata.getDimensions(), metadata.getMetrics(), avgSizePerGranularity);
return new DerivativeDataSource(name, baseDataSource, metadata.getColumns(), avgSizePerGranularity);
}).filter(derivatives -> derivatives.getAvgSizeBasedGranularity() > 0).collect(Collectors.toList());
ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> newDerivatives = new ConcurrentHashMap<>();
for (DerivativeDataSource derivative : derivativeDataSources) {
newDerivatives.computeIfAbsent(derivative.getBaseDataSource(), ds -> new TreeSet<>()).add(derivative);
}
ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> current;
do {
current = DERIVATIVES_REF.get();
} while (!DERIVATIVES_REF.compareAndSet(current, newDerivatives));
}
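The loop at the end publishes the freshly built map through an AtomicReference compare-and-set; because newDerivatives does not depend on the value being replaced, the loop behaves like an unconditional set, but the same pattern generalizes to updates that do read the current value. A self-contained sketch of that general pattern (illustrative names, plain JDK types):
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.UnaryOperator;

final class CasUpdateSketch {
  // Re-read, transform, and compare-and-set until the swap wins against concurrent writers.
  static <T> T update(AtomicReference<T> ref, UnaryOperator<T> transform) {
    T current;
    T next;
    do {
      current = ref.get();
      next = transform.apply(current);
    } while (!ref.compareAndSet(current, next));
    return next;
  }
}
On Java 8 and later, AtomicReference.updateAndGet(transform) performs this same loop internally.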
Use of org.apache.druid.indexing.overlord.DataSourceMetadata in project druid by druid-io.
The class KinesisIndexTaskTest, method makeToolboxFactory.
private void makeToolboxFactory() throws IOException {
directory = tempFolder.newFolder();
final TestUtils testUtils = new TestUtils();
final ObjectMapper objectMapper = testUtils.getTestObjectMapper();
objectMapper.setInjectableValues(((InjectableValues.Std) objectMapper.getInjectableValues()).addValue(AWSCredentialsConfig.class, new AWSCredentialsConfig()));
for (Module module : new KinesisIndexingServiceModule().getJacksonModules()) {
objectMapper.registerModule(module);
}
final TaskConfig taskConfig = new TaskConfig(new File(directory, "baseDir").getPath(), new File(directory, "baseTaskDir").getPath(), null, 50000, null, true, null, null, null, false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
final TestDerbyConnector derbyConnector = derby.getConnector();
derbyConnector.createDataSourceTable();
derbyConnector.createPendingSegmentsTable();
derbyConnector.createSegmentTable();
derbyConnector.createRulesTable();
derbyConnector.createConfigTable();
derbyConnector.createTaskTables();
derbyConnector.createAuditTable();
taskStorage = new MetadataTaskStorage(derbyConnector, new TaskStorageConfig(null), new DerbyMetadataStorageActionHandlerFactory(derbyConnector, derby.metadataTablesConfigSupplier().get(), objectMapper));
metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator(testUtils.getTestObjectMapper(), derby.metadataTablesConfigSupplier().get(), derbyConnector);
taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator);
final TaskActionToolbox taskActionToolbox = new TaskActionToolbox(taskLockbox, taskStorage, metadataStorageCoordinator, emitter, new SupervisorManager(null) {
@Override
public boolean checkPointDataSourceMetadata(String supervisorId, int taskGroupId, @Nullable DataSourceMetadata checkpointMetadata) {
LOG.info("Adding checkpoint hash to the set");
checkpointRequestsHash.add(Objects.hash(supervisorId, taskGroupId, checkpointMetadata));
return true;
}
});
final TaskActionClientFactory taskActionClientFactory = new LocalTaskActionClientFactory(taskStorage, taskActionToolbox, new TaskAuditLogConfig(false));
final SegmentHandoffNotifierFactory handoffNotifierFactory = dataSource -> new SegmentHandoffNotifier() {
@Override
public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable) {
if (doHandoff) {
// Simulate immediate handoff
exec.execute(handOffRunnable);
}
return true;
}
@Override
public void start() {
// Noop
}
@Override
public void close() {
// Noop
}
};
final LocalDataSegmentPusherConfig dataSegmentPusherConfig = new LocalDataSegmentPusherConfig();
dataSegmentPusherConfig.storageDirectory = getSegmentDirectory();
final DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(dataSegmentPusherConfig);
toolboxFactory = new TaskToolboxFactory(
    taskConfig,
    null, // taskExecutorNode
    taskActionClientFactory,
    emitter,
    dataSegmentPusher,
    new TestDataSegmentKiller(),
    null, // DataSegmentMover
    null, // DataSegmentArchiver
    new TestDataSegmentAnnouncer(),
    EasyMock.createNiceMock(DataSegmentServerAnnouncer.class),
    handoffNotifierFactory,
    this::makeTimeseriesOnlyConglomerate,
    DirectQueryProcessingPool.INSTANCE,
    NoopJoinableFactory.INSTANCE,
    () -> EasyMock.createMock(MonitorScheduler.class),
    new SegmentCacheManagerFactory(testUtils.getTestObjectMapper()),
    testUtils.getTestObjectMapper(),
    testUtils.getTestIndexIO(),
    MapCache.create(1024),
    new CacheConfig(),
    new CachePopulatorStats(),
    testUtils.getTestIndexMergerV9(),
    EasyMock.createNiceMock(DruidNodeAnnouncer.class),
    EasyMock.createNiceMock(DruidNode.class),
    new LookupNodeService("tier"),
    new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0),
    new SingleFileTaskReportFileWriter(reportsFile),
    null,
    AuthTestUtils.TEST_AUTHORIZER_MAPPER,
    new NoopChatHandlerProvider(),
    testUtils.getRowIngestionMetersFactory(),
    new TestAppenderatorsManager(),
    new NoopIndexingServiceClient(),
    null,
    null,
    null
);
}
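The handoffNotifierFactory above fakes segment handoff: when doHandoff is true, the registered callback is executed immediately on the supplied executor, so a task's publish-then-wait-for-handoff sequence completes without a running Coordinator. A hedged usage sketch; runTask is an assumed helper wired to toolboxFactory, as in the real test:
// Hypothetical fragment: with doHandoff = true the task can run to completion; with false it would block on handoff.
doHandoff = true;
final ListenableFuture<TaskStatus> future = runTask(task); // assumed helper
Assert.assertEquals(TaskState.SUCCESS, future.get(60, TimeUnit.SECONDS).getStatusCode());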