Use of org.apache.druid.timeline.partition.PartitionChunk in the druid project by druid-io.
From the class SinkQuerySegmentWalker, method getQueryRunnerForSegments.
/**
 * Builds a {@link QueryRunner} that runs {@code query} over the segments named by {@code specs}.
 *
 * <p>For every descriptor the matching sink chunk is looked up in {@code sinkTimeline}; a descriptor with no
 * chunk is answered by a {@code ReportTimelineMissingSegmentQueryRunner}. Within a sink, one runner is created
 * per hydrant, the per-hydrant runners are merged using {@code DirectQueryProcessingPool.INSTANCE}, and the
 * resulting per-segment runners are merged using {@code queryProcessingPool}. The merged runner is finally
 * wrapped for result finalization and CPU time reporting.
 *
 * @param query the query to run; its base table datasource must match this walker's {@code dataSource}
 * @param specs descriptors of the segments to query
 * @return a runner covering all requested segments
 * @throws ISE if the query's base table is not this walker's datasource, if no factory is registered for the
 *             query type, or if the query's subquery cannot be handled by the tool chest
 */
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
// We only handle one particular dataSource. Make sure that's what we have, then ignore from here on out.
final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
// Sanity check: make sure the query is based on the table we're meant to handle.
if (!analysis.getBaseTableDataSource().filter(ds -> dataSource.equals(ds.getName())).isPresent()) {
throw new ISE("Cannot handle datasource: %s", analysis.getDataSource());
}
final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
if (factory == null) {
throw new ISE("Unknown query type[%s].", query.getClass());
}
final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
// Context flag (default false): when set, hydrants that have not swapped are answered with a no-op runner below.
final boolean skipIncrementalSegment = query.getContextValue(CONTEXT_SKIP_INCREMENTAL_SEGMENT, false);
// Shared accumulator handed to the segment map function, the per-sink metrics wrapper and the final
// CPU time runner.
final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);
// Make sure this query type can handle the subquery, if present.
if (analysis.isQuery() && !toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery())) {
throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
}
// segmentMapFn maps each base Segment into a joined Segment if necessary.
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null), analysis.getPreJoinableClauses(), cpuTimeAccumulator, analysis.getBaseQuery().orElse(query));
// We compute the join cache key here itself so it doesn't need to be re-computed for every segment
// For non-join queries an empty (but present) prefix is used.
final Optional<byte[]> cacheKeyPrefix = analysis.isJoin() ? joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis) : Optional.of(StringUtils.EMPTY_BYTES);
// One runner per requested descriptor. Iterables.transform is lazy, so the lambda below runs when the
// resulting iterable is traversed, not here.
Iterable<QueryRunner<T>> perSegmentRunners = Iterables.transform(specs, descriptor -> {
final PartitionChunk<Sink> chunk = sinkTimeline.findChunk(descriptor.getInterval(), descriptor.getVersion(), descriptor.getPartitionNumber());
// No sink for this descriptor: report the segment as missing instead of failing the query.
if (chunk == null) {
return new ReportTimelineMissingSegmentQueryRunner<>(descriptor);
}
final Sink theSink = chunk.getObject();
final SegmentId sinkSegmentId = theSink.getSegment().getId();
// One (interval, runner) pair per hydrant of the sink.
Iterable<QueryRunner<T>> perHydrantRunners = new SinkQueryRunners<>(Iterables.transform(theSink, hydrant -> {
// Hydrant might swap at any point, but if it's swapped at the start
// then we know it's *definitely* swapped.
final boolean hydrantDefinitelySwapped = hydrant.hasSwapped();
if (skipIncrementalSegment && !hydrantDefinitelySwapped) {
return new Pair<>(hydrant.getSegmentDataInterval(), new NoopQueryRunner<>());
}
// Prevent the underlying segment from swapping while it is being iterated
final Optional<Pair<SegmentReference, Closeable>> maybeSegmentAndCloseable = hydrant.getSegmentForQuery(segmentMapFn);
// if optional isn't present, we failed to acquire reference to the segment or any joinables
if (!maybeSegmentAndCloseable.isPresent()) {
return new Pair<>(hydrant.getSegmentDataInterval(), new ReportTimelineMissingSegmentQueryRunner<>(descriptor));
}
final Pair<SegmentReference, Closeable> segmentAndCloseable = maybeSegmentAndCloseable.get();
// From here on the Closeable (rhs) is held; every exit path must either hand it to the returned runner
// or close it in the catch block below.
try {
QueryRunner<T> runner = factory.createRunner(segmentAndCloseable.lhs);
// Caching is only applied when both hold:
// 1) the hydrant had already swapped when checked above (presumably so its data no longer changes), and
// 2) the cache is local, because hydrants are not the same between replicas.
if (hydrantDefinitelySwapped && cache.isLocal()) {
StorageAdapter storageAdapter = segmentAndCloseable.lhs.asStorageAdapter();
long segmentMinTime = storageAdapter.getMinTime().getMillis();
long segmentMaxTime = storageAdapter.getMaxTime().getMillis();
// NOTE(review): the +1 presumably makes the max timestamp inclusive under an end-exclusive interval - confirm.
Interval actualDataInterval = Intervals.utc(segmentMinTime, segmentMaxTime + 1);
runner = new CachingQueryRunner<>(makeHydrantCacheIdentifier(hydrant), cacheKeyPrefix, descriptor, actualDataInterval, objectMapper, cache, toolChest, runner, // Always populate in foreground regardless of config
new ForegroundCachePopulator(objectMapper, cachePopulatorStats, cacheConfig.getMaxEntrySize()), cacheConfig);
}
// Make it always use Closeable to decrement()
runner = QueryRunnerHelper.makeClosingQueryRunner(runner, segmentAndCloseable.rhs);
return new Pair<>(segmentAndCloseable.lhs.getDataInterval(), runner);
} catch (Throwable e) {
// Building the runner failed: close the held Closeable before propagating the failure.
throw CloseableUtils.closeAndWrapInCatch(e, segmentAndCloseable.rhs);
}
}));
// Merge the hydrant runners on the direct pool, tag results by segment, add per-sink metrics, and bind the
// result to this specific descriptor.
return new SpecificSegmentQueryRunner<>(withPerSinkMetrics(new BySegmentQueryRunner<>(sinkSegmentId, descriptor.getInterval().getStart(), factory.mergeRunners(DirectQueryProcessingPool.INSTANCE, perHydrantRunners)), toolChest, sinkSegmentId, cpuTimeAccumulator), new SpecificSegmentSpec(descriptor));
});
// Merge all per-segment runners on the configured processing pool and let the tool chest merge the results.
final QueryRunner<T> mergedRunner = toolChest.mergeResults(factory.mergeRunners(queryProcessingPool, perSegmentRunners));
return CPUTimeMetricQueryRunner.safeBuild(new FinalizeResultsQueryRunner<>(mergedRunner, toolChest), toolChest, emitter, cpuTimeAccumulator, true);
}
Use of org.apache.druid.timeline.partition.PartitionChunk in the druid project by druid-io.
From the class IndexerSQLMetadataStorageCoordinator, method createNewSegment.
/**
 * Builds the identifier of a new segment for the given datasource and interval.
 *
 * <p>The version and partition number of the new identifier are derived from the used segments visible in the
 * interval's time chunk combined with the pending segments recorded for the interval, so that the new
 * identifier does not clash with any of them.
 *
 * @param handle           database handle used for the metadata lookups
 * @param dataSource       datasource of the new segment
 * @param interval         interval of the new segment
 * @param partialShardSpec shard spec information without the segment-id specific parts
 * @param existingVersion  version given to the very first segment of the interval when neither a used chunk nor
 *                         a pending segment supplies one; an existing maximum id with a higher version than this
 *                         is treated as a conflict
 * @return the new segment identifier, or {@code null} (after logging a warning) when the interval maps to more
 *         than one existing chunk, when the highest existing id conflicts with the requested interval or
 *         version, or when that id has an unknown number of core partitions
 * @throws IOException declared by this method; presumably propagated from the metadata lookups it performs
 */
@Nullable
private SegmentIdWithShardSpec createNewSegment(final Handle handle, final String dataSource, final Interval interval, final PartialShardSpec partialShardSpec, final String existingVersion) throws IOException {
  // Time chunks (with their used segments) that overlap the requested interval, if any.
  final List<TimelineObjectHolder<String, DataSegment>> existingChunks =
      getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)).lookup(interval);

  // A single new segment cannot extend more than one chunk.
  if (existingChunks.size() > 1) {
    log.warn("Cannot allocate new segment for dataSource[%s], interval[%s]: already have [%,d] chunks.", dataSource, interval, existingChunks.size());
    return null;
  }

  // Highest partition id among the visible used segments sharing our partition space, plus the version of
  // the (at most one) existing chunk. Both stay null when the interval has no chunk yet.
  SegmentIdWithShardSpec maxUsedId = null;
  @Nullable final String versionOfExistingChunk;
  if (existingChunks.isEmpty()) {
    versionOfExistingChunk = null;
  } else {
    final TimelineObjectHolder<String, DataSegment> onlyChunk = Iterables.getOnlyElement(existingChunks);
    for (PartitionChunk<DataSegment> partitionChunk : onlyChunk.getObject()) {
      final DataSegment segment = partitionChunk.getObject();
      if (!segment.getShardSpec().sharePartitionSpace(partialShardSpec)) {
        continue;
      }
      if (maxUsedId == null
          || maxUsedId.getShardSpec().getPartitionNum() < segment.getShardSpec().getPartitionNum()) {
        maxUsedId = SegmentIdWithShardSpec.fromDataSegment(segment);
      }
    }
    versionOfExistingChunk = onlyChunk.getVersion();
  }

  // Pending segments may carry a higher partition id than any used segment; consider them together with the
  // maximum found in the segments table to avoid clashing when the new pending segment is inserted later.
  final Set<SegmentIdWithShardSpec> candidateIds =
      getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval);
  if (maxUsedId != null) {
    candidateIds.add(maxUsedId);
  }

  // Overall maximum, ordered by version and then by partition number. When a chunk exists, only ids carrying
  // that chunk's version are eligible: a pending segment with a higher version but no used segments could
  // otherwise produce an id that clashes with an existing segment.
  final SegmentIdWithShardSpec maxId = candidateIds
      .stream()
      .filter(
          id -> id.getShardSpec().sharePartitionSpace(partialShardSpec)
                && (versionOfExistingChunk == null || id.getVersion().equals(versionOfExistingChunk))
      )
      .max((left, right) -> {
        final int byVersion = left.getVersion().compareTo(right.getVersion());
        return byVersion != 0
               ? byVersion
               : Integer.compare(left.getShardSpec().getPartitionNum(), right.getShardSpec().getPartitionNum());
      })
      .orElse(null);

  // Version for the new id: the existing chunk's version wins; otherwise the version of the maximum pending
  // id (a non-null maxId implies the candidate set was not empty); otherwise unknown (null).
  final String newSegmentVersion;
  if (versionOfExistingChunk != null) {
    newSegmentVersion = versionOfExistingChunk;
  } else if (maxId != null) {
    newSegmentVersion = maxId.getVersion();
  } else {
    newSegmentVersion = null;
  }

  if (maxId == null) {
    // Very first segment of this time chunk. Allocation through this path happens for appends or segment
    // locks; in both cases the core partition set is not meaningful, so it is set to 0 and the
    // OvershadowableManager is left to handle the atomic segment update.
    final int firstPartitionId = partialShardSpec.useNonRootGenerationPartitionSpace()
                                 ? PartitionIds.NON_ROOT_GEN_START_PARTITION_ID
                                 : PartitionIds.ROOT_GEN_START_PARTITION_ID;
    final String version = newSegmentVersion != null ? newSegmentVersion : existingVersion;
    return new SegmentIdWithShardSpec(dataSource, interval, version, partialShardSpec.complete(jsonMapper, firstPartitionId, 0));
  }

  if (!maxId.getInterval().equals(interval) || maxId.getVersion().compareTo(existingVersion) > 0) {
    log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], existingVersion[%s]: conflicting segment[%s].", dataSource, interval, existingVersion, maxId);
    return null;
  }

  if (maxId.getShardSpec().getNumCorePartitions() == SingleDimensionShardSpec.UNKNOWN_NUM_CORE_PARTITIONS) {
    log.warn("Cannot allocate new segment because of unknown core partition size of segment[%s], shardSpec[%s]", maxId, maxId.getShardSpec());
    return null;
  }

  // Append after the current maximum, keeping its core partition count.
  return new SegmentIdWithShardSpec(
      dataSource,
      maxId.getInterval(),
      Preconditions.checkNotNull(newSegmentVersion, "newSegmentVersion"),
      partialShardSpec.complete(
          jsonMapper,
          maxId.getShardSpec().getPartitionNum() + 1,
          maxId.getShardSpec().getNumCorePartitions()
      )
  );
}
Use of org.apache.druid.timeline.partition.PartitionChunk in the druid project by druid-io.
From the class CompactSegmentsTest, method assertCompactSegments.
/**
 * Runs compaction three times, checking the coordinator stats after each run, and then checks the segments of
 * the three test datasources in {@code expectedInterval}.
 *
 * @param compactSegments           duty under test, passed to {@code doCompactSegments}
 * @param expectedInterval          interval in which every datasource must hold exactly one holder with two
 *                                  chunks
 * @param expectedRemainingSegments when positive, the awaiting-compaction stat value that run {@code i}
 *                                  (0-based) must report for {@code i + 1} datasources; otherwise the number
 *                                  of datasources present in that stat must be {@code 2 - i}
 * @param expectedCompactTaskCount  expected global compaction task count of every run
 * @param expectedVersionSupplier   called once per datasource to obtain the version its chunks must carry
 */
private void assertCompactSegments(CompactSegments compactSegments, Interval expectedInterval, int expectedRemainingSegments, int expectedCompactTaskCount, Supplier<String> expectedVersionSupplier) {
  for (int run = 0; run < 3; run++) {
    final CoordinatorStats stats = doCompactSegments(compactSegments);
    Assert.assertEquals(expectedCompactTaskCount, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));

    if (expectedRemainingSegments <= 0) {
      // Nothing is expected to remain: only count the datasources still listed in the coordinator stats.
      Assert.assertEquals(2 - run, stats.getDataSources(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_AWAITING).size());
      continue;
    }

    // Count the datasources whose awaiting-compaction stat equals the expected remaining value.
    final long matchingDataSources = stats
        .getDataSources(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_AWAITING)
        .stream()
        .mapToLong(ds -> stats.getDataSourceStat(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_AWAITING, ds))
        .filter(stat -> stat == expectedRemainingSegments)
        .count();
    Assert.assertEquals(run + 1, matchingDataSources);
  }

  for (int index = 0; index < 3; index++) {
    final List<TimelineObjectHolder<String, DataSegment>> holders =
        dataSources.get(DATA_SOURCE_PREFIX + index).lookup(expectedInterval);
    Assert.assertEquals(1, holders.size());

    final List<PartitionChunk<DataSegment>> chunks = Lists.newArrayList(holders.get(0).getObject());
    Assert.assertEquals(2, chunks.size());

    final String expectedVersion = expectedVersionSupplier.get();
    for (PartitionChunk<DataSegment> chunk : chunks) {
      final DataSegment segment = chunk.getObject();
      Assert.assertEquals(expectedInterval, segment.getInterval());
      Assert.assertEquals(expectedVersion, segment.getVersion());
    }
  }
}
Aggregations