use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class SinkQuerySegmentWalker method getQueryRunnerForSegments.
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
// We only handle one particular dataSource. Make sure that's what we have, then ignore from here on out.
final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
// Sanity check: make sure the query is based on the table we're meant to handle.
if (!analysis.getBaseTableDataSource().filter(ds -> dataSource.equals(ds.getName())).isPresent()) {
throw new ISE("Cannot handle datasource: %s", analysis.getDataSource());
}
final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
if (factory == null) {
throw new ISE("Unknown query type[%s].", query.getClass());
}
final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
final boolean skipIncrementalSegment = query.getContextValue(CONTEXT_SKIP_INCREMENTAL_SEGMENT, false);
final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);
// Make sure this query type can handle the subquery, if present.
if (analysis.isQuery() && !toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery())) {
throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
}
// segmentMapFn maps each base Segment into a joined Segment if necessary.
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
    analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
    analysis.getPreJoinableClauses(),
    cpuTimeAccumulator,
    analysis.getBaseQuery().orElse(query)
);
// We compute the join cache key here so it doesn't need to be re-computed for every segment.
final Optional<byte[]> cacheKeyPrefix = analysis.isJoin()
    ? joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis)
    : Optional.of(StringUtils.EMPTY_BYTES);
Iterable<QueryRunner<T>> perSegmentRunners = Iterables.transform(specs, descriptor -> {
final PartitionChunk<Sink> chunk = sinkTimeline.findChunk(
    descriptor.getInterval(),
    descriptor.getVersion(),
    descriptor.getPartitionNumber()
);
if (chunk == null) {
return new ReportTimelineMissingSegmentQueryRunner<>(descriptor);
}
final Sink theSink = chunk.getObject();
final SegmentId sinkSegmentId = theSink.getSegment().getId();
Iterable<QueryRunner<T>> perHydrantRunners = new SinkQueryRunners<>(Iterables.transform(theSink, hydrant -> {
// Hydrant might swap at any point, but if it's swapped at the start
// then we know it's *definitely* swapped.
final boolean hydrantDefinitelySwapped = hydrant.hasSwapped();
if (skipIncrementalSegment && !hydrantDefinitelySwapped) {
return new Pair<>(hydrant.getSegmentDataInterval(), new NoopQueryRunner<>());
}
// Prevent the underlying segment from swapping while it is being iterated.
final Optional<Pair<SegmentReference, Closeable>> maybeSegmentAndCloseable = hydrant.getSegmentForQuery(segmentMapFn);
// If the optional isn't present, we failed to acquire a reference to the segment or to one of its joinables.
if (!maybeSegmentAndCloseable.isPresent()) {
return new Pair<>(hydrant.getSegmentDataInterval(), new ReportTimelineMissingSegmentQueryRunner<>(descriptor));
}
final Pair<SegmentReference, Closeable> segmentAndCloseable = maybeSegmentAndCloseable.get();
try {
QueryRunner<T> runner = factory.createRunner(segmentAndCloseable.lhs);
// 1) Only use caching if data is immutable
// 2) Hydrants are not the same between replicas, make sure cache is local
if (hydrantDefinitelySwapped && cache.isLocal()) {
StorageAdapter storageAdapter = segmentAndCloseable.lhs.asStorageAdapter();
long segmentMinTime = storageAdapter.getMinTime().getMillis();
long segmentMaxTime = storageAdapter.getMaxTime().getMillis();
Interval actualDataInterval = Intervals.utc(segmentMinTime, segmentMaxTime + 1);
runner = new CachingQueryRunner<>(
    makeHydrantCacheIdentifier(hydrant),
    cacheKeyPrefix,
    descriptor,
    actualDataInterval,
    objectMapper,
    cache,
    toolChest,
    runner,
    // Always populate in foreground regardless of config
    new ForegroundCachePopulator(objectMapper, cachePopulatorStats, cacheConfig.getMaxEntrySize()),
    cacheConfig
);
}
// Wrap the runner so the Closeable is always invoked, decrementing the segment reference count.
runner = QueryRunnerHelper.makeClosingQueryRunner(runner, segmentAndCloseable.rhs);
return new Pair<>(segmentAndCloseable.lhs.getDataInterval(), runner);
} catch (Throwable e) {
throw CloseableUtils.closeAndWrapInCatch(e, segmentAndCloseable.rhs);
}
}));
return new SpecificSegmentQueryRunner<>(
    withPerSinkMetrics(
        new BySegmentQueryRunner<>(
            sinkSegmentId,
            descriptor.getInterval().getStart(),
            factory.mergeRunners(DirectQueryProcessingPool.INSTANCE, perHydrantRunners)
        ),
        toolChest,
        sinkSegmentId,
        cpuTimeAccumulator
    ),
    new SpecificSegmentSpec(descriptor)
);
});
final QueryRunner<T> mergedRunner = toolChest.mergeResults(factory.mergeRunners(queryProcessingPool, perSegmentRunners));
return CPUTimeMetricQueryRunner.safeBuild(
    new FinalizeResultsQueryRunner<>(mergedRunner, toolChest),
    toolChest,
    emitter,
    cpuTimeAccumulator,
    true
);
}
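For orientation, here is a minimal sketch (not taken from the project source) of how a caller might hand explicit SegmentDescriptors to a QuerySegmentWalker such as the one above. The walker and query instances, the interval "2000/2001", the version "v1", and partition number 0 are assumptions made for illustration.
import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.query.context.ResponseContext;
// Hypothetical helper: run a query against one explicitly listed segment instead of an interval.
static <T> Sequence<T> runOverOneSegment(QuerySegmentWalker walker, Query<T> query)
{
  // Interval, version, and partition number are made-up values identifying a single segment.
  final SegmentDescriptor descriptor = new SegmentDescriptor(Intervals.of("2000/2001"), "v1", 0);
  final QueryRunner<T> runner = walker.getQueryRunnerForSegments(query, ImmutableList.of(descriptor));
  return runner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
}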
use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class KafkaIndexTaskTest method makeToolboxFactory.
private void makeToolboxFactory() throws IOException {
directory = tempFolder.newFolder();
final TestUtils testUtils = new TestUtils();
rowIngestionMetersFactory = testUtils.getRowIngestionMetersFactory();
final ObjectMapper objectMapper = testUtils.getTestObjectMapper();
for (Module module : new KafkaIndexTaskModule().getJacksonModules()) {
objectMapper.registerModule(module);
}
objectMapper.registerModule(TEST_MODULE);
final TaskConfig taskConfig = new TaskConfig(
    new File(directory, "baseDir").getPath(),
    new File(directory, "baseTaskDir").getPath(),
    null,
    50000,
    null,
    true,
    null,
    null,
    null,
    false,
    false,
    TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name()
);
final TestDerbyConnector derbyConnector = derby.getConnector();
derbyConnector.createDataSourceTable();
derbyConnector.createPendingSegmentsTable();
derbyConnector.createSegmentTable();
derbyConnector.createRulesTable();
derbyConnector.createConfigTable();
derbyConnector.createTaskTables();
derbyConnector.createAuditTable();
taskStorage = new MetadataTaskStorage(
    derbyConnector,
    new TaskStorageConfig(null),
    new DerbyMetadataStorageActionHandlerFactory(derbyConnector, derby.metadataTablesConfigSupplier().get(), objectMapper)
);
metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator(
    testUtils.getTestObjectMapper(),
    derby.metadataTablesConfigSupplier().get(),
    derbyConnector
);
taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator);
final TaskActionToolbox taskActionToolbox = new TaskActionToolbox(
    taskLockbox,
    taskStorage,
    metadataStorageCoordinator,
    emitter,
    new SupervisorManager(null) {
@Override
public boolean checkPointDataSourceMetadata(String supervisorId, int taskGroupId, @Nullable DataSourceMetadata previousDataSourceMetadata) {
log.info("Adding checkpoint hash to the set");
checkpointRequestsHash.add(Objects.hash(supervisorId, taskGroupId, previousDataSourceMetadata));
return true;
}
});
final TaskActionClientFactory taskActionClientFactory = new LocalTaskActionClientFactory(
    taskStorage,
    taskActionToolbox,
    new TaskAuditLogConfig(false)
);
final SegmentHandoffNotifierFactory handoffNotifierFactory = dataSource -> new SegmentHandoffNotifier() {
@Override
public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable) {
if (doHandoff) {
// Simulate immediate handoff
exec.execute(handOffRunnable);
}
return true;
}
@Override
public void start() {
// Noop
}
@Override
public void close() {
// Noop
}
};
final LocalDataSegmentPusherConfig dataSegmentPusherConfig = new LocalDataSegmentPusherConfig();
dataSegmentPusherConfig.storageDirectory = getSegmentDirectory();
final DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(dataSegmentPusherConfig);
toolboxFactory = new TaskToolboxFactory(
    taskConfig,
    null, // taskExecutorNode
    taskActionClientFactory,
    emitter,
    dataSegmentPusher,
    new TestDataSegmentKiller(),
    null, // DataSegmentMover
    null, // DataSegmentArchiver
    new TestDataSegmentAnnouncer(),
    EasyMock.createNiceMock(DataSegmentServerAnnouncer.class),
    handoffNotifierFactory,
    this::makeTimeseriesAndScanConglomerate,
    DirectQueryProcessingPool.INSTANCE,
    NoopJoinableFactory.INSTANCE,
    () -> EasyMock.createMock(MonitorScheduler.class),
    new SegmentCacheManagerFactory(testUtils.getTestObjectMapper()),
    testUtils.getTestObjectMapper(),
    testUtils.getTestIndexIO(),
    MapCache.create(1024),
    new CacheConfig(),
    new CachePopulatorStats(),
    testUtils.getTestIndexMergerV9(),
    EasyMock.createNiceMock(DruidNodeAnnouncer.class),
    EasyMock.createNiceMock(DruidNode.class),
    new LookupNodeService("tier"),
    new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0),
    new SingleFileTaskReportFileWriter(reportsFile),
    null,
    AuthTestUtils.TEST_AUTHORIZER_MAPPER,
    new NoopChatHandlerProvider(),
    testUtils.getRowIngestionMetersFactory(),
    new TestAppenderatorsManager(),
    new NoopIndexingServiceClient(),
    null,
    null,
    null
);
}
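As a follow-up, a hedged sketch (not part of the test) of what the lambda-based handoff notifier factory above does once a callback is registered for a specific SegmentDescriptor; the data source name, interval, version, and partition number are invented, and `log` is the test's existing logger.
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.segment.handoff.SegmentHandoffNotifier;
// Hypothetical usage: because the factory above simulates immediate handoff,
// the callback runs right away on the supplied Executor when doHandoff is true.
final SegmentHandoffNotifier notifier = handoffNotifierFactory.createSegmentHandoffNotifier("test_datasource");
notifier.start();
notifier.registerSegmentHandoffCallback(
    new SegmentDescriptor(Intervals.of("2008-01-01/2008-01-02"), "v1", 0),
    Runnable::run, // direct executor
    () -> log.info("segment handed off")
);
notifier.close();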
use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class SeekableStreamIndexTaskTestBase method assertEqualsExceptVersion.
protected void assertEqualsExceptVersion(
    List<SegmentDescriptorAndExpectedDim1Values> expectedDescriptors,
    List<SegmentDescriptor> actualDescriptors
) throws IOException {
Assert.assertEquals(expectedDescriptors.size(), actualDescriptors.size());
final Comparator<SegmentDescriptor> comparator = (s1, s2) -> {
final int intervalCompare = Comparators.intervalsByStartThenEnd().compare(s1.getInterval(), s2.getInterval());
if (intervalCompare == 0) {
return Integer.compare(s1.getPartitionNumber(), s2.getPartitionNumber());
} else {
return intervalCompare;
}
};
final List<SegmentDescriptorAndExpectedDim1Values> expectedDescsCopy = new ArrayList<>(expectedDescriptors);
final List<SegmentDescriptor> actualDescsCopy = new ArrayList<>(actualDescriptors);
expectedDescsCopy.sort(Comparator.comparing(SegmentDescriptorAndExpectedDim1Values::getSegmentDescriptor, comparator));
actualDescsCopy.sort(comparator);
for (int i = 0; i < expectedDescsCopy.size(); i++) {
SegmentDescriptorAndExpectedDim1Values expectedDesc = expectedDescsCopy.get(i);
SegmentDescriptor actualDesc = actualDescsCopy.get(i);
Assert.assertEquals(expectedDesc.segmentDescriptor.getInterval(), actualDesc.getInterval());
Assert.assertEquals(expectedDesc.segmentDescriptor.getPartitionNumber(), actualDesc.getPartitionNumber());
if (expectedDesc.expectedDim1Values.isEmpty()) {
// Treating empty expectedDim1Values as a signal that checking the dim1 column value is not needed.
continue;
}
Assertions.assertThat(readSegmentColumn("dim1", actualDesc)).isIn(expectedDesc.expectedDim1Values);
}
}
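For illustration, a hedged sketch of how a test might invoke this helper. The sdd(...) descriptor-building helper, the publishedDescriptors() accessor, and the interval and dim1 values shown here are assumptions, not verbatim from the source.
// Hypothetical invocation: two expected segments, identified by interval and partition number
// (the version is deliberately ignored by the assertion above), each paired with the dim1
// values expected to be read back from the published segment.
assertEqualsExceptVersion(
    ImmutableList.of(
        sdd("2008/P1D", 0, ImmutableList.of("a")), // assumed helper building a SegmentDescriptorAndExpectedDim1Values
        sdd("2009/P1D", 0, ImmutableList.of("b"))
    ),
    publishedDescriptors() // assumed accessor returning the task's published SegmentDescriptors
);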
use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class ServerManagerForQueryErrorTest method buildQueryRunnerForSegment.
@Override
protected <T> QueryRunner<T> buildQueryRunnerForSegment(
    Query<T> query,
    SegmentDescriptor descriptor,
    QueryRunnerFactory<T, Query<T>> factory,
    QueryToolChest<T, Query<T>> toolChest,
    VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline,
    Function<SegmentReference, SegmentReference> segmentMapFn,
    AtomicLong cpuTimeAccumulator,
    Optional<byte[]> cacheKeyPrefix
) {
if (query.getContextBoolean(QUERY_RETRY_TEST_CONTEXT_KEY, false)) {
final MutableBoolean isIgnoreSegment = new MutableBoolean(false);
queryToIgnoredSegments.compute(query.getMostSpecificId(), (queryId, ignoredSegments) -> {
if (ignoredSegments == null) {
ignoredSegments = new HashSet<>();
}
if (ignoredSegments.size() < MAX_NUM_FALSE_MISSING_SEGMENTS_REPORTS) {
ignoredSegments.add(descriptor);
isIgnoreSegment.setTrue();
}
return ignoredSegments;
});
if (isIgnoreSegment.isTrue()) {
LOG.info("Pretending I don't have segment[%s]", descriptor);
return new ReportTimelineMissingSegmentQueryRunner<>(descriptor);
}
} else if (query.getContextBoolean(QUERY_TIMEOUT_TEST_CONTEXT_KEY, false)) {
return (queryPlus, responseContext) -> new Sequence<T>() {
@Override
public <OutType> OutType accumulate(OutType initValue, Accumulator<OutType, T> accumulator) {
throw new QueryTimeoutException("query timeout test");
}
@Override
public <OutType> Yielder<OutType> toYielder(OutType initValue, YieldingAccumulator<OutType, T> accumulator) {
throw new QueryTimeoutException("query timeout test");
}
};
} else if (query.getContextBoolean(QUERY_CAPACITY_EXCEEDED_TEST_CONTEXT_KEY, false)) {
return (queryPlus, responseContext) -> new Sequence<T>() {
@Override
public <OutType> OutType accumulate(OutType initValue, Accumulator<OutType, T> accumulator) {
throw QueryCapacityExceededException.withErrorMessageAndResolvedHost("query capacity exceeded test");
}
@Override
public <OutType> Yielder<OutType> toYielder(OutType initValue, YieldingAccumulator<OutType, T> accumulator) {
throw QueryCapacityExceededException.withErrorMessageAndResolvedHost("query capacity exceeded test");
}
};
} else if (query.getContextBoolean(QUERY_UNSUPPORTED_TEST_CONTEXT_KEY, false)) {
return (queryPlus, responseContext) -> new Sequence<T>() {
@Override
public <OutType> OutType accumulate(OutType initValue, Accumulator<OutType, T> accumulator) {
throw new QueryUnsupportedException("query unsupported test");
}
@Override
public <OutType> Yielder<OutType> toYielder(OutType initValue, YieldingAccumulator<OutType, T> accumulator) {
throw new QueryUnsupportedException("query unsupported test");
}
};
} else if (query.getContextBoolean(RESOURCE_LIMIT_EXCEEDED_TEST_CONTEXT_KEY, false)) {
return (queryPlus, responseContext) -> new Sequence<T>() {
@Override
public <OutType> OutType accumulate(OutType initValue, Accumulator<OutType, T> accumulator) {
throw new ResourceLimitExceededException("resource limit exceeded test");
}
@Override
public <OutType> Yielder<OutType> toYielder(OutType initValue, YieldingAccumulator<OutType, T> accumulator) {
throw new ResourceLimitExceededException("resource limit exceeded test");
}
};
} else if (query.getContextBoolean(QUERY_FAILURE_TEST_CONTEXT_KEY, false)) {
return (queryPlus, responseContext) -> new Sequence<T>() {
@Override
public <OutType> OutType accumulate(OutType initValue, Accumulator<OutType, T> accumulator) {
throw new RuntimeException("query failure test");
}
@Override
public <OutType> Yielder<OutType> toYielder(OutType initValue, YieldingAccumulator<OutType, T> accumulator) {
throw new RuntimeException("query failure test");
}
};
}
return super.buildQueryRunnerForSegment(
    query,
    descriptor,
    factory,
    toolChest,
    timeline,
    segmentMapFn,
    cpuTimeAccumulator,
    cacheKeyPrefix
);
}
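A short sketch, under the assumption that a test already holds a Query<T> instance, showing how the error paths above are selected: each branch keys off a boolean context flag, so opting into the false-missing-segment path is just a context override. QUERY_RETRY_TEST_CONTEXT_KEY is the constant referenced in the method above; the helper name is hypothetical.
import com.google.common.collect.ImmutableMap;
import org.apache.druid.query.Query;
// Hypothetical helper: flag a query so that buildQueryRunnerForSegment() above reports
// the first MAX_NUM_FALSE_MISSING_SEGMENTS_REPORTS segments as missing.
static <T> Query<T> withRetryTestFlag(Query<T> query)
{
  return query.withOverriddenContext(ImmutableMap.of(QUERY_RETRY_TEST_CONTEXT_KEY, true));
}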
use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class QueryResourceTest method createScheduledQueryResource.
private void createScheduledQueryResource(
    QueryScheduler scheduler,
    Collection<CountDownLatch> beforeScheduler,
    Collection<CountDownLatch> inScheduler
) {
QuerySegmentWalker texasRanger = new QuerySegmentWalker() {
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
return (queryPlus, responseContext) -> {
beforeScheduler.forEach(CountDownLatch::countDown);
return scheduler.run(scheduler.prioritizeAndLaneQuery(queryPlus, ImmutableSet.of()), new LazySequence<T>(() -> {
inScheduler.forEach(CountDownLatch::countDown);
try {
// pretend to be a query that is waiting on results
Thread.sleep(500);
} catch (InterruptedException ignored) {
}
// all that waiting for nothing :(
return Sequences.empty();
}));
};
}
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(Query<T> query, Iterable<SegmentDescriptor> specs) {
return getQueryRunnerForIntervals(null, null);
}
};
queryResource = new QueryResource(
    new QueryLifecycleFactory(
        WAREHOUSE,
        texasRanger,
        new DefaultGenericQueryMetricsFactory(),
        new NoopServiceEmitter(),
        testRequestLogger,
        new AuthConfig(),
        AuthTestUtils.TEST_AUTHORIZER_MAPPER,
        Suppliers.ofInstance(new DefaultQueryConfig(ImmutableMap.of()))
    ),
    jsonMapper,
    smileMapper,
    scheduler,
    new AuthConfig(),
    null,
    ResponseContextConfig.newConfig(true),
    DRUID_NODE
);
}
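To make the latch plumbing concrete, a hedged sketch of how a test might call this helper; the scheduler construction is omitted and the scheduler variable is assumed to already exist.
import com.google.common.collect.ImmutableList;
import java.util.concurrent.CountDownLatch;
// Hypothetical call: one latch trips just before the query reaches the scheduler,
// the other once the scheduler has started running it, so the test can assert on
// scheduler state at each point.
final CountDownLatch beforeScheduler = new CountDownLatch(1);
final CountDownLatch inScheduler = new CountDownLatch(1);
createScheduledQueryResource(scheduler, ImmutableList.of(beforeScheduler), ImmutableList.of(inScheduler));
// A test would then POST a query to queryResource on another thread and await the latches.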