Use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
The class ClientQuerySegmentWalkerTest, method initWalker.
/**
* Initialize (or reinitialize) our {@link #walker} and {@link #closer}.
*/
private void initWalker(final Map<String, String> serverProperties, QueryScheduler schedulerForTest) {
final ObjectMapper jsonMapper = TestHelper.makeJsonMapper();
final ServerConfig serverConfig = jsonMapper.convertValue(serverProperties, ServerConfig.class);
final SegmentWrangler segmentWrangler = new MapSegmentWrangler(
    ImmutableMap.<Class<? extends DataSource>, SegmentWrangler>builder()
        .put(InlineDataSource.class, new InlineSegmentWrangler())
        .build()
);
final JoinableFactory globalFactory = new JoinableFactory() {
@Override
public boolean isDirectlyJoinable(DataSource dataSource) {
return ((GlobalTableDataSource) dataSource).getName().equals(GLOBAL);
}
@Override
public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition) {
return Optional.empty();
}
};
final JoinableFactory joinableFactory = new MapJoinableFactory(
    ImmutableSet.of(globalFactory, new InlineJoinableFactory()),
    ImmutableMap.<Class<? extends JoinableFactory>, Class<? extends DataSource>>builder()
        .put(InlineJoinableFactory.class, InlineDataSource.class)
        .put(globalFactory.getClass(), GlobalTableDataSource.class)
        .build()
);
class CapturingWalker implements QuerySegmentWalker {
private QuerySegmentWalker baseWalker;
private ClusterOrLocal how;
CapturingWalker(QuerySegmentWalker baseWalker, ClusterOrLocal how) {
this.baseWalker = baseWalker;
this.how = how;
}
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForIntervals(query, intervals);
return (queryPlus, responseContext) -> {
log.info("Query (%s): %s", how, queryPlus.getQuery());
issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
return baseRunner.run(queryPlus, responseContext);
};
}
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(Query<T> query, Iterable<SegmentDescriptor> specs) {
final QueryRunner<T> baseRunner = baseWalker.getQueryRunnerForSegments(query, specs);
return (queryPlus, responseContext) -> {
log.info("Query (%s): %s", how, queryPlus.getQuery());
issuedQueries.add(new ExpectedQuery(queryPlus.getQuery(), how));
return baseRunner.run(queryPlus, responseContext);
};
}
}
walker = QueryStackTests.createClientQuerySegmentWalker(
    new CapturingWalker(
        QueryStackTests.createClusterQuerySegmentWalker(
            ImmutableMap.<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>>builder()
                .put(FOO, makeTimeline(FOO, FOO_INLINE))
                .put(BAR, makeTimeline(BAR, BAR_INLINE))
                .put(MULTI, makeTimeline(MULTI, MULTI_VALUE_INLINE))
                .put(GLOBAL, makeTimeline(GLOBAL, FOO_INLINE))
                .put(ARRAY, makeTimeline(ARRAY, ARRAY_INLINE))
                .put(ARRAY_UNKNOWN, makeTimeline(ARRAY_UNKNOWN, ARRAY_INLINE_UNKNOWN))
                .build(),
            joinableFactory,
            conglomerate,
            schedulerForTest
        ),
        ClusterOrLocal.CLUSTER
    ),
    new CapturingWalker(
        QueryStackTests.createLocalQuerySegmentWalker(conglomerate, segmentWrangler, joinableFactory, schedulerForTest),
        ClusterOrLocal.LOCAL
    ),
    conglomerate,
    joinableFactory,
    serverConfig
);
}
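The makeTimeline helper referenced above is not reproduced on this page. As a rough sketch of what such a fixture helper could do, the fragment below registers a single segment under one version, assuming the inline rows have already been wrapped in a ReferenceCountingSegment; the interval, shardSpec, and segment parameters are hypothetical and the real helper in ClientQuerySegmentWalkerTest differs.
// Sketch only (assumptions noted above): build a one-segment timeline for a test fixture.
private static VersionedIntervalTimeline<String, ReferenceCountingSegment> sketchTimeline(
    final Interval interval,                 // hypothetical parameter
    final ShardSpec shardSpec,               // hypothetical parameter
    final ReferenceCountingSegment segment   // hypothetical parameter
)
{
  final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline =
      new VersionedIntervalTimeline<>(Ordering.natural());
  // ShardSpec.createChunk wraps the segment in the PartitionChunk form the timeline expects.
  timeline.add(interval, "v1", shardSpec.createChunk(segment));
  return timeline;
}
In the actual test, the segment is first built from the inline datasource's rows; that step is omitted here.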
Use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
The class ServerManager, method getQueryRunnerForSegments.
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(Query<T> query, Iterable<SegmentDescriptor> specs) {
final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
if (factory == null) {
final QueryUnsupportedException e = new QueryUnsupportedException(
    StringUtils.format("Unknown query type, [%s]", query.getClass())
);
log.makeAlert(e, "Error while executing a query[%s]", query.getId())
   .addData("dataSource", query.getDataSource())
   .emit();
throw e;
}
final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);
final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline;
final Optional<VersionedIntervalTimeline<String, ReferenceCountingSegment>> maybeTimeline = segmentManager.getTimeline(analysis);
// Make sure this query type can handle the subquery, if present.
if (analysis.isQuery() && !toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery())) {
throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
}
if (maybeTimeline.isPresent()) {
timeline = maybeTimeline.get();
} else {
return new ReportTimelineMissingSegmentQueryRunner<>(Lists.newArrayList(specs));
}
// segmentMapFn maps each base Segment into a joined Segment if necessary.
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
    analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
    analysis.getPreJoinableClauses(),
    cpuTimeAccumulator,
    analysis.getBaseQuery().orElse(query)
);
// Compute the join cache key here so it doesn't need to be recomputed for every segment.
final Optional<byte[]> cacheKeyPrefix = analysis.isJoin()
    ? joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis)
    : Optional.of(StringUtils.EMPTY_BYTES);
final FunctionalIterable<QueryRunner<T>> queryRunners = FunctionalIterable
    .create(specs)
    .transformCat(
        descriptor -> Collections.singletonList(
            buildQueryRunnerForSegment(
                query,
                descriptor,
                factory,
                toolChest,
                timeline,
                segmentMapFn,
                cpuTimeAccumulator,
                cacheKeyPrefix
            )
        )
    );
return CPUTimeMetricQueryRunner.safeBuild(
    new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(factory.mergeRunners(queryProcessingPool, queryRunners)),
        toolChest
    ),
    toolChest,
    emitter,
    cpuTimeAccumulator,
    true
);
}
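buildQueryRunnerForSegment, which produces the per-segment runners above, is not shown on this page. As an illustration of the timeline lookup involved, a simplified resolver for a single SegmentDescriptor could look like the fragment below, using only the lookup and chunk APIs that appear elsewhere on this page; this linear scan is a sketch, not ServerManager's actual helper.
// Sketch: match the descriptor's interval, version, and partition number against the timeline.
// Returns null when nothing matches; the real code reports a missing segment in that case.
private static PartitionChunk<ReferenceCountingSegment> findChunkForDescriptor(
    final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline,
    final SegmentDescriptor descriptor
)
{
  for (TimelineObjectHolder<String, ReferenceCountingSegment> holder : timeline.lookup(descriptor.getInterval())) {
    if (!holder.getVersion().equals(descriptor.getVersion())) {
      continue;
    }
    for (PartitionChunk<ReferenceCountingSegment> chunk : holder.getObject()) {
      if (chunk.getChunkNumber() == descriptor.getPartitionNumber()) {
        return chunk;
      }
    }
  }
  return null;
}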
Use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
The class ServerManager, method getQueryRunnerForIntervals.
@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline;
final Optional<VersionedIntervalTimeline<String, ReferenceCountingSegment>> maybeTimeline = segmentManager.getTimeline(analysis);
if (maybeTimeline.isPresent()) {
timeline = maybeTimeline.get();
} else {
// Even though no timeline was found for the query datasource, return a no-op runner rather than
// reporting missing segments: the query intervals are a filter, not something we must find.
return new NoopQueryRunner<>();
}
FunctionalIterable<SegmentDescriptor> segmentDescriptors = FunctionalIterable
    .create(intervals)
    .transformCat(timeline::lookup)
    .transformCat(
        holder -> {
          if (holder == null) {
            return null;
          }
          return FunctionalIterable
              .create(holder.getObject())
              .transform(
                  partitionChunk -> new SegmentDescriptor(
                      holder.getInterval(),
                      holder.getVersion(),
                      partitionChunk.getChunkNumber()
                  )
              );
        }
    );
return getQueryRunnerForSegments(query, segmentDescriptors);
}
Use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
The class EmitClusterStatsAndMetrics, method run.
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
DruidCluster cluster = params.getDruidCluster();
CoordinatorStats stats = params.getCoordinatorStats();
ServiceEmitter emitter = params.getEmitter();
stats.forEachTieredStat("assignedCount", (final String tier, final long count) -> {
log.info("[%s] : Assigned %s segments among %,d servers", tier, count, cluster.getHistoricalsByTier(tier).size());
emitTieredStat(emitter, "segment/assigned/count", tier, count);
});
stats.forEachTieredStat("droppedCount", (final String tier, final long count) -> {
log.info("[%s] : Dropped %s segments among %,d servers", tier, count, cluster.getHistoricalsByTier(tier).size());
emitTieredStat(emitter, "segment/dropped/count", tier, count);
});
emitTieredStats(emitter, "segment/cost/raw", stats, "initialCost");
emitTieredStats(emitter, "segment/cost/normalization", stats, "normalization");
emitTieredStats(emitter, "segment/moved/count", stats, "movedCount");
emitTieredStats(emitter, "segment/deleted/count", stats, "deletedCount");
stats.forEachTieredStat("normalizedInitialCostTimesOneThousand", (final String tier, final long count) -> {
emitTieredStat(emitter, "segment/cost/normalized", tier, count / 1000d);
});
stats.forEachTieredStat("unneededCount", (final String tier, final long count) -> {
log.info("[%s] : Removed %s unneeded segments among %,d servers", tier, count, cluster.getHistoricalsByTier(tier).size());
emitTieredStat(emitter, "segment/unneeded/count", tier, count);
});
emitter.emit(new ServiceMetricEvent.Builder().build("segment/overShadowed/count", stats.getGlobalStat("overShadowedCount")));
stats.forEachTieredStat("movedCount", (final String tier, final long count) -> {
log.info("[%s] : Moved %,d segment(s)", tier, count);
});
stats.forEachTieredStat("unmovedCount", (final String tier, final long count) -> {
log.info("[%s] : Let alone %,d segment(s)", tier, count);
});
log.info("Load Queues:");
for (Iterable<ServerHolder> serverHolders : cluster.getSortedHistoricalsByTier()) {
for (ServerHolder serverHolder : serverHolders) {
ImmutableDruidServer server = serverHolder.getServer();
LoadQueuePeon queuePeon = serverHolder.getPeon();
log.info("Server[%s, %s, %s] has %,d left to load, %,d left to drop, %,d bytes queued, %,d bytes served.", server.getName(), server.getType().toString(), server.getTier(), queuePeon.getSegmentsToLoad().size(), queuePeon.getSegmentsToDrop().size(), queuePeon.getLoadQueueSize(), server.getCurrSize());
if (log.isDebugEnabled()) {
for (DataSegment segment : queuePeon.getSegmentsToLoad()) {
log.debug("Segment to load[%s]", segment);
}
for (DataSegment segment : queuePeon.getSegmentsToDrop()) {
log.debug("Segment to drop[%s]", segment);
}
}
stats.addToTieredStat(TOTAL_CAPACITY, server.getTier(), server.getMaxSize());
stats.addToTieredStat(TOTAL_HISTORICAL_COUNT, server.getTier(), 1);
}
}
params.getDatabaseRuleManager().getAllRules().values().forEach(
    rules -> rules.forEach(rule -> {
      if (rule instanceof LoadRule) {
        ((LoadRule) rule).getTieredReplicants()
                         .forEach((tier, replica) -> stats.accumulateMaxTieredStat(MAX_REPLICATION_FACTOR, tier, replica));
      }
    })
);
emitTieredStats(emitter, "tier/required/capacity", stats, LoadRule.REQUIRED_CAPACITY);
emitTieredStats(emitter, "tier/total/capacity", stats, TOTAL_CAPACITY);
emitTieredStats(emitter, "tier/replication/factor", stats, MAX_REPLICATION_FACTOR);
emitTieredStats(emitter, "tier/historical/count", stats, TOTAL_HISTORICAL_COUNT);
// Emit coordinator metrics
params.getLoadManagementPeons().forEach((final String serverName, final LoadQueuePeon queuePeon) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/size", queuePeon.getLoadQueueSize()));
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/failed", queuePeon.getAndResetFailedAssignCount()));
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/loadQueue/count", queuePeon.getSegmentsToLoad().size()));
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.SERVER, serverName).build("segment/dropQueue/count", queuePeon.getSegmentsToDrop().size()));
});
coordinator.computeNumsUnavailableUsedSegmentsPerDataSource().object2IntEntrySet().forEach((final Object2IntMap.Entry<String> entry) -> {
final String dataSource = entry.getKey();
final int numUnavailableUsedSegmentsInDataSource = entry.getIntValue();
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/unavailable/count", numUnavailableUsedSegmentsInDataSource));
});
coordinator.computeUnderReplicationCountsPerDataSourcePerTier().forEach((final String tier, final Object2LongMap<String> underReplicationCountsPerDataSource) -> {
for (final Object2LongMap.Entry<String> entry : underReplicationCountsPerDataSource.object2LongEntrySet()) {
final String dataSource = entry.getKey();
final long underReplicationCount = entry.getLongValue();
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.TIER, tier).setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/underReplicated/count", underReplicationCount));
}
});
emitter.emit(new ServiceMetricEvent.Builder().build("compact/task/count", stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT)));
emitter.emit(new ServiceMetricEvent.Builder().build("compactTask/maxSlot/count", stats.getGlobalStat(CompactSegments.MAX_COMPACTION_TASK_SLOT)));
emitter.emit(new ServiceMetricEvent.Builder().build("compactTask/availableSlot/count", stats.getGlobalStat(CompactSegments.AVAILABLE_COMPACTION_TASK_SLOT)));
stats.forEachDataSourceStat(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_AWAITING, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/waitCompact/bytes", count));
});
stats.forEachDataSourceStat(CompactSegments.TOTAL_COUNT_OF_SEGMENTS_AWAITING, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/waitCompact/count", count));
});
stats.forEachDataSourceStat(CompactSegments.TOTAL_INTERVAL_OF_SEGMENTS_AWAITING, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("interval/waitCompact/count", count));
});
stats.forEachDataSourceStat(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_SKIPPED, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/skipCompact/bytes", count));
});
stats.forEachDataSourceStat(CompactSegments.TOTAL_COUNT_OF_SEGMENTS_SKIPPED, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/skipCompact/count", count));
});
stats.forEachDataSourceStat(CompactSegments.TOTAL_INTERVAL_OF_SEGMENTS_SKIPPED, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("interval/skipCompact/count", count));
});
stats.forEachDataSourceStat(CompactSegments.TOTAL_SIZE_OF_SEGMENTS_COMPACTED, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/compacted/bytes", count));
});
stats.forEachDataSourceStat(CompactSegments.TOTAL_COUNT_OF_SEGMENTS_COMPACTED, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/compacted/count", count));
});
stats.forEachDataSourceStat(CompactSegments.TOTAL_INTERVAL_OF_SEGMENTS_COMPACTED, (final String dataSource, final long count) -> {
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("interval/compacted/count", count));
});
// Emit segment metrics
params.getUsedSegmentsTimelinesPerDataSource().forEach((String dataSource, VersionedIntervalTimeline<String, DataSegment> dataSourceWithUsedSegments) -> {
long totalSizeOfUsedSegments = dataSourceWithUsedSegments.iterateAllObjects().stream().mapToLong(DataSegment::getSize).sum();
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/size", totalSizeOfUsedSegments));
emitter.emit(new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource).build("segment/count", dataSourceWithUsedSegments.getNumObjects()));
});
// Emit coordinator runtime stats
emitDutyStats(emitter, "coordinator/time", stats, "runtime");
return params;
}
Use of org.apache.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
The class NumberedShardSpecTest, method testVersionedIntervalTimelineBehaviorForNumberedShardSpec.
private void testVersionedIntervalTimelineBehaviorForNumberedShardSpec(List<PartitionChunk<OvershadowableString>> chunks, Set<OvershadowableString> expectedObjects) {
VersionedIntervalTimeline<String, OvershadowableString> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
Interval interval = Intervals.of("2000/3000");
String version = "v1";
for (PartitionChunk<OvershadowableString> chunk : chunks) {
timeline.add(interval, version, chunk);
}
Set<OvershadowableString> actualObjects = new HashSet<>();
List<TimelineObjectHolder<String, OvershadowableString>> entries = timeline.lookup(interval);
for (TimelineObjectHolder<String, OvershadowableString> entry : entries) {
for (PartitionChunk<OvershadowableString> chunk : entry.getObject()) {
actualObjects.add(chunk.getObject());
}
}
Assert.assertEquals(expectedObjects, actualObjects);
}
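A possible invocation of this helper, shown for illustration: NumberedPartitionChunk.make(chunkNumber, totalChunks, object) is the standard factory for numbered chunks, while the OvershadowableString constructor used below is assumed, since the test class's exact signature is not shown on this page.
// Usage sketch; the OvershadowableString(id, partitionNum) constructor is hypothetical.
final OvershadowableString chunk0 = new OvershadowableString("chunk0", 0);
final OvershadowableString chunk1 = new OvershadowableString("chunk1", 1);
// With both numbered chunks present, both objects should be visible through the timeline.
testVersionedIntervalTimelineBehaviorForNumberedShardSpec(
    ImmutableList.of(
        NumberedPartitionChunk.make(0, 2, chunk0),
        NumberedPartitionChunk.make(1, 2, chunk1)
    ),
    ImmutableSet.of(chunk0, chunk1)
);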