Use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTest, method testGetUniqueDimensionsAndMetrics:
@Test
public void testGetUniqueDimensionsAndMetrics()
{
  final int numSegmentsPerPartitionChunk = 5;
  final int numPartitionChunksPerTimelineObject = 10;
  final int numSegments = numSegmentsPerPartitionChunk * numPartitionChunksPerTimelineObject;
  final Interval interval = Intervals.of("2017-01-01/2017-01-02");
  final String version = "1";
  final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = new ArrayList<>();
  for (int i = 0; i < numPartitionChunksPerTimelineObject; i++) {
    final List<PartitionChunk<DataSegment>> chunks = new ArrayList<>();
    for (int j = 0; j < numSegmentsPerPartitionChunk; j++) {
      final List<String> dims = IntStream.range(i, i + numSegmentsPerPartitionChunk)
                                         .mapToObj(suffix -> "dim" + suffix)
                                         .collect(Collectors.toList());
      final List<String> metrics = IntStream.range(i, i + numSegmentsPerPartitionChunk)
                                            .mapToObj(suffix -> "met" + suffix)
                                            .collect(Collectors.toList());
      final DataSegment segment = new DataSegment(
          "ds", interval, version, ImmutableMap.of(), dims, metrics,
          new NumberedShardSpec(numPartitionChunksPerTimelineObject, i), 1, 1
      );
      final PartitionChunk<DataSegment> partitionChunk =
          new NumberedPartitionChunk<>(i, numPartitionChunksPerTimelineObject, segment);
      chunks.add(partitionChunk);
    }
    final TimelineObjectHolder<String, DataSegment> timelineHolder =
        new TimelineObjectHolder<>(interval, version, new PartitionHolder<>(chunks));
    timelineSegments.add(timelineHolder);
  }
  final String[] expectedDims = new String[]{
      "dim9", "dim10", "dim11", "dim12", "dim13",
      "dim8", "dim7", "dim6", "dim5", "dim4", "dim3", "dim2", "dim1", "dim0"
  };
  final String[] expectedMetrics = new String[]{
      "met9", "met10", "met11", "met12", "met13",
      "met8", "met7", "met6", "met5", "met4", "met3", "met2", "met1", "met0"
  };
  Assert.assertEquals(Arrays.asList(expectedDims), ReingestionTimelineUtils.getUniqueDimensions(timelineSegments, null));
  Assert.assertEquals(Arrays.asList(expectedMetrics), ReingestionTimelineUtils.getUniqueMetrics(timelineSegments));
}
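The expected arrays start with dim9 through dim13 because the unique-dimension computation walks the timeline holders from newest to oldest, so the schema of the most recent segments comes first. As a rough illustration of that traversal (this is not Druid's actual implementation, and the helper name is ours):

static List<String> uniqueDimensionsNewestFirst(List<TimelineObjectHolder<String, DataSegment>> holders)
{
  // Later holders carry the more recent segments, so walk the list in reverse;
  // LinkedHashSet keeps first-seen (i.e. newest) ordering while dropping duplicates.
  final Set<String> uniqueDims = new LinkedHashSet<>();
  for (int i = holders.size() - 1; i >= 0; i--) {
    for (PartitionChunk<DataSegment> chunk : holders.get(i).getObject()) {
      uniqueDims.addAll(chunk.getObject().getDimensions());
    }
  }
  return new ArrayList<>(uniqueDims);
}

Applied to the test data above, this yields dim9..dim13 from the newest holder (i = 9), then one new dimension per older holder down to dim0, matching expectedDims.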
Use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.
From the class AbstractITBatchIndexTest, method submitTaskAndWait:
protected void submitTaskAndWait(
    String taskSpec,
    String dataSourceName,
    boolean waitForNewVersion,
    boolean waitForSegmentsToLoad,
    Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair
)
{
  final List<DataSegment> oldVersions = waitForNewVersion ? coordinator.getAvailableSegments(dataSourceName) : null;
  long startSubTaskCount = -1;
  final boolean assertRunsSubTasks = taskSpec.contains("index_parallel");
  if (assertRunsSubTasks) {
    startSubTaskCount = countCompleteSubTasks(dataSourceName, !taskSpec.contains("dynamic"));
  }
  final String taskID = indexer.submitTask(taskSpec);
  LOG.info("TaskID for loading index task %s", taskID);
  indexer.waitUntilTaskCompletes(taskID);
  if (assertRunsSubTasks) {
    final boolean perfectRollup = !taskSpec.contains("dynamic");
    final long newSubTasks = countCompleteSubTasks(dataSourceName, perfectRollup) - startSubTaskCount;
    Assert.assertTrue(
        newSubTasks > 0,
        StringUtils.format("The supervisor task[%s] didn't create any sub tasks. Was it executed in the parallel mode?", taskID)
    );
  }
  if (segmentAvailabilityConfirmationPair.lhs != null && segmentAvailabilityConfirmationPair.lhs) {
    TaskReport reportRaw = indexer.getTaskReport(taskID).get("ingestionStatsAndErrors");
    IngestionStatsAndErrorsTaskReport report = (IngestionStatsAndErrorsTaskReport) reportRaw;
    IngestionStatsAndErrorsTaskReportData reportData = (IngestionStatsAndErrorsTaskReportData) report.getPayload();
    // Confirm that the task waited longer than 0ms for segments to become available.
    Assert.assertTrue(reportData.getSegmentAvailabilityWaitTimeMs() > 0);
    // Make sure that the result of waiting for segments to load matches the expected result.
    if (segmentAvailabilityConfirmationPair.rhs != null) {
      Assert.assertEquals(Boolean.valueOf(reportData.isSegmentAvailabilityConfirmed()), segmentAvailabilityConfirmationPair.rhs);
    }
  }
  // If a new version is expected, wait until segments that overshadow the original segments have loaded.
  if (waitForNewVersion) {
    ITRetryUtil.retryUntilTrue(
        () -> {
          final VersionedIntervalTimeline<String, DataSegment> timeline =
              VersionedIntervalTimeline.forSegments(coordinator.getAvailableSegments(dataSourceName));
          final List<TimelineObjectHolder<String, DataSegment>> holders = timeline.lookup(Intervals.ETERNITY);
          return FluentIterable
              .from(holders)
              .transformAndConcat(TimelineObjectHolder::getObject)
              .anyMatch(chunk -> FluentIterable.from(oldVersions).anyMatch(oldSegment -> chunk.getObject().overshadows(oldSegment)));
        },
        "See a new version"
    );
  }
  if (waitForSegmentsToLoad) {
    ITRetryUtil.retryUntilTrue(() -> coordinator.areSegmentsLoaded(dataSourceName), "Segment Load");
  }
}
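The retry closure bundles the whole "has a new version become visible" check into one lambda. Extracted as a standalone helper (the method name is ours; every call it makes appears in the snippet above), it amounts to:

static boolean newVersionOvershadowsOld(Collection<DataSegment> currentSegments, Collection<DataSegment> oldSegments)
{
  final VersionedIntervalTimeline<String, DataSegment> timeline =
      VersionedIntervalTimeline.forSegments(currentSegments);
  // lookup() only returns visible (non-overshadowed) chunks, so any match here
  // means a newer version has actually replaced one of the old segments.
  for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(Intervals.ETERNITY)) {
    for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
      for (DataSegment oldSegment : oldSegments) {
        if (chunk.getObject().overshadows(oldSegment)) {
          return true;
        }
      }
    }
  }
  return false;
}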
Use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.
From the class QueryRunnerTestHelper, method makeFilteringQueryRunner:
public static <T> QueryRunner<T> makeFilteringQueryRunner(
    final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline,
    final QueryRunnerFactory<T, Query<T>> factory
)
{
  final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
  return new FluentQueryRunnerBuilder<T>(toolChest)
      .create(
          new QueryRunner<T>()
          {
            @Override
            public Sequence<T> run(QueryPlus<T> queryPlus, ResponseContext responseContext)
            {
              Query<T> query = queryPlus.getQuery();
              List<TimelineObjectHolder> segments = new ArrayList<>();
              for (Interval interval : query.getIntervals()) {
                segments.addAll(timeline.lookup(interval));
              }
              List<Sequence<T>> sequences = new ArrayList<>();
              for (TimelineObjectHolder<String, ReferenceCountingSegment> holder : toolChest.filterSegments(query, segments)) {
                Segment segment = holder.getObject().getChunk(0).getObject();
                QueryPlus queryPlusRunning = queryPlus.withQuery(
                    queryPlus.getQuery().withQuerySegmentSpec(
                        new SpecificSegmentSpec(new SegmentDescriptor(holder.getInterval(), holder.getVersion(), 0))
                    )
                );
                sequences.add(factory.createRunner(segment).run(queryPlusRunning, responseContext));
              }
              return new MergeSequence<>(query.getResultOrdering(), Sequences.simple(sequences));
            }
          }
      )
      .applyPreMergeDecoration()
      .mergeResults()
      .applyPostMergeDecoration();
}
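Note that this test helper only queries partition 0 of each holder, via getChunk(0) and a descriptor with partition number 0. If every partition mattered, each chunk would need its own descriptor. A rough sketch of that fuller walk (the helper name is ours; getChunkNumber is the chunk's partition number on PartitionChunk):

static List<SegmentDescriptor> descriptorsFor(TimelineObjectHolder<String, ReferenceCountingSegment> holder)
{
  final List<SegmentDescriptor> descriptors = new ArrayList<>();
  for (PartitionChunk<ReferenceCountingSegment> chunk : holder.getObject()) {
    // A descriptor is the (interval, version, partition) triple that pins down one chunk.
    descriptors.add(new SegmentDescriptor(holder.getInterval(), holder.getVersion(), chunk.getChunkNumber()));
  }
  return descriptors;
}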
Use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.
From the class HadoopIngestionSpec, method updateSegmentListIfDatasourcePathSpecIsUsed:
public static void updateSegmentListIfDatasourcePathSpecIsUsed(
    HadoopIngestionSpec spec,
    ObjectMapper jsonMapper,
    UsedSegmentsRetriever segmentsRetriever
) throws IOException
{
  String dataSource = "dataSource";
  String type = "type";
  String multi = "multi";
  String children = "children";
  String segments = "segments";
  String ingestionSpec = "ingestionSpec";
  Map<String, Object> pathSpec = spec.getIOConfig().getPathSpec();
  List<Map<String, Object>> datasourcePathSpecs = new ArrayList<>();
  if (pathSpec.get(type).equals(dataSource)) {
    datasourcePathSpecs.add(pathSpec);
  } else if (pathSpec.get(type).equals(multi)) {
    List<Map<String, Object>> childPathSpecs = (List<Map<String, Object>>) pathSpec.get(children);
    for (Map<String, Object> childPathSpec : childPathSpecs) {
      if (childPathSpec.get(type).equals(dataSource)) {
        datasourcePathSpecs.add(childPathSpec);
      }
    }
  }
  for (Map<String, Object> datasourcePathSpec : datasourcePathSpecs) {
    Map<String, Object> ingestionSpecMap = (Map<String, Object>) datasourcePathSpec.get(ingestionSpec);
    DatasourceIngestionSpec ingestionSpecObj = jsonMapper.convertValue(ingestionSpecMap, DatasourceIngestionSpec.class);
    Collection<DataSegment> usedVisibleSegments = segmentsRetriever.retrieveUsedSegmentsForIntervals(
        ingestionSpecObj.getDataSource(),
        ingestionSpecObj.getIntervals(),
        Segments.ONLY_VISIBLE
    );
    if (ingestionSpecObj.getSegments() != null) {
      // Ensure that the user-supplied segment list matches the usedVisibleSegments obtained from the db.
      // This safety check lets users do test-and-set style batch delta ingestion, where the delta
      // ingestion task only runs if the current state of the system is the same as when they
      // submitted the task.
      List<DataSegment> userSuppliedSegmentsList = ingestionSpecObj.getSegments();
      if (usedVisibleSegments.size() == userSuppliedSegmentsList.size()) {
        Set<DataSegment> segmentsSet = new HashSet<>(usedVisibleSegments);
        for (DataSegment userSegment : userSuppliedSegmentsList) {
          if (!segmentsSet.contains(userSegment)) {
            throw new IOException("user supplied segments list did not match with segments list obtained from db");
          }
        }
      } else {
        throw new IOException("user supplied segments list did not match with segments list obtained from db");
      }
    }
    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(usedVisibleSegments);
    final List<WindowedDataSegment> windowedSegments = new ArrayList<>();
    for (Interval interval : ingestionSpecObj.getIntervals()) {
      final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
      for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
          windowedSegments.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
        }
      }
      datasourcePathSpec.put(segments, windowedSegments);
    }
  }
}
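The final loop is where TimelineObjectHolder earns its keep: a timeline lookup per interval, flattened into WindowedDataSegments whose window is the holder's interval, which can be narrower than the segment's own interval when newer versions partially overlap it. As a standalone sketch of just that step (the helper name is ours; the calls are the ones used above):

static List<WindowedDataSegment> windowSegments(Collection<DataSegment> usedSegments, List<Interval> intervals)
{
  final VersionedIntervalTimeline<String, DataSegment> timeline =
      VersionedIntervalTimeline.forSegments(usedSegments);
  final List<WindowedDataSegment> windowed = new ArrayList<>();
  for (Interval interval : intervals) {
    for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(interval)) {
      for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
        // The window is the holder's interval, not the segment's full interval.
        windowed.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
      }
    }
  }
  return windowed;
}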
Use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.
From the class SinglePhaseSubTask, method runTask:
@Override
public TaskStatus runTask(final TaskToolbox toolbox)
{
  try {
    if (missingIntervalsInOverwriteMode) {
      LOG.warn(
          "Intervals are missing in granularitySpec while this task is potentially overwriting existing segments. "
          + "Forced to use timeChunk lock."
      );
    }
    this.authorizerMapper = toolbox.getAuthorizerMapper();
    toolbox.getChatHandlerProvider().register(getId(), this, false);
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(
        rowIngestionMeters,
        ingestionSchema.getTuningConfig().isLogParseExceptions(),
        ingestionSchema.getTuningConfig().getMaxParseExceptions(),
        ingestionSchema.getTuningConfig().getMaxSavedParseExceptions()
    );
    final InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(ingestionSchema.getDataSchema().getParser());
    final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
        new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
        getId(),
        1, // always use a single http thread
        ingestionSchema.getTuningConfig().getChatHandlerTimeout(),
        ingestionSchema.getTuningConfig().getChatHandlerNumRetries()
    );
    ingestionState = IngestionState.BUILD_SEGMENTS;
    final Set<DataSegment> pushedSegments = generateAndPushSegments(toolbox, taskClient, inputSource, toolbox.getIndexingTmpDir());
    // Find inputSegments overshadowed by pushedSegments.
    final Set<DataSegment> allSegments = new HashSet<>(getTaskLockHelper().getLockedExistingSegments());
    allSegments.addAll(pushedSegments);
    final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(allSegments);
    final Set<DataSegment> oldSegments = FluentIterable
        .from(timeline.findFullyOvershadowed())
        .transformAndConcat(TimelineObjectHolder::getObject)
        .transform(PartitionChunk::getObject)
        .toSet();
    Map<String, TaskReport> taskReport = getTaskCompletionReports();
    taskClient.report(supervisorTaskId, new PushedSegmentsReport(getId(), oldSegments, pushedSegments, taskReport));
    toolbox.getTaskReportFileWriter().write(getId(), taskReport);
    return TaskStatus.success(getId());
  }
  catch (Exception e) {
    LOG.error(e, "Encountered exception in parallel sub task.");
    errorMsg = Throwables.getStackTraceAsString(e);
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.failure(getId(), errorMsg);
  }
  finally {
    toolbox.getChatHandlerProvider().unregister(getId());
  }
}
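The FluentIterable chain that computes oldSegments is dense; written out with plain loops (our rewrite, using the same calls), it amounts to:

static Set<DataSegment> fullyOvershadowedSegments(Set<DataSegment> allSegments)
{
  final VersionedIntervalTimeline<String, DataSegment> timeline =
      VersionedIntervalTimeline.forSegments(allSegments);
  final Set<DataSegment> overshadowed = new HashSet<>();
  // findFullyOvershadowed() returns holders for segment versions completely hidden by newer versions.
  for (TimelineObjectHolder<String, DataSegment> holder : timeline.findFullyOvershadowed()) {
    for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
      overshadowed.add(chunk.getObject());
    }
  }
  return overshadowed;
}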