use of org.apache.druid.indexing.common.TaskToolbox in project druid by druid-io.
the class ThreadingTaskRunnerTest method testTaskStatusWhenTaskThrowsExceptionWhileRunning.
@Test
public void testTaskStatusWhenTaskThrowsExceptionWhileRunning() throws ExecutionException, InterruptedException {
ThreadingTaskRunner runner = new ThreadingTaskRunner(
    mockTaskToolboxFactory(),
    new TaskConfig(
        null, null, null, null, ImmutableList.of(), false,
        new Period("PT0S"), new Period("PT10S"), ImmutableList.of(),
        false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name()
    ),
    new WorkerConfig(),
    new NoopTaskLogs(),
    new DefaultObjectMapper(),
    new TestAppenderatorsManager(),
    new MultipleFileTaskReportFileWriter(),
    new DruidNode("middleManager", "host", false, 8091, null, true, false)
);
Future<TaskStatus> statusFuture = runner.run(new AbstractTask("id", "datasource", null) {
@Override
public String getType() {
return "test";
}
@Override
public boolean isReady(TaskActionClient taskActionClient) {
return true;
}
@Override
public void stopGracefully(TaskConfig taskConfig) {
}
@Override
public TaskStatus run(TaskToolbox toolbox) {
throw new RuntimeException("Task failure test");
}
});
TaskStatus status = statusFuture.get();
Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
Assert.assertEquals("Failed with an exception. See indexer logs for more details.", status.getErrorMsg());
}
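The mockTaskToolboxFactory() helper referenced above belongs to ThreadingTaskRunnerTest and is not shown on this page. Below is a minimal sketch of what such a helper could look like, assuming EasyMock (as in the HashPartitionCachingLocalSegmentAllocatorTest example further down) and TaskToolboxFactory.build(Task) as the factory method; the real helper may stub additional toolbox getters.
private static TaskToolboxFactory mockTaskToolboxFactory() {
    // Hand out a lenient, empty TaskToolbox mock for whatever task the runner builds a toolbox for.
    TaskToolbox toolbox = EasyMock.niceMock(TaskToolbox.class);
    EasyMock.replay(toolbox);
    TaskToolboxFactory factory = EasyMock.mock(TaskToolboxFactory.class);
    EasyMock.expect(factory.build(EasyMock.anyObject(Task.class))).andStubReturn(toolbox);
    EasyMock.replay(factory);
    return factory;
}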
use of org.apache.druid.indexing.common.TaskToolbox in project druid by druid-io.
the class IndexTask method generateAndPublishSegments.
/**
* This method reads the input data row by row and adds each row to the appropriate segment using {@link BaseAppenderatorDriver}.
* If no segment exists for a row, a new one is created. Segments can be published while the input is still being read
* if {@link DynamicPartitionsSpec} is used and one of the conditions below is satisfied (see the sketch after this method).
*
* <ul>
* <li>
* If the number of rows in a segment exceeds {@link DynamicPartitionsSpec#maxRowsPerSegment}
* </li>
* <li>
* If the number of rows added to {@link BaseAppenderatorDriver} so far exceeds {@link DynamicPartitionsSpec#maxTotalRows}
* </li>
* </ul>
* <p>
* At the end of this method, all the remaining segments are published.
*
* @return the last {@link TaskStatus}
*/
private TaskStatus generateAndPublishSegments(
    final TaskToolbox toolbox,
    final DataSchema dataSchema,
    final InputSource inputSource,
    final File tmpDir,
    final PartitionAnalysis partitionAnalysis
) throws IOException, InterruptedException {
final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
FireDepartmentMetrics buildSegmentsFireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
if (toolbox.getMonitorScheduler() != null) {
final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, buildSegmentsMeters);
toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
}
final PartitionsSpec partitionsSpec = partitionAnalysis.getPartitionsSpec();
final IndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig();
final long pushTimeout = tuningConfig.getPushTimeout();
final SegmentAllocatorForBatch segmentAllocator;
final SequenceNameFunction sequenceNameFunction;
switch(partitionsSpec.getType()) {
case HASH:
case RANGE:
final SegmentAllocatorForBatch localSegmentAllocator = SegmentAllocators.forNonLinearPartitioning(
    toolbox,
    getDataSource(),
    baseSequenceName,
    dataSchema.getGranularitySpec(),
    null,
    (CompletePartitionAnalysis) partitionAnalysis
);
sequenceNameFunction = localSegmentAllocator.getSequenceNameFunction();
segmentAllocator = localSegmentAllocator;
break;
case LINEAR:
segmentAllocator = SegmentAllocators.forLinearPartitioning(
    toolbox,
    baseSequenceName,
    null,
    dataSchema,
    getTaskLockHelper(),
    ingestionSchema.getIOConfig().isAppendToExisting(),
    partitionAnalysis.getPartitionsSpec(),
    null
);
sequenceNameFunction = segmentAllocator.getSequenceNameFunction();
break;
default:
throw new UOE("[%s] secondary partition type is not supported", partitionsSpec.getType());
}
Set<DataSegment> segmentsFoundForDrop = null;
if (ingestionSchema.getIOConfig().isDropExisting()) {
segmentsFoundForDrop = getUsedSegmentsWithinInterval(toolbox, getDataSource(), ingestionSchema.getDataSchema().getGranularitySpec().inputIntervals());
}
final TransactionalSegmentPublisher publisher =
    (segmentsToBeOverwritten, segmentsToDrop, segmentsToPublish, commitMetadata) ->
        toolbox.getTaskActionClient().submit(
            SegmentTransactionalInsertAction.overwriteAction(segmentsToBeOverwritten, segmentsToDrop, segmentsToPublish)
        );
String effectiveId = getContextValue(CompactionTask.CTX_KEY_APPENDERATOR_TRACKING_TASK_ID, null);
if (effectiveId == null) {
effectiveId = getId();
}
final Appenderator appenderator = BatchAppenderators.newAppenderator(
    effectiveId,
    toolbox.getAppenderatorsManager(),
    buildSegmentsFireDepartmentMetrics,
    toolbox,
    dataSchema,
    tuningConfig,
    buildSegmentsMeters,
    buildSegmentsParseExceptionHandler,
    isUseMaxMemoryEstimates()
);
boolean exceptionOccurred = false;
try (final BatchAppenderatorDriver driver = BatchAppenderators.newDriver(appenderator, toolbox, segmentAllocator)) {
driver.startJob();
InputSourceProcessor.process(
    dataSchema,
    driver,
    partitionsSpec,
    inputSource,
    inputSource.needsFormat() ? getInputFormat(ingestionSchema) : null,
    tmpDir,
    sequenceNameFunction,
    new DefaultIndexTaskInputRowIteratorBuilder(),
    buildSegmentsMeters,
    buildSegmentsParseExceptionHandler,
    pushTimeout
);
// If we use timeChunk lock, we don't have to specify which segments will be overwritten because
// it simply overwrites all segments that overlap with the new segments.
final Set<DataSegment> inputSegments = getTaskLockHelper().isUseSegmentLock() ? getTaskLockHelper().getLockedExistingSegments() : null;
final boolean storeCompactionState = getContextValue(Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE);
final Function<Set<DataSegment>, Set<DataSegment>> annotateFunction = compactionStateAnnotateFunction(storeCompactionState, toolbox, ingestionSchema);
// Probably we can publish atomicUpdateGroup along with segments.
final SegmentsAndCommitMetadata published = awaitPublish(
    driver.publishAll(inputSegments, segmentsFoundForDrop, publisher, annotateFunction),
    pushTimeout
);
appenderator.close();
// Optionally wait for the published segments to become queryable, bounded by awaitSegmentAvailabilityTimeoutMillis.
if (tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() > 0 && published != null) {
ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT;
ArrayList<DataSegment> segmentsToWaitFor = new ArrayList<>(published.getSegments());
waitForSegmentAvailability(toolbox, segmentsToWaitFor, tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis());
}
ingestionState = IngestionState.COMPLETED;
if (published == null) {
log.error("Failed to publish segments, aborting!");
errorMsg = "Failed to publish segments.";
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
} else {
log.info("Processed[%,d] events, unparseable[%,d], thrownAway[%,d].", buildSegmentsMeters.getProcessed(), buildSegmentsMeters.getUnparseable(), buildSegmentsMeters.getThrownAway());
log.info("Published [%s] segments", published.getSegments().size());
log.debugSegments(published.getSegments(), "Published segments");
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.success(getId());
}
} catch (TimeoutException | ExecutionException e) {
exceptionOccurred = true;
throw new RuntimeException(e);
} catch (Exception e) {
exceptionOccurred = true;
throw e;
} finally {
if (exceptionOccurred) {
appenderator.closeNow();
} else {
appenderator.close();
}
}
}
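The Javadoc above lists the conditions under which DynamicPartitionsSpec triggers an incremental publish. Below is a minimal sketch, not taken from the Druid source, of such a spec; the two-argument DynamicPartitionsSpec constructor and the concrete row limits are assumptions for illustration, and in a real ingestion spec these values usually come from the JSON tuningConfig.
// Assumed constructor: DynamicPartitionsSpec(Integer maxRowsPerSegment, Long maxTotalRows).
// With this spec, generateAndPublishSegments() may publish segments while input is still being read,
// once a single segment reaches 5,000,000 rows or the driver has accumulated 20,000,000 rows overall.
PartitionsSpec partitionsSpec = new DynamicPartitionsSpec(5_000_000, 20_000_000L);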
use of org.apache.druid.indexing.common.TaskToolbox in project druid by druid-io.
the class ParallelIndexSupervisorTask method publishSegments.
private void publishSegments(TaskToolbox toolbox, Map<String, PushedSegmentsReport> reportsMap) throws IOException {
final Set<DataSegment> oldSegments = new HashSet<>();
final Set<DataSegment> newSegments = new HashSet<>();
reportsMap.values().forEach(report -> {
oldSegments.addAll(report.getOldSegments());
newSegments.addAll(report.getNewSegments());
});
final boolean storeCompactionState = getContextValue(Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE);
final Function<Set<DataSegment>, Set<DataSegment>> annotateFunction = compactionStateAnnotateFunction(storeCompactionState, toolbox, ingestionSchema);
Set<DataSegment> segmentsFoundForDrop = null;
if (ingestionSchema.getIOConfig().isDropExisting()) {
segmentsFoundForDrop = getUsedSegmentsWithinInterval(toolbox, getDataSource(), ingestionSchema.getDataSchema().getGranularitySpec().inputIntervals());
}
final TransactionalSegmentPublisher publisher =
    (segmentsToBeOverwritten, segmentsToDrop, segmentsToPublish, commitMetadata) ->
        toolbox.getTaskActionClient().submit(
            SegmentTransactionalInsertAction.overwriteAction(segmentsToBeOverwritten, segmentsToDrop, segmentsToPublish)
        );
final boolean published = newSegments.isEmpty()
    || publisher.publishSegments(oldSegments, segmentsFoundForDrop, newSegments, annotateFunction, null).isSuccess();
if (published) {
LOG.info("Published [%d] segments", newSegments.size());
} else {
throw new ISE("Failed to publish segments");
}
}
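Note that an empty set of new segments counts as a successful publish, while a failed publish surfaces as an ISE rather than a return value. A hypothetical caller, not the actual supervisor code, might translate that exception into a failed TaskStatus as follows; reportsMap is assumed to have been collected from the finished subtasks.
try {
    publishSegments(toolbox, reportsMap);
    return TaskStatus.success(getId());
} catch (ISE e) {
    // The transactional publish was rejected; fail the supervisor task with the reason.
    return TaskStatus.failure(getId(), e.getMessage());
}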
use of org.apache.druid.indexing.common.TaskToolbox in project druid by druid-io.
the class PartialSegmentMergeTask method fetchSegmentFiles.
private Map<Interval, Int2ObjectMap<List<File>>> fetchSegmentFiles(
    TaskToolbox toolbox,
    Map<Interval, Int2ObjectMap<List<PartitionLocation>>> intervalToBuckets
) throws IOException {
final File tempDir = toolbox.getIndexingTmpDir();
org.apache.commons.io.FileUtils.deleteQuietly(tempDir);
FileUtils.mkdirp(tempDir);
final Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles = new HashMap<>();
// Fetch partition files
for (Entry<Interval, Int2ObjectMap<List<PartitionLocation>>> entryPerInterval : intervalToBuckets.entrySet()) {
final Interval interval = entryPerInterval.getKey();
for (Int2ObjectMap.Entry<List<PartitionLocation>> entryPerBucketId : entryPerInterval.getValue().int2ObjectEntrySet()) {
final int bucketId = entryPerBucketId.getIntKey();
final File partitionDir = org.apache.commons.io.FileUtils.getFile(
    tempDir,
    interval.getStart().toString(),
    interval.getEnd().toString(),
    Integer.toString(bucketId)
);
FileUtils.mkdirp(partitionDir);
for (PartitionLocation location : entryPerBucketId.getValue()) {
final File unzippedDir = toolbox.getShuffleClient().fetchSegmentFile(partitionDir, supervisorTaskId, location);
intervalToUnzippedFiles
    .computeIfAbsent(interval, k -> new Int2ObjectOpenHashMap<>())
    .computeIfAbsent(bucketId, k -> new ArrayList<>())
    .add(unzippedDir);
}
}
}
return intervalToUnzippedFiles;
}
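For reference, the loop above places the fetched partition files under <indexingTmpDir>/<intervalStart>/<intervalEnd>/<bucketId>/, and the returned map is keyed first by interval and then by bucket id. A small lookup sketch follows; the interval string and bucket id are made-up example values, and Intervals.of is Druid's helper for parsing ISO-8601 interval strings.
// Hypothetical lookup of the unzipped files fetched for bucket 3 of one interval.
List<File> bucketFiles = intervalToUnzippedFiles
    .get(Intervals.of("2024-01-01/2024-01-02"))
    .get(3);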
use of org.apache.druid.indexing.common.TaskToolbox in project druid by druid-io.
the class HashPartitionCachingLocalSegmentAllocatorTest method createToolbox.
private static TaskToolbox createToolbox() {
TaskToolbox toolbox = EasyMock.mock(TaskToolbox.class);
EasyMock.expect(toolbox.getTaskActionClient()).andStubReturn(createTaskActionClient());
EasyMock.expect(toolbox.getJsonMapper()).andStubReturn(OBJECT_MAPPER);
EasyMock.replay(toolbox);
return toolbox;
}
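If a test also needed the toolbox's indexing temp directory, the same stubbing pattern extends naturally; getIndexingTmpDir() is the getter used in the PartialSegmentMergeTask example above, and the directory value here is a hypothetical placeholder. Any extra expectations must be recorded before EasyMock.replay(toolbox).
// Hypothetical additional stub, recorded before EasyMock.replay(toolbox):
EasyMock.expect(toolbox.getIndexingTmpDir()).andStubReturn(new File("/tmp/druid-test"));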