use of org.apache.druid.data.input.FirehoseFactory in project druid by druid-io.
the class HdfsFirehoseFactoryTest method testStringPaths.
@Test
public void testStringPaths() throws IOException {
final HdfsFirehoseFactory firehoseFactory = new HdfsFirehoseFactory("/foo/bar", null, null, null, null, null, DEFAULT_CONFIGURATION, DEFAULT_INPUT_SOURCE_CONFIG);
final ObjectMapper mapper = createMapper();
final HdfsFirehoseFactory firehoseFactory2 = (HdfsFirehoseFactory) mapper.readValue(mapper.writeValueAsString(firehoseFactory), FirehoseFactory.class);
Assert.assertEquals(firehoseFactory.getInputPaths(), firehoseFactory2.getInputPaths());
}
use of org.apache.druid.data.input.FirehoseFactory in project druid by druid-io.
the class SinglePhaseParallelIndexTaskRunner method newTaskSpec.
@VisibleForTesting
SubTaskSpec<SinglePhaseSubTask> newTaskSpec(InputSplit split) {
final FirehoseFactory firehoseFactory;
final InputSource inputSource;
if (baseInputSource instanceof FirehoseFactoryToInputSourceAdaptor) {
firehoseFactory = ((FirehoseFactoryToInputSourceAdaptor) baseInputSource).getFirehoseFactory().withSplit(split);
inputSource = null;
} else {
firehoseFactory = null;
inputSource = baseInputSource.withSplit(split);
}
final Map<String, Object> subtaskContext = new HashMap<>(getContext());
return new SinglePhaseSubTaskSpec(getBaseSubtaskSpecName() + "_" + getAndIncrementNextSpecId(), getGroupId(), getTaskId(), new ParallelIndexIngestionSpec(ingestionSchema.getDataSchema(), new ParallelIndexIOConfig(firehoseFactory, inputSource, ingestionSchema.getIOConfig().getInputFormat(), ingestionSchema.getIOConfig().isAppendToExisting(), ingestionSchema.getIOConfig().isDropExisting()), ingestionSchema.getTuningConfig()), subtaskContext, split);
}
use of org.apache.druid.data.input.FirehoseFactory in project druid by druid-io.
the class RealtimeIndexTaskTest method makeRealtimeTask.
private RealtimeIndexTask makeRealtimeTask(final String taskId, final TransformSpec transformSpec, final boolean reportParseExceptions, final long handoffTimeout) {
ObjectMapper objectMapper = new DefaultObjectMapper();
DataSchema dataSchema = new DataSchema("test_ds", TestHelper.makeJsonMapper().convertValue(new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec("t", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim1t"))))), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT), new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), transformSpec, objectMapper);
RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehose.TestFirehoseFactory(), null);
RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(null, 1000, null, null, new Period("P1Y"), new Period("PT10M"), null, null, new ServerTimeRejectionPolicyFactory(), null, null, null, null, 0, 0, reportParseExceptions, handoffTimeout, null, null, null);
return new RealtimeIndexTask(taskId, null, new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig), null) {
@Override
protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
return true;
}
};
}
use of org.apache.druid.data.input.FirehoseFactory in project druid by druid-io.
the class AppenderatorDriverRealtimeIndexTask method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) {
runThread = Thread.currentThread();
authorizerMapper = toolbox.getAuthorizerMapper();
rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
setupTimeoutAlert();
DataSchema dataSchema = spec.getDataSchema();
RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
this.metrics = fireDepartmentForMetrics.getMetrics();
final Supplier<Committer> committerSupplier = Committers.nilSupplier();
DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
try {
log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
toolbox.getChatHandlerProvider().register(getId(), this, false);
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
}
driver.startJob(segmentId -> {
try {
if (lockGranularity == LockGranularity.SEGMENT) {
return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
} else {
final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
if (lock == null) {
return false;
}
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
}
return true;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
});
// Set up metrics emission
toolbox.addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
int sequenceNumber = 0;
String sequenceName = makeSequenceName(getId(), sequenceNumber);
final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
}
if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
}
final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
return toolbox.getTaskActionClient().submit(action);
};
// Skip connecting firehose if we've been stopped before we got started.
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
}
}
ingestionState = IngestionState.BUILD_SEGMENTS;
// Time to read data!
while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
try {
InputRow inputRow = firehose.nextRow();
if (inputRow == null) {
log.debug("Discarded null row, considering thrownAway.");
rowIngestionMeters.incrementThrownAway();
} else {
AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
if (addResult.isOk()) {
final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
if (isPushRequired) {
publishSegments(driver, publisher, committerSupplier, sequenceName);
sequenceNumber++;
sequenceName = makeSequenceName(getId(), sequenceNumber);
}
} else {
// If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
}
}
} catch (ParseException e) {
handleParseException(e);
}
}
ingestionState = IngestionState.COMPLETED;
if (!gracefullyStopped) {
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
log.info("Finishing job...");
// Publish any remaining segments
publishSegments(driver, publisher, committerSupplier, sequenceName);
waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
}
} else if (firehose != null) {
log.info("Task was gracefully stopped, will persist data before exiting");
persistAndWait(driver, committerSupplier.get());
}
} catch (Throwable e) {
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
errorMsg = Throwables.getStackTraceAsString(e);
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
} finally {
toolbox.getChatHandlerProvider().unregister(getId());
CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
appenderator.close();
CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
toolbox.removeMonitor(metricsMonitor);
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().unannounce();
toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
}
}
log.info("Job done!");
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.success(getId());
}
use of org.apache.druid.data.input.FirehoseFactory in project druid by druid-io.
the class AbstractBatchIndexTask method findInputSegments.
/**
* If the given firehoseFactory is {@link IngestSegmentFirehoseFactory}, then it finds the segments to lock
* from the firehoseFactory. This is because those segments will be read by this task no matter what segments would be
* filtered by intervalsToRead, so they need to be locked.
* <p>
* However, firehoseFactory is not IngestSegmentFirehoseFactory, it means this task will overwrite some segments
* with data read from some input source outside of Druid. As a result, only the segments falling in intervalsToRead
* should be locked.
* <p>
* The order of segments within the returned list is unspecified, but each segment is guaranteed to appear in the list
* only once.
*/
protected static List<DataSegment> findInputSegments(String dataSource, TaskActionClient actionClient, List<Interval> intervalsToRead, FirehoseFactory firehoseFactory) throws IOException {
if (firehoseFactory instanceof IngestSegmentFirehoseFactory) {
// intervalsToRead is ignored here.
final List<WindowedSegmentId> inputSegments = ((IngestSegmentFirehoseFactory) firehoseFactory).getSegments();
if (inputSegments == null) {
final Interval inputInterval = Preconditions.checkNotNull(((IngestSegmentFirehoseFactory) firehoseFactory).getInterval(), "input interval");
return ImmutableList.copyOf(actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, inputInterval, null, Segments.ONLY_VISIBLE)));
} else {
final List<String> inputSegmentIds = inputSegments.stream().map(WindowedSegmentId::getSegmentId).collect(Collectors.toList());
final Collection<DataSegment> dataSegmentsInIntervals = actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, inputSegments.stream().flatMap(windowedSegmentId -> windowedSegmentId.getIntervals().stream()).collect(Collectors.toSet()), Segments.ONLY_VISIBLE));
return dataSegmentsInIntervals.stream().filter(segment -> inputSegmentIds.contains(segment.getId().toString())).collect(Collectors.toList());
}
} else {
return ImmutableList.copyOf(actionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, intervalsToRead, Segments.ONLY_VISIBLE)));
}
}
Aggregations