Use of org.apache.druid.data.input.Firehose in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTimelineTest, method testSimple:
private void testSimple() throws Exception {
  int count = 0;
  long sum = 0;
  try (final Firehose firehose = factory.connect(ROW_PARSER, tmpDir)) {
    while (firehose.hasMore()) {
      final InputRow row = firehose.nextRow();
      count++;
      sum += row.getMetric(METRICS[0]).longValue();
    }
  }
  Assert.assertEquals("count", expectedCount, count);
  Assert.assertEquals("sum", expectedSum, sum);
}
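The test above shows only the consumer side of the contract: a Firehose is a pull-based, closeable stream of InputRows, drained with hasMore()/nextRow() inside try-with-resources. For orientation, here is a minimal in-memory sketch of that contract using simplified stand-in types (SimpleFirehose and InMemoryFirehose are hypothetical illustrations for this write-up, not classes from the Druid codebase, whose Firehose interface differs in details such as checked exceptions):

import java.util.Iterator;
import java.util.List;
import java.util.Map;

// Hypothetical, simplified stand-in for the contract exercised above:
// a pull-based, closeable stream of rows drained with hasMore()/nextRow().
interface SimpleFirehose extends AutoCloseable {
  boolean hasMore();

  Map<String, Object> nextRow();

  @Override
  void close();
}

// Minimal in-memory implementation backed by a fixed list of rows.
final class InMemoryFirehose implements SimpleFirehose {
  private final Iterator<Map<String, Object>> rows;

  InMemoryFirehose(List<Map<String, Object>> rows) {
    this.rows = rows.iterator();
  }

  @Override
  public boolean hasMore() {
    return rows.hasNext();
  }

  @Override
  public Map<String, Object> nextRow() {
    return rows.next();
  }

  @Override
  public void close() {
    // nothing to release for an in-memory source
  }
}

Draining it looks exactly like the loop in testSimple: try (SimpleFirehose f = new InMemoryFirehose(rows)) { while (f.hasMore()) { process(f.nextRow()); } }.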
Use of org.apache.druid.data.input.Firehose in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTest, method simpleFirehoseReadingTest:
@Test
public void simpleFirehoseReadingTest() throws IOException {
  Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), SEGMENT_SET.size());
  Integer rowcount = 0;
  try (final Firehose firehose = factory.connect(rowParser, TMP_DIR)) {
    while (firehose.hasMore()) {
      InputRow row = firehose.nextRow();
      Assert.assertArrayEquals(new String[] { DIM_NAME }, row.getDimensions().toArray());
      Assert.assertArrayEquals(new String[] { DIM_VALUE }, row.getDimension(DIM_NAME).toArray());
      Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getMetric(METRIC_LONG_NAME));
      Assert.assertEquals(METRIC_FLOAT_VALUE, row.getMetric(METRIC_FLOAT_NAME).floatValue(), METRIC_FLOAT_VALUE * 0.0001);
      ++rowcount;
    }
  }
  Assert.assertEquals((int) MAX_SHARD_NUMBER * MAX_ROWS, (int) rowcount);
}
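This test asserts that the total row count equals MAX_SHARD_NUMBER * MAX_ROWS, i.e. the firehose produced by the factory effectively reads every segment's rows back to back. Conceptually that is a concatenation of per-segment streams, sketched below by reusing the hypothetical SimpleFirehose stand-in from the previous sketch (ConcatFirehose is illustrative only, not how IngestSegmentFirehoseFactory is actually implemented):

import java.util.Iterator;
import java.util.List;
import java.util.Map;

// Hypothetical composite that drains several firehoses in sequence.
final class ConcatFirehose implements SimpleFirehose {
  private final Iterator<SimpleFirehose> sources;
  private SimpleFirehose current;

  ConcatFirehose(List<SimpleFirehose> delegates) {
    this.sources = delegates.iterator();
    this.current = sources.hasNext() ? sources.next() : null;
  }

  @Override
  public boolean hasMore() {
    // Skip over exhausted delegates until one has rows, or none are left.
    while (current != null && !current.hasMore()) {
      current.close();
      current = sources.hasNext() ? sources.next() : null;
    }
    return current != null;
  }

  @Override
  public Map<String, Object> nextRow() {
    return current.nextRow();
  }

  @Override
  public void close() {
    if (current != null) {
      current.close();
    }
    while (sources.hasNext()) {
      sources.next().close();
    }
  }
}

With S shards of R rows each, draining such a composite yields S * R rows, which is exactly what the assertion at the end of the test checks.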
Use of org.apache.druid.data.input.Firehose in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTask, method run:
@Override
public TaskStatus run(final TaskToolbox toolbox) {
  runThread = Thread.currentThread();
  authorizerMapper = toolbox.getAuthorizerMapper();
  rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
  parseExceptionHandler = new ParseExceptionHandler(
      rowIngestionMeters,
      spec.getTuningConfig().isLogParseExceptions(),
      spec.getTuningConfig().getMaxParseExceptions(),
      spec.getTuningConfig().getMaxSavedParseExceptions()
  );
  setupTimeoutAlert();
  DataSchema dataSchema = spec.getDataSchema();
  RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
  final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
  final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
  this.metrics = fireDepartmentForMetrics.getMetrics();
  final Supplier<Committer> committerSupplier = Committers.nilSupplier();
  DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
  appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
  TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
  StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
  try {
    log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
    toolbox.getChatHandlerProvider().register(getId(), this, false);
    if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
      toolbox.getDataSegmentServerAnnouncer().announce();
      toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
    }
    driver.startJob(segmentId -> {
      try {
        if (lockGranularity == LockGranularity.SEGMENT) {
          return toolbox.getTaskActionClient().submit(
              new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)
          ).isOk();
        } else {
          final TaskLock lock = toolbox.getTaskActionClient().submit(
              new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L)
          );
          if (lock == null) {
            return false;
          }
          if (lock.isRevoked()) {
            throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
          }
          return true;
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    });
    // Set up metrics emission
    toolbox.addMonitor(metricsMonitor);
    // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
    final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
    final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
    int sequenceNumber = 0;
    String sequenceName = makeSequenceName(getId(), sequenceNumber);
    final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
      if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
        throw new ISE(
            "Stream ingestion task unexpectedly attempted to overwrite segments: %s",
            SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments)
        );
      }
      if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
        throw new ISE(
            "Stream ingestion task unexpectedly attempted to drop segments: %s",
            SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments)
        );
      }
      final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
      return toolbox.getTaskActionClient().submit(action);
    };
    // Skip connecting firehose if we've been stopped before we got started.
    synchronized (this) {
      if (!gracefullyStopped) {
        firehose = firehoseFactory.connect(
            Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"),
            toolbox.getIndexingTmpDir()
        );
      }
    }
    ingestionState = IngestionState.BUILD_SEGMENTS;
    // Time to read data!
    while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
      try {
        InputRow inputRow = firehose.nextRow();
        if (inputRow == null) {
          log.debug("Discarded null row, considering thrownAway.");
          rowIngestionMeters.incrementThrownAway();
        } else {
          AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
          if (addResult.isOk()) {
            final boolean isPushRequired = addResult.isPushRequired(
                tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(),
                tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS)
            );
            if (isPushRequired) {
              publishSegments(driver, publisher, committerSupplier, sequenceName);
              sequenceNumber++;
              sequenceName = makeSequenceName(getId(), sequenceNumber);
            }
          } else {
            // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
            throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
          }
        }
      } catch (ParseException e) {
        handleParseException(e);
      }
    }
    ingestionState = IngestionState.COMPLETED;
    if (!gracefullyStopped) {
      synchronized (this) {
        if (gracefullyStopped) {
          // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
          log.info("Gracefully stopping.");
        } else {
          finishingJob = true;
        }
      }
      if (finishingJob) {
        log.info("Finishing job...");
        // Publish any remaining segments
        publishSegments(driver, publisher, committerSupplier, sequenceName);
        waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
      }
    } else if (firehose != null) {
      log.info("Task was gracefully stopped, will persist data before exiting");
      persistAndWait(driver, committerSupplier.get());
    }
  } catch (Throwable e) {
    log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
    errorMsg = Throwables.getStackTraceAsString(e);
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.failure(getId(), errorMsg);
  } finally {
    toolbox.getChatHandlerProvider().unregister(getId());
    CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
    appenderator.close();
    CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
    toolbox.removeMonitor(metricsMonitor);
    if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
      toolbox.getDataSegmentServerAnnouncer().unannounce();
      toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
    }
  }
  log.info("Job done!");
  toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
  return TaskStatus.success(getId());
}
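The heart of run() is the BUILD_SEGMENTS loop: connect the firehose lazily (unless the task was already stopped), pull rows while hasMore() is true, hand each row to the appenderator driver, start a new sequence and publish whenever the driver reports that a push is required, and route parse failures through the handler instead of aborting. A condensed, hypothetical sketch of that control flow follows (RowSource, Driver, UnparseableRowException and IngestLoop are simplified stand-ins for this write-up, not the Druid classes):

import java.util.Map;

// Hypothetical, simplified stand-ins used only to illustrate the control flow of run() above.
interface RowSource extends AutoCloseable {          // plays the role of Firehose
  boolean hasMore();

  Map<String, Object> nextRow();                     // may return null (rows to be thrown away)

  @Override
  void close();
}

interface Driver {                                   // plays the role of StreamAppenderatorDriver
  boolean addAndCheckPushRequired(Map<String, Object> row, String sequenceName);

  void publish(String sequenceName);
}

final class UnparseableRowException extends RuntimeException {
  UnparseableRowException(String message) {
    super(message);
  }
}

final class IngestLoop {
  private long thrownAway;
  private long parseErrors;

  // Drain the source, adding rows to the driver and starting a new sequence after each publish,
  // mirroring the BUILD_SEGMENTS loop in the task above.
  void run(RowSource source, Driver driver, String taskId) {
    int sequenceNumber = 0;
    String sequenceName = taskId + "_" + sequenceNumber;
    try (RowSource s = source) {
      while (s.hasMore()) {
        try {
          Map<String, Object> row = s.nextRow();
          if (row == null) {
            thrownAway++;                            // null rows are counted, not ingested
            continue;
          }
          if (driver.addAndCheckPushRequired(row, sequenceName)) {
            driver.publish(sequenceName);            // segment is full: publish and move on
            sequenceNumber++;
            sequenceName = taskId + "_" + sequenceNumber;
          }
        } catch (UnparseableRowException e) {
          parseErrors++;                             // parse failures are recorded, ingestion continues
        }
      }
    }
  }
}

The real task additionally persists on graceful stop, waits for publish and handoff, and closes the firehose, appenderator and driver in a finally block, as the full method above shows.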
Use of org.apache.druid.data.input.Firehose in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTaskTest, method testMultipleParseExceptionsSuccess:
@Test(timeout = 60_000L)
public void testMultipleParseExceptionsSuccess() throws Exception {
  expectPublishedSegments(1);
  final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, 10, 10);
  final ListenableFuture<TaskStatus> statusFuture = runTask(task);
  // Wait for firehose to show up, it starts off null.
  while (task.getFirehose() == null) {
    Thread.sleep(50);
  }
  final TestFirehose firehose = (TestFirehose) task.getFirehose();
  firehose.addRows(Arrays.asList(
      // Good row- will be processed.
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "1"),
      // Null row- will be thrown away.
      null,
      // Bad metric- will count as processed, but that particular metric won't update.
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "foo"),
      // Bad long dim- will count as processed, but bad dims will get default values
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "dimLong", "notnumber", "dimFloat", "notnumber", "met1", "foo"),
      // Bad row- will be unparseable.
      ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"),
      // Good row- will be processed.
      ImmutableMap.of("t", 1521251960729L, "dim2", "bar", "met1", 2.0)
  ));
  // Stop the firehose, this will drain out existing events.
  firehose.close();
  // Wait for publish.
  Collection<DataSegment> publishedSegments = awaitSegments();
  DataSegment publishedSegment = Iterables.getOnlyElement(publishedSegments);
  // Check metrics.
  Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
  Assert.assertEquals(2, task.getRowIngestionMeters().getProcessedWithError());
  Assert.assertEquals(0, task.getRowIngestionMeters().getThrownAway());
  Assert.assertEquals(2, task.getRowIngestionMeters().getUnparseable());
  // Do some queries.
  Assert.assertEquals(4, sumMetric(task, null, "rows").longValue());
  Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());
  awaitHandoffs();
  // Simulate handoff.
  for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
    final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
    Assert.assertEquals(
        new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()),
        entry.getKey()
    );
    executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
  }
  handOffCallbacks.clear();
  Map<String, Object> expectedMetrics = ImmutableMap.of(
      RowIngestionMeters.BUILD_SEGMENTS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED, 2,
          RowIngestionMeters.PROCESSED_WITH_ERROR, 2,
          RowIngestionMeters.UNPARSEABLE, 2,
          RowIngestionMeters.THROWN_AWAY, 0
      )
  );
  // Wait for the task to finish.
  final TaskStatus taskStatus = statusFuture.get();
  Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
  IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
  Assert.assertEquals(expectedMetrics, reportData.getRowStats());
  List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
  List<String> expectedMessages = Arrays.asList(
      "Timestamp[null] is unparseable! Event: {dim1=foo, met1=2.0, __fail__=x}",
      "could not convert value [notnumber] to long",
      "Unable to parse value[foo] for field[met1]",
      "Timestamp[null] is unparseable! Event: null"
  );
  List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
    return ((List<String>) r.get("details")).get(0);
  }).collect(Collectors.toList());
  Assert.assertEquals(expectedMessages, actualMessages);
  List<String> expectedInputs = Arrays.asList(
      "{dim1=foo, met1=2.0, __fail__=x}",
      "{t=1521251960729, dim1=foo, dimLong=notnumber, dimFloat=notnumber, met1=foo}",
      "{t=1521251960729, dim1=foo, met1=foo}",
      null
  );
  List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
    return (String) r.get("input");
  }).collect(Collectors.toList());
  Assert.assertEquals(expectedInputs, actualInputs);
  Assert.assertEquals(IngestionState.COMPLETED, reportData.getIngestionState());
}
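The test drives the task through a TestFirehose: an in-memory firehose the test can keep feeding after the task has already connected to it, and whose close() is what lets the read loop in run() drain the remaining rows and finish. A hypothetical sketch of that idea (QueueBackedFirehose is an illustrative stand-in, not the actual TestFirehose used by the Druid test suite):

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.LinkedBlockingDeque;

// Hypothetical in-memory firehose a test can feed while a consumer is draining it.
// hasMore() waits until a row arrives or the firehose is closed and fully drained,
// which is what allows "firehose.close()" in the test above to end the ingest loop.
final class QueueBackedFirehose implements AutoCloseable {
  // Optional.empty() stands in for an explicit null row, like the one added in the test.
  private final LinkedBlockingDeque<Optional<Map<String, Object>>> queue = new LinkedBlockingDeque<>();
  private volatile boolean closed;

  void addRows(List<Map<String, Object>> rows) {
    for (Map<String, Object> row : rows) {
      queue.add(Optional.ofNullable(row));
    }
  }

  boolean hasMore() throws InterruptedException {
    while (true) {
      if (!queue.isEmpty()) {
        return true;
      }
      if (closed) {
        return false;
      }
      Thread.sleep(10); // poll until a row arrives or close() is called
    }
  }

  Map<String, Object> nextRow() {
    return queue.remove().orElse(null);
  }

  @Override
  public void close() {
    closed = true;
  }
}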
Use of org.apache.druid.data.input.Firehose in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTaskTest, method testMultipleParseExceptionsFailure:
@Test(timeout = 60_000L)
public void testMultipleParseExceptionsFailure() throws Exception {
  expectPublishedSegments(1);
  final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, 3, 10);
  final ListenableFuture<TaskStatus> statusFuture = runTask(task);
  // Wait for firehose to show up, it starts off null.
  while (task.getFirehose() == null) {
    Thread.sleep(50);
  }
  final TestFirehose firehose = (TestFirehose) task.getFirehose();
  firehose.addRows(Arrays.asList(
      // Good row- will be processed.
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "1"),
      // Null row- will be thrown away.
      null,
      // Bad metric- will count as processed, but that particular metric won't update.
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "foo"),
      // Bad long dim- will count as processed, but bad dims will get default values
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "dimLong", "notnumber", "dimFloat", "notnumber", "met1", "foo"),
      // Bad row- will be unparseable.
      ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"),
      // Good row- will be processed.
      ImmutableMap.of("t", 1521251960729L, "dim2", "bar", "met1", 2.0)
  ));
  // Stop the firehose, this will drain out existing events.
  firehose.close();
  // Wait for the task to finish.
  final TaskStatus taskStatus = statusFuture.get();
  Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode());
  Assert.assertTrue(taskStatus.getErrorMsg().contains("Max parse exceptions[3] exceeded"));
  IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
  Map<String, Object> expectedMetrics = ImmutableMap.of(
      RowIngestionMeters.BUILD_SEGMENTS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED, 1,
          RowIngestionMeters.PROCESSED_WITH_ERROR, 2,
          RowIngestionMeters.UNPARSEABLE, 2,
          RowIngestionMeters.THROWN_AWAY, 0
      )
  );
  Assert.assertEquals(expectedMetrics, reportData.getRowStats());
  List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
  List<String> expectedMessages = ImmutableList.of(
      "Timestamp[null] is unparseable! Event: {dim1=foo, met1=2.0, __fail__=x}",
      "could not convert value [notnumber] to long",
      "Unable to parse value[foo] for field[met1]",
      "Timestamp[null] is unparseable! Event: null"
  );
  List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
    return ((List<String>) r.get("details")).get(0);
  }).collect(Collectors.toList());
  Assert.assertEquals(expectedMessages, actualMessages);
  List<String> expectedInputs = Arrays.asList(
      "{dim1=foo, met1=2.0, __fail__=x}",
      "{t=1521251960729, dim1=foo, dimLong=notnumber, dimFloat=notnumber, met1=foo}",
      "{t=1521251960729, dim1=foo, met1=foo}",
      null
  );
  List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
    return (String) r.get("input");
  }).collect(Collectors.toList());
  Assert.assertEquals(expectedInputs, actualInputs);
  Assert.assertEquals(IngestionState.BUILD_SEGMENTS, reportData.getIngestionState());
}
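The failure variant differs from the success test mainly in the parse-exception limit passed to makeRealtimeTask (3 instead of 10). The input contains four rows that raise parse errors (two processed with error, two unparseable), so the fourth failure pushes the count past the limit: the task fails with "Max parse exceptions[3] exceeded" before the final good row is ingested (PROCESSED stays at 1), and the report remains in the BUILD_SEGMENTS state. The enforcement pattern, sketched with hypothetical names rather than the actual ParseExceptionHandler implementation:

// Hypothetical sketch of a parse-exception budget, mirroring the behavior asserted above.
final class ParseExceptionBudget {
  private final int maxParseExceptions;
  private int parseExceptionCount;

  ParseExceptionBudget(int maxParseExceptions) {
    this.maxParseExceptions = maxParseExceptions;
  }

  // Record one parse failure; once the budget is exhausted, fail the whole task.
  void handle(RuntimeException parseException) {
    parseExceptionCount++;
    if (parseExceptionCount > maxParseExceptions) {
      throw new RuntimeException(
          "Max parse exceptions[" + maxParseExceptions + "] exceeded", parseException);
    }
    // Below the limit the row is only counted (processedWithError or unparseable)
    // and ingestion continues with the next row.
  }
}

With the success test's limit of 10, all four failures fit inside the budget, the task completes, and the report moves to COMPLETED.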