use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class IndexTaskTest method testMultipleParseExceptionsFailureAtDeterminePartitions.
@Test
public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exception {
final File tmpDir = temporaryFolder.newFolder();
final File tmpFile = File.createTempFile("druid", "index", tmpDir);
try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
writer.write("time,dim,dimLong,dimFloat,val\n");
// unparseable
writer.write("unparseable,a,2,3.0,1\n");
// valid row
writer.write("2014-01-01T00:00:10Z,a,2,3.0,1\n");
// unparseable
writer.write("9.0,a,2,3.0,1\n");
// thrown away
writer.write("3014-03-01T00:00:10Z,outsideofinterval,2,3.0,1\n");
// unparseable
writer.write("99999999999-01-01T00:00:10Z,b,2,3.0,1\n");
}
// Allow up to 3 parse exceptions, and save up to 2 parse exceptions
final IndexTuningConfig tuningConfig = new IndexTuningConfig(null, null, null, null, null, null, null, null, null, null, new HashedPartitionsSpec(2, null, null), INDEX_SPEC, null, null, true, false, null, null, null, true, 2, 5, null, null);
final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
final DimensionsSpec dimensionsSpec = new DimensionsSpec(Arrays.asList(new StringDimensionSchema("dim"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat")));
final List<String> columns = Arrays.asList("time", "dim", "dimLong", "dimFloat", "val");
final IndexIngestionSpec ingestionSpec;
List<String> expectedMessages;
if (useInputFormatApi) {
ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, timestampSpec, dimensionsSpec, new CsvInputFormat(columns, null, null, true, 0), null, null, tuningConfig, false, false);
expectedMessages = Arrays.asList(StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 3, Line: 6)", tmpFile.toURI()), StringUtils.format("Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 2, Line: 4)", tmpFile.toURI()), StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 2)", tmpFile.toURI()));
} else {
ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0), null, null, tuningConfig, false, false);
expectedMessages = Arrays.asList("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", "Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}", "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}");
}
IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
TaskStatus status = runTask(indexTask).lhs;
Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
checkTaskStatusErrorMsgForParseExceptionsExceeded(status);
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(RowIngestionMeters.DETERMINE_PARTITIONS, ImmutableMap.of(RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 1, RowIngestionMeters.UNPARSEABLE, 3, RowIngestionMeters.THROWN_AWAY, useInputFormatApi ? 1 : 2), RowIngestionMeters.BUILD_SEGMENTS, ImmutableMap.of(RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 0, RowIngestionMeters.UNPARSEABLE, 0, RowIngestionMeters.THROWN_AWAY, 0));
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.DETERMINE_PARTITIONS);
List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList("{time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", "{time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}", "{time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}");
List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
}
use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RealtimeIndexTaskTest method testBasics.
@Test(timeout = 60_000L)
public void testBasics() throws Exception {
final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
final RealtimeIndexTask task = makeRealtimeTask(null);
final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
final DataSegment publishedSegment;
// Wait for firehose to show up, it starts off null.
while (task.getFirehose() == null) {
Thread.sleep(50);
}
final TestFirehose firehose = (TestFirehose) task.getFirehose();
firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0), ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
// Stop the firehose, this will drain out existing events.
firehose.close();
// Wait for publish.
while (mdc.getPublished().isEmpty()) {
Thread.sleep(50);
}
publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
// Check metrics.
Assert.assertEquals(2, task.getMetrics().processed());
Assert.assertEquals(1, task.getMetrics().thrownAway());
Assert.assertEquals(0, task.getMetrics().unparseable());
// Do some queries.
Assert.assertEquals(2, sumMetric(task, null, "rows").longValue());
Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());
// Simulate handoff.
for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
}
handOffCallbacks.clear();
// Wait for the task to finish.
final TaskStatus taskStatus = statusFuture.get();
Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RealtimeIndexTaskTest method testHandoffTimeout.
@Test(timeout = 60_000L, expected = ExecutionException.class)
public void testHandoffTimeout() throws Exception {
final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
final RealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, true, 100L);
final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
// Wait for firehose to show up, it starts off null.
while (task.getFirehose() == null) {
Thread.sleep(50);
}
final TestFirehose firehose = (TestFirehose) task.getFirehose();
firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1")));
// Stop the firehose, this will drain out existing events.
firehose.close();
// Wait for publish.
while (mdc.getPublished().isEmpty()) {
Thread.sleep(50);
}
Assert.assertEquals(1, task.getMetrics().processed());
Assert.assertNotNull(Iterables.getOnlyElement(mdc.getPublished()));
// handoff would timeout, resulting in exception
statusFuture.get();
}
use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RealtimeIndexTaskTest method testNoReportParseExceptions.
@Test(timeout = 60_000L)
public void testNoReportParseExceptions() throws Exception {
final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
final RealtimeIndexTask task = makeRealtimeTask(null, false);
final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
final DataSegment publishedSegment;
// Wait for firehose to show up, it starts off null.
while (task.getFirehose() == null) {
Thread.sleep(50);
}
final TestFirehose firehose = (TestFirehose) task.getFirehose();
firehose.addRows(Arrays.asList(// Good row- will be processed.
ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), // Null row- will be thrown away.
null, // Bad metric- will count as processed, but that particular metric won't update.
ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "foo"), // Bad row- will be unparseable.
ImmutableMap.of("dim1", "foo", "met1", 2.0, TestFirehose.FAIL_DIM, "x"), // Old row- will be thrownAway.
ImmutableMap.of("t", now.minus(Period.days(1)).getMillis(), "dim1", "foo", "met1", 2.0), // Good row- will be processed.
ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
// Stop the firehose, this will drain out existing events.
firehose.close();
// Wait for publish.
while (mdc.getPublished().isEmpty()) {
Thread.sleep(50);
}
publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
// Check metrics.
Assert.assertEquals(3, task.getMetrics().processed());
Assert.assertEquals(1, task.getMetrics().thrownAway());
Assert.assertEquals(2, task.getMetrics().unparseable());
// Do some queries.
Assert.assertEquals(3, sumMetric(task, null, "rows").longValue());
Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());
// Simulate handoff.
for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
}
handOffCallbacks.clear();
// Wait for the task to finish.
final TaskStatus taskStatus = statusFuture.get();
Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.
the class RealtimeIndexTaskTest method testRestore.
@Test(timeout = 60_000L)
public void testRestore() throws Exception {
final File directory = tempFolder.newFolder();
final RealtimeIndexTask task1 = makeRealtimeTask(null);
final DataSegment publishedSegment;
// First run:
{
final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
final TaskToolbox taskToolbox = makeToolbox(task1, mdc, directory);
final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
// Wait for firehose to show up, it starts off null.
while (task1.getFirehose() == null) {
Thread.sleep(50);
}
final TestFirehose firehose = (TestFirehose) task1.getFirehose();
firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo")));
// Trigger graceful shutdown.
task1.stopGracefully(taskToolbox.getConfig());
// Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
final TaskStatus taskStatus = statusFuture.get();
Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
// Nothing should be published.
Assert.assertEquals(new HashSet<>(), mdc.getPublished());
}
// Second run:
{
final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
final TaskToolbox taskToolbox = makeToolbox(task2, mdc, directory);
final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
// Wait for firehose to show up, it starts off null.
while (task2.getFirehose() == null) {
Thread.sleep(50);
}
// Do a query, at this point the previous data should be loaded.
Assert.assertEquals(1, sumMetric(task2, null, "rows").longValue());
final TestFirehose firehose = (TestFirehose) task2.getFirehose();
firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim2", "bar")));
// Stop the firehose, this will drain out existing events.
firehose.close();
// Wait for publish.
while (mdc.getPublished().isEmpty()) {
Thread.sleep(50);
}
publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
// Do a query.
Assert.assertEquals(2, sumMetric(task2, null, "rows").longValue());
// Simulate handoff.
for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
}
handOffCallbacks.clear();
// Wait for the task to finish.
final TaskStatus taskStatus = statusFuture.get();
Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
}
Aggregations