Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTaskTest, method testMaxTotalRows.
@Test(timeout = 60_000L)
public void testMaxTotalRows() throws Exception
{
  // Expect 2 segments, as we will hit maxTotalRows.
  expectPublishedSegments(2);

  final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, Integer.MAX_VALUE, 1500L);
  final ListenableFuture<TaskStatus> statusFuture = runTask(task);

  // Wait for the firehose to show up; it starts off null.
  while (task.getFirehose() == null) {
    Thread.sleep(50);
  }

  final TestFirehose firehose = (TestFirehose) task.getFirehose();

  // maxTotalRows is 1500, so 2000 rows will roll over into a second segment.
  for (int i = 0; i < 2000; i++) {
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo-" + i, "met1", "1")));
  }

  // Stop the firehose; this will drain out existing events.
  firehose.close();

  // Wait for publish.
  Collection<DataSegment> publishedSegments = awaitSegments();

  // Check metrics.
  Assert.assertEquals(2000, task.getRowIngestionMeters().getProcessed());
  Assert.assertEquals(0, task.getRowIngestionMeters().getThrownAway());
  Assert.assertEquals(0, task.getRowIngestionMeters().getUnparseable());

  // Do some queries.
  Assert.assertEquals(2000, sumMetric(task, null, "rows").longValue());
  Assert.assertEquals(2000, sumMetric(task, null, "met1").longValue());

  awaitHandoffs();

  Assert.assertEquals(2, publishedSegments.size());
  for (DataSegment publishedSegment : publishedSegments) {
    Pair<Executor, Runnable> executorRunnablePair = handOffCallbacks.get(
        new SegmentDescriptor(
            publishedSegment.getInterval(),
            publishedSegment.getVersion(),
            publishedSegment.getShardSpec().getPartitionNum()
        )
    );
    Assert.assertNotNull(publishedSegment + " missing from handoff callbacks: " + handOffCallbacks, executorRunnablePair);

    // Simulate handoff.
    executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
  }
  handOffCallbacks.clear();

  // Wait for the task to finish.
  final TaskStatus taskStatus = statusFuture.get();
  Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
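Every test in this class rebuilds the handoff key the same way: a SegmentDescriptor from the published segment's interval, version, and partition number. A minimal sketch of a helper that could consolidate the pattern (descriptorOf is a hypothetical name, not part of the test class; it uses only calls that appear in the snippets here):

// Hypothetical helper: builds the SegmentDescriptor key that
// handOffCallbacks is indexed by, from a published DataSegment.
private static SegmentDescriptor descriptorOf(DataSegment segment)
{
  return new SegmentDescriptor(
      segment.getInterval(),
      segment.getVersion(),
      segment.getShardSpec().getPartitionNum()
  );
}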
Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTaskTest, method testMultipleParseExceptionsSuccess.
@Test(timeout = 60_000L)
public void testMultipleParseExceptionsSuccess() throws Exception
{
  expectPublishedSegments(1);

  final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, 10, 10);
  final ListenableFuture<TaskStatus> statusFuture = runTask(task);

  // Wait for the firehose to show up; it starts off null.
  while (task.getFirehose() == null) {
    Thread.sleep(50);
  }

  final TestFirehose firehose = (TestFirehose) task.getFirehose();
  firehose.addRows(Arrays.asList(
      // Good row- will be processed.
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "1"),
      // Null row- will be unparseable.
      null,
      // Bad metric- counts as processed with error; that particular metric won't update.
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "foo"),
      // Bad long dim- counts as processed with error; bad dims get default values.
      ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "dimLong", "notnumber", "dimFloat", "notnumber", "met1", "foo"),
      // Bad row- will be unparseable.
      ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"),
      // Good row- will be processed.
      ImmutableMap.of("t", 1521251960729L, "dim2", "bar", "met1", 2.0)
  ));

  // Stop the firehose; this will drain out existing events.
  firehose.close();

  // Wait for publish.
  Collection<DataSegment> publishedSegments = awaitSegments();
  DataSegment publishedSegment = Iterables.getOnlyElement(publishedSegments);

  // Check metrics.
  Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
  Assert.assertEquals(2, task.getRowIngestionMeters().getProcessedWithError());
  Assert.assertEquals(0, task.getRowIngestionMeters().getThrownAway());
  Assert.assertEquals(2, task.getRowIngestionMeters().getUnparseable());

  // Do some queries.
  Assert.assertEquals(4, sumMetric(task, null, "rows").longValue());
  Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());

  awaitHandoffs();

  // Simulate handoff.
  for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
    final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
    Assert.assertEquals(
        new SegmentDescriptor(
            publishedSegment.getInterval(),
            publishedSegment.getVersion(),
            publishedSegment.getShardSpec().getPartitionNum()
        ),
        entry.getKey()
    );
    executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
  }
  handOffCallbacks.clear();

  Map<String, Object> expectedMetrics = ImmutableMap.of(
      RowIngestionMeters.BUILD_SEGMENTS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED, 2,
          RowIngestionMeters.PROCESSED_WITH_ERROR, 2,
          RowIngestionMeters.UNPARSEABLE, 2,
          RowIngestionMeters.THROWN_AWAY, 0
      )
  );

  // Wait for the task to finish.
  final TaskStatus taskStatus = statusFuture.get();
  Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());

  IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
  Assert.assertEquals(expectedMetrics, reportData.getRowStats());

  List<LinkedHashMap> parseExceptionReports =
      (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);

  List<String> expectedMessages = Arrays.asList(
      "Timestamp[null] is unparseable! Event: {dim1=foo, met1=2.0, __fail__=x}",
      "could not convert value [notnumber] to long",
      "Unable to parse value[foo] for field[met1]",
      "Timestamp[null] is unparseable! Event: null"
  );
  List<String> actualMessages = parseExceptionReports
      .stream()
      .map((r) -> ((List<String>) r.get("details")).get(0))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedMessages, actualMessages);

  List<String> expectedInputs = Arrays.asList(
      "{dim1=foo, met1=2.0, __fail__=x}",
      "{t=1521251960729, dim1=foo, dimLong=notnumber, dimFloat=notnumber, met1=foo}",
      "{t=1521251960729, dim1=foo, met1=foo}",
      null
  );
  List<String> actualInputs = parseExceptionReports
      .stream()
      .map((r) -> (String) r.get("input"))
      .collect(Collectors.toList());
  Assert.assertEquals(expectedInputs, actualInputs);

  Assert.assertEquals(IngestionState.COMPLETED, reportData.getIngestionState());
}
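The shape of each parse-exception report is only implied by the assertions above: a "details" list of exception messages plus the offending "input" row rendered as a string (null for the null row). A minimal sketch of walking that assumed shape:

// Sketch only: the "details"/"input" keys are inferred from the
// assertions in the test above, not from a documented report schema.
for (LinkedHashMap report : parseExceptionReports) {
  final String firstDetail = ((List<String>) report.get("details")).get(0);
  final String input = (String) report.get("input");  // null for the null row
  System.out.println(firstDetail + " <- " + input);
}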
Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTaskTest, method testRestore.
@Test(timeout = 60_000L)
public void testRestore() throws Exception
{
  expectPublishedSegments(0);

  final AppenderatorDriverRealtimeIndexTask task1 = makeRealtimeTask(null);
  final DataSegment publishedSegment;

  // First run:
  {
    final ListenableFuture<TaskStatus> statusFuture = runTask(task1);

    // Wait for the firehose to show up; it starts off null.
    while (task1.getFirehose() == null) {
      Thread.sleep(50);
    }

    final TestFirehose firehose = (TestFirehose) task1.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo")));

    // Trigger graceful shutdown.
    task1.stopGracefully(taskToolboxFactory.build(task1).getConfig());

    // Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());

    // Nothing should be published.
    Assert.assertTrue(publishedSegments.isEmpty());
  }

  // Second run:
  {
    expectPublishedSegments(1);

    final AppenderatorDriverRealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task2);

    // Wait for the firehose to show up; it starts off null.
    while (task2.getFirehose() == null) {
      Thread.sleep(50);
    }

    // Do a query; at this point the previous data should be loaded.
    Assert.assertEquals(1, sumMetric(task2, null, "rows").longValue());

    final TestFirehose firehose = (TestFirehose) task2.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim2", "bar")));

    // Stop the firehose; this will drain out existing events.
    firehose.close();

    Collection<DataSegment> publishedSegments = awaitSegments();
    publishedSegment = Iterables.getOnlyElement(publishedSegments);

    // Do a query.
    Assert.assertEquals(2, sumMetric(task2, null, "rows").longValue());

    awaitHandoffs();

    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
      final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
      Assert.assertEquals(
          new SegmentDescriptor(
              publishedSegment.getInterval(),
              publishedSegment.getVersion(),
              publishedSegment.getShardSpec().getPartitionNum()
          ),
          entry.getKey()
      );
      executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();

    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
  }
}
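The handoff-simulation loop above is repeated verbatim in several of these tests. A hedged sketch of a consolidated helper (simulateHandoff is a hypothetical name, not in the original class; the body uses only calls that already appear in the snippets):

// Hypothetical helper: asserts each registered callback is keyed by the
// expected descriptor, runs it, then clears the registry.
private void simulateHandoff(DataSegment publishedSegment)
{
  for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
    Assert.assertEquals(
        new SegmentDescriptor(
            publishedSegment.getInterval(),
            publishedSegment.getVersion(),
            publishedSegment.getShardSpec().getPartitionNum()
        ),
        entry.getKey()
    );
    entry.getValue().lhs.execute(entry.getValue().rhs);
  }
  handOffCallbacks.clear();
}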
Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTaskTest, method testRestoreAfterHandoffAttemptDuringShutdown.
@Test(timeout = 60_000L)
public void testRestoreAfterHandoffAttemptDuringShutdown() throws Exception
{
  final AppenderatorDriverRealtimeIndexTask task1 = makeRealtimeTask(null);
  final DataSegment publishedSegment;

  // First run:
  {
    expectPublishedSegments(1);

    final ListenableFuture<TaskStatus> statusFuture = runTask(task1);

    // Wait for the firehose to show up; it starts off null.
    while (task1.getFirehose() == null) {
      Thread.sleep(50);
    }

    final TestFirehose firehose = (TestFirehose) task1.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo")));

    // Stop the firehose; this will trigger a finishJob.
    firehose.close();

    Collection<DataSegment> publishedSegments = awaitSegments();
    publishedSegment = Iterables.getOnlyElement(publishedSegments);

    // Do a query.
    Assert.assertEquals(1, sumMetric(task1, null, "rows").longValue());

    // Trigger graceful shutdown.
    task1.stopGracefully(taskToolboxFactory.build(task1).getConfig());

    // Wait for the task to finish. The status doesn't really matter.
    while (!statusFuture.isDone()) {
      Thread.sleep(50);
    }
  }

  // Second run:
  {
    expectPublishedSegments(1);

    final AppenderatorDriverRealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task2);

    // Wait for the firehose to show up; it starts off null.
    while (task2.getFirehose() == null) {
      Thread.sleep(50);
    }

    final TestFirehose firehose = (TestFirehose) task2.getFirehose();

    // Stop the firehose again; this will trigger another finishJob and start a second handoff.
    firehose.close();

    awaitHandoffs();

    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
      final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
      Assert.assertEquals(
          new SegmentDescriptor(
              publishedSegment.getInterval(),
              publishedSegment.getVersion(),
              publishedSegment.getShardSpec().getPartitionNum()
          ),
          entry.getKey()
      );
      executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();

    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
  }
}
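Each run also polls for the firehose with the same sleep loop. A minimal sketch of that wait factored out (awaitFirehose is a hypothetical name; the polling interval matches the tests above):

// Hypothetical helper: blocks until the task's firehose is connected,
// then returns it downcast to the test implementation.
private static TestFirehose awaitFirehose(AppenderatorDriverRealtimeIndexTask task) throws InterruptedException
{
  while (task.getFirehose() == null) {
    Thread.sleep(50);
  }
  return (TestFirehose) task.getFirehose();
}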
Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
From the class ServerViewUtil, method getTargetLocations.
public static List<LocatedSegmentDescriptor> getTargetLocations(
    TimelineServerView serverView,
    DataSource datasource,
    List<Interval> intervals,
    int numCandidates
)
{
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(datasource);
  final Optional<? extends TimelineLookup<String, ServerSelector>> maybeTimeline = serverView.getTimeline(analysis);
  if (!maybeTimeline.isPresent()) {
    return Collections.emptyList();
  }
  List<LocatedSegmentDescriptor> located = new ArrayList<>();
  for (Interval interval : intervals) {
    for (TimelineObjectHolder<String, ServerSelector> holder : maybeTimeline.get().lookup(interval)) {
      for (PartitionChunk<ServerSelector> chunk : holder.getObject()) {
        ServerSelector selector = chunk.getObject();
        final SegmentDescriptor descriptor = new SegmentDescriptor(
            holder.getInterval(),
            holder.getVersion(),
            chunk.getChunkNumber()
        );
        long size = selector.getSegment().getSize();
        List<DruidServerMetadata> candidates = selector.getCandidates(numCandidates);
        located.add(new LocatedSegmentDescriptor(descriptor, size, candidates));
      }
    }
  }
  return located;
}
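A hedged sketch of a call site: the serverView variable, the "wikipedia" datasource name, and the interval are assumptions for illustration, while TableDataSource and Intervals are standard Druid classes.

// Sketch: ask for up to three candidate servers per segment for one day of data.
List<LocatedSegmentDescriptor> located = ServerViewUtil.getTargetLocations(
    serverView,
    new TableDataSource("wikipedia"),
    Collections.singletonList(Intervals.of("2018-01-01/2018-01-02")),
    3
);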