
Example 16 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class CompactionTaskRunTest method testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingTrue.

@Test
public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingTrue() throws Exception {
    // This test fails with segment lock because of the bug reported in https://github.com/apache/druid/issues/10911.
    if (lockGranularity == LockGranularity.SEGMENT) {
        return;
    }
    // This creates HOUR segments with intervals of
    // - 2014-01-01T00:00:00/2014-01-01T01:00:00
    // - 2014-01-01T01:00:00/2014-01-01T02:00:00
    // - 2014-01-01T02:00:00/2014-01-01T03:00:00
    runIndexTask();
    final Interval compactionPartialInterval = Intervals.of("2014-01-01T01:00:00/2014-01-01T02:00:00");
    // Segments that do not belong to the compaction interval are expected to remain unchanged
    final Set<DataSegment> expectedSegments = new HashSet<>();
    expectedSegments.addAll(getStorageCoordinator().retrieveUsedSegmentsForIntervals(DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")), Segments.ONLY_VISIBLE));
    expectedSegments.addAll(getStorageCoordinator().retrieveUsedSegmentsForIntervals(DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")), Segments.ONLY_VISIBLE));
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask partialCompactionTask = builder.segmentGranularity(Granularities.MINUTE).inputSpec(new CompactionIntervalSpec(compactionPartialInterval, null), true).build();
    final Pair<TaskStatus, List<DataSegment>> partialCompactionResult = runTask(partialCompactionTask);
    Assert.assertTrue(partialCompactionResult.lhs.isSuccess());
    // The newly compacted segments are expected. However, old segments of the compacted interval should be dropped
    // regardless of whether the new segments fully overshadow them or not. Hence, we do not expect old segments
    // of the 2014-01-01T01:00:00/2014-01-01T02:00:00 interval post-compaction.
    expectedSegments.addAll(partialCompactionResult.rhs);
    final Set<DataSegment> segmentsAfterPartialCompaction = new HashSet<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
    Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction);
    final CompactionTask fullCompactionTask = builder.segmentGranularity(null).inputSpec(new CompactionIntervalSpec(Intervals.of("2014-01-01/2014-01-02"), null), true).build();
    final Pair<TaskStatus, List<DataSegment>> fullCompactionResult = runTask(fullCompactionTask);
    Assert.assertTrue(fullCompactionResult.lhs.isSuccess());
    final List<DataSegment> segmentsAfterFullCompaction = new ArrayList<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
    segmentsAfterFullCompaction.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getInterval(), s2.getInterval()));
    Assert.assertEquals(3, segmentsAfterFullCompaction.size());
    // Full compaction with a null segmentGranularity means that the original segmentGranularity is preserved.
    // For the intervals, 2014-01-01T00:00:00.000Z/2014-01-01T01:00:00.000Z and 2014-01-01T02:00:00.000Z/2014-01-01T03:00:00.000Z
    // the original segmentGranularity is HOUR from the initial ingestion.
    // For the interval, 2014-01-01T01:00:00.000Z/2014-01-01T01:01:00.000Z, the original segmentGranularity is
    // MINUTE from the partial compaction done earlier.
    Assert.assertEquals(Intervals.of("2014-01-01T00:00:00.000Z/2014-01-01T01:00:00.000Z"), segmentsAfterFullCompaction.get(0).getInterval());
    Assert.assertEquals(Intervals.of("2014-01-01T01:00:00.000Z/2014-01-01T01:01:00.000Z"), segmentsAfterFullCompaction.get(1).getInterval());
    Assert.assertEquals(Intervals.of("2014-01-01T02:00:00.000Z/2014-01-01T03:00:00.000Z"), segmentsAfterFullCompaction.get(2).getInterval());
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) HashSet(java.util.HashSet) Test(org.junit.Test)
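
The pattern above repeats throughout these examples: runTask returns a Pair whose lhs is the TaskStatus and whose rhs is the list of published segments, and the test asserts on lhs.isSuccess(). As a minimal, standalone sketch of the TaskStatus surface these assertions rely on (the task id is made up for illustration):

import org.apache.druid.indexer.TaskState;
import org.apache.druid.indexer.TaskStatus;

public class TaskStatusSketch {
    public static void main(String[] args) {
        // Build a terminal status the way a task runner does on success;
        // "compact_demo" is a made-up task id for illustration.
        TaskStatus status = TaskStatus.success("compact_demo");
        System.out.println(status.isSuccess());                          // true
        System.out.println(status.isComplete());                         // true
        System.out.println(status.getStatusCode() == TaskState.SUCCESS); // true
    }
}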

Example 17 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class CompactionTaskRunTest method testRunIndexAndCompactForSameSegmentAtTheSameTime.

@Test
public void testRunIndexAndCompactForSameSegmentAtTheSameTime() throws Exception {
    runIndexTask();
    // Make sure that indexTask becomes ready first, then compactionTask becomes ready, then indexTask runs
    final CountDownLatch compactionTaskReadyLatch = new CountDownLatch(1);
    final CountDownLatch indexTaskStartLatch = new CountDownLatch(1);
    final Future<Pair<TaskStatus, List<DataSegment>>> indexFuture = exec.submit(() -> runIndexTask(compactionTaskReadyLatch, indexTaskStartLatch, false));
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder.interval(Intervals.of("2014-01-01T00:00:00/2014-01-02T03:00:00")).build();
    final Future<Pair<TaskStatus, List<DataSegment>>> compactionFuture = exec.submit(() -> {
        compactionTaskReadyLatch.await();
        return runTask(compactionTask, indexTaskStartLatch, null);
    });
    Assert.assertTrue(indexFuture.get().lhs.isSuccess());
    List<DataSegment> segments = indexFuture.get().rhs;
    Assert.assertEquals(6, segments.size());
    for (int i = 0; i < 6; i++) {
        Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i / 2, i / 2 + 1), segments.get(i).getInterval());
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(new NumberedOverwriteShardSpec(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID + i % 2, 0, 2, (short) 1, (short) 2), segments.get(i).getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(i % 2, 2), segments.get(i).getShardSpec());
        }
    }
    final Pair<TaskStatus, List<DataSegment>> compactionResult = compactionFuture.get();
    Assert.assertEquals(TaskState.FAILED, compactionResult.lhs.getStatusCode());
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) CountDownLatch(java.util.concurrent.CountDownLatch) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Pair(org.apache.druid.java.util.common.Pair) Test(org.junit.Test)
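
The interleaving in this test comes entirely from CountDownLatch handshakes passed into the task runners. A Druid-free sketch of the same ordering trick, under made-up names, showing why the await()/countDown() pair guarantees one submission proceeds only after the other signals readiness:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class LatchOrderingSketch {
    public static void main(String[] args) throws Exception {
        ExecutorService exec = Executors.newFixedThreadPool(2);
        CountDownLatch otherTaskReady = new CountDownLatch(1);
        Future<String> first = exec.submit(() -> {
            // Blocks until the other submission signals readiness,
            // mirroring compactionTaskReadyLatch.await() above.
            otherTaskReady.await();
            return "ran after the other task was ready";
        });
        Future<String> second = exec.submit(() -> {
            otherTaskReady.countDown();
            return "signalled readiness";
        });
        System.out.println(second.get());
        System.out.println(first.get());
        exec.shutdown();
    }
}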

Example 18 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class CompactionTaskRunTest method testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingFalse.

@Test
public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingFalse() throws Exception {
    // This test fails with segment lock because of the bug reported in https://github.com/apache/druid/issues/10911.
    if (lockGranularity == LockGranularity.SEGMENT) {
        return;
    }
    runIndexTask();
    final Set<DataSegment> expectedSegments = new HashSet<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final Interval partialInterval = Intervals.of("2014-01-01T01:00:00/2014-01-01T02:00:00");
    final CompactionTask partialCompactionTask = builder.segmentGranularity(Granularities.MINUTE).inputSpec(new CompactionIntervalSpec(partialInterval, null), false).build();
    final Pair<TaskStatus, List<DataSegment>> partialCompactionResult = runTask(partialCompactionTask);
    Assert.assertTrue(partialCompactionResult.lhs.isSuccess());
    // All segments in the previous expectedSegments should still appear, as they have a coarser (larger) segment granularity.
    expectedSegments.addAll(partialCompactionResult.rhs);
    final Set<DataSegment> segmentsAfterPartialCompaction = new HashSet<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
    Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction);
    final CompactionTask fullCompactionTask = builder.segmentGranularity(null).inputSpec(new CompactionIntervalSpec(Intervals.of("2014-01-01/2014-01-02"), null), false).build();
    final Pair<TaskStatus, List<DataSegment>> fullCompactionResult = runTask(fullCompactionTask);
    Assert.assertTrue(fullCompactionResult.lhs.isSuccess());
    final List<DataSegment> segmentsAfterFullCompaction = new ArrayList<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
    segmentsAfterFullCompaction.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getInterval(), s2.getInterval()));
    Assert.assertEquals(3, segmentsAfterFullCompaction.size());
    for (int i = 0; i < segmentsAfterFullCompaction.size(); i++) {
        Assert.assertEquals(Intervals.of(StringUtils.format("2014-01-01T%02d/2014-01-01T%02d", i, i + 1)), segmentsAfterFullCompaction.get(i).getInterval());
    }
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ArrayList(java.util.ArrayList) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) Test(org.junit.Test)
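
Both dropExisting variants sort the surviving segments with Comparators.intervalsByStartThenEnd() before asserting positions. Assuming only Joda-Time on the classpath, a plain Comparator reproduces that ordering (a sketch, not Druid's implementation):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.joda.time.Interval;

public class IntervalOrderingSketch {
    public static void main(String[] args) {
        List<Interval> intervals = new ArrayList<>();
        intervals.add(Interval.parse("2014-01-01T02:00:00Z/2014-01-01T03:00:00Z"));
        intervals.add(Interval.parse("2014-01-01T00:00:00Z/2014-01-01T01:00:00Z"));
        intervals.add(Interval.parse("2014-01-01T01:00:00Z/2014-01-01T02:00:00Z"));
        // Order by start instant, breaking ties by end instant -- the same
        // ordering the tests get from Comparators.intervalsByStartThenEnd().
        intervals.sort(Comparator.comparing(Interval::getStart)
                                 .thenComparing(Interval::getEnd));
        intervals.forEach(System.out::println);
    }
}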

Example 19 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTaskTest method testBasics.

@Test(timeout = 60_000L)
public void testBasics() throws Exception {
    expectPublishedSegments(1);
    final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null);
    Assert.assertTrue(task.supportsQueries());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task);
    // Wait for the firehose to show up; it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
    // Stop the firehose; this will drain out existing events.
    firehose.close();
    // Wait for publish.
    Collection<DataSegment> publishedSegments = awaitSegments();
    // Check metrics.
    Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
    Assert.assertEquals(0, task.getRowIngestionMeters().getThrownAway());
    Assert.assertEquals(0, task.getRowIngestionMeters().getUnparseable());
    // Do some queries.
    Assert.assertEquals(2, sumMetric(task, null, "rows").longValue());
    Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());
    awaitHandoffs();
    for (DataSegment publishedSegment : publishedSegments) {
        Pair<Executor, Runnable> executorRunnablePair = handOffCallbacks.get(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()));
        Assert.assertNotNull(publishedSegment + " missing from handoff callbacks: " + handOffCallbacks, executorRunnablePair);
        // Simulate handoff.
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
Also used : Executor(java.util.concurrent.Executor) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
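
Two waiting idioms carry this test: a 50 ms busy-wait until task.getFirehose() stops being null, then a blocking get() on the status future. A standalone sketch of the busy-wait half, with a hypothetical AtomicReference standing in for the firehose:

import java.util.concurrent.atomic.AtomicReference;

public class PollUntilReadySketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical stand-in for task.getFirehose(), which starts off null.
        AtomicReference<String> firehose = new AtomicReference<>(null);
        new Thread(() -> {
            try {
                Thread.sleep(200);
            } catch (InterruptedException ignored) {
            }
            firehose.set("connected");
        }).start();
        // Same 50 ms polling loop the test uses before pushing rows.
        while (firehose.get() == null) {
            Thread.sleep(50);
        }
        System.out.println("firehose ready: " + firehose.get());
    }
}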

Example 20 with TaskStatus

use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTaskTest method testTransformSpec.

@Test(timeout = 60_000L)
public void testTransformSpec() throws Exception {
    expectPublishedSegments(2);
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())));
    final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0, true, 0, 1);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task);
    // Wait for the firehose to show up; it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0), ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
    // Stop the firehose; this will drain out existing events.
    firehose.close();
    Collection<DataSegment> publishedSegments = awaitSegments();
    // Check metrics.
    Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
    Assert.assertEquals(1, task.getRowIngestionMeters().getThrownAway());
    Assert.assertEquals(0, task.getRowIngestionMeters().getUnparseable());
    // Do some queries.
    Assert.assertEquals(2, sumMetric(task, null, "rows").longValue());
    Assert.assertEquals(2, sumMetric(task, new SelectorDimFilter("dim1t", "foofoo", null), "rows").longValue());
    if (NullHandling.replaceWithDefault()) {
        Assert.assertEquals(0, sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "metric1").longValue());
    } else {
        Assert.assertNull(sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "metric1"));
    }
    Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());
    awaitHandoffs();
    for (DataSegment publishedSegment : publishedSegments) {
        Pair<Executor, Runnable> executorRunnablePair = handOffCallbacks.get(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()));
        Assert.assertNotNull(publishedSegment + " missing from handoff callbacks: " + handOffCallbacks, executorRunnablePair);
        // Simulate handoff.
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
Also used : Period(org.joda.time.Period) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Executor(java.util.concurrent.Executor) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
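
The transformSpec filters on dim1 = 'foo' and derives dim1t = concat(dim1, dim1), which is why the dim1t = 'foofoo' query matches both surviving rows while the dim2-only row counts as thrown away. A plain-Java sketch of that filter-then-derive behavior (simplified row shapes, not Druid's transform pipeline):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class TransformSpecSketch {
    public static void main(String[] args) {
        List<Map<String, Object>> rows = List.of(
                Map.of("dim1", "foo", "met1", 1.0),
                Map.of("dim1", "foo", "met1", 2.0),
                Map.of("dim2", "bar", "met1", 2.0));
        // Filter: keep rows where dim1 == "foo"; the dim2-only row is thrown away.
        // Transform: derive dim1t = concat(dim1, dim1) on the survivors.
        List<String> dim1t = rows.stream()
                .filter(r -> "foo".equals(r.get("dim1")))
                .map(r -> (String) r.get("dim1"))
                .map(d -> d + d)
                .collect(Collectors.toList());
        System.out.println(dim1t); // [foofoo, foofoo]
    }
}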

Aggregations

TaskStatus (org.apache.druid.indexer.TaskStatus) 135
Test (org.junit.Test) 103
DataSegment (org.apache.druid.timeline.DataSegment) 55
List (java.util.List) 50
ImmutableList (com.google.common.collect.ImmutableList) 44
ArrayList (java.util.ArrayList) 41
TaskToolbox (org.apache.druid.indexing.common.TaskToolbox) 40
Task (org.apache.druid.indexing.common.task.Task) 39
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 37
Map (java.util.Map) 34
File (java.io.File) 32
IOException (java.io.IOException) 26
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 25
ImmutableMap (com.google.common.collect.ImmutableMap) 25
SegmentDescriptor (org.apache.druid.query.SegmentDescriptor) 25
DataSchema (org.apache.druid.segment.indexing.DataSchema) 25
ISE (org.apache.druid.java.util.common.ISE) 24
HashMap (java.util.HashMap) 23
Executor (java.util.concurrent.Executor) 23
Pair (org.apache.druid.java.util.common.Pair) 23