Example 21 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinatorTest method testAllocatePendingSegmentsForHashBasedNumberedShardSpec.

@Test
public void testAllocatePendingSegmentsForHashBasedNumberedShardSpec() throws IOException {
    final PartialShardSpec partialShardSpec = new HashBasedNumberedPartialShardSpec(null, 2, 5, null);
    final String dataSource = "ds";
    final Interval interval = Intervals.of("2017-01-01/2017-02-01");
    // first allocation in the interval: expect partitionNum 0 and the requested numBuckets
    SegmentIdWithShardSpec id = coordinator.allocatePendingSegment(dataSource, "seq", null, interval, partialShardSpec, "version", true);
    HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
    Assert.assertEquals(0, shardSpec.getPartitionNum());
    Assert.assertEquals(0, shardSpec.getNumCorePartitions());
    Assert.assertEquals(5, shardSpec.getNumBuckets());
    // publish the allocated segment, then allocate another one for a new sequence
    coordinator.announceHistoricalSegments(Collections.singleton(new DataSegment(id.getDataSource(), id.getInterval(), id.getVersion(), null, Collections.emptyList(), Collections.emptyList(), id.getShardSpec(), 0, 10L)));
    id = coordinator.allocatePendingSegment(dataSource, "seq2", null, interval, partialShardSpec, "version", true);
    shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
    Assert.assertEquals(1, shardSpec.getPartitionNum());
    Assert.assertEquals(0, shardSpec.getNumCorePartitions());
    Assert.assertEquals(5, shardSpec.getNumBuckets());
    // publish the second segment, then allocate with a different numBuckets (3 instead of 5)
    coordinator.announceHistoricalSegments(Collections.singleton(new DataSegment(id.getDataSource(), id.getInterval(), id.getVersion(), null, Collections.emptyList(), Collections.emptyList(), id.getShardSpec(), 0, 10L)));
    id = coordinator.allocatePendingSegment(dataSource, "seq3", null, interval, new HashBasedNumberedPartialShardSpec(null, 2, 3, null), "version", true);
    shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
    Assert.assertEquals(2, shardSpec.getPartitionNum());
    Assert.assertEquals(0, shardSpec.getNumCorePartitions());
    Assert.assertEquals(3, shardSpec.getNumBuckets());
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) HashBasedNumberedPartialShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) NumberedOverwritePartialShardSpec(org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)
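
For context on the constructor calls above: the four arguments to HashBasedNumberedPartialShardSpec are taken here to be, in order, partitionDimensions, bucketId, numBuckets, and partitionFunction. That mapping is an assumption inferred from the assertions (numBuckets = 5 is echoed back by getNumBuckets(), while partitionNum increments 0, 1, 2 independently of it). A minimal spelled-out sketch under that assumption:

// Hypothetical, spelled-out version of the spec used in the test above.
// The argument mapping is an assumption, not confirmed by this page.
final List<String> partitionDimensions = null; // null hashes over all dimensions
final int bucketId = 2;                        // hash bucket that incoming rows map to
final int numBuckets = 5;                      // echoed back by shardSpec.getNumBuckets()
final PartialShardSpec spec =
    new HashBasedNumberedPartialShardSpec(partitionDimensions, bucketId, numBuckets, null /* partitionFunction */);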

Example 22 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinatorTest method testAllocatePendingSegmentAfterDroppingExistingSegment.

/**
 * This test simulates an issue detected in the field, consisting of the following sequence of events:
 * - A Kafka stream segment was created on a given interval
 * - Later, after the above was published, another segment on the same interval was created by the stream
 * - Later, after the above was published, another segment on the same interval was created by the stream
 * - Later, a compaction was issued for the three segments above
 * - Later, after the above was published, another segment on the same interval was created by the stream
 * - Later, the compacted segment was dropped due to a drop rule
 * - Later, after the above was dropped, another segment on the same interval was created by the stream, but this
 *   time there was an integrity violation in the pending segments table because the
 *   {@link IndexerSQLMetadataStorageCoordinator#createNewSegment(Handle, String, Interval, PartialShardSpec, String)}
 *   method returned a segment id that already existed in the pending segments table
 */
@Test
public void testAllocatePendingSegmentAfterDroppingExistingSegment() {
    String maxVersion = "version_newer_newer";
    // simulate one load using kafka streaming
    final PartialShardSpec partialShardSpec = NumberedPartialShardSpec.instance();
    final String dataSource = "ds";
    final Interval interval = Intervals.of("2017-01-01/2017-02-01");
    final SegmentIdWithShardSpec identifier = coordinator.allocatePendingSegment(dataSource, "seq", null, interval, partialShardSpec, "version", true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version", identifier.toString());
    // simulate one more load using kafka streaming (as if previous segment was published, note different sequence name)
    final SegmentIdWithShardSpec identifier1 = coordinator.allocatePendingSegment(dataSource, "seq2", identifier.toString(), interval, partialShardSpec, maxVersion, true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_1", identifier1.toString());
    // simulate one more load using kafka streaming (as if previous segment was published, note different sequence name)
    final SegmentIdWithShardSpec identifier2 = coordinator.allocatePendingSegment(dataSource, "seq3", identifier1.toString(), interval, partialShardSpec, maxVersion, true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_2", identifier2.toString());
    // now simulate a compaction (batch ingestion) over the same interval, like a reindex of the previous three:
    DataSegment segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "version_new", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(0), 9, 100);
    Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
    List<String> ids = retrieveUsedSegmentIds();
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new", ids.get(0));
    // one more load on same interval:
    final SegmentIdWithShardSpec identifier3 = coordinator.allocatePendingSegment(dataSource, "seq4", identifier1.toString(), interval, partialShardSpec, maxVersion, true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new_1", identifier3.toString());
    // now drop the used segment previously loaded:
    markAllSegmentsUnused(ImmutableSet.of(segment));
    // and a final load; this reproduces an issue that could happen with multiple streaming appends,
    // followed by a reindex, followed by a drop, and more streaming data coming in for the same interval
    final SegmentIdWithShardSpec identifier4 = coordinator.allocatePendingSegment(dataSource, "seq5", identifier1.toString(), interval, partialShardSpec, maxVersion, true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new_2", identifier4.toString());
}
Also used : LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HashBasedNumberedPartialShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) NumberedOverwritePartialShardSpec(org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)
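
The identifiers asserted in this test follow SegmentId's string layout: dataSource, interval start, interval end, and version, joined by underscores, with a trailing _partitionNum appended only for partitions greater than 0 (compare identifier with identifier1 above). A hedged sketch using SegmentId.tryParse, the same parser Example 23 below relies on; a hypothetical timestamp-style version is used because versions containing underscores (like "version_new") parse ambiguously:

// Round-trip a hypothetical id whose version contains no underscores.
SegmentId parsed = SegmentId.tryParse("ds", "ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_2017-03-01T00:00:00.000Z_2");
// parsed.getDataSource()   -> "ds"
// parsed.getInterval()     -> 2017-01-01/2017-02-01
// parsed.getVersion()      -> "2017-03-01T00:00:00.000Z"
// parsed.getPartitionNum() -> 2 (the trailing _2; omitted entirely for partition 0)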

Example 23 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class SinglePhaseParallelIndexTaskRunner method allocateNewSegment.

/**
 * Allocate a new segment for the given timestamp locally. This method is called when dynamic partitioning is used
 * and {@link org.apache.druid.indexing.common.LockGranularity} is {@code TIME_CHUNK}.
 *
 * The allocation algorithm is similar to the Overlord-based segment allocation. It keeps the segment allocation
 * history per sequenceName. If the prevSegmentId is found in the segment allocation history, this method
 * returns the next segmentId right after the prevSegmentId in the history. Since the sequenceName is unique
 * per {@link SubTaskSpec} (it is the ID of the subtaskSpec), this algorithm guarantees that the same segmentIds
 * are created in the same order for the same subtaskSpec.
 *
 * @see org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator#allocatePendingSegmentWithSegmentLineageCheck
 */
public SegmentIdWithShardSpec allocateNewSegment(String dataSource, DateTime timestamp, String sequenceName, @Nullable String prevSegmentId) throws IOException {
    NonnullPair<Interval, String> intervalAndVersion = findIntervalAndVersion(timestamp);
    MutableObject<SegmentIdWithShardSpec> segmentIdHolder = new MutableObject<>();
    sequenceToSegmentIds.compute(sequenceName, (k, v) -> {
        final int prevSegmentIdIndex;
        final List<String> segmentIds;
        if (prevSegmentId == null) {
            prevSegmentIdIndex = -1;
            segmentIds = v == null ? new ArrayList<>() : v;
        } else {
            segmentIds = v;
            if (segmentIds == null) {
                throw new ISE("Can't find previous segmentIds for sequence[%s]", sequenceName);
            }
            prevSegmentIdIndex = segmentIds.indexOf(prevSegmentId);
            if (prevSegmentIdIndex == -1) {
                throw new ISE("Can't find previously allocated segmentId[%s] for sequence[%s]", prevSegmentId, sequenceName);
            }
        }
        final int nextSegmentIdIndex = prevSegmentIdIndex + 1;
        final SegmentIdWithShardSpec newSegmentId;
        // an id was already recorded at this position in the history: replay it deterministically
        if (nextSegmentIdIndex < segmentIds.size()) {
            SegmentId segmentId = SegmentId.tryParse(dataSource, segmentIds.get(nextSegmentIdIndex));
            if (segmentId == null) {
                throw new ISE("Illegal segmentId format [%s]", segmentIds.get(nextSegmentIdIndex));
            }
            newSegmentId = new SegmentIdWithShardSpec(segmentId.getDataSource(), segmentId.getInterval(), segmentId.getVersion(), new BuildingNumberedShardSpec(segmentId.getPartitionNum()));
        } else {
            // nothing recorded at this position: mint a fresh partitionNum and append it to the history
            final int partitionNum = Counters.getAndIncrementInt(partitionNumCountersPerInterval, intervalAndVersion.lhs);
            newSegmentId = new SegmentIdWithShardSpec(dataSource, intervalAndVersion.lhs, intervalAndVersion.rhs, new BuildingNumberedShardSpec(partitionNum));
            segmentIds.add(newSegmentId.toString());
        }
        segmentIdHolder.setValue(newSegmentId);
        return segmentIds;
    });
    return segmentIdHolder.getValue();
}
Also used : SegmentId(org.apache.druid.timeline.SegmentId) ArrayList(java.util.ArrayList) BuildingNumberedShardSpec(org.apache.druid.timeline.partition.BuildingNumberedShardSpec) ISE(org.apache.druid.java.util.common.ISE) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Interval(org.joda.time.Interval) MutableObject(org.apache.commons.lang3.mutable.MutableObject)
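
A hedged usage sketch of the guarantee described in the Javadoc above; runner and timestamp are hypothetical stand-ins for a SinglePhaseParallelIndexTaskRunner and an event time within a locked interval. Asking again with the same sequenceName and prevSegmentId replays the id recorded in the per-sequence history instead of minting a new partitionNum:

// First allocation for this sequence: no previous id, so a fresh partitionNum is minted.
SegmentIdWithShardSpec first = runner.allocateNewSegment("ds", timestamp, "subtaskSpecId_0", null);
// Next allocation in the lineage: appended to the history kept for "subtaskSpecId_0".
SegmentIdWithShardSpec second = runner.allocateNewSegment("ds", timestamp, "subtaskSpecId_0", first.toString());
// A retried subtask repeating the same call hits the history branch of the method
// above and gets the recorded id back, not a new one:
SegmentIdWithShardSpec replayed = runner.allocateNewSegment("ds", timestamp, "subtaskSpecId_0", first.toString());
// second.toString().equals(replayed.toString()) is expected to hold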

Example 24 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class TaskLockboxTest method assertAllocatedSegments.

private void assertAllocatedSegments(LockRequestForNewSegment lockRequest, LockResult result) {
    Assert.assertTrue(result.isOk());
    Assert.assertNotNull(result.getTaskLock());
    Assert.assertTrue(result.getTaskLock() instanceof SegmentLock);
    Assert.assertNotNull(result.getNewSegmentId());
    final SegmentLock segmentLock = (SegmentLock) result.getTaskLock();
    final SegmentIdWithShardSpec segmentId = result.getNewSegmentId();
    Assert.assertEquals(lockRequest.getType(), segmentLock.getType());
    Assert.assertEquals(lockRequest.getGroupId(), segmentLock.getGroupId());
    Assert.assertEquals(lockRequest.getDataSource(), segmentLock.getDataSource());
    Assert.assertEquals(lockRequest.getInterval(), segmentLock.getInterval());
    Assert.assertEquals(lockRequest.getPartialShardSpec().getShardSpecClass(), segmentId.getShardSpec().getClass());
    Assert.assertEquals(lockRequest.getPriority(), (int) segmentLock.getPriority());
}
Also used : SegmentLock(org.apache.druid.indexing.common.SegmentLock) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec)

Example 25 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class ActionBasedUsedSegmentChecker method findUsedSegments.

@Override
public Set<DataSegment> findUsedSegments(Set<SegmentIdWithShardSpec> segmentIds) throws IOException {
    // Group by dataSource
    final Map<String, Set<SegmentId>> idsByDataSource = new TreeMap<>();
    for (SegmentIdWithShardSpec segmentId : segmentIds) {
        idsByDataSource.computeIfAbsent(segmentId.getDataSource(), i -> new HashSet<>()).add(segmentId.asSegmentId());
    }
    final Set<DataSegment> usedSegments = new HashSet<>();
    for (Map.Entry<String, Set<SegmentId>> entry : idsByDataSource.entrySet()) {
        String dataSource = entry.getKey();
        Set<SegmentId> segmentIdsInDataSource = entry.getValue();
        final List<Interval> intervals = JodaUtils.condenseIntervals(Iterables.transform(segmentIdsInDataSource, SegmentId::getInterval));
        final Collection<DataSegment> usedSegmentsForIntervals = taskActionClient.submit(new RetrieveUsedSegmentsAction(dataSource, null, intervals, Segments.ONLY_VISIBLE));
        for (DataSegment segment : usedSegmentsForIntervals) {
            if (segmentIdsInDataSource.contains(segment.getId())) {
                usedSegments.add(segment);
            }
        }
    }
    return usedSegments;
}
Also used : Iterables(com.google.common.collect.Iterables) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Set(java.util.Set) IOException(java.io.IOException) UsedSegmentChecker(org.apache.druid.segment.realtime.appenderator.UsedSegmentChecker) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) List(java.util.List) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) TreeMap(java.util.TreeMap) Map(java.util.Map) DataSegment(org.apache.druid.timeline.DataSegment) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) SegmentId(org.apache.druid.timeline.SegmentId)
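
One detail worth noting in findUsedSegments: JodaUtils.condenseIntervals merges overlapping and abutting intervals before the RetrieveUsedSegmentsAction is submitted, so a large set of segment ids collapses into a small number of metadata queries. A small illustration with hypothetical intervals:

// condenseIntervals merges spans that overlap or abut, and keeps disjoint spans apart.
List<Interval> condensed = JodaUtils.condenseIntervals(ImmutableList.of(
    Intervals.of("2017-01-01/2017-01-02"),
    Intervals.of("2017-01-02/2017-01-03"),  // abuts the first: merged into one span
    Intervals.of("2017-02-01/2017-02-02")   // disjoint: kept as its own span
));
// condensed -> [2017-01-01/2017-01-03, 2017-02-01/2017-02-02]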

Aggregations

SegmentIdWithShardSpec (org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec): 36 uses
Test (org.junit.Test): 23 uses
DataSegment (org.apache.druid.timeline.DataSegment): 14 uses
Interval (org.joda.time.Interval): 14 uses
NoopTask (org.apache.druid.indexing.common.task.NoopTask): 12 uses
Task (org.apache.druid.indexing.common.task.Task): 12 uses
PartialShardSpec (org.apache.druid.timeline.partition.PartialShardSpec): 11 uses
HashBasedNumberedPartialShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec): 10 uses
NumberedPartialShardSpec (org.apache.druid.timeline.partition.NumberedPartialShardSpec): 10 uses
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 9 uses
LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec): 9 uses
NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 8 uses
NumberedOverwritePartialShardSpec (org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec): 7 uses
IOException (java.io.IOException): 6 uses
HashSet (java.util.HashSet): 6 uses
Map (java.util.Map): 6 uses
DateTime (org.joda.time.DateTime): 6 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5 uses
Iterables (com.google.common.collect.Iterables): 5 uses
List (java.util.List): 5 uses