Search in sources :

Example 6 with PartialShardSpec

use of org.apache.druid.timeline.partition.PartialShardSpec in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinatorTest method testAnotherAllocatePendingSegmentAfterRevertingCompaction.

/**
 * Slightly different that the above test but that involves reverted compaction
 *   1) used segments of version = A, id = 0, 1, 2
 *   2) overwrote segments of version = B, id = 0 <= compaction
 *   3) marked segments unused for version = A, id = 0, 1, 2 <= overshadowing
 *   4) pending segment of version = B, id = 1 <= appending new data, aborted
 *   5) reverted compaction, mark segments used for version = A, id = 0, 1, 2, and mark compacted segments unused
 *   6) used segments of version = A, id = 0, 1, 2
 *   7) pending segment of version = B, id = 1
 */
@Test
public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() {
    String maxVersion = "Z";
    // 1.0) simulate one append load
    final PartialShardSpec partialShardSpec = NumberedPartialShardSpec.instance();
    final String dataSource = "ds";
    final Interval interval = Intervals.of("2017-01-01/2017-02-01");
    final SegmentIdWithShardSpec identifier = coordinator.allocatePendingSegment(dataSource, "seq", null, interval, partialShardSpec, "A", true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", identifier.toString());
    // Assume it publishes; create its corresponding segment
    DataSegment segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(0), 9, 100);
    Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
    List<String> ids = retrieveUsedSegmentIds();
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", ids.get(0));
    // 1.1) simulate one more append load  (as if previous segment was published, note different sequence name)
    final SegmentIdWithShardSpec identifier1 = coordinator.allocatePendingSegment(dataSource, "seq2", identifier.toString(), interval, partialShardSpec, maxVersion, true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", identifier1.toString());
    // Assume it publishes; create its corresponding segment
    segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(1), 9, 100);
    Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
    ids = retrieveUsedSegmentIds();
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", ids.get(1));
    // 1.2) simulate one more append load  (as if previous segment was published, note different sequence name)
    final SegmentIdWithShardSpec identifier2 = coordinator.allocatePendingSegment(dataSource, "seq3", identifier1.toString(), interval, partialShardSpec, maxVersion, true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_2", identifier2.toString());
    // Assume it publishes; create its corresponding segment
    segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(2), 9, 100);
    // state so far:
    // pendings: A: 0,1,2
    // used segments A: 0,1,2
    // unused segments:
    Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
    ids = retrieveUsedSegmentIds();
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_2", ids.get(2));
    // 2)
    // now simulate that one compaction was done (batch) ingestion for same interval (like reindex of the previous three):
    DataSegment compactedSegment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "B", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(0), 9, 100);
    Assert.assertTrue(insertUsedSegments(ImmutableSet.of(compactedSegment)));
    ids = retrieveUsedSegmentIds();
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_B", ids.get(3));
    // 3) When overshadowing, segments are still marked as "used" in the segments table
    // state so far:
    // pendings: A: 0,1,2
    // used segments: A: 0,1,2; B: 0 <- new compacted segment, overshadows previous version A
    // unused segment:
    // 4) pending segment of version = B, id = 1 <= appending new data, aborted
    final SegmentIdWithShardSpec identifier3 = coordinator.allocatePendingSegment(dataSource, "seq4", identifier2.toString(), interval, partialShardSpec, maxVersion, true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_B_1", identifier3.toString());
    // no corresponding segment, pending aborted
    // state so far:
    // pendings: A: 0,1,2; B:1 (note that B_1 does not make it into segments since its task aborted)
    // used segments: A: 0,1,2; B: 0 <-  compacted segment, overshadows previous version A
    // unused segment:
    // 5) reverted compaction (by marking B_0 as unused)
    // Revert compaction a manual metadata update which is basically the following two steps:
    // <- drop compacted segment
    markAllSegmentsUnused(ImmutableSet.of(compactedSegment));
    // pending: version = A, id = 0,1,2
    // version = B, id = 1
    // 
    // used segment: version = A, id = 0,1,2
    // unused segment: version = B, id = 0
    List<String> pendings = retrievePendingSegmentIds();
    Assert.assertTrue(pendings.size() == 4);
    List<String> used = retrieveUsedSegmentIds();
    Assert.assertTrue(used.size() == 3);
    List<String> unused = retrieveUnusedSegmentIds();
    Assert.assertTrue(unused.size() == 1);
    // Simulate one more append load
    final SegmentIdWithShardSpec identifier4 = coordinator.allocatePendingSegment(dataSource, "seq5", identifier1.toString(), interval, partialShardSpec, maxVersion, true);
    // maxid = B_1 -> new partno = 2
    // versionofexistingchunk=A
    // ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_2
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_3", identifier4.toString());
    // Assume it publishes; create its corresponding segment
    segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(3), 9, 100);
    // pending: version = A, id = 0,1,2,3
    // version = B, id = 1
    // 
    // used segment: version = A, id = 0,1,2,3
    // unused segment: version = B, id = 0
    Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
    ids = retrieveUsedSegmentIds();
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_3", ids.get(3));
}
Also used : LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HashBasedNumberedPartialShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) NumberedOverwritePartialShardSpec(org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 7 with PartialShardSpec

use of org.apache.druid.timeline.partition.PartialShardSpec in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinatorTest method testNoPendingSegmentsAndOneUsedSegment.

@Test
public void testNoPendingSegmentsAndOneUsedSegment() {
    String maxVersion = "Z";
    // create one used segment
    DataSegment segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(0), 9, 100);
    Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
    List<String> ids = retrieveUsedSegmentIds();
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", ids.get(0));
    // simulate one aborted append load
    final PartialShardSpec partialShardSpec = NumberedPartialShardSpec.instance();
    final String dataSource = "ds";
    final Interval interval = Intervals.of("2017-01-01/2017-02-01");
    final SegmentIdWithShardSpec identifier = coordinator.allocatePendingSegment(dataSource, "seq", null, interval, partialShardSpec, maxVersion, true);
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", identifier.toString());
}
Also used : LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HashBasedNumberedPartialShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) NumberedOverwritePartialShardSpec(org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Interval(org.joda.time.Interval) Test(org.junit.Test)

Aggregations

SegmentIdWithShardSpec (org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec)7 PartialShardSpec (org.apache.druid.timeline.partition.PartialShardSpec)7 Interval (org.joda.time.Interval)7 HashBasedNumberedPartialShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec)6 NumberedOverwritePartialShardSpec (org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec)6 NumberedPartialShardSpec (org.apache.druid.timeline.partition.NumberedPartialShardSpec)6 Test (org.junit.Test)6 DataSegment (org.apache.druid.timeline.DataSegment)5 LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec)3 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions (com.google.common.base.Preconditions)1 FluentIterable (com.google.common.collect.FluentIterable)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Iterables (com.google.common.collect.Iterables)1 Lists (com.google.common.collect.Lists)1 Hashing (com.google.common.hash.Hashing)1 BaseEncoding (com.google.common.io.BaseEncoding)1