Use of org.apache.druid.timeline.partition.PartialShardSpec in project druid by druid-io.
From the class IndexerSQLMetadataStorageCoordinatorTest, the method testAnotherAllocatePendingSegmentAfterRevertingCompaction.
/**
* Slightly different from the above test in that it involves reverted compaction:
* 1) used segments of version = A, id = 0, 1, 2
* 2) overwrote segments of version = B, id = 0 <= compaction
* 3) marked segments unused for version = A, id = 0, 1, 2 <= overshadowing
* 4) pending segment of version = B, id = 1 <= appending new data, aborted
* 5) reverted compaction, mark segments used for version = A, id = 0, 1, 2, and mark compacted segments unused
* 6) used segments of version = A, id = 0, 1, 2
* 7) pending segment of version = B, id = 1
*/
@Test
public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() {
String maxVersion = "Z";
// 1.0) simulate one append load
final PartialShardSpec partialShardSpec = NumberedPartialShardSpec.instance();
final String dataSource = "ds";
final Interval interval = Intervals.of("2017-01-01/2017-02-01");
final SegmentIdWithShardSpec identifier = coordinator.allocatePendingSegment(dataSource, "seq", null, interval, partialShardSpec, "A", true);
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", identifier.toString());
// Assume it publishes; create its corresponding segment
DataSegment segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(0), 9, 100);
Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
List<String> ids = retrieveUsedSegmentIds();
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", ids.get(0));
// 1.1) simulate one more append load (as if previous segment was published, note different sequence name)
final SegmentIdWithShardSpec identifier1 = coordinator.allocatePendingSegment(dataSource, "seq2", identifier.toString(), interval, partialShardSpec, maxVersion, true);
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", identifier1.toString());
// Assume it publishes; create its corresponding segment
segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(1), 9, 100);
Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
ids = retrieveUsedSegmentIds();
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", ids.get(1));
// 1.2) simulate one more append load (as if previous segment was published, note different sequence name)
final SegmentIdWithShardSpec identifier2 = coordinator.allocatePendingSegment(dataSource, "seq3", identifier1.toString(), interval, partialShardSpec, maxVersion, true);
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_2", identifier2.toString());
// Assume it publishes; create its corresponding segment
segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(2), 9, 100);
// state so far:
// pendings: A: 0,1,2
// used segments A: 0,1,2
// unused segments:
Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
ids = retrieveUsedSegmentIds();
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_2", ids.get(2));
// 2)
// now simulate that one compaction (batch ingestion) was done for the same interval (like a reindex of the previous three segments):
DataSegment compactedSegment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "B", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(0), 9, 100);
Assert.assertTrue(insertUsedSegments(ImmutableSet.of(compactedSegment)));
ids = retrieveUsedSegmentIds();
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_B", ids.get(3));
// 3) When overshadowing, segments are still marked as "used" in the segments table
// state so far:
// pendings: A: 0,1,2
// used segments: A: 0,1,2; B: 0 <- new compacted segment, overshadows previous version A
// unused segment:
// 4) pending segment of version = B, id = 1 <= appending new data, aborted
final SegmentIdWithShardSpec identifier3 = coordinator.allocatePendingSegment(dataSource, "seq4", identifier2.toString(), interval, partialShardSpec, maxVersion, true);
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_B_1", identifier3.toString());
// no corresponding segment, pending aborted
// state so far:
// pendings: A: 0,1,2; B:1 (note that B_1 does not make it into segments since its task aborted)
// used segments: A: 0,1,2; B: 0 <- compacted segment, overshadows previous version A
// unused segment:
// 5) reverted compaction (by marking B_0 as unused)
// Reverting compaction is a manual metadata update, which is basically the following two steps:
markAllSegmentsUnused(ImmutableSet.of(compactedSegment)); // <- drop compacted segment
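// (Of those two steps, only dropping B_0 needs a DB change in this test: marking the
// version-A segments used again is a no-op because, as noted in 3) above, overshadowed
// segments were never flipped to unused in the segments table.)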
// pending: version = A, id = 0,1,2; version = B, id = 1
// used segment: version = A, id = 0,1,2
// unused segment: version = B, id = 0
List<String> pendings = retrievePendingSegmentIds();
Assert.assertEquals(4, pendings.size());
List<String> used = retrieveUsedSegmentIds();
Assert.assertEquals(3, used.size());
List<String> unused = retrieveUnusedSegmentIds();
Assert.assertEquals(1, unused.size());
// Simulate one more append load
final SegmentIdWithShardSpec identifier4 = coordinator.allocatePendingSegment(dataSource, "seq5", identifier1.toString(), interval, partialShardSpec, maxVersion, true);
// version of existing chunk = A; pending ids for version A are 0,1,2 (B_1 is pending in
// the now-unused version B), so the next partition number in chunk A is 3:
// ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_3
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_3", identifier4.toString());
// Assume it publishes; create its corresponding segment
segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(3), 9, 100);
// pending: version = A, id = 0,1,2,3; version = B, id = 1
// used segment: version = A, id = 0,1,2,3
// unused segment: version = B, id = 0
Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
ids = retrieveUsedSegmentIds();
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_3", ids.get(3));
}
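The helper methods used above (insertUsedSegments, retrieveUsedSegmentIds, retrievePendingSegmentIds, retrieveUnusedSegmentIds, markAllSegmentsUnused) are test-local utilities not shown in this snippet. As a rough illustration, here is a minimal sketch of what retrieveUsedSegmentIds could look like over plain JDBC, assuming the default metadata table name druid_segments with a boolean used column; the real test instead goes through its Derby test rule and the coordinator's SQL connector, so treat this as a sketch rather than the project's actual code.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

// Hypothetical helper sketch: fetch the ids of all used segments, ordered by id
// so that ids.get(n) lines up with the order asserted in the test above.
private List<String> retrieveUsedSegmentIds(Connection connection) throws SQLException {
    final List<String> ids = new ArrayList<>();
    try (PreparedStatement statement = connection.prepareStatement("SELECT id FROM druid_segments WHERE used = true ORDER BY id");
         ResultSet resultSet = statement.executeQuery()) {
        while (resultSet.next()) {
            ids.add(resultSet.getString("id"));
        }
    }
    return ids;
}

retrieveUnusedSegmentIds would flip the predicate to used = false, retrievePendingSegmentIds would read from druid_pendingSegments, and markAllSegmentsUnused would be the matching UPDATE druid_segments SET used = false for the given segment ids.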
Use of org.apache.druid.timeline.partition.PartialShardSpec in project druid by druid-io.
From the class IndexerSQLMetadataStorageCoordinatorTest, the method testNoPendingSegmentsAndOneUsedSegment.
@Test
public void testNoPendingSegmentsAndOneUsedSegment() {
String maxVersion = "Z";
// create one used segment
DataSegment segment = new DataSegment("ds", Intervals.of("2017-01-01T00Z/2017-02-01T00Z"), "A", ImmutableMap.of(), ImmutableList.of("dim1"), ImmutableList.of("m1"), new LinearShardSpec(0), 9, 100);
Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
List<String> ids = retrieveUsedSegmentIds();
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", ids.get(0));
// simulate one aborted append load
final PartialShardSpec partialShardSpec = NumberedPartialShardSpec.instance();
final String dataSource = "ds";
final Interval interval = Intervals.of("2017-01-01/2017-02-01");
final SegmentIdWithShardSpec identifier = coordinator.allocatePendingSegment(dataSource, "seq", null, interval, partialShardSpec, maxVersion, true);
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", identifier.toString());
}
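Note that the allocated id takes its version from the existing version-A chunk rather than from maxVersion ("Z" is only an upper bound), and its partition number follows the already-used A_0. The extra assertions below, illustrative only and not part of the original test, make that explicit:

// The new pending segment joins the existing chunk: version "A", partition 1.
Assert.assertEquals("A", identifier.getVersion());
Assert.assertEquals(1, identifier.getShardSpec().getPartitionNum());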