use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.
the class TaskSerdeTest method testIndexTaskwithResourceSerde.
@Test
public void testIndexTaskwithResourceSerde() throws Exception {
final IndexTask task = new IndexTask(null, new TaskResource("rofl", 2), new IndexIngestionSpec(new DataSchema("foo", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY, new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") }, new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P2D"))), null), new IndexIOConfig(null, new LocalInputSource(new File("lol"), "rofl"), new NoopInputFormat(), true, false), new IndexTuningConfig(null, null, null, 10, null, null, null, null, null, null, new DynamicPartitionsSpec(10000, null), indexSpec, null, 3, false, null, null, null, null, null, null, null, null, null)), null);
for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
jsonMapper.registerModule(jacksonModule);
}
final String json = jsonMapper.writeValueAsString(task);
// Just want to run the clock a bit to make sure the task id doesn't change
Thread.sleep(100);
final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
Assert.assertEquals("foo", task.getDataSource());
Assert.assertEquals(task.getId(), task2.getId());
Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
Assert.assertTrue(task.getIngestionSchema().getIOConfig().getInputSource() instanceof LocalInputSource);
Assert.assertTrue(task2.getIngestionSchema().getIOConfig().getInputSource() instanceof LocalInputSource);
}
use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.
the class RangePartitionMultiPhaseParallelIndexingTest method testAppendLinearlyPartitionedSegmentsToHashPartitionedDatasourceSuccessfullyAppend.
@Test
public void testAppendLinearlyPartitionedSegmentsToHashPartitionedDatasourceSuccessfullyAppend() {
if (useMultivalueDim) {
return;
}
final int targetRowsPerSegment = NUM_ROW / DIM_FILE_CARDINALITY / NUM_PARTITION;
final Set<DataSegment> publishedSegments = new HashSet<>();
publishedSegments.addAll(runTestTask(new SingleDimensionPartitionsSpec(targetRowsPerSegment, null, DIM1, false), TaskState.SUCCESS, false));
// Append
publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(5, null), TaskState.SUCCESS, true));
// And append again
publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
publishedSegments.forEach(segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment));
for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
final List<DataSegment> segments = entry.getValue();
final List<DataSegment> rangedSegments = segments.stream().filter(segment -> segment.getShardSpec().getClass() == SingleDimensionShardSpec.class).collect(Collectors.toList());
final List<DataSegment> linearSegments = segments.stream().filter(segment -> segment.getShardSpec().getClass() == NumberedShardSpec.class).collect(Collectors.toList());
for (DataSegment rangedSegment : rangedSegments) {
final SingleDimensionShardSpec rangeShardSpec = (SingleDimensionShardSpec) rangedSegment.getShardSpec();
for (DataSegment linearSegment : linearSegments) {
Assert.assertEquals(rangedSegment.getInterval(), linearSegment.getInterval());
Assert.assertEquals(rangedSegment.getVersion(), linearSegment.getVersion());
final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) linearSegment.getShardSpec();
Assert.assertEquals(rangeShardSpec.getNumCorePartitions(), numberedShardSpec.getNumCorePartitions());
Assert.assertTrue(rangeShardSpec.getPartitionNum() < numberedShardSpec.getPartitionNum());
}
}
}
}
use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.
the class ParallelIndexTuningConfigTest method testSerdeWithMaxNumSubTasks.
@Test
public void testSerdeWithMaxNumSubTasks() throws IOException {
final int maxNumSubTasks = 250;
final ParallelIndexTuningConfig tuningConfig = new ParallelIndexTuningConfig(null, null, null, 10, 1000L, null, null, null, null, new DynamicPartitionsSpec(100, 100L), new IndexSpec(new RoaringBitmapSerdeFactory(true), CompressionStrategy.UNCOMPRESSED, CompressionStrategy.LZF, LongEncodingStrategy.LONGS), new IndexSpec(), 1, false, true, 10000L, OffHeapMemorySegmentWriteOutMediumFactory.instance(), maxNumSubTasks, null, 100, 20L, new Duration(3600), 128, null, null, false, null, null, null, null, null);
final byte[] json = mapper.writeValueAsBytes(tuningConfig);
final ParallelIndexTuningConfig fromJson = (ParallelIndexTuningConfig) mapper.readValue(json, TuningConfig.class);
Assert.assertEquals(fromJson, tuningConfig);
}
use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.
the class PartialCompactionTest method testPartialCompactRangeAndDynamicPartitionedSegments.
@Test
public void testPartialCompactRangeAndDynamicPartitionedSegments() {
final Map<Interval, List<DataSegment>> rangePartitionedSegments = SegmentUtils.groupSegmentsByInterval(runTestTask(new SingleDimensionPartitionsSpec(10, null, "dim1", false), TaskState.SUCCESS, false));
final Map<Interval, List<DataSegment>> linearlyPartitionedSegments = SegmentUtils.groupSegmentsByInterval(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
// Pick half of each partition lists to compact together
rangePartitionedSegments.values().forEach(segmentsInInterval -> segmentsInInterval.sort(Comparator.comparing(segment -> segment.getShardSpec().getPartitionNum())));
linearlyPartitionedSegments.values().forEach(segmentsInInterval -> segmentsInInterval.sort(Comparator.comparing(segment -> segment.getShardSpec().getPartitionNum())));
final List<DataSegment> segmentsToCompact = new ArrayList<>();
for (List<DataSegment> segmentsInInterval : rangePartitionedSegments.values()) {
segmentsToCompact.addAll(segmentsInInterval.subList(segmentsInInterval.size() / 2, segmentsInInterval.size()));
}
for (List<DataSegment> segmentsInInterval : linearlyPartitionedSegments.values()) {
segmentsToCompact.addAll(segmentsInInterval.subList(0, segmentsInInterval.size() / 2));
}
final CompactionTask compactionTask = newCompactionTaskBuilder().inputSpec(SpecificSegmentsSpec.fromSegments(segmentsToCompact)).tuningConfig(newTuningConfig(new DynamicPartitionsSpec(20, null), 2, false)).build();
final Map<Interval, List<DataSegment>> compactedSegments = SegmentUtils.groupSegmentsByInterval(runTask(compactionTask, TaskState.SUCCESS));
for (List<DataSegment> segmentsInInterval : compactedSegments.values()) {
final int expectedAtomicUpdateGroupSize = segmentsInInterval.size();
for (DataSegment segment : segmentsInInterval) {
Assert.assertEquals(expectedAtomicUpdateGroupSize, segment.getShardSpec().getAtomicUpdateGroupSize());
}
}
}
use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.
the class HashPartitionMultiPhaseParallelIndexingTest method testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend.
@Test
public void testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend() {
final Set<DataSegment> publishedSegments = new HashSet<>();
publishedSegments.addAll(runTestTask(new HashedPartitionsSpec(null, numShards, ImmutableList.of("dim1", "dim2")), TaskState.SUCCESS, false));
// Append
publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(5, null), TaskState.SUCCESS, true));
// And append again
publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
publishedSegments.forEach(segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment));
for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
final List<DataSegment> segments = entry.getValue();
final List<DataSegment> hashedSegments = segments.stream().filter(segment -> segment.getShardSpec().getClass() == HashBasedNumberedShardSpec.class).collect(Collectors.toList());
final List<DataSegment> linearSegments = segments.stream().filter(segment -> segment.getShardSpec().getClass() == NumberedShardSpec.class).collect(Collectors.toList());
for (DataSegment hashedSegment : hashedSegments) {
final HashBasedNumberedShardSpec hashShardSpec = (HashBasedNumberedShardSpec) hashedSegment.getShardSpec();
for (DataSegment linearSegment : linearSegments) {
Assert.assertEquals(hashedSegment.getInterval(), linearSegment.getInterval());
Assert.assertEquals(hashedSegment.getVersion(), linearSegment.getVersion());
final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) linearSegment.getShardSpec();
Assert.assertEquals(hashShardSpec.getNumCorePartitions(), numberedShardSpec.getNumCorePartitions());
Assert.assertTrue(hashShardSpec.getPartitionNum() < numberedShardSpec.getPartitionNum());
}
}
}
}
Aggregations