Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From class IndexerSQLMetadataStorageCoordinatorTest, method testAllocatePendingSegmentsForHashBasedNumberedShardSpec.
@Test
public void testAllocatePendingSegmentsForHashBasedNumberedShardSpec() throws IOException {
  final PartialShardSpec partialShardSpec = new HashBasedNumberedPartialShardSpec(null, 2, 5, null);
  final String dataSource = "ds";
  final Interval interval = Intervals.of("2017-01-01/2017-02-01");
  SegmentIdWithShardSpec id = coordinator.allocatePendingSegment(
      dataSource, "seq", null, interval, partialShardSpec, "version", true
  );
  HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
  Assert.assertEquals(0, shardSpec.getPartitionNum());
  Assert.assertEquals(0, shardSpec.getNumCorePartitions());
  Assert.assertEquals(5, shardSpec.getNumBuckets());
  coordinator.announceHistoricalSegments(Collections.singleton(new DataSegment(
      id.getDataSource(), id.getInterval(), id.getVersion(), null,
      Collections.emptyList(), Collections.emptyList(), id.getShardSpec(), 0, 10L
  )));
  id = coordinator.allocatePendingSegment(
      dataSource, "seq2", null, interval, partialShardSpec, "version", true
  );
  shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
  Assert.assertEquals(1, shardSpec.getPartitionNum());
  Assert.assertEquals(0, shardSpec.getNumCorePartitions());
  Assert.assertEquals(5, shardSpec.getNumBuckets());
  coordinator.announceHistoricalSegments(Collections.singleton(new DataSegment(
      id.getDataSource(), id.getInterval(), id.getVersion(), null,
      Collections.emptyList(), Collections.emptyList(), id.getShardSpec(), 0, 10L
  )));
  id = coordinator.allocatePendingSegment(
      dataSource, "seq3", null, interval, new HashBasedNumberedPartialShardSpec(null, 2, 3, null), "version", true
  );
  shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
  Assert.assertEquals(2, shardSpec.getPartitionNum());
  Assert.assertEquals(0, shardSpec.getNumCorePartitions());
  Assert.assertEquals(3, shardSpec.getNumBuckets());
}
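In the assertions above, getPartitionNum() is the ordinal of the allocated segment within the interval, getNumCorePartitions() stays 0 because these pending segments are allocated as appended (non-core) partitions, and getNumBuckets() comes from the HashBasedNumberedPartialShardSpec. As a minimal sketch, not part of the test, the second allocated spec could be built directly with the seven-argument constructor that appears in the HadoopDruidDetermineConfigurationJobTest examples further down; the ObjectMapper argument is only needed for hashing input rows, so a plain mapper is assumed here:

// partitionNum = 1, numCorePartitions = 0, bucketId = 2, numBuckets = 5,
// no partition dimensions, partition function left null (defaulted).
HashBasedNumberedShardSpec spec = new HashBasedNumberedShardSpec(1, 0, 2, 5, null, null, new ObjectMapper());
Assert.assertEquals(1, spec.getPartitionNum());
Assert.assertEquals(0, spec.getNumCorePartitions());
Assert.assertEquals(5, spec.getNumBuckets());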
Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From class SegmentPublisherHelperTest, method testAnnotateCorePartitionSetSizeForHashNumberedShardSpec.
@Test
public void testAnnotateCorePartitionSetSizeForHashNumberedShardSpec() {
  final Set<DataSegment> segments = ImmutableSet.of(
      newSegment(new BuildingHashBasedNumberedShardSpec(0, 0, 3, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper())),
      newSegment(new BuildingHashBasedNumberedShardSpec(1, 1, 3, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper())),
      newSegment(new BuildingHashBasedNumberedShardSpec(2, 2, 3, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper()))
  );
  final Set<DataSegment> annotated = SegmentPublisherHelper.annotateShardSpec(segments);
  for (DataSegment segment : annotated) {
    Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
    final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
    Assert.assertEquals(3, shardSpec.getNumCorePartitions());
  }
}
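SegmentPublisherHelper.annotateShardSpec replaces each BuildingHashBasedNumberedShardSpec with a HashBasedNumberedShardSpec whose core partition count equals the number of segments in the set (three here), which is exactly what the loop asserts. As a rough sketch under that assumption, the first building spec above corresponds to an immutable spec along the lines of:

// partitionNum = 0, numCorePartitions = 3, bucketId = 0, numBuckets = 3
new HashBasedNumberedShardSpec(0, 3, 0, 3, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper())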
Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From class IndexGeneratorJobTest, method verifyJob.
private void verifyJob(IndexGeneratorJob job) throws IOException {
  Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
  final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
  IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config).forEach(
      segmentAndIndexZipFilePath -> intervalToSegments
          .computeIfAbsent(segmentAndIndexZipFilePath.getSegment().getInterval(), k -> new ArrayList<>())
          .add(segmentAndIndexZipFilePath.getSegment())
  );
  List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths =
      IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
  JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
  JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
  File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
  Assert.assertTrue(workingPath.exists());
  final Map<Interval, List<File>> intervalToIndexFiles = new HashMap<>();
  int segmentNum = 0;
  for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
    Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
    File segmentOutputFolder = new File(StringUtils.format(
        "%s/%s/%s_%s/%s",
        config.getSchema().getIOConfig().getSegmentOutputPath(),
        config.getSchema().getDataSchema().getDataSource(),
        currTime.toString(),
        currTime.plusDays(1).toString(),
        config.getSchema().getTuningConfig().getVersion()
    ));
    Assert.assertTrue(segmentOutputFolder.exists());
    Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
    for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
      File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
      Assert.assertTrue(individualSegmentFolder.exists());
      File indexZip = new File(individualSegmentFolder, "index.zip");
      Assert.assertTrue(indexZip.exists());
      intervalToIndexFiles.computeIfAbsent(new Interval(currTime, currTime.plusDays(1)), k -> new ArrayList<>())
          .add(indexZip);
    }
  }
  Assert.assertEquals(intervalToSegments.size(), intervalToIndexFiles.size());
  segmentNum = 0;
  for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
    final Interval interval = entry.getKey();
    final List<DataSegment> segments = entry.getValue();
    final List<File> indexFiles = intervalToIndexFiles.get(interval);
    Assert.assertNotNull(indexFiles);
    Collections.sort(segments);
    indexFiles.sort(Comparator.comparing(File::getAbsolutePath));
    Assert.assertEquals(segments.size(), indexFiles.size());
    Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
    for (int i = 0; i < segments.size(); i++) {
      final DataSegment dataSegment = segments.get(i);
      final File indexZip = indexFiles.get(i);
      Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
      Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
      Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
      Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
      if ("website".equals(datasourceName)) {
        Assert.assertEquals("website", dataSegment.getDataSource());
        Assert.assertEquals("host", dataSegment.getDimensions().get(0));
        Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
        Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
      } else if ("inherit_dims".equals(datasourceName)) {
        Assert.assertEquals("inherit_dims", dataSegment.getDataSource());
        Assert.assertEquals(ImmutableList.of("X", "Y", "M", "Q", "B", "F"), dataSegment.getDimensions());
        Assert.assertEquals("count", dataSegment.getMetrics().get(0));
      } else if ("inherit_dims2".equals(datasourceName)) {
        Assert.assertEquals("inherit_dims2", dataSegment.getDataSource());
        Assert.assertEquals(ImmutableList.of("B", "F", "M", "Q", "X", "Y"), dataSegment.getDimensions());
        Assert.assertEquals("count", dataSegment.getMetrics().get(0));
      } else {
        Assert.fail("Test did not specify supported datasource name");
      }
      if (forceExtendableShardSpecs) {
        NumberedShardSpec spec = (NumberedShardSpec) dataSegment.getShardSpec();
        Assert.assertEquals(i, spec.getPartitionNum());
        Assert.assertEquals(shardInfo.length, spec.getNumCorePartitions());
      } else if ("hashed".equals(partitionType)) {
        Integer[] hashShardInfo = (Integer[]) shardInfo[i];
        HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
        Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
        Assert.assertEquals((int) hashShardInfo[1], spec.getNumCorePartitions());
      } else if ("single".equals(partitionType)) {
        String[] singleDimensionShardInfo = (String[]) shardInfo[i];
        SingleDimensionShardSpec spec = (SingleDimensionShardSpec) dataSegment.getShardSpec();
        Assert.assertEquals(singleDimensionShardInfo[0], spec.getStart());
        Assert.assertEquals(singleDimensionShardInfo[1], spec.getEnd());
      } else {
        throw new RE("Invalid partition type:[%s]", partitionType);
      }
    }
  }
}
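For context on the fixture consumed above: when partitionType is "hashed", each shardInfo row is cast to an Integer[] holding (partitionNum, numCorePartitions); when it is "single", each row is a String[] holding the start and end of the dimension range. A hypothetical fixture shaped to match those casts (the values are illustrative only, not taken from the test's parameterization) might look like:

// One interval with two hashed shards; each row is {partitionNum, numCorePartitions}.
final Object[][][] shardInfoForEachSegment = new Object[][][] {
    {new Integer[]{0, 2}, new Integer[]{1, 2}}
};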
Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From class HadoopDruidDetermineConfigurationJobTest, method testRunWithSingleDimensionPartitionsSpecCreateHashBasedNumberedShardSpecWithoutHashPartitionFunction.
@Test
public void testRunWithSingleDimensionPartitionsSpecCreateHashBasedNumberedShardSpecWithoutHashPartitionFunction() {
  final Set<Interval> intervals = ImmutableSet.of(
      Intervals.of("2020-01-01/P1D"),
      Intervals.of("2020-01-02/P1D"),
      Intervals.of("2020-01-03/P1D")
  );
  final SingleDimensionPartitionsSpec partitionsSpec = new SingleDimensionPartitionsSpec(1000, null, "dim", false);
  final HadoopDruidIndexerConfig config = Mockito.mock(HadoopDruidIndexerConfig.class);
  Mockito.when(config.isDeterminingPartitions()).thenReturn(false);
  Mockito.when(config.getPartitionsSpec()).thenReturn(partitionsSpec);
  Mockito.when(config.getSegmentGranularIntervals()).thenReturn(intervals);
  final ArgumentCaptor<Map<Long, List<HadoopyShardSpec>>> resultCaptor = ArgumentCaptor.forClass(Map.class);
  Mockito.doNothing().when(config).setShardSpecs(resultCaptor.capture());
  final HadoopDruidDetermineConfigurationJob job = new HadoopDruidDetermineConfigurationJob(config);
  Assert.assertTrue(job.run());
  final Map<Long, List<HadoopyShardSpec>> shardSpecs = resultCaptor.getValue();
  Assert.assertEquals(3, shardSpecs.size());
  for (Interval interval : intervals) {
    final List<HadoopyShardSpec> shardSpecsPerInterval = shardSpecs.get(interval.getStartMillis());
    Assert.assertEquals(1, shardSpecsPerInterval.size());
    Assert.assertEquals(
        new HashBasedNumberedShardSpec(
            0, shardSpecsPerInterval.size(), 0, shardSpecsPerInterval.size(),
            ImmutableList.of("dim"), null, new ObjectMapper()
        ),
        shardSpecsPerInterval.get(0).getActualSpec()
    );
  }
}
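Since each interval gets exactly one shard here, substituting shardSpecsPerInterval.size() = 1 shows the expected spec concretely: a single core partition in a single bucket, keyed on "dim", with the hash partition function left null (as the test name indicates, no partition function is set when hash specs are derived from a SingleDimensionPartitionsSpec):

new HashBasedNumberedShardSpec(0, 1, 0, 1, ImmutableList.of("dim"), null, new ObjectMapper())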
Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From class HadoopDruidDetermineConfigurationJobTest, method testRunWithHashedPartitionsSpecCreateHashBasedNumberedShardSpecWithHashPartitionFunction.
@Test
public void testRunWithHashedPartitionsSpecCreateHashBasedNumberedShardSpecWithHashPartitionFunction() {
  final Set<Interval> intervals = ImmutableSet.of(
      Intervals.of("2020-01-01/P1D"),
      Intervals.of("2020-01-02/P1D"),
      Intervals.of("2020-01-03/P1D")
  );
  final HashedPartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 2, null, HashPartitionFunction.MURMUR3_32_ABS, null, null);
  final HadoopDruidIndexerConfig config = Mockito.mock(HadoopDruidIndexerConfig.class);
  Mockito.when(config.isDeterminingPartitions()).thenReturn(false);
  Mockito.when(config.getPartitionsSpec()).thenReturn(partitionsSpec);
  Mockito.when(config.getSegmentGranularIntervals()).thenReturn(intervals);
  final ArgumentCaptor<Map<Long, List<HadoopyShardSpec>>> resultCaptor = ArgumentCaptor.forClass(Map.class);
  Mockito.doNothing().when(config).setShardSpecs(resultCaptor.capture());
  final HadoopDruidDetermineConfigurationJob job = new HadoopDruidDetermineConfigurationJob(config);
  Assert.assertTrue(job.run());
  final Map<Long, List<HadoopyShardSpec>> shardSpecs = resultCaptor.getValue();
  Assert.assertEquals(3, shardSpecs.size());
  for (Interval interval : intervals) {
    final List<HadoopyShardSpec> shardSpecsPerInterval = shardSpecs.get(interval.getStartMillis());
    Assert.assertEquals(2, shardSpecsPerInterval.size());
    for (int i = 0; i < shardSpecsPerInterval.size(); i++) {
      Assert.assertEquals(
          new HashBasedNumberedShardSpec(
              i, shardSpecsPerInterval.size(), i, shardSpecsPerInterval.size(),
              null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper()
          ),
          shardSpecsPerInterval.get(i).getActualSpec()
      );
    }
  }
}
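With two shards per interval, the loop expects partitionNum to equal bucketId and numCorePartitions to equal numBuckets, and every spec carries MURMUR3_32_ABS from the HashedPartitionsSpec. For example, the second shard of each interval is expected to equal:

new HashBasedNumberedShardSpec(1, 2, 1, 2, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper())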