use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
the class BatchDeltaIngestionTest method makeHadoopDruidIndexerConfig.
private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(Map<String, Object> inputSpec, File tmpDir, AggregatorFactory[] aggregators) throws Exception {
  HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
      new HadoopIngestionSpec(
          new DataSchema(
              "website",
              MAPPER.convertValue(
                  new StringInputRowParser(
                      new CSVParseSpec(
                          new TimestampSpec("timestamp", "yyyyMMddHH", null),
                          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host"))),
                          null, ImmutableList.of("timestamp", "host", "host2", "visited_num"), false, 0
                      ),
                      null
                  ),
                  Map.class
              ),
              aggregators != null ? aggregators : new AggregatorFactory[] {
                  new LongSumAggregatorFactory("visited_sum", "visited_num"),
                  new HyperUniquesAggregatorFactory("unique_hosts", "host2")
              },
              new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
              null,
              MAPPER
          ),
          new HadoopIOConfig(inputSpec, null, tmpDir.getCanonicalPath()),
          new HadoopTuningConfig(
              tmpDir.getCanonicalPath(),
              null, null, null, null, null, null, null, null,
              false, false, false, false, null, false, false,
              null, null, false, false, null, null, null, null, null
          )
      )
  );
  config.setShardSpecs(
      ImmutableMap.of(
          INTERVAL_FULL.getStartMillis(),
          ImmutableList.of(
              new HadoopyShardSpec(
                  new HashBasedNumberedShardSpec(0, 1, 0, 1, null, HashPartitionFunction.MURMUR3_32_ABS, HadoopDruidIndexerConfig.JSON_MAPPER),
                  0
              )
          )
      )
  );
  config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
  return config;
}
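The seven arguments passed to HashBasedNumberedShardSpec above carry the whole bucket layout for the interval. Below is a minimal labeled sketch of that same call, assuming the constructor order is (partitionNum, numCorePartitions, bucketId, numBuckets, partitionDimensions, partitionFunction, objectMapper), as this usage and the getters in the other tests suggest.
// Hedged sketch only; parameter names are inferred from the call above and from
// getNumCorePartitions()/getNumBuckets()/getPartitionDimensions() used elsewhere on this page.
HashBasedNumberedShardSpec spec = new HashBasedNumberedShardSpec(
    0,                                    // partitionNum: this segment's position within the interval
    1,                                    // numCorePartitions: segments created by the initial (non-append) ingestion
    0,                                    // bucketId: which hash bucket this segment holds
    1,                                    // numBuckets: total hash buckets for the interval
    null,                                 // partitionDimensions: null means hash over all dimensions
    HashPartitionFunction.MURMUR3_32_ABS, // hash function used to route rows to buckets
    HadoopDruidIndexerConfig.JSON_MAPPER  // ObjectMapper used to serialize the grouping key for hashing
);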
use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
the class HashPartitionAdjustingCorePartitionSizeTest method testEqualNumberOfPartitionsToBuckets.
@Test
public void testEqualNumberOfPartitionsToBuckets() throws IOException {
  final File inputDir = temporaryFolder.newFolder();
  for (int i = 0; i < 10; i++) {
    try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
      writer.write(StringUtils.format("2020-01-01T00:00:00,%s,b1,%d\n", "aa" + (i + 10), 10 * (i + 1)));
    }
  }
  final DimensionBasedPartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 5, ImmutableList.of("dim1"));
  final Set<DataSegment> segments = runTestTask(
      TIMESTAMP_SPEC,
      DIMENSIONS_SPEC,
      INPUT_FORMAT,
      null,
      INTERVAL_TO_INDEX,
      inputDir,
      "test_*",
      partitionsSpec,
      maxNumConcurrentSubTasks,
      TaskState.SUCCESS
  );
  Assert.assertEquals(5, segments.size());
  segments.forEach(segment -> {
    Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
    final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
    Assert.assertEquals(5, shardSpec.getNumCorePartitions());
    Assert.assertEquals(5, shardSpec.getNumBuckets());
    Assert.assertEquals(ImmutableList.of("dim1"), shardSpec.getPartitionDimensions());
  });
}
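The three-argument HashedPartitionsSpec above fixes the bucket count instead of a target row count; since the test writes ten distinct dim1 values into five buckets, every bucket produces a segment and numCorePartitions ends up equal to numBuckets. A minimal sketch of that spec, assuming the constructor order is (maxRowsPerSegment, numShards, partitionDimensions):
// Hedged sketch; the first argument is assumed to be a row-count target that must stay null once numShards is fixed.
DimensionBasedPartitionsSpec fiveBuckets = new HashedPartitionsSpec(
    null,                     // maxRowsPerSegment: not used when numShards is specified
    5,                        // numShards: number of hash buckets (and, here, core partitions) per interval
    ImmutableList.of("dim1")  // partitionDimensions: only dim1 feeds the hash
);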
use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
the class HashPartitionMultiPhaseParallelIndexingTest method testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend.
@Test
public void testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend() {
  final Set<DataSegment> publishedSegments = new HashSet<>();
  publishedSegments.addAll(
      runTestTask(new HashedPartitionsSpec(null, numShards, ImmutableList.of("dim1", "dim2")), TaskState.SUCCESS, false)
  );
  // Append
  publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(5, null), TaskState.SUCCESS, true));
  // And append again
  publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
  final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
  publishedSegments.forEach(
      segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment)
  );
  for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
    final List<DataSegment> segments = entry.getValue();
    final List<DataSegment> hashedSegments = segments
        .stream()
        .filter(segment -> segment.getShardSpec().getClass() == HashBasedNumberedShardSpec.class)
        .collect(Collectors.toList());
    final List<DataSegment> linearSegments = segments
        .stream()
        .filter(segment -> segment.getShardSpec().getClass() == NumberedShardSpec.class)
        .collect(Collectors.toList());
    for (DataSegment hashedSegment : hashedSegments) {
      final HashBasedNumberedShardSpec hashShardSpec = (HashBasedNumberedShardSpec) hashedSegment.getShardSpec();
      for (DataSegment linearSegment : linearSegments) {
        Assert.assertEquals(hashedSegment.getInterval(), linearSegment.getInterval());
        Assert.assertEquals(hashedSegment.getVersion(), linearSegment.getVersion());
        final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) linearSegment.getShardSpec();
        Assert.assertEquals(hashShardSpec.getNumCorePartitions(), numberedShardSpec.getNumCorePartitions());
        Assert.assertTrue(hashShardSpec.getPartitionNum() < numberedShardSpec.getPartitionNum());
      }
    }
  }
}
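The loop above encodes the append contract: segments from the initial hash-partitioned run keep HashBasedNumberedShardSpec, while appended segments use NumberedShardSpec, share the interval and version, reuse the core partition count, and take higher partition numbers. A minimal sketch of that invariant with made-up numbers, assuming NumberedShardSpec's two-argument constructor is (partitionNum, numCorePartitions):
// Hedged sketch; suppose the first run produced 3 core partitions, so the first appended segment gets partitionNum 3.
int corePartitions = 3;
NumberedShardSpec appended = new NumberedShardSpec(corePartitions, corePartitions);
// appended.getNumCorePartitions() == 3 and appended.getPartitionNum() >= every core partition's number,
// which is what the assertEquals/assertTrue pair in the loop checks.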
use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
the class MaterializedViewSupervisorTest method testCreateTask.
/**
* Verifies that creating a HadoopIndexTask completes without throwing an exception.
*/
@Test
public void testCreateTask() {
  List<DataSegment> baseSegments = Collections.singletonList(
      new DataSegment("base", Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)
  );
  HadoopIndexTask task = spec.createTask(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", baseSegments);
  Assert.assertNotNull(task);
}
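The DataSegment literal above is the nine-argument constructor, and the same shape recurs in testCheckSegments below. A labeled sketch, assuming the order is (dataSource, interval, version, loadSpec, dimensions, metrics, shardSpec, binaryVersion, size):
// Hedged sketch; the field meanings are inferred from the call above and common Druid usage.
DataSegment segment = new DataSegment(
    "base",                                                        // dataSource the segment belongs to
    Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),                 // interval the segment covers
    "2015-01-03",                                                  // version; newer versions shadow older ones
    ImmutableMap.of(),                                             // loadSpec (empty is fine for a metadata-only test)
    ImmutableList.of("dim1", "dim2"),                              // dimension names
    ImmutableList.of("m1"),                                        // metric names
    new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),  // shard spec: partition 0 of a single core partition/bucket
    9,                                                             // binaryVersion of the segment format
    1024                                                           // size in bytes
);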
use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
the class MaterializedViewSupervisorTest method testCheckSegments.
@Test
public void testCheckSegments() throws IOException {
  Set<DataSegment> baseSegments = Sets.newHashSet(
      new DataSegment("base", Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "2015-01-02", ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024),
      new DataSegment("base", Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024),
      new DataSegment("base", Intervals.of("2015-01-03T00Z/2015-01-04T00Z"), "2015-01-04", ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)
  );
  Set<DataSegment> derivativeSegments = Sets.newHashSet(
      new DataSegment(derivativeDatasourceName, Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "2015-01-02", ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024),
      new DataSegment(derivativeDatasourceName, Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "3015-01-01", ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)
  );
  indexerMetadataStorageCoordinator.announceHistoricalSegments(baseSegments);
  indexerMetadataStorageCoordinator.announceHistoricalSegments(derivativeSegments);
  EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
  EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes();
  Pair<SortedMap<Interval, String>, Map<Interval, List<DataSegment>>> toBuildInterval = supervisor.checkSegments();
  Set<Interval> expectedToBuildInterval = Sets.newHashSet(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"));
  Map<Interval, List<DataSegment>> expectedSegments = new HashMap<>();
  expectedSegments.put(
      Intervals.of("2015-01-01T00Z/2015-01-02T00Z"),
      Collections.singletonList(
          new DataSegment("base", Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "2015-01-02", ImmutableMap.of(),
              ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
              new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)
      )
  );
  expectedSegments.put(
      Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
      Collections.singletonList(
          new DataSegment("base", Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", ImmutableMap.of(),
              ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
              new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)
      )
  );
  Assert.assertEquals(expectedToBuildInterval, toBuildInterval.lhs.keySet());
  Assert.assertEquals(expectedSegments, toBuildInterval.rhs);
}