Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From the class MaterializedViewSupervisorTest, the method testCheckSegmentsAndSubmitTasks:
@Test
public void testCheckSegmentsAndSubmitTasks() throws IOException {
  Set<DataSegment> baseSegments = Sets.newHashSet(
      new DataSegment(
          "base",
          Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
          "2015-01-03",
          ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"),
          ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
          9,
          1024
      )
  );
  indexerMetadataStorageCoordinator.announceHistoricalSegments(baseSegments);
  EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
  EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
  EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes();
  EasyMock.expect(taskStorage.getStatus("test_task1"))
          .andReturn(Optional.of(TaskStatus.failure("test_task1", "Dummy task status failure err message")))
          .anyTimes();
  EasyMock.expect(taskStorage.getStatus("test_task2"))
          .andReturn(Optional.of(TaskStatus.running("test_task2")))
          .anyTimes();
  EasyMock.replay(taskStorage);
  Pair<Map<Interval, HadoopIndexTask>, Map<Interval, String>> runningTasksPair = supervisor.getRunningTasks();
  Map<Interval, HadoopIndexTask> runningTasks = runningTasksPair.lhs;
  Map<Interval, String> runningVersion = runningTasksPair.rhs;
  DataSchema dataSchema = new DataSchema("test_datasource", null, null, null, TransformSpec.NONE, objectMapper);
  HadoopIOConfig hadoopIOConfig = new HadoopIOConfig(new HashMap<>(), null, null);
  HadoopIngestionSpec spec = new HadoopIngestionSpec(dataSchema, hadoopIOConfig, null);
  HadoopIndexTask task1 = new HadoopIndexTask("test_task1", spec, null, null, null, objectMapper, null, null, null);
  runningTasks.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), task1);
  runningVersion.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "test_version1");
  HadoopIndexTask task2 = new HadoopIndexTask("test_task2", spec, null, null, null, objectMapper, null, null, null);
  runningTasks.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), task2);
  runningVersion.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "test_version2");
  supervisor.checkSegmentsAndSubmitTasks();
  Map<Interval, HadoopIndexTask> expectedRunningTasks = new HashMap<>();
  Map<Interval, String> expectedRunningVersion = new HashMap<>();
  expectedRunningTasks.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), task2);
  expectedRunningVersion.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "test_version2");
  Assert.assertEquals(expectedRunningTasks, runningTasks);
  Assert.assertEquals(expectedRunningVersion, runningVersion);
}
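For orientation, the seven arguments in the HashBasedNumberedShardSpec constructor call above map to the shard spec's fields. The annotated sketch below is illustrative only; the variable names are made up and the comments describe how the test above uses each position, not an authoritative API reference.

// A minimal sketch, assuming the seven-argument constructor used in the test above.
ObjectMapper mapper = new DefaultObjectMapper(); // any Jackson ObjectMapper; the test simply passes null
HashBasedNumberedShardSpec exampleSpec = new HashBasedNumberedShardSpec(
    0,     // partitionNum: id of this segment within its interval
    1,     // partitions: size of the core partition set
    0,     // bucketId: hash bucket covered by this segment
    1,     // numBuckets: total number of hash buckets
    null,  // partitionDimensions: dimensions to hash on; null/empty hashes on all dimensions
    null,  // partitionFunction: hash function; null keeps the legacy default
    mapper // jsonMapper used when serializing group keys for hashing
);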
Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From the class IndexTaskTest, the method testNumShardsAndPartitionDimensionsProvided:
@Test
public void testNumShardsAndPartitionDimensionsProvided() throws Exception {
  final File tmpDir = temporaryFolder.newFolder();
  final File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("2014-01-01T00:00:10Z,a,1\n");
    writer.write("2014-01-01T01:00:20Z,b,1\n");
    writer.write("2014-01-01T02:00:30Z,c,1\n");
  }
  final IndexTask indexTask = new IndexTask(
      null,
      null,
      createDefaultIngestionSpec(
          jsonMapper,
          tmpDir,
          null,
          null,
          createTuningConfigWithPartitionsSpec(new HashedPartitionsSpec(null, 2, ImmutableList.of("dim")), true),
          false,
          false
      ),
      null
  );
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(2, segments.size());
  for (DataSegment segment : segments) {
    Assert.assertEquals(DATASOURCE, segment.getDataSource());
    Assert.assertEquals(Intervals.of("2014/P1D"), segment.getInterval());
    Assert.assertEquals(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
    final HashBasedNumberedShardSpec hashBasedNumberedShardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
    Assert.assertEquals(HashPartitionFunction.MURMUR3_32_ABS, hashBasedNumberedShardSpec.getPartitionFunction());
    final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
    final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
        new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
        segment.getInterval()
    );
    final Sequence<Cursor> cursorSequence =
        adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    final List<Integer> hashes = cursorSequence.map(cursor -> {
      final DimensionSelector selector =
          cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
      final int hash = HashPartitionFunction.MURMUR3_32_ABS.hash(
          HashBasedNumberedShardSpec.serializeGroupKey(jsonMapper, Collections.singletonList(selector.getObject())),
          hashBasedNumberedShardSpec.getNumBuckets()
      );
      cursor.advance();
      return hash;
    }).toList();
    Assert.assertTrue(hashes.stream().allMatch(h -> h.intValue() == hashes.get(0)));
  }
}
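The hashing step in this test can be read on its own: a row's partition-key values are JSON-serialized with serializeGroupKey and then hashed into one of numBuckets buckets, so rows sharing the same "dim" value always land in the same bucket. A minimal sketch under those assumptions; the helper name bucketForDimValue is hypothetical.

// Sketch only: the hash bucket for one value of the partition dimension "dim".
static int bucketForDimValue(ObjectMapper jsonMapper, String dimValue, int numBuckets) throws JsonProcessingException {
  byte[] groupKey = HashBasedNumberedShardSpec.serializeGroupKey(jsonMapper, Collections.singletonList(dimValue));
  return HashPartitionFunction.MURMUR3_32_ABS.hash(groupKey, numBuckets);
}
// e.g. bucketForDimValue(jsonMapper, "a", 2) returns the same bucket on every call,
// which is what the per-segment assertion above relies on.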
Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From the class HashPartitionAdjustingCorePartitionSizeTest, the method testLessPartitionsThanBuckets:
@Test
public void testLessPartitionsThanBuckets() throws IOException {
  final File inputDir = temporaryFolder.newFolder();
  for (int i = 0; i < 3; i++) {
    try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
      writer.write(StringUtils.format("2020-01-01T00:00:00,%s,b1,%d\n", "a" + (i + 1), 10 * (i + 1)));
    }
  }
  final DimensionBasedPartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 10, ImmutableList.of("dim1"));
  final List<DataSegment> segments = new ArrayList<>(
      runTestTask(
          TIMESTAMP_SPEC, DIMENSIONS_SPEC, INPUT_FORMAT, null, INTERVAL_TO_INDEX,
          inputDir, "test_*", partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS
      )
  );
  Assert.assertEquals(3, segments.size());
  segments.sort(Comparator.comparing(segment -> segment.getShardSpec().getPartitionNum()));
  int prevPartitionId = -1;
  for (DataSegment segment : segments) {
    Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
    final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
    Assert.assertEquals(3, shardSpec.getNumCorePartitions());
    Assert.assertEquals(10, shardSpec.getNumBuckets());
    Assert.assertEquals(ImmutableList.of("dim1"), shardSpec.getPartitionDimensions());
    Assert.assertEquals(prevPartitionId + 1, shardSpec.getPartitionNum());
    prevPartitionId = shardSpec.getPartitionNum();
  }
}
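The assertions above capture the adjustment this test exercises: ten hash buckets are requested through HashedPartitionsSpec, but the three input rows only fill three of them, so three segments are published and the core partition count is shrunk to three while the bucket count stays at ten. Below is a sketch of the shard spec shape the test expects for its first segment; the bucket id, partition function, and mapper are assumptions, and the remaining values come from the assertions.

// Illustrative only: roughly the shard spec of the first published segment.
HashBasedNumberedShardSpec firstSegmentSpec = new HashBasedNumberedShardSpec(
    0,                                     // partitionNum of the first segment
    3,                                     // numCorePartitions, adjusted down from the 10 requested buckets
    0,                                     // bucketId (assumed; the test does not pin bucket ids)
    10,                                    // numBuckets, as configured in HashedPartitionsSpec
    ImmutableList.of("dim1"),              // partitionDimensions
    HashPartitionFunction.MURMUR3_32_ABS,  // partitionFunction (assumed default)
    new DefaultObjectMapper()              // jsonMapper (assumed)
);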
Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From the class HashPartitionMultiPhaseParallelIndexingTest, the method assertHashedPartition:
private void assertHashedPartition(
    Set<DataSegment> publishedSegments,
    Map<Interval, Integer> expectedIntervalToNumSegments
) throws IOException {
  final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
  publishedSegments.forEach(
      segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment)
  );
  Assert.assertEquals(new HashSet<>(inputIntervals), intervalToSegments.keySet());
  final File tempSegmentDir = temporaryFolder.newFolder();
  for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
    Interval interval = entry.getKey();
    List<DataSegment> segmentsInInterval = entry.getValue();
    Assert.assertEquals(expectedIntervalToNumSegments.get(interval).intValue(), segmentsInInterval.size());
    for (DataSegment segment : segmentsInInterval) {
      Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
      final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
      Assert.assertEquals(HashPartitionFunction.MURMUR3_32_ABS, shardSpec.getPartitionFunction());
      List<ScanResultValue> results = querySegment(segment, ImmutableList.of("dim1", "dim2"), tempSegmentDir);
      final int hash = shardSpec.getPartitionFunction().hash(
          HashBasedNumberedShardSpec.serializeGroupKey(getObjectMapper(), (List<Object>) results.get(0).getEvents()),
          shardSpec.getNumBuckets()
      );
      for (ScanResultValue value : results) {
        Assert.assertEquals(
            hash,
            shardSpec.getPartitionFunction().hash(
                HashBasedNumberedShardSpec.serializeGroupKey(getObjectMapper(), (List<Object>) value.getEvents()),
                shardSpec.getNumBuckets()
            )
        );
      }
    }
  }
}
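The repeated expression in the inner loop can be factored into a small helper. The sketch below is not part of the test and the helper name is invented, but it states the property being checked: every row read back from a segment must hash to the same bucket under that segment's own partition function and bucket count.

// Hypothetical helper: the bucket a row's partition-key values map to under a given shard spec.
private static int bucketOf(ObjectMapper mapper, HashBasedNumberedShardSpec shardSpec, List<Object> groupKey)
    throws JsonProcessingException {
  return shardSpec.getPartitionFunction().hash(
      HashBasedNumberedShardSpec.serializeGroupKey(mapper, groupKey),
      shardSpec.getNumBuckets()
  );
}
// With it, the loop body reduces to asserting that bucketOf(...) is identical for every ScanResultValue.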
Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.
From the class SegmentAllocateActionTest, the method testWithPartialShardSpecAndOvershadowingSegments:
@Test
public void testWithPartialShardSpecAndOvershadowingSegments() throws IOException {
  final Task task = NoopTask.create();
  taskActionTestKit.getTaskLockbox().add(task);
  final ObjectMapper objectMapper = new DefaultObjectMapper();
  taskActionTestKit.getMetadataStorageCoordinator().announceHistoricalSegments(
      ImmutableSet.of(
          DataSegment.builder().dataSource(DATA_SOURCE)
                     .interval(Granularities.HOUR.bucket(PARTY_TIME)).version(PARTY_TIME.toString())
                     .shardSpec(new HashBasedNumberedShardSpec(0, 2, 0, 2, ImmutableList.of("dim1"), null, objectMapper))
                     .size(0).build(),
          DataSegment.builder().dataSource(DATA_SOURCE)
                     .interval(Granularities.HOUR.bucket(PARTY_TIME)).version(PARTY_TIME.toString())
                     .shardSpec(new HashBasedNumberedShardSpec(1, 2, 1, 2, ImmutableList.of("dim1"), null, objectMapper))
                     .size(0).build()
      )
  );
  final SegmentAllocateAction action = new SegmentAllocateAction(
      DATA_SOURCE, PARTY_TIME, Granularities.MINUTE, Granularities.HOUR, "seq", null, true,
      new HashBasedNumberedPartialShardSpec(ImmutableList.of("dim1"), 1, 2, null),
      lockGranularity, null
  );
  final SegmentIdWithShardSpec segmentIdentifier = action.perform(task, taskActionTestKit.getTaskActionToolbox());
  Assert.assertNotNull(segmentIdentifier);
  final ShardSpec shardSpec = segmentIdentifier.getShardSpec();
  Assert.assertEquals(2, shardSpec.getPartitionNum());
  Assert.assertTrue(shardSpec instanceof HashBasedNumberedShardSpec);
  final HashBasedNumberedShardSpec hashBasedNumberedShardSpec = (HashBasedNumberedShardSpec) shardSpec;
  Assert.assertEquals(2, hashBasedNumberedShardSpec.getNumCorePartitions());
  Assert.assertEquals(ImmutableList.of("dim1"), hashBasedNumberedShardSpec.getPartitionDimensions());
}
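The allocation request above asks for hash bucket 1 of 2 on dim1, in an interval that already holds two core partitions. Below is a sketch annotating the partial shard spec arguments; the comments reflect how this test uses each position and should be read as an assumption rather than an API reference.

// Illustrative only: the partial shard spec passed to SegmentAllocateAction above.
HashBasedNumberedPartialShardSpec partialShardSpec = new HashBasedNumberedPartialShardSpec(
    ImmutableList.of("dim1"), // dimensions to hash on
    1,                        // bucket the newly allocated segment should cover
    2,                        // total number of hash buckets
    null                      // partition function; null keeps the default behavior
);
// Because partitions 0 and 1 already exist for the hour, the new segment is allocated
// partitionNum 2 while numCorePartitions stays 2, exactly what the assertions above check.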