Example usage of org.apache.druid.timeline.partition.PartitionBoundaries in the druid project (druid-io):
class StringSketchMergerTest, method mergesCorrectly.
@Test
public void mergesCorrectly() {
  // Build three single-value sketches whose values span the sort order.
  final StringTuple low = StringTuple.create("a");
  final StringTuple middle = StringTuple.create("mn");
  final StringTuple high = StringTuple.create("z");

  final StringSketch lowSketch = new StringSketch();
  lowSketch.put(low);
  final StringSketch middleSketch = new StringSketch();
  middleSketch.put(middle);
  final StringSketch highSketch = new StringSketch();
  highSketch.put(high);

  // Merge out of sorted order to exercise order-independence of the merger.
  target.merge(middleSketch);
  target.merge(lowSketch);
  target.merge(highSketch);

  final StringDistribution merged = target.getResult();
  final PartitionBoundaries partitions = merged.getEvenPartitionsByMaxSize(1);

  // Expect three boundaries: open-ended first/last (null) and the middle value between.
  Assert.assertEquals(3, partitions.size());
  Assert.assertNull(partitions.get(0));
  Assert.assertEquals(middle, partitions.get(1));
  Assert.assertNull(partitions.get(2));
}
Example usage of org.apache.druid.timeline.partition.PartitionBoundaries in the druid project (druid-io):
class ParallelIndexSupervisorTask, method determineRangePartition.
/**
 * Merges the per-task string distributions into one and derives range partition
 * boundaries from the merged distribution, using either the target or the max
 * rows-per-segment setting of the configured partitions spec.
 */
private PartitionBoundaries determineRangePartition(Collection<StringDistribution> distributions) {
  final StringDistributionMerger merger = new StringSketchMerger();
  for (StringDistribution distribution : distributions) {
    merger.merge(distribution);
  }
  final StringDistribution merged = merger.getResult();

  final DimensionRangePartitionsSpec spec =
      (DimensionRangePartitionsSpec) ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec();
  final Integer targetRowsPerSegment = spec.getTargetRowsPerSegment();

  // Prefer the explicit target size when set; otherwise fall back to the max size.
  return targetRowsPerSegment == null
         ? merged.getEvenPartitionsByMaxSize(spec.getMaxRowsPerSegment())
         : merged.getEvenPartitionsByTargetSize(targetRowsPerSegment);
}
Example usage of org.apache.druid.timeline.partition.PartitionBoundaries in the druid project (druid-io):
class ParallelIndexSupervisorTask, method runRangePartitionMultiPhaseParallel.
/**
 * Runs range-partitioned parallel ingestion as three sequential phases:
 * (1) dimension-distribution collection, (2) partial segment generation per
 * (interval, partition boundaries), and (3) partial segment merge + publish.
 * Returns failure as soon as any phase fails; writes a task completion report
 * in all merge-phase outcomes.
 */
@VisibleForTesting
TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception {
ParallelIndexIngestionSpec ingestionSchemaToUse = ingestionSchema;
// Phase 1: collect per-subtask dimension value distributions.
ParallelIndexTaskRunner<PartialDimensionDistributionTask, DimensionDistributionReport> distributionRunner = createRunner(toolbox, this::createPartialDimensionDistributionRunner);
TaskState distributionState = runNextPhase(distributionRunner);
if (distributionState.isFailure()) {
String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, distributionRunner.getName());
return TaskStatus.failure(getId(), errMsg);
}
// Derive partition boundaries per interval from the merged distributions.
Map<Interval, PartitionBoundaries> intervalToPartitions = determineAllRangePartitions(distributionRunner.getReports().values());
if (intervalToPartitions.isEmpty()) {
// Nothing to partition — treated as success, not failure.
String msg = "No valid rows for single dimension partitioning." + " All rows may have invalid timestamps or multiple dimension values.";
LOG.warn(msg);
return TaskStatus.success(getId(), msg);
}
// Ensure the spec carries explicit intervals before segment generation.
ingestionSchemaToUse = rewriteIngestionSpecWithIntervalsIfMissing(ingestionSchemaToUse, intervalToPartitions.keySet());
// Effectively-final copy required for capture by the lambda below.
final ParallelIndexIngestionSpec segmentCreateIngestionSpec = ingestionSchemaToUse;
// Phase 2: generate partial segments bounded by the computed partitions.
ParallelIndexTaskRunner<PartialRangeSegmentGenerateTask, GeneratedPartitionsReport> indexingRunner = createRunner(toolbox, tb -> createPartialRangeSegmentGenerateRunner(tb, intervalToPartitions, segmentCreateIngestionSpec));
TaskState indexingState = runNextPhase(indexingRunner);
if (indexingState.isFailure()) {
String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, indexingRunner.getName());
return TaskStatus.failure(getId(), errMsg);
}
// partition (interval, partitionId) -> partition locations
Map<Partition, List<PartitionLocation>> partitionToLocations = getPartitionToLocations(indexingRunner.getReports());
final List<PartialSegmentMergeIOConfig> ioConfigs = createGenericMergeIOConfigs(ingestionSchema.getTuningConfig().getTotalNumMergeTasks(), partitionToLocations);
final ParallelIndexIngestionSpec segmentMergeIngestionSpec = ingestionSchemaToUse;
// Phase 3: merge the partial segments and push the results.
ParallelIndexTaskRunner<PartialGenericSegmentMergeTask, PushedSegmentsReport> mergeRunner = createRunner(toolbox, tb -> createPartialGenericSegmentMergeRunner(tb, ioConfigs, segmentMergeIngestionSpec));
TaskState mergeState = runNextPhase(mergeRunner);
TaskStatus taskStatus;
if (mergeState.isSuccess()) {
publishSegments(toolbox, mergeRunner.getReports());
// Optionally block until the published segments become queryable.
if (awaitSegmentAvailabilityTimeoutMillis > 0) {
waitForSegmentAvailability(mergeRunner.getReports());
}
taskStatus = TaskStatus.success(getId());
} else {
// there is only success or failure after running....
Preconditions.checkState(mergeState.isFailure(), "Unrecognized state after task is complete[%s]", mergeState);
String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, mergeRunner.getName());
taskStatus = TaskStatus.failure(getId(), errMsg);
}
// Persist the completion report regardless of the merge-phase outcome.
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports(taskStatus, segmentAvailabilityConfirmationCompleted));
return taskStatus;
}
Example usage of org.apache.druid.timeline.partition.PartitionBoundaries in the druid project (druid-io):
class RangePartitionCachingLocalSegmentAllocatorTest, method getPartitionEnd.
/**
 * Returns the exclusive upper boundary of the given bucket within the interval's
 * partition boundaries, or {@code null} for the last (open-ended) bucket.
 */
@Nullable
private static StringTuple getPartitionEnd(Interval interval, int bucketId) {
  final PartitionBoundaries partitions = INTERVAL_TO_PARTITIONS.get(interval);
  final int nextBoundaryIndex = bucketId + 1;
  if (nextBoundaryIndex == partitions.size()) {
    // The final bucket has no upper bound.
    return null;
  }
  return partitions.get(nextBoundaryIndex);
}
Aggregations