Search in sources:

Example 1 with PartitionBoundaries

use of org.apache.druid.timeline.partition.PartitionBoundaries in project druid by druid-io.

In class StringSketchMergerTest, the method mergesCorrectly:

/**
 * Verifies that sketches merged in arbitrary order yield even partitions whose
 * only non-null boundary is the middle value, with open-ended first and last
 * boundaries (null at indices 0 and size-1).
 */
@Test
public void mergesCorrectly() {
    final StringTuple low = StringTuple.create("a");
    final StringTuple middle = StringTuple.create("mn");
    final StringTuple high = StringTuple.create("z");

    final StringSketch lowSketch = new StringSketch();
    lowSketch.put(low);
    final StringSketch middleSketch = new StringSketch();
    middleSketch.put(middle);
    final StringSketch highSketch = new StringSketch();
    highSketch.put(high);

    // Merge deliberately out of value order to exercise the merger.
    target.merge(middleSketch);
    target.merge(lowSketch);
    target.merge(highSketch);

    final StringDistribution merged = target.getResult();
    final PartitionBoundaries partitions = merged.getEvenPartitionsByMaxSize(1);

    Assert.assertEquals(3, partitions.size());
    Assert.assertNull(partitions.get(0));
    Assert.assertEquals(middle, partitions.get(1));
    Assert.assertNull(partitions.get(2));
}
Also used : StringTuple(org.apache.druid.data.input.StringTuple) PartitionBoundaries(org.apache.druid.timeline.partition.PartitionBoundaries) Test(org.junit.Test)

Example 2 with PartitionBoundaries

use of org.apache.druid.timeline.partition.PartitionBoundaries in project druid by druid-io.

In class ParallelIndexSupervisorTask, the method determineRangePartition:

/**
 * Merges the given per-subtask string distributions into a single distribution
 * and computes range-partition boundaries from it.
 *
 * <p>Uses {@code targetRowsPerSegment} from the tuning config's partitions spec
 * when it is set; otherwise falls back to {@code maxRowsPerSegment}.
 */
private PartitionBoundaries determineRangePartition(Collection<StringDistribution> distributions) {
    final StringDistributionMerger merger = new StringSketchMerger();
    for (StringDistribution distribution : distributions) {
        merger.merge(distribution);
    }
    final StringDistribution merged = merger.getResult();

    final DimensionRangePartitionsSpec spec =
        (DimensionRangePartitionsSpec) ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec();

    // Prefer a target size when configured; otherwise partition by max size.
    final Integer targetRows = spec.getTargetRowsPerSegment();
    return targetRows == null
           ? merged.getEvenPartitionsByMaxSize(spec.getMaxRowsPerSegment())
           : merged.getEvenPartitionsByTargetSize(targetRows);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StringDistribution(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution) StringSketchMerger(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketchMerger) StringDistributionMerger(org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistributionMerger) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) PartitionBoundaries(org.apache.druid.timeline.partition.PartitionBoundaries)

Example 3 with PartitionBoundaries

use of org.apache.druid.timeline.partition.PartitionBoundaries in project druid by druid-io.

In class ParallelIndexSupervisorTask, the method runRangePartitionMultiPhaseParallel:

/**
 * Runs the multi-phase parallel indexing flow for range partitioning:
 * (1) collect per-subtask dimension-value distribution reports,
 * (2) generate partial range-partitioned segments using boundaries derived
 *     from the merged distributions, and
 * (3) merge the partial segments and publish the result.
 *
 * <p>Returns a failure status as soon as a phase fails. An empty set of
 * partition boundaries (no valid rows) is reported as success with a warning.
 * A task completion report is written before returning from the merge phase.
 */
@VisibleForTesting
TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception {
    ParallelIndexIngestionSpec ingestionSchemaToUse = ingestionSchema;
    // Phase 1: run sub-tasks that report dimension-value distributions.
    ParallelIndexTaskRunner<PartialDimensionDistributionTask, DimensionDistributionReport> distributionRunner = createRunner(toolbox, this::createPartialDimensionDistributionRunner);
    TaskState distributionState = runNextPhase(distributionRunner);
    if (distributionState.isFailure()) {
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, distributionRunner.getName());
        return TaskStatus.failure(getId(), errMsg);
    }
    // Derive per-interval partition boundaries from the merged distribution reports.
    Map<Interval, PartitionBoundaries> intervalToPartitions = determineAllRangePartitions(distributionRunner.getReports().values());
    if (intervalToPartitions.isEmpty()) {
        // No indexable rows at all: treated as a successful (empty) run, not a failure.
        String msg = "No valid rows for single dimension partitioning." + " All rows may have invalid timestamps or multiple dimension values.";
        LOG.warn(msg);
        return TaskStatus.success(getId(), msg);
    }
    // Ensure the spec carries explicit intervals before generating segments.
    ingestionSchemaToUse = rewriteIngestionSpecWithIntervalsIfMissing(ingestionSchemaToUse, intervalToPartitions.keySet());
    // Effectively-final copy so it can be captured by the lambda below.
    final ParallelIndexIngestionSpec segmentCreateIngestionSpec = ingestionSchemaToUse;
    // Phase 2: generate partial range-partitioned segments per interval/bucket.
    ParallelIndexTaskRunner<PartialRangeSegmentGenerateTask, GeneratedPartitionsReport> indexingRunner = createRunner(toolbox, tb -> createPartialRangeSegmentGenerateRunner(tb, intervalToPartitions, segmentCreateIngestionSpec));
    TaskState indexingState = runNextPhase(indexingRunner);
    if (indexingState.isFailure()) {
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, indexingRunner.getName());
        return TaskStatus.failure(getId(), errMsg);
    }
    // partition (interval, partitionId) -> partition locations
    Map<Partition, List<PartitionLocation>> partitionToLocations = getPartitionToLocations(indexingRunner.getReports());
    final List<PartialSegmentMergeIOConfig> ioConfigs = createGenericMergeIOConfigs(ingestionSchema.getTuningConfig().getTotalNumMergeTasks(), partitionToLocations);
    // Effectively-final copy so it can be captured by the lambda below.
    final ParallelIndexIngestionSpec segmentMergeIngestionSpec = ingestionSchemaToUse;
    // Phase 3: merge the partial segments into final segments.
    ParallelIndexTaskRunner<PartialGenericSegmentMergeTask, PushedSegmentsReport> mergeRunner = createRunner(toolbox, tb -> createPartialGenericSegmentMergeRunner(tb, ioConfigs, segmentMergeIngestionSpec));
    TaskState mergeState = runNextPhase(mergeRunner);
    TaskStatus taskStatus;
    if (mergeState.isSuccess()) {
        publishSegments(toolbox, mergeRunner.getReports());
        if (awaitSegmentAvailabilityTimeoutMillis > 0) {
            // Optionally block until the published segments become available.
            waitForSegmentAvailability(mergeRunner.getReports());
        }
        taskStatus = TaskStatus.success(getId());
    } else {
        // there is only success or failure after running....
        Preconditions.checkState(mergeState.isFailure(), "Unrecognized state after task is complete[%s]", mergeState);
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, mergeRunner.getName());
        taskStatus = TaskStatus.failure(getId(), errMsg);
    }
    // Write the completion report regardless of merge success/failure.
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports(taskStatus, segmentAvailabilityConfirmationCompleted));
    return taskStatus;
}
Also used : TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) List(java.util.List) TaskState(org.apache.druid.indexer.TaskState) PartitionBoundaries(org.apache.druid.timeline.partition.PartitionBoundaries) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with PartitionBoundaries

use of org.apache.druid.timeline.partition.PartitionBoundaries in project druid by druid-io.

In class RangePartitionCachingLocalSegmentAllocatorTest, the method getPartitionEnd:

/**
 * Returns the boundary value that follows the given bucket within the
 * interval's partitions, or null when the bucket is the last partition
 * (i.e. there is no boundary after it).
 */
@Nullable
private static StringTuple getPartitionEnd(Interval interval, int bucketId) {
    final PartitionBoundaries boundaries = INTERVAL_TO_PARTITIONS.get(interval);
    final int nextIndex = bucketId + 1;
    if (nextIndex == boundaries.size()) {
        // Last partition: no upper boundary.
        return null;
    }
    return boundaries.get(nextIndex);
}
Also used : PartitionBoundaries(org.apache.druid.timeline.partition.PartitionBoundaries) Nullable(javax.annotation.Nullable)

Aggregations

PartitionBoundaries (org.apache.druid.timeline.partition.PartitionBoundaries)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 Nullable (javax.annotation.Nullable)1 StringTuple (org.apache.druid.data.input.StringTuple)1 TaskState (org.apache.druid.indexer.TaskState)1 TaskStatus (org.apache.druid.indexer.TaskStatus)1 DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec)1 StringDistribution (org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution)1 StringDistributionMerger (org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistributionMerger)1 StringSketchMerger (org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketchMerger)1 Interval (org.joda.time.Interval)1 Test (org.junit.Test)1