
Example 1 with DimensionRangePartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.

From class CompactionTaskParallelRunTest, method testRunParallelWithMultiDimensionRangePartitioningWithSingleTask:

@Test
public void testRunParallelWithMultiDimensionRangePartitioningWithSingleTask() throws Exception {
    // Range partitioning is not supported with segment lock yet
    Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(newTuningConfig(
            new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false),
            1,
            true))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    for (DataSegment segment : compactedSegments) {
        // Compaction state should be recorded on each segment, since storeCompactionState defaults to true
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        Assert.assertSame(DimensionRangeShardSpec.class, segment.getShardSpec().getClass());
        CompactionState expectedState = new CompactionState(
            new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedLongSumMetric),
            null,
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(new UniformGranularitySpec(
                    Granularities.HOUR, Granularities.MINUTE, true, ImmutableList.of(segment.getInterval()))),
                Map.class));
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used: HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
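As a side note, the DimensionRangePartitionsSpec constructor used throughout these examples takes (targetRowsPerSegment, maxRowsPerSegment, partitionDimensions, assumeGrouped), and only one of the two row counts is expected to be non-null. A minimal sketch of both variants, based only on the constructor calls visible in these examples (the class name PartitionsSpecSketch is made up):

import java.util.Arrays;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;

public class PartitionsSpecSketch {
    public static void main(String[] args) {
        // Target-driven variant, as in Example 1: aim for about 7 rows per segment.
        DimensionRangePartitionsSpec byTarget = new DimensionRangePartitionsSpec(
            7,                             // targetRowsPerSegment
            null,                          // maxRowsPerSegment
            Arrays.asList("dim1", "dim2"), // partitionDimensions
            false);                        // assumeGrouped
        // Max-driven variant, as in Example 3: hard-cap each segment instead.
        DimensionRangePartitionsSpec byMax = new DimensionRangePartitionsSpec(
            null, 1, Arrays.asList("dim1", "dim2"), false);
        System.out.println(byTarget + "\n" + byMax);
    }
}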

Example 2 with DimensionRangePartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.

From class RangePartitionAdjustingCorePartitionSizeTest, method testLessPartitionsThanBuckets:

@Test
public void testLessPartitionsThanBuckets() throws IOException {
    final File inputDir = temporaryFolder.newFolder();
    for (int i = 0; i < 2; i++) {
        try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
            writer.write(StringUtils.format("2020-01-01T00:00:00,aaa,b1,10\n"));
        }
    }
    for (int i = 0; i < 3; i++) {
        try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + (i + 2)).toPath(), StandardCharsets.UTF_8)) {
            writer.write(StringUtils.format("2020-01-01T00:00:00,zzz,b1,10\n"));
        }
    }
    final List<String> partitionDimensions = Collections.singletonList("dim1");
    final DimensionBasedPartitionsSpec partitionsSpec = new DimensionRangePartitionsSpec(2, null, partitionDimensions, false);
    final List<DataSegment> segments = new ArrayList<>(
        runTestTask(
            TIMESTAMP_SPEC,
            DIMENSIONS_SPEC,
            INPUT_FORMAT,
            null,
            INTERVAL_TO_INDEX,
            inputDir,
            "test_*",
            partitionsSpec,
            maxNumConcurrentSubTasks,
            TaskState.SUCCESS));
    Assert.assertEquals(1, segments.size());
    final DataSegment segment = segments.get(0);
    Assert.assertSame(SingleDimensionShardSpec.class, segment.getShardSpec().getClass());
    final SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segment.getShardSpec();
    Assert.assertEquals(1, shardSpec.getNumCorePartitions());
    Assert.assertEquals(0, shardSpec.getPartitionNum());
    Assert.assertEquals(partitionDimensions, shardSpec.getDimensions());
}
Also used: ArrayList (java.util.ArrayList), DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec), DimensionBasedPartitionsSpec (org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec), File (java.io.File), DataSegment (org.apache.druid.timeline.DataSegment), SingleDimensionShardSpec (org.apache.druid.timeline.partition.SingleDimensionShardSpec), Writer (java.io.Writer), Test (org.junit.Test)
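The assertions above boil down to one check: when the data contains fewer distinct partition-dimension values than requested buckets, the task collapses to a single core partition. A hypothetical helper capturing that check, using only the accessors exercised in the test (the class name ShardSpecChecks is made up):

import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.SingleDimensionShardSpec;

final class ShardSpecChecks {
    // Throws if the segment is not the sole core partition of its time chunk.
    static void assertSingleCorePartition(DataSegment segment) {
        SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segment.getShardSpec();
        if (shardSpec.getNumCorePartitions() != 1 || shardSpec.getPartitionNum() != 0) {
            throw new AssertionError("expected one core partition, got " + shardSpec);
        }
    }
}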

Example 3 with DimensionRangePartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.

From class RangePartitionCachingLocalSegmentAllocatorTest, method setup:

@Before
public void setup() throws IOException {
    TaskToolbox toolbox = createToolbox(
        INTERVAL_TO_VERSION.keySet()
                           .stream()
                           .map(RangePartitionCachingLocalSegmentAllocatorTest::createTaskLock)
                           .collect(Collectors.toList()));
    final RangePartitionAnalysis partitionAnalysis = new RangePartitionAnalysis(new DimensionRangePartitionsSpec(null, 1, PARTITION_DIMENSIONS, false));
    INTERVAL_TO_PARTITIONS.forEach(partitionAnalysis::updateBucket);
    target = SegmentAllocators.forNonLinearPartitioning(
        toolbox,
        DATASOURCE,
        TASKID,
        new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of()),
        new SupervisorTaskAccessWithNullClient(SUPERVISOR_TASKID),
        partitionAnalysis);
    sequenceNameFunction = ((CachingLocalSegmentAllocator) target).getSequenceNameFunction();
}
Also used: TaskToolbox (org.apache.druid.indexing.common.TaskToolbox), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), RangePartitionAnalysis (org.apache.druid.indexing.common.task.batch.partition.RangePartitionAnalysis), DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec), Before (org.junit.Before)
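The setup follows a simple pattern: build a RangePartitionAnalysis from the partitions spec, seed it with precomputed per-interval boundaries, then hand it to the allocator factory. A sketch of just the seeding step, assuming only the calls visible above (the helper name AnalysisSketch is hypothetical; in the test the map comes from INTERVAL_TO_PARTITIONS):

import java.util.List;
import java.util.Map;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;
import org.apache.druid.indexing.common.task.batch.partition.RangePartitionAnalysis;
import org.apache.druid.timeline.partition.PartitionBoundaries;
import org.joda.time.Interval;

final class AnalysisSketch {
    // Seeds an analysis with per-interval boundaries, mirroring
    // INTERVAL_TO_PARTITIONS.forEach(partitionAnalysis::updateBucket) above.
    static RangePartitionAnalysis seed(
            Map<Interval, PartitionBoundaries> intervalToPartitions,
            List<String> partitionDimensions) {
        // maxRowsPerSegment = 1 mirrors the spec used in the setup above.
        RangePartitionAnalysis analysis = new RangePartitionAnalysis(
            new DimensionRangePartitionsSpec(null, 1, partitionDimensions, false));
        intervalToPartitions.forEach(analysis::updateBucket);
        return analysis;
    }
}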

Example 4 with DimensionRangePartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.

From class ParallelIndexSupervisorTask, method determineRangePartition:

private PartitionBoundaries determineRangePartition(Collection<StringDistribution> distributions) {
    StringDistributionMerger distributionMerger = new StringSketchMerger();
    distributions.forEach(distributionMerger::merge);
    StringDistribution mergedDistribution = distributionMerger.getResult();
    DimensionRangePartitionsSpec partitionsSpec =
        (DimensionRangePartitionsSpec) ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec();
    final PartitionBoundaries partitions;
    Integer targetRowsPerSegment = partitionsSpec.getTargetRowsPerSegment();
    if (targetRowsPerSegment == null) {
        partitions = mergedDistribution.getEvenPartitionsByMaxSize(partitionsSpec.getMaxRowsPerSegment());
    } else {
        partitions = mergedDistribution.getEvenPartitionsByTargetSize(targetRowsPerSegment);
    }
    return partitions;
}
Also used: AtomicInteger (java.util.concurrent.atomic.AtomicInteger), StringDistribution (org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution), StringSketchMerger (org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketchMerger), StringDistributionMerger (org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistributionMerger), DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec), PartitionBoundaries (org.apache.druid.timeline.partition.PartitionBoundaries)
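The branch in the middle encodes the either/or contract of the spec: if targetRowsPerSegment is set, partition boundaries are cut to even target sizes; otherwise maxRowsPerSegment drives the cut. A generic restatement using only the two distribution methods called above (the helper name RangeCut is made up):

import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution;
import org.apache.druid.timeline.partition.PartitionBoundaries;

final class RangeCut {
    // targetRowsPerSegment wins when present; otherwise fall back to the max cap.
    // Per the spec's contract, exactly one of the two counts is non-null.
    static PartitionBoundaries cut(StringDistribution merged, Integer target, Integer max) {
        return target == null
            ? merged.getEvenPartitionsByMaxSize(max)
            : merged.getEvenPartitionsByTargetSize(target);
    }
}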

Example 5 with DimensionRangePartitionsSpec

Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.

From class PartialDimensionDistributionTask, method runTask:

@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    DataSchema dataSchema = ingestionSchema.getDataSchema();
    GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
    ParallelIndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig();
    DimensionRangePartitionsSpec partitionsSpec = (DimensionRangePartitionsSpec) tuningConfig.getPartitionsSpec();
    Preconditions.checkNotNull(partitionsSpec, "partitionsSpec required in tuningConfig");
    final List<String> partitionDimensions = partitionsSpec.getPartitionDimensions();
    Preconditions.checkArgument(
        partitionDimensions != null && !partitionDimensions.isEmpty(),
        "partitionDimension required in partitionsSpec");
    boolean isAssumeGrouped = partitionsSpec.isAssumeGrouped();
    InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(ingestionSchema.getDataSchema().getParser());
    InputFormat inputFormat = inputSource.needsFormat() ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) : null;
    final RowIngestionMeters buildSegmentsMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    final ParseExceptionHandler parseExceptionHandler = new ParseExceptionHandler(
        buildSegmentsMeters,
        tuningConfig.isLogParseExceptions(),
        tuningConfig.getMaxParseExceptions(),
        tuningConfig.getMaxSavedParseExceptions());
    final boolean determineIntervals = granularitySpec.inputIntervals().isEmpty();
    try (
        final CloseableIterator<InputRow> inputRowIterator = AbstractBatchIndexTask.inputSourceReader(
            toolbox.getIndexingTmpDir(),
            dataSchema,
            inputSource,
            inputFormat,
            determineIntervals ? Objects::nonNull : AbstractBatchIndexTask.defaultRowFilter(granularitySpec),
            buildSegmentsMeters,
            parseExceptionHandler);
        HandlingInputRowIterator iterator =
            new RangePartitionIndexTaskInputRowIteratorBuilder(partitionDimensions, SKIP_NULL)
                .delegate(inputRowIterator)
                .granularitySpec(granularitySpec)
                .build()) {
        Map<Interval, StringDistribution> distribution =
            determineDistribution(iterator, granularitySpec, partitionDimensions, isAssumeGrouped);
        sendReport(toolbox, new DimensionDistributionReport(getId(), distribution));
    }
    return TaskStatus.success(getId());
}
Also used: InputSource (org.apache.druid.data.input.InputSource), StringDistribution (org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution), DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec), HandlingInputRowIterator (org.apache.druid.data.input.HandlingInputRowIterator), DataSchema (org.apache.druid.segment.indexing.DataSchema), GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec), InputFormat (org.apache.druid.data.input.InputFormat), ParseExceptionHandler (org.apache.druid.segment.incremental.ParseExceptionHandler), InputRow (org.apache.druid.data.input.InputRow), RangePartitionIndexTaskInputRowIteratorBuilder (org.apache.druid.indexing.common.task.batch.parallel.iterator.RangePartitionIndexTaskInputRowIteratorBuilder), RowIngestionMeters (org.apache.druid.segment.incremental.RowIngestionMeters), Interval (org.joda.time.Interval)
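The first few lines of runTask are pure validation: the tuning config must carry a range partitions spec with at least one partition dimension. A hypothetical standalone version of that guard, using only the accessors and error messages shown above (the class name SpecChecks is made up):

import com.google.common.base.Preconditions;
import java.util.List;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;

final class SpecChecks {
    // Validates and narrows a raw partitions spec, as runTask does before use.
    static DimensionRangePartitionsSpec requireRangeSpec(Object partitionsSpec) {
        Preconditions.checkNotNull(partitionsSpec, "partitionsSpec required in tuningConfig");
        DimensionRangePartitionsSpec spec = (DimensionRangePartitionsSpec) partitionsSpec;
        List<String> dims = spec.getPartitionDimensions();
        Preconditions.checkArgument(
            dims != null && !dims.isEmpty(),
            "partitionDimension required in partitionsSpec");
        return spec;
    }
}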

Aggregations

DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec): 9 uses
DataSegment (org.apache.druid.timeline.DataSegment): 5 uses
Test (org.junit.Test): 5 uses
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 3 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 2 uses
File (java.io.File): 2 uses
Writer (java.io.Writer): 2 uses
HashMap (java.util.HashMap): 2 uses
Map (java.util.Map): 2 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 2 uses
DimensionBasedPartitionsSpec (org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec): 2 uses
Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 2 uses
AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest): 2 uses
StringDistribution (org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution): 2 uses
CompactionState (org.apache.druid.timeline.CompactionState): 2 uses
SingleDimensionShardSpec (org.apache.druid.timeline.partition.SingleDimensionShardSpec): 2 uses
ArrayList (java.util.ArrayList): 1 use
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1 use
HandlingInputRowIterator (org.apache.druid.data.input.HandlingInputRowIterator): 1 use
InputFormat (org.apache.druid.data.input.InputFormat): 1 use