Example 26 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From class HadoopDruidDetermineConfigurationJob, method run().

@Override
public boolean run() {
    JobHelper.ensurePaths(config);
    if (config.isDeterminingPartitions()) {
        job = createPartitionJob(config);
        config.setHadoopJobIdFileName(hadoopJobIdFile);
        boolean jobSucceeded = JobHelper.runSingleJob(job);
        JobHelper.maybeDeleteIntermediatePath(jobSucceeded, config.getSchema());
        return jobSucceeded;
    } else {
        final PartitionsSpec partitionsSpec = config.getPartitionsSpec();
        final int shardsPerInterval;
        final HashPartitionFunction partitionFunction;
        if (partitionsSpec instanceof HashedPartitionsSpec) {
            final HashedPartitionsSpec hashedPartitionsSpec = (HashedPartitionsSpec) partitionsSpec;
            shardsPerInterval = PartitionsSpec.isEffectivelyNull(hashedPartitionsSpec.getNumShards()) ? 1 : hashedPartitionsSpec.getNumShards();
            partitionFunction = hashedPartitionsSpec.getPartitionFunction();
        } else {
            shardsPerInterval = 1;
            partitionFunction = null;
        }
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();
            // shardsPerInterval is at least 1 here: a hashed spec with numShards unset falls back to a single shard
            List<HadoopyShardSpec> specs = Lists.newArrayListWithCapacity(shardsPerInterval);
            for (int i = 0; i < shardsPerInterval; i++) {
                specs.add(new HadoopyShardSpec(
                        new HashBasedNumberedShardSpec(
                                i,
                                shardsPerInterval,
                                i,
                                shardsPerInterval,
                                config.getPartitionsSpec().getPartitionDimensions(),
                                partitionFunction,
                                HadoopDruidIndexerConfig.JSON_MAPPER
                        ),
                        shardCount++
                ));
            }
            shardSpecs.put(bucket.getMillis(), specs);
            log.info("DateTime[%s], spec[%s]", bucket, specs);
        }
        config.setShardSpecs(shardSpecs);
        return true;
    }
}
Also used: HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec), TreeMap (java.util.TreeMap), DateTime (org.joda.time.DateTime), HashPartitionFunction (org.apache.druid.timeline.partition.HashPartitionFunction), SingleDimensionPartitionsSpec (org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec), PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec), List (java.util.List), Interval (org.joda.time.Interval)
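
A minimal sketch of the shard-count fallback used in run() above (not from the Druid source): the three-argument HashedPartitionsSpec constructor is an assumption that may differ between Druid versions, but PartitionsSpec.isEffectivelyNull is the same check used in the method.

// Hypothetical usage; constructor arguments (maxRowsPerSegment, numShards, partitionDimensions) are assumed.
PartitionsSpec spec = new HashedPartitionsSpec(100, null, ImmutableList.of("dim1"));
int shardsPerInterval = 1;
if (spec instanceof HashedPartitionsSpec) {
    Integer numShards = ((HashedPartitionsSpec) spec).getNumShards();
    // isEffectivelyNull treats both null and the unset sentinel as absent,
    // so a hashed spec without numShards still yields a single shard per interval.
    shardsPerInterval = PartitionsSpec.isEffectivelyNull(numShards) ? 1 : numShards;
}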

Example 27 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From class HadoopIngestionSpecTest, method testPartitionsSpecAutoHashed().

@Test
public void testPartitionsSpecAutoHashed() {
    final HadoopIngestionSpec schema;
    try {
        schema = jsonReadWriteRead(
                "{\n"
                + "    \"tuningConfig\": {\n"
                + "        \"type\": \"hadoop\",\n"
                + "        \"partitionsSpec\": {\n"
                + "            \"targetPartitionSize\": 100\n"
                + "        }\n"
                + "    }\n"
                + "}",
                HadoopIngestionSpec.class
        );
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    final PartitionsSpec partitionsSpec = schema.getTuningConfig().getPartitionsSpec();
    Assert.assertTrue("isDeterminingPartitions", partitionsSpec.needsDeterminePartitions(true));
    Assert.assertEquals("getTargetPartitionSize", 100, partitionsSpec.getMaxRowsPerSegment().intValue());
    Assert.assertTrue("partitionSpec", partitionsSpec instanceof HashedPartitionsSpec);
}
Also used: HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec), SingleDimensionPartitionsSpec (org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec), PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec), Test (org.junit.Test)
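
The same resolution can be exercised directly. A minimal sketch, assuming jsonMapper is a Jackson ObjectMapper configured with Druid's serialization modules (jsonReadWriteRead above wraps such a mapper):

HadoopIngestionSpec schema = jsonMapper.readValue(
        "{\"tuningConfig\": {\"type\": \"hadoop\", \"partitionsSpec\": {\"targetPartitionSize\": 100}}}",
        HadoopIngestionSpec.class
);
PartitionsSpec spec = schema.getTuningConfig().getPartitionsSpec();
// With no explicit type, the legacy targetPartitionSize field resolves to a
// HashedPartitionsSpec whose maxRowsPerSegment carries the configured value.
Assert.assertTrue(spec instanceof HashedPartitionsSpec);
Assert.assertEquals(100, spec.getMaxRowsPerSegment().intValue());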

Example 28 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From class NewestSegmentFirstIterator, method needsCompaction().

private boolean needsCompaction(DataSourceCompactionConfig config, SegmentsToCompact candidates) {
    Preconditions.checkState(!candidates.isEmpty(), "Empty candidates");
    final ClientCompactionTaskQueryTuningConfig tuningConfig = ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment());
    final PartitionsSpec partitionsSpecFromConfig = findPartitionsSpecFromConfig(tuningConfig);
    final CompactionState lastCompactionState = candidates.segments.get(0).getLastCompactionState();
    if (lastCompactionState == null) {
        log.info("Candidate segment[%s] is not compacted yet. Needs compaction.", candidates.segments.get(0).getId());
        return true;
    }
    final boolean allCandidatesHaveSameLastCompactionState = candidates.segments.stream().allMatch(segment -> lastCompactionState.equals(segment.getLastCompactionState()));
    if (!allCandidatesHaveSameLastCompactionState) {
        log.info("[%s] Candidate segments were compacted with different partitions spec. Needs compaction.", candidates.segments.size());
        log.debugSegments(candidates.segments, "Candidate segments compacted with different partiton spec");
        return true;
    }
    final PartitionsSpec segmentPartitionsSpec = lastCompactionState.getPartitionsSpec();
    final IndexSpec segmentIndexSpec = objectMapper.convertValue(lastCompactionState.getIndexSpec(), IndexSpec.class);
    final IndexSpec configuredIndexSpec;
    if (tuningConfig.getIndexSpec() == null) {
        configuredIndexSpec = new IndexSpec();
    } else {
        configuredIndexSpec = tuningConfig.getIndexSpec();
    }
    if (!Objects.equals(partitionsSpecFromConfig, segmentPartitionsSpec)) {
        log.info("Configured partitionsSpec[%s] is differenet from " + "the partitionsSpec[%s] of segments. Needs compaction.", partitionsSpecFromConfig, segmentPartitionsSpec);
        return true;
    }
    // segmentIndexSpec cannot be null.
    if (!segmentIndexSpec.equals(configuredIndexSpec)) {
        log.info("Configured indexSpec[%s] is different from the one[%s] of segments. Needs compaction", configuredIndexSpec, segmentIndexSpec);
        return true;
    }
    if (config.getGranularitySpec() != null) {
        final ClientCompactionTaskGranularitySpec existingGranularitySpec = lastCompactionState.getGranularitySpec() != null ? objectMapper.convertValue(lastCompactionState.getGranularitySpec(), ClientCompactionTaskGranularitySpec.class) : null;
        // Checks for segmentGranularity
        if (config.getGranularitySpec().getSegmentGranularity() != null) {
            final Granularity existingSegmentGranularity = existingGranularitySpec != null ? existingGranularitySpec.getSegmentGranularity() : null;
            if (existingSegmentGranularity == null) {
                // Candidate segments were all compacted without a segmentGranularity recorded.
                // Fall back to checking whether every candidate's interval is aligned with the configured segment granularity.
                boolean needsCompaction = candidates.segments.stream().anyMatch(segment -> !config.getGranularitySpec().getSegmentGranularity().isAligned(segment.getInterval()));
                if (needsCompaction) {
                    log.info("Segments were previously compacted but without segmentGranularity in auto compaction." + " Configured segmentGranularity[%s] is different from granularity implied by segment intervals. Needs compaction", config.getGranularitySpec().getSegmentGranularity());
                    return true;
                }
            } else if (!config.getGranularitySpec().getSegmentGranularity().equals(existingSegmentGranularity)) {
                log.info("Configured segmentGranularity[%s] is different from the segmentGranularity[%s] of segments. Needs compaction", config.getGranularitySpec().getSegmentGranularity(), existingSegmentGranularity);
                return true;
            }
        }
        // Checks for rollup
        if (config.getGranularitySpec().isRollup() != null) {
            final Boolean existingRollup = existingGranularitySpec != null ? existingGranularitySpec.isRollup() : null;
            if (existingRollup == null || !config.getGranularitySpec().isRollup().equals(existingRollup)) {
                log.info("Configured rollup[%s] is different from the rollup[%s] of segments. Needs compaction", config.getGranularitySpec().isRollup(), existingRollup);
                return true;
            }
        }
        // Checks for queryGranularity
        if (config.getGranularitySpec().getQueryGranularity() != null) {
            final Granularity existingQueryGranularity = existingGranularitySpec != null ? existingGranularitySpec.getQueryGranularity() : null;
            if (!config.getGranularitySpec().getQueryGranularity().equals(existingQueryGranularity)) {
                log.info("Configured queryGranularity[%s] is different from the queryGranularity[%s] of segments. Needs compaction", config.getGranularitySpec().getQueryGranularity(), existingQueryGranularity);
                return true;
            }
        }
    }
    if (config.getDimensionsSpec() != null) {
        final DimensionsSpec existingDimensionsSpec = lastCompactionState.getDimensionsSpec();
        // Checks for list of dimensions
        if (config.getDimensionsSpec().getDimensions() != null) {
            final List<DimensionSchema> existingDimensions = existingDimensionsSpec != null ? existingDimensionsSpec.getDimensions() : null;
            if (!config.getDimensionsSpec().getDimensions().equals(existingDimensions)) {
                log.info("Configured dimensionsSpec is different from the dimensionsSpec of segments. Needs compaction");
                return true;
            }
        }
    }
    if (config.getTransformSpec() != null) {
        final ClientCompactionTaskTransformSpec existingTransformSpec = lastCompactionState.getTransformSpec() != null ? objectMapper.convertValue(lastCompactionState.getTransformSpec(), ClientCompactionTaskTransformSpec.class) : null;
        // Checks for filters
        if (config.getTransformSpec().getFilter() != null) {
            final DimFilter existingFilters = existingTransformSpec != null ? existingTransformSpec.getFilter() : null;
            if (!config.getTransformSpec().getFilter().equals(existingFilters)) {
                log.info("Configured filter[%s] is different from the filter[%s] of segments. Needs compaction", config.getTransformSpec().getFilter(), existingFilters);
                return true;
            }
        }
    }
    if (ArrayUtils.isNotEmpty(config.getMetricsSpec())) {
        final AggregatorFactory[] existingMetricsSpec = lastCompactionState.getMetricsSpec() == null || lastCompactionState.getMetricsSpec().isEmpty() ? null : objectMapper.convertValue(lastCompactionState.getMetricsSpec(), AggregatorFactory[].class);
        if (existingMetricsSpec == null || !Arrays.deepEquals(config.getMetricsSpec(), existingMetricsSpec)) {
            log.info("Configured metricsSpec[%s] is different from the metricsSpec[%s] of segments. Needs compaction", Arrays.toString(config.getMetricsSpec()), Arrays.toString(existingMetricsSpec));
            return true;
        }
    }
    return false;
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), ClientCompactionTaskTransformSpec (org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec), ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec), Granularity (org.apache.druid.java.util.common.granularity.Granularity), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), DimensionSchema (org.apache.druid.data.input.impl.DimensionSchema), ClientCompactionTaskQueryTuningConfig (org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), DimFilter (org.apache.druid.query.filter.DimFilter)
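
The first partitionsSpec check above is a plain equality comparison. A hedged sketch of that decision in isolation; the DynamicPartitionsSpec constructor arguments (maxRowsPerSegment, maxTotalRows) are inferred from its JSON fields and may differ between Druid versions:

// Configured spec vs. the spec recorded in the segments' lastCompactionState.
PartitionsSpec configured = new DynamicPartitionsSpec(5000000, null);
PartitionsSpec fromSegments = new DynamicPartitionsSpec(1000000, null);
// Mirrors needsCompaction(): any difference in partitionsSpec triggers recompaction.
boolean needsCompaction = !Objects.equals(configured, fromSegments); // true: the specs differ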

Example 29 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From class NewestSegmentFirstPolicyTest, method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentMetricsSpec().

@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentMetricsSpec() {
    NullHandling.initializeForTests();
    mapper.setInjectableValues(new InjectableValues.Std().addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE));
    // Same indexSpec as what is set in the auto compaction config
    Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {
    });
    // Same partitionsSpec as what is set in the auto compaction config
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have
    // metricsSpec={CountAggregatorFactory("cnt")} for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
    // metricsSpec={CountAggregatorFactory("cnt"), LongSumAggregatorFactory("val", "val")} for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
    // metricsSpec=[] for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (metricsSpec was set to an empty list during last compaction)
    // and metricsSpec=null for interval 2017-10-04T00:00:00/2017-10-05T00:00:00 (metricsSpec was not set during last compaction)
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
            new SegmentGenerateSpec(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), new Period("P1D"), null,
                    new CompactionState(partitionsSpec, null, mapper.convertValue(new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, new TypeReference<List<Object>>() {}), null, indexSpec, null)),
            new SegmentGenerateSpec(Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"), new Period("P1D"), null,
                    new CompactionState(partitionsSpec, null, mapper.convertValue(new AggregatorFactory[] { new CountAggregatorFactory("cnt"), new LongSumAggregatorFactory("val", "val") }, new TypeReference<List<Object>>() {}), null, indexSpec, null)),
            new SegmentGenerateSpec(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), new Period("P1D"), null,
                    new CompactionState(partitionsSpec, null, mapper.convertValue(new AggregatorFactory[] {}, new TypeReference<List<Object>>() {}), null, indexSpec, null)),
            new SegmentGenerateSpec(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), new Period("P1D"), null,
                    new CompactionState(partitionsSpec, null, null, null, indexSpec, null))
    );
    // Auto compaction config sets metricsSpec={CountAggregatorFactory("cnt"), LongSumAggregatorFactory("val", "val")}
    CompactionSegmentIterator iterator = policy.reset(
            ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), null, null, null,
                    new AggregatorFactory[] { new CountAggregatorFactory("cnt"), new LongSumAggregatorFactory("val", "val") })),
            ImmutableMap.of(DATA_SOURCE, timeline),
            Collections.emptyMap()
    );
    // We should get, newest first: interval 2017-10-04T00:00:00/2017-10-05T00:00:00, then 2017-10-03T00:00:00/2017-10-04T00:00:00, then 2017-10-01T00:00:00/2017-10-02T00:00:00 (2017-10-02T00:00:00/2017-10-03T00:00:00 already matches the configured metricsSpec).
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    Assert.assertTrue(iterator.hasNext());
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    Assert.assertTrue(iterator.hasNext());
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    // No more
    Assert.assertFalse(iterator.hasNext());
    // Auto compaction config sets metricsSpec=null
    iterator = policy.reset(
            ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), null, null, null, null)),
            ImmutableMap.of(DATA_SOURCE, timeline),
            Collections.emptyMap()
    );
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), ArrayList (java.util.ArrayList), Period (org.joda.time.Period), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), DataSegment (org.apache.druid.timeline.DataSegment), PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), TypeReference (com.fasterxml.jackson.core.type.TypeReference), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Test (org.junit.Test)
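
For reference, the CompactionState built in this test stores the metricsSpec as a JSON-like List<Object>. A minimal sketch, reusing mapper, partitionsSpec, and indexSpec from the test above; the six-argument order (partitionsSpec, dimensionsSpec, metricsSpec, transformSpec, indexSpec, granularitySpec) is taken from the constructor calls in the test and may not hold for every Druid version:

List<Object> metricsSpec = mapper.convertValue(
        new AggregatorFactory[] { new CountAggregatorFactory("cnt") },
        new TypeReference<List<Object>>() {}
);
// A segment carrying this state counts as compacted with metricsSpec={cnt}, so an
// auto compaction config with a different metricsSpec will select it for recompaction.
CompactionState state = new CompactionState(partitionsSpec, null, metricsSpec, null, indexSpec, null);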

Example 30 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

From class NewestSegmentFirstPolicyTest, method testIteratorReturnsNothingAsSegmentsWasCompactedAndHaveSameSegmentGranularityInLastCompactionState().

@Test
public void testIteratorReturnsNothingAsSegmentsWasCompactedAndHaveSameSegmentGranularityInLastCompactionState() {
    // Same indexSpec as what is set in the auto compaction config
    Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {
    });
    // Same partitionsSpec as what is set in the auto compaction config
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
            new SegmentGenerateSpec(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), new Period("P1D"), null,
                    new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("segmentGranularity", "day"))),
            new SegmentGenerateSpec(Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"), new Period("P1D"), null,
                    new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("segmentGranularity", "day")))
    );
    // Auto compaction config sets segmentGranularity=DAY
    final CompactionSegmentIterator iterator = policy.reset(
            ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.DAY, null, null))),
            ImmutableMap.of(DATA_SOURCE, timeline),
            Collections.emptyMap()
    );
    Assert.assertFalse(iterator.hasNext());
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), Period (org.joda.time.Period), DataSegment (org.apache.druid.timeline.DataSegment), PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), UserCompactionTaskGranularityConfig (org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Test (org.junit.Test)
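
The iterator returns nothing here because the recorded segmentGranularity matches the configured one; when no granularity was recorded, needsCompaction() falls back to the interval-alignment check shown in Example 28. A minimal sketch of that check, using the same Granularity.isAligned call:

// A full-day interval is aligned with DAY granularity; a partial day is not.
boolean aligned = Granularities.DAY.isAligned(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00")); // true: no recompaction needed
boolean partial = Granularities.DAY.isAligned(Intervals.of("2017-10-01T00:00:00/2017-10-01T12:00:00")); // false: would need recompaction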

Aggregations

PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec): 34
Test (org.junit.Test): 19
Map (java.util.Map): 17
ArrayList (java.util.ArrayList): 16
DataSegment (org.apache.druid.timeline.DataSegment): 16
Period (org.joda.time.Period): 16
ImmutableMap (com.google.common.collect.ImmutableMap): 15
HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec): 15
IndexSpec (org.apache.druid.segment.IndexSpec): 15
CompactionState (org.apache.druid.timeline.CompactionState): 14
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 11
UserCompactionTaskGranularityConfig (org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig): 11
SingleDimensionPartitionsSpec (org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec): 10
StringUtils (org.apache.druid.java.util.common.StringUtils): 9
Function (java.util.function.Function): 8
IOException (java.io.IOException): 7
List (java.util.List): 7
Pair (org.apache.druid.java.util.common.Pair): 5
Interval (org.joda.time.Interval): 5
Test (org.testng.annotations.Test): 5