Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.
The class HadoopDruidDetermineConfigurationJob, method run().
@Override
public boolean run()
{
  JobHelper.ensurePaths(config);
  if (config.isDeterminingPartitions()) {
    job = createPartitionJob(config);
    config.setHadoopJobIdFileName(hadoopJobIdFile);
    boolean jobSucceeded = JobHelper.runSingleJob(job);
    JobHelper.maybeDeleteIntermediatePath(jobSucceeded, config.getSchema());
    return jobSucceeded;
  } else {
    final PartitionsSpec partitionsSpec = config.getPartitionsSpec();
    final int shardsPerInterval;
    final HashPartitionFunction partitionFunction;
    if (partitionsSpec instanceof HashedPartitionsSpec) {
      final HashedPartitionsSpec hashedPartitionsSpec = (HashedPartitionsSpec) partitionsSpec;
      shardsPerInterval = PartitionsSpec.isEffectivelyNull(hashedPartitionsSpec.getNumShards())
                          ? 1
                          : hashedPartitionsSpec.getNumShards();
      partitionFunction = hashedPartitionsSpec.getPartitionFunction();
    } else {
      shardsPerInterval = 1;
      partitionFunction = null;
    }
    Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
    int shardCount = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
      DateTime bucket = segmentGranularity.getStart();
      // shardsPerInterval was normalized above: an unset (effectively null) numShards means a single shard per interval
      List<HadoopyShardSpec> specs = Lists.newArrayListWithCapacity(shardsPerInterval);
      for (int i = 0; i < shardsPerInterval; i++) {
        specs.add(
            new HadoopyShardSpec(
                new HashBasedNumberedShardSpec(
                    i,
                    shardsPerInterval,
                    i,
                    shardsPerInterval,
                    config.getPartitionsSpec().getPartitionDimensions(),
                    partitionFunction,
                    HadoopDruidIndexerConfig.JSON_MAPPER
                ),
                shardCount++
            )
        );
      }
      shardSpecs.put(bucket.getMillis(), specs);
      log.info("DateTime[%s], spec[%s]", bucket, specs);
    }
    config.setShardSpecs(shardSpecs);
    return true;
  }
}
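For reference, a minimal standalone sketch of the numShards fallback used in the else-branch above. The JSON literal, the class name ShardsPerIntervalSketch, and the use of a plain Jackson ObjectMapper are illustrative assumptions (the "hashed" subtype is expected to resolve through the type registration on PartitionsSpec); only methods already visible in the snippet are called on the spec.

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;

public class ShardsPerIntervalSketch
{
  public static void main(String[] args) throws Exception
  {
    // Assumption: a plain mapper resolves the "hashed" subtype via the annotations on PartitionsSpec.
    final ObjectMapper mapper = new ObjectMapper();
    final PartitionsSpec spec = mapper.readValue("{\"type\": \"hashed\", \"numShards\": 3}", PartitionsSpec.class);

    final int shardsPerInterval;
    if (spec instanceof HashedPartitionsSpec) {
      final HashedPartitionsSpec hashed = (HashedPartitionsSpec) spec;
      // Same fallback as in run(): an unset numShards collapses to a single shard per interval.
      shardsPerInterval = PartitionsSpec.isEffectivelyNull(hashed.getNumShards()) ? 1 : hashed.getNumShards();
    } else {
      shardsPerInterval = 1;
    }
    System.out.println("shardsPerInterval = " + shardsPerInterval); // 3 for the JSON above
  }
}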
Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.
The class HadoopIngestionSpecTest, method testPartitionsSpecAutoHashed().
@Test
public void testPartitionsSpecAutoHashed()
{
  final HadoopIngestionSpec schema;
  try {
    schema = jsonReadWriteRead(
        "{\n"
        + " \"tuningConfig\": {\n"
        + " \"type\": \"hadoop\",\n"
        + " \"partitionsSpec\": {\n"
        + " \"targetPartitionSize\": 100\n"
        + " }\n"
        + " }\n"
        + "}",
        HadoopIngestionSpec.class
    );
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
  final PartitionsSpec partitionsSpec = schema.getTuningConfig().getPartitionsSpec();
  Assert.assertTrue("isDeterminingPartitions", partitionsSpec.needsDeterminePartitions(true));
  Assert.assertEquals("getTargetPartitionSize", 100, partitionsSpec.getMaxRowsPerSegment().intValue());
  Assert.assertTrue("partitionSpec", partitionsSpec instanceof HashedPartitionsSpec);
}
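A companion sketch in the same style for the explicit-shard-count case. It is not part of the original test class; it reuses the test's own jsonReadWriteRead helper, and the final assertion rests on the assumption (consistent with the job above) that an explicit numShards makes the determine-partitions pass unnecessary.

@Test
public void testPartitionsSpecExplicitNumShardsSketch()
{
  final HadoopIngestionSpec schema;
  try {
    schema = jsonReadWriteRead(
        "{\n"
        + " \"tuningConfig\": {\n"
        + " \"type\": \"hadoop\",\n"
        + " \"partitionsSpec\": {\n"
        + " \"type\": \"hashed\",\n"
        + " \"numShards\": 2\n"
        + " }\n"
        + " }\n"
        + "}",
        HadoopIngestionSpec.class
    );
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
  final PartitionsSpec partitionsSpec = schema.getTuningConfig().getPartitionsSpec();
  Assert.assertTrue("partitionSpec", partitionsSpec instanceof HashedPartitionsSpec);
  Assert.assertEquals("getNumShards", 2, ((HashedPartitionsSpec) partitionsSpec).getNumShards().intValue());
  // Assumption: with numShards given explicitly, no determine-partitions pass is needed for Hadoop ingestion.
  Assert.assertFalse("isDeterminingPartitions", partitionsSpec.needsDeterminePartitions(true));
}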
Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.
The class NewestSegmentFirstIterator, method needsCompaction().
private boolean needsCompaction(DataSourceCompactionConfig config, SegmentsToCompact candidates)
{
  Preconditions.checkState(!candidates.isEmpty(), "Empty candidates");
  final ClientCompactionTaskQueryTuningConfig tuningConfig =
      ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment());
  final PartitionsSpec partitionsSpecFromConfig = findPartitionsSpecFromConfig(tuningConfig);
  final CompactionState lastCompactionState = candidates.segments.get(0).getLastCompactionState();
  if (lastCompactionState == null) {
    log.info("Candidate segment[%s] is not compacted yet. Needs compaction.", candidates.segments.get(0).getId());
    return true;
  }
  final boolean allCandidatesHaveSameLastCompactionState = candidates.segments
      .stream()
      .allMatch(segment -> lastCompactionState.equals(segment.getLastCompactionState()));
  if (!allCandidatesHaveSameLastCompactionState) {
    log.info("[%s] Candidate segments were compacted with different partitions spec. Needs compaction.", candidates.segments.size());
    log.debugSegments(candidates.segments, "Candidate segments compacted with different partition spec");
    return true;
  }
  final PartitionsSpec segmentPartitionsSpec = lastCompactionState.getPartitionsSpec();
  final IndexSpec segmentIndexSpec = objectMapper.convertValue(lastCompactionState.getIndexSpec(), IndexSpec.class);
  final IndexSpec configuredIndexSpec;
  if (tuningConfig.getIndexSpec() == null) {
    configuredIndexSpec = new IndexSpec();
  } else {
    configuredIndexSpec = tuningConfig.getIndexSpec();
  }
  if (!Objects.equals(partitionsSpecFromConfig, segmentPartitionsSpec)) {
    log.info(
        "Configured partitionsSpec[%s] is different from the partitionsSpec[%s] of segments. Needs compaction.",
        partitionsSpecFromConfig,
        segmentPartitionsSpec
    );
    return true;
  }
  // segmentIndexSpec cannot be null.
  if (!segmentIndexSpec.equals(configuredIndexSpec)) {
    log.info(
        "Configured indexSpec[%s] is different from the one[%s] of segments. Needs compaction",
        configuredIndexSpec,
        segmentIndexSpec
    );
    return true;
  }
  if (config.getGranularitySpec() != null) {
    final ClientCompactionTaskGranularitySpec existingGranularitySpec =
        lastCompactionState.getGranularitySpec() != null
        ? objectMapper.convertValue(lastCompactionState.getGranularitySpec(), ClientCompactionTaskGranularitySpec.class)
        : null;
    // Checks for segmentGranularity
    if (config.getGranularitySpec().getSegmentGranularity() != null) {
      final Granularity existingSegmentGranularity =
          existingGranularitySpec != null ? existingGranularitySpec.getSegmentGranularity() : null;
      if (existingSegmentGranularity == null) {
        // Candidate segments were all compacted without segment granularity set.
        // We need to check if all segments have the same segment granularity as the configured segment granularity.
        boolean needsCompaction = candidates.segments
            .stream()
            .anyMatch(segment -> !config.getGranularitySpec().getSegmentGranularity().isAligned(segment.getInterval()));
        if (needsCompaction) {
          log.info(
              "Segments were previously compacted but without segmentGranularity in auto compaction."
              + " Configured segmentGranularity[%s] is different from granularity implied by segment intervals. Needs compaction",
              config.getGranularitySpec().getSegmentGranularity()
          );
          return true;
        }
      } else if (!config.getGranularitySpec().getSegmentGranularity().equals(existingSegmentGranularity)) {
        log.info(
            "Configured segmentGranularity[%s] is different from the segmentGranularity[%s] of segments. Needs compaction",
            config.getGranularitySpec().getSegmentGranularity(),
            existingSegmentGranularity
        );
        return true;
      }
    }
    // Checks for rollup
    if (config.getGranularitySpec().isRollup() != null) {
      final Boolean existingRollup = existingGranularitySpec != null ? existingGranularitySpec.isRollup() : null;
      if (existingRollup == null || !config.getGranularitySpec().isRollup().equals(existingRollup)) {
        log.info(
            "Configured rollup[%s] is different from the rollup[%s] of segments. Needs compaction",
            config.getGranularitySpec().isRollup(),
            existingRollup
        );
        return true;
      }
    }
    // Checks for queryGranularity
    if (config.getGranularitySpec().getQueryGranularity() != null) {
      final Granularity existingQueryGranularity =
          existingGranularitySpec != null ? existingGranularitySpec.getQueryGranularity() : null;
      if (!config.getGranularitySpec().getQueryGranularity().equals(existingQueryGranularity)) {
        log.info(
            "Configured queryGranularity[%s] is different from the queryGranularity[%s] of segments. Needs compaction",
            config.getGranularitySpec().getQueryGranularity(),
            existingQueryGranularity
        );
        return true;
      }
    }
  }
  if (config.getDimensionsSpec() != null) {
    final DimensionsSpec existingDimensionsSpec = lastCompactionState.getDimensionsSpec();
    // Checks for list of dimensions
    if (config.getDimensionsSpec().getDimensions() != null) {
      final List<DimensionSchema> existingDimensions =
          existingDimensionsSpec != null ? existingDimensionsSpec.getDimensions() : null;
      if (!config.getDimensionsSpec().getDimensions().equals(existingDimensions)) {
        log.info("Configured dimensionsSpec is different from the dimensionsSpec of segments. Needs compaction");
        return true;
      }
    }
  }
  if (config.getTransformSpec() != null) {
    final ClientCompactionTaskTransformSpec existingTransformSpec =
        lastCompactionState.getTransformSpec() != null
        ? objectMapper.convertValue(lastCompactionState.getTransformSpec(), ClientCompactionTaskTransformSpec.class)
        : null;
    // Checks for filters
    if (config.getTransformSpec().getFilter() != null) {
      final DimFilter existingFilters =
          existingTransformSpec != null ? existingTransformSpec.getFilter() : null;
      if (!config.getTransformSpec().getFilter().equals(existingFilters)) {
        log.info(
            "Configured filter[%s] is different from the filter[%s] of segments. Needs compaction",
            config.getTransformSpec().getFilter(),
            existingFilters
        );
        return true;
      }
    }
  }
  if (ArrayUtils.isNotEmpty(config.getMetricsSpec())) {
    final AggregatorFactory[] existingMetricsSpec =
        lastCompactionState.getMetricsSpec() == null || lastCompactionState.getMetricsSpec().isEmpty()
        ? null
        : objectMapper.convertValue(lastCompactionState.getMetricsSpec(), AggregatorFactory[].class);
    if (existingMetricsSpec == null || !Arrays.deepEquals(config.getMetricsSpec(), existingMetricsSpec)) {
      log.info(
          "Configured metricsSpec[%s] is different from the metricsSpec[%s] of segments. Needs compaction",
          Arrays.toString(config.getMetricsSpec()),
          Arrays.toString(existingMetricsSpec)
      );
      return true;
    }
  }
  return false;
}
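To isolate the first check above, here is a minimal sketch of the partitionsSpec comparison. Both spec values and the class name PartitionsSpecCheckSketch are illustrative, and DynamicPartitionsSpec's two-argument (maxRowsPerSegment, maxTotalRows) constructor is assumed; the comparison itself mirrors the Objects.equals call in needsCompaction().

import java.util.Objects;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;

public class PartitionsSpecCheckSketch
{
  public static void main(String[] args)
  {
    // Stand-in for the spec derived from the auto compaction config.
    PartitionsSpec fromConfig = new DynamicPartitionsSpec(5_000_000, null);
    // Stand-in for the spec recorded in the segments' lastCompactionState.
    PartitionsSpec fromSegments = new DynamicPartitionsSpec(1_000_000, null);

    // Mirrors the check in needsCompaction(): any difference means the interval needs compaction.
    boolean needsCompaction = !Objects.equals(fromConfig, fromSegments);
    System.out.println("needs compaction: " + needsCompaction); // true, the row limits differ
  }
}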
Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.
The class NewestSegmentFirstPolicyTest, method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentMetricsSpec().
@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentMetricsSpec()
{
  NullHandling.initializeForTests();
  mapper.setInjectableValues(
      new InjectableValues.Std().addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
  );
  // Same indexSpec as what is set in the auto compaction config
  Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {});
  // Same partitionsSpec as what is set in the auto compaction config
  PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(
      ClientCompactionTaskQueryTuningConfig.from(null, null)
  );
  // Create segments that were compacted (CompactionState != null) and have
  // metricsSpec={CountAggregatorFactory("cnt")} for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
  // metricsSpec={CountAggregatorFactory("cnt"), LongSumAggregatorFactory("val", "val")} for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
  // metricsSpec=[] for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (metricsSpec was set to an empty list during last compaction),
  // and metricsSpec=null for interval 2017-10-04T00:00:00/2017-10-05T00:00:00 (metricsSpec was not set during last compaction).
  final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
      new SegmentGenerateSpec(
          Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), new Period("P1D"), null,
          new CompactionState(
              partitionsSpec, null,
              mapper.convertValue(new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, new TypeReference<List<Object>>() {}),
              null, indexSpec, null
          )
      ),
      new SegmentGenerateSpec(
          Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"), new Period("P1D"), null,
          new CompactionState(
              partitionsSpec, null,
              mapper.convertValue(new AggregatorFactory[] { new CountAggregatorFactory("cnt"), new LongSumAggregatorFactory("val", "val") }, new TypeReference<List<Object>>() {}),
              null, indexSpec, null
          )
      ),
      new SegmentGenerateSpec(
          Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), new Period("P1D"), null,
          new CompactionState(
              partitionsSpec, null,
              mapper.convertValue(new AggregatorFactory[] {}, new TypeReference<List<Object>>() {}),
              null, indexSpec, null
          )
      ),
      new SegmentGenerateSpec(
          Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), new Period("P1D"), null,
          new CompactionState(partitionsSpec, null, null, null, indexSpec, null)
      )
  );
  // Auto compaction config sets metricsSpec={CountAggregatorFactory("cnt"), LongSumAggregatorFactory("val", "val")}
  CompactionSegmentIterator iterator = policy.reset(
      ImmutableMap.of(
          DATA_SOURCE,
          createCompactionConfig(
              130000, new Period("P0D"), null, null, null,
              new AggregatorFactory[] { new CountAggregatorFactory("cnt"), new LongSumAggregatorFactory("val", "val") }
          )
      ),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  // Expected, newest first: 2017-10-04T00:00:00/2017-10-05T00:00:00, 2017-10-03T00:00:00/2017-10-04T00:00:00,
  // and 2017-10-01T00:00:00/2017-10-02T00:00:00. The 2017-10-02T00:00:00/2017-10-03T00:00:00 interval is skipped
  // because it was already compacted with the configured metricsSpec.
  Assert.assertTrue(iterator.hasNext());
  List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  Assert.assertTrue(iterator.hasNext());
  expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  Assert.assertTrue(iterator.hasNext());
  expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  // No more
  Assert.assertFalse(iterator.hasNext());
  // Auto compaction config sets metricsSpec=null
  iterator = policy.reset(
      ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), null, null, null, null)),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  // No more
  Assert.assertFalse(iterator.hasNext());
}
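To make the selection above concrete, here is a minimal sketch of the metricsSpec comparison needsCompaction() applies to the 2017-10-01T00:00:00/2017-10-02T00:00:00 segments. The class name MetricsSpecCheckSketch is an assumption; the aggregator factories are the same ones the test builds.

import java.util.Arrays;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class MetricsSpecCheckSketch
{
  public static void main(String[] args)
  {
    // Same static initialization the test performs before touching aggregator factories.
    NullHandling.initializeForTests();

    // What the auto compaction config asks for.
    AggregatorFactory[] configured = new AggregatorFactory[] {
        new CountAggregatorFactory("cnt"),
        new LongSumAggregatorFactory("val", "val")
    };
    // What the 2017-10-01/2017-10-02 segments recorded in their lastCompactionState.
    AggregatorFactory[] existing = new AggregatorFactory[] { new CountAggregatorFactory("cnt") };

    // Mirrors the check in needsCompaction(): a mismatch selects the interval for recompaction.
    boolean needsCompaction = !Arrays.deepEquals(configured, existing);
    System.out.println("needs compaction: " + needsCompaction); // true
  }
}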
Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.
The class NewestSegmentFirstPolicyTest, method testIteratorReturnsNothingAsSegmentsWasCompactedAndHaveSameSegmentGranularityInLastCompactionState().
@Test
public void testIteratorReturnsNothingAsSegmentsWasCompactedAndHaveSameSegmentGranularityInLastCompactionState()
{
  // Same indexSpec as what is set in the auto compaction config
  Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {});
  // Same partitionsSpec as what is set in the auto compaction config
  PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(
      ClientCompactionTaskQueryTuningConfig.from(null, null)
  );
  // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
  final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
      new SegmentGenerateSpec(
          Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), new Period("P1D"), null,
          new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("segmentGranularity", "day"))
      ),
      new SegmentGenerateSpec(
          Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"), new Period("P1D"), null,
          new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("segmentGranularity", "day"))
      )
  );
  // Auto compaction config sets segmentGranularity=DAY
  final CompactionSegmentIterator iterator = policy.reset(
      ImmutableMap.of(
          DATA_SOURCE,
          createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.DAY, null, null))
      ),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  Assert.assertFalse(iterator.hasNext());
}
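Finally, a minimal sketch of the two granularity cases behind the empty iterator above. It is not code from NewestSegmentFirstIterator; the class name GranularityCheckSketch is an assumption, and only utilities already used in this section (Granularities, Intervals, Granularity.isAligned) appear.

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;

public class GranularityCheckSketch
{
  public static void main(String[] args)
  {
    Granularity configured = Granularities.DAY;

    // Case 1: the lastCompactionState recorded segmentGranularity=day, so needsCompaction()
    // compares the two granularities directly.
    Granularity recorded = Granularities.DAY;
    boolean granularityChanged = !configured.equals(recorded);
    System.out.println("granularity changed: " + granularityChanged); // false, so nothing is selected

    // Case 2: had no granularity been recorded, the fallback would check whether every segment
    // interval is already aligned with the configured granularity.
    boolean aligned = configured.isAligned(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"));
    System.out.println("interval aligned with DAY: " + aligned); // true
  }
}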