Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.
In class CompactionTaskParallelRunTest, the method testRunParallelWithMultiDimensionRangePartitioningWithSingleTask:
@Test
public void testRunParallelWithMultiDimensionRangePartitioningWithSingleTask() throws Exception
{
  // Range partitioning is not supported with segment lock yet
  Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
  runIndexTask(null, true);
  final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
  final CompactionTask compactionTask = builder
      .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
      .tuningConfig(newTuningConfig(new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false), 1, true))
      .build();
  final Set<DataSegment> compactedSegments = runTask(compactionTask);
  for (DataSegment segment : compactedSegments) {
    // Expect compaction state to exist, since storing compaction state is enabled by default
    Map<String, String> expectedLongSumMetric = new HashMap<>();
    expectedLongSumMetric.put("type", "longSum");
    expectedLongSumMetric.put("name", "val");
    expectedLongSumMetric.put("fieldName", "val");
    expectedLongSumMetric.put("expression", null);
    Assert.assertSame(DimensionRangeShardSpec.class, segment.getShardSpec().getClass());
    CompactionState expectedState = new CompactionState(
        new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
        ImmutableList.of(expectedLongSumMetric),
        null,
        compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
        getObjectMapper().readValue(
            getObjectMapper().writeValueAsString(
                new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, true, ImmutableList.of(segment.getInterval()))
            ),
            Map.class
        )
    );
    Assert.assertEquals(expectedState, segment.getLastCompactionState());
  }
}
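For readers unfamiliar with the four-argument constructor used twice above, the values appear to map onto the spec's sizing and dimension properties; the getters getTargetRowsPerSegment, getMaxRowsPerSegment, getPartitionDimensions, and isAssumeGrouped are exercised in the later examples on this page. A minimal sketch, with the per-argument meanings treated as an assumption inferred from those getters rather than confirmed documentation:

import java.util.Arrays;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;

// Same spec as in the test above; comments describe the assumed meaning of each argument.
DimensionRangePartitionsSpec rangeSpec = new DimensionRangePartitionsSpec(
    7,                              // targetRowsPerSegment: aim for roughly 7 rows per segment
    null,                           // maxRowsPerSegment: left unset; only one of target/max is supplied
    Arrays.asList("dim1", "dim2"),  // partitionDimensions: segments are range-partitioned on (dim1, dim2)
    false                           // assumeGrouped: input rows are not assumed to be pre-grouped
);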
Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.
In class RangePartitionAdjustingCorePartitionSizeTest, the method testLessPartitionsThanBuckets:
@Test
public void testLessPartitionsThanBuckets() throws IOException
{
  final File inputDir = temporaryFolder.newFolder();
  for (int i = 0; i < 2; i++) {
    try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
      writer.write(StringUtils.format("2020-01-01T00:00:00,aaa,b1,10\n"));
    }
  }
  for (int i = 0; i < 3; i++) {
    try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + (i + 2)).toPath(), StandardCharsets.UTF_8)) {
      writer.write(StringUtils.format("2020-01-01T00:00:00,zzz,b1,10\n"));
    }
  }
  final List<String> partitionDimensions = Collections.singletonList("dim1");
  final DimensionBasedPartitionsSpec partitionsSpec = new DimensionRangePartitionsSpec(2, null, partitionDimensions, false);
  final List<DataSegment> segments = new ArrayList<>(
      runTestTask(
          TIMESTAMP_SPEC,
          DIMENSIONS_SPEC,
          INPUT_FORMAT,
          null,
          INTERVAL_TO_INDEX,
          inputDir,
          "test_*",
          partitionsSpec,
          maxNumConcurrentSubTasks,
          TaskState.SUCCESS
      )
  );
  Assert.assertEquals(1, segments.size());
  final DataSegment segment = segments.get(0);
  Assert.assertSame(SingleDimensionShardSpec.class, segment.getShardSpec().getClass());
  final SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segment.getShardSpec();
  Assert.assertEquals(1, shardSpec.getNumCorePartitions());
  Assert.assertEquals(0, shardSpec.getPartitionNum());
  Assert.assertEquals(partitionDimensions, shardSpec.getDimensions());
}
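Read alongside the compaction example above, this test also shows which shard spec class a range-partitioned run produced. A comparison sketch reusing the two specs from these tests; the mapping below simply restates the assertions made in them, not a general guarantee for other configurations:

// Single partition dimension (this test): segments carried SingleDimensionShardSpec.
DimensionRangePartitionsSpec singleDim = new DimensionRangePartitionsSpec(2, null, Collections.singletonList("dim1"), false);

// Two partition dimensions (the compaction test above): segments carried DimensionRangeShardSpec.
DimensionRangePartitionsSpec multiDim = new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false);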
Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.
In class RangePartitionCachingLocalSegmentAllocatorTest, the method setup:
@Before
public void setup() throws IOException
{
  TaskToolbox toolbox = createToolbox(
      INTERVAL_TO_VERSION.keySet().stream().map(RangePartitionCachingLocalSegmentAllocatorTest::createTaskLock).collect(Collectors.toList())
  );
  final RangePartitionAnalysis partitionAnalysis =
      new RangePartitionAnalysis(new DimensionRangePartitionsSpec(null, 1, PARTITION_DIMENSIONS, false));
  INTERVAL_TO_PARTITIONS.forEach(partitionAnalysis::updateBucket);
  target = SegmentAllocators.forNonLinearPartitioning(
      toolbox,
      DATASOURCE,
      TASKID,
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of()),
      new SupervisorTaskAccessWithNullClient(SUPERVISOR_TASKID),
      partitionAnalysis
  );
  sequenceNameFunction = ((CachingLocalSegmentAllocator) target).getSequenceNameFunction();
}
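Once setup has run, the tests drive the allocator through the sequence name function and the SegmentAllocator interface. A rough usage sketch follows; the getSequenceName(interval, row) and allocate(row, sequenceName, previousSegmentId, skipSegmentLineageCheck) signatures, as well as the sample row and interval, are assumptions about how these interfaces are typically exercised, not code taken from this file:

// Hypothetical follow-up to setup(): allocate a segment id for a single input row.
// The row, interval, and method signatures below are illustrative assumptions.
InputRow row = new MapBasedInputRow(
    DateTimes.of("2020-01-01T00:00:00"),
    PARTITION_DIMENSIONS,
    ImmutableMap.of(PARTITION_DIMENSIONS.get(0), "a")
);
String sequenceName = sequenceNameFunction.getSequenceName(Intervals.of("2020-01-01T00/2020-01-01T01"), row);
SegmentIdWithShardSpec id = target.allocate(row, sequenceName, null, true);
// The returned id would carry the interval, version, and the range-based shard spec
// derived from the PartitionBoundaries registered via updateBucket above.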
Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.
In class ParallelIndexSupervisorTask, the method determineRangePartition:
private PartitionBoundaries determineRangePartition(Collection<StringDistribution> distributions)
{
  StringDistributionMerger distributionMerger = new StringSketchMerger();
  distributions.forEach(distributionMerger::merge);
  StringDistribution mergedDistribution = distributionMerger.getResult();
  DimensionRangePartitionsSpec partitionsSpec =
      (DimensionRangePartitionsSpec) ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec();
  final PartitionBoundaries partitions;
  Integer targetRowsPerSegment = partitionsSpec.getTargetRowsPerSegment();
  if (targetRowsPerSegment == null) {
    partitions = mergedDistribution.getEvenPartitionsByMaxSize(partitionsSpec.getMaxRowsPerSegment());
  } else {
    partitions = mergedDistribution.getEvenPartitionsByTargetSize(targetRowsPerSegment);
  }
  return partitions;
}
Use of org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec in project druid by druid-io.
In class PartialDimensionDistributionTask, the method runTask:
@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception
{
  DataSchema dataSchema = ingestionSchema.getDataSchema();
  GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
  ParallelIndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig();
  DimensionRangePartitionsSpec partitionsSpec = (DimensionRangePartitionsSpec) tuningConfig.getPartitionsSpec();
  Preconditions.checkNotNull(partitionsSpec, "partitionsSpec required in tuningConfig");
  final List<String> partitionDimensions = partitionsSpec.getPartitionDimensions();
  Preconditions.checkArgument(partitionDimensions != null && !partitionDimensions.isEmpty(), "partitionDimension required in partitionsSpec");
  boolean isAssumeGrouped = partitionsSpec.isAssumeGrouped();
  InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource(ingestionSchema.getDataSchema().getParser());
  InputFormat inputFormat = inputSource.needsFormat() ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) : null;
  final RowIngestionMeters buildSegmentsMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
  final ParseExceptionHandler parseExceptionHandler = new ParseExceptionHandler(
      buildSegmentsMeters,
      tuningConfig.isLogParseExceptions(),
      tuningConfig.getMaxParseExceptions(),
      tuningConfig.getMaxSavedParseExceptions()
  );
  final boolean determineIntervals = granularitySpec.inputIntervals().isEmpty();
  try (
      final CloseableIterator<InputRow> inputRowIterator = AbstractBatchIndexTask.inputSourceReader(
          toolbox.getIndexingTmpDir(),
          dataSchema,
          inputSource,
          inputFormat,
          determineIntervals ? Objects::nonNull : AbstractBatchIndexTask.defaultRowFilter(granularitySpec),
          buildSegmentsMeters,
          parseExceptionHandler
      );
      HandlingInputRowIterator iterator = new RangePartitionIndexTaskInputRowIteratorBuilder(partitionDimensions, SKIP_NULL)
          .delegate(inputRowIterator)
          .granularitySpec(granularitySpec)
          .build()
  ) {
    Map<Interval, StringDistribution> distribution =
        determineDistribution(iterator, granularitySpec, partitionDimensions, isAssumeGrouped);
    sendReport(toolbox, new DimensionDistributionReport(getId(), distribution));
  }
  return TaskStatus.success(getId());
}
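The two Preconditions checks above mean the task refuses to run unless the tuning config carries a range partitions spec naming at least one partition dimension. A brief sketch of a spec that satisfies both checks; the dimension name and row limit are illustrative only:

// Passes both preconditions in runTask(): the spec is non-null and names a partition dimension.
DimensionRangePartitionsSpec validSpec =
    new DimensionRangePartitionsSpec(null, 5000000, Collections.singletonList("country"), false);

// Would trip the second precondition ("partitionDimension required in partitionsSpec"):
// new DimensionRangePartitionsSpec(null, 5000000, Collections.emptyList(), false);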