Use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.
Class CompactionTask, method runTask.
/**
 * Builds one {@link ParallelIndexSupervisorTask} per generated ingestion spec and runs them one after another.
 *
 * <p>A sub-task that is not ready, finishes with a non-success status, or throws is counted as failed and the
 * next sub-task is still attempted. If no ingestion spec was generated, or if {@code currentSubTaskHolder}
 * refuses a sub-task (the task was asked to stop), a failure status is returned right away.
 *
 * @param toolbox toolbox supplying the coordinator client, JSON mapper and task action client used here
 * @return success when no sub-task failed, otherwise a failure status carrying a summary message
 * @throws Exception if an error escapes the per-sub-task handling, e.g. while creating the ingestion specs
 *                   or serializing a sub-task to JSON
 */
@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
  final List<ParallelIndexIngestionSpec> ingestionSpecs = createIngestionSchema(
      toolbox,
      getTaskLockHelper().getLockGranularityToUse(),
      segmentProvider,
      partitionConfigurationManager,
      dimensionsSpec,
      transformSpec,
      metricsSpec,
      granularitySpec,
      toolbox.getCoordinatorClient(),
      segmentCacheManagerFactory,
      retryPolicyFactory,
      ioConfig.isDropExisting()
  );

  // The ID of each sub-task spec is the base sequenceName in the segment allocation protocol, so every
  // sub-task generated here gets its own name to allocate valid segment IDs without duplication.
  final List<ParallelIndexSupervisorTask> subTasks = IntStream
      .range(0, ingestionSpecs.size())
      .mapToObj(specIndex -> {
        final ParallelIndexIngestionSpec spec = ingestionSpecs.get(specIndex);
        final String sequenceBase = createIndexTaskSpecId(specIndex);
        return newTask(sequenceBase, spec);
      })
      .collect(Collectors.toList());

  if (subTasks.isEmpty()) {
    final String nothingToDo = StringUtils.format("Can't find segments from inputSpec[%s], nothing to do.", ioConfig.getInputSpec());
    log.warn(nothingToDo);
    return TaskStatus.failure(getId(), nothingToDo);
  }

  registerResourceCloserOnAbnormalExit(currentSubTaskHolder);
  final int specCount = subTasks.size();
  log.info("Generated [%d] compaction task specs", specCount);

  int failures = 0;
  for (ParallelIndexSupervisorTask subTask : subTasks) {
    final String specJson = toolbox.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(subTask);

    // The holder refusing the sub-task means this task has been asked to stop.
    if (!currentSubTaskHolder.setTask(subTask)) {
      final String stopped = "Task was asked to stop. Finish as failed.";
      log.info(stopped);
      return TaskStatus.failure(getId(), stopped);
    }

    try {
      if (!subTask.isReady(toolbox.getTaskActionClient())) {
        failures++;
        log.warn("indexSpec is not ready: [%s].\nTrying the next indexSpec.", specJson);
      } else {
        log.info("Running indexSpec: " + specJson);
        final TaskStatus subTaskStatus = subTask.run(toolbox);
        if (!subTaskStatus.isSuccess()) {
          failures++;
          log.warn("Failed to run indexSpec: [%s].\nTrying the next indexSpec.", specJson);
        }
      }
    } catch (Exception e) {
      // A failing sub-task must not prevent the remaining ones from being attempted.
      failures++;
      log.warn(e, "Failed to run indexSpec: [%s].\nTrying the next indexSpec.", specJson);
    }
  }

  final String summary = StringUtils.format("Ran [%d] specs, [%d] succeeded, [%d] failed", specCount, specCount - failures, failures);
  log.info(summary);
  if (failures == 0) {
    return TaskStatus.success(getId());
  }
  return TaskStatus.failure(getId(), summary);
}
Use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.
Class InputSourceProcessor, method process.
/**
 * Opens the given {@link InputSource}, reads its rows through an {@link InputSourceReader} and hands each
 * row to the {@link BatchAppenderatorDriver}, which creates the new segments.
 *
 * <p>When {@code partitionsSpec} is a {@link DynamicPartitionsSpec}, segments may be pushed incrementally
 * while reading; in every case all remaining segments are pushed once the input is exhausted.
 *
 * @return {@link SegmentsAndCommitMetadata} returned by the final push after all input was processed.
 * @throws ISE if the driver fails to add a row
 */
public static SegmentsAndCommitMetadata process(DataSchema dataSchema, BatchAppenderatorDriver driver, PartitionsSpec partitionsSpec, InputSource inputSource, @Nullable InputFormat inputFormat, File tmpDir, SequenceNameFunction sequenceNameFunction, IndexTaskInputRowIteratorBuilder inputRowIteratorBuilder, RowIngestionMeters buildSegmentsMeters, ParseExceptionHandler parseExceptionHandler, long pushTimeout) throws IOException, InterruptedException, ExecutionException, TimeoutException {
  @Nullable final DynamicPartitionsSpec dynamicSpec;
  if (partitionsSpec instanceof DynamicPartitionsSpec) {
    dynamicSpec = (DynamicPartitionsSpec) partitionsSpec;
  } else {
    dynamicSpec = null;
  }
  final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();

  try (
      final CloseableIterator<InputRow> sourceRows = AbstractBatchIndexTask.inputSourceReader(
          tmpDir,
          dataSchema,
          inputSource,
          inputFormat,
          AbstractBatchIndexTask.defaultRowFilter(granularitySpec),
          buildSegmentsMeters,
          parseExceptionHandler
      );
      final HandlingInputRowIterator rows = inputRowIteratorBuilder
          .delegate(sourceRows)
          .granularitySpec(granularitySpec)
          .build()
  ) {
    while (rows.hasNext()) {
      final InputRow row = rows.next();
      if (row == null) {
        continue;
      }

      // IndexTaskInputRowIteratorBuilder.absentBucketIntervalConsumer() ensures the interval will be present here
      @SuppressWarnings("OptionalGetWithoutIsPresent")
      final Interval bucket = granularitySpec.bucketInterval(row.getTimestamp()).get();
      final String sequenceName = sequenceNameFunction.getSequenceName(bucket, row);

      final AppenderatorDriverAddResult added = driver.add(row, sequenceName);
      if (!added.isOk()) {
        throw new ISE("Failed to add a row with timestamp[%s]", row.getTimestamp());
      }

      // Incremental segment publishing is allowed only when rollup doesn't have to be perfect.
      if (dynamicSpec == null) {
        continue;
      }
      final boolean pushNow = added.isPushRequired(
          dynamicSpec.getMaxRowsPerSegment(),
          dynamicSpec.getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS)
      );
      if (pushNow) {
        // Some segments may be waiting to be pushed even though no more rows will ever be added to them.
        // Leaving them unpushed keeps the appenderator's remaining space small, which could lead to
        // smaller segments.
        final SegmentsAndCommitMetadata intermediate = driver.pushAllAndClear(pushTimeout);
        LOG.debugSegments(intermediate.getSegments(), "Pushed segments");
      }
    }

    final SegmentsAndCommitMetadata finalPush = driver.pushAllAndClear(pushTimeout);
    LOG.debugSegments(finalPush.getSegments(), "Pushed segments");
    return finalPush;
  }
}
Use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.
Class AbstractBatchIndexTask, method compactionStateAnnotateFunction.
/**
 * Creates the function that stamps a set of segments with the {@link CompactionState} derived from the
 * given ingestion spec.
 *
 * @param storeCompactionState when false, the returned function is the identity and segments are untouched
 * @param toolbox              supplies the JSON mapper used to convert spec parts into plain maps/lists
 * @param ingestionSpec        spec whose tuning config and data schema describe the compaction state
 * @return a function mapping each segment to a copy carrying the compaction state, or the identity function
 */
public static Function<Set<DataSegment>, Set<DataSegment>> compactionStateAnnotateFunction(boolean storeCompactionState, TaskToolbox toolbox, IngestionSpec ingestionSpec) {
  if (!storeCompactionState) {
    return Function.identity();
  }

  final TuningConfig tuning = ingestionSpec.getTuningConfig();
  final GranularitySpec granularity = ingestionSpec.getDataSchema().getGranularitySpec();

  // We do not need to store dimensionExclusions and spatialDimensions since auto compaction does not support them
  final DimensionsSpec storedDimensions;
  if (ingestionSpec.getDataSchema().getDimensionsSpec() == null) {
    storedDimensions = null;
  } else {
    storedDimensions = new DimensionsSpec(ingestionSpec.getDataSchema().getDimensionsSpec().getDimensions());
  }

  // We only need to store the filter since that is the only field auto compaction supports
  final Map<String, Object> storedTransform;
  if (ingestionSpec.getDataSchema().getTransformSpec() == null
      || TransformSpec.NONE.equals(ingestionSpec.getDataSchema().getTransformSpec())) {
    storedTransform = null;
  } else {
    storedTransform = new ClientCompactionTaskTransformSpec(
        ingestionSpec.getDataSchema().getTransformSpec().getFilter()
    ).asMap(toolbox.getJsonMapper());
  }

  final List<Object> storedMetrics;
  if (ingestionSpec.getDataSchema().getAggregators() == null) {
    storedMetrics = null;
  } else {
    storedMetrics = toolbox.getJsonMapper().convertValue(
        ingestionSpec.getDataSchema().getAggregators(),
        new TypeReference<List<Object>>() {
        }
    );
  }

  final CompactionState state = new CompactionState(
      tuning.getPartitionsSpec(),
      storedDimensions,
      storedMetrics,
      storedTransform,
      tuning.getIndexSpec().asMap(toolbox.getJsonMapper()),
      granularity.asMap(toolbox.getJsonMapper())
  );

  return segments -> {
    return segments.stream()
                   .map(segment -> segment.withLastCompactionState(state))
                   .collect(Collectors.toSet());
  };
}
Use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.
Class InputSourceSamplerTest, method testWithRollup.
/**
 * Samples the test rows with a DAY/HOUR {@link UniformGranularitySpec} and a long-sum aggregator on
 * {@code met1}, then checks the row counts and each returned row.
 * NOTE(review): the {@code true} passed to UniformGranularitySpec is presumably the rollup flag, matching
 * the test name; confirm against the constructor.
 */
@Test
public void testWithRollup() throws IOException {
  final TimestampSpec tsSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dims = new DimensionsSpec(null);
  final AggregatorFactory[] aggregators = { new LongSumAggregatorFactory("met1", "met1") };
  final GranularitySpec granularity = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
  final DataSchema schema = createDataSchema(tsSpec, dims, aggregators, granularity, null);
  final InputSource source = createInputSource(getTestRows(), schema);
  final InputFormat format = createInputFormat();

  final SamplerResponse sampled = inputSourceSampler.sample(source, format, schema, null);

  Assert.assertEquals(6, sampled.getNumRowsRead());
  Assert.assertEquals(5, sampled.getNumRowsIndexed());
  Assert.assertEquals(4, sampled.getData().size());

  final List<SamplerResponseRow> rows = sampled.getData();

  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(0),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1", "foo")
              .put("dim2", null)
              .put("met1", 6L)
              .build(),
          null,
          null
      ),
      rows.get(0)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(3),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1", "foo2")
              .put("dim2", null)
              .put("met1", 4L)
              .build(),
          null,
          null
      ),
      rows.get(1)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(4),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1", "foo")
              .put("dim2", "bar")
              .put("met1", 5L)
              .build(),
          null,
          null
      ),
      rows.get(2)
  );
  // The last expected row has no parsed data and carries the unparseable-timestamp message.
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
      rows.get(3)
  );
}
Use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.
Class InputSourceSamplerTest, method testWithFilter.
/**
 * Samples the test rows through a {@link TransformSpec} whose filter selects {@code dim1 == "foo"}, using
 * the same DAY/HOUR granularity spec and long-sum aggregator on {@code met1}, then checks the row counts
 * and each returned row.
 */
@Test
public void testWithFilter() throws IOException {
  final TimestampSpec tsSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dims = new DimensionsSpec(null);
  final TransformSpec fooOnly = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), null);
  final AggregatorFactory[] aggregators = { new LongSumAggregatorFactory("met1", "met1") };
  final GranularitySpec granularity = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
  final DataSchema schema = createDataSchema(tsSpec, dims, aggregators, granularity, fooOnly);
  final InputSource source = createInputSource(getTestRows(), schema);
  final InputFormat format = createInputFormat();

  final SamplerResponse sampled = inputSourceSampler.sample(source, format, schema, null);

  Assert.assertEquals(5, sampled.getNumRowsRead());
  Assert.assertEquals(4, sampled.getNumRowsIndexed());
  Assert.assertEquals(3, sampled.getData().size());

  final List<SamplerResponseRow> rows = sampled.getData();

  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(0),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1", "foo")
              .put("dim2", null)
              .put("met1", 6L)
              .build(),
          null,
          null
      ),
      rows.get(0)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(4),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1", "foo")
              .put("dim2", "bar")
              .put("met1", 5L)
              .build(),
          null,
          null
      ),
      rows.get(1)
  );
  // The last expected row has no parsed data and carries the unparseable-timestamp message.
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
      rows.get(2)
  );
}
Aggregations