Search in sources :

Example 26 with GranularitySpec

use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.

the class CompactionTask method runTask.

@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = createIngestionSchema(toolbox, getTaskLockHelper().getLockGranularityToUse(), segmentProvider, partitionConfigurationManager, dimensionsSpec, transformSpec, metricsSpec, granularitySpec, toolbox.getCoordinatorClient(), segmentCacheManagerFactory, retryPolicyFactory, ioConfig.isDropExisting());
    final List<ParallelIndexSupervisorTask> indexTaskSpecs = IntStream.range(0, ingestionSpecs.size()).mapToObj(i -> {
        // The ID of SubtaskSpecs is used as the base sequenceName in segment allocation protocol.
        // The indexing tasks generated by the compaction task should use different sequenceNames
        // so that they can allocate valid segment IDs with no duplication.
        ParallelIndexIngestionSpec ingestionSpec = ingestionSpecs.get(i);
        final String baseSequenceName = createIndexTaskSpecId(i);
        return newTask(baseSequenceName, ingestionSpec);
    }).collect(Collectors.toList());
    if (indexTaskSpecs.isEmpty()) {
        String msg = StringUtils.format("Can't find segments from inputSpec[%s], nothing to do.", ioConfig.getInputSpec());
        log.warn(msg);
        return TaskStatus.failure(getId(), msg);
    } else {
        registerResourceCloserOnAbnormalExit(currentSubTaskHolder);
        final int totalNumSpecs = indexTaskSpecs.size();
        log.info("Generated [%d] compaction task specs", totalNumSpecs);
        int failCnt = 0;
        for (ParallelIndexSupervisorTask eachSpec : indexTaskSpecs) {
            final String json = toolbox.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(eachSpec);
            if (!currentSubTaskHolder.setTask(eachSpec)) {
                String errMsg = "Task was asked to stop. Finish as failed.";
                log.info(errMsg);
                return TaskStatus.failure(getId(), errMsg);
            }
            try {
                if (eachSpec.isReady(toolbox.getTaskActionClient())) {
                    log.info("Running indexSpec: " + json);
                    final TaskStatus eachResult = eachSpec.run(toolbox);
                    if (!eachResult.isSuccess()) {
                        failCnt++;
                        log.warn("Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
                    }
                } else {
                    failCnt++;
                    log.warn("indexSpec is not ready: [%s].\nTrying the next indexSpec.", json);
                }
            } catch (Exception e) {
                failCnt++;
                log.warn(e, "Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
            }
        }
        String msg = StringUtils.format("Ran [%d] specs, [%d] succeeded, [%d] failed", totalNumSpecs, totalNumSpecs - failCnt, failCnt);
        log.info(msg);
        return failCnt == 0 ? TaskStatus.success(getId()) : TaskStatus.failure(getId(), msg);
    }
}
Also used : Verify(org.apache.curator.shaded.com.google.common.base.Verify) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Comparators(org.apache.druid.java.util.common.guava.Comparators) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) IAE(org.apache.druid.java.util.common.IAE) MultiValueHandling(org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling) BiMap(com.google.common.collect.BiMap) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) Property(org.apache.druid.indexer.Property) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) Segments(org.apache.druid.indexing.overlord.Segments) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) LockGranularity(org.apache.druid.indexing.common.LockGranularity) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) Intervals(org.apache.druid.java.util.common.Intervals) Duration(org.joda.time.Duration) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) HashMap(java.util.HashMap) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) CompactSegments(org.apache.druid.server.coordinator.duty.CompactSegments) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) Nonnull(javax.annotation.Nonnull) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) RE(org.apache.druid.java.util.common.RE) NonnullPair(org.apache.druid.java.util.common.NonnullPair) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IOException(java.io.IOException) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) File(java.io.File) HashBiMap(com.google.common.collect.HashBiMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) DimensionHandler(org.apache.druid.segment.DimensionHandler) TreeMap(java.util.TreeMap) Checks(org.apache.druid.indexer.Checks) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) TaskStatus(org.apache.druid.indexer.TaskStatus) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) IOException(java.io.IOException)

Example 27 with GranularitySpec

use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.

the class InputSourceProcessor method process.

/**
 * This method opens the given {@link InputSource} and processes data via {@link InputSourceReader}.
 * All read data is consumed by {@link BatchAppenderatorDriver} which creates new segments.
 * All created segments are pushed when all input data is processed successfully.
 *
 * @return {@link SegmentsAndCommitMetadata} for the pushed segments.
 */
public static SegmentsAndCommitMetadata process(DataSchema dataSchema, BatchAppenderatorDriver driver, PartitionsSpec partitionsSpec, InputSource inputSource, @Nullable InputFormat inputFormat, File tmpDir, SequenceNameFunction sequenceNameFunction, IndexTaskInputRowIteratorBuilder inputRowIteratorBuilder, RowIngestionMeters buildSegmentsMeters, ParseExceptionHandler parseExceptionHandler, long pushTimeout) throws IOException, InterruptedException, ExecutionException, TimeoutException {
    @Nullable final DynamicPartitionsSpec dynamicPartitionsSpec = partitionsSpec instanceof DynamicPartitionsSpec ? (DynamicPartitionsSpec) partitionsSpec : null;
    final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
    try (final CloseableIterator<InputRow> inputRowIterator = AbstractBatchIndexTask.inputSourceReader(tmpDir, dataSchema, inputSource, inputFormat, AbstractBatchIndexTask.defaultRowFilter(granularitySpec), buildSegmentsMeters, parseExceptionHandler);
        final HandlingInputRowIterator iterator = inputRowIteratorBuilder.delegate(inputRowIterator).granularitySpec(granularitySpec).build()) {
        while (iterator.hasNext()) {
            final InputRow inputRow = iterator.next();
            if (inputRow == null) {
                continue;
            }
            // IndexTaskInputRowIteratorBuilder.absentBucketIntervalConsumer() ensures the interval will be present here
            Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
            @SuppressWarnings("OptionalGetWithoutIsPresent") final Interval interval = optInterval.get();
            final String sequenceName = sequenceNameFunction.getSequenceName(interval, inputRow);
            final AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName);
            if (addResult.isOk()) {
                // incremental segment publishment is allowed only when rollup doesn't have to be perfect.
                if (dynamicPartitionsSpec != null) {
                    final boolean isPushRequired = addResult.isPushRequired(dynamicPartitionsSpec.getMaxRowsPerSegment(), dynamicPartitionsSpec.getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                    if (isPushRequired) {
                        // There can be some segments waiting for being pushed even though no more rows will be added to them
                        // in the future.
                        // If those segments are not pushed here, the remaining available space in appenderator will be kept
                        // small which could lead to smaller segments.
                        final SegmentsAndCommitMetadata pushed = driver.pushAllAndClear(pushTimeout);
                        LOG.debugSegments(pushed.getSegments(), "Pushed segments");
                    }
                }
            } else {
                throw new ISE("Failed to add a row with timestamp[%s]", inputRow.getTimestamp());
            }
        }
        final SegmentsAndCommitMetadata pushed = driver.pushAllAndClear(pushTimeout);
        LOG.debugSegments(pushed.getSegments(), "Pushed segments");
        return pushed;
    }
}
Also used : SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) HandlingInputRowIterator(org.apache.druid.data.input.HandlingInputRowIterator) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) InputRow(org.apache.druid.data.input.InputRow) ISE(org.apache.druid.java.util.common.ISE) Nullable(javax.annotation.Nullable) Interval(org.joda.time.Interval)

Example 28 with GranularitySpec

use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.

the class AbstractBatchIndexTask method compactionStateAnnotateFunction.

public static Function<Set<DataSegment>, Set<DataSegment>> compactionStateAnnotateFunction(boolean storeCompactionState, TaskToolbox toolbox, IngestionSpec ingestionSpec) {
    if (storeCompactionState) {
        TuningConfig tuningConfig = ingestionSpec.getTuningConfig();
        GranularitySpec granularitySpec = ingestionSpec.getDataSchema().getGranularitySpec();
        // We do not need to store dimensionExclusions and spatialDimensions since auto compaction does not support them
        DimensionsSpec dimensionsSpec = ingestionSpec.getDataSchema().getDimensionsSpec() == null ? null : new DimensionsSpec(ingestionSpec.getDataSchema().getDimensionsSpec().getDimensions());
        // We only need to store filter since that is the only field auto compaction support
        Map<String, Object> transformSpec = ingestionSpec.getDataSchema().getTransformSpec() == null || TransformSpec.NONE.equals(ingestionSpec.getDataSchema().getTransformSpec()) ? null : new ClientCompactionTaskTransformSpec(ingestionSpec.getDataSchema().getTransformSpec().getFilter()).asMap(toolbox.getJsonMapper());
        List<Object> metricsSpec = ingestionSpec.getDataSchema().getAggregators() == null ? null : toolbox.getJsonMapper().convertValue(ingestionSpec.getDataSchema().getAggregators(), new TypeReference<List<Object>>() {
        });
        final CompactionState compactionState = new CompactionState(tuningConfig.getPartitionsSpec(), dimensionsSpec, metricsSpec, transformSpec, tuningConfig.getIndexSpec().asMap(toolbox.getJsonMapper()), granularitySpec.asMap(toolbox.getJsonMapper()));
        return segments -> segments.stream().map(s -> s.withLastCompactionState(compactionState)).collect(Collectors.toSet());
    } else {
        return Function.identity();
    }
}
Also used : IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) Partitions(org.apache.druid.timeline.Partitions) CompactionState(org.apache.druid.timeline.CompactionState) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Optional(com.google.common.base.Optional) Map(java.util.Map) InputSourceReader(org.apache.druid.data.input.InputSourceReader) TaskLock(org.apache.druid.indexing.common.TaskLock) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Execs(org.apache.druid.java.util.common.concurrent.Execs) InputRowSchemas(org.apache.druid.indexing.input.InputRowSchemas) Predicate(java.util.function.Predicate) GuardedBy(com.google.errorprone.annotations.concurrent.GuardedBy) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) IngestionSpec(org.apache.druid.segment.indexing.IngestionSpec) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) MaxAllowedLocksExceededException(org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException) IngestSegmentFirehoseFactory(org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory) InputRow(org.apache.druid.data.input.InputRow) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) TaskLockType(org.apache.druid.indexing.common.TaskLockType) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) Granularity(org.apache.druid.java.util.common.granularity.Granularity) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) IntervalsByGranularity(org.apache.druid.java.util.common.granularity.IntervalsByGranularity) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) TimeChunkLockTryAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockTryAcquireAction) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) Function(java.util.function.Function) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) InputSource(org.apache.druid.data.input.InputSource) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) BiConsumer(java.util.function.BiConsumer) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) Period(org.joda.time.Period) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) Iterator(java.util.Iterator) ServiceMetricEvent(org.apache.druid.java.util.emitter.service.ServiceMetricEvent) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) IOException(java.io.IOException) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) Preconditions(com.google.common.base.Preconditions) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) TypeReference(com.fasterxml.jackson.core.type.TypeReference)

Example 29 with GranularitySpec

use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.

the class InputSourceSamplerTest method testWithRollup.

@Test
public void testWithRollup() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, null);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(4, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(3));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 30 with GranularitySpec

use of org.apache.druid.segment.indexing.granularity.GranularitySpec in project druid by druid-io.

the class InputSourceSamplerTest method testWithFilter.

@Test
public void testWithFilter() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), null);
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(5, response.getNumRowsRead());
    Assert.assertEquals(4, response.getNumRowsIndexed());
    Assert.assertEquals(3, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(2));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec)32 DataSchema (org.apache.druid.segment.indexing.DataSchema)21 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)15 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)14 Test (org.junit.Test)14 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)13 InputFormat (org.apache.druid.data.input.InputFormat)11 InputRow (org.apache.druid.data.input.InputRow)11 InputSource (org.apache.druid.data.input.InputSource)11 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)11 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)11 TransformSpec (org.apache.druid.segment.transform.TransformSpec)9 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)9 Interval (org.joda.time.Interval)9 Map (java.util.Map)8 SamplerResponse (org.apache.druid.client.indexing.SamplerResponse)8 SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow)8 RecordSupplierInputSource (org.apache.druid.indexing.seekablestream.RecordSupplierInputSource)8 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)7 InlineInputSource (org.apache.druid.data.input.impl.InlineInputSource)7