Search in sources :

Example 1 with InputEntityIteratingReader

use of org.apache.druid.data.input.impl.InputEntityIteratingReader in project druid by druid-io.

the class DruidInputSource method fixedFormatReader.

@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) {
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();
    final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable.from(timeline).transformAndConcat(holder -> {
        // noinspection ConstantConditions
        final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
        // noinspection ConstantConditions
        return FluentIterable.from(partitionHolder).transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
    }).iterator();
    final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
    final InputRowSchema inputRowSchemaToUse;
    if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
        // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
        LOG.warn("Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with " + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false.");
        inputRowSchemaToUse = new InputRowSchema(new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null), inputRowSchema.getDimensionsSpec(), inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME));
    } else {
        inputRowSchemaToUse = inputRowSchema;
    }
    if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn()) && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
        // Slight chance the user did this intentionally, but not likely. Log a warning.
        LOG.warn("The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the " + "column as-is, switch formats.", inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(), STANDARD_TIME_COLUMN_FORMATS);
    }
    return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
Also used : SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) Comparators(org.apache.druid.java.util.common.guava.Comparators) AbstractInputSource(org.apache.druid.data.input.AbstractInputSource) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) FluentIterable(com.google.common.collect.FluentIterable) Map(java.util.Map) InputSourceReader(org.apache.druid.data.input.InputSourceReader) IAE(org.apache.druid.java.util.common.IAE) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) SplittableInputSource(org.apache.druid.data.input.impl.SplittableInputSource) ISE(org.apache.druid.java.util.common.ISE) Objects(java.util.Objects) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) Stream(java.util.stream.Stream) DimFilter(org.apache.druid.query.filter.DimFilter) DataSegment(org.apache.druid.timeline.DataSegment) SortedMap(java.util.SortedMap) Logger(org.apache.druid.java.util.common.logger.Logger) Streams(org.apache.druid.utils.Streams) InputSplit(org.apache.druid.data.input.InputSplit) Duration(org.joda.time.Duration) SegmentsSplitHintSpec(org.apache.druid.data.input.SegmentsSplitHintSpec) HashMap(java.util.HashMap) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) InputFileAttribute(org.apache.druid.data.input.InputFileAttribute) Nullable(javax.annotation.Nullable) RetryPolicy(org.apache.druid.indexing.common.RetryPolicy) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) Iterator(java.util.Iterator) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) File(java.io.File) InputEntityIteratingReader(org.apache.druid.data.input.impl.InputEntityIteratingReader) TreeMap(java.util.TreeMap) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) Preconditions(com.google.common.base.Preconditions) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) Comparator(java.util.Comparator) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityIteratingReader(org.apache.druid.data.input.impl.InputEntityIteratingReader)

Aggregations

JacksonInject (com.fasterxml.jackson.annotation.JacksonInject)1 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)1 JsonInclude (com.fasterxml.jackson.annotation.JsonInclude)1 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)1 Preconditions (com.google.common.base.Preconditions)1 FluentIterable (com.google.common.collect.FluentIterable)1 ImmutableList (com.google.common.collect.ImmutableList)1 Iterators (com.google.common.collect.Iterators)1 File (java.io.File)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Comparator (java.util.Comparator)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 SortedMap (java.util.SortedMap)1 TreeMap (java.util.TreeMap)1