Search in sources:

Example 1 with SegmentCacheManager

Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.

From class SegmentLoadDropHandlerCacheTest, method setup:

@Before
public void setup() throws IOException {
    storageLoc = new TestStorageLocation(temporaryFolder);
    // Single storage location, capped at MAX_SIZE, with the info dir under the same temp folder.
    SegmentLoaderConfig config = new SegmentLoaderConfig()
        .withLocations(Collections.singletonList(storageLoc.toStorageLocationConfig(MAX_SIZE, null)))
        .withInfoDir(storageLoc.getInfoDir());
    objectMapper = TestHelper.makeJsonMapper();
    objectMapper.registerSubtypes(TestLoadSpec.class);
    objectMapper.registerSubtypes(TestSegmentizerFactory.class);
    // The cache manager backs both the SegmentManager and the load/drop handler under test.
    SegmentCacheManager cacheManager = new SegmentLocalCacheManager(config, objectMapper);
    SegmentManager segmentManager = new SegmentManager(
        new SegmentLocalCacheLoader(cacheManager, TestIndex.INDEX_IO, objectMapper)
    );
    segmentAnnouncer = Mockito.mock(DataSegmentAnnouncer.class);
    loadDropHandler = new SegmentLoadDropHandler(
        objectMapper, config, segmentAnnouncer,
        Mockito.mock(DataSegmentServerAnnouncer.class),
        segmentManager, cacheManager,
        new ServerTypeConfig(ServerType.HISTORICAL)
    );
    EmittingLogger.registerEmitter(new NoopServiceEmitter());
}
Also used: SegmentCacheManager (org.apache.druid.segment.loading.SegmentCacheManager), SegmentManager (org.apache.druid.server.SegmentManager), SegmentLocalCacheManager (org.apache.druid.segment.loading.SegmentLocalCacheManager), SegmentLocalCacheLoader (org.apache.druid.segment.loading.SegmentLocalCacheLoader), NoopServiceEmitter (org.apache.druid.server.metrics.NoopServiceEmitter), SegmentLoaderConfig (org.apache.druid.segment.loading.SegmentLoaderConfig), ServerTypeConfig (org.apache.druid.guice.ServerTypeConfig), Before (org.junit.Before)
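
For orientation, the cache manager built in this setup follows a simple contract: materialize a segment's files on local disk on demand, and free the space afterwards. A minimal sketch of that lifecycle follows; getSegmentFiles appears throughout these examples, while the cleanup call and the segment variable are assumptions made for illustration:

// Sketch only: the typical download-then-release lifecycle of a SegmentCacheManager.
// cleanup(segment) is assumed to be the matching release call on the same interface.
File segmentDir = cacheManager.getSegmentFiles(segment); // downloads/unpacks if not cached
try {
    // ... open and query the segment files under segmentDir ...
} finally {
    cacheManager.cleanup(segment); // frees the local storage location
}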

Example 2 with SegmentCacheManager

Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.

From class CompactionTaskRunTest, method getCSVFormatRowsFromSegments:

private List<String> getCSVFormatRowsFromSegments(List<DataSegment> segments) throws Exception {
    final File cacheDir = temporaryFolder.newFolder();
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir);
    List<Cursor> cursors = new ArrayList<>();
    for (DataSegment segment : segments) {
        // Pull the segment files into the local cache, then open cursors over the segment's interval.
        final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
        final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
            new QueryableIndexStorageAdapter(testUtils.getTestIndexIO().loadIndex(segmentFile)),
            segment.getInterval()
        );
        final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(
            null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null
        );
        cursors.addAll(cursorSequence.toList());
    }
    List<String> rowsFromSegment = new ArrayList<>();
    for (Cursor cursor : cursors) {
        cursor.reset();
        // Selectors are bound to the cursor, so create them once per cursor rather than once per row.
        final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("ts", "ts"));
        final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
        final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("val", "val"));
        while (!cursor.isDone()) {
            final Object dimObject = selector2.getObject();
            final String dimVal;
            if (dimObject instanceof String) {
                dimVal = (String) dimObject;
            } else if (dimObject instanceof List) {
                // Multi-value dimension: join the values with '|' into a single CSV field.
                @SuppressWarnings("unchecked")
                final List<String> dimValues = (List<String>) dimObject;
                dimVal = String.join("|", dimValues);
            } else {
                dimVal = null;
            }
            rowsFromSegment.add(makeCSVFormatRow(selector1.getObject().toString(), dimVal, selector3.defaultGetObject().toString()));
            cursor.advance();
        }
    }
    return rowsFromSegment;
}
Also used: DimensionSelector (org.apache.druid.segment.DimensionSelector), ArrayList (java.util.ArrayList), QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter), Cursor (org.apache.druid.segment.Cursor), DataSegment (org.apache.druid.timeline.DataSegment), DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec), SegmentCacheManager (org.apache.druid.segment.loading.SegmentCacheManager), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), File (java.io.File), WindowedStorageAdapter (org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter)
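
The makeCSVFormatRow helper is not part of this snippet. Given its call site (three string fields per row), a plausible stand-in is a plain comma join; this is a hypothetical reconstruction, not necessarily the test's actual implementation:

// Hypothetical stand-in for the helper referenced above.
private static String makeCSVFormatRow(String... values) {
    return String.join(",", values);
}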

Example 3 with SegmentCacheManager

Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.

From class IngestSegmentFirehoseFactory, method connect:

@Override
public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException {
    log.debug("Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", dataSource, interval, segmentIds);
    final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = getTimeline();
    // Download all segments locally.
    // Note: this requires enough local storage space to fit all of the segments, even though
    // IngestSegmentFirehose iterates over the segments in series. We may want to change this
    // to download files lazily, perhaps sharing code with PrefetchableTextFilesFirehoseFactory.
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    final Map<DataSegment, File> segmentFileMap = Maps.newLinkedHashMap();
    for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
            final DataSegment segment = chunk.getObject();
            // Fetch each distinct segment's files at most once.
            segmentFileMap.computeIfAbsent(segment, k -> {
                try {
                    return segmentCacheManager.getSegmentFiles(segment);
                } catch (SegmentLoadingException e) {
                    throw new RuntimeException(e);
                }
            });
        }
    }
    final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(
        dimensions,
        inputRowParser.getParseSpec().getDimensionsSpec(),
        timeLineSegments
    );
    final List<String> metricsList = metrics == null ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments) : metrics;
    // Wrap every downloaded segment in a storage adapter windowed to its holder's interval.
    final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(
        timeLineSegments,
        new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {
            @Override
            public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                return Iterables.transform(
                    holder.getObject(),
                    new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {
                        @Override
                        public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                            final DataSegment segment = input.getObject();
                            try {
                                return new WindowedStorageAdapter(
                                    new QueryableIndexStorageAdapter(
                                        indexIO.loadIndex(
                                            Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId())
                                        )
                                    ),
                                    holder.getInterval()
                                );
                            } catch (IOException e) {
                                throw new RuntimeException(e);
                            }
                        }
                    }
                );
            }
        }
    )));
    final TransformSpec transformSpec = TransformSpec.fromInputRowParser(inputRowParser);
    return new IngestSegmentFirehose(adapters, transformSpec, dims, metricsList, dimFilter);
}
Also used: IngestSegmentFirehose (org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose), SegmentLoadingException (org.apache.druid.segment.loading.SegmentLoadingException), QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter), IOException (java.io.IOException), DataSegment (org.apache.druid.timeline.DataSegment), TransformSpec (org.apache.druid.segment.transform.TransformSpec), SegmentCacheManager (org.apache.druid.segment.loading.SegmentCacheManager), Function (com.google.common.base.Function), TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder), PartitionChunk (org.apache.druid.timeline.partition.PartitionChunk), File (java.io.File), WindowedStorageAdapter (org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter)
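
The inline note in connect() about eager downloads suggests a lazy alternative. Here is a sketch of that idea using Guava's Suppliers.memoize, so each segment is fetched at most once and only when first read; this is an illustration of the comment's suggestion, not code from the Druid repository, and allSegments is a placeholder for the flattened timeline chunks:

// Sketch: defer each download until first access, memoized so it runs once.
final Map<DataSegment, com.google.common.base.Supplier<File>> lazyFiles = Maps.newLinkedHashMap();
for (DataSegment segment : allSegments) {
    lazyFiles.put(segment, com.google.common.base.Suppliers.memoize(() -> {
        try {
            return segmentCacheManager.getSegmentFiles(segment);
        } catch (SegmentLoadingException e) {
            throw new RuntimeException(e);
        }
    }));
}
// lazyFiles.get(segment).get() triggers the download on first use only.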

Example 4 with SegmentCacheManager

Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.

From class DruidInputSource, method fixedFormatReader:

@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) {
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();
    // Turn every partition chunk in the timeline into a lazily iterated input entity.
    final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable
        .from(timeline)
        .transformAndConcat(holder -> {
            // noinspection ConstantConditions
            final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
            // noinspection ConstantConditions
            return FluentIterable
                .from(partitionHolder)
                .transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
        })
        .iterator();
    final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
    final InputRowSchema inputRowSchemaToUse;
    if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
        // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
        LOG.warn(
            "Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with "
            + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false."
        );
        inputRowSchemaToUse = new InputRowSchema(
            new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null),
            inputRowSchema.getDimensionsSpec(),
            inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME)
        );
    } else {
        inputRowSchemaToUse = inputRowSchema;
    }
    if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn())
        && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
        // Slight chance the user did this intentionally, but not likely. Log a warning.
        LOG.warn(
            "The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the "
            + "column as-is, switch formats.",
            inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(),
            STANDARD_TIME_COLUMN_FORMATS
        );
    }
    return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
Also used: SegmentCacheManager (org.apache.druid.segment.loading.SegmentCacheManager), JsonProperty (com.fasterxml.jackson.annotation.JsonProperty), SegmentCacheManagerFactory (org.apache.druid.indexing.common.SegmentCacheManagerFactory), TaskConfig (org.apache.druid.indexing.common.config.TaskConfig), Comparators (org.apache.druid.java.util.common.guava.Comparators), AbstractInputSource (org.apache.druid.data.input.AbstractInputSource), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), FluentIterable (com.google.common.collect.FluentIterable), Map (java.util.Map), InputSourceReader (org.apache.druid.data.input.InputSourceReader), IAE (org.apache.druid.java.util.common.IAE), JacksonInject (com.fasterxml.jackson.annotation.JacksonInject), RetryPolicyFactory (org.apache.druid.indexing.common.RetryPolicyFactory), InputFormat (org.apache.druid.data.input.InputFormat), Collection (java.util.Collection), SplitHintSpec (org.apache.druid.data.input.SplitHintSpec), SplittableInputSource (org.apache.druid.data.input.impl.SplittableInputSource), ISE (org.apache.druid.java.util.common.ISE), Objects (java.util.Objects), MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec), PartitionHolder (org.apache.druid.timeline.partition.PartitionHolder), List (java.util.List), Stream (java.util.stream.Stream), DimFilter (org.apache.druid.query.filter.DimFilter), DataSegment (org.apache.druid.timeline.DataSegment), SortedMap (java.util.SortedMap), Logger (org.apache.druid.java.util.common.logger.Logger), Streams (org.apache.druid.utils.Streams), InputSplit (org.apache.druid.data.input.InputSplit), Duration (org.joda.time.Duration), SegmentsSplitHintSpec (org.apache.druid.data.input.SegmentsSplitHintSpec), HashMap (java.util.HashMap), InputRowSchema (org.apache.druid.data.input.InputRowSchema), Iterators (com.google.common.collect.Iterators), ArrayList (java.util.ArrayList), PartitionChunk (org.apache.druid.timeline.partition.PartitionChunk), Interval (org.joda.time.Interval), ColumnHolder (org.apache.druid.segment.column.ColumnHolder), ImmutableList (com.google.common.collect.ImmutableList), WindowedSegmentId (org.apache.druid.indexing.firehose.WindowedSegmentId), CoordinatorClient (org.apache.druid.client.coordinator.CoordinatorClient), ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom), InputFileAttribute (org.apache.druid.data.input.InputFileAttribute), Nullable (javax.annotation.Nullable), RetryPolicy (org.apache.druid.indexing.common.RetryPolicy), VersionedIntervalTimeline (org.apache.druid.timeline.VersionedIntervalTimeline), Iterator (java.util.Iterator), TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder), File (java.io.File), InputEntityIteratingReader (org.apache.druid.data.input.impl.InputEntityIteratingReader), TreeMap (java.util.TreeMap), JsonCreator (com.fasterxml.jackson.annotation.JsonCreator), JsonInclude (com.fasterxml.jackson.annotation.JsonInclude), Preconditions (com.google.common.base.Preconditions), Comparator (java.util.Comparator), IndexIO (org.apache.druid.segment.IndexIO), Collections (java.util.Collections)
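
For completeness, a hedged sketch of how a caller might consume the InputSourceReader this method returns. It assumes the standard InputSourceReader.read() contract, which yields a CloseableIterator of InputRow; the reader variable stands for the method's return value:

// Sketch: drain the rows produced by fixedFormatReader (names assume the
// standard InputSourceReader/CloseableIterator contract).
try (CloseableIterator<InputRow> rows = reader.read()) {
    while (rows.hasNext()) {
        final InputRow row = rows.next();
        // ... hand the row to the ingestion pipeline ...
    }
}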

Example 5 with SegmentCacheManager

Use of org.apache.druid.segment.loading.SegmentCacheManager in project druid by druid-io.

From class AbstractMultiPhaseParallelIndexingTest, method loadSegment:

private Segment loadSegment(DataSegment dataSegment, File tempSegmentDir) {
    final SegmentCacheManager cacheManager = new SegmentCacheManagerFactory(getObjectMapper()).manufacturate(tempSegmentDir);
    final SegmentLoader loader = new SegmentLocalCacheLoader(cacheManager, getIndexIO(), getObjectMapper());
    try {
        // Load eagerly (lazy = false); the NOOP callback ignores lazy-load failures.
        return loader.getSegment(dataSegment, false, SegmentLazyLoadFailCallback.NOOP);
    } catch (SegmentLoadingException e) {
        throw new RuntimeException(e);
    }
}
Also used: SegmentCacheManager (org.apache.druid.segment.loading.SegmentCacheManager), SegmentLoadingException (org.apache.druid.segment.loading.SegmentLoadingException), SegmentCacheManagerFactory (org.apache.druid.indexing.common.SegmentCacheManagerFactory), SegmentLocalCacheLoader (org.apache.druid.segment.loading.SegmentLocalCacheLoader), SegmentLoader (org.apache.druid.segment.loading.SegmentLoader)
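
A short usage sketch for this helper: Segment is Closeable in Druid, so callers should release it when done. The assertion body is a placeholder, and the enclosing test method is assumed to declare throws IOException:

// Sketch: load, inspect, and release a segment in a test.
final Segment segment = loadSegment(dataSegment, tempSegmentDir);
try {
    // ... e.g. assert on segment.asQueryableIndex() row counts ...
} finally {
    segment.close(); // Segment extends Closeable
}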

Aggregations

SegmentCacheManager (org.apache.druid.segment.loading.SegmentCacheManager): 5
File (java.io.File): 3
DataSegment (org.apache.druid.timeline.DataSegment): 3
ImmutableList (com.google.common.collect.ImmutableList): 2
ArrayList (java.util.ArrayList): 2
List (java.util.List): 2
SegmentCacheManagerFactory (org.apache.druid.indexing.common.SegmentCacheManagerFactory): 2
SegmentLoadingException (org.apache.druid.segment.loading.SegmentLoadingException): 2
TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder): 2
PartitionChunk (org.apache.druid.timeline.partition.PartitionChunk): 2
JacksonInject (com.fasterxml.jackson.annotation.JacksonInject): 1
JsonCreator (com.fasterxml.jackson.annotation.JsonCreator): 1
JsonInclude (com.fasterxml.jackson.annotation.JsonInclude): 1
JsonProperty (com.fasterxml.jackson.annotation.JsonProperty): 1
Function (com.google.common.base.Function): 1
Preconditions (com.google.common.base.Preconditions): 1
FluentIterable (com.google.common.collect.FluentIterable): 1
Iterators (com.google.common.collect.Iterators): 1
IOException (java.io.IOException): 1
Collection (java.util.Collection): 1