
Example 56 with QueryableIndex

use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.

Class AppenderatorImpl, method mergeAndPush.

/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
 * be run in the single-threaded pushExecutor.
 *
 * @param identifier    sink identifier
 * @param sink          sink to push
 * @param useUniquePath true if the segment should be written to a path with a unique identifier
 *
 * @return segment descriptor, or null if the sink is no longer valid
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean useUniquePath) {
    // noinspection ObjectEquality
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    }
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    for (FireHydrant hydrant : sink) {
        if (sink.isWritable()) {
            throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
        }
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
            }
        }
    }
    try {
        if (descriptorFile.exists()) {
            if (useUniquePath) {
                // Don't reuse the descriptor, because the caller asked for a unique path. Leave the old one as-is, since
                // it might serve some unknown purpose.
                log.debug("Segment[%s] already pushed, but we want a unique path, so will push again with a new path.", identifier);
            } else {
                log.info("Segment[%s] already pushed, skipping.", identifier);
                return objectMapper.readValue(descriptorFile, DataSegment.class);
            }
        }
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        final File mergedFile;
        final long mergeFinishTime;
        final long startTime = System.nanoTime();
        List<QueryableIndex> indexes = new ArrayList<>();
        Closer closer = Closer.create();
        try {
            for (FireHydrant fireHydrant : sink) {
                // if batch, swap/persist did not memory map the incremental index, we need it mapped now:
                if (!isOpenSegments()) {
                    // sanity
                    Pair<File, SegmentId> persistedMetadata = persistedHydrantMetadata.get(fireHydrant);
                    if (persistedMetadata == null) {
                        throw new ISE("Persisted metadata for batch hydrant [%s] is null!", fireHydrant);
                    }
                    File persistedFile = persistedMetadata.lhs;
                    SegmentId persistedSegmentId = persistedMetadata.rhs;
                    // sanity:
                    if (persistedFile == null) {
                        throw new ISE("Persisted file for batch hydrant [%s] is null!", fireHydrant);
                    } else if (persistedSegmentId == null) {
                        throw new ISE("Persisted segmentId for batch hydrant in file [%s] is null!", persistedFile.getPath());
                    }
                    fireHydrant.swapSegment(new QueryableIndexSegment(indexIO.loadIndex(persistedFile), persistedSegmentId));
                }
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
                indexes.add(queryableIndex);
                closer.register(segmentAndCloseable.rhs);
            }
            mergedFile = indexMerger.mergeQueryableIndex(
                indexes,
                schema.getGranularitySpec().isRollup(),
                schema.getAggregators(),
                schema.getDimensionsSpec(),
                mergedTarget,
                tuningConfig.getIndexSpec(),
                tuningConfig.getIndexSpecForIntermediatePersists(),
                new BaseProgressIndicator(),
                tuningConfig.getSegmentWriteOutMediumFactory(),
                tuningConfig.getMaxColumnsToMerge()
            );
            mergeFinishTime = System.nanoTime();
            log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        final DataSegment segmentToPush = sink.getSegment().withDimensions(
            IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())
        );
        // Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
        final DataSegment segment = RetryUtils.retry(
            () -> dataSegmentPusher.push(mergedFile, segmentToPush, useUniquePath),
            exception -> exception instanceof Exception,
            5
        );
        if (!isOpenSegments()) {
            // Drop the queryable indexes behind the hydrants; they are no longer needed, and holding them
            // can generate OOMs during merge if enough of them are held back...
            for (FireHydrant fireHydrant : sink) {
                fireHydrant.swapSegment(null);
            }
        }
        final long pushFinishTime = System.nanoTime();
        objectMapper.writeValue(descriptorFile, segment);
        log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw new RuntimeException(e);
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) SegmentId(org.apache.druid.timeline.SegmentId) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Nullable(javax.annotation.Nullable)
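
The core of mergeAndPush above is the reference-counted merge loop. Below is a condensed sketch of that pattern in isolation, not a helper from the project: it assumes the same Druid classes the example already imports (FireHydrant, Closer, IndexMerger, BaseProgressIndicator), and it assumes that schema and tuningConfig are the DataSchema and AppenderatorConfig held by the appenderator.

// A hedged sketch of the merge step used above: pin each hydrant's segment so it cannot be
// closed mid-merge, register the release with a Closer, then merge the QueryableIndexes into
// mergedTarget. Parameter types are assumptions based on AppenderatorImpl, not project code.
private static File mergeHydrants(
    Iterable<FireHydrant> hydrants,
    IndexMerger indexMerger,
    DataSchema schema,
    AppenderatorConfig tuningConfig,
    File mergedTarget
) throws IOException
{
    final List<QueryableIndex> indexes = new ArrayList<>();
    final Closer closer = Closer.create();
    try {
        for (FireHydrant hydrant : hydrants) {
            // getAndIncrementSegment() pins the segment; the returned Closeable decrements the count.
            Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = hydrant.getAndIncrementSegment();
            indexes.add(segmentAndCloseable.lhs.asQueryableIndex());
            closer.register(segmentAndCloseable.rhs);
        }
        return indexMerger.mergeQueryableIndex(
            indexes,
            schema.getGranularitySpec().isRollup(),
            schema.getAggregators(),
            schema.getDimensionsSpec(),
            mergedTarget,
            tuningConfig.getIndexSpec(),
            tuningConfig.getIndexSpecForIntermediatePersists(),
            new BaseProgressIndicator(),
            tuningConfig.getSegmentWriteOutMediumFactory(),
            tuningConfig.getMaxColumnsToMerge()
        );
    }
    catch (Throwable t) {
        throw closer.rethrow(t);
    }
    finally {
        closer.close();
    }
}

The Closer guarantees that every pinned segment is released whether the merge succeeds or throws, which is the same guarantee the try/catch/finally block in the example provides.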

Example 57 with QueryableIndex

use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.

Class BatchAppenderator, method mergeAndPush.

/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted.
 *
 * @param identifier    sink identifier
 * @param sink          sink to push
 * @return segment descriptor, or null if the sink is no longer valid
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink) {
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    if (sink.isWritable()) {
        throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
    }
    int numHydrants = 0;
    for (FireHydrant hydrant : sink) {
        if (!hydrant.hasSwapped()) {
            throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
        }
        numHydrants++;
    }
    SinkMetadata sm = sinksMetadata.get(identifier);
    if (sm == null) {
        log.warn("Sink metadata not found just before merge for identifier [%s]", identifier);
    } else if (numHydrants != sm.getNumHydrants()) {
        throw new ISE("Number of restored hydrants[%d] for identifier[%s] does not match expected value[%d]", numHydrants, identifier, sm.getNumHydrants());
    }
    try {
        if (descriptorFile.exists()) {
            // Already pushed.
            log.info("Segment[%s] already pushed, skipping.", identifier);
            return objectMapper.readValue(descriptorFile, DataSegment.class);
        }
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        final File mergedFile;
        final long mergeFinishTime;
        final long startTime = System.nanoTime();
        List<QueryableIndex> indexes = new ArrayList<>();
        Closer closer = Closer.create();
        try {
            for (FireHydrant fireHydrant : sink) {
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
                indexes.add(queryableIndex);
                closer.register(segmentAndCloseable.rhs);
            }
            mergedFile = indexMerger.mergeQueryableIndex(
                indexes,
                schema.getGranularitySpec().isRollup(),
                schema.getAggregators(),
                schema.getDimensionsSpec(),
                mergedTarget,
                tuningConfig.getIndexSpec(),
                tuningConfig.getIndexSpecForIntermediatePersists(),
                new BaseProgressIndicator(),
                tuningConfig.getSegmentWriteOutMediumFactory(),
                tuningConfig.getMaxColumnsToMerge()
            );
            mergeFinishTime = System.nanoTime();
            log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
        } catch (Throwable t) {
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
        // Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
        final DataSegment segment = RetryUtils.retry(
            // This appenderator is used only for the local indexing task so unique paths are not required
            () -> dataSegmentPusher.push(
                mergedFile,
                sink.getSegment().withDimensions(
                    IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())
                ),
                false
            ),
            exception -> exception instanceof Exception,
            5
        );
        // Drop the queryable indexes behind the hydrants; they are no longer needed, and holding them
        // can generate OOMs during merge if enough of them are held back...
        for (FireHydrant fireHydrant : sink) {
            fireHydrant.swapSegment(null);
        }
        // cleanup, sink no longer needed
        removeDirectory(computePersistDir(identifier));
        final long pushFinishTime = System.nanoTime();
        log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw new RuntimeException(e);
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Nullable(javax.annotation.Nullable)
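
Both appenderators wrap the deep-storage push in the same retry idiom. A minimal sketch of that call follows, assuming dataSegmentPusher, mergedFile and segmentToPush are in scope as in the examples above.

// Hedged sketch of the push-with-retry idiom shared by Examples 56 and 57: retry up to
// 5 times on any Exception, because uploads to cloud deep storage can fail transiently.
final DataSegment segment = RetryUtils.retry(
    () -> dataSegmentPusher.push(mergedFile, segmentToPush, false),
    exception -> exception instanceof Exception,
    5
);

The predicate treats every Exception as retriable, and the maxTries argument of 5 bounds how long a persistently failing upload is retried before the failure propagates.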

Example 58 with QueryableIndex

use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.

Class ToObjectVectorColumnProcessorFactoryTest, method setUp.

@Before
public void setUp() {
    final QueryableIndex index = TestIndex.getMMappedTestIndex();
    adapter = new QueryableIndexStorageAdapter(index);
}
Also used : QueryableIndex(org.apache.druid.segment.QueryableIndex) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) Before(org.junit.Before)
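
As a hedged follow-on to this setup (not part of the original test), the adapter built from the memory-mapped test index can be inspected through the generic StorageAdapter accessors. The sketch below assumes getInterval(), getNumRows() and getAvailableDimensions() behave as declared on Druid's StorageAdapter interface, and it follows the same snippet style as the examples (imports omitted).

// A hedged sketch only: exercises the adapter created in setUp() via StorageAdapter accessors.
@Test
public void testAdapterExposesIndexMetadata()
{
    final QueryableIndex index = TestIndex.getMMappedTestIndex();
    final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);

    // The adapter's interval and row count come from the underlying memory-mapped index.
    Assert.assertEquals(index.getDataInterval(), adapter.getInterval());
    Assert.assertTrue(adapter.getNumRows() > 0);

    // Every dimension reported by the adapter is backed by a column in the index.
    for (String dimension : adapter.getAvailableDimensions()) {
        Assert.assertNotNull(index.getColumnHolder(dimension));
    }
}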

Example 59 with QueryableIndex

use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.

Class FixedBucketsHistogramQuantileSqlAggregatorTest, method createQuerySegmentWalker.

@Override
public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker() throws IOException {
    ApproximateHistogramDruidModule.registerSerde();
    final QueryableIndex index = IndexBuilder
        .create(CalciteTests.getJsonMapper())
        .tmpDir(temporaryFolder.newFolder())
        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new DoubleSumAggregatorFactory("m1", "m1"),
                    new FixedBucketsHistogramAggregatorFactory(
                        "fbhist_m1",
                        "m1",
                        20,
                        0,
                        10,
                        FixedBucketsHistogram.OutlierHandlingMode.IGNORE,
                        false
                    )
                )
                .withRollup(false)
                .build()
        )
        .rows(CalciteTests.ROWS1)
        .buildMMappedIndex();
    return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
        DataSegment.builder()
                   .dataSource(CalciteTests.DATASOURCE1)
                   .interval(index.getDataInterval())
                   .version("1")
                   .shardSpec(new LinearShardSpec(0))
                   .size(0)
                   .build(),
        index
    );
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) SpecificSegmentsQuerySegmentWalker(org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) QueryableIndex(org.apache.druid.segment.QueryableIndex) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) IndexBuilder(org.apache.druid.segment.IndexBuilder) FixedBucketsHistogramAggregatorFactory(org.apache.druid.query.aggregation.histogram.FixedBucketsHistogramAggregatorFactory)

Example 60 with QueryableIndex

use of org.apache.druid.segment.QueryableIndex in project druid by druid-io.

Class BatchDeltaIngestionTest, method testIngestion.

private void testIngestion(HadoopDruidIndexerConfig config, List<ImmutableMap<String, Object>> expectedRowsGenerated, WindowedDataSegment windowedDataSegment, List<String> expectedDimensions, List<String> expectedMetrics) throws Exception {
    IndexGeneratorJob job = new IndexGeneratorJob(config);
    Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
    List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths = IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
    JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
    JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
    File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
    Assert.assertFalse(workingPath.exists());
    File segmentFolder = new File(
        StringUtils.format(
            "%s/%s/%s_%s/%s/0",
            config.getSchema().getIOConfig().getSegmentOutputPath(),
            config.getSchema().getDataSchema().getDataSource(),
            INTERVAL_FULL.getStart().toString(),
            INTERVAL_FULL.getEnd().toString(),
            config.getSchema().getTuningConfig().getVersion()
        )
    );
    Assert.assertTrue(segmentFolder.exists());
    File indexZip = new File(segmentFolder, "index.zip");
    Assert.assertTrue(indexZip.exists());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(indexZip, tmpUnzippedSegmentDir);
    QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    Firehose firehose = new IngestSegmentFirehose(
        ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
        TransformSpec.NONE,
        expectedDimensions,
        expectedMetrics,
        null
    );
    List<InputRow> rows = new ArrayList<>();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRowsGenerated, rows, expectedDimensions, expectedMetrics);
}
Also used : IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Firehose(org.apache.druid.data.input.Firehose) ArrayList(java.util.ArrayList) StorageAdapter(org.apache.druid.segment.StorageAdapter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File)

Aggregations

QueryableIndex (org.apache.druid.segment.QueryableIndex) 67
File (java.io.File) 29
ArrayList (java.util.ArrayList) 29
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex) 18
DataSegment (org.apache.druid.timeline.DataSegment) 18
QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment) 17
IOException (java.io.IOException) 15
LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec) 15
IndexSpec (org.apache.druid.segment.IndexSpec) 14
InputRow (org.apache.druid.data.input.InputRow) 13
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex) 13
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory) 12
SpecificSegmentsQuerySegmentWalker (org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) 12
List (java.util.List) 10
Before (org.junit.Before) 10
QueryRunner (org.apache.druid.query.QueryRunner) 9
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) 9
IndexBuilder (org.apache.druid.segment.IndexBuilder) 9
Nullable (javax.annotation.Nullable) 8
ISE (org.apache.druid.java.util.common.ISE) 8