Example 21 with QueryableIndex

Use of io.druid.segment.QueryableIndex in project druid by druid-io.

Class RealtimePlumber, method persistAndMerge.

// Submits a persist-n-merge task for a Sink to the mergeExecutor
private void persistAndMerge(final long truncatedTime, final Sink sink) {
    final String threadName = String.format("%s-%s-persist-n-merge", schema.getDataSource(), new DateTime(truncatedTime));
    mergeExecutor.execute(new ThreadRenamingRunnable(threadName) {

        final Interval interval = sink.getInterval();

        Stopwatch mergeStopwatch = null;

        @Override
        public void doRun() {
            try {
                // Bail out if this sink has been abandoned by a previously-executed task.
                if (sinks.get(truncatedTime) != sink) {
                    log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
                    return;
                }
                // Use a file to indicate that pushing has completed.
                final File persistDir = computePersistDir(schema, interval);
                final File mergedTarget = new File(persistDir, "merged");
                final File isPushedMarker = new File(persistDir, "isPushedMarker");
                if (!isPushedMarker.exists()) {
                    removeSegment(sink, mergedTarget);
                    if (mergedTarget.exists()) {
                        log.wtf("Merged target[%s] exists?!", mergedTarget);
                        return;
                    }
                } else {
                    log.info("Already pushed sink[%s]", sink);
                    return;
                }
                /*
                 * Note: if the plumber crashes after persisting a subset of hydrants, data may be duplicated:
                 * those hydrants will be re-read on restart, but the older commitMetadata will be used.
                 * Fixing this probably requires structural changes to the plumber.
                 */
                for (FireHydrant hydrant : sink) {
                    synchronized (hydrant) {
                        if (!hydrant.hasSwapped()) {
                            log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                            final int rowCount = persistHydrant(hydrant, schema, interval, null);
                            metrics.incrementRowOutputCount(rowCount);
                        }
                    }
                }
                final long mergeThreadCpuTime = VMUtils.safeGetThreadCpuTime();
                mergeStopwatch = Stopwatch.createStarted();
                List<QueryableIndex> indexes = Lists.newArrayList();
                for (FireHydrant fireHydrant : sink) {
                    Segment segment = fireHydrant.getSegment();
                    final QueryableIndex queryableIndex = segment.asQueryableIndex();
                    log.info("Adding hydrant[%s]", fireHydrant);
                    indexes.add(queryableIndex);
                }
                final File mergedFile = indexMerger.mergeQueryableIndex(
                        indexes,
                        schema.getGranularitySpec().isRollup(),
                        schema.getAggregators(),
                        mergedTarget,
                        config.getIndexSpec()
                );
                // emit merge metrics before publishing segment
                metrics.incrementMergeCpuTime(VMUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
                metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));
                QueryableIndex index = indexIO.loadIndex(mergedFile);
                log.info("Pushing [%s] to deep storage", sink.getSegment().getIdentifier());
                DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
                log.info("Inserting [%s] to the metadata store", sink.getSegment().getIdentifier());
                segmentPublisher.publishSegment(segment);
                if (!isPushedMarker.createNewFile()) {
                    log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource()).addData("interval", sink.getInterval()).addData("partitionNum", segment.getShardSpec().getPartitionNum()).addData("marker", isPushedMarker).emit();
                }
            } catch (Exception e) {
                metrics.incrementFailedHandoffs();
                log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()).addData("interval", interval).emit();
                if (shuttingDown) {
                    // We're trying to shut down, and this segment failed to push. Let's just get rid of it.
                    // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                    cleanShutdown = false;
                    abandonSegment(truncatedTime, sink);
                }
            } finally {
                if (mergeStopwatch != null) {
                    mergeStopwatch.stop();
                }
            }
        }
    });
    handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()), mergeExecutor, new Runnable() {

        @Override
        public void run() {
            abandonSegment(sink.getInterval().getStartMillis(), sink);
            metrics.incrementHandOffCount();
        }
    });
}
Also used : Stopwatch(com.google.common.base.Stopwatch) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) Segment(io.druid.segment.Segment) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) QueryableIndex(io.druid.segment.QueryableIndex) SegmentDescriptor(io.druid.query.SegmentDescriptor) ThreadRenamingRunnable(io.druid.common.guava.ThreadRenamingRunnable) List(java.util.List) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File) Interval(org.joda.time.Interval)
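
The core QueryableIndex flow in this method is: persist any unswapped FireHydrant, collect each hydrant's QueryableIndex, merge them into one directory with IndexMerger.mergeQueryableIndex, and reload that directory with IndexIO.loadIndex before pushing to deep storage. A minimal sketch of the merge-and-reload step, assuming IndexMerger and IndexIO instances injected as in the example; the helper class and method names here are illustrative, not part of Druid:

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.IndexIO;
import io.druid.segment.IndexMerger;
import io.druid.segment.IndexSpec;
import io.druid.segment.QueryableIndex;

import java.io.File;
import java.io.IOException;
import java.util.List;

public class MergeSketch {

    // Hypothetical helper: merge already-persisted indexes and reload the result.
    static QueryableIndex mergeAndReload(
            IndexMerger indexMerger,
            IndexIO indexIO,
            List<QueryableIndex> indexes,
            boolean rollup,
            AggregatorFactory[] aggregators,
            File mergedTarget,
            IndexSpec indexSpec
    ) throws IOException {
        // mergeQueryableIndex writes the merged segment to mergedTarget and returns that directory.
        final File mergedFile = indexMerger.mergeQueryableIndex(indexes, rollup, aggregators, mergedTarget, indexSpec);
        // The on-disk result must be reloaded to get a queryable view of the merged data.
        return indexIO.loadIndex(mergedFile);
    }
}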

Example 22 with QueryableIndex

Use of io.druid.segment.QueryableIndex in project druid by druid-io.

Class RealtimePlumber, method bootstrapSinksFromDisk.

protected Object bootstrapSinksFromDisk() {
    final VersioningPolicy versioningPolicy = config.getVersioningPolicy();
    File baseDir = computeBaseDir(schema);
    if (baseDir == null || !baseDir.exists()) {
        return null;
    }
    File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    }
    Object metadata = null;
    long latestCommitTime = 0;
    for (File sinkDir : files) {
        final Interval sinkInterval = new Interval(sinkDir.getName().replace("_", "/"));
        // List only the numerically-named intermediate segment dirs, skipping the "merged" dir.
        final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {

            @Override
            public boolean accept(File dir, String fileName) {
                return Ints.tryParse(fileName) != null;
            }
        });
        Arrays.sort(sinkFiles, new Comparator<File>() {

            @Override
            public int compare(File o1, File o2) {
                try {
                    return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
                } catch (NumberFormatException e) {
                    log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2);
                    return o1.compareTo(o2);
                }
            }
        });
        boolean isCorrupted = false;
        List<FireHydrant> hydrants = Lists.newArrayList();
        for (File segmentDir : sinkFiles) {
            log.info("Loading previously persisted segment at [%s]", segmentDir);
            // This check should be redundant given the FilenameFilter above; remove it once that is certain.
            if (Ints.tryParse(segmentDir.getName()) == null) {
                continue;
            }
            QueryableIndex queryableIndex = null;
            try {
                queryableIndex = indexIO.loadIndex(segmentDir);
            } catch (IOException e) {
                log.error(e, "Problem loading segmentDir from disk.");
                isCorrupted = true;
            }
            if (isCorrupted) {
                try {
                    File corruptSegmentDir = computeCorruptedFileDumpDir(segmentDir, schema);
                    log.info("Renaming %s to %s", segmentDir.getAbsolutePath(), corruptSegmentDir.getAbsolutePath());
                    FileUtils.copyDirectory(segmentDir, corruptSegmentDir);
                    FileUtils.deleteDirectory(segmentDir);
                } catch (Exception e1) {
                    log.error(e1, "Failed to rename %s", segmentDir.getAbsolutePath());
                }
                // Note: skipping a corrupted segment may drop some data; this strategy should be
                // revisited at some point.
                continue;
            }
            Metadata segmentMetadata = queryableIndex.getMetadata();
            if (segmentMetadata != null) {
                Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY);
                if (timestampObj != null) {
                    long timestamp = ((Long) timestampObj).longValue();
                    if (timestamp > latestCommitTime) {
                        log.info("Found metaData [%s] with latestCommitTime [%s] greater than previous recorded [%s]", queryableIndex.getMetadata(), timestamp, latestCommitTime);
                        latestCommitTime = timestamp;
                        metadata = queryableIndex.getMetadata().get(COMMIT_METADATA_KEY);
                    }
                }
            }
            hydrants.add(
                    new FireHydrant(
                            new QueryableIndexSegment(
                                    DataSegment.makeDataSegmentIdentifier(
                                            schema.getDataSource(),
                                            sinkInterval.getStart(),
                                            sinkInterval.getEnd(),
                                            versioningPolicy.getVersion(sinkInterval),
                                            config.getShardSpec()
                                    ),
                                    queryableIndex
                            ),
                            Integer.parseInt(segmentDir.getName())
                    )
            );
        }
        if (hydrants.isEmpty()) {
            // Probably encountered a corrupt sink directory
            log.warn("Found persisted segment directory with no intermediate segments present at %s, skipping sink creation.", sinkDir.getAbsolutePath());
            continue;
        }
        final Sink currSink = new Sink(sinkInterval, schema, config.getShardSpec(), versioningPolicy.getVersion(sinkInterval), config.getMaxRowsInMemory(), config.isReportParseExceptions(), hydrants);
        addSink(currSink);
    }
    return metadata;
}
Also used : QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) Metadata(io.druid.segment.Metadata) IOException(java.io.IOException) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) FilenameFilter(java.io.FilenameFilter) QueryableIndex(io.druid.segment.QueryableIndex) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File) Interval(org.joda.time.Interval)
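
The bootstrap pass shows the other direction: reading commit metadata back out of previously persisted QueryableIndexes to resume ingestion. A condensed sketch of that recovery logic, assuming an IndexIO instance as above; the key strings below are placeholders standing in for the COMMIT_METADATA_TIMESTAMP_KEY and COMMIT_METADATA_KEY constants that RealtimePlumber defines:

import io.druid.segment.IndexIO;
import io.druid.segment.Metadata;
import io.druid.segment.QueryableIndex;

import java.io.File;
import java.io.IOException;

public class CommitMetadataSketch {

    // Placeholder keys; use RealtimePlumber's actual constants in real code.
    private static final String COMMIT_METADATA_TIMESTAMP_KEY = "commitMetadataTimestamp";
    private static final String COMMIT_METADATA_KEY = "commitMetadata";

    // Returns the commit metadata with the newest timestamp across the given segment dirs.
    static Object latestCommitMetadata(IndexIO indexIO, File[] segmentDirs) throws IOException {
        Object latest = null;
        long latestCommitTime = 0;
        for (File segmentDir : segmentDirs) {
            final Metadata segmentMetadata = indexIO.loadIndex(segmentDir).getMetadata();
            if (segmentMetadata == null) {
                continue;
            }
            final Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY);
            if (timestampObj != null && (Long) timestampObj > latestCommitTime) {
                latestCommitTime = (Long) timestampObj;
                latest = segmentMetadata.get(COMMIT_METADATA_KEY);
            }
        }
        return latest;
    }
}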

Example 23 with QueryableIndex

Use of io.druid.segment.QueryableIndex in project druid by druid-io.

Class SpatialFilterBonusTest, method constructorFeeder.

@Parameterized.Parameters
public static Collection<?> constructorFeeder() throws IOException {
    final IndexSpec indexSpec = new IndexSpec();
    final IncrementalIndex rtIndex = makeIncrementalIndex();
    final QueryableIndex mMappedTestIndex = makeQueryableIndex(indexSpec);
    final QueryableIndex mergedRealtimeIndex = makeMergedQueryableIndex(indexSpec);
    return Arrays.asList(
            new Object[][] {
                { new IncrementalIndexSegment(rtIndex, null) },
                { new QueryableIndexSegment(null, mMappedTestIndex) },
                { new QueryableIndexSegment(null, mergedRealtimeIndex) }
            }
    );
}
Also used : QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) IndexSpec(io.druid.segment.IndexSpec) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) QueryableIndex(io.druid.segment.QueryableIndex)
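
constructorFeeder supplies one Segment per test run: an IncrementalIndexSegment over the realtime index and QueryableIndexSegments over the mmapped and merged indexes, so every test method executes against all three storage forms. A sketch of how JUnit's Parameterized runner consumes those rows; the class and field names here are illustrative:

import io.druid.segment.Segment;
import io.druid.segment.StorageAdapter;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parameterized.class)
public class SpatialFilterLikeTest {

    private final Segment segment;

    // JUnit calls this constructor once per Object[] row returned by the
    // @Parameterized.Parameters method (constructorFeeder() above).
    public SpatialFilterLikeTest(Segment segment) {
        this.segment = segment;
    }

    // Test methods usually query through the segment's StorageAdapter, so the same
    // assertions run against incremental, mmapped, and merged indexes alike.
    protected StorageAdapter adapter() {
        return segment.asStorageAdapter();
    }
}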

Example 24 with QueryableIndex

Use of io.druid.segment.QueryableIndex in project druid by druid-io.

Class SpatialFilterTest, method makeMergedQueryableIndex.

private static QueryableIndex makeMergedQueryableIndex(IndexSpec indexSpec) {
    try {
        final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
                .withMinTimestamp(DATA_INTERVAL.getStartMillis())
                .withQueryGranularity(Granularities.DAY)
                .withMetrics(METRIC_AGGS)
                .withDimensionsSpec(
                        new DimensionsSpec(
                                null,
                                null,
                                Arrays.asList(
                                        new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")),
                                        new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))
                                )
                        )
                )
                .build();
        // The three indexes share one immutable schema; only the max row count differs.
        IncrementalIndex first = new OnheapIncrementalIndex(indexSchema, false, 1000);
        IncrementalIndex second = new OnheapIncrementalIndex(indexSchema, false, 1000);
        IncrementalIndex third = new OnheapIncrementalIndex(indexSchema, false, NUM_POINTS);
        first.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "foo", "lat", 0.0f, "long", 0.0f, "val", 17L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-02").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-02").toString(), "dim", "foo", "lat", 1.0f, "long", 3.0f, "val", 29L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-03").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-03").toString(), "dim", "foo", "lat", 4.0f, "long", 2.0f, "val", 13L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", "_mmx.unknown", "long", "_mmx.unknown", "val", 101L)));
        first.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "dim.geo", "_mmx.unknown", "val", 501L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-04").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-04").toString(), "dim", "foo", "lat", 7.0f, "long", 3.0f, "val", 91L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", 8.0f, "long", 6.0f, "val", 47L)));
        second.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "lat2", 0.0f, "long2", 0.0f, "val", 13L)));
        // Add a bunch of random points
        Random rand = new Random();
        for (int i = 8; i < NUM_POINTS; i++) {
            third.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "boo", "lat", (float) (rand.nextFloat() * 10 + 10.0), "long", (float) (rand.nextFloat() * 10 + 10.0), "val", i)));
        }
        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();
        File firstFile = new File(tmpFile, "first");
        File secondFile = new File(tmpFile, "second");
        File thirdFile = new File(tmpFile, "third");
        File mergedFile = new File(tmpFile, "merged");
        firstFile.mkdirs();
        firstFile.deleteOnExit();
        secondFile.mkdirs();
        secondFile.deleteOnExit();
        thirdFile.mkdirs();
        thirdFile.deleteOnExit();
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();
        INDEX_MERGER.persist(first, DATA_INTERVAL, firstFile, indexSpec);
        INDEX_MERGER.persist(second, DATA_INTERVAL, secondFile, indexSpec);
        INDEX_MERGER.persist(third, DATA_INTERVAL, thirdFile, indexSpec);
        QueryableIndex mergedRealtime = INDEX_IO.loadIndex(
                INDEX_MERGER.mergeQueryableIndex(
                        Arrays.asList(
                                INDEX_IO.loadIndex(firstFile),
                                INDEX_IO.loadIndex(secondFile),
                                INDEX_IO.loadIndex(thirdFile)
                        ),
                        true,
                        METRIC_AGGS,
                        mergedFile,
                        indexSpec
                )
        );
        return mergedRealtime;
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IOException(java.io.IOException) DateTime(org.joda.time.DateTime) Random(java.util.Random) QueryableIndex(io.druid.segment.QueryableIndex) SpatialDimensionSchema(io.druid.data.input.impl.SpatialDimensionSchema) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) File(java.io.File)
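
The createTempFile-then-delete dance in this method is a common pre-java.nio idiom for obtaining a scratch directory, but it is racy: another process could claim the path between delete() and mkdirs(). A sketch of a safer alternative using only the JDK's java.nio.file API; this is an illustration, not what the test above does:

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

public class TempDirSketch {

    // Creates a scratch root plus named subdirectories without the
    // createTempFile/delete/mkdirs race in the original test.
    static File[] makeWorkDirs(String... names) throws IOException {
        final File root = Files.createTempDirectory("spatial-filter-test").toFile();
        root.deleteOnExit();
        final File[] dirs = new File[names.length];
        for (int i = 0; i < names.length; i++) {
            dirs[i] = new File(root, names[i]);
            if (!dirs[i].mkdirs()) {
                throw new IOException("Could not create " + dirs[i]);
            }
            dirs[i].deleteOnExit();
        }
        return dirs;
    }
}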

Example 25 with QueryableIndex

Use of io.druid.segment.QueryableIndex in project druid by druid-io.

Class SpatialFilterTest, method constructorFeeder.

@Parameterized.Parameters
public static Collection<?> constructorFeeder() throws IOException {
    final IndexSpec indexSpec = new IndexSpec();
    final IncrementalIndex rtIndex = makeIncrementalIndex();
    final QueryableIndex mMappedTestIndex = makeQueryableIndex(indexSpec);
    final QueryableIndex mergedRealtimeIndex = makeMergedQueryableIndex(indexSpec);
    return Arrays.asList(
            new Object[][] {
                { new IncrementalIndexSegment(rtIndex, null) },
                { new QueryableIndexSegment(null, mMappedTestIndex) },
                { new QueryableIndexSegment(null, mergedRealtimeIndex) }
            }
    );
}
Also used : QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) IndexSpec(io.druid.segment.IndexSpec) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) QueryableIndex(io.druid.segment.QueryableIndex)
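
Across these examples the QueryableIndex life cycle is consistent: load or merge to obtain an index, inspect it, wrap it in a QueryableIndexSegment for querying, and release it when done. A small sketch of the inspection step; getAvailableDimensions appears in Example 21 above, while getDataInterval, getNumRows, and close are assumed here to be the remaining QueryableIndex interface methods, and the helper itself is illustrative:

import io.druid.segment.IndexIO;
import io.druid.segment.QueryableIndex;

import java.io.File;
import java.io.IOException;

public class InspectIndexSketch {

    // Loads a persisted segment directory and prints basic facts about it.
    static void describe(IndexIO indexIO, File segmentDir) throws IOException {
        final QueryableIndex index = indexIO.loadIndex(segmentDir);
        try {
            System.out.println("interval:   " + index.getDataInterval());
            System.out.println("rows:       " + index.getNumRows());
            System.out.println("dimensions: " + index.getAvailableDimensions());
        } finally {
            // The index holds memory-mapped resources; close it when finished.
            index.close();
        }
    }
}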

Aggregations

QueryableIndex (io.druid.segment.QueryableIndex) 35
File (java.io.File) 23
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex) 16
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex) 12
InputRow (io.druid.data.input.InputRow) 10
IndexSpec (io.druid.segment.IndexSpec) 10
QueryableIndexSegment (io.druid.segment.QueryableIndexSegment) 8
IOException (java.io.IOException) 8
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator) 7
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde) 7
Setup (org.openjdk.jmh.annotations.Setup) 7
FireHydrant (io.druid.segment.realtime.FireHydrant) 6
DataSegment (io.druid.timeline.DataSegment) 6
QueryableIndexStorageAdapter (io.druid.segment.QueryableIndexStorageAdapter) 5
StorageAdapter (io.druid.segment.StorageAdapter) 5
DateTime (org.joda.time.DateTime) 5
Interval (org.joda.time.Interval) 5
IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment) 4
IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException) 4
ImmutableList (com.google.common.collect.ImmutableList) 3