Example 1 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

From the class AppenderatorImpl, method bootstrapSinksFromDisk.

/**
   * Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
   *
   * @return persisted commit metadata
   */
private Object bootstrapSinksFromDisk() {
    Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
    final File baseDir = tuningConfig.getBasePersistDirectory();
    if (!baseDir.exists()) {
        return null;
    }
    final File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    }
    final File commitFile = computeCommitFile();
    final Committed committed;
    try {
        if (commitFile.exists()) {
            committed = objectMapper.readValue(commitFile, Committed.class);
        } else {
            committed = Committed.nil();
        }
    } catch (Exception e) {
        throw new ISE(e, "Failed to read commitFile: %s", commitFile);
    }
    log.info("Loading sinks from[%s]: %s", baseDir, committed.getHydrants().keySet());
    for (File sinkDir : files) {
        final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
        if (!identifierFile.isFile()) {
            // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
            continue;
        }
        try {
            final SegmentIdentifier identifier = objectMapper.readValue(identifierFile, SegmentIdentifier.class);
            final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString());
            if (committedHydrants <= 0) {
                log.info("Removing uncommitted sink at [%s]", sinkDir);
                FileUtils.deleteDirectory(sinkDir);
                continue;
            }
            // To avoid reading and listing of "merged" dir and other special files
            final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {

                @Override
                public boolean accept(File dir, String fileName) {
                    return Ints.tryParse(fileName) != null;
                }
            });
            Arrays.sort(sinkFiles, new Comparator<File>() {

                @Override
                public int compare(File o1, File o2) {
                    return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
                }
            });
            List<FireHydrant> hydrants = Lists.newArrayList();
            for (File hydrantDir : sinkFiles) {
                final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
                if (hydrantNumber >= committedHydrants) {
                    log.info("Removing uncommitted segment at [%s]", hydrantDir);
                    FileUtils.deleteDirectory(hydrantDir);
                } else {
                    log.info("Loading previously persisted segment at [%s]", hydrantDir);
                    if (hydrantNumber != hydrants.size()) {
                        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
                    }
                    hydrants.add(new FireHydrant(new QueryableIndexSegment(identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir)), hydrantNumber));
                }
            }
            // Make sure we loaded enough hydrants.
            if (committedHydrants != hydrants.size()) {
                throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
            }
            Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions(), hydrants);
            sinks.put(identifier, currSink);
            sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
            segmentAnnouncer.announceSegment(currSink.getSegment());
        } catch (IOException e) {
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
        }
    }
    // Make sure we loaded all committed sinks.
    final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), new Function<SegmentIdentifier, String>() {

        @Override
        public String apply(SegmentIdentifier input) {
            return input.getIdentifierAsString();
        }
    }));
    final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
    if (!missingSinks.isEmpty()) {
        throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
    }
    return committed.getMetadata();
}
Also used: QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) FilenameFilter(java.io.FilenameFilter) Function(com.google.common.base.Function) Sink(io.druid.segment.realtime.plumber.Sink) ISE(io.druid.java.util.common.ISE) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File)
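
The recovery rule above is worth isolating: the commit file maps each segment identifier to the number of hydrants that were durably committed, and any hydrant directory numbered at or above that count is deleted on restart. Below is a minimal sketch of that rule, assuming a hypothetical <baseDir>/<sinkId>/<hydrantNumber>/ layout; committedCounts and shouldKeep are illustrative names, not Druid APIs.

import java.io.File;
import java.util.Map;

// Sketch of the restart-recovery rule; committedCounts mirrors what
// Committed.getCommittedHydrants() returns per identifier.
public class BootstrapSketch {

    // Returns true if hydrantDir survived a commit and should be reloaded; false if
    // it should be deleted, matching the cleanup in bootstrapSinksFromDisk above.
    static boolean shouldKeep(Map<String, Integer> committedCounts, String sinkId, File hydrantDir) {
        final int committed = committedCounts.getOrDefault(sinkId, 0);
        final Integer hydrantNumber = tryParse(hydrantDir.getName());
        // Non-numeric entries (e.g. the "merged" dir) are not hydrants; hydrants
        // numbered at or past the committed count were never committed and must go.
        return hydrantNumber != null && hydrantNumber < committed;
    }

    private static Integer tryParse(String s) {
        try {
            return Integer.parseInt(s);
        } catch (NumberFormatException e) {
            return null;
        }
    }
}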

Example 2 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

From the class AppenderatorImpl, method mergeAndPush.

/**
   * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
   * be run in the single-threaded pushExecutor.
   *
   * @param identifier sink identifier
   * @param sink       sink to push
   *
   * @return segment descriptor, or null if the sink is no longer valid
   */
private DataSegment mergeAndPush(final SegmentIdentifier identifier, final Sink sink) {
    // Bail out if this sink is null or otherwise not what we expect.
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    }
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    for (FireHydrant hydrant : sink) {
        if (sink.isWritable()) {
            throw new ISE("WTF?! Expected sink to be no longer writable before mergeAndPush. Segment[%s].", identifier);
        }
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("WTF?! Expected sink to be fully persisted before mergeAndPush. Segment[%s].", identifier);
            }
        }
    }
    try {
        if (descriptorFile.exists()) {
            // Already pushed.
            log.info("Segment[%s] already pushed.", identifier);
            return objectMapper.readValue(descriptorFile, DataSegment.class);
        }
        log.info("Pushing merged index for segment[%s].", identifier);
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        List<QueryableIndex> indexes = Lists.newArrayList();
        for (FireHydrant fireHydrant : sink) {
            Segment segment = fireHydrant.getSegment();
            final QueryableIndex queryableIndex = segment.asQueryableIndex();
            log.info("Adding hydrant[%s]", fireHydrant);
            indexes.add(queryableIndex);
        }
        final File mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), mergedTarget, tuningConfig.getIndexSpec());
        QueryableIndex index = indexIO.loadIndex(mergedFile);
        DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
        objectMapper.writeValue(descriptorFile, segment);
        log.info("Pushed merged index for segment[%s], descriptor is: %s", identifier, segment);
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw Throwables.propagate(e);
    }
}
Also used: QueryableIndex(io.druid.segment.QueryableIndex) ISE(io.druid.java.util.common.ISE) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File) DataSegment(io.druid.timeline.DataSegment) QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) Segment(io.druid.segment.Segment) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)
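
The descriptor file is what makes mergeAndPush idempotent: it is written only after the push succeeds, so a retry that finds it can return the recorded segment instead of merging and pushing again. A minimal sketch of that pattern, with doPush() and the file contents as placeholders rather than Druid APIs:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

public class IdempotentPushSketch {

    // Returns the push result, reusing a previous attempt's result when the
    // descriptor file already exists.
    static String pushOnce(Path descriptorFile) throws IOException {
        if (Files.exists(descriptorFile)) {
            // Already pushed on a previous attempt; reuse the recorded descriptor.
            return new String(Files.readAllBytes(descriptorFile), StandardCharsets.UTF_8);
        }
        final String descriptor = doPush();
        // Written last: if the process dies before this line, a retry redoes the push.
        Files.write(descriptorFile, descriptor.getBytes(StandardCharsets.UTF_8));
        return descriptor;
    }

    private static String doPush() {
        // Placeholder for the real merge-and-push work.
        return "{\"segment\":\"example\"}";
    }
}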

Example 3 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

From the class AppenderatorImpl, method persistAll.

@Override
public ListenableFuture<Object> persistAll(final Committer committer) {
    // Submit persistAll task to the persistExecutor
    final Map<SegmentIdentifier, Integer> commitHydrants = Maps.newHashMap();
    final List<Pair<FireHydrant, SegmentIdentifier>> indexesToPersist = Lists.newArrayList();
    final Set<SegmentIdentifier> identifiers = sinks.keySet();
    for (SegmentIdentifier identifier : identifiers) {
        final Sink sink = sinks.get(identifier);
        final List<FireHydrant> hydrants = Lists.newArrayList(sink);
        commitHydrants.put(identifier, hydrants.size());
        final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();
        for (FireHydrant hydrant : hydrants.subList(0, limit)) {
            if (!hydrant.hasSwapped()) {
                log.info("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
                indexesToPersist.add(Pair.of(hydrant, identifier));
            }
        }
        if (sink.swappable()) {
            indexesToPersist.add(Pair.of(sink.swap(), identifier));
        }
    }
    log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource());
    final String threadName = String.format("%s-incremental-persist", schema.getDataSource());
    final Object commitMetadata = committer.getMetadata();
    final Stopwatch runExecStopwatch = Stopwatch.createStarted();
    final Stopwatch persistStopwatch = Stopwatch.createStarted();
    final ListenableFuture<Object> future = persistExecutor.submit(new ThreadRenamingCallable<Object>(threadName) {

        @Override
        public Object doCall() {
            try {
                for (Pair<FireHydrant, SegmentIdentifier> pair : indexesToPersist) {
                    metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
                }
                log.info("Committing metadata[%s] for sinks[%s].", commitMetadata, Joiner.on(", ").join(Iterables.transform(commitHydrants.entrySet(), new Function<Map.Entry<SegmentIdentifier, Integer>, String>() {

                    @Override
                    public String apply(Map.Entry<SegmentIdentifier, Integer> entry) {
                        return String.format("%s:%d", entry.getKey().getIdentifierAsString(), entry.getValue());
                    }
                })));
                committer.run();
                objectMapper.writeValue(computeCommitFile(), Committed.create(commitHydrants, commitMetadata));
                return commitMetadata;
            } catch (Exception e) {
                metrics.incrementFailedPersists();
                throw Throwables.propagate(e);
            } finally {
                metrics.incrementNumPersists();
                metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
                persistStopwatch.stop();
            }
        }
    });
    final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
    metrics.incrementPersistBackPressureMillis(startDelay);
    if (startDelay > WARN_DELAY) {
        log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
    }
    runExecStopwatch.stop();
    resetNextFlush();
    // NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
    rowsCurrentlyInMemory.set(0);
    return future;
}
Also used: Stopwatch(com.google.common.base.Stopwatch) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Function(com.google.common.base.Function) Sink(io.druid.segment.realtime.plumber.Sink) FireHydrant(io.druid.segment.realtime.FireHydrant) Map(java.util.Map) Pair(io.druid.java.util.common.Pair)
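
The two stopwatches measure different things: persistStopwatch times the persist itself inside the task, while runExecStopwatch captures how long submit() took to return, which approximates back-pressure when the persist executor blocks submission while earlier persists drain. A minimal sketch of the second measurement using plain JDK executors; WARN_DELAY_MILLIS and the single-threaded executor are stand-ins for Druid's configuration, not its actual values:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class BackPressureSketch {

    // Stand-in threshold; the snippet above uses WARN_DELAY for the same purpose.
    static final long WARN_DELAY_MILLIS = 1000;

    public static void main(String[] args) throws Exception {
        // A plain single-threaded executor stands in for the persistExecutor.
        final ExecutorService persistExecutor = Executors.newSingleThreadExecutor();
        final long submittedAt = System.nanoTime();
        final Future<?> future = persistExecutor.submit(() -> {
            // Placeholder for persisting the collected hydrants.
        });
        // Mirrors runExecStopwatch: elapsed time until submit() returned. With a
        // bounded, blocking executor this stalls while earlier persists drain, so
        // it approximates how long ingestion was throttled.
        final long startDelayMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - submittedAt);
        if (startDelayMillis > WARN_DELAY_MILLIS) {
            System.out.printf("Ingestion was throttled for [%,d] millis%n", startDelayMillis);
        }
        future.get();
        persistExecutor.shutdown();
    }
}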

Example 4 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

From the class RealtimePlumber, method abandonSegment.

/**
   * Unannounces a given sink and removes all local references to it. It is important that this is only called
   * from the single-threaded mergeExecutor, since otherwise chaos may ensue if merged segments are deleted while
   * being created.
   *
   * @param truncatedTime sink key
   * @param sink          sink to unannounce
   */
protected void abandonSegment(final long truncatedTime, final Sink sink) {
    if (sinks.containsKey(truncatedTime)) {
        try {
            segmentAnnouncer.unannounceSegment(sink.getSegment());
            removeSegment(sink, computePersistDir(schema, sink.getInterval()));
            log.info("Removing sinkKey %d for segment %s", truncatedTime, sink.getSegment().getIdentifier());
            sinks.remove(truncatedTime);
            metrics.setSinkCount(sinks.size());
            sinkTimeline.remove(sink.getInterval(), sink.getVersion(), new SingleElementPartitionChunk<>(sink));
            for (FireHydrant hydrant : sink) {
                cache.close(SinkQuerySegmentWalker.makeHydrantCacheIdentifier(hydrant));
            }
            synchronized (handoffCondition) {
                handoffCondition.notifyAll();
            }
        } catch (Exception e) {
            log.makeAlert(e, "Unable to abandon old segment for dataSource[%s]", schema.getDataSource()).addData("interval", sink.getInterval()).emit();
        }
    }
}
Also used: FireHydrant(io.druid.segment.realtime.FireHydrant) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException)
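
The notifyAll on handoffCondition pairs with waiters that block until every sink has been handed off (RealtimePlumber's shutdown path waits this way). A minimal sketch of that guarded-wait pattern; the field names mirror the snippet above, but this is an illustration, not Druid code:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class HandoffSignalSketch {

    private final Map<Long, Object> sinks = new ConcurrentHashMap<>();
    private final Object handoffCondition = new Object();

    // Called once per abandoned sink, as in abandonSegment above.
    void abandon(long sinkKey) {
        sinks.remove(sinkKey);
        synchronized (handoffCondition) {
            handoffCondition.notifyAll(); // wake threads waiting for handoff to finish
        }
    }

    // A waiter (e.g. a shutdown path) blocks until every sink is gone.
    void awaitAllHandedOff() throws InterruptedException {
        synchronized (handoffCondition) {
            while (!sinks.isEmpty()) {
                handoffCondition.wait();
            }
        }
    }
}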

Example 5 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

From the class RealtimePlumberSchoolTest, method testDimOrderInheritanceHelper.

private void testDimOrderInheritanceHelper(final Object commitMetadata) throws Exception {
    List<List<String>> expectedDims = ImmutableList.<List<String>>of(ImmutableList.of("dimD"), ImmutableList.of("dimC"), ImmutableList.of("dimA"), ImmutableList.of("dimB"), ImmutableList.of("dimE"), ImmutableList.of("dimD", "dimC", "dimA", "dimB", "dimE"));
    QueryableIndex qindex;
    FireHydrant hydrant;
    Map<Long, Sink> sinks;
    RealtimePlumber plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    Assert.assertNull(plumber.startJob());
    final CountDownLatch doneSignal = new CountDownLatch(1);
    final Committer committer = new Committer() {

        @Override
        public Object getMetadata() {
            return commitMetadata;
        }

        @Override
        public void run() {
            doneSignal.countDown();
        }
    };
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimD"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimC"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimA"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimB"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimE"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimA", "dimB", "dimC", "dimD", "dimE"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.persist(committer);
    doneSignal.await();
    plumber.getSinks().clear();
    plumber.finishJob();
    RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    restoredPlumber.bootstrapSinksFromDisk();
    sinks = restoredPlumber.getSinks();
    Assert.assertEquals(1, sinks.size());
    List<FireHydrant> hydrants = Lists.newArrayList(sinks.get(0L));
    for (int i = 0; i < hydrants.size(); i++) {
        hydrant = hydrants.get(i);
        qindex = hydrant.getSegment().asQueryableIndex();
        Assert.assertEquals(i, hydrant.getCount());
        Assert.assertEquals(expectedDims.get(i), ImmutableList.copyOf(qindex.getAvailableDimensions()));
    }
}
Also used: QueryableIndex(io.druid.segment.QueryableIndex) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) FireHydrant(io.druid.segment.realtime.FireHydrant) Committer(io.druid.data.input.Committer) CountDownLatch(java.util.concurrent.CountDownLatch)
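
The key assertion is the last entry of expectedDims: the hydrant persisted from the row declaring all five dimensions lists them in first-encounter order (dimD, dimC, dimA, dimB, dimE), not in the order the row declared them. A minimal sketch of that ordering property using a LinkedHashSet; this models the expectation only, since Druid's actual ordering logic lives in the incremental index, not in this helper:

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class DimOrderSketch {

    public static void main(String[] args) {
        // Rows declare dimensions in the same order as the plumber.add() calls above.
        final List<List<String>> rows = Arrays.asList(
                Arrays.asList("dimD"),
                Arrays.asList("dimC"),
                Arrays.asList("dimA"),
                Arrays.asList("dimB"),
                Arrays.asList("dimE"),
                Arrays.asList("dimA", "dimB", "dimC", "dimD", "dimE"));
        // A LinkedHashSet keeps first-encounter order, the order the test expects.
        final Set<String> dimOrder = new LinkedHashSet<>();
        for (List<String> row : rows) {
            dimOrder.addAll(row);
        }
        // Prints [dimD, dimC, dimA, dimB, dimE], matching the last entry of expectedDims.
        System.out.println(dimOrder);
    }
}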

Aggregations

FireHydrant (io.druid.segment.realtime.FireHydrant): 12
IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException): 7
IOException (java.io.IOException): 7
QueryableIndex (io.druid.segment.QueryableIndex): 6
File (java.io.File): 6
ISE (io.druid.java.util.common.ISE): 4
QueryableIndexSegment (io.druid.segment.QueryableIndexSegment): 4
List (java.util.List): 4
Interval (org.joda.time.Interval): 4
Function (com.google.common.base.Function): 3
Committer (io.druid.data.input.Committer): 3
Sink (io.druid.segment.realtime.plumber.Sink): 3
ExecutionException (java.util.concurrent.ExecutionException): 3
DateTime (org.joda.time.DateTime): 3
Stopwatch (com.google.common.base.Stopwatch): 2
ImmutableList (com.google.common.collect.ImmutableList): 2
InputRow (io.druid.data.input.InputRow): 2
Pair (io.druid.java.util.common.Pair): 2
Query (io.druid.query.Query): 2
SegmentDescriptor (io.druid.query.SegmentDescriptor): 2