Search in sources :

Example 6 with Sink

use of org.apache.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class StreamAppenderator method add.

/**
 * Adds one row to the sink of the given segment, updates the in-memory row/byte counters, and then decides whether
 * the in-memory data has to be persisted.
 *
 * <p>A persist is considered necessary when any of the following holds after the add: the sink cannot accept more
 * rows, the current time is past {@code nextFlush}, {@code rowsCurrentlyInMemory} has reached
 * {@code tuningConfig.getMaxRowsInMemory()}, or {@code bytesCurrentlyInMemory} has reached
 * {@code maxBytesTuningConfig}. If incremental persists are allowed, {@link #persistAll} is invoked asynchronously
 * and any failure is recorded in {@code persistError}; otherwise the need is only reported in the returned result.
 *
 * @param identifier               segment to add the row to; its dataSource must equal the schema's dataSource
 * @param row                      the row to add
 * @param committerSupplier        supplies the committer handed to {@code persistAll}; may be null, in which case
 *                                 {@code persistAll} is called with a null committer
 * @param allowIncrementalPersists whether this call may trigger {@code persistAll} itself; its negation is passed as
 *                                 the second argument of {@code sink.add}
 * @return result holding the identifier, {@code sink.getNumRows()} and whether the caller must persist
 * @throws IndexSizeExceededException  rethrown when {@code sink.add} reports the index as full
 * @throws SegmentNotWritableException when the sink reports a negative row count after the add (swapped-out sink)
 */
@Override
public AppenderatorAddResult add(final SegmentIdWithShardSpec identifier, final InputRow row, @Nullable final Supplier<Committer> committerSupplier, final boolean allowIncrementalPersists) throws IndexSizeExceededException, SegmentNotWritableException {
    throwPersistErrorIfExists();
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    }
    final Sink sink = getOrCreateSink(identifier);
    metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
    // Snapshot the sink's counters so the deltas caused by this add can be applied to the global counters below.
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
    final long bytesInMemoryAfterAdd;
    final IncrementalIndexAddResult addResult;
    try {
        addResult = sink.add(row, !allowIncrementalPersists);
        sinkRowsInMemoryAfterAdd = addResult.getRowCount();
        bytesInMemoryAfterAdd = addResult.getBytesInMemory();
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAddRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    }
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    }
    if (addResult.isRowAdded()) {
        rowIngestionMeters.incrementProcessed();
    } else if (addResult.hasParseException()) {
        parseExceptionHandler.handle(addResult.getParseException());
    }
    final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
    rowsCurrentlyInMemory.addAndGet(numAddedRows);
    bytesCurrentlyInMemory.addAndGet(bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
    totalRows.addAndGet(numAddedRows);
    boolean isPersistRequired = false;
    boolean persist = false;
    // Collect every reason that applies so the flush log line below can list them all.
    List<String> persistReasons = new ArrayList<>();
    if (!sink.canAppendRow()) {
        persist = true;
        persistReasons.add("No more rows can be appended to sink");
    }
    if (System.currentTimeMillis() > nextFlush) {
        persist = true;
        persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", System.currentTimeMillis(), nextFlush));
    }
    if (rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
        persist = true;
        persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory.get(), tuningConfig.getMaxRowsInMemory()));
    }
    if (bytesCurrentlyInMemory.get() >= maxBytesTuningConfig) {
        persist = true;
        persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory.get(), maxBytesTuningConfig));
    }
    if (persist) {
        if (allowIncrementalPersists) {
            // persistAll clears rowsCurrentlyInMemory, no need to update it.
            log.info("Flushing in-memory data to disk because %s.", String.join(",", persistReasons));
            long bytesToBePersisted = 0L;
            for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
                final Sink sinkEntry = entry.getValue();
                if (sinkEntry != null) {
                    bytesToBePersisted += sinkEntry.getBytesInMemory();
                    if (sinkEntry.swappable()) {
                        // After swapping the sink, we use memory mapped segment instead (but only for real time appenderators!).
                        // However, the memory mapped segment still consumes memory.
                        // These memory mapped segments are held in memory throughout the ingestion phase and permanently add to the bytesCurrentlyInMemory
                        // The overhead must be derived from the sink being iterated, not from the sink this row
                        // was just added to.
                        int memoryStillInUse = calculateMMappedHydrantMemoryInUsed(sinkEntry.getCurrHydrant());
                        bytesCurrentlyInMemory.addAndGet(memoryStillInUse);
                    }
                }
            }
            if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory.get() - bytesToBePersisted > maxBytesTuningConfig) {
                // We are still over maxBytesTuningConfig even after persisting.
                // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
                final String alertMessage = StringUtils.format("Task has exceeded safe estimated heap usage limits, failing " + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])" + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])", sinks.size(), sinks.values().stream().mapToInt(Iterables::size).sum(), getTotalRowCount(), bytesCurrentlyInMemory.get(), bytesToBePersisted, maxBytesTuningConfig);
                final String errorMessage = StringUtils.format("%s.\nThis can occur when the overhead from too many intermediary segment persists becomes to " + "great to have enough space to process additional input rows. This check, along with metering the overhead " + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting " + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter " + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an " + "increase in heap footprint, but will allow for more intermediary segment persists to occur before " + "reaching this condition.", alertMessage);
                log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
                throw new RuntimeException(errorMessage);
            }
            // The persist runs asynchronously; a failure is parked in persistError rather than thrown here.
            Futures.addCallback(persistAll(committerSupplier == null ? null : committerSupplier.get()), new FutureCallback<Object>() {

                @Override
                public void onSuccess(@Nullable Object result) {
                // do nothing
                }

                @Override
                public void onFailure(Throwable t) {
                    persistError = t;
                }
            });
        } else {
            isPersistRequired = true;
        }
    }
    return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired);
}
Also used : ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) Iterables(com.google.common.collect.Iterables) Sink(org.apache.druid.segment.realtime.plumber.Sink) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)

Example 7 with Sink

use of org.apache.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class StreamAppenderator method closeNow.

/**
 * Unannounce the segments and wait for outstanding persists to finish.
 * Do not unlock base persist dir as we are not waiting for push executor to shut down
 * relying on current JVM to shutdown to not cause any locking problem if the task is restored.
 * In case when task is restored and current task is still active because of push executor (which it shouldn't be
 * since push executor starts daemon threads) then the locking should fail and new task should fail to start.
 * This also means that this method should only be called when task is shutting down.
 *
 * <p>Only the first call has any effect; later calls return immediately. A failure to unannounce one segment is
 * reported as an alert and does not stop the remaining segments from being unannounced.
 *
 * @throws ISE if the thread is interrupted while waiting for the executors to terminate; the interrupt flag is
 *             restored and the {@link InterruptedException} is attached as the cause
 */
@Override
public void closeNow() {
    if (!closed.compareAndSet(false, true)) {
        log.debug("Appenderator already closed, skipping closeNow() call.");
        return;
    }
    log.debug("Shutting down immediately...");
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        try {
            segmentAnnouncer.unannounceSegment(entry.getValue().getSegment());
        } catch (Exception e) {
            log.makeAlert(e, "Failed to unannounce segment[%s]", schema.getDataSource()).addData("identifier", entry.getKey().toString()).emit();
        }
    }
    try {
        shutdownExecutors();
        // We don't wait for pushExecutor to be terminated. See Javadoc for more details.
        Preconditions.checkState(persistExecutor == null || persistExecutor.awaitTermination(365, TimeUnit.DAYS), "persistExecutor not terminated");
        Preconditions.checkState(intermediateTempExecutor == null || intermediateTempExecutor.awaitTermination(365, TimeUnit.DAYS), "intermediateTempExecutor not terminated");
        persistExecutor = null;
        intermediateTempExecutor = null;
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        // Keep the original exception as the cause so the interruption is visible in the stack trace.
        throw new ISE(e, "Failed to shutdown executors during close()");
    }
}
Also used : Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 8 with Sink

use of org.apache.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class StreamAppenderator method bootstrapSinksFromDisk.

/**
 * Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
 *
 * <p>Every sub-directory of the base persist directory that contains an identifier file is treated as a sink
 * directory. Sink directories without committed hydrants are deleted, as are hydrant directories whose number is
 * at or beyond the committed hydrant count. An {@link IOException} while loading one sink is reported as an alert
 * and the sink is skipped; the final consistency check then fails if that sink was part of the commit metadata.
 *
 * @return persisted commit metadata, or null when the base persist directory does not exist or cannot be listed
 * @throws ISE if the commit file cannot be read, a hydrant is missing from a sink directory, or a committed sink
 *             could not be loaded
 */
private Object bootstrapSinksFromDisk() {
    Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
    final File baseDir = tuningConfig.getBasePersistDirectory();
    if (!baseDir.exists()) {
        return null;
    }
    final File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    }
    final Committed committed;
    File commitFile = null;
    try {
        commitLock.lock();
        commitFile = computeCommitFile();
        if (commitFile.exists()) {
            committed = objectMapper.readValue(commitFile, Committed.class);
        } else {
            committed = Committed.nil();
        }
    } catch (Exception e) {
        throw new ISE(e, "Failed to read commitFile: %s", commitFile);
    } finally {
        commitLock.unlock();
    }
    int rowsSoFar = 0;
    if (committed.equals(Committed.nil())) {
        log.debug("No previously committed metadata.");
    } else {
        log.info("Loading partially-persisted segments[%s] from[%s] with commit metadata: %s", String.join(", ", committed.getHydrants().keySet()), baseDir, committed.getMetadata());
    }
    for (File sinkDir : files) {
        final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
        if (!identifierFile.isFile()) {
            // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
            continue;
        }
        try {
            // Read the very file that was just checked instead of rebuilding its path from a literal name.
            final SegmentIdWithShardSpec identifier = objectMapper.readValue(identifierFile, SegmentIdWithShardSpec.class);
            final int committedHydrants = committed.getCommittedHydrants(identifier.toString());
            if (committedHydrants <= 0) {
                log.info("Removing uncommitted segment at [%s].", sinkDir);
                FileUtils.deleteDirectory(sinkDir);
                continue;
            }
            // To avoid reading and listing of "merged" dir and other special files
            final File[] sinkFiles = sinkDir.listFiles((dir, fileName) -> Ints.tryParse(fileName) != null);
            if (sinkFiles == null) {
                // listFiles returns null on an I/O error; route it through the IOException handling below
                // rather than failing with a NullPointerException in Arrays.sort.
                throw new IOException("Unable to list contents of sinkDir [" + sinkDir + "]");
            }
            Arrays.sort(sinkFiles, (o1, o2) -> Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())));
            List<FireHydrant> hydrants = new ArrayList<>();
            for (File hydrantDir : sinkFiles) {
                final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
                if (hydrantNumber >= committedHydrants) {
                    log.info("Removing uncommitted partial segment at [%s]", hydrantDir);
                    FileUtils.deleteDirectory(hydrantDir);
                } else {
                    log.debug("Loading previously persisted partial segment at [%s]", hydrantDir);
                    // Directories are visited in ascending numeric order, so a gap shows up as a number that
                    // does not match the count of hydrants loaded so far.
                    if (hydrantNumber != hydrants.size()) {
                        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
                    }
                    hydrants.add(new FireHydrant(new QueryableIndexSegment(indexIO.loadIndex(hydrantDir), identifier.asSegmentId()), hydrantNumber));
                }
            }
            // Make sure we loaded enough hydrants.
            if (committedHydrants != hydrants.size()) {
                throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
            }
            Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, useMaxMemoryEstimates, null, hydrants);
            rowsSoFar += currSink.getNumRows();
            sinks.put(identifier, currSink);
            sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
            segmentAnnouncer.announceSegment(currSink.getSegment());
        } catch (IOException e) {
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
        }
    }
    // Make sure we loaded all committed sinks.
    final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), SegmentIdWithShardSpec::toString));
    final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
    if (!missingSinks.isEmpty()) {
        throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
    }
    totalRows.set(rowsSoFar);
    return committed.getMetadata();
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File)

Example 9 with Sink

use of org.apache.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class AppenderatorImpl method persistAll.

/**
 * Gathers the hydrants that still need persisting across all sinks, submits a task to {@code persistExecutor} that
 * persists them and (when a committer is given) writes commit metadata, and returns the future of that task.
 *
 * <p>Before returning, {@code rowsCurrentlyInMemory} and {@code bytesCurrentlyInMemory} are reduced by the totals
 * collected from the sinks. This happens on the calling thread right after the task is submitted; it does not wait
 * for the task to finish.
 *
 * @param committer committer whose metadata is captured up front and which is run inside the submitted task after
 *                  the hydrants are persisted; may be null, in which case no commit is written
 * @return future of the submitted task; it yields the committer's metadata, or null when {@code committer} is null
 */
@Override
public ListenableFuture<Object> persistAll(@Nullable final Committer committer) {
    // NOTE(review): presumably surfaces a failure recorded by an earlier persist — confirm against the helper.
    throwPersistErrorIfExists();
    // Segment identifier string -> number of hydrants in that sink at submission time; written to the commit file.
    final Map<String, Integer> currentHydrants = new HashMap<>();
    final List<Pair<FireHydrant, SegmentIdWithShardSpec>> indexesToPersist = new ArrayList<>();
    int numPersistedRows = 0;
    long bytesPersisted = 0L;
    MutableLong totalHydrantsCount = new MutableLong();
    MutableLong totalHydrantsPersisted = new MutableLong();
    final long totalSinks = sinks.size();
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        final SegmentIdWithShardSpec identifier = entry.getKey();
        final Sink sink = entry.getValue();
        if (sink == null) {
            throw new ISE("No sink for identifier: %s", identifier);
        }
        // Copy the sink's hydrants into a list so they can be counted and sub-listed.
        final List<FireHydrant> hydrants = Lists.newArrayList(sink);
        totalHydrantsCount.add(hydrants.size());
        currentHydrants.put(identifier.toString(), hydrants.size());
        numPersistedRows += sink.getNumRowsInMemory();
        bytesPersisted += sink.getBytesInMemory();
        // For a writable sink the last hydrant is left out of the scan below.
        // NOTE(review): presumably it is the current in-memory hydrant, handled by the swappable() branch — confirm.
        final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();
        // gather hydrants that have not been persisted:
        for (FireHydrant hydrant : hydrants.subList(0, limit)) {
            if (!hydrant.hasSwapped()) {
                log.debug("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
                indexesToPersist.add(Pair.of(hydrant, identifier));
                totalHydrantsPersisted.add(1);
            }
        }
        if (sink.swappable()) {
            // It is swappable. Get the old one to persist it and create a new one:
            indexesToPersist.add(Pair.of(sink.swap(), identifier));
            totalHydrantsPersisted.add(1);
        }
    }
    log.debug("Submitting persist runnable for dataSource[%s]", schema.getDataSource());
    // Metadata is read from the committer now, on the calling thread, not later inside the task.
    final Object commitMetadata = committer == null ? null : committer.getMetadata();
    // Both stopwatches start before submit(): runExecStopwatch is read right after submit() returns,
    // persistStopwatch is read in the task's finally block.
    final Stopwatch runExecStopwatch = Stopwatch.createStarted();
    final Stopwatch persistStopwatch = Stopwatch.createStarted();
    AtomicLong totalPersistedRows = new AtomicLong(numPersistedRows);
    final ListenableFuture<Object> future = persistExecutor.submit(new Callable<Object>() {

        @Override
        public Object call() throws IOException {
            try {
                for (Pair<FireHydrant, SegmentIdWithShardSpec> pair : indexesToPersist) {
                    metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
                }
                if (committer != null) {
                    log.debug("Committing metadata[%s] for sinks[%s].", commitMetadata, Joiner.on(", ").join(currentHydrants.entrySet().stream().map(entry -> StringUtils.format("%s:%d", entry.getKey(), entry.getValue())).collect(Collectors.toList())));
                    committer.run();
                    try {
                        commitLock.lock();
                        final Map<String, Integer> commitHydrants = new HashMap<>();
                        final Committed oldCommit = readCommit();
                        if (oldCommit != null) {
                            // merge current hydrants with existing hydrants
                            commitHydrants.putAll(oldCommit.getHydrants());
                        }
                        // Entries from this persist overwrite older entries for the same segment.
                        commitHydrants.putAll(currentHydrants);
                        writeCommit(new Committed(commitHydrants, commitMetadata));
                    } finally {
                        commitLock.unlock();
                    }
                }
                log.info("Flushed in-memory data with commit metadata [%s] for segments: %s", commitMetadata, indexesToPersist.stream().map(itp -> itp.rhs.asSegmentId().toString()).distinct().collect(Collectors.joining(", ")));
                log.info("Persisted stats: processed rows: [%d], persisted rows[%d], sinks: [%d], total fireHydrants (across sinks): [%d], persisted fireHydrants (across sinks): [%d]", rowIngestionMeters.getProcessed(), totalPersistedRows.get(), totalSinks, totalHydrantsCount.longValue(), totalHydrantsPersisted.longValue());
                // return null if committer is null
                return commitMetadata;
            } catch (IOException e) {
                // Only IOException is counted as a failed persist here; other throwables propagate uncounted.
                metrics.incrementFailedPersists();
                throw e;
            } finally {
                // Recorded for every attempt, successful or not.
                metrics.incrementNumPersists();
                metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
                persistStopwatch.stop();
            }
        }
    });
    // Time the calling thread spent inside submit(), reported as back-pressure.
    // NOTE(review): presumably submit() blocks while earlier persists are pending — confirm executor configuration.
    final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
    metrics.incrementPersistBackPressureMillis(startDelay);
    if (startDelay > WARN_DELAY) {
        log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
    }
    runExecStopwatch.stop();
    resetNextFlush();
    // NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
    rowsCurrentlyInMemory.addAndGet(-numPersistedRows);
    bytesCurrentlyInMemory.addAndGet(-bytesPersisted);
    log.info("Persisted rows[%,d] and (estimated) bytes[%,d]", numPersistedRows, bytesPersisted);
    return future;
}
Also used : IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Stopwatch(com.google.common.base.Stopwatch) Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) Pair(org.apache.druid.java.util.common.Pair) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MutableLong(org.apache.commons.lang.mutable.MutableLong) AtomicLong(java.util.concurrent.atomic.AtomicLong) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Example 10 with Sink

use of org.apache.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class AppenderatorImpl method close.

/**
 * Abandons every sink, waits for those abandon operations to complete, shuts down the persist, push and
 * intermediate-temp executors, and finally unlocks the base persist directory.
 *
 * <p>Only the first call has any effect; later calls return immediately. An interruption or failure while waiting
 * for the abandon futures is logged as a warning and shutdown continues.
 *
 * @throws ISE if the thread is interrupted while waiting for the executors to terminate; the interrupt flag is
 *             restored, the {@link InterruptedException} is attached as the cause, and the base persist directory
 *             is left locked
 */
@Override
public void close() {
    if (!closed.compareAndSet(false, true)) {
        log.debug("Appenderator already closed, skipping close() call.");
        return;
    }
    log.debug("Shutting down...");
    final List<ListenableFuture<?>> futures = new ArrayList<>();
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        futures.add(abandonSegment(entry.getKey(), entry.getValue(), false));
    }
    try {
        Futures.allAsList(futures).get();
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        log.warn(e, "Interrupted during close()");
    } catch (ExecutionException e) {
        log.warn(e, "Unable to abandon existing segments during close()");
    }
    try {
        shutdownExecutors();
        Preconditions.checkState(persistExecutor == null || persistExecutor.awaitTermination(365, TimeUnit.DAYS), "persistExecutor not terminated");
        Preconditions.checkState(pushExecutor == null || pushExecutor.awaitTermination(365, TimeUnit.DAYS), "pushExecutor not terminated");
        Preconditions.checkState(intermediateTempExecutor == null || intermediateTempExecutor.awaitTermination(365, TimeUnit.DAYS), "intermediateTempExecutor not terminated");
        persistExecutor = null;
        pushExecutor = null;
        intermediateTempExecutor = null;
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        // Keep the original exception as the cause so the interruption is visible in the stack trace.
        throw new ISE(e, "Failed to shutdown executors during close()");
    }
    // Only unlock if executors actually shut down.
    unlockBasePersistDirectory();
}
Also used : Sink(org.apache.druid.segment.realtime.plumber.Sink) ArrayList(java.util.ArrayList) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ISE(org.apache.druid.java.util.common.ISE) ExecutionException(java.util.concurrent.ExecutionException) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Aggregations

Sink (org.apache.druid.segment.realtime.plumber.Sink)20 ISE (org.apache.druid.java.util.common.ISE)14 ArrayList (java.util.ArrayList)13 IOException (java.io.IOException)12 HashMap (java.util.HashMap)12 Map (java.util.Map)12 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)12 ConcurrentMap (java.util.concurrent.ConcurrentMap)10 IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException)10 FireHydrant (org.apache.druid.segment.realtime.FireHydrant)10 ExecutionException (java.util.concurrent.ExecutionException)8 Iterables (com.google.common.collect.Iterables)7 File (java.io.File)7 Pair (org.apache.druid.java.util.common.Pair)7 QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment)7 Stopwatch (com.google.common.base.Stopwatch)6 IAE (org.apache.druid.java.util.common.IAE)6 IncrementalIndexAddResult (org.apache.druid.segment.incremental.IncrementalIndexAddResult)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 VisibleForTesting (com.google.common.annotations.VisibleForTesting)5