
Example 1 with Iterables

Use of com.google.common.collect.Iterables in project cassandra by apache.

From the class RangeStreamer, the method calculateRangesToFetchWithPreferredEndpoints.

/**
 * Get a map of all ranges and the source that will be cleaned up once this bootstrapped node is added for the given ranges.
 * For each range, the list should only contain a single source. This allows us to consistently migrate data without violating
 * consistency.
 */
public static EndpointsByReplica calculateRangesToFetchWithPreferredEndpoints(BiFunction<InetAddressAndPort, EndpointsForRange, EndpointsForRange> snitchGetSortedListByProximity, AbstractReplicationStrategy strat, ReplicaCollection<?> fetchRanges, boolean useStrictConsistency, TokenMetadata tmdBefore, TokenMetadata tmdAfter, String keyspace, Collection<SourceFilter> sourceFilters) {
    EndpointsByRange rangeAddresses = strat.getRangeAddresses(tmdBefore);
    InetAddressAndPort localAddress = FBUtilities.getBroadcastAddressAndPort();
    logger.debug("Keyspace: {}", keyspace);
    logger.debug("To fetch RN: {}", fetchRanges);
    logger.debug("Fetch ranges: {}", rangeAddresses);
    Predicate<Replica> testSourceFilters = and(sourceFilters);
    Function<EndpointsForRange, EndpointsForRange> sorted = endpoints -> snitchGetSortedListByProximity.apply(localAddress, endpoints);
    // This list of replicas contains only candidates. With strict consistency it's going to be a narrow list.
    EndpointsByReplica.Builder rangesToFetchWithPreferredEndpoints = new EndpointsByReplica.Builder();
    for (Replica toFetch : fetchRanges) {
        // Replica that is sufficient to provide the data we need
        // With strict consistency and transient replication we may end up with multiple types
        // so this isn't used with strict consistency
        Predicate<Replica> isSufficient = r -> toFetch.isTransient() || r.isFull();
        logger.debug("To fetch {}", toFetch);
        for (Range<Token> range : rangeAddresses.keySet()) {
            if (!range.contains(toFetch.range()))
                continue;
            final EndpointsForRange oldEndpoints = sorted.apply(rangeAddresses.get(range));
            // Ultimately we populate this with whatever we are going to fetch from to satisfy toFetch.
            // It could be multiple endpoints, and we must fetch from all of them if they are there.
            // With transient replication and strict consistency this is how we get the full data from a full
            // replica and the transient data from the transient replica that is losing the range.
            EndpointsForRange sources;
            // Due to CASSANDRA-5953 we can have a higher RF than we have endpoints.
            // So we need to be careful to only be strict when endpoints == RF
            boolean isStrictConsistencyApplicable = useStrictConsistency && (oldEndpoints.size() == strat.getReplicationFactor().allReplicas);
            if (isStrictConsistencyApplicable) {
                EndpointsForRange strictEndpoints;
                // Start with two sets of who replicates the range before and who replicates it after
                EndpointsForRange newEndpoints = strat.calculateNaturalReplicas(toFetch.range().right, tmdAfter);
                logger.debug("Old endpoints {}", oldEndpoints);
                logger.debug("New endpoints {}", newEndpoints);
                // Remove new endpoints from old endpoints based on address
                strictEndpoints = oldEndpoints.without(newEndpoints.endpoints());
                if (strictEndpoints.size() > 1)
                    throw new AssertionError("Expected <= 1 endpoint but found " + strictEndpoints);
                // the source filters must not remove any replica required for strict consistency
                if (!all(strictEndpoints, testSourceFilters))
                    throw new IllegalStateException("Necessary replicas for strict consistency were removed by source filters: " + buildErrorMessage(sourceFilters, strictEndpoints));
                // An empty strict set is only valid when this node is transitioning from transient to full,
                // so it's an error if a transient fetch finds nothing.
                if (strictEndpoints.isEmpty() && toFetch.isTransient())
                    throw new AssertionError("If there are no endpoints to fetch from then we must be transitioning from transient to full for range " + toFetch);
                if (!any(strictEndpoints, isSufficient)) {
                    // need an additional replica; include all our filters, to ensure we include a matching node
                    Optional<Replica> fullReplica = Iterables.<Replica>tryFind(oldEndpoints, and(isSufficient, testSourceFilters)).toJavaUtil();
                    if (fullReplica.isPresent())
                        strictEndpoints = Endpoints.concat(strictEndpoints, EndpointsForRange.of(fullReplica.get()));
                    else
                        throw new IllegalStateException("Couldn't find any matching sufficient replica out of " + buildErrorMessage(sourceFilters, oldEndpoints));
                }
                sources = strictEndpoints;
            } else {
                // Without strict consistency we have given up on correctness, so there is no point in fetching
                // from both a full and a transient replica, since the transient replica is likely to be missing data anyway.
                // Also apply the testSourceFilters that were given to us so we can safely select a single source
                sources = sorted.apply(oldEndpoints.filter(and(isSufficient, testSourceFilters)));
                // Limit it to just the first possible source, we don't need more than one and downstream
                // will fetch from every source we supply
                sources = sources.size() > 0 ? sources.subList(0, 1) : sources;
            }
            // storing range and preferred endpoint set
            rangesToFetchWithPreferredEndpoints.putAll(toFetch, sources, Conflict.NONE);
            logger.debug("Endpoints to fetch for {} are {}", toFetch, sources);
        }
        EndpointsForRange addressList = rangesToFetchWithPreferredEndpoints.getIfPresent(toFetch);
        if (addressList == null)
            throw new IllegalStateException("Failed to find endpoints to fetch " + toFetch);
        /*
         * When we move forward (shrink our bucket) we are the only node losing a range, and no one else loses
         * (or gains) from that action. When we move backward, two nodes lose a range: one is a full replica
         * and the other is a transient replica. In that case we need to fetch from two places for the full
         * range we gain. For a transient range we only need to fetch from one.
         */
        if (useStrictConsistency && addressList.size() > 1 && (addressList.filter(Replica::isFull).size() > 1 || addressList.filter(Replica::isTransient).size() > 1))
            throw new IllegalStateException(String.format("Multiple strict sources found for %s, sources: %s", toFetch, addressList));
        // We must have at least one sufficient source to fetch from
        if (!any(addressList, isSufficient)) {
            if (strat.getReplicationFactor().allReplicas == 1) {
                if (useStrictConsistency) {
                    logger.warn("A node required to move the data consistently is down");
                    throw new IllegalStateException("Unable to find sufficient sources for streaming range " + toFetch + " in keyspace " + keyspace + " with RF=1. " + "Ensure this keyspace contains replicas in the source datacenter.");
                } else
                    logger.warn("Unable to find sufficient sources for streaming range {} in keyspace {} with RF=1. " + "Keyspace might be missing data.", toFetch, keyspace);
            } else {
                if (useStrictConsistency)
                    logger.warn("A node required to move the data consistently is down");
                throw new IllegalStateException("Unable to find sufficient sources for streaming range " + toFetch + " in keyspace " + keyspace);
            }
        }
    }
    return rangesToFetchWithPreferredEndpoints.build();
}
Also used : BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) Iterables.all(com.google.common.collect.Iterables.all) StringUtils(org.apache.commons.lang3.StringUtils) Gossiper(org.apache.cassandra.gms.Gossiper) NetworkTopologyStrategy(org.apache.cassandra.locator.NetworkTopologyStrategy) Predicates.and(com.google.common.base.Predicates.and) StreamResultFuture(org.apache.cassandra.streaming.StreamResultFuture) Replica.fullReplica(org.apache.cassandra.locator.Replica.fullReplica) HashMultimap(com.google.common.collect.HashMultimap) Replicas(org.apache.cassandra.locator.Replicas) Endpoints(org.apache.cassandra.locator.Endpoints) Predicates.not(com.google.common.base.Predicates.not) ReplicaCollection(org.apache.cassandra.locator.ReplicaCollection) Map(java.util.Map) EndpointsByRange(org.apache.cassandra.locator.EndpointsByRange) Keyspace(org.apache.cassandra.db.Keyspace) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) FBUtilities(org.apache.cassandra.utils.FBUtilities) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) RangesAtEndpoint(org.apache.cassandra.locator.RangesAtEndpoint) List(java.util.List) Predicate(com.google.common.base.Predicate) Conflict(org.apache.cassandra.locator.ReplicaCollection.Builder.Conflict) Optional(java.util.Optional) FailureDetector(org.apache.cassandra.gms.FailureDetector) Iterables.any(com.google.common.collect.Iterables.any) InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) Iterables(com.google.common.collect.Iterables) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) SystemKeyspace(org.apache.cassandra.db.SystemKeyspace) ArrayList(java.util.ArrayList) IEndpointSnitch(org.apache.cassandra.locator.IEndpointSnitch) TokenMetadata(org.apache.cassandra.locator.TokenMetadata) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) StreamOperation(org.apache.cassandra.streaming.StreamOperation) Logger(org.slf4j.Logger) Replica(org.apache.cassandra.locator.Replica) IFailureDetector(org.apache.cassandra.gms.IFailureDetector) PreviewKind(org.apache.cassandra.streaming.PreviewKind) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) StreamPlan(org.apache.cassandra.streaming.StreamPlan) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LocalStrategy(org.apache.cassandra.locator.LocalStrategy)
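The Iterables usage in this example is the combination of Iterables.all, Iterables.any, and Iterables.tryFind with predicates composed via Predicates.and. Below is a minimal, self-contained sketch of that pattern (toy data and class name, not Cassandra code); tryFind returns Guava's Optional, which toJavaUtil() converts to java.util.Optional, exactly as the fullReplica lookup above does.

import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import java.util.List;
import java.util.Optional;

public class IterablesPredicateSketch {
    public static void main(String[] args) {
        List<Integer> candidates = List.of(3, 8, 15);
        // Guava's Predicate is a functional interface, so lambdas work here.
        Predicate<Integer> nonNegative = n -> n >= 0;
        Predicate<Integer> sufficient = n -> n >= 10;
        // all(...) mirrors the strict-consistency filter check on strictEndpoints
        boolean allPass = Iterables.all(candidates, nonNegative);
        // any(...) mirrors the "do we have a sufficient source" checks
        boolean anySufficient = Iterables.any(candidates, sufficient);
        // tryFind(...) mirrors the fullReplica lookup; toJavaUtil() bridges the optionals
        Optional<Integer> first =
                Iterables.tryFind(candidates, Predicates.and(nonNegative, sufficient)).toJavaUtil();
        System.out.printf("all=%s any=%s first=%s%n", allPass, anySufficient, first);
    }
}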

Example 2 with Iterables

Use of com.google.common.collect.Iterables in project druid by druid-io.

From the class BatchAppenderator, the method add.

@Override
public AppenderatorAddResult add(final SegmentIdWithShardSpec identifier, final InputRow row, @Nullable final Supplier<Committer> committerSupplier, final boolean allowIncrementalPersists) throws IndexSizeExceededException, SegmentNotWritableException {
    throwPersistErrorIfExists();
    Preconditions.checkArgument(committerSupplier == null, "Batch appenderator does not need a committer!");
    Preconditions.checkArgument(allowIncrementalPersists, "Batch appenderator should always allow incremental persists!");
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    }
    final Sink sink = getOrCreateSink(identifier);
    metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
    final long bytesInMemoryAfterAdd;
    final IncrementalIndexAddResult addResult;
    try {
        // allowIncrementalPersists is always true for batch, so pass !allowIncrementalPersists, i.e. false
        addResult = sink.add(row, false);
        sinkRowsInMemoryAfterAdd = addResult.getRowCount();
        bytesInMemoryAfterAdd = addResult.getBytesInMemory();
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAddRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    }
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    }
    if (addResult.isRowAdded()) {
        rowIngestionMeters.incrementProcessed();
    } else if (addResult.hasParseException()) {
        parseExceptionHandler.handle(addResult.getParseException());
    }
    final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
    rowsCurrentlyInMemory += numAddedRows;
    bytesCurrentlyInMemory += (bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
    totalRows += numAddedRows;
    sinksMetadata.computeIfAbsent(identifier, unused -> new SinkMetadata()).addRows(numAddedRows);
    boolean persist = false;
    List<String> persistReasons = new ArrayList<>();
    if (!sink.canAppendRow()) {
        persist = true;
        persistReasons.add("No more rows can be appended to sink");
    }
    if (rowsCurrentlyInMemory >= tuningConfig.getMaxRowsInMemory()) {
        persist = true;
        persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory, tuningConfig.getMaxRowsInMemory()));
    }
    if (bytesCurrentlyInMemory >= maxBytesTuningConfig) {
        persist = true;
        persistReasons.add(StringUtils.format("bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory, maxBytesTuningConfig));
    }
    if (persist) {
        // persistAll clears rowsCurrentlyInMemory, no need to update it.
        log.info("Incremental persist to disk because %s.", String.join(",", persistReasons));
        long bytesToBePersisted = 0L;
        for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
            final Sink sinkEntry = entry.getValue();
            if (sinkEntry != null) {
                bytesToBePersisted += sinkEntry.getBytesInMemory();
                if (sinkEntry.swappable()) {
                        // The batch code no longer memory-maps hydrants, but they still take up memory...
                    int memoryStillInUse = calculateMemoryUsedByHydrant();
                    bytesCurrentlyInMemory += memoryStillInUse;
                }
            }
        }
        if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory - bytesToBePersisted > maxBytesTuningConfig) {
            // We are still over maxBytesTuningConfig even after persisting.
            // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
            final String alertMessage = StringUtils.format("Task has exceeded safe estimated heap usage limits, failing " + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])" + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])", sinks.size(), sinks.values().stream().mapToInt(Iterables::size).sum(), getTotalRowCount(), bytesCurrentlyInMemory, bytesToBePersisted, maxBytesTuningConfig);
            final String errorMessage = StringUtils.format("%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too " + "great to have enough space to process additional input rows. This check, along with metering the overhead " + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting " + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter " + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an " + "increase in heap footprint, but will allow for more intermediary segment persists to occur before " + "reaching this condition.", alertMessage);
            log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
            throw new RuntimeException(errorMessage);
        }
        Futures.addCallback(persistAll(null), new FutureCallback<Object>() {

            @Override
            public void onSuccess(@Nullable Object result) {
            // do nothing
            }

            @Override
            public void onFailure(Throwable t) {
                persistError = t;
            }
        });
    }
    return new AppenderatorAddResult(identifier, sinksMetadata.get(identifier).numRowsInSegment, false);
}
Also used : Arrays(java.util.Arrays) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Pair(org.apache.druid.java.util.common.Pair) FileLock(java.nio.channels.FileLock) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) IAE(org.apache.druid.java.util.common.IAE) FileUtils(org.apache.druid.java.util.common.FileUtils) Function(com.google.common.base.Function) Execs(org.apache.druid.java.util.common.concurrent.Execs) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) Collection(java.util.Collection) QueryableIndex(org.apache.druid.segment.QueryableIndex) StandardOpenOption(java.nio.file.StandardOpenOption) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) InputRow(org.apache.druid.data.input.InputRow) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Stopwatch(com.google.common.base.Stopwatch) Supplier(com.google.common.base.Supplier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) ArrayList(java.util.ArrayList) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Sink(org.apache.druid.segment.realtime.plumber.Sink) RetryUtils(org.apache.druid.java.util.common.RetryUtils) Nullable(javax.annotation.Nullable) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) RE(org.apache.druid.java.util.common.RE) IndexMerger(org.apache.druid.segment.IndexMerger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) IOException(java.io.IOException) Ints(com.google.common.primitives.Ints) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) FutureCallback(com.google.common.util.concurrent.FutureCallback) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Futures(com.google.common.util.concurrent.Futures) Closeable(java.io.Closeable) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) IndexIO(org.apache.druid.segment.IndexIO) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) FileChannel(java.nio.channels.FileChannel)
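The only Iterables call in this example is the method reference in the heap-usage alert: a Sink is iterable over its hydrants, so sinks.values().stream().mapToInt(Iterables::size).sum() counts hydrants across all sinks. A minimal sketch of that idiom with stand-in types (hypothetical names, not Druid code):

import com.google.common.collect.Iterables;
import java.util.List;
import java.util.Map;

public class HydrantCountSketch {
    public static void main(String[] args) {
        // Each list stands in for a Sink's iterable of hydrants.
        Map<String, List<String>> sinks = Map.of(
                "segment-a", List.of("hydrant-0", "hydrant-1"),
                "segment-b", List.of("hydrant-0"));
        // Iterables.size walks any Iterable, so the method reference fits mapToInt.
        int numHydrantsAcrossAllSinks =
                sinks.values().stream().mapToInt(Iterables::size).sum();
        System.out.println(numHydrantsAcrossAllSinks); // prints 3
    }
}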

Example 3 with Iterables

Use of com.google.common.collect.Iterables in project druid by druid-io.

From the class StreamAppenderator, the method add.

@Override
public AppenderatorAddResult add(final SegmentIdWithShardSpec identifier, final InputRow row, @Nullable final Supplier<Committer> committerSupplier, final boolean allowIncrementalPersists) throws IndexSizeExceededException, SegmentNotWritableException {
    throwPersistErrorIfExists();
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    }
    final Sink sink = getOrCreateSink(identifier);
    metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
    final long bytesInMemoryAfterAdd;
    final IncrementalIndexAddResult addResult;
    try {
        addResult = sink.add(row, !allowIncrementalPersists);
        sinkRowsInMemoryAfterAdd = addResult.getRowCount();
        bytesInMemoryAfterAdd = addResult.getBytesInMemory();
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAddRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    }
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    }
    if (addResult.isRowAdded()) {
        rowIngestionMeters.incrementProcessed();
    } else if (addResult.hasParseException()) {
        parseExceptionHandler.handle(addResult.getParseException());
    }
    final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
    rowsCurrentlyInMemory.addAndGet(numAddedRows);
    bytesCurrentlyInMemory.addAndGet(bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
    totalRows.addAndGet(numAddedRows);
    boolean isPersistRequired = false;
    boolean persist = false;
    List<String> persistReasons = new ArrayList<>();
    if (!sink.canAppendRow()) {
        persist = true;
        persistReasons.add("No more rows can be appended to sink");
    }
    if (System.currentTimeMillis() > nextFlush) {
        persist = true;
        persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", System.currentTimeMillis(), nextFlush));
    }
    if (rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
        persist = true;
        persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory.get(), tuningConfig.getMaxRowsInMemory()));
    }
    if (bytesCurrentlyInMemory.get() >= maxBytesTuningConfig) {
        persist = true;
        persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory.get(), maxBytesTuningConfig));
    }
    if (persist) {
        if (allowIncrementalPersists) {
            // persistAll clears rowsCurrentlyInMemory, no need to update it.
            log.info("Flushing in-memory data to disk because %s.", String.join(",", persistReasons));
            long bytesToBePersisted = 0L;
            for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
                final Sink sinkEntry = entry.getValue();
                if (sinkEntry != null) {
                    bytesToBePersisted += sinkEntry.getBytesInMemory();
                    if (sinkEntry.swappable()) {
                        // After swapping the sink, we use memory mapped segment instead (but only for real time appenderators!).
                        // However, the memory mapped segment still consumes memory.
                        // These memory mapped segments are held in memory throughout the ingestion phase and permanently add to the bytesCurrentlyInMemory
                        int memoryStillInUse = calculateMMappedHydrantMemoryInUsed(sink.getCurrHydrant());
                        bytesCurrentlyInMemory.addAndGet(memoryStillInUse);
                    }
                }
            }
            if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory.get() - bytesToBePersisted > maxBytesTuningConfig) {
                // We are still over maxBytesTuningConfig even after persisting.
                // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
                final String alertMessage = StringUtils.format("Task has exceeded safe estimated heap usage limits, failing " + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])" + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])", sinks.size(), sinks.values().stream().mapToInt(Iterables::size).sum(), getTotalRowCount(), bytesCurrentlyInMemory.get(), bytesToBePersisted, maxBytesTuningConfig);
                final String errorMessage = StringUtils.format("%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too " + "great to have enough space to process additional input rows. This check, along with metering the overhead " + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting " + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter " + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an " + "increase in heap footprint, but will allow for more intermediary segment persists to occur before " + "reaching this condition.", alertMessage);
                log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
                throw new RuntimeException(errorMessage);
            }
            Futures.addCallback(persistAll(committerSupplier == null ? null : committerSupplier.get()), new FutureCallback<Object>() {

                @Override
                public void onSuccess(@Nullable Object result) {
                // do nothing
                }

                @Override
                public void onFailure(Throwable t) {
                    persistError = t;
                }
            });
        } else {
            isPersistRequired = true;
        }
    }
    return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired);
}
Also used : ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) Iterables(com.google.common.collect.Iterables) Sink(org.apache.druid.segment.realtime.plumber.Sink) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
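Besides the Iterables::size alert, this example's persist path hinges on Guava's Futures.addCallback: a failed persist is captured into persistError, which the next add() call surfaces via throwPersistErrorIfExists(). A minimal sketch of that error-capture pattern, using a SettableFuture as a stand-in for the future persistAll(...) returns (the three-argument addCallback overload with an explicit executor is used, since recent Guava versions deprecate the two-argument form):

import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.SettableFuture;

public class PersistCallbackSketch {
    // mirrors the appenderator's persistError field
    private static volatile Throwable persistError;

    public static void main(String[] args) {
        // Stand-in for the future returned by persistAll(...)
        SettableFuture<Object> persistFuture = SettableFuture.create();
        Futures.addCallback(persistFuture, new FutureCallback<Object>() {
            @Override
            public void onSuccess(Object result) {
                // do nothing, as in the example above
            }

            @Override
            public void onFailure(Throwable t) {
                persistError = t; // surfaced later by throwPersistErrorIfExists()
            }
        }, MoreExecutors.directExecutor());

        // Simulate a failed persist; with directExecutor the callback runs synchronously.
        persistFuture.setException(new RuntimeException("disk full"));
        System.out.println("persistError = " + persistError);
    }
}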

Example 4 with Iterables

Use of com.google.common.collect.Iterables in project druid by druid-io.

From the class AppenderatorImpl, the method add.

@Override
public AppenderatorAddResult add(final SegmentIdWithShardSpec identifier, final InputRow row, @Nullable final Supplier<Committer> committerSupplier, final boolean allowIncrementalPersists) throws IndexSizeExceededException, SegmentNotWritableException {
    throwPersistErrorIfExists();
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    }
    final Sink sink = getOrCreateSink(identifier);
    metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
    final long bytesInMemoryAfterAdd;
    final IncrementalIndexAddResult addResult;
    try {
        addResult = sink.add(row, !allowIncrementalPersists);
        sinkRowsInMemoryAfterAdd = addResult.getRowCount();
        bytesInMemoryAfterAdd = addResult.getBytesInMemory();
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAddRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    }
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    }
    if (addResult.isRowAdded()) {
        rowIngestionMeters.incrementProcessed();
    } else if (addResult.hasParseException()) {
        parseExceptionHandler.handle(addResult.getParseException());
    }
    final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
    rowsCurrentlyInMemory.addAndGet(numAddedRows);
    bytesCurrentlyInMemory.addAndGet(bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
    totalRows.addAndGet(numAddedRows);
    boolean isPersistRequired = false;
    boolean persist = false;
    List<String> persistReasons = new ArrayList<>();
    if (!sink.canAppendRow()) {
        persist = true;
        persistReasons.add("No more rows can be appended to sink");
    }
    if (System.currentTimeMillis() > nextFlush) {
        persist = true;
        persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", System.currentTimeMillis(), nextFlush));
    }
    if (rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
        persist = true;
        persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory.get(), tuningConfig.getMaxRowsInMemory()));
    }
    if (bytesCurrentlyInMemory.get() >= maxBytesTuningConfig) {
        persist = true;
        persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory.get(), maxBytesTuningConfig));
    }
    if (persist) {
        if (allowIncrementalPersists) {
            // persistAll clears rowsCurrentlyInMemory, no need to update it.
            log.info("Flushing in-memory data to disk because %s.", String.join(",", persistReasons));
            long bytesToBePersisted = 0L;
            for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
                final Sink sinkEntry = entry.getValue();
                if (sinkEntry != null) {
                    bytesToBePersisted += sinkEntry.getBytesInMemory();
                    if (sinkEntry.swappable()) {
                        // After swapping the sink, we use memory mapped segment instead (but only for real time appenderators!).
                        // However, the memory mapped segment still consumes memory.
                        // These memory mapped segments are held in memory throughout the ingestion phase and permanently add to the bytesCurrentlyInMemory
                        int memoryStillInUse = calculateMMappedHydrantMemoryInUsed(sink.getCurrHydrant());
                        bytesCurrentlyInMemory.addAndGet(memoryStillInUse);
                    }
                }
            }
            if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory.get() - bytesToBePersisted > maxBytesTuningConfig) {
                // We are still over maxBytesTuningConfig even after persisting.
                // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
                final String alertMessage = StringUtils.format("Task has exceeded safe estimated heap usage limits, failing " + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])" + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])", sinks.size(), sinks.values().stream().mapToInt(Iterables::size).sum(), getTotalRowCount(), bytesCurrentlyInMemory.get(), bytesToBePersisted, maxBytesTuningConfig);
                final String errorMessage = StringUtils.format("%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too " + "great to have enough space to process additional input rows. This check, along with metering the overhead " + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting " + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter " + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an " + "increase in heap footprint, but will allow for more intermediary segment persists to occur before " + "reaching this condition.", alertMessage);
                log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
                throw new RuntimeException(errorMessage);
            }
            Futures.addCallback(persistAll(committerSupplier == null ? null : committerSupplier.get()), new FutureCallback<Object>() {

                @Override
                public void onSuccess(@Nullable Object result) {
                // do nothing
                }

                @Override
                public void onFailure(Throwable t) {
                    persistError = t;
                }
            });
        } else {
            isPersistRequired = true;
        }
    }
    return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired);
}
Also used : ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) Iterables(com.google.common.collect.Iterables) Sink(org.apache.druid.segment.realtime.plumber.Sink) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
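Common to all three appenderator examples is the persist-trigger bookkeeping: each threshold check appends a human-readable reason, and a single persist is kicked off if any reason accumulated. A compact sketch of that accumulation pattern with hypothetical threshold values (plain String.format standing in for Druid's StringUtils):

import java.util.ArrayList;
import java.util.List;

public class PersistTriggerSketch {
    public static void main(String[] args) {
        long rowsInMemory = 120_000, maxRowsInMemory = 100_000;
        long bytesInMemory = 50_000_000, maxBytesInMemory = 200_000_000;
        long now = System.currentTimeMillis(), nextFlush = now - 1;

        // Collect every reason that applies rather than short-circuiting,
        // so the log line explains all triggers at once.
        List<String> persistReasons = new ArrayList<>();
        if (now > nextFlush) {
            persistReasons.add(String.format("current time[%d] is greater than nextFlush[%d]", now, nextFlush));
        }
        if (rowsInMemory >= maxRowsInMemory) {
            persistReasons.add(String.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsInMemory, maxRowsInMemory));
        }
        if (bytesInMemory >= maxBytesInMemory) {
            persistReasons.add(String.format("bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesInMemory, maxBytesInMemory));
        }
        if (!persistReasons.isEmpty()) {
            // in the real code this is where persistAll(...) is scheduled
            System.out.println("Flushing in-memory data to disk because " + String.join(",", persistReasons));
        }
    }
}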

Aggregations

Iterables (com.google.common.collect.Iterables)4 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 Map (java.util.Map)4 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3 IAE (org.apache.druid.java.util.common.IAE)3 IncrementalIndexAddResult (org.apache.druid.segment.incremental.IncrementalIndexAddResult)3 IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException)3 Sink (org.apache.druid.segment.realtime.plumber.Sink)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 Preconditions (com.google.common.base.Preconditions)2 Collection (java.util.Collection)2 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 Function (com.google.common.base.Function)1 Predicate (com.google.common.base.Predicate)1 Predicates.and (com.google.common.base.Predicates.and)1 Predicates.not (com.google.common.base.Predicates.not)1 Stopwatch (com.google.common.base.Stopwatch)1