Search in sources :

Example 1 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class BatchAppenderator method add.

@Override
public AppenderatorAddResult add(final SegmentIdWithShardSpec identifier, final InputRow row, @Nullable final Supplier<Committer> committerSupplier, final boolean allowIncrementalPersists) throws IndexSizeExceededException, SegmentNotWritableException {
    throwPersistErrorIfExists();
    Preconditions.checkArgument(committerSupplier == null, "Batch appenderator does not need a committer!");
    Preconditions.checkArgument(allowIncrementalPersists, "Batch appenderator should always allow incremental persists!");
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    }
    final Sink sink = getOrCreateSink(identifier);
    metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
    final long bytesInMemoryAfterAdd;
    final IncrementalIndexAddResult addResult;
    try {
        // allowIncrementalPersists is always true for batch
        addResult = sink.add(row, false);
        sinkRowsInMemoryAfterAdd = addResult.getRowCount();
        bytesInMemoryAfterAdd = addResult.getBytesInMemory();
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAddRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    }
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    }
    if (addResult.isRowAdded()) {
        rowIngestionMeters.incrementProcessed();
    } else if (addResult.hasParseException()) {
        parseExceptionHandler.handle(addResult.getParseException());
    }
    final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
    rowsCurrentlyInMemory += numAddedRows;
    bytesCurrentlyInMemory += (bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
    totalRows += numAddedRows;
    sinksMetadata.computeIfAbsent(identifier, unused -> new SinkMetadata()).addRows(numAddedRows);
    boolean persist = false;
    List<String> persistReasons = new ArrayList<>();
    if (!sink.canAppendRow()) {
        persist = true;
        persistReasons.add("No more rows can be appended to sink");
    }
    if (rowsCurrentlyInMemory >= tuningConfig.getMaxRowsInMemory()) {
        persist = true;
        persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory, tuningConfig.getMaxRowsInMemory()));
    }
    if (bytesCurrentlyInMemory >= maxBytesTuningConfig) {
        persist = true;
        persistReasons.add(StringUtils.format("bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory, maxBytesTuningConfig));
    }
    if (persist) {
        // persistAll clears rowsCurrentlyInMemory, no need to update it.
        log.info("Incremental persist to disk because %s.", String.join(",", persistReasons));
        long bytesToBePersisted = 0L;
        for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
            final Sink sinkEntry = entry.getValue();
            if (sinkEntry != null) {
                bytesToBePersisted += sinkEntry.getBytesInMemory();
                if (sinkEntry.swappable()) {
                    // Code for batch no longer memory maps hydrants, but they still take memory...
                    int memoryStillInUse = calculateMemoryUsedByHydrant();
                    bytesCurrentlyInMemory += memoryStillInUse;
                }
            }
        }
        if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory - bytesToBePersisted > maxBytesTuningConfig) {
            // We are still over maxBytesTuningConfig even after persisting.
            // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
            final String alertMessage = StringUtils.format("Task has exceeded safe estimated heap usage limits, failing " + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])" + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])", sinks.size(), sinks.values().stream().mapToInt(Iterables::size).sum(), getTotalRowCount(), bytesCurrentlyInMemory, bytesToBePersisted, maxBytesTuningConfig);
            final String errorMessage = StringUtils.format("%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too " + "great to have enough space to process additional input rows. This check, along with metering the overhead " + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting " + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter " + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an " + "increase in heap footprint, but will allow for more intermediary segment persists to occur before " + "reaching this condition.", alertMessage);
            log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
            throw new RuntimeException(errorMessage);
        }
        Futures.addCallback(persistAll(null), new FutureCallback<Object>() {

            @Override
            public void onSuccess(@Nullable Object result) {
            // do nothing
            }

            @Override
            public void onFailure(Throwable t) {
                persistError = t;
            }
        });
    }
    return new AppenderatorAddResult(identifier, sinksMetadata.get(identifier).numRowsInSegment, false);
}
Also used : Arrays(java.util.Arrays) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Pair(org.apache.druid.java.util.common.Pair) FileLock(java.nio.channels.FileLock) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) IAE(org.apache.druid.java.util.common.IAE) FileUtils(org.apache.druid.java.util.common.FileUtils) Function(com.google.common.base.Function) Execs(org.apache.druid.java.util.common.concurrent.Execs) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) Collection(java.util.Collection) QueryableIndex(org.apache.druid.segment.QueryableIndex) StandardOpenOption(java.nio.file.StandardOpenOption) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) InputRow(org.apache.druid.data.input.InputRow) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Stopwatch(com.google.common.base.Stopwatch) Supplier(com.google.common.base.Supplier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) ArrayList(java.util.ArrayList) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Sink(org.apache.druid.segment.realtime.plumber.Sink) RetryUtils(org.apache.druid.java.util.common.RetryUtils) Nullable(javax.annotation.Nullable) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) RE(org.apache.druid.java.util.common.RE) IndexMerger(org.apache.druid.segment.IndexMerger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) IOException(java.io.IOException) Ints(com.google.common.primitives.Ints) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) FutureCallback(com.google.common.util.concurrent.FutureCallback) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Futures(com.google.common.util.concurrent.Futures) Closeable(java.io.Closeable) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) IndexIO(org.apache.druid.segment.IndexIO) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) FileChannel(java.nio.channels.FileChannel) ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) Iterables(com.google.common.collect.Iterables) Sink(org.apache.druid.segment.realtime.plumber.Sink) 
IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
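A minimal caller-side sketch of the contract enforced by the two Preconditions checks above: the batch appenderator is driven without a committer supplier and with incremental persists left enabled. The helper name, its variables, and the call through the Appenderator interface (assumed to live in org.apache.druid.segment.realtime.appenderator) are illustrative assumptions, not code from the Druid project.

import org.apache.druid.data.input.InputRow;
import org.apache.druid.segment.realtime.appenderator.Appenderator;
import org.apache.druid.segment.realtime.appenderator.AppenderatorAddResult;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;

// Hypothetical helper: add one row to a batch appenderator.
static AppenderatorAddResult addBatchRow(Appenderator appenderator, SegmentIdWithShardSpec segmentId, InputRow row) throws Exception {
    // committerSupplier must be null and allowIncrementalPersists must be true for BatchAppenderator
    return appenderator.add(segmentId, row, null, true);
}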

Example 2 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class BatchAppenderator method persistAll.

@Override
public ListenableFuture<Object> persistAll(@Nullable final Committer committer) {
    throwPersistErrorIfExists();
    if (committer != null) {
        throw new ISE("committer must be null for BatchAppenderator");
    }
    // Get ready to persist all sinks:
    final Map<SegmentIdWithShardSpec, Sink> sinksToPersist = swapSinks();
    final Stopwatch runExecStopwatch = Stopwatch.createStarted();
    ListenableFuture<Object> future = persistExecutor.submit(() -> {
        log.info("Spawning intermediate persist");
        // figure out hydrants (indices) to persist:
        final List<Pair<FireHydrant, SegmentIdWithShardSpec>> indexesToPersist = new ArrayList<>();
        int numPersistedRows = 0;
        long bytesPersisted = 0;
        int totalHydrantsCount = 0;
        final long totalSinks = sinksToPersist.size();
        for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinksToPersist.entrySet()) {
            final SegmentIdWithShardSpec identifier = entry.getKey();
            final Sink sink = entry.getValue();
            if (sink == null) {
                throw new ISE("No sink for identifier: %s", identifier);
            }
            final List<FireHydrant> hydrants = Lists.newArrayList(sink);
            // Since every time we persist we also get rid of the in-memory references to sink & hydrants
            // the invariant of exactly one, always swappable, sink with exactly one unpersisted hydrant must hold
            int totalHydrantsForSink = hydrants.size();
            if (totalHydrantsForSink != 1) {
                throw new ISE("There should be only one hydrant for identifier[%s] but there are[%s]", identifier, totalHydrantsForSink);
            }
            totalHydrantsCount++;
            numPersistedRows += sink.getNumRowsInMemory();
            bytesPersisted += sink.getBytesInMemory();
            if (!sink.swappable()) {
                throw new ISE("Sink is not swappable![%s]", identifier);
            }
            indexesToPersist.add(Pair.of(sink.swap(), identifier));
        }
        if (indexesToPersist.isEmpty()) {
            log.info("No indexes will be persisted");
        }
        final Stopwatch persistStopwatch = Stopwatch.createStarted();
        try {
            for (Pair<FireHydrant, SegmentIdWithShardSpec> pair : indexesToPersist) {
                metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
            }
            log.info("Persisted in-memory data for segments: %s", indexesToPersist.stream().filter(itp -> itp.rhs != null).map(itp -> itp.rhs.asSegmentId().toString()).distinct().collect(Collectors.joining(", ")));
            log.info("Persisted stats: processed rows: [%d], persisted rows[%d], persisted sinks: [%d], persisted fireHydrants (across sinks): [%d]", rowIngestionMeters.getProcessed(), numPersistedRows, totalSinks, totalHydrantsCount);
        // note that we do not need to reset sinks metadata since we did it at the start...
        } catch (Exception e) {
            metrics.incrementFailedPersists();
            throw e;
        } finally {
            metrics.incrementNumPersists();
            long persistMillis = persistStopwatch.elapsed(TimeUnit.MILLISECONDS);
            metrics.incrementPersistTimeMillis(persistMillis);
            persistStopwatch.stop();
            // make sure no push can start while persisting:
            log.info("Persisted rows[%,d] and bytes[%,d] and removed all sinks & hydrants from memory in[%d] millis", numPersistedRows, bytesPersisted, persistMillis);
            log.info("Persist is done.");
        }
        return null;
    });
    final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
    metrics.incrementPersistBackPressureMillis(startDelay);
    if (startDelay > PERSIST_WARN_DELAY) {
        log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
    }
    runExecStopwatch.stop();
    return future;
}
Also used : Arrays(java.util.Arrays) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Pair(org.apache.druid.java.util.common.Pair) FileLock(java.nio.channels.FileLock) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) IAE(org.apache.druid.java.util.common.IAE) FileUtils(org.apache.druid.java.util.common.FileUtils) Function(com.google.common.base.Function) Execs(org.apache.druid.java.util.common.concurrent.Execs) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) Collection(java.util.Collection) QueryableIndex(org.apache.druid.segment.QueryableIndex) StandardOpenOption(java.nio.file.StandardOpenOption) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) InputRow(org.apache.druid.data.input.InputRow) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Stopwatch(com.google.common.base.Stopwatch) Supplier(com.google.common.base.Supplier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) ArrayList(java.util.ArrayList) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Sink(org.apache.druid.segment.realtime.plumber.Sink) RetryUtils(org.apache.druid.java.util.common.RetryUtils) Nullable(javax.annotation.Nullable) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) RE(org.apache.druid.java.util.common.RE) IndexMerger(org.apache.druid.segment.IndexMerger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) IOException(java.io.IOException) Ints(com.google.common.primitives.Ints) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) FutureCallback(com.google.common.util.concurrent.FutureCallback) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Futures(com.google.common.util.concurrent.Futures) Closeable(java.io.Closeable) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) IndexIO(org.apache.druid.segment.IndexIO) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) FileChannel(java.nio.channels.FileChannel) Stopwatch(com.google.common.base.Stopwatch) ArrayList(java.util.ArrayList) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) 
IOException(java.io.IOException) Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Pair(org.apache.druid.java.util.common.Pair)
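As the null check at the top of persistAll shows, a batch caller always passes a null committer; the persist runs asynchronously on the persist executor and the returned future completes once the swapped sinks are on disk. A small hedged sketch of such a caller, with the helper name and the blocking style as assumptions:

import com.google.common.util.concurrent.ListenableFuture;
import org.apache.druid.segment.realtime.appenderator.Appenderator;

// Hypothetical helper: trigger an intermediate persist on a batch appenderator and wait for it.
static void persistBatch(Appenderator appenderator) throws Exception {
    ListenableFuture<Object> persistFuture = appenderator.persistAll(null);
    // blocks until the persist finishes; a persist failure surfaces as an ExecutionException
    persistFuture.get();
}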

Example 3 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class StreamAppenderator method push.

@Override
public ListenableFuture<SegmentsAndCommitMetadata> push(final Collection<SegmentIdWithShardSpec> identifiers, @Nullable final Committer committer, final boolean useUniquePath) {
    final Map<SegmentIdWithShardSpec, Sink> theSinks = new HashMap<>();
    AtomicLong pushedHydrantsCount = new AtomicLong();
    for (final SegmentIdWithShardSpec identifier : identifiers) {
        final Sink sink = sinks.get(identifier);
        if (sink == null) {
            throw new ISE("No sink for identifier: %s", identifier);
        }
        theSinks.put(identifier, sink);
        if (sink.finishWriting()) {
            totalRows.addAndGet(-sink.getNumRows());
        }
        // count hydrants for stats:
        pushedHydrantsCount.addAndGet(Iterables.size(sink));
    }
    // Persist everything (committing the metadata) first, then build and push the requested segments.
    return Futures.transform(persistAll(committer), (Function<Object, SegmentsAndCommitMetadata>) commitMetadata -> {
        final List<DataSegment> dataSegments = new ArrayList<>();
        log.info("Preparing to push (stats): processed rows: [%d], sinks: [%d], fireHydrants (across sinks): [%d]", rowIngestionMeters.getProcessed(), theSinks.size(), pushedHydrantsCount.get());
        log.debug("Building and pushing segments: %s", theSinks.keySet().stream().map(SegmentIdWithShardSpec::toString).collect(Collectors.joining(", ")));
        for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : theSinks.entrySet()) {
            if (droppingSinks.contains(entry.getKey())) {
                log.warn("Skipping push of currently-dropping sink[%s]", entry.getKey());
                continue;
            }
            final DataSegment dataSegment = mergeAndPush(entry.getKey(), entry.getValue(), useUniquePath);
            if (dataSegment != null) {
                dataSegments.add(dataSegment);
            } else {
                log.warn("mergeAndPush[%s] returned null, skipping.", entry.getKey());
            }
        }
        log.info("Push complete...");
        return new SegmentsAndCommitMetadata(dataSegments, commitMetadata);
    }, pushExecutor);
}
Also used : DataSegmentAnnouncer(org.apache.druid.server.coordination.DataSegmentAnnouncer) Arrays(java.util.Arrays) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Pair(org.apache.druid.java.util.common.Pair) FileLock(java.nio.channels.FileLock) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) IAE(org.apache.druid.java.util.common.IAE) FileUtils(org.apache.druid.java.util.common.FileUtils) DateTimes(org.apache.druid.java.util.common.DateTimes) Function(com.google.common.base.Function) Execs(org.apache.druid.java.util.common.concurrent.Execs) Closer(org.apache.druid.java.util.common.io.Closer) Collection(java.util.Collection) QueryableIndex(org.apache.druid.segment.QueryableIndex) StandardOpenOption(java.nio.file.StandardOpenOption) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) InputRow(org.apache.druid.data.input.InputRow) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) MutableLong(org.apache.commons.lang.mutable.MutableLong) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) Joiner(com.google.common.base.Joiner) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Stopwatch(com.google.common.base.Stopwatch) Supplier(com.google.common.base.Supplier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Sink(org.apache.druid.segment.realtime.plumber.Sink) RetryUtils(org.apache.druid.java.util.common.RetryUtils) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) Nullable(javax.annotation.Nullable) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) ReentrantLock(java.util.concurrent.locks.ReentrantLock) RE(org.apache.druid.java.util.common.RE) IndexMerger(org.apache.druid.segment.IndexMerger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) IOException(java.io.IOException) Ints(com.google.common.primitives.Ints) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) FutureCallback(com.google.common.util.concurrent.FutureCallback) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) Futures(com.google.common.util.concurrent.Futures) 
Lock(java.util.concurrent.locks.Lock) Closeable(java.io.Closeable) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Cache(org.apache.druid.client.cache.Cache) IndexIO(org.apache.druid.segment.IndexIO) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) FileChannel(java.nio.channels.FileChannel) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) DataSegment(org.apache.druid.timeline.DataSegment) AtomicLong(java.util.concurrent.atomic.AtomicLong) Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList)
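In the streaming case the committer is non-null: push first calls persistAll(committer) so the commit metadata (typically stream offsets) is persisted alongside the data, and only then are the segments merged and pushed. A hedged caller-side sketch; the helper, its parameters, and the offsets object are illustrative assumptions rather than Druid code:

import java.util.Collection;
import com.google.common.util.concurrent.ListenableFuture;
import org.apache.druid.data.input.Committer;
import org.apache.druid.segment.realtime.appenderator.Appenderator;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
import org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata;

// Hypothetical helper: wrap the current offsets in a Committer and push the given segments.
static ListenableFuture<SegmentsAndCommitMetadata> pushWithOffsets(
        Appenderator appenderator,
        Collection<SegmentIdWithShardSpec> identifiers,
        Object offsets) {
    final Committer committer = new Committer() {
        @Override
        public Object getMetadata() {
            // persisted together with the segment data
            return offsets;
        }

        @Override
        public void run() {
            // invoked once the data and metadata have been persisted
        }
    };
    // useUniquePath = false here; real tasks choose this based on their segment-allocation strategy
    return appenderator.push(identifiers, committer, false);
}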

Example 4 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class RealtimePlumberSchoolTest method testPersistHydrantGapsHelper.

private void testPersistHydrantGapsHelper(final Object commitMetadata) throws Exception {
    Interval testInterval = new Interval(DateTimes.of("1970-01-01"), DateTimes.of("1971-01-01"));
    RealtimePlumber plumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    Sink sink = new Sink(testInterval, schema2, tuningConfig.getShardSpec(), DateTimes.of("2014-12-01T12:34:56.789").toString(), tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), tuningConfig.getMaxBytesInMemoryOrDefault(), true, tuningConfig.getDedupColumn());
    plumber2.getSinks().put(0L, sink);
    Assert.assertNull(plumber2.startJob());
    final CountDownLatch doneSignal = new CountDownLatch(1);
    final Committer committer = new Committer() {

        @Override
        public Object getMetadata() {
            return commitMetadata;
        }

        @Override
        public void run() {
            doneSignal.countDown();
        }
    };
    plumber2.add(getTestInputRow("1970-01-01"), Suppliers.ofInstance(committer));
    plumber2.add(getTestInputRow("1970-02-01"), Suppliers.ofInstance(committer));
    plumber2.add(getTestInputRow("1970-03-01"), Suppliers.ofInstance(committer));
    plumber2.add(getTestInputRow("1970-04-01"), Suppliers.ofInstance(committer));
    plumber2.add(getTestInputRow("1970-05-01"), Suppliers.ofInstance(committer));
    plumber2.persist(committer);
    doneSignal.await();
    plumber2.getSinks().clear();
    plumber2.finishJob();
    File persistDir = plumber2.computePersistDir(schema2, testInterval);
    /* Check that all hydrants were persisted */
    for (int i = 0; i < 5; i++) {
        Assert.assertTrue(new File(persistDir, String.valueOf(i)).exists());
    }
    /* Create some gaps in the persisted hydrants and reload */
    FileUtils.deleteDirectory(new File(persistDir, "1"));
    FileUtils.deleteDirectory(new File(persistDir, "3"));
    RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    restoredPlumber.bootstrapSinksFromDisk();
    Map<Long, Sink> sinks = restoredPlumber.getSinks();
    Assert.assertEquals(1, sinks.size());
    List<FireHydrant> hydrants = Lists.newArrayList(sinks.get(new Long(0)));
    DateTime startTime = DateTimes.of("1970-01-01T00:00:00.000Z");
    Interval expectedInterval = new Interval(startTime, DateTimes.of("1971-01-01T00:00:00.000Z"));
    Assert.assertEquals(0, hydrants.get(0).getCount());
    Assert.assertEquals(expectedInterval, hydrants.get(0).getSegmentDataInterval());
    Assert.assertEquals(2, hydrants.get(1).getCount());
    Assert.assertEquals(expectedInterval, hydrants.get(1).getSegmentDataInterval());
    Assert.assertEquals(4, hydrants.get(2).getCount());
    Assert.assertEquals(expectedInterval, hydrants.get(2).getSegmentDataInterval());
    /* Delete all the hydrants and reload, no sink should be created */
    FileUtils.deleteDirectory(new File(persistDir, "0"));
    FileUtils.deleteDirectory(new File(persistDir, "2"));
    FileUtils.deleteDirectory(new File(persistDir, "4"));
    RealtimePlumber restoredPlumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    restoredPlumber2.bootstrapSinksFromDisk();
    Assert.assertEquals(0, restoredPlumber2.getSinks().size());
}
Also used : Committer(org.apache.druid.data.input.Committer) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) CountDownLatch(java.util.concurrent.CountDownLatch) File(java.io.File) DateTime(org.joda.time.DateTime) Interval(org.joda.time.Interval)
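The anonymous Committer above (and the identical one in Example 5 below) hands back the test's commit metadata and counts down a latch when it is run, letting the test await the asynchronous persist. A hedged sketch of that pattern factored into a small helper; the method name is hypothetical:

import java.util.concurrent.CountDownLatch;
import org.apache.druid.data.input.Committer;

// Hypothetical test helper: a Committer carrying fixed metadata that signals a latch when run.
static Committer latchCommitter(final Object commitMetadata, final CountDownLatch doneSignal) {
    return new Committer() {
        @Override
        public Object getMetadata() {
            return commitMetadata;
        }

        @Override
        public void run() {
            // called once the commit has been persisted
            doneSignal.countDown();
        }
    };
}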

Example 5 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class RealtimePlumberSchoolTest method testDimOrderInheritanceHelper.

private void testDimOrderInheritanceHelper(final Object commitMetadata) throws Exception {
    List<List<String>> expectedDims = ImmutableList.of(ImmutableList.of("dimD"), ImmutableList.of("dimC"), ImmutableList.of("dimA"), ImmutableList.of("dimB"), ImmutableList.of("dimE"), ImmutableList.of("dimD", "dimC", "dimA", "dimB", "dimE"));
    QueryableIndex qindex;
    FireHydrant hydrant;
    Map<Long, Sink> sinks;
    RealtimePlumber plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    Assert.assertNull(plumber.startJob());
    final CountDownLatch doneSignal = new CountDownLatch(1);
    final Committer committer = new Committer() {

        @Override
        public Object getMetadata() {
            return commitMetadata;
        }

        @Override
        public void run() {
            doneSignal.countDown();
        }
    };
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimD"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimC"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimA"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimB"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimE"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimA", "dimB", "dimC", "dimD", "dimE"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
    plumber.persist(committer);
    doneSignal.await();
    plumber.getSinks().clear();
    plumber.finishJob();
    RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    restoredPlumber.bootstrapSinksFromDisk();
    sinks = restoredPlumber.getSinks();
    Assert.assertEquals(1, sinks.size());
    List<FireHydrant> hydrants = Lists.newArrayList(sinks.get(0L));
    for (int i = 0; i < hydrants.size(); i++) {
        hydrant = hydrants.get(i);
        ReferenceCountingSegment segment = hydrant.getIncrementedSegment();
        try {
            qindex = segment.asQueryableIndex();
            Assert.assertEquals(i, hydrant.getCount());
            Assert.assertEquals(expectedDims.get(i), ImmutableList.copyOf(qindex.getAvailableDimensions()));
        } finally {
            segment.decrement();
        }
    }
}
Also used : ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) CountDownLatch(java.util.concurrent.CountDownLatch) QueryableIndex(org.apache.druid.segment.QueryableIndex) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) Committer(org.apache.druid.data.input.Committer)

Aggregations

Committer (org.apache.druid.data.input.Committer)22 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)13 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)11 Test (org.junit.Test)11 Supplier (com.google.common.base.Supplier)8 IOException (java.io.IOException)8 List (java.util.List)7 ISE (org.apache.druid.java.util.common.ISE)7 VisibleForTesting (com.google.common.annotations.VisibleForTesting)6 Preconditions (com.google.common.base.Preconditions)6 Futures (com.google.common.util.concurrent.Futures)6 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)6 File (java.io.File)6 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)6 Map (java.util.Map)6 CountDownLatch (java.util.concurrent.CountDownLatch)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 Function (com.google.common.base.Function)5 Lists (com.google.common.collect.Lists)5