
Example 6 with IndexSizeExceededException

Use of org.apache.druid.segment.incremental.IndexSizeExceededException in project druid by druid-io.

From class IndexBuilder, method buildIncrementalIndexWithRows:

private static IncrementalIndex buildIncrementalIndexWithRows(IncrementalIndexSchema schema, int maxRows, Iterable<InputRow> rows) {
    Preconditions.checkNotNull(schema, "schema");
    final IncrementalIndex incrementalIndex = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setMaxRowCount(maxRows)
        .build();
    for (InputRow row : rows) {
        try {
            incrementalIndex.add(row);
        } catch (IndexSizeExceededException e) {
            throw new RuntimeException(e);
        }
    }
    return incrementalIndex;
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) InputRow(org.apache.druid.data.input.InputRow) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
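
The helper above wraps the checked IndexSizeExceededException thrown by IncrementalIndex.add in an unchecked RuntimeException, so test callers need no throws clause. Below is a minimal, self-contained sketch of the same pattern, assuming only the builder APIs visible in these examples; the schema, the row limit of 1, and the row values are illustrative placeholders, not code from the Druid test suite.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.IndexSizeExceededException;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;

public class IndexSizeExceededSketch {
    public static void main(String[] args) {
        IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
            .withMinTimestamp(0)
            .withQueryGranularity(Granularities.MINUTE)
            .withMetrics(new CountAggregatorFactory("count"))
            .build();
        // A deliberately tiny row limit, so the second distinct row should overflow the index.
        IncrementalIndex index = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(schema)
            .setMaxRowCount(1)
            .build();
        try {
            index.add(new MapBasedInputRow(0L, ImmutableList.of("dim"), ImmutableMap.of("dim", "a")));
            index.add(new MapBasedInputRow(1L, ImmutableList.of("dim"), ImmutableMap.of("dim", "b")));
        } catch (IndexSizeExceededException e) {
            // Same translation as buildIncrementalIndexWithRows: checked to unchecked.
            throw new RuntimeException(e);
        }
    }
}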

Example 7 with IndexSizeExceededException

Use of org.apache.druid.segment.incremental.IndexSizeExceededException in project druid by druid-io.

From class SchemalessIndexTest, method getIncrementalIndex:

public static QueryableIndex getIncrementalIndex(int index1, int index2) {
    synchronized (log) {
        if (EVENTS.isEmpty()) {
            makeEvents();
        }
        Map<Integer, QueryableIndex> entry = INCREMENTAL_INDEXES.get(index1);
        if (entry != null) {
            QueryableIndex index = entry.get(index2);
            if (index != null) {
                return index;
            }
        } else {
            entry = new HashMap<>();
            INCREMENTAL_INDEXES.put(index1, entry);
        }
        IncrementalIndex theIndex = null;
        int count = 0;
        for (final Map<String, Object> event : EVENTS) {
            if (count != index1 && count != index2) {
                count++;
                continue;
            }
            final long timestamp = new DateTime(event.get(TIMESTAMP), ISOChronology.getInstanceUTC()).getMillis();
            if (theIndex == null) {
                theIndex = new OnheapIncrementalIndex.Builder()
                    .setIndexSchema(
                        new IncrementalIndexSchema.Builder()
                            .withMinTimestamp(timestamp)
                            .withQueryGranularity(Granularities.MINUTE)
                            .withMetrics(METRIC_AGGS)
                            .build()
                    )
                    .setMaxRowCount(1000)
                    .build();
            }
            final List<String> dims = new ArrayList<>();
            for (final Map.Entry<String, Object> val : event.entrySet()) {
                if (!val.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(val.getKey())) {
                    dims.add(val.getKey());
                }
            }
            try {
                theIndex.add(new MapBasedInputRow(timestamp, dims, event));
            } catch (IndexSizeExceededException e) {
                throw new RuntimeException(e);
            }
            count++;
        }
        QueryableIndex retVal = TestIndex.persistRealtimeAndLoadMMapped(theIndex);
        entry.put(index2, retVal);
        return retVal;
    }
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) HashMap(java.util.HashMap) Map(java.util.Map) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
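
The getIncrementalIndex method above performs a two-level get-or-create lookup into the static INCREMENTAL_INDEXES cache by hand. A hedged sketch of the same step with java.util.Map.computeIfAbsent follows; the class and field names are hypothetical stand-ins, and Object replaces QueryableIndex so the sketch compiles without Druid on the classpath.

import java.util.HashMap;
import java.util.Map;

class TwoLevelCacheSketch {
    // Stand-in for the static INCREMENTAL_INDEXES field.
    private static final Map<Integer, Map<Integer, Object>> CACHE = new HashMap<>();

    static Object lookup(int index1, int index2) {
        // Create the inner map on first access, then probe it; a null result
        // means "not built yet", as in getIncrementalIndex above.
        return CACHE.computeIfAbsent(index1, k -> new HashMap<>()).get(index2);
    }
}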

Example 8 with IndexSizeExceededException

Use of org.apache.druid.segment.incremental.IndexSizeExceededException in project druid by druid-io.

From class ExpressionSelectorsTest, method test_incrementalIndexStringSelector:

@Test
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException {
    // This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
    // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
    // exception when trying to make an optimized string expression selector that was not appropriate for the
    // underlying dimension selector.
    // This occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so those
    // conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details.
    IncrementalIndexSchema schema = new IncrementalIndexSchema(
        0,
        new TimestampSpec("time", "millis", DateTimes.nowUtc()),
        Granularities.NONE,
        VirtualColumns.EMPTY,
        DimensionsSpec.EMPTY,
        new AggregatorFactory[]{new CountAggregatorFactory("count")},
        true
    );
    IncrementalIndex index = new OnheapIncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).build();
    index.add(new MapBasedInputRow(DateTimes.nowUtc().getMillis(), ImmutableList.of("x"), ImmutableMap.of("x", "foo")));
    index.add(new MapBasedInputRow(DateTimes.nowUtc().plusMillis(1000).getMillis(), ImmutableList.of("y"), ImmutableMap.of("y", "foo")));
    IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
    Sequence<Cursor> cursors = adapter.makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null);
    int rowsProcessed = cursors.map(cursor -> {
        DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(cursor.getColumnSelectorFactory(), Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()), null);
        DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(cursor.getColumnSelectorFactory(), Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()), null);
        int rowCount = 0;
        while (!cursor.isDone()) {
            Object x = xExprSelector.getObject();
            Object y = yExprSelector.getObject();
            List<String> expectedFoo = Collections.singletonList("foofoo");
            List<String> expectedNull = NullHandling.replaceWithDefault() ? Collections.singletonList("foo") : Collections.singletonList(null);
            if (rowCount == 0) {
                Assert.assertEquals(expectedFoo, x);
                Assert.assertEquals(expectedNull, y);
            } else {
                Assert.assertEquals(expectedNull, x);
                Assert.assertEquals(expectedFoo, y);
            }
            rowCount++;
            cursor.advance();
        }
        return rowCount;
    }).accumulate(0, (in, acc) -> in + acc);
    Assert.assertEquals(2, rowsProcessed);
}
Also used : SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) StorageAdapter(org.apache.druid.segment.StorageAdapter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GeneratorBasicSchemas(org.apache.druid.segment.generator.GeneratorBasicSchemas) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) Expr(org.apache.druid.math.expr.Expr) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) AfterClass(org.junit.AfterClass) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) ExprEval(org.apache.druid.math.expr.ExprEval) BaseSingleValueDimensionSelector(org.apache.druid.segment.BaseSingleValueDimensionSelector) TestObjectColumnSelector(org.apache.druid.segment.TestObjectColumnSelector) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BeforeClass(org.junit.BeforeClass) Intervals(org.apache.druid.java.util.common.Intervals) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) Supplier(com.google.common.base.Supplier) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Parser(org.apache.druid.math.expr.Parser) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) DimensionSelector(org.apache.druid.segment.DimensionSelector) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) Assert(org.junit.Assert) CloseableUtils(org.apache.druid.utils.CloseableUtils) Collections(java.util.Collections)
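
The expectedNull branch is the crux of the test: what concat returns for a row where the column is absent depends on Druid's null-handling mode. A short sketch of that expectation in isolation, assuming the NullHandling semantics exercised above:

import java.util.Collections;
import java.util.List;
import org.apache.druid.common.config.NullHandling;

class NullHandlingExpectationSketch {
    // In default-value mode a missing string dimension reads as the default
    // (empty) value, so concat(missing, 'foo') yields "foo"; in SQL-compatible
    // mode the missing value stays null and the concat result is null.
    static List<String> expectedForMissingColumn() {
        return NullHandling.replaceWithDefault()
            ? Collections.singletonList("foo")
            : Collections.singletonList(null);
    }
}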

Example 9 with IndexSizeExceededException

Use of org.apache.druid.segment.incremental.IndexSizeExceededException in project druid by druid-io.

From class AppenderatorImpl, method add:

@Override
public AppenderatorAddResult add(final SegmentIdWithShardSpec identifier, final InputRow row, @Nullable final Supplier<Committer> committerSupplier, final boolean allowIncrementalPersists) throws IndexSizeExceededException, SegmentNotWritableException {
    throwPersistErrorIfExists();
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    }
    final Sink sink = getOrCreateSink(identifier);
    metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
    final long bytesInMemoryAfterAdd;
    final IncrementalIndexAddResult addResult;
    try {
        addResult = sink.add(row, !allowIncrementalPersists);
        sinkRowsInMemoryAfterAdd = addResult.getRowCount();
        bytesInMemoryAfterAdd = addResult.getBytesInMemory();
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAppendRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    }
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    }
    if (addResult.isRowAdded()) {
        rowIngestionMeters.incrementProcessed();
    } else if (addResult.hasParseException()) {
        parseExceptionHandler.handle(addResult.getParseException());
    }
    final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
    rowsCurrentlyInMemory.addAndGet(numAddedRows);
    bytesCurrentlyInMemory.addAndGet(bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
    totalRows.addAndGet(numAddedRows);
    boolean isPersistRequired = false;
    boolean persist = false;
    List<String> persistReasons = new ArrayList<>();
    if (!sink.canAppendRow()) {
        persist = true;
        persistReasons.add("No more rows can be appended to sink");
    }
    if (System.currentTimeMillis() > nextFlush) {
        persist = true;
        persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", System.currentTimeMillis(), nextFlush));
    }
    if (rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
        persist = true;
        persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory.get(), tuningConfig.getMaxRowsInMemory()));
    }
    if (bytesCurrentlyInMemory.get() >= maxBytesTuningConfig) {
        persist = true;
        persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory.get(), maxBytesTuningConfig));
    }
    if (persist) {
        if (allowIncrementalPersists) {
            // persistAll clears rowsCurrentlyInMemory, no need to update it.
            log.info("Flushing in-memory data to disk because %s.", String.join(",", persistReasons));
            long bytesToBePersisted = 0L;
            for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
                final Sink sinkEntry = entry.getValue();
                if (sinkEntry != null) {
                    bytesToBePersisted += sinkEntry.getBytesInMemory();
                    if (sinkEntry.swappable()) {
                        // After swapping the sink, we use the memory-mapped segment instead (but only for real-time
                        // appenderators!). However, the memory-mapped segment still consumes memory.
                        // These memory-mapped segments are held in memory throughout the ingestion phase and
                        // permanently add to bytesCurrentlyInMemory.
                        int memoryStillInUse = calculateMMappedHydrantMemoryInUsed(sinkEntry.getCurrHydrant());
                        bytesCurrentlyInMemory.addAndGet(memoryStillInUse);
                    }
                }
            }
            if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory.get() - bytesToBePersisted > maxBytesTuningConfig) {
                // We are still over maxBytesTuningConfig even after persisting.
                // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
                final String alertMessage = StringUtils.format("Task has exceeded safe estimated heap usage limits, failing " + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])" + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])", sinks.size(), sinks.values().stream().mapToInt(Iterables::size).sum(), getTotalRowCount(), bytesCurrentlyInMemory.get(), bytesToBePersisted, maxBytesTuningConfig);
                final String errorMessage = StringUtils.format("%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too " + "great to have enough space to process additional input rows. This check, along with metering the overhead " + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting " + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter " + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an " + "increase in heap footprint, but will allow for more intermediary segment persists to occur before " + "reaching this condition.", alertMessage);
                log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
                throw new RuntimeException(errorMessage);
            }
            Futures.addCallback(persistAll(committerSupplier == null ? null : committerSupplier.get()), new FutureCallback<Object>() {

                @Override
                public void onSuccess(@Nullable Object result) {
                // do nothing
                }

                @Override
                public void onFailure(Throwable t) {
                    persistError = t;
                }
            });
        } else {
            isPersistRequired = true;
        }
    }
    return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired);
}
Also used : ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) Iterables(com.google.common.collect.Iterables) Sink(org.apache.druid.segment.realtime.plumber.Sink) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
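
Four separate conditions in the middle of add decide whether to persist. Read as a single predicate, the decision looks like the sketch below; the parameters mirror the fields used above, and the persistReasons bookkeeping is deliberately omitted.

class PersistDecisionSketch {
    // Condensed reading of AppenderatorImpl.add's persist triggers.
    static boolean shouldPersist(
        boolean sinkCanAppendRow,   // sink.canAppendRow()
        long nowMillis,             // System.currentTimeMillis()
        long nextFlush,
        long rowsInMemory,          // rowsCurrentlyInMemory.get()
        long maxRowsInMemory,       // tuningConfig.getMaxRowsInMemory()
        long bytesInMemory,         // bytesCurrentlyInMemory.get()
        long maxBytesInMemory       // maxBytesTuningConfig
    ) {
        return !sinkCanAppendRow
            || nowMillis > nextFlush
            || rowsInMemory >= maxRowsInMemory
            || bytesInMemory >= maxBytesInMemory;
    }
}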

Example 10 with IndexSizeExceededException

Use of org.apache.druid.segment.incremental.IndexSizeExceededException in project druid by druid-io.

From class Plumbers, method addNextRow:

public static void addNextRow(final Supplier<Committer> committerSupplier, final Firehose firehose, final Plumber plumber, final boolean reportParseExceptions, final FireDepartmentMetrics metrics) throws IOException {
    final InputRow inputRow;
    try {
        inputRow = firehose.nextRow();
    } catch (ParseException e) {
        if (reportParseExceptions) {
            throw e;
        } else {
            log.debug(e, "Discarded row due to exception, considering unparseable.");
            metrics.incrementUnparseable();
            return;
        }
    }
    if (inputRow == null) {
        log.debug("Discarded null row, considering thrownAway.");
        metrics.incrementThrownAway();
        return;
    }
    final IncrementalIndexAddResult addResult;
    try {
        addResult = plumber.add(inputRow, committerSupplier);
    } catch (IndexSizeExceededException e) {
        // plumber.add should be swapping out indexes before they fill up.
        throw new ISE(e, "Index size exceeded");
    }
    if (addResult.getRowCount() == -1) {
        metrics.incrementThrownAway();
        log.debug("Discarded row[%s], considering thrownAway due to %s.", inputRow, addResult.getReasonOfNotAdded());
        return;
    }
    if (addResult.getRowCount() == -2) {
        metrics.incrementDedup();
        log.debug("Discarded row[%s], considering duplication.", inputRow);
        return;
    }
    metrics.incrementProcessed();
}
Also used : IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) InputRow(org.apache.druid.data.input.InputRow) ISE(org.apache.druid.java.util.common.ISE) ParseException(org.apache.druid.java.util.common.parsers.ParseException) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
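
The -1 and -2 row counts checked above are sentinel values carried by IncrementalIndexAddResult: -1 marks a row that was thrown away and -2 marks a duplicate. A small sketch of the same dispatch, with hypothetical named constants in place of the bare literals and a local interface standing in for FireDepartmentMetrics:

class AddResultDispatchSketch {
    // Hypothetical names for the sentinels handled by Plumbers.addNextRow.
    static final int ROW_THROWN_AWAY = -1;
    static final int ROW_DEDUPLICATED = -2;

    interface Metrics {
        // Stand-in for the FireDepartmentMetrics methods used above.
        void incrementThrownAway();
        void incrementDedup();
        void incrementProcessed();
    }

    static void recordOutcome(int rowCount, Metrics metrics) {
        if (rowCount == ROW_THROWN_AWAY) {
            metrics.incrementThrownAway();
        } else if (rowCount == ROW_DEDUPLICATED) {
            metrics.incrementDedup();
        } else {
            metrics.incrementProcessed();
        }
    }
}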

Aggregations

IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException): 11 usages
ArrayList (java.util.ArrayList): 7 usages
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 7 usages
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 7 usages
HashMap (java.util.HashMap): 4 usages
Map (java.util.Map): 4 usages
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 4 usages
IncrementalIndexAddResult (org.apache.druid.segment.incremental.IncrementalIndexAddResult): 4 usages
IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema): 4 usages
Iterables (com.google.common.collect.Iterables): 3 usages
File (java.io.File): 3 usages
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 3 usages
InputRow (org.apache.druid.data.input.InputRow): 3 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 3 usages
Function (com.google.common.base.Function): 2 usages
Supplier (com.google.common.base.Supplier): 2 usages
ImmutableList (com.google.common.collect.ImmutableList): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 2 usages
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService): 2 usages