Example 91 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class IncrementalIndex method toIncrementalIndexRow.

@VisibleForTesting
IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) {
    row = formatRow(row);
    if (row.getTimestampFromEpoch() < minTimestamp) {
        throw new IAE("Cannot add row[%s] because it is below the minTimestamp[%s]", row, DateTimes.utc(minTimestamp));
    }
    final List<String> rowDimensions = row.getDimensions();
    Object[] dims;
    List<Object> overflow = null;
    long dimsKeySize = 0;
    List<String> parseExceptionMessages = new ArrayList<>();
    synchronized (dimensionDescs) {
        // all known dimensions are assumed missing until we encounter them in the rowDimensions
        Set<String> absentDimensions = Sets.newHashSet(dimensionDescs.keySet());
        // first, process dimension values present in the row
        dims = new Object[dimensionDescs.size()];
        for (String dimension : rowDimensions) {
            if (Strings.isNullOrEmpty(dimension)) {
                continue;
            }
            boolean wasNewDim = false;
            DimensionDesc desc = dimensionDescs.get(dimension);
            if (desc != null) {
                absentDimensions.remove(dimension);
            } else {
                wasNewDim = true;
                desc = addNewDimension(
                    dimension,
                    DimensionHandlerUtils.getHandlerFromCapabilities(
                        dimension,
                        // a better handler could be chosen based on the value
                        makeDefaultCapabilitiesFromValueType(ColumnType.STRING),
                        null
                    )
                );
            }
            DimensionIndexer indexer = desc.getIndexer();
            Object dimsKey = null;
            try {
                final EncodedKeyComponent<?> encodedKeyComponent = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension), true);
                dimsKey = encodedKeyComponent.getComponent();
                dimsKeySize += encodedKeyComponent.getEffectiveSizeBytes();
            } catch (ParseException pe) {
                parseExceptionMessages.add(pe.getMessage());
            }
            if (wasNewDim) {
                // unless this is the first row we are processing, all newly discovered columns will be sparse
                if (maxIngestedEventTime != null) {
                    indexer.setSparseIndexed();
                }
                if (overflow == null) {
                    overflow = new ArrayList<>();
                }
                overflow.add(dimsKey);
            } else if (desc.getIndex() > dims.length || dims[desc.getIndex()] != null) {
                /*
                 * index > dims.length requires that we saw this dimension and added it to the dimensionOrder map,
                 * otherwise index is null. Since dims is initialized based on the size of dimensionOrder on each call to add,
                 * it must have been added to dimensionOrder during this InputRow.
                 *
                 * if we found an index for this dimension it means we've seen it already. If !(index > dims.length) then
                 * we saw it on a previous input row (thus it's safe to index into dims). If we found a value in
                 * the dims array for this index, it means we have seen this dimension already on this input row.
                 */
                throw new ISE("Dimension[%s] occurred more than once in InputRow", dimension);
            } else {
                dims[desc.getIndex()] = dimsKey;
            }
        }
        // process any dimensions with missing values in the row
        for (String missing : absentDimensions) {
            dimensionDescs.get(missing).getIndexer().setSparseIndexed();
        }
    }
    if (overflow != null) {
        // Merge overflow and non-overflow
        Object[] newDims = new Object[dims.length + overflow.size()];
        System.arraycopy(dims, 0, newDims, 0, dims.length);
        for (int i = 0; i < overflow.size(); ++i) {
            newDims[dims.length + i] = overflow.get(i);
        }
        dims = newDims;
    }
    long truncated = 0;
    if (row.getTimestamp() != null) {
        truncated = gran.bucketStart(row.getTimestampFromEpoch());
    }
    IncrementalIndexRow incrementalIndexRow = IncrementalIndexRow.createTimeAndDimswithDimsKeySize(Math.max(truncated, minTimestamp), dims, dimensionDescsList, dimsKeySize);
    return new IncrementalIndexRowResult(incrementalIndexRow, parseExceptionMessages);
}
Also used : CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) DimensionIndexer(org.apache.druid.segment.DimensionIndexer) ISE(org.apache.druid.java.util.common.ISE) UnparseableColumnsParseException(org.apache.druid.java.util.common.parsers.UnparseableColumnsParseException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
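For reference, here is a minimal self-contained sketch of the guard at the top of toIncrementalIndexRow, assuming only what the example shows: IAE takes a format string plus arguments and interpolates them into the %s placeholders. The EarlyRowGuard class and rejectEarlyRow helper are hypothetical names, not Druid API.

import org.apache.druid.data.input.InputRow;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.IAE;

// Hypothetical helper, not part of Druid: the same guard used at the top of
// toIncrementalIndexRow, where rows older than the index's minTimestamp are rejected.
class EarlyRowGuard {
    static void rejectEarlyRow(InputRow row, long minTimestamp) {
        if (row.getTimestampFromEpoch() < minTimestamp) {
            // IAE interpolates its varargs into the %s placeholders of the format string
            throw new IAE(
                "Cannot add row[%s] because it is below the minTimestamp[%s]",
                row,
                DateTimes.utc(minTimestamp)
            );
        }
    }
}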

Example 92 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class BroadcastSegmentIndexedTable method columnReader.

@Override
public Reader columnReader(int column) {
    if (!rowSignature.contains(column)) {
        throw new IAE("Column[%d] is not a valid column for segment[%s]", column, segment.getId());
    }
    final SimpleAscendingOffset offset = new SimpleAscendingOffset(adapter.getNumRows());
    final BaseColumn baseColumn = queryableIndex.getColumnHolder(rowSignature.getColumnName(column)).getColumn();
    final BaseObjectColumnValueSelector<?> selector = baseColumn.makeColumnValueSelector(offset);
    return new Reader() {

        @Nullable
        @Override
        public Object read(int row) {
            offset.setCurrentOffset(row);
            return selector.getObject();
        }

        @Override
        public void close() throws IOException {
            baseColumn.close();
        }
    };
}
Also used : SimpleAscendingOffset(org.apache.druid.segment.SimpleAscendingOffset) BaseColumn(org.apache.druid.segment.column.BaseColumn) IAE(org.apache.druid.java.util.common.IAE)
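A hedged usage sketch for the Reader returned above, relying only on the read(int)/close() contract visible in the example and assuming the Reader is Closeable (its overridden close() throwing IOException suggests so). The ColumnScan class, scanColumn helper, and numRows parameter are placeholders, not Druid API.

import java.io.IOException;
import org.apache.druid.segment.join.table.IndexedTable;

// Hypothetical caller, not from Druid: reads one column of an IndexedTable row by row and
// closes the Reader (and therefore the underlying BaseColumn) when finished.
class ColumnScan {
    static void scanColumn(IndexedTable table, int columnNumber, int numRows) throws IOException {
        try (IndexedTable.Reader reader = table.columnReader(columnNumber)) {
            for (int row = 0; row < numRows; row++) {
                Object value = reader.read(row);
                // ... use value; an out-of-signature columnNumber would have thrown IAE above
            }
        }
    }
}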

Example 93 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class TimestampShiftMacroTest method testDynamicExpression.

@Test
public void testDynamicExpression() {
    // step parameter is not a literal expression
    Expr expr = apply(ImmutableList.of(ExprEval.of(timestamp.getMillis()).toExpr(), ExprEval.of("P1Y").toExpr(), new NotLiteralExpr("step"), ExprEval.of("America/Los_Angeles").toExpr()));
    final int step = 3;
    Assert.assertEquals(timestamp.toDateTime(DateTimes.inferTzFromString("America/Los_Angeles")).withPeriodAdded(Years.ONE, step).getMillis(), expr.eval(new Expr.ObjectBinding() {

        @Nullable
        @Override
        public ExpressionType getType(String name) {
            return null;
        }

        @Nullable
        @Override
        public Object get(String name) {
            if ("step".equals(name)) {
                return step;
            } else {
                throw new IAE("Invalid bindings");
            }
        }
    }).asLong());
}
Also used : Expr(org.apache.druid.math.expr.Expr) IAE(org.apache.druid.java.util.common.IAE) ExpressionType(org.apache.druid.math.expr.ExpressionType) Nullable(javax.annotation.Nullable) Test(org.junit.Test)
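The anonymous binding in the test generalizes naturally; below is a minimal sketch of a map-backed Expr.ObjectBinding with the same two-method shape, returning null types as the test does and throwing IAE for unknown names. The MapBindings class is hypothetical.

import java.util.Map;
import javax.annotation.Nullable;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExpressionType;

// Hypothetical helper, not from Druid: a map-backed binding mirroring the test's
// anonymous binding, throwing IAE for names it does not know.
class MapBindings {
    static Expr.ObjectBinding of(Map<String, Object> values) {
        return new Expr.ObjectBinding() {
            @Nullable
            @Override
            public ExpressionType getType(String name) {
                // the test also reports no type information
                return null;
            }

            @Nullable
            @Override
            public Object get(String name) {
                if (values.containsKey(name)) {
                    return values.get(name);
                }
                throw new IAE("Invalid bindings");
            }
        };
    }
}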

Example 94 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class JsonParserIterator method init.

private void init() {
    if (jp == null) {
        try {
            long timeLeftMillis = timeoutAt - System.currentTimeMillis();
            if (checkTimeout(timeLeftMillis)) {
                throw timeoutQuery();
            }
            InputStream is = hasTimeout ? future.get(timeLeftMillis, TimeUnit.MILLISECONDS) : future.get();
            if (is != null) {
                jp = objectMapper.getFactory().createParser(is);
            } else if (checkTimeout()) {
                throw timeoutQuery();
            } else {
                // TODO: NettyHttpClient should check the actual cause of the failure and set it in the future properly.
                throw ResourceLimitExceededException.withMessage("Possibly max scatter-gather bytes limit reached while reading from url[%s].", url);
            }
            final JsonToken nextToken = jp.nextToken();
            if (nextToken == JsonToken.START_ARRAY) {
                jp.nextToken();
                objectCodec = jp.getCodec();
            } else if (nextToken == JsonToken.START_OBJECT) {
                throw convertException(jp.getCodec().readValue(jp, QueryException.class));
            } else {
                throw convertException(new IAE("Next token wasn't a START_ARRAY, was[%s] from url[%s]", jp.getCurrentToken(), url));
            }
        } catch (ExecutionException | CancellationException e) {
            throw convertException(e.getCause() == null ? e : e.getCause());
        } catch (IOException | InterruptedException e) {
            throw convertException(e);
        } catch (TimeoutException e) {
            throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out!", queryId), host);
        }
    }
}
Also used : QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) CancellationException(java.util.concurrent.CancellationException) InputStream(java.io.InputStream) JsonToken(com.fasterxml.jackson.core.JsonToken) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) ExecutionException(java.util.concurrent.ExecutionException) QueryInterruptedException(org.apache.druid.query.QueryInterruptedException) TimeoutException(java.util.concurrent.TimeoutException)
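A simplified, hedged sketch of the token handshake init() performs once the response stream arrives, using only the standard Jackson calls shown above (createParser, nextToken, getCurrentToken); it omits the START_OBJECT error branch and the timeout handling. The ArrayResponseParser class is hypothetical.

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.io.InputStream;
import org.apache.druid.java.util.common.IAE;

// Hypothetical helper, not from Druid: the response stream must begin with a JSON array;
// anything else is reported with a formatted IAE naming the token actually seen.
class ArrayResponseParser {
    static JsonParser open(ObjectMapper objectMapper, InputStream is, String url) throws IOException {
        final JsonParser jp = objectMapper.getFactory().createParser(is);
        if (jp.nextToken() != JsonToken.START_ARRAY) {
            throw new IAE("Next token wasn't a START_ARRAY, was[%s] from url[%s]", jp.getCurrentToken(), url);
        }
        // advance onto the first array element, as init() does before iteration begins
        jp.nextToken();
        return jp;
    }
}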

Example 95 with IAE

use of org.apache.druid.java.util.common.IAE in project druid by druid-io.

the class AppenderatorImpl method add.

@Override
public AppenderatorAddResult add(final SegmentIdWithShardSpec identifier, final InputRow row, @Nullable final Supplier<Committer> committerSupplier, final boolean allowIncrementalPersists) throws IndexSizeExceededException, SegmentNotWritableException {
    throwPersistErrorIfExists();
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    }
    final Sink sink = getOrCreateSink(identifier);
    metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
    final long bytesInMemoryAfterAdd;
    final IncrementalIndexAddResult addResult;
    try {
        addResult = sink.add(row, !allowIncrementalPersists);
        sinkRowsInMemoryAfterAdd = addResult.getRowCount();
        bytesInMemoryAfterAdd = addResult.getBytesInMemory();
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAddRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    }
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    }
    if (addResult.isRowAdded()) {
        rowIngestionMeters.incrementProcessed();
    } else if (addResult.hasParseException()) {
        parseExceptionHandler.handle(addResult.getParseException());
    }
    final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
    rowsCurrentlyInMemory.addAndGet(numAddedRows);
    bytesCurrentlyInMemory.addAndGet(bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
    totalRows.addAndGet(numAddedRows);
    boolean isPersistRequired = false;
    boolean persist = false;
    List<String> persistReasons = new ArrayList<>();
    if (!sink.canAppendRow()) {
        persist = true;
        persistReasons.add("No more rows can be appended to sink");
    }
    if (System.currentTimeMillis() > nextFlush) {
        persist = true;
        persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", System.currentTimeMillis(), nextFlush));
    }
    if (rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
        persist = true;
        persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory.get(), tuningConfig.getMaxRowsInMemory()));
    }
    if (bytesCurrentlyInMemory.get() >= maxBytesTuningConfig) {
        persist = true;
        persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory.get(), maxBytesTuningConfig));
    }
    if (persist) {
        if (allowIncrementalPersists) {
            // persistAll clears rowsCurrentlyInMemory, no need to update it.
            log.info("Flushing in-memory data to disk because %s.", String.join(",", persistReasons));
            long bytesToBePersisted = 0L;
            for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
                final Sink sinkEntry = entry.getValue();
                if (sinkEntry != null) {
                    bytesToBePersisted += sinkEntry.getBytesInMemory();
                    if (sinkEntry.swappable()) {
                        // After swapping the sink, we use memory mapped segment instead (but only for real time appenderators!).
                        // However, the memory mapped segment still consumes memory.
                        // These memory mapped segments are held in memory throughout the ingestion phase and permanently add to the bytesCurrentlyInMemory
                        int memoryStillInUse = calculateMMappedHydrantMemoryInUsed(sink.getCurrHydrant());
                        bytesCurrentlyInMemory.addAndGet(memoryStillInUse);
                    }
                }
            }
            if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory.get() - bytesToBePersisted > maxBytesTuningConfig) {
                // We are still over maxBytesTuningConfig even after persisting.
                // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
                final String alertMessage = StringUtils.format("Task has exceeded safe estimated heap usage limits, failing " + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])" + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])", sinks.size(), sinks.values().stream().mapToInt(Iterables::size).sum(), getTotalRowCount(), bytesCurrentlyInMemory.get(), bytesToBePersisted, maxBytesTuningConfig);
                final String errorMessage = StringUtils.format("%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too " + "great to have enough space to process additional input rows. This check, along with metering the overhead " + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting " + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter " + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an " + "increase in heap footprint, but will allow for more intermediary segment persists to occur before " + "reaching this condition.", alertMessage);
                log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
                throw new RuntimeException(errorMessage);
            }
            Futures.addCallback(persistAll(committerSupplier == null ? null : committerSupplier.get()), new FutureCallback<Object>() {

                @Override
                public void onSuccess(@Nullable Object result) {
                // do nothing
                }

                @Override
                public void onFailure(Throwable t) {
                    persistError = t;
                }
            });
        } else {
            isPersistRequired = true;
        }
    }
    return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired);
}
Also used : ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) Iterables(com.google.common.collect.Iterables) Sink(org.apache.druid.segment.realtime.plumber.Sink) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)
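The persist decision in add() is easy to lose among the surrounding bookkeeping; below is a hedged sketch that isolates just the threshold checks and the reasons they contribute to the log message, under the assumption that the thresholds are passed in as plain values. The PersistDecision class and its parameters are hypothetical.

import java.util.ArrayList;
import java.util.List;
import org.apache.druid.java.util.common.StringUtils;

// Hypothetical extraction, not Druid code: each exceeded threshold both triggers a persist
// and records a human-readable reason, exactly as the checks in add() do.
class PersistDecision {
    static List<String> reasons(
        boolean sinkFull,
        long nowMillis,
        long nextFlush,
        long rowsInMemory,
        long maxRowsInMemory,
        long bytesInMemory,
        long maxBytesInMemory
    ) {
        final List<String> persistReasons = new ArrayList<>();
        if (sinkFull) {
            persistReasons.add("No more rows can be appended to sink");
        }
        if (nowMillis > nextFlush) {
            persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", nowMillis, nextFlush));
        }
        if (rowsInMemory >= maxRowsInMemory) {
            persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsInMemory, maxRowsInMemory));
        }
        if (bytesInMemory >= maxBytesInMemory) {
            persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesInMemory, maxBytesInMemory));
        }
        // an empty list means no persist is needed; add() joins the reasons with "," for its log line
        return persistReasons;
    }
}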

Aggregations

IAE (org.apache.druid.java.util.common.IAE) 115
ISE (org.apache.druid.java.util.common.ISE) 23
IOException (java.io.IOException) 20
ByteBuffer (java.nio.ByteBuffer) 19
ArrayList (java.util.ArrayList) 16
List (java.util.List) 14
Expr (org.apache.druid.math.expr.Expr) 14
Nullable (javax.annotation.Nullable) 12
ColumnType (org.apache.druid.segment.column.ColumnType) 10
HashSet (java.util.HashSet) 8
Map (java.util.Map) 8
Interval (org.joda.time.Interval) 8
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 7
HashMap (java.util.HashMap) 7
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory) 7
File (java.io.File) 6
Iterables (com.google.common.collect.Iterables) 5
Arrays (java.util.Arrays) 5
Test (org.junit.Test) 5
ImmutableMap (com.google.common.collect.ImmutableMap) 4