Search in sources :

Example 91 with IAE

use of IAE in project druid by druid-io.

the class IncrementalIndex method toIncrementalIndexRow.

/**
 * Converts an input row into an {@code IncrementalIndexRowResult}: the row is formatted, its dimension
 * values are encoded into a key array, and the key is combined with a bucketed timestamp.
 *
 * NOTE(review): this listing is truncated -- several closing braces and statement bodies (including the
 * delimiters of the block comment before the ISE) are missing, so the comments describe only what is visible.
 *
 * @param row the row to convert; it is replaced by the result of {@code formatRow(row)} before use
 * @return a result wrapping the created IncrementalIndexRow and the {@code parseExceptionMessages} list
 * @throws IAE if the row's epoch timestamp is below {@code minTimestamp}
 * @throws ISE if a dimension is detected more than once in the same input row
 */
IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) {
    row = formatRow(row);
    // Reject rows that fall before the minimum timestamp.
    if (row.getTimestampFromEpoch() < minTimestamp) {
        throw new IAE("Cannot add row[%s] because it is below the minTimestamp[%s]", row, DateTimes.utc(minTimestamp));
    final List<String> rowDimensions = row.getDimensions();
    Object[] dims;
    // Created lazily when a new dimension is discovered; merged onto the end of dims after the synchronized block.
    // Presumably holds the keys of newly discovered dimensions (the add call is not visible in this listing).
    List<Object> overflow = null;
    // Running sum of getEffectiveSizeBytes() over every encoded key component processed below.
    long dimsKeySize = 0;
    List<String> parseExceptionMessages = new ArrayList<>();
    synchronized (dimensionDescs) {
        // all known dimensions are assumed missing until we encounter in the rowDimensions
        Set<String> absentDimensions = Sets.newHashSet(dimensionDescs.keySet());
        // first, process dimension values present in the row
        // dims is sized from the dimensions known at this point, before any new ones are added in the loop.
        dims = new Object[dimensionDescs.size()];
        for (String dimension : rowDimensions) {
            if (Strings.isNullOrEmpty(dimension)) {
            boolean wasNewDim = false;
            DimensionDesc desc = dimensionDescs.get(dimension);
            if (desc != null) {
            } else {
                // Unknown dimension: register it with a handler built from default STRING capabilities.
                wasNewDim = true;
                desc = addNewDimension(dimension, DimensionHandlerUtils.getHandlerFromCapabilities(dimension, // based on the value to use a better handler
                makeDefaultCapabilitiesFromValueType(ColumnType.STRING), null));
            DimensionIndexer indexer = desc.getIndexer();
            Object dimsKey = null;
            try {
                final EncodedKeyComponent<?> encodedKeyComponent = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension), true);
                dimsKey = encodedKeyComponent.getComponent();
                dimsKeySize += encodedKeyComponent.getEffectiveSizeBytes();
            // NOTE(review): the catch body is not visible here; presumably it records into parseExceptionMessages -- confirm.
            } catch (ParseException pe) {
            if (wasNewDim) {
                // unless this is the first row we are processing, all newly discovered columns will be sparse
                if (maxIngestedEventTime != null) {
                if (overflow == null) {
                    overflow = new ArrayList<>();
            // NOTE(review): when desc.getIndex() == dims.length the first test is false and dims[desc.getIndex()]
            // is then evaluated out of range -- confirm whether '>=' was intended.
            } else if (desc.getIndex() > dims.length || dims[desc.getIndex()] != null) {
           * index > dims.length requires that we saw this dimension and added it to the dimensionOrder map,
           * otherwise index is null. Since dims is initialized based on the size of dimensionOrder on each call to add,
           * it must have been added to dimensionOrder during this InputRow.
           * if we found an index for this dimension it means we've seen it already. If !(index > dims.length) then
           * we saw it on a previous input row (this its safe to index into dims). If we found a value in
           * the dims array for this index, it means we have seen this dimension already on this input row.
                throw new ISE("Dimension[%s] occurred more than once in InputRow", dimension);
            } else {
                dims[desc.getIndex()] = dimsKey;
        // process any dimensions with missing values in the row
        for (String missing : absentDimensions) {
    if (overflow != null) {
        // Merge overflow and non-overflow
        Object[] newDims = new Object[dims.length + overflow.size()];
        System.arraycopy(dims, 0, newDims, 0, dims.length);
        for (int i = 0; i < overflow.size(); ++i) {
            newDims[dims.length + i] = overflow.get(i);
        dims = newDims;
    // Timestamp stays 0 when the row has no timestamp; otherwise it is bucketed via gran.bucketStart.
    long truncated = 0;
    if (row.getTimestamp() != null) {
        truncated = gran.bucketStart(row.getTimestampFromEpoch());
    // The key timestamp is clamped so it is never lower than minTimestamp.
    IncrementalIndexRow incrementalIndexRow = IncrementalIndexRow.createTimeAndDimswithDimsKeySize(Math.max(truncated, minTimestamp), dims, dimensionDescsList, dimsKeySize);
    return new IncrementalIndexRowResult(incrementalIndexRow, parseExceptionMessages);
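Also used : CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) DimensionIndexer(org.apache.druid.segment.DimensionIndexer) ISE(org.apache.druid.java.util.common.ISE) UnparseableColumnsParseException(org.apache.druid.java.util.common.parsers.UnparseableColumnsParseException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)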

Example 92 with IAE

use of IAE in project druid by druid-io.

the class BroadcastSegmentIndexedTable method columnReader.

/**
 * Creates a {@code Reader} over one column of the segment, backed by a column value selector.
 *
 * NOTE(review): this listing is truncated -- closing braces and possibly some statements are missing.
 *
 * @param column the column position, validated against {@code rowSignature}
 * @return an anonymous Reader whose read() returns the selector's current object
 * @throws IAE if {@code rowSignature} does not contain the column
 */
public Reader columnReader(int column) {
    if (!rowSignature.contains(column)) {
        throw new IAE("Column[%d] is not a valid column for segment[%s]", column, segment.getId());
    // The offset is sized from the adapter's row count and shared with the selector created below.
    final SimpleAscendingOffset offset = new SimpleAscendingOffset(adapter.getNumRows());
    final BaseColumn baseColumn = queryableIndex.getColumnHolder(rowSignature.getColumnName(column)).getColumn();
    final BaseObjectColumnValueSelector<?> selector = baseColumn.makeColumnValueSelector(offset);
    return new Reader() {

        // NOTE(review): the visible body never uses 'row'; a statement that positions 'offset' may be
        // missing from this listing -- confirm against the full source.
        public Object read(int row) {
            return selector.getObject();

        // Body not visible in this listing.
        public void close() throws IOException {
Also used : SimpleAscendingOffset(org.apache.druid.segment.SimpleAscendingOffset) BaseColumn(org.apache.druid.segment.column.BaseColumn) IAE(org.apache.druid.java.util.common.IAE)

Example 93 with IAE

use of IAE in project druid by druid-io.

the class TimestampShiftMacroTest method testDynamicExpression.

/**
 * Checks that the expression built by {@code apply} evaluates correctly when the step argument is a
 * non-literal expression resolved through the bindings at evaluation time.
 *
 * NOTE(review): this listing is truncated -- closing braces and the end of the assertEquals call are missing.
 */
public void testDynamicExpression() {
    // step parameter is not a literal expression
    Expr expr = apply(ImmutableList.of(ExprEval.of(timestamp.getMillis()).toExpr(), ExprEval.of("P1Y").toExpr(), new NotLiteralExpr("step"), ExprEval.of("America/Los_Angeles").toExpr()));
    final int step = 3;
    // Expected value: the timestamp in America/Los_Angeles with one year added 'step' times, in millis.
    Assert.assertEquals(timestamp.toDateTime(DateTimes.inferTzFromString("America/Los_Angeles")).withPeriodAdded(Years.ONE, step).getMillis(), expr.eval(new Expr.ObjectBinding() {

        // No type information is supplied for any binding.
        public ExpressionType getType(String name) {
            return null;

        // Only the "step" identifier resolves; any other lookup fails with an IAE.
        public Object get(String name) {
            if ("step".equals(name)) {
                return step;
            } else {
                throw new IAE("Invalid bindings");
Also used : Expr(org.apache.druid.math.expr.Expr) IAE(org.apache.druid.java.util.common.IAE) ExpressionType(org.apache.druid.math.expr.ExpressionType) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 94 with IAE

use of IAE in project druid by druid-io.

the class JsonParserIterator method init.

/**
 * Initializes the JSON parser {@code jp} from the response stream if it has not been created yet, and
 * checks that the response starts with a JSON array.
 *
 * NOTE(review): this listing is truncated -- several closing braces are missing.
 *
 * Failures are rethrown through {@code convertException}, {@code timeoutQuery()}, or as a
 * QueryTimeoutException / ResourceLimitExceededException, as shown in the branches below.
 */
private void init() {
    // Only the first call does work; later calls see a non-null parser.
    if (jp == null) {
        try {
            long timeLeftMillis = timeoutAt - System.currentTimeMillis();
            if (checkTimeout(timeLeftMillis)) {
                throw timeoutQuery();
            // Wait for the stream: bounded by the remaining time when hasTimeout is set, unbounded otherwise.
            InputStream is = hasTimeout ? future.get(timeLeftMillis, TimeUnit.MILLISECONDS) : future.get();
            if (is != null) {
                jp = objectMapper.getFactory().createParser(is);
            } else if (checkTimeout()) {
                throw timeoutQuery();
            } else {
                // TODO: NettyHttpClient should check the actual cause of the failure and set it in the future properly.
                throw ResourceLimitExceededException.withMessage("Possibly max scatter-gather bytes limit reached while reading from url[%s].", url);
            final JsonToken nextToken = jp.nextToken();
            if (nextToken == JsonToken.START_ARRAY) {
                // Expected case: keep the codec for later use.
                objectCodec = jp.getCodec();
            } else if (nextToken == JsonToken.START_OBJECT) {
                // An object instead of an array is read as a QueryException and rethrown after conversion.
                throw convertException(jp.getCodec().readValue(jp, QueryException.class));
            } else {
                throw convertException(new IAE("Next token wasn't a START_ARRAY, was[%s] from url[%s]", jp.getCurrentToken(), url));
        } catch (ExecutionException | CancellationException e) {
            // Prefer the underlying cause when one is present.
            throw convertException(e.getCause() == null ? e : e.getCause());
        } catch (IOException | InterruptedException e) {
            throw convertException(e);
        } catch (TimeoutException e) {
            throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out!", queryId), host);
Also used : QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) CancellationException(java.util.concurrent.CancellationException) InputStream(java.io.InputStream) JsonToken(com.fasterxml.jackson.core.JsonToken) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) ExecutionException(java.util.concurrent.ExecutionException) QueryInterruptedException(org.apache.druid.query.QueryInterruptedException) TimeoutException(java.util.concurrent.TimeoutException) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException)

Example 95 with IAE

use of IAE in project druid by druid-io.

the class AppenderatorImpl method add.

/**
 * Adds a row to the sink for the given segment identifier and decides whether in-memory data must be
 * persisted afterwards.
 *
 * NOTE(review): this listing is truncated -- many closing braces and some statements (e.g. the bodies of the
 * isRowAdded / hasParseException branches and the log call before the "Flushing..." message) are missing.
 *
 * @param identifier               segment the row belongs to; its dataSource must equal the schema's dataSource
 * @param row                      the row to add to the sink
 * @param committerSupplier        nullable; when non-null its get() result is passed to persistAll
 * @param allowIncrementalPersists when true a needed persist is started here; when false the need is only
 *                                 reported through the returned result
 * @return result carrying the identifier, the sink's row count and whether a persist is required
 * @throws IAE                         if the identifier's dataSource differs from the schema's
 * @throws IndexSizeExceededException  rethrown when the sink rejects the row as full
 * @throws SegmentNotWritableException if the row count reported after the add is negative
 * @throws RuntimeException            if memory use would still exceed the limit after persisting and the
 *                                     overhead check is not skipped
 */
public AppenderatorAddResult add(final SegmentIdWithShardSpec identifier, final InputRow row, @Nullable final Supplier<Committer> committerSupplier, final boolean allowIncrementalPersists) throws IndexSizeExceededException, SegmentNotWritableException {
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    final Sink sink = getOrCreateSink(identifier);
    // Snapshot row and byte counts before the add so the deltas can be computed afterwards.
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
    final long bytesInMemoryAfterAdd;
    final IncrementalIndexAddResult addResult;
    try {
        addResult = sink.add(row, !allowIncrementalPersists);
        sinkRowsInMemoryAfterAdd = addResult.getRowCount();
        bytesInMemoryAfterAdd = addResult.getBytesInMemory();
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAddRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    // A negative row count is treated as the sink having been swapped out.
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    if (addResult.isRowAdded()) {
    } else if (addResult.hasParseException()) {
    final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
    bytesCurrentlyInMemory.addAndGet(bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
    boolean isPersistRequired = false;
    // Each check below can independently request a persist and records a human-readable reason.
    boolean persist = false;
    List<String> persistReasons = new ArrayList<>();
    if (!sink.canAppendRow()) {
        persist = true;
        persistReasons.add("No more rows can be appended to sink");
    if (System.currentTimeMillis() > nextFlush) {
        persist = true;
        persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", System.currentTimeMillis(), nextFlush));
    if (rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
        persist = true;
        persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory.get(), tuningConfig.getMaxRowsInMemory()));
    if (bytesCurrentlyInMemory.get() >= maxBytesTuningConfig) {
        persist = true;
        persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory.get(), maxBytesTuningConfig));
    if (persist) {
        if (allowIncrementalPersists) {
            // persistAll clears rowsCurrentlyInMemory, no need to update it.
  "Flushing in-memory data to disk because %s.", String.join(",", persistReasons));
            // Sum the in-memory bytes of every non-null sink; this is what a persist is expected to release.
            long bytesToBePersisted = 0L;
            for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
                final Sink sinkEntry = entry.getValue();
                if (sinkEntry != null) {
                    bytesToBePersisted += sinkEntry.getBytesInMemory();
                    if (sinkEntry.swappable()) {
                        // After swapping the sink, we use memory mapped segment instead (but only for real time appenderators!).
                        // However, the memory mapped segment still consumes memory.
                        // These memory mapped segments are held in memory throughout the ingestion phase and permanently add to the bytesCurrentlyInMemory
                        // NOTE(review): this reads the outer 'sink', not the loop variable 'sinkEntry' -- confirm intended.
                        int memoryStillInUse = calculateMMappedHydrantMemoryInUsed(sink.getCurrHydrant());
            if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory.get() - bytesToBePersisted > maxBytesTuningConfig) {
                // We are still over maxBytesTuningConfig even after persisting.
                // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
                final String alertMessage = StringUtils.format("Task has exceeded safe estimated heap usage limits, failing " + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])" + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])", sinks.size(), sinks.values().stream().mapToInt(Iterables::size).sum(), getTotalRowCount(), bytesCurrentlyInMemory.get(), bytesToBePersisted, maxBytesTuningConfig);
                final String errorMessage = StringUtils.format("%s.\nThis can occur when the overhead from too many intermediary segment persists becomes to " + "great to have enough space to process additional input rows. This check, along with metering the overhead " + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting " + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter " + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an " + "increase in heap footprint, but will allow for more intermediary segment persists to occur before " + "reaching this condition.", alertMessage);
                // Emit the short form as an alert, then fail with the long explanatory message.
                log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
                throw new RuntimeException(errorMessage);
            // Start the persist; a failure is stored in persistError rather than thrown from here.
            Futures.addCallback(persistAll(committerSupplier == null ? null : committerSupplier.get()), new FutureCallback<Object>() {

                public void onSuccess(@Nullable Object result) {
                // do nothing

                public void onFailure(Throwable t) {
                    persistError = t;
        } else {
            // Incremental persists are not allowed: report the need to the caller instead.
            isPersistRequired = true;
    return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired);
Also used : ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) Iterables(com.google.common.collect.Iterables) Sink(org.apache.druid.segment.realtime.plumber.Sink) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)


IAE ( ISE ( IOException ( ByteBuffer (java.nio.ByteBuffer)19 ArrayList (java.util.ArrayList)16 List (java.util.List)14 Expr (org.apache.druid.math.expr.Expr)14 Nullable (javax.annotation.Nullable)12 ColumnType (org.apache.druid.segment.column.ColumnType)10 HashSet (java.util.HashSet)8 Map (java.util.Map)8 Interval (org.joda.time.Interval)8 VisibleForTesting ( HashMap (java.util.HashMap)7 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)7 File ( Iterables ( Arrays (java.util.Arrays)5 Test (org.junit.Test)5 ImmutableMap (