Example 21 with ParseException

Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class InputRowSerde, method toBytes.

public static SerializeResult toBytes(final Map<String, IndexSerdeTypeHelper> typeHelperMap, final InputRow row, AggregatorFactory[] aggs) {
    try {
        List<String> parseExceptionMessages = new ArrayList<>();
        ByteArrayDataOutput out = ByteStreams.newDataOutput();
        // write timestamp
        out.writeLong(row.getTimestampFromEpoch());
        // writing all dimensions
        List<String> dimList = row.getDimensions();
        WritableUtils.writeVInt(out, dimList.size());
        for (String dim : dimList) {
            IndexSerdeTypeHelper typeHelper = typeHelperMap.get(dim);
            if (typeHelper == null) {
                typeHelper = STRING_HELPER;
            }
            writeString(dim, out);
            try {
                typeHelper.serialize(out, row.getRaw(dim));
            } catch (ParseException pe) {
                parseExceptionMessages.add(pe.getMessage());
            }
        }
        // writing all metrics
        Supplier<InputRow> supplier = () -> row;
        WritableUtils.writeVInt(out, aggs.length);
        for (AggregatorFactory aggFactory : aggs) {
            String k = aggFactory.getName();
            writeString(k, out);
            try (Aggregator agg = aggFactory.factorize(IncrementalIndex.makeColumnSelectorFactory(VirtualColumns.EMPTY, aggFactory, supplier, true))) {
                try {
                    agg.aggregate();
                } catch (ParseException e) {
                    // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                    log.debug(e, "Encountered parse error, skipping aggregator[%s].", k);
                    parseExceptionMessages.add(e.getMessage());
                }
                final ColumnType type = aggFactory.getIntermediateType();
                if (agg.isNull()) {
                    out.writeByte(NullHandling.IS_NULL_BYTE);
                } else {
                    out.writeByte(NullHandling.IS_NOT_NULL_BYTE);
                    if (type.is(ValueType.FLOAT)) {
                        out.writeFloat(agg.getFloat());
                    } else if (type.is(ValueType.LONG)) {
                        WritableUtils.writeVLong(out, agg.getLong());
                    } else if (type.is(ValueType.DOUBLE)) {
                        out.writeDouble(agg.getDouble());
                    } else if (type.is(ValueType.COMPLEX)) {
                        Object val = agg.get();
                        ComplexMetricSerde serde = getComplexMetricSerde(type.getComplexTypeName());
                        writeBytes(serde.toBytes(val), out);
                    } else {
                        throw new IAE("Unable to serialize type[%s]", type.asTypeString());
                    }
                }
            }
        }
        return new SerializeResult(out.toByteArray(), parseExceptionMessages);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ComplexMetricSerde(org.apache.druid.segment.serde.ComplexMetricSerde) ArrayList(java.util.ArrayList) Aggregator(org.apache.druid.query.aggregation.Aggregator) IOException(java.io.IOException) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) ByteArrayDataOutput(com.google.common.io.ByteArrayDataOutput) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) ParseException(org.apache.druid.java.util.common.parsers.ParseException)
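
The point of interest above is that a ParseException raised while serializing a single dimension, or while aggregating a single metric, does not abort the row: the message is collected and returned alongside the serialized bytes in SerializeResult. Below is a minimal, self-contained sketch of that collect-and-continue pattern; RowResult, encodeRow and parseLongField are hypothetical names standing in for InputRowSerde's internals, and only the ParseException constructor form already shown above is used.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.parsers.ParseException;

public class CollectAndContinueSketch {

    // Hypothetical result holder mirroring the bytes-plus-messages shape of SerializeResult.
    static class RowResult {
        final List<Long> encodedValues;
        final List<String> parseExceptionMessages;

        RowResult(List<Long> encodedValues, List<String> parseExceptionMessages) {
            this.encodedValues = encodedValues;
            this.parseExceptionMessages = parseExceptionMessages;
        }
    }

    static RowResult encodeRow(Map<String, Object> row) {
        List<Long> encoded = new ArrayList<>();
        List<String> messages = new ArrayList<>();
        for (Map.Entry<String, Object> field : row.entrySet()) {
            try {
                encoded.add(parseLongField(field.getKey(), field.getValue()));
            } catch (ParseException pe) {
                // Same idea as InputRowSerde.toBytes: remember the problem, keep serializing the row.
                messages.add(pe.getMessage());
            }
        }
        return new RowResult(encoded, messages);
    }

    static long parseLongField(String name, Object value) {
        try {
            return Long.parseLong(String.valueOf(value));
        } catch (NumberFormatException e) {
            throw new ParseException(null, StringUtils.format("could not parse field[%s] value[%s] as a long", name, value));
        }
    }
}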

Example 22 with ParseException

Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class HadoopDruidIndexerMapper, method map.

@Override
protected void map(Object key, Object value, Context context) throws IOException, InterruptedException {
    try {
        final List<InputRow> inputRows = parseInputRow(value, parser);
        for (InputRow inputRow : inputRows) {
            try {
                if (inputRow == null) {
                    // Throw away null rows from the parser.
                    log.debug("Throwing away row [%s]", value);
                    context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).increment(1);
                    continue;
                }
                if (!Intervals.ETERNITY.contains(inputRow.getTimestamp())) {
                    final String errorMsg = StringUtils.format("Encountered row with timestamp that cannot be represented as a long: [%s]", inputRow);
                    throw new ParseException(null, errorMsg);
                }
                if (granularitySpec.inputIntervals().isEmpty() || granularitySpec.bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch())).isPresent()) {
                    innerMap(inputRow, context);
                } else {
                    context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).increment(1);
                }
            } catch (ParseException pe) {
                handleParseException(pe, context);
            }
        }
    } catch (ParseException pe) {
        handleParseException(pe, context);
    } catch (RuntimeException e) {
        throw new RE(e, "Failure on row[%s]", value);
    }
}
Also used : RE(org.apache.druid.java.util.common.RE) InputRow(org.apache.druid.data.input.InputRow) ParseException(org.apache.druid.java.util.common.parsers.ParseException)
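
The mapper handles ParseException at two levels: once around parsing the raw value into rows, and once inside the per-row loop, while any other RuntimeException is rethrown wrapped with the offending value for context. Below is a small, self-contained sketch of that two-level structure under assumed names (RowParser, processRows and handleParseException are illustrative, not Druid APIs):

import java.util.List;

import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.parsers.ParseException;

public class TwoLevelParseHandlingSketch {

    // Hypothetical parser abstraction; parse() may throw ParseException for malformed input.
    interface RowParser {
        List<String> parse(String rawValue);
    }

    private long unparseableCount = 0;

    void processRows(String rawValue, RowParser parser) {
        try {
            List<String> rows = parser.parse(rawValue);
            for (String row : rows) {
                try {
                    if (row == null) {
                        // Throw away null rows, as the mapper does.
                        continue;
                    }
                    ingest(row);
                } catch (ParseException pe) {
                    // A single bad row does not fail the whole batch.
                    handleParseException(pe);
                }
            }
        } catch (ParseException pe) {
            // The raw value itself was unparseable.
            handleParseException(pe);
        } catch (RuntimeException e) {
            // Anything else is unexpected; keep the raw value in the error for debugging.
            throw new RE(e, "Failure on row[%s]", rawValue);
        }
    }

    private void ingest(String row) {
        if (row.isEmpty()) {
            throw new ParseException(null, "empty row");
        }
        // real per-row work would go here
    }

    private void handleParseException(ParseException pe) {
        unparseableCount++;
    }
}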

Example 23 with ParseException

Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class IncrementalIndexAddResultTest, method testHasParseException.

@Test
public void testHasParseException() {
    Assert.assertFalse(new IncrementalIndexAddResult(0, 0L).hasParseException());
    Assert.assertFalse(new IncrementalIndexAddResult(0, 0L, "test").hasParseException());
    Assert.assertTrue(new IncrementalIndexAddResult(0, 0L, new ParseException(null, "test")).hasParseException());
}
Also used : ParseException(org.apache.druid.java.util.common.parsers.ParseException) Test(org.junit.Test)
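
For reference, the ParseException used across these examples is built with a null input string and a preformatted message, and it is that message which later gets collected or reported. A minimal companion test in the same style (not part of the Druid test suite, and assuming the message is kept verbatim when no format arguments are supplied):

import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.parsers.ParseException;
import org.junit.Assert;
import org.junit.Test;

public class ParseExceptionMessageSketchTest {

    @Test
    public void testMessageIsPreserved() {
        // Same construction style as the examples above: null input, preformatted message.
        String errorMsg = StringUtils.format("Encountered row with timestamp that cannot be represented as a long: [%s]", "someRow");
        ParseException pe = new ParseException(null, errorMsg);
        Assert.assertEquals(errorMsg, pe.getMessage());
    }
}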

Example 24 with ParseException

Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class AppenderatorDriverRealtimeIndexTask, method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) {
    runThread = Thread.currentThread();
    authorizerMapper = toolbox.getAuthorizerMapper();
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
    setupTimeoutAlert();
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
    this.metrics = fireDepartmentForMetrics.getMetrics();
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
    appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
    TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
    StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
    try {
        log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
        toolbox.getChatHandlerProvider().register(getId(), this, false);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        }
        driver.startJob(segmentId -> {
            try {
                if (lockGranularity == LockGranularity.SEGMENT) {
                    return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
                } else {
                    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
                    if (lock == null) {
                        return false;
                    }
                    if (lock.isRevoked()) {
                        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
                    }
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        int sequenceNumber = 0;
        String sequenceName = makeSequenceName(getId(), sequenceNumber);
        final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
            if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
            }
            if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
            }
            final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
            return toolbox.getTaskActionClient().submit(action);
        };
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        ingestionState = IngestionState.BUILD_SEGMENTS;
        // Time to read data!
        while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
            try {
                InputRow inputRow = firehose.nextRow();
                if (inputRow == null) {
                    log.debug("Discarded null row, considering thrownAway.");
                    rowIngestionMeters.incrementThrownAway();
                } else {
                    AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
                    if (addResult.isOk()) {
                        final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                        if (isPushRequired) {
                            publishSegments(driver, publisher, committerSupplier, sequenceName);
                            sequenceNumber++;
                            sequenceName = makeSequenceName(getId(), sequenceNumber);
                        }
                    } else {
                        // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                }
            } catch (ParseException e) {
                handleParseException(e);
            }
        }
        ingestionState = IngestionState.COMPLETED;
        if (!gracefullyStopped) {
            synchronized (this) {
                if (gracefullyStopped) {
                    // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                    log.info("Gracefully stopping.");
                } else {
                    finishingJob = true;
                }
            }
            if (finishingJob) {
                log.info("Finishing job...");
                // Publish any remaining segments
                publishSegments(driver, publisher, committerSupplier, sequenceName);
                waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
            }
        } else if (firehose != null) {
            log.info("Task was gracefully stopped, will persist data before exiting");
            persistAndWait(driver, committerSupplier.get());
        }
    } catch (Throwable e) {
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        errorMsg = Throwables.getStackTraceAsString(e);
        toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
        return TaskStatus.failure(getId(), errorMsg);
    } finally {
        toolbox.getChatHandlerProvider().unregister(getId());
        CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
        appenderator.close();
        CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
        toolbox.removeMonitor(metricsMonitor);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().unannounce();
            toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
        }
    }
    log.info("Job done!");
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.success(getId());
}
Also used : StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) LookupNodeService(org.apache.druid.discovery.LookupNodeService) Produces(javax.ws.rs.Produces) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) TimeoutException(java.util.concurrent.TimeoutException) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Timer(java.util.Timer) IngestionState(org.apache.druid.indexer.IngestionState) NoopQueryRunner(org.apache.druid.query.NoopQueryRunner) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) MediaType(javax.ws.rs.core.MediaType) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) Map(java.util.Map) TaskLock(org.apache.druid.indexing.common.TaskLock) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) QueryRunner(org.apache.druid.query.QueryRunner) TimerTask(java.util.TimerTask) DateTimes(org.apache.druid.java.util.common.DateTimes) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) Context(javax.ws.rs.core.Context) RealtimeAppenderatorIngestionSpec(org.apache.druid.indexing.common.index.RealtimeAppenderatorIngestionSpec) ImmutableMap(com.google.common.collect.ImmutableMap) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Action(org.apache.druid.server.security.Action) LockGranularity(org.apache.druid.indexing.common.LockGranularity) TaskRealtimeMetricsMonitorBuilder(org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder) InputRow(org.apache.druid.data.input.InputRow) CountDownLatch(java.util.concurrent.CountDownLatch) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) Response(javax.ws.rs.core.Response) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TaskLockType(org.apache.druid.indexing.common.TaskLockType) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) NodeRole(org.apache.druid.discovery.NodeRole) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Queue(java.util.Queue) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ActionBasedSegmentAllocator(org.apache.druid.indexing.appenderator.ActionBasedSegmentAllocator) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) GET(javax.ws.rs.GET) 
SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Supplier(com.google.common.base.Supplier) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) HttpServletRequest(javax.servlet.http.HttpServletRequest) Query(org.apache.druid.query.Query) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) ActionBasedUsedSegmentChecker(org.apache.druid.indexing.appenderator.ActionBasedUsedSegmentChecker) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) Throwables(com.google.common.base.Throwables) Committers(org.apache.druid.segment.realtime.plumber.Committers) IOException(java.io.IOException) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Futures(com.google.common.util.concurrent.Futures) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) SegmentUtils(org.apache.druid.segment.SegmentUtils) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) AsyncFunction(com.google.common.util.concurrent.AsyncFunction) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DataSchema(org.apache.druid.segment.indexing.DataSchema) CloseableUtils(org.apache.druid.utils.CloseableUtils) Collections(java.util.Collections)
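
Inside the ingestion loop, each ParseException is routed to handleParseException, and the ParseExceptionHandler created at the top of run() is configured with logParseExceptions, maxParseExceptions and maxSavedParseExceptions from the tuning config. The sketch below illustrates one plausible budget policy of that shape; it is an illustration only, not Druid's actual ParseExceptionHandler, and the class and field names are hypothetical.

import java.util.ArrayDeque;
import java.util.Deque;

import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.parsers.ParseException;

// Hypothetical parse-exception budget: count every unparseable row, keep the last few
// messages for reporting, and abort once a configured limit is exceeded.
public class ParseExceptionBudgetSketch {

    private final boolean logParseExceptions;
    private final int maxParseExceptions;
    private final int maxSavedParseExceptions;

    private final Deque<String> savedMessages = new ArrayDeque<>();
    private long unparseableRows = 0;

    public ParseExceptionBudgetSketch(boolean logParseExceptions, int maxParseExceptions, int maxSavedParseExceptions) {
        this.logParseExceptions = logParseExceptions;
        this.maxParseExceptions = maxParseExceptions;
        this.maxSavedParseExceptions = maxSavedParseExceptions;
    }

    public void handle(ParseException pe) {
        unparseableRows++;
        if (logParseExceptions) {
            System.err.println("Parse error: " + pe.getMessage());
        }
        savedMessages.addLast(pe.getMessage());
        while (savedMessages.size() > maxSavedParseExceptions) {
            savedMessages.removeFirst();
        }
        if (unparseableRows > maxParseExceptions) {
            throw new RE(pe, "Exceeded configured maxParseExceptions[%s]", maxParseExceptions);
        }
    }
}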

Example 25 with ParseException

Use of org.apache.druid.java.util.common.parsers.ParseException in project druid by druid-io.

The class IncrementalIndex, method toIncrementalIndexRow.

@VisibleForTesting
IncrementalIndexRowResult toIncrementalIndexRow(InputRow row) {
    row = formatRow(row);
    if (row.getTimestampFromEpoch() < minTimestamp) {
        throw new IAE("Cannot add row[%s] because it is below the minTimestamp[%s]", row, DateTimes.utc(minTimestamp));
    }
    final List<String> rowDimensions = row.getDimensions();
    Object[] dims;
    List<Object> overflow = null;
    long dimsKeySize = 0;
    List<String> parseExceptionMessages = new ArrayList<>();
    synchronized (dimensionDescs) {
        // all known dimensions are assumed missing until we encounter them in rowDimensions
        Set<String> absentDimensions = Sets.newHashSet(dimensionDescs.keySet());
        // first, process dimension values present in the row
        dims = new Object[dimensionDescs.size()];
        for (String dimension : rowDimensions) {
            if (Strings.isNullOrEmpty(dimension)) {
                continue;
            }
            boolean wasNewDim = false;
            DimensionDesc desc = dimensionDescs.get(dimension);
            if (desc != null) {
                absentDimensions.remove(dimension);
            } else {
                wasNewDim = true;
                // for a previously unseen (schemaless) dimension, fall back to STRING capabilities;
                // ideally the handler would be chosen based on the actual value
                desc = addNewDimension(dimension, DimensionHandlerUtils.getHandlerFromCapabilities(dimension, makeDefaultCapabilitiesFromValueType(ColumnType.STRING), null));
            }
            DimensionIndexer indexer = desc.getIndexer();
            Object dimsKey = null;
            try {
                final EncodedKeyComponent<?> encodedKeyComponent = indexer.processRowValsToUnsortedEncodedKeyComponent(row.getRaw(dimension), true);
                dimsKey = encodedKeyComponent.getComponent();
                dimsKeySize += encodedKeyComponent.getEffectiveSizeBytes();
            } catch (ParseException pe) {
                parseExceptionMessages.add(pe.getMessage());
            }
            if (wasNewDim) {
                // unless this is the first row we are processing, all newly discovered columns will be sparse
                if (maxIngestedEventTime != null) {
                    indexer.setSparseIndexed();
                }
                if (overflow == null) {
                    overflow = new ArrayList<>();
                }
                overflow.add(dimsKey);
            } else if (desc.getIndex() > dims.length || dims[desc.getIndex()] != null) {
                /*
           * index > dims.length requires that we saw this dimension and added it to the dimensionOrder map,
           * otherwise index is null. Since dims is initialized based on the size of dimensionOrder on each call to add,
           * it must have been added to dimensionOrder during this InputRow.
           *
           * if we found an index for this dimension it means we've seen it already. If !(index > dims.length) then
           * we saw it on a previous input row (thus it is safe to index into dims). If we found a value in
           * the dims array for this index, it means we have seen this dimension already on this input row.
           */
                throw new ISE("Dimension[%s] occurred more than once in InputRow", dimension);
            } else {
                dims[desc.getIndex()] = dimsKey;
            }
        }
        // process any dimensions with missing values in the row
        for (String missing : absentDimensions) {
            dimensionDescs.get(missing).getIndexer().setSparseIndexed();
        }
    }
    if (overflow != null) {
        // Merge overflow and non-overflow
        Object[] newDims = new Object[dims.length + overflow.size()];
        System.arraycopy(dims, 0, newDims, 0, dims.length);
        for (int i = 0; i < overflow.size(); ++i) {
            newDims[dims.length + i] = overflow.get(i);
        }
        dims = newDims;
    }
    long truncated = 0;
    if (row.getTimestamp() != null) {
        truncated = gran.bucketStart(row.getTimestampFromEpoch());
    }
    IncrementalIndexRow incrementalIndexRow = IncrementalIndexRow.createTimeAndDimswithDimsKeySize(Math.max(truncated, minTimestamp), dims, dimensionDescsList, dimsKeySize);
    return new IncrementalIndexRowResult(incrementalIndexRow, parseExceptionMessages);
}
Also used : CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) IAE(org.apache.druid.java.util.common.IAE) DimensionIndexer(org.apache.druid.segment.DimensionIndexer) ISE(org.apache.druid.java.util.common.ISE) UnparseableColumnsParseException(org.apache.druid.java.util.common.parsers.UnparseableColumnsParseException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
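
As in Example 21, per-dimension ParseExceptions are downgraded to messages and the row still produces an IncrementalIndexRowResult. How those messages are surfaced afterwards is outside this snippet (the imports hint at UnparseableColumnsParseException); the sketch below shows one plausible way a caller could fold them into a single ParseException, reusing only the constructor form already seen above.

import java.util.List;

import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.parsers.ParseException;

public class ParseMessageSummarySketch {

    // Fold per-dimension parse failures into a single ParseException so a caller
    // can decide whether to keep or reject the row. Illustrative only.
    static void throwIfUnparseable(Object row, List<String> parseExceptionMessages) {
        if (!parseExceptionMessages.isEmpty()) {
            throw new ParseException(
                null,
                StringUtils.format("Found unparseable columns in row: [%s], exceptions: [%s]", row, String.join(", ", parseExceptionMessages))
            );
        }
    }
}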

Aggregations

ParseException (org.apache.druid.java.util.common.parsers.ParseException): 30
IOException (java.io.IOException): 9
InputRow (org.apache.druid.data.input.InputRow): 8
Map (java.util.Map): 6
Test (org.junit.Test): 6
ArrayList (java.util.ArrayList): 4
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 4
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 3
List (java.util.List): 3
Nullable (javax.annotation.Nullable): 3
Schema (org.apache.avro.Schema): 3
GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 3
GenericRecord (org.apache.avro.generic.GenericRecord): 3
ISE (org.apache.druid.java.util.common.ISE): 3
JsonCreator (com.fasterxml.jackson.annotation.JsonCreator): 2
JsonProperty (com.fasterxml.jackson.annotation.JsonProperty): 2
JsonNode (com.fasterxml.jackson.databind.JsonNode): 2
Descriptors (com.google.protobuf.Descriptors): 2
DynamicMessage (com.google.protobuf.DynamicMessage): 2
EOFException (java.io.EOFException): 2